]> git.uio.no Git - ifi-stolz-refaktor.git/blame - case-study/jdt-after/ui/org/eclipse/jdt/internal/ui/text/spelling/engine/AbstractSpellDictionary.java
Case Study: adding data and statistics
[ifi-stolz-refaktor.git] / case-study / jdt-after / ui / org / eclipse / jdt / internal / ui / text / spelling / engine / AbstractSpellDictionary.java
CommitLineData
1b2798f6
EK
1/*******************************************************************************
2 * Copyright (c) 2000, 2011 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
7 *
8 * Contributors:
9 * IBM Corporation - initial API and implementation
10 *******************************************************************************/
11package org.eclipse.jdt.internal.ui.text.spelling.engine;
12
13import java.io.BufferedReader;
14import java.io.FileNotFoundException;
15import java.io.IOException;
16import java.io.InputStream;
17import java.io.InputStreamReader;
18import java.io.UnsupportedEncodingException;
19import java.net.MalformedURLException;
20import java.net.URL;
21import java.nio.charset.Charset;
22import java.nio.charset.CharsetDecoder;
23import java.nio.charset.CodingErrorAction;
24import java.nio.charset.MalformedInputException;
25import java.util.ArrayList;
26import java.util.Arrays;
27import java.util.HashMap;
28import java.util.HashSet;
29import java.util.Iterator;
30import java.util.Map;
31import java.util.Set;
32
33import org.eclipse.core.runtime.IStatus;
34import org.eclipse.core.runtime.Status;
35
36import org.eclipse.core.resources.ResourcesPlugin;
37
38import org.eclipse.jdt.internal.corext.util.Messages;
39
40import org.eclipse.jdt.ui.JavaUI;
41import org.eclipse.jdt.ui.PreferenceConstants;
42
43import org.eclipse.jdt.internal.ui.JavaPlugin;
44import org.eclipse.jdt.internal.ui.JavaUIMessages;
45import org.eclipse.jdt.internal.ui.viewsupport.BasicElementLabels;
46
47
48/**
49 * Partial implementation of a spell dictionary.
50 *
51 * @since 3.0
52 */
53public abstract class AbstractSpellDictionary implements ISpellDictionary {
54
55 /**
56 * Byte array wrapper
57 * @since 3.6
58 */
59 static class ByteArrayWrapper {
60
61 private static int hashCode(byte[] array) {
62 int prime= 31;
63 if (array == null)
64 return 0;
65 int result= 1;
66 for (int index= 0; index < array.length; index++) {
67 result= prime * result + array[index];
68 }
69 return result;
70 }
71
72 private byte[] byteArray;
73
74 public ByteArrayWrapper(byte[] byteArray) {
75 this.byteArray= byteArray;
76 }
77 @Override
78 public int hashCode() {
79 final int prime= 31;
80 int result= 1;
81 result= prime * result + ByteArrayWrapper.hashCode(byteArray);
82 return result;
83 }
84
85 @Override
86 public boolean equals(Object obj) {
87 if (this == obj)
88 return true;
89 if (obj == null)
90 return false;
91 if (!(obj instanceof ByteArrayWrapper))
92 return false;
93 ByteArrayWrapper other= (ByteArrayWrapper)obj;
94 return other.generated_4132004541768120010(ByteArrayWrapper.this);
95 }
96 public void generated_1990004924458704530(AbstractSpellDictionary abstractspelldictionary, byte[] wordBytes) {
97 Object bucket= abstractspelldictionary.fHashBuckets.get(this);
98
99 if (bucket == null) {
100 abstractspelldictionary.fHashBuckets.put(this, wordBytes);
101 } else if (bucket instanceof ArrayList) {
102 @SuppressWarnings("unchecked")
103 ArrayList<byte[]> bucketList= (ArrayList<byte[]>)bucket;
104 bucketList.add(wordBytes);
105 } else {
106 ArrayList<Object> list= new ArrayList<Object>(AbstractSpellDictionary.BUCKET_CAPACITY);
107 list.add(bucket);
108 list.add(wordBytes);
109 abstractspelldictionary.fHashBuckets.put(this, list);
110 }
111 }
112 public boolean generated_4132004541768120010(ByteArrayWrapper bytearraywrapper) {
113 if (!Arrays.equals(bytearraywrapper.byteArray, byteArray))
114 return false;
115 return true;
116 }
117 }
118
119
120 /**
121 * Canonical name for UTF-8 encoding
122 * @since 3.6
123 */
124 private static final String UTF_8= "UTF-8"; //$NON-NLS-1$
125
126 /** The bucket capacity */
127 protected static final int BUCKET_CAPACITY= 4;
128
129 /** The word buffer capacity */
130 protected static final int BUFFER_CAPACITY= 32;
131
132 /** The distance threshold */
133 protected static final int DISTANCE_THRESHOLD= 160;
134
135 /**
136 * The hash load factor
137 * @since 3.6
138 */
139 protected static final float LOAD_FACTOR= 0.85f;
140
141 /** The phonetic distance algorithm */
142 private IPhoneticDistanceAlgorithm fDistanceAlgorithm= new DefaultPhoneticDistanceAlgorithm();
143
144 /** The mapping from phonetic hashes to word lists */
145 private final Map<ByteArrayWrapper, Object> fHashBuckets= new HashMap<ByteArrayWrapper, Object>(getInitialSize(), LOAD_FACTOR);
146
147 /** The phonetic hash provider */
148 private IPhoneticHashProvider fHashProvider= new DefaultPhoneticHashProvider();
149
150 /** Is the dictionary already loaded? */
151 private boolean fLoaded= false;
152 /**
153 * Must the dictionary be loaded?
154 * @since 3.2
155 */
156 private boolean fMustLoad= true;
157
158 /**
159 * Tells whether to strip non-letters at word boundaries.
160 * @since 3.3
161 */
162 boolean fIsStrippingNonLetters= true;
163
164 /**
165 * Returns the initial size of dictionary.
166 *
167 * @return The initial size of dictionary.
168 * @since 3.6
169 */
170 protected int getInitialSize() {
171 return 32;
172 }
173
174 /**
175 * Returns all candidates with the same phonetic hash.
176 *
177 * @param hash
178 * The hash to retrieve the candidates of
179 * @return Array of candidates for the phonetic hash
180 */
181 protected final Object getCandidates(final String hash) {
182 ByteArrayWrapper hashBytes;
183 try {
184 hashBytes= new ByteArrayWrapper(hash.getBytes(UTF_8));
185 } catch (UnsupportedEncodingException e) {
186 JavaPlugin.log(e);
187 return null;
188 }
189 return fHashBuckets.get(hashBytes);
190 }
191
192 /**
193 * Returns all candidates that have a phonetic hash within a bounded
194 * distance to the specified word.
195 *
196 * @param word
197 * The word to find the nearest matches for
198 * @param sentence
199 * <code>true</code> iff the proposals start a new sentence,
200 * <code>false</code> otherwise
201 * @param hashs
202 * Array of close hashes to find the matches
203 * @return Set of ranked words with bounded distance to the specified word
204 */
205 protected final Set<RankedWordProposal> getCandidates(final String word, final boolean sentence, final ArrayList<String> hashs) {
206
207 int distance= 0;
208 String hash= null;
209
210 final StringBuffer buffer= new StringBuffer(BUFFER_CAPACITY);
211 final HashSet<RankedWordProposal> result= new HashSet<RankedWordProposal>(BUCKET_CAPACITY * hashs.size());
212
213 for (int index= 0; index < hashs.size(); index++) {
214
215 hash= hashs.get(index);
216
217 final Object candidates= getCandidates(hash);
218 if (candidates == null)
219 continue;
220 else if (candidates instanceof byte[]) {
221 String candidate;
222 try {
223 candidate= new String((byte[])candidates, UTF_8);
224 } catch (UnsupportedEncodingException e) {
225 JavaPlugin.log(e);
226 return result;
227 }
228 distance= fDistanceAlgorithm.getDistance(word, candidate);
229 if (distance < DISTANCE_THRESHOLD) {
230 buffer.setLength(0);
231 buffer.append(candidate);
232 if (sentence)
233 buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
234 result.add(new RankedWordProposal(buffer.toString(), -distance));
235 }
236 continue;
237 }
238
239 @SuppressWarnings("unchecked")
240 final ArrayList<byte[]> candidateList= (ArrayList<byte[]>)candidates;
241 int candidateSize= Math.min(500, candidateList.size()); // see https://bugs.eclipse.org/bugs/show_bug.cgi?id=195357
242 for (int offset= 0; offset < candidateSize; offset++) {
243
244 String candidate;
245 try {
246 candidate= new String(candidateList.get(offset), UTF_8);
247 } catch (UnsupportedEncodingException e) {
248 JavaPlugin.log(e);
249 return result;
250 }
251 distance= fDistanceAlgorithm.getDistance(word, candidate);
252
253 if (distance < DISTANCE_THRESHOLD) {
254
255 buffer.setLength(0);
256 buffer.append(candidate);
257
258 if (sentence)
259 buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
260
261 result.add(new RankedWordProposal(buffer.toString(), -distance));
262 }
263 }
264 }
265 return result;
266 }
267
268 /**
269 * Returns all approximations that have a phonetic hash with smallest
270 * possible distance to the specified word.
271 *
272 * @param word
273 * The word to find the nearest matches for
274 * @param sentence
275 * <code>true</code> iff the proposals start a new sentence,
276 * <code>false</code> otherwise
277 * @param result
278 * Set of ranked words with smallest possible distance to the
279 * specified word
280 */
281 protected final void getCandidates(final String word, final boolean sentence, final Set<RankedWordProposal> result) {
282
283 int distance= 0;
284 int minimum= Integer.MAX_VALUE;
285
286 StringBuffer buffer= new StringBuffer(BUFFER_CAPACITY);
287
288 final Object candidates= getCandidates(fHashProvider.getHash(word));
289 if (candidates == null)
290 return;
291 else if (candidates instanceof byte[]) {
292 String candidate;
293 try {
294 candidate= new String((byte[])candidates, UTF_8);
295 } catch (UnsupportedEncodingException e) {
296 JavaPlugin.log(e);
297 return;
298 }
299 distance= fDistanceAlgorithm.getDistance(word, candidate);
300 buffer.append(candidate);
301 if (sentence)
302 buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
303 result.add(new RankedWordProposal(buffer.toString(), -distance));
304 return;
305 }
306
307 @SuppressWarnings("unchecked")
308 final ArrayList<byte[]> candidateList= (ArrayList<byte[]>)candidates;
309 final ArrayList<RankedWordProposal> matches= new ArrayList<RankedWordProposal>(candidateList.size());
310
311 for (int index= 0; index < candidateList.size(); index++) {
312 String candidate;
313 try {
314 candidate= new String(candidateList.get(index), UTF_8);
315 } catch (UnsupportedEncodingException e) {
316 JavaPlugin.log(e);
317 return;
318 }
319 distance= fDistanceAlgorithm.getDistance(word, candidate);
320
321 if (distance <= minimum) {
322
323 if (distance < minimum)
324 matches.clear();
325
326 buffer.setLength(0);
327 buffer.append(candidate);
328
329 if (sentence)
330 buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
331
332 matches.add(new RankedWordProposal(buffer.toString(), -distance));
333 minimum= distance;
334 }
335 }
336
337 result.addAll(matches);
338 }
339
340 /**
341 * Tells whether this dictionary is empty.
342 *
343 * @return <code>true</code> if this dictionary is empty
344 * @since 3.3
345 */
346 protected boolean isEmpty() {
347 return fHashBuckets.size() == 0;
348 }
349
350 /**
351 * Returns the used phonetic distance algorithm.
352 *
353 * @return The phonetic distance algorithm
354 */
355 protected final IPhoneticDistanceAlgorithm getDistanceAlgorithm() {
356 return fDistanceAlgorithm;
357 }
358
359 /**
360 * Returns the used phonetic hash provider.
361 *
362 * @return The phonetic hash provider
363 */
364 protected final IPhoneticHashProvider getHashProvider() {
365 return fHashProvider;
366 }
367
368 /*
369 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#getProposals(java.lang.String,boolean)
370 */
371 public Set<RankedWordProposal> getProposals(final String word, final boolean sentence) {
372
373 try {
374
375 if (!fLoaded) {
376 synchronized (this) {
377 fLoaded= load(getURL());
378 if (fLoaded)
379 compact();
380 }
381 }
382
383 } catch (MalformedURLException exception) {
384 // Do nothing
385 }
386
387 final String hash= fHashProvider.getHash(word);
388 final char[] mutators= fHashProvider.getMutators();
389
390 final ArrayList<String> neighborhood= new ArrayList<String>((word.length() + 1) * (mutators.length + 2));
391 neighborhood.add(hash);
392
393 final Set<RankedWordProposal> candidates= getCandidates(word, sentence, neighborhood);
394 neighborhood.clear();
395
396 char previous= 0;
397 char next= 0;
398
399 char[] characters= word.toCharArray();
400 for (int index= 0; index < word.length() - 1; index++) {
401
402 next= characters[index];
403 previous= characters[index + 1];
404
405 characters[index]= previous;
406 characters[index + 1]= next;
407
408 neighborhood.add(fHashProvider.getHash(new String(characters)));
409
410 characters[index]= next;
411 characters[index + 1]= previous;
412 }
413
414 final String sentinel= word + " "; //$NON-NLS-1$
415
416 characters= sentinel.toCharArray();
417 int offset= characters.length - 1;
418
419 while (true) {
420
421 for (int index= 0; index < mutators.length; index++) {
422
423 characters[offset]= mutators[index];
424 neighborhood.add(fHashProvider.getHash(new String(characters)));
425 }
426
427 if (offset == 0)
428 break;
429
430 characters[offset]= characters[offset - 1];
431 --offset;
432 }
433
434 char mutated= 0;
435 characters= word.toCharArray();
436
437 for (int index= 0; index < word.length(); index++) {
438
439 mutated= characters[index];
440 for (int mutator= 0; mutator < mutators.length; mutator++) {
441
442 characters[index]= mutators[mutator];
443 neighborhood.add(fHashProvider.getHash(new String(characters)));
444 }
445 characters[index]= mutated;
446 }
447
448 characters= word.toCharArray();
449 final char[] deleted= new char[characters.length - 1];
450
451 for (int index= 0; index < deleted.length; index++)
452 deleted[index]= characters[index];
453
454 next= characters[characters.length - 1];
455 offset= deleted.length;
456
457 while (true) {
458
459 neighborhood.add(fHashProvider.getHash(new String(characters)));
460 if (offset == 0)
461 break;
462
463 previous= next;
464 next= deleted[offset - 1];
465
466 deleted[offset - 1]= previous;
467 --offset;
468 }
469
470 neighborhood.remove(hash);
471 final Set<RankedWordProposal> matches= getCandidates(word, sentence, neighborhood);
472
473 if (matches.size() == 0 && candidates.size() == 0)
474 getCandidates(word, sentence, candidates);
475
476 candidates.addAll(matches);
477
478 return candidates;
479 }
480
481 /**
482 * Returns the URL of the dictionary word list.
483 *
484 * @throws MalformedURLException
485 * if the URL could not be retrieved
486 * @return The URL of the dictionary word list
487 */
488 protected abstract URL getURL() throws MalformedURLException;
489
490 /**
491 * Hashes the word into the dictionary.
492 *
493 * @param word
494 * The word to hash in the dictionary
495 */
496 protected final void hashWord(final String word) {
497
498 final String hash= fHashProvider.getHash(word);
499 ByteArrayWrapper hashBytes;
500 byte[] wordBytes;
501 try {
502 hashBytes= new ByteArrayWrapper(hash.getBytes(UTF_8));
503 wordBytes= word.getBytes(UTF_8);
504 } catch (UnsupportedEncodingException e) {
505 JavaPlugin.log(e);
506 return;
507 }
508
509 hashBytes.generated_1990004924458704530(this, wordBytes);
510 }
511
512 /*
513 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#isCorrect(java.lang.String)
514 */
515 public boolean isCorrect(String word) {
516 word= stripNonLetters(word);
517 try {
518
519 if (!fLoaded) {
520 synchronized (this) {
521 fLoaded= load(getURL());
522 if (fLoaded)
523 compact();
524 }
525 }
526
527 } catch (MalformedURLException exception) {
528 // Do nothing
529 }
530
531 final Object candidates= getCandidates(fHashProvider.getHash(word));
532 if (candidates == null)
533 return false;
534 else if (candidates instanceof byte[]) {
535 String candidate;
536 try {
537 candidate= new String((byte[])candidates, UTF_8);
538 } catch (UnsupportedEncodingException e) {
539 JavaPlugin.log(e);
540 return false;
541 }
542 if (candidate.equals(word) || candidate.equals(word.toLowerCase()))
543 return true;
544 return false;
545 }
546 @SuppressWarnings("unchecked")
547 final ArrayList<byte[]> candidateList= (ArrayList<byte[]>)candidates;
548 byte[] wordBytes;
549 byte[] lowercaseWordBytes;
550 try {
551 wordBytes= word.getBytes(UTF_8);
552 lowercaseWordBytes= word.toLowerCase().getBytes(UTF_8);
553 } catch (UnsupportedEncodingException e) {
554 JavaPlugin.log(e);
555 return false;
556 }
557 for (int index= 0; index < candidateList.size(); index++) {
558 byte[] candidate= candidateList.get(index);
559 if (Arrays.equals(candidate, wordBytes) || Arrays.equals(candidate, lowercaseWordBytes)) {
560 return true;
561 }
562 }
563 return false;
564 }
565
566 /*
567 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#setStripNonLetters(boolean)
568 * @since 3.3
569 */
570 public void setStripNonLetters(boolean state) {
571 fIsStrippingNonLetters= state;
572 }
573
574 /**
575 * Strips non-letter characters from the given word.
576 * <p>
577 * This will only happen if the corresponding preference is enabled.
578 * </p>
579 *
580 * @param word the word to strip
581 * @return the stripped word
582 * @since 3.3
583 */
584 protected String stripNonLetters(String word) {
585 if (!fIsStrippingNonLetters)
586 return word;
587
588 int i= 0;
589 int j= word.length() - 1;
590 while (i <= j && !Character.isLetter(word.charAt(i)))
591 i++;
592 if (i > j)
593 return ""; //$NON-NLS-1$
594
595 while (j > i && !Character.isLetter(word.charAt(j)))
596 j--;
597
598 return word.substring(i, j+1);
599 }
600
601 /*
602 * @see org.eclipse.jdt.ui.text.spelling.engine.ISpellDictionary#isLoaded()
603 */
604 public synchronized final boolean isLoaded() {
605 return fLoaded || fHashBuckets.size() > 0;
606 }
607
608 /**
609 * Loads a dictionary word list from disk.
610 *
611 * @param url
612 * The URL of the word list to load
613 * @return <code>true</code> iff the word list could be loaded, <code>false</code>
614 * otherwise
615 */
616 protected synchronized boolean load(final URL url) {
617 if (!fMustLoad)
618 return fLoaded;
619
620 if (url != null) {
621 InputStream stream= null;
622 int line= 0;
623 try {
624 stream= url.openStream();
625 if (stream != null) {
626 String word= null;
627
628 // Setup a reader with a decoder in order to read over malformed input if needed.
629 CharsetDecoder decoder= Charset.forName(getEncoding()).newDecoder();
630 decoder.onMalformedInput(CodingErrorAction.REPORT);
631 decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
632 final BufferedReader reader= new BufferedReader(new InputStreamReader(stream, decoder));
633
634 boolean doRead= true;
635 while (doRead) {
636 try {
637 word= reader.readLine();
638 } catch (MalformedInputException ex) {
639 // Tell the decoder to replace malformed input in order to read the line.
640 decoder.onMalformedInput(CodingErrorAction.REPLACE);
641 decoder.reset();
642 word= reader.readLine();
643 decoder.onMalformedInput(CodingErrorAction.REPORT);
644
645 String message= Messages.format(JavaUIMessages.AbstractSpellingDictionary_encodingError, new String[] { word, decoder.replacement(), BasicElementLabels.getURLPart(url.toString()) });
646 IStatus status= new Status(IStatus.ERROR, JavaUI.ID_PLUGIN, IStatus.OK, message, ex);
647 JavaPlugin.log(status);
648
649 doRead= word != null;
650 continue;
651 }
652 doRead= word != null;
653 if (doRead)
654 hashWord(word);
655 }
656 return true;
657 }
658 } catch (FileNotFoundException ex) {
659 String urlString= url.toString();
660 String lowercaseUrlString= urlString.toLowerCase();
661 if (urlString.equals(lowercaseUrlString))
662 JavaPlugin.log(ex);
663 else
664 try {
665 return load(new URL(lowercaseUrlString));
666 } catch (MalformedURLException e) {
667 JavaPlugin.log(e);
668 }
669 } catch (IOException exception) {
670 if (line > 0) {
671 String message= Messages.format(JavaUIMessages.AbstractSpellingDictionary_encodingError, new Object[] { new Integer(line), BasicElementLabels.getURLPart(url.toString()) });
672 IStatus status= new Status(IStatus.ERROR, JavaUI.ID_PLUGIN, IStatus.OK, message, exception);
673 JavaPlugin.log(status);
674 } else
675 JavaPlugin.log(exception);
676 } finally {
677 fMustLoad= false;
678 try {
679 if (stream != null)
680 stream.close();
681 } catch (IOException x) {
682 }
683 }
684 }
685 return false;
686 }
687
688 /**
689 * Compacts the dictionary.
690 *
691 * @since 3.3.
692 */
693 private void compact() {
694 Iterator<Object> iter= fHashBuckets.values().iterator();
695 while (iter.hasNext()) {
696 Object element= iter.next();
697 if (element instanceof ArrayList)
698 ((ArrayList<?>)element).trimToSize();
699 }
700 }
701
702 /**
703 * Sets the phonetic distance algorithm to use.
704 *
705 * @param algorithm
706 * The phonetic distance algorithm
707 */
708 protected final void setDistanceAlgorithm(final IPhoneticDistanceAlgorithm algorithm) {
709 fDistanceAlgorithm= algorithm;
710 }
711
712 /**
713 * Sets the phonetic hash provider to use.
714 *
715 * @param provider
716 * The phonetic hash provider
717 */
718 protected final void setHashProvider(final IPhoneticHashProvider provider) {
719 fHashProvider= provider;
720 }
721
722 /*
723 * @see org.eclipse.jdt.ui.text.spelling.engine.ISpellDictionary#unload()
724 */
725 public synchronized void unload() {
726 fLoaded= false;
727 fMustLoad= true;
728 fHashBuckets.clear();
729 }
730
731 /*
732 * @see org.eclipse.jdt.ui.text.spelling.engine.ISpellDictionary#acceptsWords()
733 */
734 public boolean acceptsWords() {
735 return false;
736 }
737
738 /*
739 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#addWord(java.lang.String)
740 */
741 public void addWord(final String word) {
742 // Do nothing
743 }
744
745 /**
746 * Returns the encoding of this dictionary.
747 *
748 * @return the encoding of this dictionary
749 * @since 3.3
750 */
751 protected String getEncoding() {
752 String encoding= JavaPlugin.getDefault().getPreferenceStore().getString(PreferenceConstants.SPELLING_USER_DICTIONARY_ENCODING);
753 if (encoding == null || encoding.length() == 0)
754 encoding= ResourcesPlugin.getEncoding();
755 return encoding;
756 }
757
758}