]> git.uio.no Git - ifi-stolz-refaktor.git/blame - case-study/jdt-before/ui/org/eclipse/jdt/internal/ui/text/spelling/engine/AbstractSpellDictionary.java
Case Study: adding data and statistics
[ifi-stolz-refaktor.git] / case-study / jdt-before / ui / org / eclipse / jdt / internal / ui / text / spelling / engine / AbstractSpellDictionary.java
CommitLineData
1b2798f6
EK
1/*******************************************************************************
2 * Copyright (c) 2000, 2011 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
7 *
8 * Contributors:
9 * IBM Corporation - initial API and implementation
10 *******************************************************************************/
11package org.eclipse.jdt.internal.ui.text.spelling.engine;
12
13import java.io.BufferedReader;
14import java.io.FileNotFoundException;
15import java.io.IOException;
16import java.io.InputStream;
17import java.io.InputStreamReader;
18import java.io.UnsupportedEncodingException;
19import java.net.MalformedURLException;
20import java.net.URL;
21import java.nio.charset.Charset;
22import java.nio.charset.CharsetDecoder;
23import java.nio.charset.CodingErrorAction;
24import java.nio.charset.MalformedInputException;
25import java.util.ArrayList;
26import java.util.Arrays;
27import java.util.HashMap;
28import java.util.HashSet;
29import java.util.Iterator;
30import java.util.Map;
31import java.util.Set;
32
33import org.eclipse.core.runtime.IStatus;
34import org.eclipse.core.runtime.Status;
35
36import org.eclipse.core.resources.ResourcesPlugin;
37
38import org.eclipse.jdt.internal.corext.util.Messages;
39
40import org.eclipse.jdt.ui.JavaUI;
41import org.eclipse.jdt.ui.PreferenceConstants;
42
43import org.eclipse.jdt.internal.ui.JavaPlugin;
44import org.eclipse.jdt.internal.ui.JavaUIMessages;
45import org.eclipse.jdt.internal.ui.viewsupport.BasicElementLabels;
46
47
48/**
49 * Partial implementation of a spell dictionary.
50 *
51 * @since 3.0
52 */
53public abstract class AbstractSpellDictionary implements ISpellDictionary {
54
55 /**
56 * Byte array wrapper
57 * @since 3.6
58 */
59 private static class ByteArrayWrapper {
60
61 private static int hashCode(byte[] array) {
62 int prime= 31;
63 if (array == null)
64 return 0;
65 int result= 1;
66 for (int index= 0; index < array.length; index++) {
67 result= prime * result + array[index];
68 }
69 return result;
70 }
71
72 private byte[] byteArray;
73
74 public ByteArrayWrapper(byte[] byteArray) {
75 this.byteArray= byteArray;
76 }
77 @Override
78 public int hashCode() {
79 final int prime= 31;
80 int result= 1;
81 result= prime * result + ByteArrayWrapper.hashCode(byteArray);
82 return result;
83 }
84
85 @Override
86 public boolean equals(Object obj) {
87 if (this == obj)
88 return true;
89 if (obj == null)
90 return false;
91 if (!(obj instanceof ByteArrayWrapper))
92 return false;
93 ByteArrayWrapper other= (ByteArrayWrapper)obj;
94 if (!Arrays.equals(byteArray, other.byteArray))
95 return false;
96 return true;
97 }
98 }
99
100
/**
 * Canonical name of the UTF-8 encoding. Hashes and words are converted
 * to and from bytes with this encoding before they are stored in or read
 * from the hash buckets.
 * @since 3.6
 */
private static final String UTF_8= "UTF-8"; //$NON-NLS-1$

/**
 * The bucket capacity: initial capacity of the list created when a second
 * word is hashed into the same bucket, and the per-hash sizing factor of
 * the proposal result set.
 */
protected static final int BUCKET_CAPACITY= 4;

/** The word buffer capacity: initial capacity of the buffers used to build proposal texts. */
protected static final int BUFFER_CAPACITY= 32;

/**
 * The distance threshold: a candidate is only proposed for a neighborhood
 * hash if its phonetic distance to the word is strictly below this value.
 */
protected static final int DISTANCE_THRESHOLD= 160;

/**
 * The hash load factor passed to the map holding the hash buckets.
 * @since 3.6
 */
protected static final float LOAD_FACTOR= 0.85f;

/** The phonetic distance algorithm used to rank candidates against a word. */
private IPhoneticDistanceAlgorithm fDistanceAlgorithm= new DefaultPhoneticDistanceAlgorithm();

/**
 * The mapping from phonetic hashes (UTF-8 bytes) to words. A value is
 * either a single <code>byte[]</code> (exactly one word with that hash) or
 * an <code>ArrayList</code> of <code>byte[]</code> (several words).
 * Note that the initial size is obtained from the overridable
 * {@link #getInitialSize()} while this instance is being constructed.
 */
private final Map<ByteArrayWrapper, Object> fHashBuckets= new HashMap<ByteArrayWrapper, Object>(getInitialSize(), LOAD_FACTOR);

/** The phonetic hash provider used to compute the bucket key of a word. */
private IPhoneticHashProvider fHashProvider= new DefaultPhoneticHashProvider();

/** Is the dictionary already loaded? Set from the result of {@link #load(URL)}, reset by {@link #unload()}. */
private boolean fLoaded= false;
/**
 * Must the dictionary be loaded? Cleared once a load from a non-null URL
 * has been attempted (successfully or not) and set again by
 * {@link #unload()}; while cleared, {@link #load(URL)} returns immediately.
 * @since 3.2
 */
private boolean fMustLoad= true;

/**
 * Tells whether to strip non-letters at word boundaries.
 * @since 3.3
 */
boolean fIsStrippingNonLetters= true;
144
145 /**
146 * Returns the initial size of dictionary.
147 *
148 * @return The initial size of dictionary.
149 * @since 3.6
150 */
151 protected int getInitialSize() {
152 return 32;
153 }
154
155 /**
156 * Returns all candidates with the same phonetic hash.
157 *
158 * @param hash
159 * The hash to retrieve the candidates of
160 * @return Array of candidates for the phonetic hash
161 */
162 protected final Object getCandidates(final String hash) {
163 ByteArrayWrapper hashBytes;
164 try {
165 hashBytes= new ByteArrayWrapper(hash.getBytes(UTF_8));
166 } catch (UnsupportedEncodingException e) {
167 JavaPlugin.log(e);
168 return null;
169 }
170 return fHashBuckets.get(hashBytes);
171 }
172
173 /**
174 * Returns all candidates that have a phonetic hash within a bounded
175 * distance to the specified word.
176 *
177 * @param word
178 * The word to find the nearest matches for
179 * @param sentence
180 * <code>true</code> iff the proposals start a new sentence,
181 * <code>false</code> otherwise
182 * @param hashs
183 * Array of close hashes to find the matches
184 * @return Set of ranked words with bounded distance to the specified word
185 */
186 protected final Set<RankedWordProposal> getCandidates(final String word, final boolean sentence, final ArrayList<String> hashs) {
187
188 int distance= 0;
189 String hash= null;
190
191 final StringBuffer buffer= new StringBuffer(BUFFER_CAPACITY);
192 final HashSet<RankedWordProposal> result= new HashSet<RankedWordProposal>(BUCKET_CAPACITY * hashs.size());
193
194 for (int index= 0; index < hashs.size(); index++) {
195
196 hash= hashs.get(index);
197
198 final Object candidates= getCandidates(hash);
199 if (candidates == null)
200 continue;
201 else if (candidates instanceof byte[]) {
202 String candidate;
203 try {
204 candidate= new String((byte[])candidates, UTF_8);
205 } catch (UnsupportedEncodingException e) {
206 JavaPlugin.log(e);
207 return result;
208 }
209 distance= fDistanceAlgorithm.getDistance(word, candidate);
210 if (distance < DISTANCE_THRESHOLD) {
211 buffer.setLength(0);
212 buffer.append(candidate);
213 if (sentence)
214 buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
215 result.add(new RankedWordProposal(buffer.toString(), -distance));
216 }
217 continue;
218 }
219
220 @SuppressWarnings("unchecked")
221 final ArrayList<byte[]> candidateList= (ArrayList<byte[]>)candidates;
222 int candidateSize= Math.min(500, candidateList.size()); // see https://bugs.eclipse.org/bugs/show_bug.cgi?id=195357
223 for (int offset= 0; offset < candidateSize; offset++) {
224
225 String candidate;
226 try {
227 candidate= new String(candidateList.get(offset), UTF_8);
228 } catch (UnsupportedEncodingException e) {
229 JavaPlugin.log(e);
230 return result;
231 }
232 distance= fDistanceAlgorithm.getDistance(word, candidate);
233
234 if (distance < DISTANCE_THRESHOLD) {
235
236 buffer.setLength(0);
237 buffer.append(candidate);
238
239 if (sentence)
240 buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
241
242 result.add(new RankedWordProposal(buffer.toString(), -distance));
243 }
244 }
245 }
246 return result;
247 }
248
249 /**
250 * Returns all approximations that have a phonetic hash with smallest
251 * possible distance to the specified word.
252 *
253 * @param word
254 * The word to find the nearest matches for
255 * @param sentence
256 * <code>true</code> iff the proposals start a new sentence,
257 * <code>false</code> otherwise
258 * @param result
259 * Set of ranked words with smallest possible distance to the
260 * specified word
261 */
262 protected final void getCandidates(final String word, final boolean sentence, final Set<RankedWordProposal> result) {
263
264 int distance= 0;
265 int minimum= Integer.MAX_VALUE;
266
267 StringBuffer buffer= new StringBuffer(BUFFER_CAPACITY);
268
269 final Object candidates= getCandidates(fHashProvider.getHash(word));
270 if (candidates == null)
271 return;
272 else if (candidates instanceof byte[]) {
273 String candidate;
274 try {
275 candidate= new String((byte[])candidates, UTF_8);
276 } catch (UnsupportedEncodingException e) {
277 JavaPlugin.log(e);
278 return;
279 }
280 distance= fDistanceAlgorithm.getDistance(word, candidate);
281 buffer.append(candidate);
282 if (sentence)
283 buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
284 result.add(new RankedWordProposal(buffer.toString(), -distance));
285 return;
286 }
287
288 @SuppressWarnings("unchecked")
289 final ArrayList<byte[]> candidateList= (ArrayList<byte[]>)candidates;
290 final ArrayList<RankedWordProposal> matches= new ArrayList<RankedWordProposal>(candidateList.size());
291
292 for (int index= 0; index < candidateList.size(); index++) {
293 String candidate;
294 try {
295 candidate= new String(candidateList.get(index), UTF_8);
296 } catch (UnsupportedEncodingException e) {
297 JavaPlugin.log(e);
298 return;
299 }
300 distance= fDistanceAlgorithm.getDistance(word, candidate);
301
302 if (distance <= minimum) {
303
304 if (distance < minimum)
305 matches.clear();
306
307 buffer.setLength(0);
308 buffer.append(candidate);
309
310 if (sentence)
311 buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
312
313 matches.add(new RankedWordProposal(buffer.toString(), -distance));
314 minimum= distance;
315 }
316 }
317
318 result.addAll(matches);
319 }
320
321 /**
322 * Tells whether this dictionary is empty.
323 *
324 * @return <code>true</code> if this dictionary is empty
325 * @since 3.3
326 */
327 protected boolean isEmpty() {
328 return fHashBuckets.size() == 0;
329 }
330
331 /**
332 * Returns the used phonetic distance algorithm.
333 *
334 * @return The phonetic distance algorithm
335 */
336 protected final IPhoneticDistanceAlgorithm getDistanceAlgorithm() {
337 return fDistanceAlgorithm;
338 }
339
340 /**
341 * Returns the used phonetic hash provider.
342 *
343 * @return The phonetic hash provider
344 */
345 protected final IPhoneticHashProvider getHashProvider() {
346 return fHashProvider;
347 }
348
349 /*
350 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#getProposals(java.lang.String,boolean)
351 */
352 public Set<RankedWordProposal> getProposals(final String word, final boolean sentence) {
353
354 try {
355
356 if (!fLoaded) {
357 synchronized (this) {
358 fLoaded= load(getURL());
359 if (fLoaded)
360 compact();
361 }
362 }
363
364 } catch (MalformedURLException exception) {
365 // Do nothing
366 }
367
368 final String hash= fHashProvider.getHash(word);
369 final char[] mutators= fHashProvider.getMutators();
370
371 final ArrayList<String> neighborhood= new ArrayList<String>((word.length() + 1) * (mutators.length + 2));
372 neighborhood.add(hash);
373
374 final Set<RankedWordProposal> candidates= getCandidates(word, sentence, neighborhood);
375 neighborhood.clear();
376
377 char previous= 0;
378 char next= 0;
379
380 char[] characters= word.toCharArray();
381 for (int index= 0; index < word.length() - 1; index++) {
382
383 next= characters[index];
384 previous= characters[index + 1];
385
386 characters[index]= previous;
387 characters[index + 1]= next;
388
389 neighborhood.add(fHashProvider.getHash(new String(characters)));
390
391 characters[index]= next;
392 characters[index + 1]= previous;
393 }
394
395 final String sentinel= word + " "; //$NON-NLS-1$
396
397 characters= sentinel.toCharArray();
398 int offset= characters.length - 1;
399
400 while (true) {
401
402 for (int index= 0; index < mutators.length; index++) {
403
404 characters[offset]= mutators[index];
405 neighborhood.add(fHashProvider.getHash(new String(characters)));
406 }
407
408 if (offset == 0)
409 break;
410
411 characters[offset]= characters[offset - 1];
412 --offset;
413 }
414
415 char mutated= 0;
416 characters= word.toCharArray();
417
418 for (int index= 0; index < word.length(); index++) {
419
420 mutated= characters[index];
421 for (int mutator= 0; mutator < mutators.length; mutator++) {
422
423 characters[index]= mutators[mutator];
424 neighborhood.add(fHashProvider.getHash(new String(characters)));
425 }
426 characters[index]= mutated;
427 }
428
429 characters= word.toCharArray();
430 final char[] deleted= new char[characters.length - 1];
431
432 for (int index= 0; index < deleted.length; index++)
433 deleted[index]= characters[index];
434
435 next= characters[characters.length - 1];
436 offset= deleted.length;
437
438 while (true) {
439
440 neighborhood.add(fHashProvider.getHash(new String(characters)));
441 if (offset == 0)
442 break;
443
444 previous= next;
445 next= deleted[offset - 1];
446
447 deleted[offset - 1]= previous;
448 --offset;
449 }
450
451 neighborhood.remove(hash);
452 final Set<RankedWordProposal> matches= getCandidates(word, sentence, neighborhood);
453
454 if (matches.size() == 0 && candidates.size() == 0)
455 getCandidates(word, sentence, candidates);
456
457 candidates.addAll(matches);
458
459 return candidates;
460 }
461
/**
 * Returns the URL of the dictionary word list.
 * <p>
 * The returned URL is handed to {@link #load(URL)} when the dictionary is
 * loaded on first use; a <code>null</code> URL makes that load report
 * failure. A thrown <code>MalformedURLException</code> is silently ignored
 * by the callers in this class, leaving the dictionary unloaded.
 * </p>
 *
 * @throws MalformedURLException
 *                    if the URL could not be retrieved
 * @return The URL of the dictionary word list
 */
protected abstract URL getURL() throws MalformedURLException;
470
471 /**
472 * Hashes the word into the dictionary.
473 *
474 * @param word
475 * The word to hash in the dictionary
476 */
477 protected final void hashWord(final String word) {
478
479 final String hash= fHashProvider.getHash(word);
480 ByteArrayWrapper hashBytes;
481 byte[] wordBytes;
482 try {
483 hashBytes= new ByteArrayWrapper(hash.getBytes(UTF_8));
484 wordBytes= word.getBytes(UTF_8);
485 } catch (UnsupportedEncodingException e) {
486 JavaPlugin.log(e);
487 return;
488 }
489
490 Object bucket= fHashBuckets.get(hashBytes);
491
492 if (bucket == null) {
493 fHashBuckets.put(hashBytes, wordBytes);
494 } else if (bucket instanceof ArrayList) {
495 @SuppressWarnings("unchecked")
496 ArrayList<byte[]> bucketList= (ArrayList<byte[]>)bucket;
497 bucketList.add(wordBytes);
498 } else {
499 ArrayList<Object> list= new ArrayList<Object>(BUCKET_CAPACITY);
500 list.add(bucket);
501 list.add(wordBytes);
502 fHashBuckets.put(hashBytes, list);
503 }
504 }
505
506 /*
507 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#isCorrect(java.lang.String)
508 */
509 public boolean isCorrect(String word) {
510 word= stripNonLetters(word);
511 try {
512
513 if (!fLoaded) {
514 synchronized (this) {
515 fLoaded= load(getURL());
516 if (fLoaded)
517 compact();
518 }
519 }
520
521 } catch (MalformedURLException exception) {
522 // Do nothing
523 }
524
525 final Object candidates= getCandidates(fHashProvider.getHash(word));
526 if (candidates == null)
527 return false;
528 else if (candidates instanceof byte[]) {
529 String candidate;
530 try {
531 candidate= new String((byte[])candidates, UTF_8);
532 } catch (UnsupportedEncodingException e) {
533 JavaPlugin.log(e);
534 return false;
535 }
536 if (candidate.equals(word) || candidate.equals(word.toLowerCase()))
537 return true;
538 return false;
539 }
540 @SuppressWarnings("unchecked")
541 final ArrayList<byte[]> candidateList= (ArrayList<byte[]>)candidates;
542 byte[] wordBytes;
543 byte[] lowercaseWordBytes;
544 try {
545 wordBytes= word.getBytes(UTF_8);
546 lowercaseWordBytes= word.toLowerCase().getBytes(UTF_8);
547 } catch (UnsupportedEncodingException e) {
548 JavaPlugin.log(e);
549 return false;
550 }
551 for (int index= 0; index < candidateList.size(); index++) {
552 byte[] candidate= candidateList.get(index);
553 if (Arrays.equals(candidate, wordBytes) || Arrays.equals(candidate, lowercaseWordBytes)) {
554 return true;
555 }
556 }
557 return false;
558 }
559
560 /*
561 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#setStripNonLetters(boolean)
562 * @since 3.3
563 */
564 public void setStripNonLetters(boolean state) {
565 fIsStrippingNonLetters= state;
566 }
567
568 /**
569 * Strips non-letter characters from the given word.
570 * <p>
571 * This will only happen if the corresponding preference is enabled.
572 * </p>
573 *
574 * @param word the word to strip
575 * @return the stripped word
576 * @since 3.3
577 */
578 protected String stripNonLetters(String word) {
579 if (!fIsStrippingNonLetters)
580 return word;
581
582 int i= 0;
583 int j= word.length() - 1;
584 while (i <= j && !Character.isLetter(word.charAt(i)))
585 i++;
586 if (i > j)
587 return ""; //$NON-NLS-1$
588
589 while (j > i && !Character.isLetter(word.charAt(j)))
590 j--;
591
592 return word.substring(i, j+1);
593 }
594
595 /*
596 * @see org.eclipse.jdt.ui.text.spelling.engine.ISpellDictionary#isLoaded()
597 */
598 public synchronized final boolean isLoaded() {
599 return fLoaded || fHashBuckets.size() > 0;
600 }
601
602 /**
603 * Loads a dictionary word list from disk.
604 *
605 * @param url
606 * The URL of the word list to load
607 * @return <code>true</code> iff the word list could be loaded, <code>false</code>
608 * otherwise
609 */
610 protected synchronized boolean load(final URL url) {
611 if (!fMustLoad)
612 return fLoaded;
613
614 if (url != null) {
615 InputStream stream= null;
616 int line= 0;
617 try {
618 stream= url.openStream();
619 if (stream != null) {
620 String word= null;
621
622 // Setup a reader with a decoder in order to read over malformed input if needed.
623 CharsetDecoder decoder= Charset.forName(getEncoding()).newDecoder();
624 decoder.onMalformedInput(CodingErrorAction.REPORT);
625 decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
626 final BufferedReader reader= new BufferedReader(new InputStreamReader(stream, decoder));
627
628 boolean doRead= true;
629 while (doRead) {
630 try {
631 word= reader.readLine();
632 } catch (MalformedInputException ex) {
633 // Tell the decoder to replace malformed input in order to read the line.
634 decoder.onMalformedInput(CodingErrorAction.REPLACE);
635 decoder.reset();
636 word= reader.readLine();
637 decoder.onMalformedInput(CodingErrorAction.REPORT);
638
639 String message= Messages.format(JavaUIMessages.AbstractSpellingDictionary_encodingError, new String[] { word, decoder.replacement(), BasicElementLabels.getURLPart(url.toString()) });
640 IStatus status= new Status(IStatus.ERROR, JavaUI.ID_PLUGIN, IStatus.OK, message, ex);
641 JavaPlugin.log(status);
642
643 doRead= word != null;
644 continue;
645 }
646 doRead= word != null;
647 if (doRead)
648 hashWord(word);
649 }
650 return true;
651 }
652 } catch (FileNotFoundException ex) {
653 String urlString= url.toString();
654 String lowercaseUrlString= urlString.toLowerCase();
655 if (urlString.equals(lowercaseUrlString))
656 JavaPlugin.log(ex);
657 else
658 try {
659 return load(new URL(lowercaseUrlString));
660 } catch (MalformedURLException e) {
661 JavaPlugin.log(e);
662 }
663 } catch (IOException exception) {
664 if (line > 0) {
665 String message= Messages.format(JavaUIMessages.AbstractSpellingDictionary_encodingError, new Object[] { new Integer(line), BasicElementLabels.getURLPart(url.toString()) });
666 IStatus status= new Status(IStatus.ERROR, JavaUI.ID_PLUGIN, IStatus.OK, message, exception);
667 JavaPlugin.log(status);
668 } else
669 JavaPlugin.log(exception);
670 } finally {
671 fMustLoad= false;
672 try {
673 if (stream != null)
674 stream.close();
675 } catch (IOException x) {
676 }
677 }
678 }
679 return false;
680 }
681
682 /**
683 * Compacts the dictionary.
684 *
685 * @since 3.3.
686 */
687 private void compact() {
688 Iterator<Object> iter= fHashBuckets.values().iterator();
689 while (iter.hasNext()) {
690 Object element= iter.next();
691 if (element instanceof ArrayList)
692 ((ArrayList<?>)element).trimToSize();
693 }
694 }
695
696 /**
697 * Sets the phonetic distance algorithm to use.
698 *
699 * @param algorithm
700 * The phonetic distance algorithm
701 */
702 protected final void setDistanceAlgorithm(final IPhoneticDistanceAlgorithm algorithm) {
703 fDistanceAlgorithm= algorithm;
704 }
705
706 /**
707 * Sets the phonetic hash provider to use.
708 *
709 * @param provider
710 * The phonetic hash provider
711 */
712 protected final void setHashProvider(final IPhoneticHashProvider provider) {
713 fHashProvider= provider;
714 }
715
716 /*
717 * @see org.eclipse.jdt.ui.text.spelling.engine.ISpellDictionary#unload()
718 */
719 public synchronized void unload() {
720 fLoaded= false;
721 fMustLoad= true;
722 fHashBuckets.clear();
723 }
724
725 /*
726 * @see org.eclipse.jdt.ui.text.spelling.engine.ISpellDictionary#acceptsWords()
727 */
728 public boolean acceptsWords() {
729 return false;
730 }
731
732 /*
733 * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#addWord(java.lang.String)
734 */
735 public void addWord(final String word) {
736 // Do nothing
737 }
738
739 /**
740 * Returns the encoding of this dictionary.
741 *
742 * @return the encoding of this dictionary
743 * @since 3.3
744 */
745 protected String getEncoding() {
746 String encoding= JavaPlugin.getDefault().getPreferenceStore().getString(PreferenceConstants.SPELLING_USER_DICTIONARY_ENCODING);
747 if (encoding == null || encoding.length() == 0)
748 encoding= ResourcesPlugin.getEncoding();
749 return encoding;
750 }
751
752}