AbstractSpellDictionary.java example

Explorer
tern.java-master
/*******************************************************************************
 * Copyright (c) 2000, 2015 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/

package org.eclipse.wst.jsdt.internal.ui.text.spelling.engine;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.MalformedInputException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.eclipse.core.resources.ResourcesPlugin;
import org.eclipse.core.runtime.IStatus;
import org.eclipse.core.runtime.Status;
import org.eclipse.wst.jsdt.internal.corext.util.Messages;
import org.eclipse.wst.jsdt.internal.ui.JavaScriptPlugin;
import org.eclipse.wst.jsdt.internal.ui.JavaUIMessages;
import org.eclipse.wst.jsdt.ui.JavaScriptUI;
import org.eclipse.wst.jsdt.ui.PreferenceConstants;

/**
 * Partial implementation of a spell dictionary.
 *
 * 
 */
public abstract class AbstractSpellDictionary implements ISpellDictionary {

	/** The bucket capacity */
	protected static final int BUCKET_CAPACITY= 4;

	/** The word buffer capacity */
	protected static final int BUFFER_CAPACITY= 32;

	/** The distance threshold */
	protected static final int DISTANCE_THRESHOLD= 160;

	/** The hash capacity */
	protected static final int HASH_CAPACITY= 22 * 1024;

	/** The phonetic distance algorithm */
	private IPhoneticDistanceAlgorithm fDistanceAlgorithm= new DefaultPhoneticDistanceAlgorithm();

	/** The mapping from phonetic hashes to word lists */
	private final Map fHashBuckets= new HashMap(HASH_CAPACITY);

	/** The phonetic hash provider */
	private IPhoneticHashProvider fHashProvider= new DefaultPhoneticHashProvider();

	/** Is the dictionary already loaded? */
	private boolean fLoaded= false;
	/**
	 * Must the dictionary be loaded?
	 * 
	 */
	private boolean fMustLoad= true;

	/**
	 * Tells whether to strip non-letters at word boundaries.
	 * 
	 */
	boolean fIsStrippingNonLetters= true;

	/**
	 * Returns all candidates with the same phonetic hash.
	 *
	 * @param hash
	 *                   The hash to retrieve the candidates of
	 * @return Array of candidates for the phonetic hash
	 */
	protected final Object getCandidates(final String hash) {
		return fHashBuckets.get(hash);
	}

	/**
	 * Returns all candidates that have a phonetic hash within a bounded
	 * distance to the specified word.
	 *
	 * @param word
	 *                   The word to find the nearest matches for
	 * @param sentence
	 *                   <code>true</code> iff the proposals start a new sentence,
	 *                   <code>false</code> otherwise
	 * @param hashs
	 *                   Array of close hashes to find the matches
	 * @return Set of ranked words with bounded distance to the specified word
	 */
	protected final Set getCandidates(final String word, final boolean sentence, final ArrayList hashs) {

		int distance= 0;
		String hash= null;

		final StringBuffer buffer= new StringBuffer(BUFFER_CAPACITY);
		final HashSet result= new HashSet(BUCKET_CAPACITY * hashs.size());

		for (int index= 0; index < hashs.size(); index++) {

			hash= (String)hashs.get(index);

			final Object candidates= getCandidates(hash);
			if (candidates == null)
				continue;
			else if (candidates instanceof String) {
				String candidate= (String)candidates;
				distance= fDistanceAlgorithm.getDistance(word, candidate);
				if (distance < DISTANCE_THRESHOLD) {
					buffer.setLength(0);
					buffer.append(candidate);
					if (sentence)
						buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
					result.add(new RankedWordProposal(buffer.toString(), -distance));
				}
				continue;
			}

			final ArrayList candidateList= (ArrayList)candidates;
			for (int offset= 0; offset < candidateList.size(); offset++) {

				String candidate= (String)candidateList.get(offset);
				distance= fDistanceAlgorithm.getDistance(word, candidate);

				if (distance < DISTANCE_THRESHOLD) {

					buffer.setLength(0);
					buffer.append(candidate);

					if (sentence)
						buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));

					result.add(new RankedWordProposal(buffer.toString(), -distance));
				}
			}
		}
		return result;
	}

	/**
	 * Returns all approximations that have a phonetic hash with smallest
	 * possible distance to the specified word.
	 *
	 * @param word
	 *                   The word to find the nearest matches for
	 * @param sentence
	 *                   <code>true</code> iff the proposals start a new sentence,
	 *                   <code>false</code> otherwise
	 * @param result
	 *                   Set of ranked words with smallest possible distance to the
	 *                   specified word
	 */
	protected final void getCandidates(final String word, final boolean sentence, final Set result) {

		int distance= 0;
		int minimum= Integer.MAX_VALUE;

		StringBuffer buffer= new StringBuffer(BUFFER_CAPACITY);

		final Object candidates= getCandidates(fHashProvider.getHash(word));
		if (candidates == null)
			return;
		else if (candidates instanceof String) {
			String candidate= (String)candidates;
			distance= fDistanceAlgorithm.getDistance(word, candidate);
			buffer.append(candidate);
			if (sentence)
				buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
			result.add(new RankedWordProposal(buffer.toString(), -distance));
			return;
		}

		final ArrayList candidateList= (ArrayList)candidates;
		final ArrayList matches= new ArrayList(candidateList.size());

		for (int index= 0; index < candidateList.size(); index++) {
			String candidate= (String)candidateList.get(index);
			distance= fDistanceAlgorithm.getDistance(word, candidate);

			if (distance <= minimum) {
				
				if (distance < minimum)
					matches.clear();

				buffer.setLength(0);
				buffer.append(candidate);

				if (sentence)
					buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));

				matches.add(new RankedWordProposal(buffer.toString(), -distance));
				minimum= distance;
			}
		}

		result.addAll(matches);
	}
	
	/**
	 * Tells whether this dictionary is empty.
	 * 
	 * @return <code>true</code> if this dictionary is empty
	 * 
	 */
	protected boolean isEmpty() {
		return fHashBuckets.size() == 0;
	}

	/**
	 * Returns the used phonetic distance algorithm.
	 *
	 * @return The phonetic distance algorithm
	 */
	protected final IPhoneticDistanceAlgorithm getDistanceAlgorithm() {
		return fDistanceAlgorithm;
	}

	/**
	 * Returns the used phonetic hash provider.
	 *
	 * @return The phonetic hash provider
	 */
	protected final IPhoneticHashProvider getHashProvider() {
		return fHashProvider;
	}

	/*
	 * @see org.eclipse.wst.jsdt.internal.ui.text.spelling.engine.ISpellDictionary#getProposals(java.lang.String,boolean)
	 */
	public Set getProposals(final String word, final boolean sentence) {

		try {

			if (!fLoaded) {
				synchronized (this) {
					fLoaded= load(getURL());
					if (fLoaded)
						compact();
				}
			}

		} catch (MalformedURLException exception) {
			// Do nothing
		}

		final String hash= fHashProvider.getHash(word);
		final char[] mutators= fHashProvider.getMutators();

		final ArrayList neighborhood= new ArrayList((word.length() + 1) * (mutators.length + 2));
		neighborhood.add(hash);

		final Set candidates= getCandidates(word, sentence, neighborhood);
		neighborhood.clear();

		char previous= 0;
		char next= 0;

		char[] characters= word.toCharArray();
		for (int index= 0; index < word.length() - 1; index++) {

			next= characters[index];
			previous= characters[index + 1];

			characters[index]= previous;
			characters[index + 1]= next;

			neighborhood.add(fHashProvider.getHash(new String(characters)));

			characters[index]= next;
			characters[index + 1]= previous;
		}

		final String sentinel= word + " "; //$NON-NLS-1$

		characters= sentinel.toCharArray();
		int offset= characters.length - 1;

		while (true) {

			for (int index= 0; index < mutators.length; index++) {

				characters[offset]= mutators[index];
				neighborhood.add(fHashProvider.getHash(new String(characters)));
			}

			if (offset == 0)
				break;

			characters[offset]= characters[offset - 1];
			--offset;
		}

		char mutated= 0;
		characters= word.toCharArray();

		for (int index= 0; index < word.length(); index++) {

			mutated= characters[index];
			for (int mutator= 0; mutator < mutators.length; mutator++) {

				characters[index]= mutators[mutator];
				neighborhood.add(fHashProvider.getHash(new String(characters)));
			}
			characters[index]= mutated;
		}

		characters= word.toCharArray();
		final char[] deleted= new char[characters.length - 1];

		for (int index= 0; index < deleted.length; index++)
			deleted[index]= characters[index];

		next= characters[characters.length - 1];
		offset= deleted.length;

		while (true) {

			neighborhood.add(fHashProvider.getHash(new String(characters)));
			if (offset == 0)
				break;

			previous= next;
			next= deleted[offset - 1];

			deleted[offset - 1]= previous;
			--offset;
		}

		neighborhood.remove(hash);
		final Set matches= getCandidates(word, sentence, neighborhood);

		if (matches.size() == 0 && candidates.size() == 0)
			getCandidates(word, sentence, candidates);

		candidates.addAll(matches);

		return candidates;
	}

	/**
	 * Returns the URL of the dictionary word list.
	 *
	 * @throws MalformedURLException
	 *                    if the URL could not be retrieved
	 * @return The URL of the dictionary word list
	 */
	protected abstract URL getURL() throws MalformedURLException;

	/**
	 * Hashes the word into the dictionary.
	 *
	 * @param word
	 *                   The word to hash in the dictionary
	 */
	protected final void hashWord(final String word) {

		final String hash= fHashProvider.getHash(word);
		Object bucket= fHashBuckets.get(hash);

		if (bucket == null) {
			fHashBuckets.put(hash, word);
		} else if (bucket instanceof ArrayList) {
			((ArrayList)bucket).add(word);
		} else {
			ArrayList list= new ArrayList(BUCKET_CAPACITY);
			list.add(bucket);
			list.add(word);
			fHashBuckets.put(hash, list);
		}
	}

	/*
	 * @see org.eclipse.wst.jsdt.internal.ui.text.spelling.engine.ISpellDictionary#isCorrect(java.lang.String)
	 */
	public boolean isCorrect(String word) {
		word= stripNonLetters(word);
		try {
			
			if (!fLoaded) {
				synchronized (this) {
					fLoaded= load(getURL());
					if (fLoaded)
						compact();
				}
			}

		} catch (MalformedURLException exception) {
			// Do nothing
		}

		final Object candidates= getCandidates(fHashProvider.getHash(word));
		if (candidates == null)
			return false;
		else if (candidates instanceof String) {
			String candidate= (String)candidates;
			if (candidate.equals(word) || candidate.equals(word.toLowerCase()))
				return true;
			return false;
		}
		final ArrayList candidateList= (ArrayList)candidates;
		if (candidateList.contains(word) || candidateList.contains(word.toLowerCase()))
			return true;

		return false;
	}
	
	/*
	 * @see org.eclipse.wst.jsdt.internal.ui.text.spelling.engine.ISpellDictionary#setStripNonLetters(boolean)
	 * 
	 */
	public void setStripNonLetters(boolean state) {
		fIsStrippingNonLetters= state;
	}
	
	/**
	 * Strips non-letter characters from the given word.
	 * <p>
	 * This will only happen if the corresponding preference is enabled.
	 * </p>
	 * 
	 * @param word the word to strip
	 * @return the stripped word
	 * 
	 */
	protected String stripNonLetters(String word) {
		if (!fIsStrippingNonLetters)
			return word;
		
		int i= 0;
		int j= word.length() - 1;
		while (i <= j && !Character.isLetter(word.charAt(i)))
			i++;
		if (i > j)
			return ""; //$NON-NLS-1$
		
		while (j > i && !Character.isLetter(word.charAt(j)))
			j--;
		
		return word.substring(i, j+1);
	}

	/*
	 * @see org.eclipse.wst.jsdt.ui.text.spelling.engine.ISpellDictionary#isLoaded()
	 */
	public final synchronized boolean isLoaded() {
		return fLoaded || fHashBuckets.size() > 0;
	}

	/**
	 * Loads a dictionary word list from disk.
	 *
	 * @param url
	 *                   The URL of the word list to load
	 * @return <code>true</code> iff the word list could be loaded, <code>false</code>
	 *               otherwise
	 */
	protected synchronized boolean load(final URL url) {
		 if (!fMustLoad)
			 return fLoaded;

		if (url != null) {
			InputStream stream= null;
			int line= 0;
			try {
				stream= url.openStream();
				if (stream != null) {
					String word= null;
					
					// Setup a reader with a decoder in order to read over malformed input if needed.
					CharsetDecoder decoder= Charset.forName(getEncoding()).newDecoder();
					decoder.onMalformedInput(CodingErrorAction.REPORT);
					decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
					final BufferedReader reader= new BufferedReader(new InputStreamReader(stream, decoder));
					try {
						boolean doRead= true;
						while (doRead) {
							try {
								word= reader.readLine();
							} catch (MalformedInputException ex) {
								// Tell the decoder to replace malformed input in order to read the line.
								decoder.onMalformedInput(CodingErrorAction.REPLACE);
								decoder.reset();
								word= reader.readLine();
								decoder.onMalformedInput(CodingErrorAction.REPORT);
								
								String message= Messages.format(JavaUIMessages.AbstractSpellingDictionary_encodingError, new String[] { word, decoder.replacement(), url.toString() });
								IStatus status= new Status(IStatus.ERROR, JavaScriptUI.ID_PLUGIN, IStatus.OK, message, ex);
								JavaScriptPlugin.log(status);
								
								doRead= word != null;
								continue;
							}
							doRead= word != null;
							if (doRead)
								hashWord(word);
						}
					} finally {
						reader.close();
					}
					return true;
				}
			} catch (FileNotFoundException ex) {
				String urlString= url.toString();
				String lowercaseUrlString= urlString.toLowerCase();
				if (urlString.equals(lowercaseUrlString))
					JavaScriptPlugin.log(ex);
				else
					try {
						return load(new URL(lowercaseUrlString));
					} catch (MalformedURLException e) {
						JavaScriptPlugin.log(e);
					}
			} catch (IOException exception) {
				if (line > 0) {
					String message= Messages.format(JavaUIMessages.AbstractSpellingDictionary_encodingError, new Object[] { Integer.valueOf(line), url.toString() });
					IStatus status= new Status(IStatus.ERROR, JavaScriptUI.ID_PLUGIN, IStatus.OK, message, exception);
					JavaScriptPlugin.log(status);
				} else
					JavaScriptPlugin.log(exception);
			} finally {
				fMustLoad= false;
				try {
					if (stream != null)
						stream.close();
				} catch (IOException x) {
				}
			}
		}
		return false;
	}

	/**
	 * Compacts the dictionary.
	 * 
	 * 
	 */
	private void compact() {
		Iterator iter= fHashBuckets.values().iterator();
		while (iter.hasNext()) {
			Object element= iter.next();
			if (element instanceof ArrayList)
				((ArrayList)element).trimToSize();
		}
	}

	/**
	 * Sets the phonetic distance algorithm to use.
	 *
	 * @param algorithm
	 *                   The phonetic distance algorithm
	 */
	protected final void setDistanceAlgorithm(final IPhoneticDistanceAlgorithm algorithm) {
		fDistanceAlgorithm= algorithm;
	}

	/**
	 * Sets the phonetic hash provider to use.
	 *
	 * @param provider
	 *                   The phonetic hash provider
	 */
	protected final void setHashProvider(final IPhoneticHashProvider provider) {
		fHashProvider= provider;
	}

	/*
	 * @see org.eclipse.wst.jsdt.ui.text.spelling.engine.ISpellDictionary#unload()
	 */
	public synchronized void unload() {
		fLoaded= false;
		fMustLoad= true;
		fHashBuckets.clear();
	}

	/*
	 * @see org.eclipse.wst.jsdt.ui.text.spelling.engine.ISpellDictionary#acceptsWords()
	 */
	public boolean acceptsWords() {
		return false;
	}

	/*
	 * @see org.eclipse.wst.jsdt.internal.ui.text.spelling.engine.ISpellDictionary#addWord(java.lang.String)
	 */
	public void addWord(final String word) {
		// Do nothing
	}
	
	/**
	 * Returns the encoding of this dictionary.
	 * 
	 * @return the encoding of this dictionary
	 * 
	 */
	protected String getEncoding() {
		String encoding= JavaScriptPlugin.getDefault().getPreferenceStore().getString(PreferenceConstants.SPELLING_USER_DICTIONARY_ENCODING);
		if (encoding == null || encoding.length() == 0)
			encoding= ResourcesPlugin.getEncoding();
		return encoding;
	}
	
}