SpellDictionary.java example

Explorer

EKit-for-SCIL-master
- src
  - com
    - hexidec
      - ekit
        Ekit.java
        EkitApplet.java
        EkitCore.java
        EkitCoreSpell.java
        action
        AlignAction.java
        CustomAction.java
        FormatAction.java
        ListAutomationAction.java
        SetFontFamilyAction.java
        StylesAction.java
        bridges
        EkitCoreBridge.java
        EkitCoreService.java
        MutatorService.java
        TextPaneBridge.java
        TextPaneService.java
        UserInputBridge.java
        UserInputService.java
        component
        EkitStandardParserCallback.java
        ExtendedHTMLDocument.java
        ExtendedHTMLEditorKit.java
        FontSelectorDialog.java
        HTMLUtilities.java
        ImageDialog.java
        ImageFileChooser.java
        ImageFileChooserPreview.java
        ImageURLDialog.java
        JButtonNoFocus.java
        JComboBoxNoFocus.java
        JToggleButtonNoFocus.java
        MutableFilter.java
        PropertiesDialog.java
        RelativeImageView.java
        SearchDialog.java
        SimpleInfoDialog.java
        TableInputDialog.java
        UnicodeDialog.java
        UserInputAnchorDialog.java
        UserInputDialog.java
        thirdparty
        print
        DocumentRenderer.java
      - util
        Base64Codec.java
        Translatrix.java
    - swabunga
      - spell
        engine
        Configuration.java
        DoubleMeta.java
        EditDistance.java
        GenericTransformator.java
        PropertyConfiguration.java
        SpellDictionary.java
        Transformator.java
        Word.java
        event
        BasicSpellCheckEvent.java
        DocumentWordTokenizer.java
        SpellCheckEvent.java
        SpellCheckListener.java
        SpellChecker.java
        StringWordTokenizer.java
        WordTokenizer.java
        swing
        JSpellDialog.java
        JSpellForm.java

/*
 * put your module comment here
 * formatted with JxBeauty (c) johann.langhofer@nextra.at
 */

package com.swabunga.spell.engine;

import java.io.*;
import java.util.*;

/**
 * The SpellDictionary class holds the instance of the dictionary.
 * <p>
 * This class is meant to be thread safe, but note that {@link #addWord(String)}
 * is not synchronized. Derived classes should ensure that thread safety is preserved.
 * </p>
 * <p>
 * There are many open source dictionary files. For just a few see:
 * http://wordlist.sourceforge.net/
 * </p>
 * <p>
 * This dictionary class reads words one per line. Make sure that your word list
 * is formatted in this way (most are).
 * </p>
 */
public class SpellDictionary
{

	/**
	 * Characters that are substituted into, and inserted into, a misspelt word
	 * when {@link #getSuggestions(String, int)} generates near-miss candidates.
	 */
	private static final char[] replacelist =
		{ 'A', 'B', 'X', 'S', 'K', 'J', 'T', 'F', 'H', 'L', 'M', 'N', 'P', 'R', '0' };

	/**
	 * Initial capacity (16 * 1024 buckets) requested for the main dictionary
	 * hash map.
	 */
	private final static int INITIAL_CAPACITY = 16 * 1024;

	/**
	 * The hash map that contains the word dictionary. Keys are the phonetic
	 * codes returned by {@link #getCode(String)}; each value is a LinkedList of
	 * the words (Strings) that share that code.
	 */
	protected HashMap mainDictionary = new HashMap(INITIAL_CAPACITY);

	/**
	 * The Transformator used to turn a word into its phonetic code.
	 */
	private Transformator tf = null;

	/**
	 * The dictionary file that {@link #addWord(String)} appends to, or null
	 * when the dictionary was not loaded from a File.
	 */
	private File dictFile = null;

	/**
	 * Builds a dictionary from a reader that supplies one word per line, using
	 * a DoubleMeta transformator for the phonetic codes.
	 * <p>
	 * The reader is not closed by this constructor; that remains the caller's
	 * responsibility.
	 * </p>
	 * @param wordList source of the word list
	 * @throws IOException if reading the word list fails
	 */
	public SpellDictionary(Reader wordList) throws IOException {
		tf = new DoubleMeta();
		createDictionary(new BufferedReader(wordList));
	}

	/**
	 * Builds a dictionary from a class path resource (e.g. inside a JAR file).
	 * The resource is looked up as <code>"dictionary/" + wordListResource</code>
	 * relative to this object's class, and is decoded with the platform default
	 * charset.
	 * @author Howard Kistler
	 * @param wordListResource name of the word list inside the dictionary folder
	 * @throws FileNotFoundException if the resource cannot be found
	 * @throws IOException if reading the word list fails
	 */
	public SpellDictionary(String wordListResource) throws IOException
	{
		tf = new DoubleMeta();
		String resourceName = "dictionary/" + wordListResource;
		InputStream is = this.getClass().getResourceAsStream(resourceName);
		if (is == null) {
			// getResourceAsStream signals a missing resource with null; report
			// it clearly instead of failing later with a NullPointerException.
			throw new FileNotFoundException("Dictionary resource not found: " + resourceName);
		}
		try {
			createDictionary(new BufferedReader(new InputStreamReader(is)));
		} finally {
			is.close();
		}
	}

	/**
	 * Convenience constructor that loads the word list from a file and
	 * remembers the file so that {@link #addWord(String)} can append to it.
	 * @param wordList file holding one word per line
	 * @throws FileNotFoundException if the file cannot be opened
	 * @throws IOException if reading the word list fails
	 */
	public SpellDictionary(File wordList)
		throws FileNotFoundException, IOException {
		tf = new DoubleMeta();
		loadWordList(wordList);
		dictFile = wordList;
	}

	/**
	 * Dictionary constructor that uses an aspell phonetic file to
	 * build the transformation table.
	 * @param wordList file holding one word per line
	 * @param phonetic file handed to the GenericTransformator
	 * @throws FileNotFoundException if a file cannot be opened
	 * @throws IOException if reading fails
	 */
	public SpellDictionary(File wordList, File phonetic)
		throws FileNotFoundException, IOException {
		tf = new GenericTransformator(phonetic);
		dictFile = wordList;
		loadWordList(wordList);
	}

	/**
	 * Reads the given word list file into the dictionary and always closes the
	 * file again, even when loading fails.
	 */
	private void loadWordList(File wordList) throws IOException {
		Reader reader = new FileReader(wordList);
		try {
			createDictionary(new BufferedReader(reader));
		} finally {
			reader.close();
		}
	}

	/**
	 * Adds a word permanently to the dictionary and, when the dictionary was
	 * loaded from a file, appends it to that file.
	 * <p>
	 * Writing to the file is best effort: an I/O failure is reported on
	 * standard output and the word still stays in the in-memory dictionary.
	 * </p>
	 * <p>This needs to be made thread safe (synchronized)</p>
	 * @param word the word to add
	 */
	public void addWord(String word) {
		putWord(word);
		if (dictFile == null) {
			return;
		}
		try {
			// Open with append.
			FileWriter w = new FileWriter(dictFile.toString(), true);
			try {
				w.write(word);
				w.write("\n");
			} finally {
				// Release the file handle even when a write fails.
				w.close();
			}
		} catch (IOException ex) {
			System.out.println("Error writing to dictionary file: " + ex);
		}
	}

	/**
	 * Fills the dictionary from a word list.
	 * <p>
	 * Each line of the reader is stored as one word, unmodified.
	 * </p>
	 * @param in reader positioned at the start of the word list
	 * @throws IOException if reading fails
	 */
	protected void createDictionary(BufferedReader in) throws IOException {
		String line;
		while ((line = in.readLine()) != null) {
			// The copy is kept from the original implementation; presumably it
			// stops the stored word from pinning a larger read buffer on old JVMs.
			putWord(new String(line.toCharArray()));
		}
	}

	/**
	 * Returns the phonetic code that the configured Transformator produces for
	 * the word.
	 */
	public String getCode(String word) {
		return tf.transform(word);
	}

	/**
	 * Stores a word in the in-memory dictionary under its phonetic code.
	 */
	protected void putWord(String word) {
		String code = getCode(word);
		LinkedList list = (LinkedList) mainDictionary.get(code);
		if (list == null) {
			list = new LinkedList();
			mainDictionary.put(code, list);
		}
		list.add(word);
	}

	/**
	 * Returns the list of words (Strings) stored for the code.
	 * <p>
	 * When the code is known, the returned list is the dictionary's own list,
	 * not a copy; callers should treat it as read only. When the code is
	 * unknown, a new empty list is returned.
	 * </p>
	 */
	public LinkedList getWords(String code) {
		//Check the main dictionary.
		LinkedList mainDictResult = (LinkedList) mainDictionary.get(code);
		if (mainDictResult == null) {
			return new LinkedList();
		}
		return mainDictResult;
	}

	/**
	 * Returns true if the word is correctly spelled against the current word list.
	 * <p>
	 * The word is accepted when either the word itself or its lower case form
	 * is stored under the word's phonetic code; without the lower case check,
	 * capitalised words would always be reported as incorrect.
	 * </p>
	 */
	public boolean isCorrect(String word) {
		LinkedList possible = getWords(getCode(word));
		return possible.contains(word) || possible.contains(word.toLowerCase());
	}

	/**
	 * Returns a linked list of Word objects that are the suggestions to an
	 * incorrect word.
	 * <p>
	 * Candidate phonetic codes are collected from the word itself and from
	 * every variant obtained by swapping two neighbouring characters,
	 * replacing one character, inserting one character, or deleting one
	 * character. Every dictionary word stored under one of those codes whose
	 * edit distance to the word is below the threshold becomes a suggestion.
	 * The result is sorted with the Word comparator.
	 * </p>
	 * @param word Suggestions for given mispelt word
	 * @param threshold Exclusive upper bound on the edit distance between the
	 *        mispelt word and a suggestion
	 * @return LinkedList a List of suggestions
	 */
	public LinkedList getSuggestions(String word, int threshold) {
		HashSet nearmisscodes = new HashSet();

		// add all words that have the same codeword
		nearmisscodes.add(getCode(word));

		// do some transformations to pick up more results
		addInterchangeCodes(word, nearmisscodes);
		addChangeCodes(word, nearmisscodes);
		addInsertCodes(word, nearmisscodes);

		// As before, deletions and the distance ranking work on the trimmed word.
		String trimmed = word.trim();
		addDeleteCodes(trimmed, nearmisscodes);

		LinkedList wordlist = getWordsFromCode(trimmed, nearmisscodes, threshold);
		// We sort a linkedlist at the end instead of maintaining a
		// continuously sorted TreeSet; it is better to do this operation
		// once at the end.
		Collections.sort(wordlist, new Word());
		return wordlist;
	}

	/** Adds the code of every variant with two neighbouring characters swapped. */
	private void addInterchangeCodes(String word, Set codes) {
		char[] chars = word.toCharArray();
		for (int i = 0; i < chars.length - 1; i++) {
			char first = chars[i];
			char second = chars[i + 1];
			chars[i] = second;
			chars[i + 1] = first;
			codes.add(getCode(new String(chars)));
			// Undo the swap before moving on to the next pair.
			chars[i] = first;
			chars[i + 1] = second;
		}
	}

	/** Adds the code of every variant with one character replaced from the replace list. */
	private void addChangeCodes(String word, Set codes) {
		char[] chars = word.toCharArray();
		for (int i = 0; i < chars.length; i++) {
			char original = chars[i];
			for (int j = 0; j < replacelist.length; j++) {
				chars[i] = replacelist[j];
				codes.add(getCode(new String(chars)));
			}
			chars[i] = original;
		}
	}

	/** Adds the code of every variant with one replace list character inserted. */
	private void addInsertCodes(String word, Set codes) {
		// The extra trailing slot is the gap; it is walked from the end of the
		// word to the front by shifting one character to the right per step.
		char[] chars = (word + " ").toCharArray();
		for (int gap = chars.length - 1; gap >= 0; gap--) {
			for (int j = 0; j < replacelist.length; j++) {
				chars[gap] = replacelist[j];
				codes.add(getCode(new String(chars)));
			}
			if (gap > 0) {
				chars[gap] = chars[gap - 1];
			}
		}
	}

	/** Adds the code of every variant with exactly one character removed. */
	private void addDeleteCodes(String word, Set codes) {
		int length = word.length();
		if (length < 2) {
			// Removing a character from an empty or one-letter word leaves
			// nothing to look up.
			return;
		}
		for (int i = 0; i < length; i++) {
			codes.add(getCode(word.substring(0, i) + word.substring(i + 1)));
		}
	}

	/**
	 * Collects, as Word objects, all dictionary words stored under any of the
	 * given codes whose edit distance to the word is below the threshold.
	 */
	private LinkedList getWordsFromCode(String word, Collection codes, int threshold) {
		LinkedList result = new LinkedList();
		for (Iterator i = codes.iterator(); i.hasNext();) {
			LinkedList simwordlist = getWords((String) i.next());
			for (Iterator j = simwordlist.iterator(); j.hasNext();) {
				String similar = (String) j.next();
				int distance = EditDistance.getDistance(word, similar);
				if (distance < threshold) {
					result.add(new Word(similar, distance));
				}
			}
		}
		return result;
	}


	/** Added to free up the class memory and resources,
	  * which otherwise trash the system quickly (code by Steve Birmingham).
	  * <p>
	  * The references to the word map, the transformator and the dictionary
	  * file are cleared, so the dictionary must not be used afterwards.
	  * </p>
	  */
	public void dispose()
	{
		mainDictionary   = null;
		tf               = null;
		dictFile         = null;
	}

}