/*
* $Id$
*
* Copyright (c) 2004-2010 by the TeXlapse Team.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*/
package net.sourceforge.texlipse.spelling;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import com.swabunga.spell.engine.SpellDictionaryASpell;
/**
 * A memory optimized dictionary class.
 * <p>
 * Words are not stored as individual {@code String} objects. Instead, all
 * words whose code (as returned by {@code getCode}) has the same
 * {@code hashCode()} are concatenated into one {@link StringBuilder} of the
 * form {@code ;word1;word2;...;wordN;}. Because the map key is the hash of
 * the code and not the code itself, two different codes with equal hashes
 * share one entry.
 * <p>
 * Words must not contain the separator character {@code ';'}, otherwise
 * {@link #getWords(String)} splits them.
 *
 * @author Boris von Loesch
 */
public class TexSpellDictionary extends SpellDictionaryASpell {

    /** Initial capacity of the map holding the word buckets. */
    private final static int INITIAL_CAPACITY = 32 * 1024;

    /** Delimiter placed before, between and after the words of a bucket. */
    private final static char SEP_CHAR = ';';

    /** Maps the hash of a code to the separator-delimited words of that code. */
    protected Map<Integer, StringBuilder> mainDictionary = new HashMap<Integer, StringBuilder>(INITIAL_CAPACITY);

    /**
     * User dictionary file that {@link #addWord(String)} appends to, or
     * {@code null} if no user dictionary has been set.
     */
    private File dictFile = null;

    /**
     * Dictionary constructor.
     * <p>
     * The reader is wrapped in a {@link BufferedReader} and read to its end;
     * it is not closed here.
     *
     * @param wordList reader supplying the word list, one word per line
     * @throws java.io.IOException indicates problems reading the words list
     */
    public TexSpellDictionary(Reader wordList) throws IOException {
        super((File) null);
        createDictionary(new BufferedReader(wordList));
    }

    /**
     * Dictionary constructor that passes a phonetic reader to the superclass.
     * <p>
     * The word list reader is wrapped in a {@link BufferedReader} and read to
     * its end; it is not closed here.
     *
     * @param wordList reader supplying the word list, one word per line
     * @param phonetic reader handed to the superclass constructor for the
     *                 phonetic transformation of the word list
     * @throws java.io.IOException indicates problems reading the words list
     *                             or phonetic information
     */
    public TexSpellDictionary(Reader wordList, Reader phonetic) throws IOException {
        super(phonetic);
        createDictionary(new BufferedReader(wordList));
    }

    /**
     * Adds the words from a file to the existing dictionary.
     * This method can be called as many times as needed to build the internal
     * word list. Words that are already present are not added again.
     * <p>
     * Note that adding a dictionary does not change the target file of
     * {@link #addWord(String)}; that file is only set by
     * {@link #setUserDict(File)}.
     *
     * @param wordList a file that contains the words, one word per line
     * @throws FileNotFoundException if the file cannot be opened for reading
     * @throws IOException if reading or closing the file fails
     */
    public void addDictionary(File wordList) throws FileNotFoundException, IOException {
        BufferedReader in = new BufferedReader(new FileReader(wordList));
        try {
            addDictionaryHelper(in);
        } finally {
            // The reader is opened here, so it is released here as well.
            in.close();
        }
    }

    /**
     * Sets the user dictionary file and loads the words it contains.
     * <p>
     * Passing {@code null} removes the user dictionary, so that
     * {@link #addWord(String)} only changes the in-memory dictionary.
     *
     * @param userDict the user dictionary file, or {@code null} for none
     */
    public void setUserDict(File userDict) {
        dictFile = userDict;
        if (userDict == null) {
            return;
        }
        try {
            addDictionary(userDict);
        } catch (IOException ignored) {
            // Best effort: the file may not exist yet (addWord creates it on
            // demand) or may be unreadable; the dictionary stays usable.
        }
    }

    /**
     * Adds a word permanently to the dictionary (and the user dictionary
     * file, if one is set).
     * <p>
     * The word is always added to the in-memory dictionary. Writing to the
     * file is best effort: a failure to create or write the file is ignored.
     * <p>This needs to be made thread safe (synchronized)</p>
     *
     * @param word the word to add
     */
    public void addWord(String word) {
        putWordUnique(word);
        if (dictFile == null) {
            return;
        }
        try {
            if (!dictFile.exists() && !dictFile.createNewFile()) {
                return;
            }
            // Open with append.
            Writer w = new FileWriter(dictFile, true);
            try {
                w.write(word);
                w.write("\n");
            } finally {
                // Release the file handle even if writing failed.
                w.close();
            }
        } catch (IOException ignored) {
            // Best effort: the word is already part of the in-memory
            // dictionary, it is just not persisted.
        }
    }

    /**
     * Fills the dictionary from a word list.
     * <p>
     * Each word in the reader should be on a separate line. Lines are trimmed;
     * empty and whitespace-only lines are skipped. Words are added without a
     * duplicate check.
     *
     * @param in reader supplying the word list; not closed by this method
     * @throws IOException if reading from the reader fails
     */
    protected void createDictionary(BufferedReader in) throws IOException {
        String line;
        while ((line = in.readLine()) != null) {
            // Trim before testing so blank lines never register an empty word.
            String word = line.trim();
            if (word.length() > 0) {
                putWord(word);
            }
        }
    }

    /**
     * Adds the words of a word list to the existing dictionary. If a word
     * already exists in the dictionary, a new entry is not added.
     * <p>
     * Each word in the reader should be on a separate line. Lines are trimmed;
     * empty and whitespace-only lines are skipped.
     *
     * @param in reader supplying the word list; not closed by this method
     * @throws IOException if reading from the reader fails
     */
    public void addDictionaryHelper(BufferedReader in) throws IOException {
        String line;
        while ((line = in.readLine()) != null) {
            // Trim before testing so blank lines never register an empty word.
            String word = line.trim();
            if (word.length() > 0) {
                putWordUnique(word);
            }
        }
    }

    /**
     * Stores a word in the dictionary without checking for duplicates.
     *
     * @param word the word to add
     */
    protected void putWord(String word) {
        store(word, false);
    }

    /**
     * Stores a word, if it is not already present in the dictionary. The
     * comparison is case-sensitive: a word that differs only in case from an
     * existing entry is stored as a separate entry.
     *
     * @param word the word to add
     */
    protected void putWordUnique(String word) {
        store(word, true);
    }

    /**
     * Appends a word to the bucket of its code, creating the bucket if needed.
     *
     * @param word          the word to add
     * @param skipIfPresent if {@code true}, a word that is already contained
     *                      in the bucket is not appended again
     */
    private void store(String word, boolean skipIfPresent) {
        int code = getCode(word).hashCode();
        StringBuilder bucket = mainDictionary.get(code);
        if (bucket == null) {
            // A bucket always starts with a separator so that every word is
            // enclosed by separators on both sides.
            bucket = new StringBuilder();
            bucket.append(SEP_CHAR);
            mainDictionary.put(code, bucket);
        } else if (skipIfPresent && containsWord(bucket, word)) {
            return;
        }
        bucket.append(word);
        bucket.append(SEP_CHAR);
    }

    /**
     * Tests whether a bucket contains exactly the given word (case-sensitive).
     *
     * @param bucket the separator-delimited word list
     * @param word   the word to look for
     * @return {@code true} if the word, enclosed by separators, occurs in the bucket
     */
    private static boolean containsWord(StringBuilder bucket, String word) {
        return bucket.indexOf(SEP_CHAR + word + SEP_CHAR) >= 0;
    }

    /**
     * Compresses the dictionary so that it takes less memory by trimming
     * every bucket to its current length.
     */
    public void compress() {
        Collection<StringBuilder> c = mainDictionary.values();
        for (StringBuilder st : c) {
            st.trimToSize();
        }
    }

    /**
     * Returns a list of strings (words) for the code.
     * <p>
     * The lookup uses {@code code.hashCode()}, so the result contains the
     * words of every code that has the same hash.
     *
     * @param code the code to look up
     * @return a new list with the stored words; empty if there is no entry
     */
    @Override
    public List<String> getWords(String code) {
        //Check the main dictionary.
        StringBuilder mainDictResult = mainDictionary.get(code.hashCode());
        if (mainDictResult == null) {
            return new ArrayList<String>(1);
        }
        StringTokenizer stk = new StringTokenizer(mainDictResult.toString(), "" + SEP_CHAR);
        List<String> list = new ArrayList<String>(1);
        while (stk.hasMoreTokens()) {
            list.add(stk.nextToken());
        }
        return list;
    }

    /**
     * Returns true if the word is correctly spelled against the current word list.
     * <p>
     * The word is accepted if it is stored exactly as given or if its
     * lower-case form is stored; the latter lets capitalised words match
     * lower-case dictionary entries.
     *
     * @param word the word to check
     * @return {@code true} if the word or its lower-case form is in the dictionary
     */
    @Override
    public boolean isCorrect(String word) {
        StringBuilder words = mainDictionary.get(getCode(word).hashCode());
        if (words == null) {
            return false;
        }
        if (containsWord(words, word)) {
            return true;
        }
        //JMH should we always try the lowercase version. If I dont then capitalised
        //words are always returned as incorrect.
        return containsWord(words, word.toLowerCase());
    }
}