package edu.northwestern.at.utils.spellcheck;
import java.io.*;
import java.util.*;
import edu.northwestern.at.utils.*;
import edu.northwestern.at.utils.corpuslinguistics.phonetics.*;
import edu.northwestern.at.utils.corpuslinguistics.stringsimilarity.*;
/** TernaryTrieBasedSpellingChecker -- A very simple minded spelling checker.
*
* <p>
* Implements a very simple minded spelling checker using HashMaps
* to hold the dictionaries. The dictionaries are read as combined
* word lists and metaphone codes from text files into two hashmaps.
* One hashmap contains each word as a key and has the metaphone code
* for that word as a value. The second hashmap uses the metaphone values
* as keys and the list of words mapping to that metaphone as values.
* This allows presentation of suggested spellings for misspelled words.
* The list of suggestions may optionally be pruned by using
* a measure of the Levenstein distance between the original
* misspelling and each suggested spelling.
* </p>
*
* <p>
* Words beginning with a digit are assumed to be numbers and
* therefore spelled correctly.
* </p>
*
* <p>
* When creating a list of suggested replacements for a misspelled
* word, the first letter of the suggestions is capitalized if
* the first letter of the original misspelled word is capitalized.
* </p>
*
* <p>
* This class is not intended for production use because of
* the time needed to load and save dictionaries and the amount
* of memory used to hold the dictionaries in memory. This class
* serves as a reference implementation for the SpellingChecker
* interface as well as a testbed for applications needing a
* spelling checker during development.
* </p>
*/
public class TernaryTrieBasedSpellingChecker implements SpellingChecker
{
/** The global dictionary. */
protected SpellingDictionary globalDictionary = null;
/** The local dictionary. */
protected SpellingDictionary localDictionary = null;
/** Ignore list. */
protected SpellingDictionary ignoreList = new HashMapSpellingDictionary();
/** Create spelling checker specifying global and local dictionaries.
*/
public TernaryTrieBasedSpellingChecker
(
SpellingDictionary globalDictionary ,
SpellingDictionary localDictionary
)
{
this.globalDictionary = globalDictionary;
this.localDictionary = localDictionary;
}
/** Create spelling checker specifying global dictionary.
*/
public TernaryTrieBasedSpellingChecker
(
SpellingDictionary globalDictionary
)
{
this.globalDictionary = globalDictionary;
this.localDictionary = null;
}
/** Create spelling checked without loading any dictionaries. */
public TernaryTrieBasedSpellingChecker()
{
this.globalDictionary = null;
this.localDictionary = null;
}
/** Select global dictionary to use to check spelling.
*
* @param dictionary Identifies dictionary.
* The dictionary class must implement
* the SpellingDictionary interface.
*
* <p>
* The global dictionary is usually shared among many users.
* Typically it is created by a system administrator as a
* shareable resource.
* </p>
*/
public void useGlobalDictionary( SpellingDictionary dictionary )
{
this.globalDictionary = dictionary;
}
/** Select local dictionary to use to check spelling.
*
* @param dictionary The dictionary to use.
* If the name is null, no local
* dictionary is used.
* The dictionary class must implement
* the SpellingDictionary interface.
*
* <p>
* The local dictionary is usually limited to access by
* a single individual. The local dictionary generally
* contains words added by an individual while checking
* the spelling of one or more documents.
* </p>
*/
public void useLocalDictionary( SpellingDictionary dictionary )
{
this.localDictionary = dictionary;
}
/** Add word to a dictionary.
*
* @param word The word to add.
* @param dictionary The dictionary.
*
* @return True if word added.
*/
private boolean addWordToDictionary
(
String word ,
SpellingDictionary dictionary
)
{
return dictionary.addWord( word );
}
/** Check spelling of word.
*
* @param word The word whose spelling should be checked.
*
* @return True if the word was found in the
* global or local dictionaries.
*/
public boolean checkSpelling( String word )
{
// Consider null or empty word to be spelled
// correctly.
if ( ( word == null ) || ( word.length() <= 0 ) ) return true;
// Assume word which is all upper case to be
// spelled correctly.
if ( word.equals( word.toUpperCase() ) ) return true;
// If word starts with a digit,
// assume it is a number and
// say it is spelled OK.
if ( Character.isDigit( word.charAt( 0 ) ) )
return true;
// Convert word to lower case for
// dictionary lookups.
String lowerCaseWord = word.toLowerCase();
// Otherwise look up word in
// ignores list, local dictionary,
// and finally, global dictionary.
return ignoreList.lookupWord( lowerCaseWord ) ||
( ( localDictionary == null ) ?
false : localDictionary.lookupWord( lowerCaseWord ) ) ||
( ( globalDictionary == null ) ?
false : globalDictionary.lookupWord( lowerCaseWord ) );
}
/** Prunes list of suggestions to those most like the original word.
*
* @param word The original misspelled word.
* @param suggestions List of suggested spellings.
*
* @return Possibly pruned list of suggested words.
* The suggestions are pruned using the
* Levenstein distance.
*/
private List<String> pruneSuggestions
(
String word ,
List<String> suggestions
)
{
int nSuggestions = suggestions.size();
List<String> result = ListFactory.createNewList( nSuggestions );
for ( int i = 0; i < nSuggestions; i++ )
{
String suggestedWord = suggestions.get( i );
if ( LevensteinDistance.areAlike( word , suggestedWord ) )
{
result.add( suggestedWord );
}
}
return result;
}
/** Adds list of words to a Map.
*
* @param map The map.
* @param words The arraylist of words to add to map.
*/
private void addWords( Map<String, Integer> map , List<String> words )
{
if ( ( words == null ) || ( words.size() == 0 ) ) return;
for ( int i = 0; i < words.size(); i++ )
{
String word = words.get( i );
if ( !map.containsKey( word ) )
{
map.put( word , new Integer( 1 ) );
}
}
}
/** Suggest alternative words for misspelled item.
*
* @param word The misspelled word for which
* possible alternatives are desired.
*
* @param prune True to prune suggestions using
* Levenstein distance.
*
* @return Sorted array of correctly spelled
* words which are similar in some sense
* to the misspelled word.
*
* <p>
* The suggested words are typically words which are similar
* in sound or spelling to the misspelled word. If the misspelled
* word begins with a capital letter, the suggestions are also
* capitalized to match.
* </p>
*
*/
public String[] suggest( String word , boolean prune )
{
// Get suggestions from dictionary.
Set<String> suggestions = globalDictionary.getRelatedWords( word );
if ( localDictionary != null )
{
suggestions.addAll( localDictionary.getRelatedWords( word ) );
}
return (String[])suggestions.toArray(
new String[ suggestions.size() ] );
}
public String[] suggestMore( String word , boolean prune )
{
// Convert word to lower case for
// dictionary lookups.
Set<String> suggestions =
((TernaryTrieSpellingDictionary)globalDictionary).getMoreRelatedWords( word );
if ( localDictionary != null )
{
suggestions.addAll
(
((TernaryTrieSpellingDictionary)localDictionary).getMoreRelatedWords( word )
);
}
return (String[])suggestions.toArray( new String[]{} );
}
/** Suggest alternative words for misspelled item.
*
* @param word The misspelled word for which
* possible alternatives are desired.
*
* <p>
* The suggested words are typically words which are similar
* in sound or spelling to the misspelled word.
* </p>
*
*/
public String[] suggest( String word )
{
return suggest( word , false );
}
public String[] suggestMore( String word )
{
return suggestMore( word , false );
}
/** Add a word to the local dictionary.
*
* @param word The word to add to the local dictionary.
*
* @return True if word added successfully.
*/
public boolean addWordToLocalDictionary( String word )
{
if ( localDictionary != null )
return localDictionary.addWord( word );
else
return false;
}
/** Add a word to the global dictionary.
*
* @param word The word to add to the global dictionary.
*
* @return True if word added successfully.
*
* <p>
* Typically this function is retricted to system administrators.
* </p>
*/
public boolean addWordToGlobalDictionary( String word )
{
// return globalDictionary.addWord( word );
return false;
}
/** Add word to ignore list.
*
* @param word Word to ignore.
*
* @return True if word successfully added to ignore list.
*
* <p>
* The ignore list contains words which an individual marks as ignorable
* while checking the spelling of one of more documents. The ignore
* list is transient. If an ignored word is to persist across multiple
* spelling check sessions, the word should be added to the local
* dictionary.
* </p>
*/
public boolean addWordToIgnoreList( String word )
{
return ignoreList.addWord( word );
}
/** Empties the ignore list.
*
* <p>
* Removes all words from the ignore list. Typically this is
* invoked at the start of a spelling checker session for a
* new document.
* </p>
*/
public void emptyIgnoreList()
{
ignoreList.clear();
}
}