package edu.northwestern.at.utils.spellcheck;
import java.io.*;
import java.util.*;
import edu.northwestern.at.utils.*;
import edu.northwestern.at.utils.corpuslinguistics.phonetics.*;
/** HashMapSpellingDictionary -- implements hash map based spelling dictionary.
*
* <p>
* This class provides the basic methods for a spelling checker
* dictionary implemented using hash maps.
* </p>
*/
public class HashMapSpellingDictionary implements SpellingDictionary
{
/** The dictionary keyed by metaphone values. */
protected Map<String, List<String>> metaphoneDictionary =
MapFactory.createNewMap();
/** Metaphone encoder instance. */
private DoubleMetaphone metaphone = new DoubleMetaphone();
/** Create HashMapSpellingDictionary. */
public HashMapSpellingDictionary()
{
}
/** Reads the dictionary from a buffered reader.
*
* @param in The buffered reader.
*
* @throws IOException
*/
public void read( BufferedReader in )
throws IOException
{
String word;
String metaphoneValue;
// Read # of words in dictionary
String sWords = in.readLine();
int nWords = new Integer( sWords ).intValue();
// Read # of metaphone values
String sMeta = in.readLine();
int nMeta = new Integer( sMeta ).intValue();
// Set hash map capacities to match.
metaphoneDictionary = MapFactory.createNewMap();
// Pick up next metaphone value in
// dictionary.
while ( ( metaphoneValue = in.readLine() ) != null )
{
// Pick up the number of words having
// this metaphone value.
String sWordsThisMeta = in.readLine();
int nWordsThisMeta = new Integer( sWordsThisMeta ).intValue();
// Read the words matching this
// metaphone value.
List<String> words = ListFactory.createNewList();
for ( int i = 0; i < nWordsThisMeta; i++ )
{
word = in.readLine();
words.add( word );
}
// Make sure word list is sorted.
// We need this so that binary searches
// work later on when we lookup words.
Collections.sort( words );
// Add metaphone value.
metaphoneDictionary.put( metaphoneValue , words );
}
in.close();
}
/** Counts words in the dictionary.
*
* @return The number of words in the dictionary.
*/
public int wordCount()
{
int result = 0;
// Get list of metaphone values.
java.util.List<String> keys =
ListFactory.createNewList( metaphoneDictionary.keySet() );
// Loop over each metaphone value.
for ( String key : keys )
{
// Get word list for this metaphone value.
List<String> words = metaphoneDictionary.get( key );
// If not null, add # of words in this list
// to total.
if ( words != null )
{
result += words.size();
}
}
return result;
}
/** Writes the dictionary to a buffered writer.
*
* @param out The buffered writer.
*
* @throws IOException
*/
public void write( BufferedWriter out )
throws IOException
{
int nWords = wordCount();
int nMeta = metaphoneDictionary.size();
// Output # of words and # of metaphone values.
out.write( nWords + "\n" );
out.write( nMeta + "\n" );
// Get list of metaphone values.
java.util.List<String> keys =
ListFactory.createNewList( metaphoneDictionary.keySet() );
// Loop over each metaphone value.
for ( String key : keys )
{
// Get word list for this metaphone value.
List<String> words = metaphoneDictionary.get( key );
// Output the metaphone value.
out.write( key + "\n" );
// Output the number of words keyed to this
// metaphone value.
if ( words == null )
{
out.write( "0\n" );
}
else
{
// Output the words keyed to this metaphone value.
out.write( words.size() + "\n" );
for ( int i = 0; i < words.size(); i++ )
{
out.write( words.get( i ) + "\n" );
}
}
}
out.close();
}
/** Lookup word in dictionary.
*
* @param word The word to lookup.
*
* @return True if the word was found in the
* dictionary.
*
* <p>
* <strong>Note:</strong>
* </p>
*
* <p>
* Any processing of the word (conversion to lower case, etc.)
* should be done before calling this routine.
* </p>
*/
public boolean lookupWord( String word )
{
// Consider null or empty word to be spelled
// correctly.
if ( ( word == null ) || ( word.length() <= 0 ) ) return true;
// Get metaphone for word.
String lowerCaseWord = word.toLowerCase();
String metaphoneValue = "";
try
{
metaphoneValue = metaphone.encode( lowerCaseWord );
}
catch ( Exception e )
{
metaphoneValue = "";
}
// Get list of words for this metaphone value.
Set<String> words = getRelatedWords( metaphoneValue );
// If none, this word can't be in the
// dictionary, so say it's not found.
if ( words == null )
{
return false;
}
// See if the word is in the list of
// words for the metaphone value. If not,
// it's not in the dictionary.
else
{
return words.contains( lowerCaseWord );
}
}
/** Add a word to the dictionary.
*
* @param word The word to add to the dictionary.
*
* @return True if word added successfully.
*/
private boolean addWordPrivate( String word )
{
String lowerCaseWord = word.toLowerCase();
String metaphoneValue = "";
String metaphoneValue2 = "";
if ( lookupWord( lowerCaseWord ) ) return false;
try
{
metaphoneValue = metaphone.encode( lowerCaseWord );
}
catch ( Exception e )
{
metaphoneValue = "";
}
List<String> words = metaphoneDictionary.get( metaphoneValue );
if ( words == null )
{
words = ListFactory.createNewList();
}
words.add( lowerCaseWord );
Collections.sort( words );
metaphoneDictionary.put( metaphoneValue , words );
try
{
metaphoneValue2 = metaphone.getAlternate();
}
catch ( Exception e )
{
metaphoneValue2 = "";
}
if ( !metaphoneValue2.equals( metaphoneValue ) )
{
words = metaphoneDictionary.get( metaphoneValue2 );
if ( words == null )
{
words = ListFactory.createNewList();
}
words.add( lowerCaseWord );
Collections.sort( words );
metaphoneDictionary.put( metaphoneValue2 , words );
}
return true;
}
/** Add a word to the dictionary.
*
* @param word The word to add to the dictionary.
*
* @return True if word added successfully.
*/
public boolean addWord( String word )
{
return addWordPrivate( word );
}
/** Add multiple words to the dictionary.
*
* @param words The words to add to the dictionary.
*
* @return True if all words added successfully.
*/
public boolean addWords( String[] words )
{
boolean result = true;
for ( int i = 0; i < words.length; i++ )
{
boolean added = addWord( words[ i ] );
result = result && added;
}
return result;
}
/** Get list of words with a specified metaphone value.
*
* @param metaphone The metaphone value.
*
* @return Set of words with specified metaphone value.
* May be empty if no words with matching
* metaphone value found.
*/
public Set<String> getRelatedWords( String metaphone )
{
Set<String> result = SetFactory.createNewSet();
List<String> list = metaphoneDictionary.get( metaphone );
if ( list != null )
{
result.addAll( list );
}
return result;
}
/** Retrieves all words in dictionary.
*
* @return ArrayList of all words in dictionary.
*/
public Set<String> getAllWords()
{
Set<String> result = new TreeSet<String>();
// Get list of metaphone values.
java.util.List<String> keys =
ListFactory.createNewList( metaphoneDictionary.keySet() );
// Loop over each metaphone value.
for ( String key : keys )
{
// Get word list for this metaphone value.
List<String> words = metaphoneDictionary.get( key );
// If not null, add # of words in this list
// to total.
if ( words != null )
{
result.addAll( words );
}
}
return result;
}
/** Retrieves number of words in dictionary.
*
* @return Number of words in dictionary.
*/
public int getNumberOfWords()
{
return wordCount();
}
/** Clear dictionary of all words and metaphone values. */
public void clear()
{
metaphoneDictionary.clear();
}
}