package edu.northwestern.at.utils.spellcheck.tools; import java.awt.*; import java.awt.event.*; import java.io.*; import java.util.*; import edu.northwestern.at.utils.*; import edu.northwestern.at.utils.corpuslinguistics.phonetics.*; import edu.northwestern.at.utils.spellcheck.*; /** Creates a spelling dictionary data file. */ public class makedict { public static void main( String[] args ) { /** The input dictionary text. */ String dictionaryText = args[ 0 ]; /** The output spelling dictionary. */ String dictionaryName = args[ 1 ]; /** The dictionary as a treemap. Has words as keys. */ TreeMap<String, String>dictionary = new TreeMap<String, String>(); /** The dictionary keyed by metaphone values. */ TreeMap<String, java.util.List<String>> metaphones = new TreeMap<String, java.util.List<String>>(); /** Metaphone encoder instance. */ DoubleMetaphone metaphone = new DoubleMetaphone(); // Create dictionary. try { String word; int nWords = 0; try { BufferedReader in = new BufferedReader( new FileReader( dictionaryText ) ); while ( ( word = in.readLine() ) != null ) { String lowerCaseWord = word.toLowerCase(); String metaphoneValue = ""; try { metaphoneValue = metaphone.encode( lowerCaseWord ); } catch ( Exception e ) { metaphoneValue = ""; } if ( dictionary.get( lowerCaseWord ) == null ) { dictionary.put( lowerCaseWord , metaphoneValue ); java.util.List<String> words = metaphones.get( metaphoneValue ); if ( words == null ) { words = ListFactory.createNewList(); } words.add( lowerCaseWord ); metaphones.put( metaphoneValue , words ); nWords++; if ( ( ( nWords / 1000 ) * 1000 ) == nWords ) { System.out.println( nWords ); }; } } in.close(); } catch ( Exception e ) { e.printStackTrace(); } nWords = dictionary.size(); int nMeta = metaphones.size(); BufferedWriter out = new BufferedWriter( new FileWriter( dictionaryName ) ); out.write( nWords + "\n" ); out.write( nMeta + "\n" ); java.util.List<String> keys = new ArrayList<String>( metaphones.keySet() ); for ( String key : keys ) { out.write( key + "\n" ); java.util.List<String> words = metaphones.get( key ); if ( words == null ) { out.write( "0\n" ); } else { out.write( words.size() + "\n" ); for ( int i = 0; i < words.size(); i++ ) { key = words.get( i ); out.write( key + "\n" ); } } } out.close(); } catch ( Exception e ) { e.printStackTrace(); } } }