package edu.cmu.geolocator.nlp.soundex; /************************************************************************* * Compilation: javac Soundex.java * Execution: java surname1 surname2 * * * % java Soundex Wohrzhick Warzick * W622: Wohrzhick * W622: Warzick * * % java Soundex Smith Smyth * S530: Smith * S530: Smyth * * % java Soundex Washington Lee * W252: Washington * L000: Lee * * % java Soundex Pfister Jackson * P236: Pfister * J250: Jackson * * % java Soundex Scott Numbers * S300: Scott * N516: Numbers * * Note: we ignore the "Names with Prefix" and "Constant Separator" * rules from * http://www.archives.gov/research_room/genealogy/census/soundex.html * *************************************************************************/ public class Soundex { public static String soundex(String s) { char[] x = s.toUpperCase().toCharArray(); char firstLetter = x[0]; // convert letters to numeric code for (int i = 0; i < x.length; i++) { switch (x[i]) { case 'B': case 'F': case 'P': case 'V': { x[i] = '1'; break; } case 'C': case 'G': case 'J': case 'K': case 'Q': case 'S': case 'X': case 'Z': { x[i] = '2'; break; } case 'D': case 'T': { x[i] = '3'; break; } case 'L': { x[i] = '4'; break; } case 'M': case 'N': { x[i] = '5'; break; } case 'R': { x[i] = '6'; break; } default: { x[i] = '0'; break; } } } // remove duplicates String output = "" + firstLetter; for (int i = 1; i < x.length; i++) if (x[i] != x[i-1] && x[i] != '0') output += x[i]; // pad with 0's or truncate output = output + "0000"; return output.substring(0, 4); } public static void main(String[] args) { String name1 ="George Town";//= args[0]; String name2 ="George Colony";//= args[1]; String code1 = soundex(name1); String code2 = soundex(name2); System.out.println(code1 + ": " + name1); System.out.println(code2 + ": " + name2); } }