/** * (c) 2000-2011 Carlos G�mez Rodr�guez, todos los derechos reservados / all rights reserved. * Licencia en license.txt / License in license.txt * File created: 28/01/2011 17:07:48 */ package eu.irreality.age.spell; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.Map; import java.util.Set; public class SimpleReverseCorrector implements SpellingCorrector { private Set distance0 = new LinkedHashSet(); private Map distance1 = new HashMap(); private static final char WILDCARD = '?'; public void init(Collection words) { distance0 = new LinkedHashSet(); distance1 = new HashMap(); for ( Iterator iter = words.iterator() ; iter.hasNext(); ) { String nextWord = (String) iter.next(); addDictionaryWord ( nextWord ); } } public void addDictionaryWord(String word) throws UnsupportedOperationException { //add the word itself distance0.add(word); for ( int i = 0 ; i < word.length() ; i++ ) { StringBuffer sb = new StringBuffer(word); //add substitution of character i sb.setCharAt(i,WILDCARD); distance1.put(sb.toString(),word); //add deletion of character i sb.deleteCharAt(i); distance1.put(sb.toString(),word); //add insertion before character i sb = new StringBuffer(word); sb.insert(i,WILDCARD); distance1.put(sb.toString(),word); //add swapping of characters i, i+1 if ( i < word.length() - 1 ) { sb = new StringBuffer(word); char temp = sb.charAt(i); sb.setCharAt(i,sb.charAt(i+1)); sb.setCharAt(i+1,temp); distance1.put(sb.toString(),word); } } //add insertion at end distance1.put(word+WILDCARD,word); } public Correction getBestCorrection(String word) { if ( distance0.contains(word) ) { return new Correction(word,0); } else { String attempt = (String) distance1.get(word); if ( attempt != null ) //deletion or transposition return new Correction(attempt,1); for ( int i = 0 ; i < word.length() ; i++ ) { //try substitution StringBuffer sb = new StringBuffer(word); sb.setCharAt(i,WILDCARD); attempt = (String) distance1.get(sb.toString()); if ( attempt != null ) //substitution return new Correction(attempt,1); //try insertion before i sb = new StringBuffer(word); sb.insert(i,WILDCARD); attempt = (String) distance1.get(sb.toString()); if ( attempt != null ) //insertion return new Correction(attempt,1); } //try insertion at end attempt = (String) distance1.get(word+WILDCARD); if ( attempt != null ) //insertion at end return new Correction(attempt,1); } return null; } //test public static void main ( String[] args ) { SpellingCorrector c = new SimpleReverseCorrector(); c.addDictionaryWord("casa"); c.addDictionaryWord("coche"); System.out.println(c.getBestCorrection("casa")); System.out.println(c.getBestCorrection("coche")); System.out.println(c.getBestCorrection("cesa")); System.out.println(c.getBestCorrection("cocho")); System.out.println(c.getBestCorrection("acsa")); System.out.println(c.getBestCorrection("casar")); System.out.println(c.getBestCorrection("czsa")); System.out.println(c.getBestCorrection("czss")); } public String toString() { return "[simple reverse corrector with " + distance0.size() + " words , " + distance1.size() + " extended forms]"; } }