package com.swabunga.spell.engine; import java.io.*; import java.util.*; /** A Generic implementation of a transformator takes an aspell * phonetics file and constructs some sort of transformationtable using * the inner class Rule. * * @author Robert Gustavsson (robert@lindesign.se) */ public class GenericTransformator implements Transformator{ public static final char STARTMULTI='('; public static final char ENDMULTI=')'; Object[] ruleArray=null; public GenericTransformator(File phonetic)throws IOException{ buildRules(new BufferedReader(new FileReader(phonetic))); } /** * Returns the phonetic code of the word. */ public String transform(String word) { if(ruleArray==null) return null; TransformationRule rule; StringBuffer str=new StringBuffer(word.toUpperCase()); int strLength=str.length(); int startPos=0, add=1; while(startPos<strLength){ //System.out.println("StartPos:"+startPos); add=1; for(int i=0;i<ruleArray.length;i++){ //System.out.println("Testing rule#:"+i); rule=(TransformationRule)ruleArray[i]; if(rule.startsWithExp() && startPos>0) continue; if(startPos+rule.lengthOfMatch()>=strLength) continue; if(rule.isMatching(str,startPos)){ str.replace(startPos,startPos+rule.getTakeOut(),rule.getReplaceExp()); add=rule.getReplaceExp().length(); strLength-=rule.getTakeOut(); strLength+=add; //System.out.println("Replacing with rule#:"+i+" add="+add); break; } } startPos+=add; } return str.toString(); } // Used to build up the transformastion table. private void buildRules(BufferedReader in)throws IOException{ String read=null; LinkedList ruleList=new LinkedList(); while((read=in.readLine())!=null){ buildRule(realTrimmer(read),ruleList); } ruleArray=ruleList.toArray(); } // Here is where the real work of reading the phonetics file is done. private void buildRule(String str, LinkedList ruleList){ if(str.length()<1) return; if(str.startsWith("version")) return; TransformationRule rule=null; StringBuffer matchExp=new StringBuffer(); StringBuffer replaceExp=new StringBuffer(); boolean start=false, end=false; int takeOutPart=0, matchLength=0; boolean match=true, inMulti=false; for(int i=0;i<str.length();i++){ if(Character.isWhitespace(str.charAt(i))){ match=false; }else{ if(match){ if (!isReservedChar(str.charAt(i))){ matchExp.append(str.charAt(i)); if(!inMulti){ takeOutPart++; matchLength++; } if(str.charAt(i)==STARTMULTI || str.charAt(i)==ENDMULTI) inMulti=!inMulti; } if (str.charAt(i)=='-') takeOutPart--; if (str.charAt(i)=='^') start=true; if (str.charAt(i)=='$') end=true; }else{ replaceExp.append(str.charAt(i)); } } } rule=new TransformationRule(matchExp.toString(), replaceExp.toString() , takeOutPart, matchLength, start, end); ruleList.add(rule); } // Chars with special meaning to aspell. Not everyone is implemented here. private boolean isReservedChar(char ch){ if(ch=='<' || ch=='>' || ch=='^' || ch=='$' || ch=='-' || Character.isDigit(ch)) return true; return false; } // Trims off everything we don't care about. private String realTrimmer(String row){ int pos=row.indexOf('#'); if(pos!=-1){ row=row.substring(0,pos); } return row.trim(); } // Inner Classes /* * Holds the match string and the replace string and all the rule attributes. * Is responsible for indicating matches. */ private class TransformationRule{ private String replace; private char[] match; // takeOut=number of chars to replace; // matchLength=length of matching string counting multies as one. private int takeOut, matchLength; private boolean start, end; // Construktor public TransformationRule(String match, String replace, int takeout , int matchLength, boolean start, boolean end){ this.match=match.toCharArray(); this.replace=replace; this.takeOut=takeout; this.matchLength=matchLength; this.start=start; this.end=end; } /* * Returns true if word from pos and forward matches the match string. * Precondition: wordPos+matchLength<word.length() */ public boolean isMatching(StringBuffer word, int wordPos){ boolean matching=true, inMulti=false, multiMatch=false; char matchCh; for(int matchPos=0;matchPos<match.length;matchPos++){ matchCh=match[matchPos]; if(matchCh==STARTMULTI || matchCh==ENDMULTI){ inMulti=!inMulti; if(!inMulti) matching=matching & multiMatch; else multiMatch=false; }else{ if(matchCh!=word.charAt(wordPos)){ if(inMulti) multiMatch=multiMatch | false; else matching=false; }else{ if(inMulti) multiMatch=multiMatch | true; else matching=true; } if(!inMulti) wordPos++; if(!matching) break; } } if(end && wordPos!=word.length()-1) matching=false; return matching; } public String getReplaceExp(){ return replace; } public int getTakeOut(){ return takeOut; } public boolean startsWithExp(){ return start; } public int lengthOfMatch(){ return matchLength; } // Just for debugging purposes. public String toString(){ return "Match:"+String.valueOf(match) +" Replace:"+replace +" TakeOut:"+takeOut +" MatchLength:"+matchLength +" Start:"+start +" End:"+end; } } }