GenericTransformator.java example

Explorer
ekit-master
- src
  - com
package  com.swabunga.spell.engine;

import java.io.*;
import java.util.*;

/** A generic implementation of a transformator. It takes an aspell
 *  phonetics file and builds a transformation table from it, using
 *  the inner class TransformationRule to represent each rule.
 *
 * @author Robert Gustavsson (robert@lindesign.se)
 */
public class GenericTransformator implements Transformator{

    /** Character that opens a group of alternative characters in a match expression. */
    public static final char STARTMULTI='(';
    /** Character that closes a group of alternative characters in a match expression. */
    public static final char ENDMULTI=')';

    /** The parsed rules (TransformationRule instances) in file order. */
    Object[] ruleArray=null;

    /**
     * Builds the transformation table from the given phonetics file.
     * The file is read with the platform default charset (FileReader) and
     * is closed again before the constructor returns.
     *
     * @param phonetic the phonetics file to read the rules from
     * @throws IOException if the file cannot be opened or read
     */
    public GenericTransformator(File phonetic)throws IOException{
        BufferedReader in=new BufferedReader(new FileReader(phonetic));
        try{
            buildRules(in);
        }finally{
            // The reader was previously never closed, leaking the file handle.
            in.close();
        }
    }

    /**
     * Returns the phonetic code of the word.
     * The word is upper-cased and scanned from left to right; at every
     * position the first rule (in file order) that matches is applied.
     *
     * @param word the word to transform
     * @return the transformed word, or null if no rule table has been built
     */
    public String transform(String word) {
        if(ruleArray==null)
            return null;
        StringBuffer str=new StringBuffer(word.toUpperCase());
        int startPos=0;

        // str.length() is consulted directly instead of a hand-maintained
        // copy, so the bound can never drift from the real buffer length.
        while(startPos<str.length()){
            int add=1;
            for(int i=0;i<ruleArray.length;i++){
                TransformationRule rule=(TransformationRule)ruleArray[i];
                // Rules anchored with '^' only apply at the start of the word.
                if(rule.startsWithExp() && startPos>0)
                    continue;
                // Skip only when the match would run past the end of the word;
                // a match that ends exactly on the last character is valid.
                if(startPos+rule.lengthOfMatch()>str.length())
                    continue;
                if(rule.isMatching(str,startPos)){
                    String replaceExp=rule.getReplaceExp();
                    str.replace(startPos,startPos+rule.getTakeOut(),replaceExp);
                    // Continue scanning right after the inserted replacement.
                    add=replaceExp.length();
                    break;
                }
            }
            startPos+=add;
        }
        return str.toString();
    }

    // Used to build up the transformation table: one rule per usable line.
    private void buildRules(BufferedReader in)throws IOException{
        String read=null;
        List ruleList=new ArrayList();
        while((read=in.readLine())!=null){
            buildRule(realTrimmer(read),ruleList);
        }
        ruleArray=ruleList.toArray();
    }

    // Here is where the real work of reading the phonetics file is done.
    // Parses one trimmed line: everything up to the first whitespace is the
    // match expression (with control characters), the rest is the replacement.
    private void buildRule(String str, List ruleList){
        if(str.length()<1)
            return;
        if(str.startsWith("version"))
            return;

        StringBuffer matchExp=new StringBuffer();
        StringBuffer replaceExp=new StringBuffer();
        boolean start=false, end=false;
        int takeOutPart=0, matchLength=0;
        boolean match=true, inMulti=false;
        for(int i=0;i<str.length();i++){
            char ch=str.charAt(i);
            if(Character.isWhitespace(ch)){
                // First whitespace ends the match part for the rest of the line.
                match=false;
            }else if(!match){
                replaceExp.append(ch);
            }else if(!isReservedChar(ch)){
                matchExp.append(ch);
                // A whole (..) group counts as a single character; the opening
                // parenthesis is the one that gets counted.
                if(!inMulti){
                    takeOutPart++;
                    matchLength++;
                }
                if(ch==STARTMULTI || ch==ENDMULTI)
                    inMulti=!inMulti;
            }else if(ch=='-'){
                // Each '-' leaves one more matched character in place.
                takeOutPart--;
            }else if(ch=='^'){
                start=true;
            }else if(ch=='$'){
                end=true;
            }
        }
        // A rule without a match expression, or one that would take out no
        // characters, can never make progress in transform(): it would loop
        // forever or hand StringBuffer.replace an invalid range. Ignore it.
        if(matchExp.length()==0 || takeOutPart<1)
            return;
        ruleList.add(new TransformationRule(matchExp.toString(), replaceExp.toString()
                                        , takeOutPart, matchLength, start, end));
    }

    // Chars with special meaning to aspell. Not every one is implemented here.
    private boolean isReservedChar(char ch){
        return ch=='<' || ch=='>' || ch=='^' || ch=='$' || ch=='-' || Character.isDigit(ch);
    }

    // Trims off everything we don't care about: a trailing '#' comment and
    // surrounding whitespace.
    private String realTrimmer(String row){
        int pos=row.indexOf('#');
        if(pos!=-1){
            row=row.substring(0,pos);
        }
        return row.trim();
    }

    // Inner Classes
    /*
    * Holds the match string and the replace string and all the rule attributes.
    * Is responsible for indicating matches.
    */
    private static class TransformationRule{

        private String replace;
        private char[] match;
        // takeOut=number of chars to replace;
        // matchLength=length of matching string counting multies as one.
        private int takeOut, matchLength;
        private boolean start, end;

        // Constructor
        public TransformationRule(String match, String replace, int takeout
                                  , int matchLength, boolean start, boolean end){
            this.match=match.toCharArray();
            this.replace=replace;
            this.takeOut=takeout;
            this.matchLength=matchLength;
            this.start=start;
            this.end=end;
        }

        /*
        * Returns true if word from pos and forward matches the match string.
        * Every literal character and every (..) group consumes exactly one
        * character of the word; a group matches if any of its characters
        * equals the word character at that position.
        * Expected: wordPos+matchLength<=word.length(). Running off the end of
        * the word is nevertheless treated as "no match" instead of throwing.
        */
        public boolean isMatching(StringBuffer word, int wordPos){
            boolean inMulti=false, multiMatch=false;

            for(int matchPos=0;matchPos<match.length;matchPos++){
                char matchCh=match[matchPos];
                if(matchCh==STARTMULTI || matchCh==ENDMULTI){
                    inMulti=!inMulti;
                    if(inMulti){
                        multiMatch=false;
                    }else{
                        // Group closed: it must have matched, and it consumes
                        // one word character so the next literal is compared
                        // against the following position.
                        if(!multiMatch)
                            return false;
                        wordPos++;
                    }
                }else{
                    if(wordPos>=word.length())
                        return false;
                    boolean same=(matchCh==word.charAt(wordPos));
                    if(inMulti){
                        multiMatch=multiMatch | same;
                    }else{
                        if(!same)
                            return false;
                        wordPos++;
                    }
                }
            }
            // A group that was never closed is treated as closed here.
            if(inMulti){
                if(!multiMatch)
                    return false;
                wordPos++;
            }
            // '$' rules must end exactly at the end of the word.
            return !end || wordPos==word.length();
        }

        public String getReplaceExp(){
            return replace;
        }

        public int getTakeOut(){
            return takeOut;
        }

        public boolean startsWithExp(){
            return start;
        }

        public int lengthOfMatch(){
            return matchLength;
        }

        // Just for debugging purposes.
        public String toString(){
            return "Match:"+String.valueOf(match)
                   +" Replace:"+replace
                   +" TakeOut:"+takeOut
                   +" MatchLength:"+matchLength
                   +" Start:"+start
                   +" End:"+end;
        }

    }
}