package root.gast.speech.lucene;

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;

import android.util.Log;

/**
 * Analyzer used to index and match speech recognition results.
 *
 * <p>Every token stream is split on whitespace and lower-cased. Depending on
 * the flags given to the constructor, the tokens are then optionally stemmed
 * with {@link EnglishMinimalStemFilter} and/or encoded with Soundex through
 * {@code PhoneticFilter31}.
 */
public class RecognitionIndexer extends Analyzer
{
    private static final String TAG = "RecognitionIndexer";

    /** Whether tokens are replaced by their Soundex encoding. */
    private final boolean phonetic;

    /** Whether tokens are stemmed before any phonetic encoding. */
    private final boolean doStem;

    /**
     * Creates an analyzer with the requested token processing.
     *
     * <p>The flags are independent: either one, both, or neither may be set.
     * With both set, stemming runs before the phonetic encoding. With neither
     * set, the analyzer produces plain lower-cased whitespace tokens.
     *
     * @param phonetic {@code true} to encode tokens with Soundex
     * @param doStem   {@code true} to apply minimal English stemming
     */
    public RecognitionIndexer(boolean phonetic, boolean doStem)
    {
        this.phonetic = phonetic;
        this.doStem = doStem;
    }

    /**
     * Builds the filter chain for the given input.
     *
     * @param tokens name of the field being analyzed (not used by this
     *               analyzer)
     * @param reader source of the text to tokenize
     * @return the configured token stream; never {@code null}
     */
    @Override
    public TokenStream tokenStream(String tokens, Reader reader)
    {
        // Base of every chain: whitespace tokens, lower-cased. Using this as
        // the starting point (instead of a null placeholder) guarantees a
        // usable stream even when neither stemming nor phonetic is enabled.
        TokenStream stream = new LowerCaseFilter(LuceneParameters.VERSION,
                new WhitespaceTokenizer(LuceneParameters.VERSION, reader));

        if (doStem)
        {
            stream = new EnglishMinimalStemFilter(stream);
        }

        if (phonetic)
        {
            // Note: PhoneticFilter31 is our custom Lucene class that
            // works with Lucene 3.1.0 and uses any of phonetix algorithms.
            // It is always fed lower-cased tokens, so the phonetic-only
            // configuration sees the same casing as stem+phonetic.
            // NOTE(review): assumes Soundex keys do not depend on letter
            // case -- confirm against the phonetix implementation.
            stream = new PhoneticFilter31(stream,
                    new com.tangentum.phonetix.Soundex());
            Log.d(TAG, doStem ? "do stem and phonetic!" : "do just phonetic");
        }

        return stream;
    }
}