package com.tyndalehouse.step.core.data.analyzers; import org.apache.lucene.analysis.KeywordAnalyzer; import org.apache.lucene.analysis.PerFieldAnalyzerWrapper; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.util.Version; /** * Class to analyze various definition fields * * @author chrisburrell * */ public class DefinitionAnalyzer extends PerFieldAnalyzerWrapper { /** * Initialises the analyzer. * It is an assumption of the code that stepGloss and translations use the same type of analyzer * relies on sharing the same analyzer for both stepGloss and translations */ public DefinitionAnalyzer() { super(new KeywordAnalyzer()); final StandardAnalyzer standard = new StandardAnalyzer(Version.LUCENE_30); final KeywordAnalyzer keyword = new KeywordAnalyzer(); final TransliterationAnalyzer transliteration = new TransliterationAnalyzer(); final PorterStemmerAnalyzer porterStemmerAnalyzer = new PorterStemmerAnalyzer(); addAnalyzer("accentedUnicode", new AncientLanguageAnalyzer()); addAnalyzer("strongNumber", keyword); addAnalyzer("relatedNumbers", new CommaDelimitedAnalyzer()); //it is an assumption of the code that stepGloss and translations use the same type of analyzer - see above //javadoc comment addAnalyzer("stepGloss", standard); addAnalyzer("translations", standard); addAnalyzer("translationsStem", porterStemmerAnalyzer); addAnalyzer("stepGlossStem", porterStemmerAnalyzer); addAnalyzer("betaAccented", new BetaAccentedAnalyzer()); addAnalyzer("twoLetter", keyword); addAnalyzer("otherTransliteration", transliteration); addAnalyzer("simplifiedStepTransliteration", transliteration); } }