// DefinitionAnalyzer.java example
//
// Explorer
// step-master
package com.tyndalehouse.step.core.data.analyzers;

import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

/**
 * Lucene analyzer for definition documents that picks the analyzer to apply on a per-field basis.
 * Fields without an explicit registration in the constructor fall back to the
 * {@link KeywordAnalyzer} handed to the parent class as its default.
 * 
 * @author chrisburrell
 * 
 */
public class DefinitionAnalyzer extends PerFieldAnalyzerWrapper {

    /**
     * Builds the wrapper and registers an analyzer for every known definition field.
     * <p>
     * The code assumes that {@code stepGloss} and {@code translations} are analyzed in the same way,
     * so both fields are deliberately registered with the same analyzer instance.
     */
    public DefinitionAnalyzer() {
        super(new KeywordAnalyzer());

        // analyzers, several of which are shared between more than one field
        final StandardAnalyzer glossAnalyzer = new StandardAnalyzer(Version.LUCENE_30);
        final KeywordAnalyzer exactMatchAnalyzer = new KeywordAnalyzer();
        final TransliterationAnalyzer transliterationAnalyzer = new TransliterationAnalyzer();
        final PorterStemmerAnalyzer stemmingAnalyzer = new PorterStemmerAnalyzer();
        final AncientLanguageAnalyzer ancientLanguageAnalyzer = new AncientLanguageAnalyzer();
        final CommaDelimitedAnalyzer commaDelimitedAnalyzer = new CommaDelimitedAnalyzer();
        final BetaAccentedAnalyzer betaAccentedAnalyzer = new BetaAccentedAnalyzer();

        // original-language forms
        this.addAnalyzer("accentedUnicode", ancientLanguageAnalyzer);
        this.addAnalyzer("betaAccented", betaAccentedAnalyzer);

        // identifiers and short keys that are registered with the keyword analyzer
        this.addAnalyzer("strongNumber", exactMatchAnalyzer);
        this.addAnalyzer("twoLetter", exactMatchAnalyzer);
        this.addAnalyzer("relatedNumbers", commaDelimitedAnalyzer);

        // stepGloss and translations must stay on the same analyzer - the code relies on it
        this.addAnalyzer("stepGloss", glossAnalyzer);
        this.addAnalyzer("translations", glossAnalyzer);

        // stemmed counterparts of the gloss and translation fields
        this.addAnalyzer("stepGlossStem", stemmingAnalyzer);
        this.addAnalyzer("translationsStem", stemmingAnalyzer);

        // transliterations
        this.addAnalyzer("otherTransliteration", transliterationAnalyzer);
        this.addAnalyzer("simplifiedStepTransliteration", transliterationAnalyzer);
    }
}