DSAnalyzer.java example

Explorer
DSpace-SVN-Deprecated-master
/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.search;

import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.PorterStemFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.util.Version;
import org.dspace.core.ConfigurationManager;

/**
 * Custom Lucene Analyzer that combines the standard filter, lowercase filter,
 * stemming and stopword filters.
 */
public class DSAnalyzer extends StopwordAnalyzerBase
{
    protected final Version matchVersion;
    /*
     * An array containing some common words that are not usually useful for
     * searching.
     */
    protected static final String[] STOP_WORDS =
    {

    // new stopwords (per MargretB)
            "a", "am", "and", "are", "as", "at", "be", "but", "by", "for",
            "if", "in", "into", "is", "it", "no", "not", "of", "on", "or",
            "the", "to", "was"
    // old stopwords (Lucene default)
    /*
     * "a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in",
     * "into", "is", "it", "no", "not", "of", "on", "or", "s", "such", "t",
     * "that", "the", "their","then", "there","these", "they", "this", "to",
     * "was", "will", "with"
     */
    };

    /*
     * Stop table
     */
    protected final Set stopSet;

    /**
     * Builds an analyzer
     * @param matchVersion Lucene version to match
     */
    public DSAnalyzer(Version matchVersion) {
        super(matchVersion, StopFilter.makeStopSet(matchVersion, STOP_WORDS));
        this.stopSet = StopFilter.makeStopSet(matchVersion, STOP_WORDS);
        this.matchVersion = matchVersion;
    }

    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        final Tokenizer source = new DSTokenizer(matchVersion, reader);
        TokenStream result = new StandardFilter(matchVersion, source);

        result = new LowerCaseFilter(matchVersion, result);
        result = new StopFilter(matchVersion, result, stopSet);
        result = new PorterStemFilter(result);

        return new TokenStreamComponents(source, result);
    }

    @Override
    public int getPositionIncrementGap(String fieldName)
    {
        // If it is the default field, or bounded fields is turned off in the config, return the default value
        if ("default".equalsIgnoreCase(fieldName) || !ConfigurationManager.getBooleanProperty("search.boundedfields", false))
        {
            return super.getPositionIncrementGap(fieldName);
        }

        // Not the default field, and we want bounded fields, so return an large gap increment
        return 10;
    }
}