/**
* Copyright (c) 2008--2015 Red Hat, Inc.
*
* This software is licensed to you under the GNU General Public License,
* version 2 (GPLv2). There is NO WARRANTY for this software, express or
* implied, including the implied warranties of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2
* along with this software; if not, see
* http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
*
* Red Hat trademarks are not licensed under GPLv2. No permission is
* granted to use or replicate Red Hat trademarks that are incorporated
* in this software or its documentation.
*/
package com.redhat.satellite.search.index.ngram;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.TokenStream;
// From Lucene Sandbox
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import java.io.Reader;
/**
 * NGramAnalyzer
 * Analyzer whose token streams end in an {@link NGramTokenFilter}. An ngram
 * filter takes a term and breaks it up into a series of smaller letter
 * sequences; the sizes produced are bounded by the min/max values supplied
 * at construction.
 * @version $Rev$
 */
public class NGramAnalyzer extends Analyzer {

    // Lower bound on ngram size, passed straight to NGramTokenFilter
    protected int min_ngram;
    // Upper bound on ngram size, passed straight to NGramTokenFilter
    protected int max_ngram;

    /**
     * Constructor
     * @param min min length of ngram to generate
     * @param max max length of ngram to generate
     */
    public NGramAnalyzer(int min, int max) {
        super();
        this.min_ngram = min;
        this.max_ngram = max;
    }

    /**
     * Constructs an ngram token stream for the given reader. Convenience
     * overload that delegates to {@link #tokenStream(String, Reader)} with
     * a null field name.
     * @param reader contains data to parse
     * @return TokenStream of ngrams
     */
    public TokenStream tokenStream(Reader reader) {
        return this.tokenStream(null, reader);
    }

    /**
     * Builds the filter chain StandardTokenizer, StandardFilter,
     * LowerCaseFilter, NGramTokenFilter over the given reader.
     * @param fieldName ignored param
     * @param reader contains data to parse
     * @return TokenStream of ngrams
     */
    public TokenStream tokenStream(String fieldName, Reader reader) {
        // Wrap the stream one stage at a time, innermost stage first.
        TokenStream stream = new StandardTokenizer(reader);
        stream = new StandardFilter(stream);
        stream = new LowerCaseFilter(stream);
        return new NGramTokenFilter(stream, this.min_ngram, this.max_ngram);
    }
}