/*****************************************************************************
*
* Copyright (C) Zenoss, Inc. 2010, 2014, all rights reserved.
*
* This content is made available according to terms specified in
* License.zenoss under the directory where your Zenoss product is installed.
*
****************************************************************************/
package org.zenoss.zep.index.impl.lucene;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.util.Version;
import org.zenoss.zep.index.impl.IndexConstants;
import java.io.Reader;
/**
 * Analyzer used for element and sub element identifiers.
 *
 * <p>Input passes through three stages in order: a {@link WhitespaceTokenizer},
 * a {@link LowerCaseFilter}, and an {@link NGramTokenFilter} whose gram sizes are
 * bounded by {@link #MIN_NGRAM_SIZE} and {@link #MAX_NGRAM_SIZE}.
 */
public final class LuceneIdentifierAnalyzer extends Analyzer {

    /** Minimum n-gram size handed to the n-gram filter; taken from {@link IndexConstants}. */
    public static final int MIN_NGRAM_SIZE = IndexConstants.MIN_NGRAM_SIZE;

    /** Maximum n-gram size handed to the n-gram filter; taken from {@link IndexConstants}. */
    public static final int MAX_NGRAM_SIZE = IndexConstants.MAX_NGRAM_SIZE;

    /**
     * Builds the tokenizer/filter chain for identifier fields.
     *
     * @param s      first argument supplied by the {@link Analyzer} framework; not used,
     *               so the same chain is built regardless of its value
     * @param reader character source that the tokenizer reads from
     * @return components pairing the tokenizer with the final n-gram stage
     */
    @Override
    protected TokenStreamComponents createComponents(String s, Reader reader) {
        // Stage 1: tokenize the raw input with the whitespace tokenizer.
        final Tokenizer tokenizer =
                new WhitespaceTokenizer(IndexConstants.LUCENE_VERSION, reader);

        // Stage 2: run every token through the lower-case filter.
        final TokenStream lowerCased =
                new LowerCaseFilter(IndexConstants.LUCENE_VERSION, tokenizer);

        // Stage 3: expand tokens into n-grams. The filter is deliberately pinned to
        // the 4.3 behavior because the n-gram filter changed a lot in 4.4 and later.
        final TokenStream nGrams =
                new NGramTokenFilter(Version.LUCENE_43, lowerCased, MIN_NGRAM_SIZE, MAX_NGRAM_SIZE);

        return new TokenStreamComponents(tokenizer, nGrams);
    }
}