package com.code972.elasticsearch.plugins.index.analysis;

import com.code972.hebmorph.datastructures.DictHebMorph;
import com.code972.hebmorph.datastructures.DictRadix;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.hebrew.HebrewTokenizer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenizerFactory;

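/**
 * Factory that builds {@link HebrewTokenizer} instances for the analysis chain,
 * using the injected HebMorph dictionary for prefix data.
 */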
public class HebrewTokenizerFactory extends AbstractTokenizerFactory {
    private final DictHebMorph dict;

    // Optional special-case tokenization rules; none are configured by default.
    private DictRadix<Byte> SPECIAL_TOKENIZATION_CASES = null;

    // Suffix that marks a term as an exact (original) match; passed to the tokenizer below.
    private final char originalTermSuffix = '$';

    @Inject
    public HebrewTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings,
                                  final DictHebMorph dict) {
        super(indexSettings, name, settings);
        this.dict = dict;
    }
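
    /**
     * Creates a new {@link HebrewTokenizer} configured with the dictionary's
     * prefix data and the exact-match suffix.
     */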
    @Override
    public Tokenizer create() {
        HebrewTokenizer tokenizer = new HebrewTokenizer(dict.getPref(), SPECIAL_TOKENIZATION_CASES);
        tokenizer.setSuffixForExactMatch(originalTermSuffix);
        return tokenizer;
    }
}