package org.xbib.elasticsearch.index.analysis.icu;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.text.UnicodeSet;
import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
/**
 * Token filter factory that wraps token streams in an {@link IcuTransformTokenFilter}
 * driven by an ICU {@link Transliterator}.
 *
 * <p>The transliterator is configured once, at construction time, from these settings:
 * <ul>
 *   <li>{@code id} - transliterator identifier; defaults to {@code "Null"}.</li>
 *   <li>{@code dir} - {@code "forward"} (the default) selects {@link Transliterator#FORWARD};
 *       any other value selects {@link Transliterator#REVERSE}.</li>
 *   <li>{@code rules} - optional; when present the transliterator is built from these rules
 *       under the given {@code id}, otherwise it is looked up by {@code id}.</li>
 *   <li>{@code unicodeSetFilter} - optional {@link UnicodeSet} pattern; when present the parsed
 *       set is frozen and installed as the transliterator's filter.</li>
 * </ul>
 *
 * <p>Every filter returned by {@link #create(TokenStream)} receives the same transliterator
 * instance.
 */
public class IcuTransformTokenFilterFactory extends AbstractTokenFilterFactory {

    private final Transliterator transliterator;

    /**
     * Creates the factory and eagerly builds the transliterator described by {@code settings}.
     *
     * @param indexSettings index settings, forwarded to the superclass
     * @param environment   node environment; not used by this factory
     * @param name          name of this token filter, forwarded to the superclass
     * @param settings      filter settings ({@code id}, {@code dir}, {@code rules},
     *                      {@code unicodeSetFilter})
     */
    public IcuTransformTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name,
                                          Settings settings) {
        super(indexSettings, name, settings);
        this.transliterator = buildTransliterator(settings);
    }

    /**
     * Wraps the given stream in an {@link IcuTransformTokenFilter} using this factory's
     * transliterator.
     *
     * @param tokenStream the stream to wrap
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new IcuTransformTokenFilter(tokenStream, transliterator);
    }

    /**
     * Builds the transliterator from the filter settings.
     *
     * @param settings filter settings to read from
     * @return the configured transliterator, with the optional filter already applied
     */
    private static Transliterator buildTransliterator(Settings settings) {
        final String transliteratorId = settings.get("id", "Null");

        // Only the exact string "forward" means forward; everything else is treated as reverse.
        int direction = Transliterator.REVERSE;
        if ("forward".equals(settings.get("dir", "forward"))) {
            direction = Transliterator.FORWARD;
        }

        final String customRules = settings.get("rules");
        final Transliterator result;
        if (customRules == null) {
            result = Transliterator.getInstance(transliteratorId, direction);
        } else {
            result = Transliterator.createFromRules(transliteratorId, customRules, direction);
        }

        final String filterPattern = settings.get("unicodeSetFilter");
        if (filterPattern != null) {
            result.setFilter(new UnicodeSet(filterPattern).freeze());
        }
        return result;
    }
}