package org.xbib.elasticsearch.index.analysis.icu; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; import java.io.InputStream; /** * Applies foldings from UTR#30 Character Foldings. * Can be filtered to handle certain characters in a specified way. * See http://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.html * E.g national chars that should be retained, like unicodeSetFilter : "[^åäöÅÄÖ]". */ public class IcuFoldingTokenFilterFactory extends IcuNormalizerTokenFilterFactory { public IcuFoldingTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { super(indexSettings, environment, name, settings); } @Override public Object getMultiTermComponent() { return this; } @Override protected String getNormalizationName(Settings settings) { return settings.get("name", "utr30"); } @Override protected InputStream getNormalizationResource(Settings settings) { InputStream inputStream = null; if ("utr30".equals(getNormalizationName(settings))) { inputStream = getClass().getResourceAsStream("/icu/utr30.nrm"); } return inputStream; } }