package org.xbib.elasticsearch.index.analysis.decompound.patricia;

import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.xbib.elasticsearch.common.decompound.patricia.Decompounder;

import java.io.InputStream;

/**
 * Token filter factory that wraps token streams in a {@link DecompoundTokenFilter}.
 *
 * <p>A single {@link Decompounder} is built once in the constructor and handed to every
 * filter created by {@link #create(TokenStream)}.
 *
 * <p>Recognized settings (defaults in parentheses):
 * <ul>
 *   <li>{@code forward} ({@code /decompound/patricia/kompVVic.tree}) - classpath resource</li>
 *   <li>{@code backward} ({@code /decompound/patricia/kompVHic.tree}) - classpath resource</li>
 *   <li>{@code reduce} ({@code /decompound/patricia/grfExt.tree}) - classpath resource</li>
 *   <li>{@code threshold} ({@code 0.51}) - passed through to the {@link Decompounder}</li>
 *   <li>{@code respect_keywords} ({@code false}) - passed through to the filter</li>
 *   <li>{@code subwords_only} ({@code false}) - passed through to the filter</li>
 * </ul>
 */
public class DecompoundTokenFilterFactory extends AbstractTokenFilterFactory {

    private final Decompounder decompounder;

    // Both flags always receive a non-null default, so primitives are sufficient.
    private final boolean respectKeywords;

    private final boolean subwordsonly;

    /**
     * Creates the factory and eagerly builds the decompounder from the given settings.
     *
     * @param indexSettings index settings, forwarded to the superclass
     * @param environment   not used by this factory; kept for the factory constructor signature
     * @param name          filter name, forwarded to the superclass
     * @param settings      filter settings, see the class documentation for the recognized keys
     * @throws ElasticsearchException if the decompounder cannot be created
     */
    public DecompoundTokenFilterFactory(IndexSettings indexSettings, Environment environment,
                                        String name, Settings settings) {
        super(indexSettings, name, settings);
        this.decompounder = createDecompounder(settings);
        this.respectKeywords = settings.getAsBoolean("respect_keywords", false);
        this.subwordsonly = settings.getAsBoolean("subwords_only", false);
    }

    /**
     * Wraps the given token stream in a new {@link DecompoundTokenFilter} that uses this
     * factory's decompounder and flags.
     *
     * @param tokenStream the stream to wrap
     * @return a new decompounding filter over {@code tokenStream}
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new DecompoundTokenFilter(tokenStream, decompounder, respectKeywords, subwordsonly);
    }

    /**
     * Builds a {@link Decompounder} from the classpath resources named in the settings.
     *
     * @param settings filter settings providing {@code forward}, {@code backward},
     *                 {@code reduce} and {@code threshold}
     * @return the configured decompounder
     * @throws ElasticsearchException if a resource is missing or the decompounder fails to load
     */
    private Decompounder createDecompounder(Settings settings) {
        String forward = settings.get("forward", "/decompound/patricia/kompVVic.tree");
        String backward = settings.get("backward", "/decompound/patricia/kompVHic.tree");
        String reduce = settings.get("reduce", "/decompound/patricia/grfExt.tree");
        // NOTE(review): assumes Decompounder consumes the streams completely inside its
        // constructor, so closing them afterwards is safe - confirm against Decompounder.
        try (InputStream forwardStream = openResource("forward", forward);
             InputStream backwardStream = openResource("backward", backward);
             InputStream reduceStream = openResource("reduce", reduce)) {
            double threshold = settings.getAsDouble("threshold", 0.51);
            return new Decompounder(forwardStream, backwardStream, reduceStream, threshold);
        } catch (ElasticsearchException e) {
            // Already carries a precise message (e.g. which resource is missing); do not re-wrap.
            throw e;
        } catch (Exception e) {
            throw new ElasticsearchException("decompounder resources in settings not found: " + settings, e);
        }
    }

    /**
     * Opens a classpath resource, failing with a descriptive error when it does not exist.
     *
     * @param key  the settings key the path came from, used only in the error message
     * @param path the resource path, resolved via {@link Class#getResourceAsStream(String)}
     * @return an open stream that the caller must close
     * @throws ElasticsearchException if no resource exists at {@code path}
     */
    private InputStream openResource(String key, String path) {
        InputStream in = getClass().getResourceAsStream(path);
        if (in == null) {
            // getResourceAsStream signals "not found" with null rather than an exception.
            throw new ElasticsearchException("decompounder resource for setting '" + key
                    + "' not found: " + path);
        }
        return in;
    }
}