package org.xbib.elasticsearch.index.analysis.german;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.inject.Injector;
import org.elasticsearch.common.inject.ModulesBuilder;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.SettingsModule;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.EnvironmentModule;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexNameModule;
import org.elasticsearch.index.analysis.AnalysisModule;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.settings.IndexSettingsModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.junit.Assert;
import org.junit.Test;
import org.xbib.elasticsearch.plugin.analysis.german.AnalysisGermanPlugin;

import java.io.IOException;
import java.io.StringReader;

/**
 * Tests the "umlaut" token filter registered by {@link AnalysisGermanPlugin}:
 * German umlauts are folded to their base vowel (ö → o) and ß becomes "ss",
 * while non-German diacritics (e.g. the é in "Café") are left untouched.
 */
public class GermanNormalizationTests extends Assert {

    @Test
    public void test() throws IOException {
        String source = "Ein schöner Tag in Köln im Café an der Straßenecke";
        String[] expected = {
                "Ein", "schoner", "Tag", "in", "Koln", "im", "Café", "an", "der", "Strassenecke"
        };
        AnalysisService analysisService = createAnalysisService();
        TokenFilterFactory tokenFilter = analysisService.tokenFilter("umlaut");
        Tokenizer tokenizer = analysisService.tokenizer("standard").create(new StringReader(source));
        assertSimpleTSOutput(tokenFilter.create(tokenizer), expected);
    }

    /**
     * Builds an {@link AnalysisService} wired up with the German analysis plugin,
     * using the analysis settings loaded from the test JSON on the classpath.
     *
     * @return an analysis service for the throwaway index "test"
     */
    private AnalysisService createAnalysisService() {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
                .loadFromClasspath("org/xbib/elasticsearch/index/analysis/german/german_normalization_analysis.json")
                .build();
        Index index = new Index("test");
        // Parent injector provides node-level services; the child injector adds
        // the index-scoped analysis module with the plugin's filters registered.
        Injector parentInjector = new ModulesBuilder()
                .add(new SettingsModule(settings),
                        new EnvironmentModule(new Environment(settings)),
                        new IndicesAnalysisModule())
                .createInjector();
        AnalysisModule analysisModule =
                new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class));
        new AnalysisGermanPlugin().onModule(analysisModule);
        Injector injector = new ModulesBuilder()
                .add(new IndexSettingsModule(index, settings),
                        new IndexNameModule(index),
                        analysisModule)
                .createChildInjector(parentInjector);
        return injector.getInstance(AnalysisService.class);
    }

    /**
     * Consumes the given token stream and asserts that it produces exactly the
     * expected terms, in order.
     *
     * @param stream   the token stream under test; always closed on exit
     * @param expected the expected term texts, in stream order
     * @throws IOException if the stream fails while being consumed
     */
    private void assertSimpleTSOutput(TokenStream stream, String[] expected) throws IOException {
        // Lucene consumer contract: reset() before iteration, end() after the
        // last incrementToken(), close() always — hence the try/finally, which
        // also prevents a stream leak when an assertion fails mid-iteration.
        try {
            stream.reset();
            CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
            assertNotNull(termAttr);
            int i = 0;
            while (stream.incrementToken()) {
                assertTrue("more tokens than expected", i < expected.length);
                // JUnit convention: expected value first, actual value second.
                assertEquals(expected[i], termAttr.toString());
                i++;
            }
            assertEquals("token count", expected.length, i);
            stream.end();
        } finally {
            stream.close();
        }
    }
}