package org.xbib.elasticsearch.index.analysis.german; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.inject.Injector; import org.elasticsearch.common.inject.ModulesBuilder; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.SettingsModule; import org.elasticsearch.env.Environment; import org.elasticsearch.env.EnvironmentModule; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexNameModule; import org.elasticsearch.index.analysis.AnalysisModule; import org.elasticsearch.index.analysis.AnalysisService; import org.elasticsearch.index.settings.IndexSettingsModule; import org.elasticsearch.indices.analysis.IndicesAnalysisModule; import org.elasticsearch.indices.analysis.IndicesAnalysisService; import org.junit.Assert; import org.junit.Test; import org.xbib.elasticsearch.plugin.analysis.german.AnalysisGermanPlugin; import java.io.IOException; import java.io.StringReader; public class SettingsTests extends Assert { @Test public void testOne() throws IOException { String source = "Ein Tag in Köln im Café an der Straßenecke mit einer Standard-Nummer ISBN 1-4493-5854-3"; String[] expected = { "ein", "tag", "in", "koln", "im", "caf", "an", "der", "strasseneck", "mit", "standard-numm", "standardnumm", "numm", "standard", "isbn", "1-4493-5854-3", "1449358543", "978-1-4493-5854-9", "9781449358549" }; AnalysisService analysisService = createAnalysisService(); Analyzer analyzer = analysisService.defaultAnalyzer(); assertSimpleTSOutput(analyzer.tokenStream(null, new StringReader(source)), expected); } @Test public void testTwo() throws IOException { String source = "So wird's was: das Elasticsearch-Buch erscheint beim O'Reilly-Verlag."; String[] expected = { "so", "wird's", "was", "das", "elasticsearch-buch", "elasticsearchbuch", "buch", "elasticsearch", "erscheint", "beim", "o'reilly-verlag" }; AnalysisService analysisService = createAnalysisService(); Analyzer analyzer = analysisService.defaultAnalyzer(); assertSimpleTSOutput(analyzer.tokenStream(null, new StringReader(source)), expected); } @Test public void testThree() throws IOException { String source = "978-1-4493-5854-9"; String[] expected = { "978-1-4493-5854-9", "9781449358549" }; AnalysisService analysisService = createAnalysisService(); Analyzer analyzer = analysisService.defaultAnalyzer(); assertSimpleTSOutput(analyzer.tokenStream(null, new StringReader(source)), expected); } private AnalysisService createAnalysisService() { Settings settings = ImmutableSettings.settingsBuilder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .loadFromClasspath("org/xbib/elasticsearch/index/analysis/german/test-settings.json").build(); Index index = new Index("test"); Injector parentInjector = new ModulesBuilder().add(new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()) .createInjector(); AnalysisModule analysisModule = new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)); new AnalysisGermanPlugin().onModule(analysisModule); Injector injector = new ModulesBuilder().add( new IndexSettingsModule(index, settings), new IndexNameModule(index), analysisModule) .createChildInjector(parentInjector); return injector.getInstance(AnalysisService.class); } private void assertSimpleTSOutput(TokenStream stream, String[] expected) throws IOException { stream.reset(); CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class); assertNotNull(termAttr); int i = 0; while (stream.incrementToken()) { assertTrue(i < expected.length); assertEquals(expected[i], termAttr.toString()); i++; } assertEquals(i, expected.length); stream.close(); } }