package org.xbib.elasticsearch.index.analysis.german;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Assert;
import org.junit.Test;
import org.xbib.elasticsearch.MapperTestUtils;

import java.io.IOException;
import java.io.StringReader;

/**
 * Token-level tests for the analyzers named {@code "default"} and {@code "unstemmed"}
 * that are loaded from {@code unstemmed.json} through {@link MapperTestUtils}.
 *
 * <p>Each test feeds one input string through an analyzer and compares the emitted
 * terms, in order, against a fixed list of expected terms.
 */
public class UnstemmedTests extends Assert {

    /** Classpath location of the analysis settings shared by every test in this class. */
    private static final String RESOURCE = "org/xbib/elasticsearch/index/analysis/german/unstemmed.json";

    /** Mixed German sentence with umlauts, sharp s, a hyphenated word and an ISBN, using the "default" analyzer. */
    @Test
    public void testOne() throws IOException {
        String source = "Ein Tag in Köln im Café an der Straßenecke mit einer Standard-Nummer ISBN 1-4493-5854-3";
        String[] expected = {
                "tag",
                "koln",
                "cafe",
                "caf",
                "strassenecke",
                "strasseneck",
                "standard-nummer",
                "standardnummer",
                "standard-numm",
                "standardnumm",
                "isbn",
                "1-4493-5854-3",
                "1449358543",
                "978-1-4493-5854-9",
                "9781449358549"
        };
        assertAnalyzed("default", null, source, expected);
    }

    /** Sentence with apostrophes and hyphenated compounds, using the "default" analyzer. */
    @Test
    public void testTwo() throws IOException {
        String source = "So wird's was: das Elasticsearch-Buch erscheint beim O'Reilly-Verlag.";
        String[] expected = {
                "wird's",
                "elasticsearch-buch",
                "elasticsearchbuch",
                "erscheint",
                "o'reilly-verlag",
                "o'reillyverlag"
        };
        assertAnalyzed("default", null, source, expected);
    }

    /** A bare hyphenated ISBN-13, using the "default" analyzer. */
    @Test
    public void testThree() throws IOException {
        String source = "978-1-4493-5854-9";
        String[] expected = {
                "978-1-4493-5854-9",
                "9781449358549"
        };
        assertAnalyzed("default", null, source, expected);
    }

    /** A "surname, forename" string containing an umlaut, using the "unstemmed" analyzer. */
    @Test
    public void testFour() throws IOException {
        String source = "Prante, Jörg";
        String[] expected = {
                "prante",
                "jorg"
        };
        assertAnalyzed("unstemmed", "test", source, expected);
    }

    /** A name spelled with the "oe" digraph, using the "unstemmed" analyzer. */
    @Test
    public void testFive() throws IOException {
        String source = "Schroeder";
        String[] expected = {
                "schroder"
        };
        assertAnalyzed("unstemmed", "test", source, expected);
    }

    /** A sentence containing the symbol-bearing term "C++", using the "default" analyzer. */
    @Test
    public void testSix() throws IOException {
        String source = "Programmieren in C++ für Einsteiger";
        String[] expected = {
                "programmieren",
                "programmi",
                "c++",
                "einsteiger",
                "einsteig"
        };
        assertAnalyzed("default", null, source, expected);
    }

    /**
     * Looks up the named analyzer from {@link #RESOURCE}, analyzes {@code source} with it
     * and asserts that the produced terms equal {@code expected}.
     *
     * @param analyzerName name of the analyzer to look up in the settings resource
     * @param fieldName    field name handed to {@link Analyzer#tokenStream}; may be {@code null}
     * @param source       text to analyze
     * @param expected     terms expected from the token stream, in order
     * @throws IOException if the analyzer cannot be built or the token stream fails
     */
    private void assertAnalyzed(String analyzerName, String fieldName, String source, String[] expected)
            throws IOException {
        Analyzer analyzer = MapperTestUtils.analyzer(RESOURCE, analyzerName);
        assertSimpleTSOutput(analyzer.tokenStream(fieldName, new StringReader(source)), expected);
    }

    /**
     * Consumes {@code stream} and asserts that it yields exactly the terms in
     * {@code expected}, in the same order and with no extra or missing tokens.
     *
     * <p>The stream is always closed, even when an assertion fails, so a failing
     * test does not leave the token stream open.
     *
     * @param stream   token stream to consume; closed by this method
     * @param expected terms expected from the stream, in order
     * @throws IOException if the token stream fails while being consumed
     */
    private void assertSimpleTSOutput(TokenStream stream, String[] expected) throws IOException {
        try {
            stream.reset();
            CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
            assertNotNull(termAttr);
            int count = 0;
            while (stream.incrementToken()) {
                assertTrue("unexpected extra token: " + termAttr, count < expected.length);
                assertEquals(expected[count], termAttr.toString());
                count++;
            }
            // Signal end-of-stream before closing, as the TokenStream consumer workflow requires.
            stream.end();
            // Expected value first so a failure message reports the counts the right way round.
            assertEquals(expected.length, count);
        } finally {
            stream.close();
        }
    }
}