package org.xbib.elasticsearch.index.analysis.symbolname;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.junit.Assert;
import org.junit.Test;

import java.io.IOException;
import java.io.StringReader;

import static org.xbib.elasticsearch.MapperTestUtils.tokenFilterFactory;
import static org.xbib.elasticsearch.MapperTestUtils.tokenizerFactory;

/**
 * Tests for the {@code symbolname} token filter.
 *
 * <p>For each whitespace-delimited token that contains symbol characters, the
 * filter is expected to emit the original token plus expanded forms where each
 * symbol is replaced by a symbolic name (e.g. {@code +} becomes
 * {@code __PLUSSIGN__}), both as a joined phrase and as individual terms.
 */
public class SymbolnameTokenFilterTests extends Assert {

    @Test
    public void testSimple() throws IOException {
        String source = "Programmieren mit C++";
        String[] expected = {
                "Programmieren",
                "mit",
                "C++",
                "C __PLUSSIGN__ __PLUSSIGN__",
                "C",
                "__PLUSSIGN__",
                "__PLUSSIGN__"
        };
        analyzeAndAssert(source, expected);
    }

    @Test
    public void testPunctuation() throws IOException {
        String source = "Programmieren mit C++ Version 2.0";
        String[] expected = {
                "Programmieren",
                "mit",
                "C++",
                "C __PLUSSIGN__ __PLUSSIGN__",
                "C",
                "__PLUSSIGN__",
                "__PLUSSIGN__",
                "Version",
                "2.0",
                "__DIGITTWO__ __FULLSTOP__ __DIGITZERO__",
                "__DIGITTWO__",
                "__FULLSTOP__",
                "__DIGITZERO__"
        };
        analyzeAndAssert(source, expected);
    }

    @Test
    public void testSingleSymbols() throws IOException {
        String source = "Programmieren mit + und - ist toll, oder?";
        String[] expected = {
                "Programmieren",
                "mit",
                "+",
                "__PLUSSIGN__",
                "und",
                "-",
                "__HYPHENMINUS__",
                "ist",
                "toll,",
                "toll __COMMA__",
                "toll",
                "__COMMA__",
                "oder?",
                "oder __QUESTIONMARK__",
                "oder",
                "__QUESTIONMARK__"
        };
        analyzeAndAssert(source, expected);
    }

    /**
     * Analyzes {@code source} with a whitespace tokenizer wrapped by the
     * {@code symbolname} token filter and asserts the produced terms.
     * Extracted because all three tests duplicated this setup verbatim.
     *
     * @param source   the input text to analyze
     * @param expected the expected terms, in emission order
     * @throws IOException if the token stream fails
     */
    private void analyzeAndAssert(String source, String[] expected) throws IOException {
        TokenFilterFactory tokenFilter = tokenFilterFactory("symbolname");
        Tokenizer tokenizer = tokenizerFactory("whitespace").create();
        tokenizer.setReader(new StringReader(source));
        assertSimpleTSOutput(tokenFilter.create(tokenizer), expected);
    }

    /**
     * Consumes the stream and asserts each emitted term against
     * {@code expected}, in order, and that the token count matches.
     *
     * @param stream   the token stream under test; closed by this method
     * @param expected the expected terms, in emission order
     * @throws IOException if the token stream fails
     */
    private void assertSimpleTSOutput(TokenStream stream, String[] expected) throws IOException {
        // try-with-resources: the original called close() after the assertions,
        // leaking the stream whenever an assertion failed.
        try (TokenStream ts = stream) {
            ts.reset();
            CharTermAttribute termAttr = ts.getAttribute(CharTermAttribute.class);
            assertNotNull(termAttr);
            int i = 0;
            while (ts.incrementToken()) {
                assertTrue("more tokens than expected", i < expected.length);
                assertEquals("at position " + i, expected[i], termAttr.toString());
                i++;
            }
            // JUnit convention is assertEquals(expected, actual); the original
            // had the arguments swapped, which garbles the failure message.
            assertEquals("token count", expected.length, i);
            // Lucene consumer contract: end() after incrementToken() returns false.
            ts.end();
        }
    }
}