package org.xbib.elasticsearch.index.analysis.icu; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.junit.Test; import java.io.IOException; import java.io.StringReader; import static org.junit.Assert.*; import static org.xbib.elasticsearch.MapperTestUtils.tokenFilterFactory; import static org.xbib.elasticsearch.MapperTestUtils.tokenizerFactory; /** * */ public class IcuNumberFormatTests { @Test public void testGermanNumberFormat() throws IOException { String source = "Muss Rudi Völler fünftausend oder 10000 EUR Strafe zahlen?"; String[] expected = { "Muss", "Rudi", "Völler", "fünftausend", "oder", "zehntausend", "EUR", "Strafe", "zahlen" }; String resource = "org/xbib/elasticsearch/index/analysis/icu/icu_numberformat.json"; Tokenizer tokenizer = tokenizerFactory(resource, "my_tokenizer").create(); tokenizer.setReader(new StringReader(source)); TokenFilterFactory tokenFilter = tokenFilterFactory(resource, "spellout_de"); TokenStream tokenStream = tokenFilter.create(tokenizer); assertSimpleTSOutput(tokenStream, expected); } @Test public void testAmericanEnglish() throws IOException { String source = "You will never get 100,000 US dollars of salary per year."; String[] expected = { "You", "will", "never", "get", "onehundredthousand", "US", "dollars", "of", "salary", "per", "year" }; String resource = "org/xbib/elasticsearch/index/analysis/icu/icu_numberformat.json"; Tokenizer tokenizer = tokenizerFactory(resource, "my_tokenizer").create(); tokenizer.setReader(new StringReader(source)); TokenFilterFactory tokenFilter = tokenFilterFactory(resource,"spellout_en"); TokenStream tokenStream = tokenFilter.create(tokenizer); assertSimpleTSOutput(tokenStream, expected); } private void assertSimpleTSOutput(TokenStream stream, String[] expected) throws IOException { stream.reset(); CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class); assertNotNull(termAttr); int i = 0; while (stream.incrementToken()) { assertTrue(i < expected.length); assertEquals(expected[i], termAttr.toString()); i++; } assertEquals(i, expected.length); stream.close(); } }