package org.xbib.elasticsearch.index.analysis.icu;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.Test;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import static org.junit.Assert.*;
import static org.xbib.elasticsearch.MapperTestUtils.analyzer;
import static org.xbib.elasticsearch.MapperTestUtils.charFilterFactory;
/**
 * Tests for the ICU normalization char filter and the ICU folding analyzers
 * configured in {@code icu_normalize.json}.
 */
public class IcuNormalizeCharTests {
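    /**
     * The {@code my_icu_normalizer} char filter is expected to lower-case its input and
     * strip diacritics, turning "Jörg Prante" into "jorg prante".
     */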
@Test
public void testNormalize() throws IOException {
String source = "Jörg Prante";
String resource = "org/xbib/elasticsearch/index/analysis/icu/icu_normalize.json";
Reader charFilter = charFilterFactory(resource, "my_icu_normalizer").create(new StringReader(source));
StringBuilder sb = new StringBuilder();
int ch;
while ((ch = charFilter.read()) != -1) {
sb.append((char)ch);
}
assertEquals("jorg prante", sb.toString());
}
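    /**
     * The {@code my_icu_analyzer} analyzer is expected to tokenize the input and apply ICU
     * folding: case folding, diacritic removal, and the script-specific and digit foldings
     * exercised by the samples below.
     */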
@Test
public void testFoldingAnalyzer() throws IOException {
String resource = "org/xbib/elasticsearch/index/analysis/icu/icu_normalize.json";
        Analyzer analyzer = analyzer(resource, "my_icu_analyzer");
TokenStream ts = analyzer.tokenStream("test", "Jörg Prante");
String[] expected = {"jorg", "prante"};
assertSimpleTSOutput(ts, expected);
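        // plain ASCII text passes through, lower-cased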
assertSimpleTSOutput(analyzer.tokenStream("test", "This is a test"), new String[]{ "this", "is", "a", "test" });
assertSimpleTSOutput(analyzer.tokenStream("test", "Ruß"), new String[]{ "russ" });
assertSimpleTSOutput(analyzer.tokenStream("test", "ΜΆΪΟΣ"), new String[]{ "μαιοσ" });
assertSimpleTSOutput(analyzer.tokenStream("test", "Μάϊος"), new String[] { "μαιοσ" });
assertSimpleTSOutput(analyzer.tokenStream("test", "𐐖"), new String[] { "𐐾" });
assertSimpleTSOutput(analyzer.tokenStream("test", "ﴳﴺﰧ"), new String[] { "طمطمطم" });
assertSimpleTSOutput(analyzer.tokenStream("test", "क्ष"), new String[] { "कष" });
assertSimpleTSOutput(analyzer.tokenStream("test", "résumé"), new String[] { "resume" });
assertSimpleTSOutput(analyzer.tokenStream("test", "re\u0301sume\u0301"), new String[] { "resume" });
assertSimpleTSOutput(analyzer.tokenStream("test", "৭০৬"), new String[] { "706" });
assertSimpleTSOutput(analyzer.tokenStream("test", "đis is cræzy"), new String[] { "dis", "is", "craezy" });
assertSimpleTSOutput(analyzer.tokenStream("test", "ELİF"), new String[] { "elif" });
assertSimpleTSOutput(analyzer.tokenStream("test", "eli\u0307f"), new String[] { "elif" });
}
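    /**
     * The {@code my_icu_analyzer_with_exceptions} analyzer is expected to exclude "ö" from
     * folding, so "Jörg" keeps its umlaut while still being lower-cased.
     */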
@Test
    public void testFoldingAnalyzerWithExceptions() throws IOException {
String resource = "org/xbib/elasticsearch/index/analysis/icu/icu_normalize.json";
Analyzer analyzer = analyzer(resource, "my_icu_analyzer_with_exceptions");
TokenStream ts = analyzer.tokenStream("test", "Jörg Prante");
String[] expected = { "jörg", "prante" };
assertSimpleTSOutput(ts, expected);
}
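    /**
     * Consumes the token stream and asserts that it yields exactly the expected terms, in order.
     */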
private void assertSimpleTSOutput(TokenStream stream, String[] expected) throws IOException {
stream.reset();
CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
assertNotNull(termAttr);
int i = 0;
while (stream.incrementToken()) {
assertTrue(i < expected.length);
assertEquals(expected[i], termAttr.toString());
i++;
}
        assertEquals(expected.length, i);
        // signal end of stream before closing, per the TokenStream consumer contract
        stream.end();
        stream.close();
}
}