package folioxml.lucene.analysis.folio; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.junit.Assert; import org.junit.Test; import java.io.IOException; import java.io.StringReader; public class TokenCombinerTest { @Test public void TestWithStandardAnalyzer() throws IOException { String text = "token1 token2 token3"; TestCombiner(new StandardAnalyzer().tokenStream("field", new StringReader(text)), text.replace(' ', '-')); } @Test public void TestWithFolioEnu() throws IOException { String text = "token1 token2 token3"; TestCombiner(new FolioEnuAnalyzer().tokenStream("field", new StringReader(text)), text.replace(' ', '-')); } @Test public void ShouldDie() throws IOException { String text = "token1* token2 token3"; TestCombiner(new FolioEnuAnalyzer().tokenStream("field", new StringReader(text)), "token1-token2-token3"); } public void TestCombiner(TokenStream s, String expected) throws IOException { TokenCombiner tc = new TokenCombiner(s, '-'); tc.reset(); int i = 0; while (tc.incrementToken()) { String term = tc.getAttribute(CharTermAttribute.class).toString(); Assert.assertEquals(expected, term); assert (i == 0); i++; } tc.end(); tc.close(); } @Test public void TestSA() throws IOException { String text = "agg bgg cgg"; TokenStream s = new StandardAnalyzer().tokenStream("field", new StringReader(text)); s.reset(); int i = 0; while (true) { boolean eos = !s.incrementToken(); //We have to process tokens even if they return end of file. String term = s.getAttribute(CharTermAttribute.class).toString(); if (i == 0) Assert.assertEquals("agg", term); if (i == 1) Assert.assertEquals("bgg", term); if (i == 2) Assert.assertEquals("cgg", term); if (i == 3) Assert.assertEquals("", term); i++; if (eos) break; } s.end(); s.close(); } @Test public void TestFolioEnu() throws IOException { String text = "agg bgg cgg"; TokenStream s = new FolioEnuAnalyzer().tokenStream("field", new StringReader(text)); s.reset(); int i = 0; while (s.incrementToken()) { String term = s.getAttribute(CharTermAttribute.class).toString(); if (i == 0) Assert.assertEquals("agg", term); if (i == 1) Assert.assertEquals("bgg", term); if (i == 2) Assert.assertEquals("cgg", term); if (i == 3) Assert.assertEquals("", term); i++; } s.end(); s.close(); } }