package org.apache.solr.analysis;
import java.io.StringReader;
import java.util.HashMap;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/**
 * Checks the token stream produced by {@link ASCIIDuplicatingFilterFactory}
 * when it is fed whitespace-separated input.
 */
public class TestASCIIDuplicatingFilter extends BaseTokenStreamTestCase {

  /**
   * Asserts three things about the filter chain built by the factory:
   * <ol>
   *   <li>every accented input token is followed by an unaccented copy whose
   *       position increment is 0;</li>
   *   <li>the first token of each pair carries {@link TypeAttribute#DEFAULT_TYPE}
   *       and the second carries {@link OnChangeDuplicatingFilter#DUPLICATE};</li>
   *   <li>input consisting only of ASCII tokens comes out unchanged, one token
   *       per position.</li>
   * </ol>
   */
  public void test() throws Exception {
    final ASCIIDuplicatingFilterFactory filterFactory =
        new ASCIIDuplicatingFilterFactory(new HashMap<String,String>());
    final String accentedText = "čtyřista čtyřicet čtyři";

    final String plainType = TypeAttribute.DEFAULT_TYPE;
    final String duplicateType = OnChangeDuplicatingFilter.DUPLICATE;

    // Each accented term is immediately followed by its unaccented twin.
    final String[] expectedTerms = {
        "čtyřista", "ctyrista",
        "čtyřicet", "ctyricet",
        "čtyři", "ctyri"
    };
    final int[] expectedIncrements = {1, 0, 1, 0, 1, 0};
    final String[] expectedTypes = {
        plainType, duplicateType,
        plainType, duplicateType,
        plainType, duplicateType
    };

    // Position increments and token types are verified on separate streams,
    // one fresh stream per assertion.
    assertTokenStreamContents(
        filteredStream(filterFactory, accentedText), expectedTerms, expectedIncrements);
    assertTokenStreamContents(
        filteredStream(filterFactory, accentedText), expectedTerms, expectedTypes);

    // Test that it doesn't interfere with tokens that are already plain ASCII.
    assertTokenStreamContents(
        filteredStream(filterFactory, "Cyril Methood"),
        new String[] {"Cyril", "Methood"},
        new int[] {1, 1});
  }

  /**
   * Wraps a whitespace mock tokenizer over {@code text} with the filter
   * created by {@code filterFactory}.
   */
  private TokenStream filteredStream(ASCIIDuplicatingFilterFactory filterFactory, String text)
      throws Exception {
    return filterFactory.create(whitespaceMockTokenizer(new StringReader(text)));
  }
}