package org.apache.solr.analysis;

import java.io.StringReader;
import java.util.HashMap;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

public class TestASCIIDuplicatingFilter extends BaseTokenStreamTestCase {

  public void test() throws Exception {
    ASCIIDuplicatingFilterFactory factory = new ASCIIDuplicatingFilterFactory(new HashMap<String,String>());

    // Each non-ASCII term should be followed by its ASCII-folded duplicate at the
    // same position (position increment 0).
    TokenStream stream = factory.create(whitespaceMockTokenizer(new StringReader("čtyřista čtyřicet čtyři")));
    String[] expected = new String[] { "čtyřista", "ctyrista", "čtyřicet", "ctyricet", "čtyři", "ctyri" };
    int[] increments = new int[] { 1, 0, 1, 0, 1, 0 };
    String W = TypeAttribute.DEFAULT_TYPE;
    String D = OnChangeDuplicatingFilter.DUPLICATE;
    String[] types = new String[] { W, D, W, D, W, D };
    assertTokenStreamContents(stream, expected, increments);

    // The injected duplicates carry the DUPLICATE token type; originals keep the default type.
    stream = factory.create(whitespaceMockTokenizer(new StringReader("čtyřista čtyřicet čtyři")));
    assertTokenStreamContents(stream, expected, types);

    // Test that it doesn't interfere with terms that are already plain ASCII.
    stream = factory.create(whitespaceMockTokenizer(new StringReader("Cyril Methood")));
    assertTokenStreamContents(stream, new String[] { "Cyril", "Methood" }, new int[] { 1, 1 });
  }
}
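
/*
 * Illustrative sketch only -- an assumption, not the project's actual
 * OnChangeDuplicatingFilter / ASCIIDuplicatingFilterFactory. It shows the kind of
 * filter the assertions above describe: every token is emitted unchanged, and
 * whenever ASCII folding would alter the term, the folded form is injected at the
 * same position (positionIncrement = 0) with a distinct "DUPLICATE" type. The
 * class name and the DUPLICATE constant here are hypothetical; fully qualified
 * type names are used to keep the sketch self-contained.
 */
class SketchAsciiDuplicatingFilter extends org.apache.lucene.analysis.TokenFilter {

  static final String DUPLICATE = "DUPLICATE"; // hypothetical type label for injected tokens

  private final org.apache.lucene.analysis.tokenattributes.CharTermAttribute termAtt =
      addAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class);
  private final org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute posIncAtt =
      addAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute.class);
  private final org.apache.lucene.analysis.tokenattributes.TypeAttribute typeAtt =
      addAttribute(org.apache.lucene.analysis.tokenattributes.TypeAttribute.class);

  private State savedOriginal;   // attribute state of the token just emitted
  private char[] folded;         // folded term queued for emission
  private int foldedLength = -1; // < 0 means no duplicate is pending

  SketchAsciiDuplicatingFilter(org.apache.lucene.analysis.TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws java.io.IOException {
    if (foldedLength >= 0) {
      // Emit the queued duplicate: same offsets as the original, folded term,
      // position increment 0 so it stacks on the original, and its own type.
      restoreState(savedOriginal);
      termAtt.copyBuffer(folded, 0, foldedLength);
      posIncAtt.setPositionIncrement(0);
      typeAtt.setType(DUPLICATE);
      foldedLength = -1;
      return true;
    }
    if (!input.incrementToken()) {
      return false;
    }
    // Fold the current term; folded output can be up to 4x the input length.
    final char[] buffer = termAtt.buffer();
    final int length = termAtt.length();
    final char[] out = new char[4 * length];
    final int outLength = org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter
        .foldToASCII(buffer, 0, out, 0, length);
    boolean changed = outLength != length;
    for (int i = 0; !changed && i < length; i++) {
      changed = buffer[i] != out[i];
    }
    if (changed) {
      // Folding altered the term: remember this token's state and queue the duplicate.
      savedOriginal = captureState();
      folded = out;
      foldedLength = outLength;
    }
    return true; // the original token is always emitted first, unmodified
  }

  @Override
  public void reset() throws java.io.IOException {
    super.reset();
    foldedLength = -1;
  }
}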