package net.varkhan.data.ling.tokenize;

import junit.framework.TestCase;
import net.varkhan.base.functor.Expander;
import net.varkhan.base.functor.expander.ArrayExpander;

import java.util.Iterator;

/**
 * Unit test for {@link NgramTokenizer}.
 * <p>
 * Feeds small arrays of single-letter tokens through tokenizers built with
 * the bounds (1,1) and (2,3), and checks the exact sequence of n-grams
 * produced, each rendered as a space-separated string.
 * <p>
 * NOTE(review): the two trailing integer constructor arguments are presumed
 * to be the minimum and maximum n-gram length; confirm against
 * {@link NgramTokenizer}.
 *
 * @author varkhan
 * @date 11/5/13
 * @time 5:13 PM
 */
public class NgramTokenizerTest extends TestCase {

    /**
     * Checks the n-gram sequences emitted for inputs of one, two, three and
     * six tokens.
     *
     * @throws Exception if the tokenizer under test fails unexpectedly
     */
    public void testTokenizer() throws Exception {
        String[] one = { "a" };
        String[] two = { "a", "b" };
        String[] three = { "a", "b", "c" };
        String[] six = { "a", "b", "c", "d", "e", "f" };

        // Bounds (1,1): every input token is expected back on its own.
        Expander<String[],String[],Object> single =
                new NgramTokenizer<String, String[], Object>(String.class, new ArrayExpander<String,Object>(), 1, 1);
        assertYields(single.invoke(one, null), "a");
        assertYields(single.invoke(three, null), "a", "b", "c");

        // Bounds (2,3): a lone token is expected to produce nothing at all.
        Expander<String[],String[],Object> pairsAndTriples =
                new NgramTokenizer<String, String[], Object>(String.class, new ArrayExpander<String,Object>(), 2, 3);
        assertYields(pairsAndTriples.invoke(one, null));
        assertYields(pairsAndTriples.invoke(two, null), "a b");
        assertYields(pairsAndTriples.invoke(three, null), "a b", "b c", "a b c");
        // Expected order: for each end position, the 2-gram precedes the 3-gram.
        assertYields(pairsAndTriples.invoke(six, null),
                "a b",
                "b c", "a b c",
                "c d", "b c d",
                "d e", "c d e",
                "e f", "d e f");
    }

    /**
     * Asserts that iterating over {@code tokens} yields exactly the given
     * n-grams, in order, and nothing more.
     * <p>
     * {@code hasNext()} is checked once before the first element (only when
     * at least one element is expected) and once after the last; it is not
     * called between elements.
     *
     * @param tokens   the tokenizer output to walk through
     * @param expected the expected n-grams, each as a space-separated string
     */
    private void assertYields(Iterable<String[]> tokens, String... expected) {
        Iterator<String[]> cursor = tokens.iterator();
        if (expected.length > 0) {
            assertTrue(cursor.hasNext());
        }
        for (String gram : expected) {
            assertEquals(gram, toString(cursor.next()));
        }
        assertFalse(cursor.hasNext());
    }

    /**
     * Joins the elements of an array into one string, separated by single
     * spaces.
     *
     * @param a the elements to join
     * @return the joined string; empty when {@code a} has no elements
     */
    private String toString(String[] a) {
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < a.length; i++) {
            if (i > 0) {
                joined.append(' ');
            }
            joined.append(a[i]);
        }
        return joined.toString();
    }
}