package net.varkhan.data.ling.tokenize; import net.varkhan.base.functor.Expander; import java.lang.reflect.Array; import java.util.Iterator; import java.util.NoSuchElementException; /** * <b></b>. * <p/> * @author varkhan * @date 11/5/13 * @time 4:48 PM */ public class NgramTokenizer<T,S,C> implements Expander<T[],S,C> { protected final Class<T> cls; protected final Expander<T,S,C> atk; protected final int min; protected final int max; public NgramTokenizer(Class<T> cls, Expander<T, S, C> atk, int min, int max) { this.cls = cls; this.atk = atk; this.min = min; this.max = max; } @Override public Iterable<T[]> invoke(S src, C ctx) { return new NgramTokens<T>(cls, atk.invoke(src, ctx),min,max); } protected static class NgramTokens<T> implements Iterable<T[]> { protected final Class<T> cls; protected final Iterable<T> atn; protected final int min; protected final int max; public NgramTokens(Class<T> cls, Iterable<T> atn, int min, int max) { this.cls = cls; this.atn = atn; this.min = min; this.max = max; } @Override public Iterator<T[]> iterator() { return new NgramIterator<T>(cls, atn.iterator(),min,max); } } protected static class NgramIterator<T> implements Iterator<T[]> { protected final Class<T> cls; protected final Iterator<T> itr; protected final int min; protected final int max; protected final Object[] buf; protected volatile int lps=0; protected volatile int hps=0; public NgramIterator(Class<T> cls, Iterator<T> itr, int min, int max) { this.cls = cls; this.itr = itr; this.min = min; this.max = max; this.buf = new Object[max]; } @Override public boolean hasNext() { while(lps<min) { if(!getToken()) return false; lps ++; } if(lps>hps) { if(!getToken()) return false; lps=min; } return true; } protected boolean getToken() { if(!itr.hasNext()) return false; T tk = itr.next(); System.arraycopy(buf,0,buf,1,buf.length-1); buf[0] = tk; if(hps<max) hps++; return true; } @Override @SuppressWarnings("unchecked") public T[] next() { if(!hasNext()) throw new NoSuchElementException(); T[] ta = (T[])Array.newInstance(cls,lps); for(int i=0; i<lps; i++) { ta[i] = (T) buf[lps-i-1]; } lps ++; return ta; } @Override public void remove() { } } }