package edu.stanford.nlp.process;

import edu.stanford.nlp.objectbank.IteratorFromReaderFactory;

import java.io.Reader;

/**
 * A TokenizerFactory turns a {@link java.io.Reader} into a {@link Tokenizer}
 * (an extension of Iterator) over objects of type {@code T} represented by
 * the text in that Reader. It is mainly a convenience, since you could cast
 * down anyway.
 *
 * <p><i>IMPORTANT NOTE:</i> A TokenizerFactory should also provide two static
 * methods:
 * <ul>
 *   <li>{@code public static TokenizerFactory<? extends HasWord> newTokenizerFactory();}</li>
 *   <li>{@code public static TokenizerFactory<Word> newWordTokenizerFactory(String options);}</li>
 * </ul>
 * These are expected by certain JavaNLP code (e.g., LexicalizedParser),
 * which wants to produce a TokenizerFactory by reflection.
 *
 * @author Christopher Manning
 *
 * @param <T> The type of the tokens returned by the Tokenizer
 */
public interface TokenizerFactory<T> extends IteratorFromReaderFactory<T> {

  // Interface members are implicitly public, so the modifier is omitted below.

  /**
   * Returns a tokenizer for the text supplied by the given reader.
   *
   * @param r The reader providing the text to tokenize
   * @return A Tokenizer over tokens of type {@code T}
   */
  Tokenizer<T> getTokenizer(Reader r);

  /**
   * Returns a tokenizer for the text supplied by the given reader, passing
   * along an extra options string.
   *
   * @param r The reader providing the text to tokenize
   * @param extraOptions Options for the returned tokenizer.
   *     NOTE(review): presumably applied on top of any options already set on
   *     this factory, in an implementation-defined format; confirm against
   *     the implementing classes.
   * @return A Tokenizer over tokens of type {@code T}
   */
  Tokenizer<T> getTokenizer(Reader r, String extraOptions);

  /**
   * Sets the options of this factory.
   *
   * @param options The options string.
   *     NOTE(review): the accepted format is not defined by this interface;
   *     presumably it is specific to each implementation; confirm.
   */
  void setOptions(String options);

}