package edu.stanford.nlp.process; import java.util.ArrayList; import java.util.List; import java.util.NoSuchElementException; // import edu.stanford.nlp.util.logging.Redwood; /** * An abstract tokenizer. Tokenizers extending AbstractTokenizer need only * implement the {@code getNext()} method. This implementation does not * allow null tokens, since * null is used in the protected nextToken field to signify that no more * tokens are available. * * @author Teg Grenager (grenager@stanford.edu) */ public abstract class AbstractTokenizer<T> implements Tokenizer<T> { // /** A logger for this class */ // private static final Redwood.RedwoodChannels log = Redwood.channels(AbstractTokenizer.class); protected T nextToken; // = null; /** * Internally fetches the next token. * * @return the next token in the token stream, or null if none exists. */ protected abstract T getNext(); /** * Returns the next token from this Tokenizer. * * @return the next token in the token stream. * @throws java.util.NoSuchElementException * if the token stream has no more tokens. */ @Override public T next() { if (nextToken == null) { nextToken = getNext(); } T result = nextToken; nextToken = null; if (result == null) { throw new NoSuchElementException(); } return result; } /** * Returns {@code true} if this Tokenizer has more elements. */ @Override public boolean hasNext() { if (nextToken == null) { nextToken = getNext(); } return nextToken != null; } /** * This is an optional operation, by default not supported. */ @Override public void remove() { throw new UnsupportedOperationException(); } /** * This is an optional operation, by default supported. * * @return The next token in the token stream. * @throws java.util.NoSuchElementException * if the token stream has no more tokens. */ @Override public T peek() { if (nextToken == null) { nextToken = getNext(); } if (nextToken == null) { throw new NoSuchElementException(); } return nextToken; } /** * Returns text as a List of tokens. * * @return A list of all tokens remaining in the underlying Reader */ @Override public List<T> tokenize() { List<T> result = new ArrayList<>(); while (hasNext()) { result.add(next()); } // log.info("tokenize() produced " + result); return result; } }