package edu.stanford.nlp.process;
import java.io.IOException;
import java.io.StreamTokenizer;
/**
* This class adapts between a <code>java.io.StreamTokenizer</code>
* and a <code>edu.stanford.nlp.process.Tokenizer</code>.
*
* @author Christopher Manning
* @version 2004/04/01
*/
public class TokenizerAdapter extends AbstractTokenizer<String> {

  /** The wrapped <code>java.io.StreamTokenizer</code> that tokens are read from. */
  protected final StreamTokenizer st;

  /**
   * The token text returned when the wrapped tokenizer reports an
   * end-of-line token. Never <code>null</code> when set through
   * {@link #setEolString(String)}.
   */
  protected String eolString = "<EOL>";

  /**
   * Create a new <code>TokenizerAdapter</code>.  In general, it is
   * recommended that the passed in <code>StreamTokenizer</code> should
   * have had <code>resetSyntax()</code> done to it, so that numbers are
   * returned as entered as tokens of type <code>String</code>, though this
   * code will cope as best it can.
   *
   * @param st The internal <code>java.io.StreamTokenizer</code>
   */
  public TokenizerAdapter(StreamTokenizer st) {
    this.st = st;
  }

  /**
   * Internally fetches the next token from the wrapped
   * <code>StreamTokenizer</code> and converts it to a <code>String</code>:
   * <ul>
   * <li>an end-of-line token becomes the current eol string;</li>
   * <li>a word token is returned as is;</li>
   * <li>a number token is rendered with <code>Double.toString</code>
   *     (so the input <code>3</code> comes back as <code>3.0</code>);</li>
   * <li>a quoted-string token is returned as the body of the string,
   *     without its delimiters (this may be the empty string);</li>
   * <li>any other token is returned as a one-character string.</li>
   * </ul>
   * An <code>IOException</code> raised by the wrapped tokenizer is not
   * propagated; it is reported in the same way as the end of the stream.
   *
   * @return the next token in the token stream, or null if none exists.
   */
  @Override
  public String getNext() {
    try {
      int nextTok = st.nextToken();
      switch (nextTok) {
        case StreamTokenizer.TT_EOL:
          return eolString;
        case StreamTokenizer.TT_EOF:
          return null;
        case StreamTokenizer.TT_WORD:
          return st.sval;
        case StreamTokenizer.TT_NUMBER:
          return Double.toString(st.nval);
        default:
          // Here the token type is itself a character.  For a quoted string
          // it is the delimiter and sval carries the string body, so return
          // the body rather than dropping it and keeping only the quote.
          // NOTE: this relies on sval being null for ordinary single
          // characters, which is how the JDK implementation behaves.
          if (st.sval != null) {
            return st.sval;
          }
          return String.valueOf((char) nextTok);
      }
    } catch (IOException ioe) {
      // Deliberate best effort: a read failure simply ends the token stream.
      return null;
    }
  }

  /**
   * Set the <code>String</code> returned when the inner tokenizer
   * returns an end-of-line token.  This will only happen if the
   * inner tokenizer has been set to <code>eolIsSignificant(true)</code>.
   *
   * @param eolString The String used to represent eol.  It is not allowed
   *                  to be <code>null</code> (which would confuse line ends
   *                  and file end)
   * @throws IllegalArgumentException if <code>eolString</code> is
   *                  <code>null</code>
   */
  public void setEolString(String eolString) {
    if (eolString == null) {
      throw new IllegalArgumentException("eolString cannot be null");
    }
    this.eolString = eolString;
  }

  /**
   * Say whether the <code>String</code> is the end-of-line token for
   * this tokenizer.
   *
   * @param str The String being tested; <code>null</code> is never the
   *            end-of-line token
   * @return Whether it is the end-of-line token
   */
  public boolean isEol(String str) {
    return eolString.equals(str);
  }

}