package project.scangen.tokenizer; import project.nfa.NFA; import project.nfa.State; import project.nfa.Transition; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.Iterator; /** * Tokenizes an input stream. The token type is the name of the final state. * * @author Kefu Zhou */ public class Tokenizer implements Iterable<Token> { private final BufferedReader reader; private final NFA dfa; private String curLine; private Token nextToken = null; private int lineNumber = 0; //how many lines are read private int charNumber = 1; public Tokenizer(final NFA dfa, final InputStream input) { if (!dfa.isDFA()) throw new RuntimeException("Must be DFA"); this.dfa = dfa; this.reader = new BufferedReader(new InputStreamReader(input)); } private class TokenIterator implements Iterator<Token> { public boolean hasNext() { if (nextToken == null) { nextToken = getNextToken(); } return nextToken != null; } public Token next() { return getNextToken(); } public void remove() { throw new UnsupportedOperationException(); } } public Iterator<Token> iterator() { return new TokenIterator(); } private Token getNextToken() { if (nextToken != null) { Token token = nextToken; nextToken = null; return token; } if (curLine == null || curLine.length() == 0) { try { curLine = reader.readLine(); // reset lineNumber++; charNumber = 1; } catch (IOException ex) { return null; } // If curLine is still null, no more input if (curLine == null) { return null; } } Token t = null; for (int max = curLine.length(); max > 0; max--) { t = getNextToken(dfa.getStartState(), 0, max); if (t != null) { charNumber += t.value.length(); curLine = curLine.substring(t.value.length()); break; } } if (t == null && curLine.length() > 0) { charNumber++; curLine = curLine.substring(1); return getNextToken(); } return t; } private Token getNextToken(State state, int min, int max) { return getNextToken(state, min, max, new StringBuffer()); } private Token getNextToken(State state, int min, int max, StringBuffer tokenBuffer) { Token t = null; for (Transition tr : state.getTransitions()) { if (min == max) { break; } String c = String.valueOf(curLine.charAt(min)); if (tr.isValid(c)) { tokenBuffer.append(c); return getNextToken(tr.getDestinationState(), min + 1, max, tokenBuffer); } } if (state.isFinal()) { t = new Token(state.getName(), tokenBuffer.toString(), lineNumber, charNumber); } return t; } }