package de.skuzzle.polly.core.parser;
import java.io.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
/**
* <p>This is an abstract base class for all classes that are
* to provide String-scanning for tokens. Extending classes only
* need to implement the {@link #readToken()} method in order to
* provide their own token forming rules.</p>
*
* <p>This class then provides many convenience functions for
* context-free string parsing, such as looking one token ahead,
* consuming tokens, matching the next token against an expected
* token, or skipping over tokens.</p>
*
* <p>This class does position tracking within the input which works
* best for one lined input strings as it does not track line
* breaks.</p>
*
* <p>To implement {@link #readToken()} this class provides the
* methods {@link #readChar()} which reads exactly one char from
* the input and {@link #pushBack(int)} which can put a char back
* onto the stream so it will be read next.</p>
*
* @author Simon
*/
public abstract class AbstractTokenStream implements Iterable<Token>, TokenStream {

    /** Reader from which the input characters are read. */
    protected PushbackReader reader;

    /** Pushback buffer for characters. */
    protected LinkedList<Integer> pushbackBuffer;

    /** Pushback buffer for tokens; buffered tokens are returned before anything else. */
    protected LinkedList<Token> tokenBuffer;

    /** Holds all tokens in the order they have been read via {@link #readToken()}. */
    protected final List<Token> consumedTokens;

    /** Index into {@link #consumedTokens} of the next token to hand out. */
    protected int tokenIndex;

    /** Marked position in {@link #consumedTokens}, or {@code -1} if no mark is set. */
    protected int mark;

    /** Lazily created iterator instance returned by {@link #iterator()}. */
    private Iterator<Token> tokenIterator;


    /**
     * Creates a new TokenStream with the given PushbackReader as input.
     *
     * @param reader The PushbackReader to scan for tokens.
     */
    public AbstractTokenStream(PushbackReader reader) {
        this.reader = reader;
        this.pushbackBuffer = new LinkedList<Integer>();
        this.tokenBuffer = new LinkedList<Token>();
        this.consumedTokens = new ArrayList<Token>();
        this.mark = -1;
    }

    /**
     * Creates a new TokenStream with the given String as input, using UTF-8 as
     * charset.
     *
     * @param stream The String to scan for tokens.
     */
    public AbstractTokenStream(String stream) {
        this(stream, Charset.forName("UTF-8"));
    }

    /**
     * Creates a new TokenStream with the given String as input. The String is
     * encoded to bytes and decoded again using the given charset.
     *
     * @param stream The String to scan for tokens.
     * @param charset The charset used to encode and decode the input.
     */
    public AbstractTokenStream(String stream, Charset charset) {
        this(new PushbackReader(new BufferedReader(new InputStreamReader(
                new ByteArrayInputStream(stream.getBytes(charset)), charset))));
    }

    /**
     * Creates a new TokenStream with the given {@link InputStream} as input.
     *
     * @param stream The InputStream to scan for tokens.
     * @param charset The charset in which the characters from the stream are encoded.
     */
    public AbstractTokenStream(InputStream stream, Charset charset) {
        this(new PushbackReader(new InputStreamReader(stream, charset)));
    }


    /**
     * Whether all characters have been read from the input. This simply asks the
     * underlying reader.
     *
     * @return Whether the reader reports the end of the input.
     */
    public boolean eos() {
        return this.reader.eos();
    }

    /**
     * Gets a {@link TokenStream} view of all consumed tokens. Once the view runs past
     * the recorded tokens it yields a token of type {@link TokenType#EOS} instead of
     * failing.
     *
     * @return A TokenStream that can read all tokens that already have been consumed.
     */
    public TokenStream consumed() {
        return new TokenStream() {

            /** Read position of this view within the list of consumed tokens. */
            private int i = 0;

            @Override
            public boolean match(Token token) throws ParseException {
                return this.match(token.getType());
            }

            @Override
            public boolean match(TokenType type) throws ParseException {
                // NOTE(review): match() and lookAhead() inspect index i + 1 while
                // consume() returns index i. The offset is kept as found - confirm
                // that it is intended.
                final int next = this.i + 1;
                // Past the end of the recorded tokens the view behaves like EOS.
                // The list must only be accessed when the index actually exists.
                final TokenType compare = this.indexExists(next)
                        ? consumedTokens.get(next).getType()
                        : TokenType.EOS;

                if (compare == type) {
                    ++this.i;
                    return true;
                }
                return false;
            }

            @Override
            public Token lookAhead() throws ParseException {
                final int next = this.i + 1;
                if (this.indexExists(next)) {
                    return consumedTokens.get(next);
                }
                return new Token(TokenType.EOS, Position.NONE);
            }

            @Override
            public Token consume() throws ParseException {
                // Mirror lookAhead(): running past the recorded tokens yields EOS
                // rather than an IndexOutOfBoundsException.
                if (!this.indexExists(this.i)) {
                    return new Token(TokenType.EOS, Position.NONE);
                }
                return consumedTokens.get(this.i++);
            }

            private boolean indexExists(int index) {
                return index < consumedTokens.size();
            }
        };
    }

    /* (non-Javadoc)
     * @see de.skuzzle.polly.parsing.TokenStream#match(de.skuzzle.polly.parsing.TokenType)
     */
    @Override
    public boolean match(TokenType type) throws ParseException {
        if (!this.lookAhead().matches(type)) {
            return false;
        }
        this.consume();
        return true;
    }

    /* (non-Javadoc)
     * @see de.skuzzle.polly.parsing.TokenStream#match(de.skuzzle.polly.parsing.Token)
     */
    @Override
    public boolean match(Token token) throws ParseException {
        return this.match(token.getType());
    }

    /* (non-Javadoc)
     * @see de.skuzzle.polly.parsing.TokenStream#lookAhead()
     */
    @Override
    public Token lookAhead() throws ParseException {
        // Fetch one token and park it in the buffer so that the next call to
        // consume() hands out exactly this token.
        if (this.tokenBuffer.isEmpty()) {
            this.tokenBuffer.add(this.nextToken());
        }
        return this.tokenBuffer.peek();
    }

    /**
     * Consumes tokens until the next token to be consumed has
     * any of the given types or the end of the stream has been reached.
     *
     * @param types Array of {@link TokenType}s to skip until.
     * @return The Token to which this method has been skipped.
     *          That means that:
     *          {@code Token la = synchronize(...) => la = this.lookAhead()}.
     * @throws ParseException If an invalid token has been read while skipping.
     */
    public Token synchronize(TokenType... types) throws ParseException {
        return this.synchronize(Arrays.asList(types));
    }

    /**
     * Consumes tokens until the next token to be consumed has
     * any of the given types or the end of the stream has been reached.
     *
     * @param types Collection of {@link TokenType}s to skip until.
     * @return The Token to which this method has been skipped.
     *          That means that:
     *          {@code Token la = synchronize(...) => la = this.lookAhead()}.
     * @throws ParseException If an invalid token has been read while skipping.
     */
    public Token synchronize(Collection<TokenType> types) throws ParseException {
        while (!this.eos()) {
            final Token la = this.lookAhead();
            if (types.contains(la.getType())) {
                return la;
            }
            this.consume();
        }
        return this.lookAhead();
    }

    /* (non-Javadoc)
     * @see de.skuzzle.polly.parsing.TokenStream#consume()
     */
    @Override
    public Token consume() throws ParseException {
        return this.nextToken();
    }

    /**
     * Returns the current position within the input stream as reported by the
     * underlying reader.
     *
     * @return The current stream position.
     */
    public int getStreamIndex() {
        return this.reader.getPosition();
    }

    /**
     * Creates a new {@link Position} which spans from the given
     * start index until the current stream index. If the end of the input has been
     * reached, the end index is the stream index plus one.
     *
     * @param start The start index of the new {@link Position} object.
     * @return A new {@link Position} which represents the span from
     *          {@code start} until the current end index.
     */
    public Position spanFrom(int start) {
        return new Position(start, this.currentEndIndex());
    }

    /**
     * Creates a new {@link Position} which spans from the beginning
     * of the given {@link Token} until the current stream index.
     *
     * @param token The {@link Token} which states the beginning of the new
     *          {@link Position} object.
     * @return A new {@link Position} which represents the span from
     *          {@code token.getPosition().getStart()} until the current end index.
     */
    public Position spanFrom(Token token) {
        return this.spanFrom(token.getPosition());
    }

    /**
     * Creates a new {@link Position} which spans from the beginning of the given
     * Position until the current stream index. If the end of the input has been
     * reached, the end index is the stream index plus one.
     *
     * @param start Start position.
     * @return A new Position representing the span from {@code start.getStart()}
     *          until the current end index.
     */
    public Position spanFrom(Position start) {
        return new Position(start.getStart(), this.currentEndIndex());
    }

    /**
     * Computes the end index shared by all {@code spanFrom} variants.
     *
     * @return {@link #getStreamIndex()}, incremented by one if {@link #eos()} holds.
     */
    private int currentEndIndex() {
        return this.eos() ? this.getStreamIndex() + 1 : this.getStreamIndex();
    }

    /**
     * Pushes one character back onto the input by handing it to the underlying
     * reader, so that it will be read next.
     *
     * @param t The character to be pushed back onto the input.
     * @see #eos()
     */
    protected void pushBack(int t) {
        this.reader.pushback(t);
    }

    /**
     * Hands the given character to the reader's {@code pushbackInvisible} method.
     * NOTE(review): presumably this pushes back a character without affecting
     * the reported stream position - confirm against the reader implementation.
     *
     * @param c The character to push back.
     */
    protected void pushBackArtificial(int c) {
        this.reader.pushbackInvisible(c);
    }

    /**
     * <p>Pushes back one token. The token is inserted at the head of the token
     * pushback buffer, so it is the next token returned by {@link #lookAhead()} or
     * {@link #consume()}.</p>
     *
     * <p>Pushed back tokens are, when consumed, never added to the list of consumed
     * tokens.</p>
     *
     * @param t The token to push back.
     */
    public void pushBackFirst(Token t) {
        this.tokenBuffer.addFirst(t);
    }

    /**
     * <p>Pushes back one token. The token is appended to the tail of the token
     * pushback buffer, so it is returned after all tokens already buffered.</p>
     *
     * <p>Pushed back tokens are, when consumed, never added to the list of consumed
     * tokens.</p>
     *
     * @param t Token to push back.
     */
    public void pushBackLast(Token t) {
        this.tokenBuffer.addLast(t);
    }

    /**
     * Consumes the next character only if it is the expected one. Otherwise the
     * character that was read is pushed back again.
     *
     * @param c The expected character.
     * @return Whether the next character is the expected one.
     */
    protected boolean nextIs(int c) {
        final int next = this.readChar();
        if (next != c) {
            this.pushBack(next);
            return false;
        }
        return true;
    }

    /**
     * Reads the next character from the underlying reader.
     *
     * @return The character that has been read from the input, or {@code -1} if the
     *          reader returned {@code -1} or an {@link IOException} occurred while
     *          reading.
     */
    protected int readChar() {
        try {
            return this.reader.read();
        } catch (IOException e) {
            // Deliberately reported as "no more input": the scanning methods have no
            // way to propagate I/O failures to their callers.
            return -1;
        }
    }

    /**
     * <p>Remembers the current position within the token stream. The token stream can
     * be reset to this mark by using {@link #reset(boolean, boolean)}. Doing so causes
     * the tokens read after the mark to be handed out again.</p>
     */
    public void mark() {
        this.mark = this.tokenIndex;
    }

    /**
     * Resets this stream to the mark set by {@link #mark()}. If the token buffer is
     * empty, the token that the stream was reset to will be the next one to be read.
     * If the token buffer contains tokens, they will be read first until it is empty.
     *
     * @param removeMark Whether the mark should be removed.
     * @param clearBuffer Whether the token buffer should be cleared.
     * @throws IllegalStateException If no mark has been set.
     */
    public void reset(boolean removeMark, boolean clearBuffer) {
        if (this.mark == -1) {
            throw new IllegalStateException("no mark available"); //$NON-NLS-1$
        }
        this.tokenIndex = this.mark;
        if (removeMark) {
            this.mark = -1;
        }
        if (clearBuffer) {
            this.tokenBuffer.clear();
        }
    }

    /**
     * Returns the next token and consumes it. Tokens are taken, in this order, from
     * the token pushback buffer, from the already recorded tokens (after a
     * {@link #reset(boolean, boolean)}), or freshly from {@link #readToken()}. Freshly
     * read tokens are recorded in {@link #consumedTokens}.
     *
     * @return The next token in the input stream.
     * @throws ParseException If no valid token could be read.
     */
    private Token nextToken() throws ParseException {
        if (!this.tokenBuffer.isEmpty()) {
            return this.tokenBuffer.poll();
        }

        final Token next;
        if (this.tokenIndex < this.consumedTokens.size()) {
            // Replay a token that was recorded before the stream was reset.
            next = this.consumedTokens.get(this.tokenIndex);
        } else {
            next = this.readToken();
            this.consumedTokens.add(next);
        }
        ++this.tokenIndex;
        return next;
    }

    /**
     * Main method for reading tokens from the input stream.
     *
     * @return The next token in the input stream.
     * @throws ParseException If no valid token could be read.
     */
    protected abstract Token readToken() throws ParseException;

    /**
     * Returns the iterator over the tokens of this stream. The same iterator instance
     * is returned on every call; iterating consumes tokens from this stream.
     *
     * @return The token iterator of this stream.
     */
    @Override
    public synchronized Iterator<Token> iterator() {
        if (this.tokenIterator == null) {
            this.tokenIterator = new TokenIterator();
        }
        return this.tokenIterator;
    }


    /**
     * Iterator that consumes the tokens of the enclosing stream one by one.
     */
    private class TokenIterator implements Iterator<Token> {

        /**
         * A further token is available if one is buffered, if a recorded token can
         * be replayed, or if the input has not been read completely yet.
         */
        @Override
        public boolean hasNext() {
            final AbstractTokenStream stream = AbstractTokenStream.this;
            return !stream.tokenBuffer.isEmpty()
                    || stream.tokenIndex < stream.consumedTokens.size()
                    || !stream.eos();
        }

        @Override
        public Token next() {
            if (!this.hasNext()) {
                throw new NoSuchElementException();
            }
            try {
                return AbstractTokenStream.this.nextToken();
            } catch (ParseException e) {
                throw new RuntimeException("ParseException occurred", e); //$NON-NLS-1$
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
}