package nl.ipo.cds.nagios.parser;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
/**
 * Hand-written lexer that turns a character stream into a stream of {@link Token}s.
 *
 * <p>The lexer recognises the following input:</p>
 * <ul>
 *   <li>whitespace (every character in the range 1..' ') is skipped;</li>
 *   <li>single line comments start with '#' and run until the end of the line;</li>
 *   <li>punctuation tokens, as listed by {@link TokenType#getPunctuationTokenTypes()};</li>
 *   <li>a VALUE token directly after an ASSIGN token, holding the trimmed remainder of the line
 *       (up to, but not including, a '#' or a newline);</li>
 *   <li>NAME tokens: a letter or underscore followed by letters, digits and underscores;</li>
 *   <li>an EOF token once the reader is exhausted (a NUL character in the input is treated as EOF as well).</li>
 * </ul>
 *
 * <p>Tokens are produced lazily and buffered, so callers can look ahead an arbitrary distance using
 * {@link #la(int)} before consuming tokens with {@link #accept()}.</p>
 *
 * <p>Note on positions: 'line' and 'column' are updated whenever a character is read from the underlying
 * reader, and the lexer always reads one character ahead. The position stored in a token or exception is
 * therefore the position of the reader at the moment of creation, not the start of the token.</p>
 */
public class Lexer {

    // Lookup tables for fast processing of punctuation characters, filled once by the static initializer:
    private static final Set<Character> punctuationCharacters = new HashSet<Character> ();
    private static final Map<String, TokenType> punctuationTokenTypes = new HashMap<String, TokenType> ();

    static {
        for (final TokenType tt: TokenType.getPunctuationTokenTypes ()) {
            final String punctuation = tt.getPunctuation ();
            punctuationTokenTypes.put (punctuation, tt);
            for (final char c: punctuation.toCharArray ()) {
                punctuationCharacters.add (c);
            }
        }
    }

    private final LexerContext lexerContext;
    private final LineNumberReader reader;

    // Tokens that have been scanned but not yet accepted. A LinkedList is used because both
    // queue operations (poll, addLast) and indexed access (get) are required.
    private final LinkedList<Token> lookahead;

    // Position bookkeeping; 'line == 0' doubles as the "nothing has been read yet" marker.
    private int line = 0;
    private int column = 0;

    // One character of lookahead on the character stream; 0 means EOF.
    private char currentCharacter = 0;

    /**
     * Constructs a lexer using the given context that reads characters from the given reader. The reader will be wrapped
     * in a LineNumberReader if it is not already an instance of LineNumberReader.
     *
     * @param lexerContext The context that is passed on to every token and exception created by this lexer.
     * @param reader The source of characters.
     */
    public Lexer (final LexerContext lexerContext, final Reader reader) {
        // The context must be stored: it is handed to every Token and LexerException created below.
        this.lexerContext = lexerContext;

        if (reader instanceof LineNumberReader) {
            this.reader = (LineNumberReader)reader;
        } else {
            this.reader = new LineNumberReader (reader, 1024);
        }

        this.lookahead = new LinkedList<Token> ();
    }

    /**
     * Returns the next token in the stream without consuming it. Always returns EOF tokens after the stream has been exhausted.
     *
     * @return The next token in the stream.
     * @throws LexerException If the input cannot be tokenized or cannot be read.
     */
    public Token la () throws LexerException {
        return la (0);
    }

    /**
     * Performs a lookahead on the token stream by the given amount (0 <= la). Intermediate tokens are buffered. If the lookahead operation scans past
     * EOF, this method always returns an EOF token.
     *
     * @param la The amount of lookahead (>= 0).
     * @return The token at the given position in the stream.
     * @throws LexerException If the input cannot be tokenized or cannot be read.
     * @throws IllegalArgumentException If la is negative.
     */
    public Token la (int la) throws LexerException {
        if (la < 0) {
            throw new IllegalArgumentException ("lookahead must be positive");
        }

        // Make sure the buffer holds at least la + 1 tokens. It may hold more: an earlier, deeper lookahead
        // or an ASSIGN/VALUE pair can leave extra tokens behind, so the token has to be fetched by index.
        setLookaheadSize (la + 1);

        if (la == 0) {
            return lookahead.getFirst ();
        }
        if (la == lookahead.size () - 1) {
            // Shortcut for the most common case, avoids walking the list.
            return lookahead.getLast ();
        }
        return lookahead.get (la);
    }

    /**
     * Accepts and returns the next token in the stream. The token is removed from the lookahead buffer and can no longer
     * be accessed using the 'la' methods.
     *
     * @return The accepted token.
     * @throws LexerException If the input cannot be tokenized or cannot be read.
     */
    public Token accept () throws LexerException {
        setLookaheadSize (1);
        return lookahead.poll ();
    }

    /**
     * Scans tokens until the lookahead buffer contains at least n tokens.
     */
    private void setLookaheadSize (int n) throws LexerException {
        while (lookahead.size () < n) {
            nextToken ();
        }
    }

    /**
     * Scans the next token and appends it to the lookahead buffer. An ASSIGN token is always followed by a
     * VALUE token, so a single invocation may append two tokens.
     */
    private void nextToken () throws LexerException {
        // Prime the one-character lookahead on first use:
        if (line == 0) {
            line = 1;
            currentCharacter = getNextChar ();
        }

        while (true) {
            char c = acceptChar ();

            // Skip whitespace:
            while (c > 0 && c <= ' ') {
                c = acceptChar ();
            }

            // Skip comments (single line), then start over: more whitespace or comments may follow.
            if (c == '#') {
                skipLineComment (c);
                continue;
            }

            // EOF:
            if (c == 0) {
                lookahead.addLast (new Token (lexerContext, TokenType.EOF, "<EOF>", line, column));
                return;
            }

            // Punctuation tokens:
            if (punctuationCharacters.contains (c)) {
                readPunctuation (c);
                return;
            }

            // Name tokens:
            if (isNameStart (c)) {
                readName (c);
                return;
            }

            throw new LexerException (lexerContext, line, column, String.format ("Invalid token starting with `%c`", c));
        }
    }

    /**
     * Consumes characters up to and including the next newline, or until EOF.
     */
    private void skipLineComment (final char first) throws LexerException {
        char c = first;
        while (c > 0 && c != '\n') {
            c = acceptChar ();
        }
    }

    /**
     * Reads the longest run of punctuation characters starting with 'first' and appends the matching
     * punctuation token. After an ASSIGN token the remainder of the line is appended as a VALUE token.
     */
    private void readPunctuation (final char first) throws LexerException {
        final StringBuilder builder = new StringBuilder ();
        builder.append (first);
        while (punctuationCharacters.contains (peekChar ())) {
            builder.append (acceptChar ());
        }

        final String punctuation = builder.toString ();
        final TokenType tokenType = punctuationTokenTypes.get (punctuation);
        if (tokenType == null) {
            throw new LexerException (lexerContext, line, column, String.format ("Invalid punctuation type: `%s`", punctuation));
        }

        final Token token = new Token (lexerContext, tokenType, tokenType.getPunctuation (), line, column);
        lookahead.addLast (token);

        // Parse a value until end of line after an '=':
        if (token.getTokenType () == TokenType.ASSIGN) {
            final StringBuilder valueBuilder = new StringBuilder ();
            // The terminating '#' or newline is left in the stream for the next scan.
            while (peekChar () > 0 && peekChar () != '#' && peekChar () != '\n') {
                valueBuilder.append (acceptChar ());
            }
            lookahead.addLast (new Token (lexerContext, TokenType.VALUE, valueBuilder.toString ().trim (), line, column));
        }
    }

    /**
     * Reads a name starting with 'first' and appends it to the lookahead buffer as a NAME token.
     */
    private void readName (final char first) throws LexerException {
        final StringBuilder name = new StringBuilder ();
        name.append (first);
        while (isNamePart (peekChar ())) {
            name.append (acceptChar ());
        }
        lookahead.addLast (new Token (lexerContext, TokenType.NAME, name.toString (), line, column));
    }

    /**
     * Returns true if the given character can start a name: an ASCII letter or an underscore.
     */
    private static boolean isNameStart (final char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
    }

    /**
     * Returns true if the given character can continue a name: an ASCII letter, an ASCII digit or an underscore.
     */
    private static boolean isNamePart (final char c) {
        return isNameStart (c) || (c >= '0' && c <= '9');
    }

    /**
     * Returns the current lookahead character without consuming it (0 on EOF).
     */
    private char peekChar () throws LexerException {
        return currentCharacter;
    }

    /**
     * Consumes and returns the current lookahead character (0 on EOF) and reads the next one from the reader.
     */
    private char acceptChar () throws LexerException {
        final char ch = currentCharacter;
        currentCharacter = getNextChar ();
        return ch;
    }

    /**
     * Reads a single character from the reader while keeping track of the line and column counters.
     *
     * @return The character that was read, or 0 on EOF.
     * @throws LexerException If reading from the underlying reader fails.
     */
    private char getNextChar () throws LexerException {
        try {
            final int ch = reader.read ();

            // Return 0 on EOF:
            if (ch < 0) {
                return 0;
            }

            // A newline moves the position to the next line; the column restarts there.
            if (ch == (int)'\n') {
                ++ line;
                column = 0;
            }
            ++ column;

            return (char)ch;
        } catch (IOException e) {
            throw new LexerException (lexerContext, line, column, "Error reading from datasource", e);
        }
    }
}