/*
* Copyright (C) 2011 René Jeschke <rene_jeschke@yahoo.de>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.github.rjeschke.weel;
import java.io.IOException;
import java.io.Reader;
/**
 * Straightforward, hand-written tokenizer that does not rely on regular
 * expressions.
 *
 * @author René Jeschke <rene_jeschke@yahoo.de>
 */
final class Tokenizer
{
/**
 * Priority of unary operators; larger than every value returned by
 * getBinaryPriority.
 */
static final int UOPR_PRIORITY = 15;
/** The character source being tokenized; assigned once in the constructor. */
private final Reader reader;
/** Scratch buffer reused while collecting names, strings and numbers. */
private final StringBuilder builder = new StringBuilder();
/**
 * Most recently read input character, or -1 once the reader is exhausted.
 * Starts out as a blank, which the first call to next() skips as
 * whitespace before reading from the reader.
 */
private int current = ' ';
/** Current line number, starting at 1; used in error messages. */
private int lineNumber = 1;
/** Name of the file being tokenized; may be <code>null</code>. */
private final String filename;
/** The current token. */
Token token;
/**
 * Token held back by ungetToken and handed out by the following call to
 * next(); <code>null</code> when nothing is held back.
 */
Token ungot = null;
/** Reserved word found by the last name lookup, <code>null</code> if the name was not reserved. */
ReservedWord reserved = null;
/** Text of the last name, reserved word or string literal read. */
String string;
/** Value of the last number or character constant read. */
double number;
/**
 * Creates a tokenizer that reads its characters from the given reader.
 *
 * @param reader
 *            Source of the characters to tokenize.
 * @param filename
 *            File name to mention in error messages; when
 *            <code>null</code>, error messages omit the file name.
 */
public Tokenizer(final Reader reader, final String filename)
{
    this.filename = filename;
    this.reader = reader;
}
/**
 * Decorates a message with the file name (when one is known) and the
 * current line number.
 *
 * @param message
 *            The message, or a format string if arguments are supplied.
 * @param args
 *            Optional format arguments; the message is only passed through
 *            {@link java.lang.String#format(String, Object...)} when at
 *            least one argument is given.
 * @return The decorated error message.
 * @see java.lang.String#format(String, Object...)
 */
public String error(String message, Object... args)
{
    final StringBuilder text = new StringBuilder();
    if (args.length > 0)
        text.append(String.format(message, args));
    else
        text.append(message);
    if (this.filename != null)
        text.append(" in '").append(this.filename).append("'");
    text.append(" around line ").append(this.lineNumber);
    return text.toString();
}
/**
 * Fetches the next character from the reader and makes it the current
 * character.
 *
 * @return The character just read, or -1 at the end of the input.
 * @throws IOException
 *             If the underlying reader fails.
 */
private int read() throws IOException
{
    final int next = this.reader.read();
    this.current = next;
    return next;
}
/**
 * Reads a number literal and stores its value in <code>number</code>.
 * <p>
 * Supported forms are decimal literals with an optional fraction and
 * exponent, and integer literals prefixed with <code>0x</code> (hex),
 * <code>0o</code> (octal) or <code>0b</code> (binary); the prefix letter
 * may be upper or lower case. Prefixed literals are truncated to 32 bits,
 * so <code>0xFFFFFFFF</code> yields -1.
 * <p>
 * Note that a dot directly following the integer part is always consumed
 * as the decimal point.
 *
 * @param wasDot
 *            <code>true</code> if this number starts with a dot. The dot
 *            has already been consumed and the current character is the
 *            first fraction digit.
 * @throws WeelException
 *             If the literal is malformed (for example a prefix without
 *             digits, an exponent without digits, or a prefixed literal
 *             too large to parse) or if reading fails.
 */
private void readNumber(final boolean wasDot)
{
    this.builder.setLength(0);
    this.builder.append(wasDot ? '.' : (char) this.current);
    final boolean wasZero = this.current == '0';
    try
    {
        if (!wasDot)
        {
            this.read();
            if (wasZero)
            {
                final int radix = radixOfPrefix(this.current);
                if (radix != 0)
                {
                    this.readRadixLiteral(radix);
                    return;
                }
            }
            this.appendDigits();
        }
        if (this.current == '.')
        {
            this.builder.append('.');
            this.read();
        }
        this.appendDigits();
        if (this.current == 'E' || this.current == 'e')
        {
            this.builder.append('e');
            this.read();
            if (this.current == '+' || this.current == '-')
            {
                this.builder.append((char) this.current);
                this.read();
            }
            this.appendDigits();
        }
        this.number = Double.parseDouble(this.builder.toString());
    }
    catch (final NumberFormatException e)
    {
        // Report unparsable literals like every other tokenizer error,
        // with file and line, instead of leaking the raw exception.
        throw new WeelException(this.error("Illegal number literal '%s'",
                this.builder.toString()), e);
    }
    catch (final IOException e)
    {
        throw new WeelException(this.error(e.toString()), e);
    }
}

/**
 * Maps the character following a leading zero to the radix it selects.
 *
 * @param c
 *            The character after the leading zero.
 * @return 16, 8 or 2 for x, o or b (either case), 0 if the character is
 *         not a radix prefix.
 */
private static int radixOfPrefix(final int c)
{
    switch (c)
    {
    case 'X':
    case 'x':
        return 16;
    case 'O':
    case 'o':
        return 8;
    case 'B':
    case 'b':
        return 2;
    default:
        return 0;
    }
}

/**
 * Checks whether a character is an ASCII digit of the given radix.
 *
 * @param c
 *            The character to test.
 * @param radix
 *            2, 8 or 16.
 * @return <code>true</code> if the character is a valid digit.
 */
private static boolean isRadixDigit(final int c, final int radix)
{
    if (c >= '0' && c <= '9')
        return c - '0' < radix;
    return radix == 16
            && ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
}

/**
 * Reads the digits of a prefixed integer literal. On entry the current
 * character is the prefix letter. A literal too large for
 * <code>Long.parseLong</code> raises a NumberFormatException that the
 * caller converts.
 *
 * @param radix
 *            2, 8 or 16.
 * @throws IOException
 *             If an IO error occurred.
 */
private void readRadixLiteral(final int radix) throws IOException
{
    this.builder.setLength(0);
    // Skip the prefix letter.
    this.read();
    while (isRadixDigit(this.current, radix))
    {
        this.builder.append((char) this.current);
        this.read();
    }
    if (this.builder.length() == 0)
        throw new WeelException(this.error("Syntax error"));
    // Deliberately truncated to 32 bits.
    this.number = (int) Long.parseLong(this.builder.toString(), radix);
}

/**
 * Appends the current run of digit characters to the builder.
 *
 * @throws IOException
 *             If an IO error occurred.
 */
private void appendDigits() throws IOException
{
    while (Character.isDigit(this.current))
    {
        this.builder.append((char) this.current);
        this.read();
    }
}
/**
 * Collects a name made of letters, digits and underscores, starting at the
 * current character, and stores it in <code>string</code>. Letters are
 * converted to lower case.
 */
private void readName()
{
    final StringBuilder name = this.builder;
    name.setLength(0);
    try
    {
        for (int c = this.current; c == '_'
                || Character.isLetterOrDigit(c); c = this.read())
        {
            if (Character.isLetter(c))
                name.append((char) Character.toLowerCase(c));
            else
                name.append((char) c);
        }
    }
    catch (final IOException e)
    {
        throw new WeelException(this.error(e.toString()), e);
    }
    this.string = name.toString();
}
/**
 * Reads a quoted string and stores its contents in <code>string</code>.
 * <p>
 * On entry the current character is the opening delimiter; the string ends
 * at the next unescaped occurrence of that same character. Supported
 * escapes are <code>\n</code>, <code>\r</code>, <code>\t</code>,
 * <code>\"</code>, <code>\'</code> and <code>\\</code>. Raw line breaks
 * inside the string are kept and counted, so the line number stays correct
 * after a multi-line string.
 *
 * @throws WeelException
 *             If the input ends before the closing delimiter (reported at
 *             the line where the string started), if an unsupported escape
 *             is found, or if reading fails.
 */
private void readString()
{
    final int lim = this.current;
    // Remembered so an unterminated string is reported where it began.
    final int startLine = this.lineNumber;
    this.builder.setLength(0);
    try
    {
        this.read();
        while (this.current != lim)
        {
            switch (this.current)
            {
            case -1:
                this.lineNumber = startLine;
                throw new WeelException(this
                        .error("Unexpected end of file"));
            case '\\':
                this.read();
                switch (this.current)
                {
                case -1:
                    this.lineNumber = startLine;
                    throw new WeelException(this
                            .error("Unexpected end of file"));
                case 'n':
                    this.builder.append('\n');
                    break;
                case 'r':
                    this.builder.append('\r');
                    break;
                case 't':
                    this.builder.append('\t');
                    break;
                case '"':
                    this.builder.append('"');
                    break;
                case '\'':
                    this.builder.append('\'');
                    break;
                case '\\':
                    this.builder.append('\\');
                    break;
                default:
                    throw new WeelException(this.error(
                            "Unsupported or illegal escape character '%c'",
                            (char) this.current));
                }
                this.read();
                break;
            case '\n':
                // Keep the line counter in step with raw line breaks.
                this.lineNumber++;
                //$FALL-THROUGH$
            default:
                this.builder.append((char) this.current);
                this.read();
                break;
            }
        }
        // Skip the closing delimiter.
        this.read();
        this.string = this.builder.toString();
    }
    catch (final IOException e)
    {
        throw new WeelException(this.error(e.toString()), e);
    }
}
/**
 * Makes the given token the current one and holds the previously current
 * token back, so that the following call to next() returns it again.
 *
 * @param t
 *            The token that becomes current.
 */
public void ungetToken(final Token t)
{
    final Token heldBack = this.token;
    this.token = t;
    this.ungot = heldBack;
}
/**
 * Advances to the next token and makes it the current token.
 * <p>
 * A token held back by ungetToken is returned first. Otherwise whitespace
 * and comments are skipped and the next token is scanned from the input.
 * Names, strings and numbers leave their value in <code>string</code>,
 * <code>reserved</code> or <code>number</code>.
 *
 * @return The next token, <code>Token.EOF</code> at the end of the input.
 */
public Token next()
{
    if (this.ungot != null)
    {
        final Token pending = this.ungot;
        this.ungot = null;
        this.token = pending;
        return pending;
    }
    try
    {
        return this.token = this.scan();
    }
    catch (final IOException e)
    {
        throw new WeelException(this.error(e.toString()), e);
    }
}

/**
 * Consumes the current character if it equals the expected one.
 *
 * @param expected
 *            The character to match.
 * @return <code>true</code> if the character matched and was consumed.
 * @throws IOException
 *             If an IO error occurred.
 */
private boolean accept(final char expected) throws IOException
{
    if (this.current != expected)
        return false;
    this.read();
    return true;
}

/**
 * Skips the rest of a line comment, stopping at (not consuming) the line
 * break or the end of the input.
 *
 * @throws IOException
 *             If an IO error occurred.
 */
private void skipLineComment() throws IOException
{
    while (this.current != '\n' && this.current != -1)
        this.read();
}

/**
 * Skips a block comment whose opening has already been consumed, counting
 * the line breaks inside it.
 *
 * @throws IOException
 *             If an IO error occurred.
 */
private void skipBlockComment() throws IOException
{
    for (;;)
    {
        final int c = this.current;
        if (c == -1)
            throw new WeelException(this.error("Unexpected end of file"));
        if (c == '\n')
            this.lineNumber++;
        this.read();
        // A star only closes the comment when a slash follows directly.
        if (c == '*' && this.accept('/'))
            return;
    }
}

/**
 * Scans one token from the input, skipping whitespace and comments.
 *
 * @return The scanned token.
 * @throws IOException
 *             If an IO error occurred.
 */
private Token scan() throws IOException
{
    for (;;)
    {
        switch (this.current)
        {
        case -1:
            return Token.EOF;
        case '\n':
            this.lineNumber++;
            //$FALL-THROUGH$
        case ' ':
        case '\t':
        case '\r':
            this.read();
            continue;
        case '"':
        case '\'':
            this.readString();
            return Token.STRING;
        case '`':
            // Character constant: exactly one character, yielded as a number.
            this.readString();
            if (this.string.length() != 1)
                throw new WeelException(this.error("Illegal character constant"));
            this.number = this.string.charAt(0);
            return Token.NUMBER;
        case '+':
            this.read();
            if (this.accept('='))
                return Token.ASSIGN_ADD;
            if (this.accept('+'))
                return this.accept('=') ? Token.ASSIGN_MAPCAT : Token.MAP_CONCAT;
            return Token.ADD;
        case '-':
            this.read();
            if (this.accept('='))
                return Token.ASSIGN_SUB;
            if (this.accept('>'))
                return Token.ARROW;
            return Token.SUB;
        case '*':
            this.read();
            if (this.accept('*'))
                return Token.POW;
            if (this.accept('='))
                return Token.ASSIGN_MUL;
            return Token.MUL;
        case '%':
            this.read();
            return this.accept('=') ? Token.ASSIGN_MODULO : Token.MODULO;
        case '?':
            this.read();
            return Token.TERNARY;
        case '/':
            this.read();
            if (this.current == '/')
            {
                this.skipLineComment();
                continue;
            }
            if (this.accept('*'))
            {
                this.skipBlockComment();
                continue;
            }
            if (this.accept('='))
                return Token.ASSIGN_DIV;
            return Token.DIV;
        case '(':
            this.read();
            return Token.BRACE_OPEN;
        case ')':
            this.read();
            return Token.BRACE_CLOSE;
        case '{':
            this.read();
            return Token.CURLY_BRACE_OPEN;
        case '}':
            this.read();
            return Token.CURLY_BRACE_CLOSE;
        case '[':
            this.read();
            return Token.BRACKET_OPEN;
        case ']':
            this.read();
            return Token.BRACKET_CLOSE;
        case ',':
            this.read();
            return Token.COMMA;
        case '.':
            this.read();
            if (this.accept('.'))
                return this.accept('=') ? Token.ASSIGN_STRCAT : Token.STRING_CONCAT;
            if (Character.isDigit(this.current))
            {
                this.readNumber(true);
                return Token.NUMBER;
            }
            return Token.DOT;
        case ';':
            this.read();
            return Token.SEMICOLON;
        case '~':
            this.read();
            return Token.BINARY_NOT;
        case '^':
            this.read();
            return this.accept('=') ? Token.ASSIGN_XOR : Token.BINARY_XOR;
        case '=':
            this.read();
            return this.accept('=') ? Token.EQUAL : Token.ASSIGN;
        case '>':
            this.read();
            if (this.accept('='))
                return Token.GREATER_EQUAL;
            if (!this.accept('>'))
                return Token.GREATER;
            // ">>" consumed so far.
            if (this.accept('='))
                return Token.ASSIGN_SHR;
            if (!this.accept('>'))
                return Token.SHR;
            // ">>>" consumed so far.
            return this.accept('=') ? Token.ASSIGN_USHR : Token.USHR;
        case '<':
            this.read();
            if (this.accept('='))
                return Token.LESS_EQUAL;
            if (this.accept('<'))
                return this.accept('=') ? Token.ASSIGN_SHL : Token.SHL;
            return Token.LESS;
        case '&':
            this.read();
            if (this.accept('&'))
                return Token.LOGICAL_AND;
            if (this.accept('='))
                return Token.ASSIGN_AND;
            return Token.BINARY_AND;
        case '|':
            this.read();
            if (this.accept('|'))
                return Token.LOGICAL_OR;
            if (this.accept('='))
                return Token.ASSIGN_OR;
            return Token.BINARY_OR;
        case '!':
            this.read();
            return this.accept('=') ? Token.NOT_EQUAL : Token.LOGICAL_NOT;
        case '@':
            this.read();
            return this.accept('{') ? Token.ANON_OPEN : Token.AT;
        case ':':
            this.read();
            return this.accept(':') ? Token.DOUBLE_COLON : Token.COLON;
        default:
            if (this.current == '_' || Character.isLetter(this.current))
            {
                this.readName();
                this.reserved = ReservedWord.fromString(this.string);
                return this.reserved != null ? Token.RESERVED : Token.NAME;
            }
            if (Character.isDigit(this.current))
            {
                this.readNumber(false);
                return Token.NUMBER;
            }
            throw new WeelException(this.error(
                    "Illegal character '%c'", this.current));
        }
    }
}
/**
 * Tells whether a token is treated as a unary operator. This is the case
 * for logical not, binary not, minus and the opening brace.
 *
 * @param t
 *            The token to classify.
 * @return <code>true</code> if the token is a unary operator.
 */
public boolean isUnary(final Token t)
{
    boolean unary;
    switch (t)
    {
    case SUB:
    case BINARY_NOT:
    case LOGICAL_NOT:
    case BRACE_OPEN:
        unary = true;
        break;
    default:
        unary = false;
        break;
    }
    return unary;
}
/**
 * Tells whether a token is a binary operator.
 *
 * @param t
 *            The token to classify.
 * @return <code>true</code> if the token is a binary operator.
 */
public boolean isBinary(final Token t)
{
    boolean binary;
    switch (t)
    {
    // Arithmetic
    case ADD:
    case SUB:
    case MUL:
    case DIV:
    case MODULO:
    case POW:
    // Concatenation
    case STRING_CONCAT:
    case MAP_CONCAT:
    // Comparison
    case EQUAL:
    case NOT_EQUAL:
    case LESS:
    case LESS_EQUAL:
    case GREATER:
    case GREATER_EQUAL:
    // Logical
    case LOGICAL_AND:
    case LOGICAL_OR:
    // Bitwise and shifts
    case BINARY_AND:
    case BINARY_OR:
    case BINARY_XOR:
    case SHL:
    case SHR:
    case USHR:
        binary = true;
        break;
    default:
        binary = false;
        break;
    }
    return binary;
}
/**
 * Checks whether the current token can be part of an expression: any unary
 * or binary operator, a name, number or string, an opening (curly) brace,
 * or one of the reserved words func, sub, true, false, null and this.
 *
 * @return <code>true</code> if so.
 */
public boolean isExpression()
{
    final Token t = this.token;
    if (this.isBinary(t) || this.isUnary(t))
        return true;
    boolean expression;
    switch (t)
    {
    case RESERVED:
        switch (this.reserved)
        {
        case FUNC:
        case SUB:
        case TRUE:
        case FALSE:
        case NULL:
        case THIS:
            expression = true;
            break;
        default:
            expression = false;
            break;
        }
        break;
    case NAME:
    case NUMBER:
    case STRING:
    case BRACE_OPEN:
    case CURLY_BRACE_OPEN:
        expression = true;
        break;
    default:
        expression = false;
        break;
    }
    return expression;
}
/**
 * Looks up the priority of a binary operator. Larger values bind tighter;
 * the values range from 3 (logical or) to 14 (concatenation).
 *
 * @param t
 *            The operator token.
 * @return The priority, or 0 if the token is not a binary operator.
 */
public int getBinaryPriority(final Token t)
{
    int priority;
    switch (t)
    {
    case LOGICAL_OR:
        priority = 3;
        break;
    case LOGICAL_AND:
        priority = 4;
        break;
    case BINARY_OR:
        priority = 5;
        break;
    case BINARY_XOR:
        priority = 6;
        break;
    case BINARY_AND:
        priority = 7;
        break;
    case EQUAL:
    case NOT_EQUAL:
        priority = 8;
        break;
    case LESS:
    case LESS_EQUAL:
    case GREATER:
    case GREATER_EQUAL:
        priority = 9;
        break;
    case SHL:
    case SHR:
    case USHR:
        priority = 10;
        break;
    case ADD:
    case SUB:
        priority = 11;
        break;
    case MUL:
    case DIV:
    case MODULO:
        priority = 12;
        break;
    case POW:
        priority = 13;
        break;
    case STRING_CONCAT:
    case MAP_CONCAT:
        priority = 14;
        break;
    default:
        priority = 0;
        break;
    }
    return priority;
}
}