/**
*
* Copyright 2015 Patrick Ahlbrecht
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.onyxbits.jbee;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.ParseException;
import java.text.ParsePosition;
/**
* The Tokenizer for the Expression- and Declaration Parser.
*/
final class Lexer {
/**
* Contains the content value of a token (if needed)
*/
protected TokenValue value;
private int idx;
private int prevIdx;
protected final char[] inp;
private char dSep;
private char gSep;
private DecimalFormat format;
/**
* Construct a new Lexer with a given locale
*
* @param format
* @param expr
* the expression to tokenize
*/
public Lexer(DecimalFormat format, String expr) {
this.format = format;
DecimalFormatSymbols syms = format.getDecimalFormatSymbols();
this.dSep = syms.getDecimalSeparator();
this.gSep = syms.getGroupingSeparator();
this.inp = expr.toCharArray();
format.setParseBigDecimal(true);
}
/**
* Query the current cursor position
*
* @return index into the input string
*/
protected int getPosition() {
return idx;
}
/**
* Call after nextToken() to get the exact character sequence that was
* matched.
*
* @return raw character sequence (not trimmed).
*/
protected String lastMatch() {
return new String(inp, prevIdx, idx - prevIdx);
}
/**
* Read the next token from the input, advance the cursor.
*
* @return token type
*/
protected int nextExpressionToken() throws ParseException {
// NOTE: Since separator characters are not required and whitespace is
// ignored, every token must start with a unique character(sequence). Input
// is then matched till the next unique character(sequence), an unknown
// character or EOL is encountered.
prevIdx = idx;
while (idx < inp.length && (inp[idx] == ' ' || inp[idx] == '\t')) {
// Ignore spaces
idx++;
}
if (idx >= inp.length) {
// We are done
return 0;
}
// Hex number?
if (idx < inp.length - 1 && inp[idx] == '\\' && inp[idx + 1] == 'x') {
hex();
return ExpressionParserTokens.NUM;
}
// Binary number?
if (idx < inp.length - 1 && inp[idx] == '\\' && inp[idx + 1] == 'b') {
bin();
return ExpressionParserTokens.NUM;
}
// Decimal number?
if (idx < inp.length && inp[idx] >= '0' && inp[idx] <= '9') {
dec();
return ExpressionParserTokens.NUM;
}
// Anything starting with a letter or an underscore is an identifier
if ((idx < inp.length && inp[idx] >= 'a' && inp[idx] <= 'z')
|| (idx < inp.length && inp[idx] >= 'Z' && inp[idx] <= 'Z') || inp[idx] == '_') {
ident();
return ExpressionParserTokens.IDENT;
}
// From here on it's either an operator or something that can't be matched.
switch (inp[idx]) {
case '+': {
if (idx < inp.length - 1 && inp[idx + 1] == '%') {
idx += 2;
return ExpressionParserTokens.PLUSPERCENT;
}
else {
idx++;
return '+';
}
}
case '-': {
if (idx < inp.length - 1 && inp[idx + 1] == '%') {
idx += 2;
return ExpressionParserTokens.MINUSPERCENT;
}
else {
idx++;
return '-';
}
}
case '*': {
idx++;
return '*';
}
case '/': {
idx++;
return '/';
}
case '(': {
idx++;
return '(';
}
case ';': {
idx++;
return ExpressionParserTokens.LSTSEP;
}
case ')': {
idx++;
return ')';
}
case ':': {
idx++;
return ':';
}
case '%': {
idx++;
return '%';
}
case '~': {
idx++;
return '~';
}
case '&': {
idx++;
return '&';
}
case '|': {
idx++;
return '|';
}
case '#': {
idx++;
return '#';
}
case '^': {
idx++;
return '^';
}
case '>': {
if (idx < inp.length - 1 && inp[idx + 1] == '>') {
idx += 2;
return ExpressionParserTokens.BSHIFTR;
}
}
case '<': {
if (idx < inp.length - 1 && inp[idx + 1] == '<') {
idx += 2;
return ExpressionParserTokens.BSHIFTL;
}
}
default: {
throw new ParseException("" + inp[idx], idx + 1);
}
}
}
/**
* Read the next token from the input, advance the cursor.
*
* @return token type
*/
protected int nextDeclarationToken() throws ParseException {
prevIdx = idx;
while (idx < inp.length && (inp[idx] == ' ' || inp[idx] == '\t')) {
// Ignore white space
idx++;
}
while (idx < inp.length - 1 && inp[idx] == '/' && inp[idx + 1] == '/') {
// Skip over comments
idx += 2;
while (idx < inp.length && inp[idx] != '\n') {
idx++;
}
idx++;
}
if (idx >= inp.length) {
// We are done
return 0;
}
// Hex number?
if (idx < inp.length - 1 && inp[idx] == '\\' && inp[idx + 1] == 'x') {
hex();
return ExpressionParserTokens.NUM;
}
// Binary number?
if (idx < inp.length - 1 && inp[idx] == '\\' && inp[idx + 1] == 'b') {
bin();
return ExpressionParserTokens.NUM;
}
// Decimal number?
if (idx < inp.length && inp[idx] >= '0' && inp[idx] <= '9') {
dec();
return ExpressionParserTokens.NUM;
}
// Anything starting with a letter or an underscore is an identifier
if ((idx < inp.length && inp[idx] >= 'a' && inp[idx] <= 'z')
|| (idx < inp.length && inp[idx] >= 'Z' && inp[idx] <= 'Z') || inp[idx] == '_') {
ident();
return DeclarationParserTokens.IDENT;
}
// From here on it's either an operator or something that can't be matched.
switch (inp[idx]) {
case ';': {
idx++;
return ';';
}
case '=': {
idx++;
return '=';
}
case '\n': {
idx++;
return '\n';
}
default: {
throw new ParseException("" + inp[idx], idx + 1);
}
}
}
private void dec() throws ParseException {
// NOTE: decimal numbers must start with a number, but we already checked
// for that in nextToken(), so we don't do it here again.
int tmp = idx;
while (tmp < inp.length
&& ((inp[tmp] >= '0' && inp[tmp] <= '9') || inp[tmp] == gSep || inp[tmp] == dSep)) {
tmp++;
}
ParsePosition pos = new ParsePosition(0);
String str = new String(inp, idx, tmp - idx);
BigDecimal res = (BigDecimal) format.parse(str, pos);
if (pos.getIndex() != tmp - idx) {
throw new ParseException(str, idx);
}
value = new TokenValue(res);
idx = tmp;
}
private void hex() throws ParseException {
// We already did the "0x" check, so skip over
int tmp = idx + 2;
while (tmp < inp.length
&& ((tmp < inp.length && inp[tmp] >= 'a' && inp[tmp] <= 'f')
|| (tmp < inp.length && inp[tmp] >= 'A' && inp[tmp] <= 'F') || (tmp < inp.length
&& inp[tmp] >= '0' && inp[tmp] <= '9'))) {
tmp++;
}
String num = new String(inp, idx + 2, tmp - (idx + 2));
if (num.length() == 0) {
throw new ParseException("\\x", idx);
}
value = new TokenValue(new BigDecimal(new BigInteger(num, 16)));
idx = tmp;
}
private void bin() throws ParseException {
// We already did the "0b" check, so skip over
int tmp = idx + 2;
while (tmp < inp.length && (inp[tmp] == '0' || inp[tmp] == '1')) {
tmp++;
}
String num = new String(inp, idx + 2, tmp - (idx + 2));
if (num.length() == 0) {
throw new ParseException("\\b", idx);
}
value = new TokenValue(new BigDecimal(new BigInteger(num, 2)));
idx = tmp;
}
private void ident() {
// NOTE: identifiers must start with a letter or underscore, but we already
// checked for that in nextToken(), so we don't do it here again.
int tmp = idx;
while (tmp < inp.length
&& ((tmp < inp.length && inp[tmp] >= 'a' && inp[tmp] <= 'z')
|| (tmp < inp.length && inp[tmp] >= 'A' && inp[tmp] <= 'Z')
|| (tmp < inp.length && inp[tmp] >= '0' && inp[tmp] <= '9') || inp[tmp] == '_')) {
tmp++;
}
value = new TokenValue(new String(inp, idx, tmp - idx));
idx = tmp;
}
}