package org.reasm.m68k.expressions.internal;
import java.util.ArrayList;
import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import org.reasm.AssemblyMessage;
import org.reasm.FloatValue;
import org.reasm.StringValue;
import org.reasm.UnsignedIntValue;
import org.reasm.commons.expressions.Expressions;
import org.reasm.commons.source.Syntax;
import org.reasm.expressions.*;
import org.reasm.messages.OverflowInLiteralWarningMessage;
import ca.fragag.Consumer;
import com.google.common.primitives.UnsignedLongs;
/**
* Contains a method to parse an expression from a sequence of tokens.
*
* @author Francis Gagné
*/
public final class ExpressionParser {
@Nonnull
private static final Expression[] NO_ARGUMENTS = new Expression[0];
// The SymbolLookup of this IdentifierExpression is irrelevant, since that expression is never evaluated.
@Nonnull
private static final IdentifierExpression EMPTY_IDENTIFIER = new IdentifierExpression("", null);
/**
* Parses an expression from the tokens emitted by the specified tokenizer.
*
* @param tokenizer
* the tokenizer to read tokens from
* @param symbolLookup
* an object that looks up symbols by name, which will be used to look up the symbol for identifiers when the
* identifier is {@linkplain IdentifierExpression#evaluate(EvaluationContext) evaluated}, or <code>null</code> to
* consider all identifiers undefined
* @param assemblyMessageConsumer
* a {@link Consumer} that will receive {@link AssemblyMessage}s generated while parsing the expression
* @return the parsed {@link Expression}, or <code>null</code> if an expression could not be parsed
* @throws InvalidTokenException
* an {@linkplain TokenType#INVALID invalid} token was emitted by the tokenizer
*/
@CheckForNull
public static Expression parse(@Nonnull Tokenizer tokenizer, @CheckForNull SymbolLookup symbolLookup,
@CheckForNull Consumer<AssemblyMessage> assemblyMessageConsumer) throws InvalidTokenException {
Expression expression = parseLevel3(tokenizer, symbolLookup, assemblyMessageConsumer);
if (expression != null) {
Tokenizer tokenizer1;
for (;; tokenizer.copyFrom(tokenizer1)) {
if (tokenizer.getTokenType() != TokenType.CONDITIONAL_OPERATOR_FIRST) {
break;
}
tokenizer1 = tokenizer.duplicateAndAdvance();
Expression truePart = parse(tokenizer1, symbolLookup, assemblyMessageConsumer);
if (truePart == null) {
break;
}
if (tokenizer1.getTokenType() != TokenType.CONDITIONAL_OPERATOR_SECOND) {
break;
}
tokenizer1.advance();
Expression falsePart = parse(tokenizer1, symbolLookup, assemblyMessageConsumer);
if (falsePart == null) {
break;
}
expression = new ConditionalExpression(expression, truePart, falsePart);
}
}
return expression;
}
@CheckForNull
private static M68KBinaryOperator parseBinaryOperator(@Nonnull Tokenizer tokenizer) {
if (tokenizer.getTokenType() != TokenType.OPERATOR) {
return null;
}
// Check for characters that only correspond to single-character operators.
final char ch1 = tokenizer.tokenCharAt(0);
switch (ch1) {
case '*':
return M68KBinaryOperator.MULTIPLICATION;
case '/':
return M68KBinaryOperator.DIVISION;
case '%':
return M68KBinaryOperator.MODULUS;
case '+':
return M68KBinaryOperator.ADDITION;
case '-':
return M68KBinaryOperator.SUBTRACTION;
case '^':
return M68KBinaryOperator.BITWISE_XOR;
default:
break;
}
// Check for characters that appear as the first character of a 2-character operator.
final int ch2 = tokenizer.getTokenLength() > 1 ? tokenizer.tokenCharAt(1) : -1;
switch (ch1) {
case '<':
switch (ch2) {
case -1:
return M68KBinaryOperator.LESS_THAN;
case '<':
return M68KBinaryOperator.BIT_SHIFT_LEFT;
case '=':
return M68KBinaryOperator.LESS_THAN_OR_EQUAL_TO;
case '>':
return M68KBinaryOperator.DIFFERENT_FROM;
default:
throw new AssertionError(); // unreachable
}
case '>':
switch (ch2) {
case -1:
return M68KBinaryOperator.GREATER_THAN;
case '=':
return M68KBinaryOperator.GREATER_THAN_OR_EQUAL_TO;
case '>':
return M68KBinaryOperator.BIT_SHIFT_RIGHT;
default:
throw new AssertionError(); // unreachable
}
case '=':
switch (ch2) {
case -1:
return M68KBinaryOperator.EQUAL_TO;
case '=':
return M68KBinaryOperator.STRICTLY_EQUAL_TO;
default:
throw new AssertionError(); // unreachable
}
case '&':
switch (ch2) {
case -1:
return M68KBinaryOperator.BITWISE_AND;
case '&':
return M68KBinaryOperator.LOGICAL_AND;
default:
throw new AssertionError(); // unreachable
}
case '|':
switch (ch2) {
case -1:
return M68KBinaryOperator.BITWISE_OR;
case '|':
return M68KBinaryOperator.LOGICAL_OR;
default:
throw new AssertionError(); // unreachable
}
case '!':
switch (ch2) {
case -1:
return null; // not a binary operator
case '=':
return M68KBinaryOperator.STRICTLY_DIFFERENT_FROM;
default:
throw new AssertionError(); // unreachable
}
default:
return null;
}
}
@CheckForNull
private static Expression parseLevel0(@Nonnull Tokenizer tokenizer, @CheckForNull SymbolLookup symbolLookup,
@CheckForNull Consumer<AssemblyMessage> assemblyMessageConsumer) throws InvalidTokenException {
if (tokenizer.tokenEqualsString("%")) {
tokenizer.changeToBinaryInteger();
}
final CharSequence tokenText = tokenizer.getTokenText();
Expression expression = null;
switch (tokenizer.getTokenType()) {
case INVALID:
throw new InvalidTokenException(tokenText.toString());
case DECIMAL_INTEGER: {
final long intValue = parseUnsignedLongWithOverflow(tokenText, 10, assemblyMessageConsumer);
expression = new ValueExpression(new UnsignedIntValue(intValue));
break;
}
case BINARY_INTEGER: {
final long intValue = parseUnsignedLongWithOverflow(tokenText.subSequence(1, tokenizer.getTokenLength()), 2,
assemblyMessageConsumer);
expression = new ValueExpression(new UnsignedIntValue(intValue));
break;
}
case HEXADECIMAL_INTEGER: {
final long intValue = parseUnsignedLongWithOverflow(tokenText.subSequence(1, tokenizer.getTokenLength()), 16,
assemblyMessageConsumer);
expression = new ValueExpression(new UnsignedIntValue(intValue));
break;
}
case REAL:
final double floatValue = Expression.parseFloatWithOverflow(tokenText);
expression = new ValueExpression(new FloatValue(floatValue));
break;
case STRING:
final String stringValue = Expressions.parseString(tokenText, assemblyMessageConsumer);
expression = new ValueExpression(new StringValue(stringValue));
break;
case IDENTIFIER:
final String identifier = tokenText.toString();
expression = new IdentifierExpression(identifier, symbolLookup);
break;
case OPERATOR:
if (tokenizer.tokenEqualsString("*")) {
expression = ProgramCounterExpression.INSTANCE;
}
break;
case OPENING_PARENTHESIS:
Tokenizer tokenizer1 = tokenizer.duplicateAndAdvance();
Expression childExpression = parse(tokenizer1, symbolLookup, assemblyMessageConsumer);
if (childExpression == null) {
return null;
}
if (tokenizer1.getTokenType() != TokenType.CLOSING_PARENTHESIS) {
return null;
}
tokenizer.copyFrom(tokenizer1);
expression = new GroupingExpression(childExpression);
break;
default:
break;
}
if (expression != null) {
tokenizer.advance();
}
return expression;
}
@CheckForNull
private static Expression parseLevel1(@Nonnull Tokenizer tokenizer, @CheckForNull SymbolLookup symbolLookup,
@CheckForNull Consumer<AssemblyMessage> assemblyMessageConsumer) throws InvalidTokenException {
if (tokenizer.getTokenType() == TokenType.PLUS_OR_MINUS_SEQUENCE) {
tokenizer.breakSequence();
}
if (tokenizer.getTokenType() == TokenType.PERIOD) {
// Treat the period operator as a unary operator too.
final Tokenizer tokenizer1 = tokenizer.duplicateAndAdvance();
final Expression expression1 = parseLevel1(tokenizer1, symbolLookup, assemblyMessageConsumer);
if (expression1 != null) {
tokenizer.copyFrom(tokenizer1);
return new PeriodExpression(EMPTY_IDENTIFIER, expression1, symbolLookup);
}
return null;
}
if (tokenizer.getTokenType() == TokenType.OPERATOR && tokenizer.getTokenLength() == 1) {
UnaryOperator operator = null;
// Check if the operator is a unary operator.
switch (tokenizer.tokenCharAt(0)) {
case '!':
operator = UnaryOperator.LOGICAL_NOT;
break;
case '+':
operator = UnaryOperator.UNARY_PLUS;
break;
case '-':
operator = UnaryOperator.NEGATION;
break;
case '~':
operator = UnaryOperator.BITWISE_NOT;
break;
}
if (operator != null) {
final Tokenizer tokenizer1 = tokenizer.duplicateAndAdvance();
final Expression expression1 = parseLevel1(tokenizer1, symbolLookup, assemblyMessageConsumer);
if (expression1 != null) {
tokenizer.copyFrom(tokenizer1);
return new UnaryOperatorExpression(operator, expression1);
}
return null;
}
}
Expression expression = parseLevel0(tokenizer, symbolLookup, assemblyMessageConsumer);
if (expression != null) {
Tokenizer tokenizer1;
outer: for (;; tokenizer.copyFrom(tokenizer1)) {
final TokenType tokenType = tokenizer.getTokenType();
switch (tokenType) {
case OPENING_PARENTHESIS:
tokenizer1 = tokenizer.duplicateAndAdvance();
// If the argument list is empty, return a function call expression with no arguments.
if (tokenizer1.getTokenType() == TokenType.CLOSING_PARENTHESIS) {
tokenizer1.advance();
expression = new FunctionCallExpression(expression, NO_ARGUMENTS);
continue;
}
final ArrayList<Expression> arguments = new ArrayList<>();
for (; tokenizer1.getTokenType() != TokenType.END; tokenizer1.advance()) {
final Expression argument = parse(tokenizer1, symbolLookup, assemblyMessageConsumer);
// If we couldn't parse a valid argument, give up parsing the argument list.
if (argument == null) {
break;
}
arguments.add(argument);
final TokenType tokenType1 = tokenizer1.getTokenType();
if (tokenType1 == TokenType.CLOSING_PARENTHESIS) {
tokenizer1.advance();
expression = new FunctionCallExpression(expression, arguments);
continue outer;
}
if (tokenType1 != TokenType.COMMA) {
// Give up parsing the argument list.
break;
}
}
break outer;
case OPENING_BRACKET:
tokenizer1 = tokenizer.duplicateAndAdvance();
// Parse the index expression between the brackets.
final Expression indexExpression = parse(tokenizer1, symbolLookup, assemblyMessageConsumer);
if (indexExpression == null) {
break outer;
}
if (tokenizer1.getTokenType() != TokenType.CLOSING_BRACKET) {
break outer;
}
tokenizer1.advance();
expression = new IndexerExpression(expression, indexExpression, symbolLookup);
break;
case PERIOD:
tokenizer1 = tokenizer.duplicateAndAdvance();
final Expression rightOperand = parseLevel0(tokenizer1, symbolLookup, assemblyMessageConsumer);
if (rightOperand == null) {
break outer;
}
expression = new PeriodExpression(expression, rightOperand, symbolLookup);
break;
default:
break outer;
}
}
}
return expression;
}
@CheckForNull
private static Expression parseLevel2(@Nonnull Tokenizer tokenizer, @CheckForNull SymbolLookup symbolLookup,
@CheckForNull M68KBinaryOperator referenceOperator, @CheckForNull Consumer<AssemblyMessage> assemblyMessageConsumer)
throws InvalidTokenException {
Expression expression = parseLevel1(tokenizer, symbolLookup, assemblyMessageConsumer);
if (expression != null) {
Tokenizer tokenizer1;
for (;; tokenizer.copyFrom(tokenizer1)) {
if (tokenizer.getTokenType() == TokenType.PLUS_OR_MINUS_SEQUENCE) {
tokenizer.breakSequence();
}
M68KBinaryOperator operator = parseBinaryOperator(tokenizer);
if (operator == null) {
break;
}
tokenizer1 = tokenizer.duplicateAndAdvance();
// Honor operator precedence.
if (referenceOperator != null && operator.getPriority() >= referenceOperator.getPriority()) {
break;
}
final Expression rightOperand = parseLevel2(tokenizer1, symbolLookup, operator, assemblyMessageConsumer);
if (rightOperand == null) {
break;
}
expression = new BinaryOperatorExpression(operator.getOperator(), expression, rightOperand);
}
}
return expression;
}
@CheckForNull
private static Expression parseLevel3(@Nonnull Tokenizer tokenizer, @CheckForNull SymbolLookup symbolLookup,
@CheckForNull Consumer<AssemblyMessage> assemblyMessageConsumer) throws InvalidTokenException {
// Anonymous symbols are only accepted when they stand alone in the expression, or if they appear alone within parentheses,
// within brackets, in either part of a conditional expression or in an argument.
final CharSequence tokenText = tokenizer.getTokenText();
if (tokenizer.getTokenType() == TokenType.PLUS_OR_MINUS_SEQUENCE) {
Tokenizer tokenizer1 = tokenizer.duplicateAndAdvance();
final TokenType tokenType = tokenizer1.getTokenType();
switch (tokenType) {
case END:
case CLOSING_PARENTHESIS:
case CLOSING_BRACKET:
case CONDITIONAL_OPERATOR_FIRST:
case CONDITIONAL_OPERATOR_SECOND:
case COMMA:
tokenizer.copyFrom(tokenizer1);
return new IdentifierExpression(tokenText.toString(), symbolLookup);
default:
break;
}
}
return parseLevel2(tokenizer, symbolLookup, null, assemblyMessageConsumer);
}
private static long parseUnsignedLongWithOverflow(@Nonnull CharSequence value, int radix,
@CheckForNull Consumer<AssemblyMessage> assemblyMessageConsumer) {
long result = 0;
boolean overflow = false;
for (int i = 0; i < value.length(); i++) {
char ch = value.charAt(i);
int digit;
// Compute the value of the current digit.
if (Syntax.isDigit(ch)) {
digit = ch - '0';
} else {
assert Syntax.isHexDigit(ch);
// Force the letter to upper case.
digit = (ch & 0x5F) - 'A' + 10;
}
assert digit < radix;
// Update the result with this digit while detecting overflow.
// http://stackoverflow.com/questions/8534107/detecting-multiplication-of-uint64-t-integers-overflow-with-c
long a = radix, b = result;
// The 32-bit overflow check on a is omitted, because radix is at most 16 here.
long c = b >>> 32; // upper 32 bits of b
long d = b & 0xFFFFFFFFL; // lower 32 bits of b
long r = a * c;
long s = a * d;
if (r > 0xFFFFFFFFL) {
overflow = true;
}
r <<= 32;
if (UnsignedLongs.compare(s + r, s) < 0) {
overflow = true;
}
result = s + r;
// If result + digit < result, the addition overflowed.
if (UnsignedLongs.compare(result + digit, result) < 0) {
overflow = true;
}
result += digit;
}
if (overflow) {
if (assemblyMessageConsumer != null) {
assemblyMessageConsumer.accept(new OverflowInLiteralWarningMessage(value.toString()));
}
}
return result;
}
// This class is not meant to be instantiated.
private ExpressionParser() {
}
}