package com.laytonsmith.core.compiler;
import com.laytonsmith.core.constructs.Target;
import com.laytonsmith.core.constructs.Token;
import com.laytonsmith.core.constructs.Token.TType;
import com.laytonsmith.core.exceptions.ConfigCompileException;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
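/**
 * Lexes a configuration/script string into a list of {@link Token}s, scanning the
 * input one character at a time while tracking quote, comment, multiline and
 * file option state.
 */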
class LexerObject {
StringBuilder buffer;
//Lexer state flags: single/double quote, multiline, line/block/smart block comment,
//pure mscript, the variable kinds, and file options.
//Additionally, we keep the starting line number for the states that
//can span multiple lines.
boolean state_in_single_quote = false;
boolean state_in_double_quote = false;
boolean state_in_multiline = false;
boolean state_in_line_comment = false;
boolean state_in_block_comment = false;
boolean state_in_smart_block_comment = false;
boolean state_in_pure_mscript = false;
boolean state_in_opt_var = false;
boolean state_in_var = false;
boolean state_in_ivar = false;
boolean state_in_fileopts = false;
StringBuffer fileopts = new StringBuffer();
int start_single_quote = 1;
int start_double_quote = 1;
int start_multiline = 1;
int start_block_comment = 1;
int brace_stack = 0;
int square_brace_stack = 0;
List<Token> token_list = null;
String config;
File file;
//Code target information
int line_num = 1;
int column = 1;
int lastColumn = 0;
Target target = Target.UNKNOWN;
final boolean usingNonPure;
private static SortedSet<TokenMap> tokenMap = new TreeSet<TokenMap>();
/**
 * Pairs a literal symbol with the token type it lexes to. The natural ordering puts
 * longer symbols first, and orders symbols of equal length lexicographically.
 */
private static class TokenMap implements Comparable<TokenMap> {

    String token;
    Token.TType type;

    public TokenMap(String token, Token.TType type) {
        this.type = type;
        this.token = token;
    }

    @Override
    public int compareTo(TokenMap o) {
        int mine = this.token.length();
        int theirs = o.token.length();
        if (mine > theirs) {
            //This token is longer, so it sorts first
            return -1;
        }
        if (mine < theirs) {
            //This token is shorter, so it sorts after
            return 1;
        }
        //Same length: fall back to plain string ordering
        return this.token.compareTo(o.token);
    }

    @Override
    public String toString() {
        return token;
    }
}
/**
 * Registers the simple symbol tokens in the auto-identifier set. Registration
 * order is irrelevant, because the set sorts its entries itself.
 */
private static void setupTokens() {
    //The multiline markers are handled specially in lex() and are not registered here:
    // register(">>>", TType.MULTILINE_START);
    // register("<<<", TType.MULTILINE_END);

    //Comparison
    register("<", TType.LT);
    register("<=", TType.LTE);
    register(">", TType.GT);
    register(">=", TType.GTE);
    register("==", TType.EQUALS);
    register("===", TType.STRICT_EQUALS);
    register("!=", TType.NOT_EQUALS);
    register("!==", TType.STRICT_NOT_EQUALS);

    //Logical
    register("&&", TType.LOGICAL_AND);
    register("||", TType.LOGICAL_OR);
    register("!", TType.LOGICAL_NOT);

    //Arithmetic
    register("+", TType.PLUS);
    register("-", TType.MINUS);
    register("*", TType.MULTIPLICATION);
    register("/", TType.DIVISION);
    register("%", TType.MODULO);
    register("**", TType.EXPONENTIAL);
    register("++", TType.INCREMENT);
    register("--", TType.DECREMENT);

    //Concatenation, slicing and dereferencing
    register(".", TType.CONCAT);
    register("..", TType.SLICE);
    register("->", TType.DEREFERENCE);
    register("::", TType.DEREFERENCE);

    //Brackets and grouping
    register("${", TType.CONST_START);
    register("{", TType.LCURLY_BRACKET);
    register("}", TType.RCURLY_BRACKET);
    register("[", TType.LSQUARE_BRACKET);
    register("]", TType.RSQUARE_BRACKET);
    register("(", TType.FUNC_START);
    register(")", TType.FUNC_END);

    //Assignment and separators
    register("=", TType.ASSIGNMENT);
    register(":", TType.LABEL);
    register(",", TType.COMMA);
}

/**
 * Adds one symbol/type pair to the auto-identifier set.
 */
private static void register(String symbol, TType type) {
    tokenMap.add(new TokenMap(symbol, type));
}

//Populate the token set once, when the class is loaded
static {
    setupTokens();
}
/**
 * Creates a lexer over the given source text.
 *
 * @param config the text to lex; CRLF line endings are converted to LF and a
 * trailing newline is always appended
 * @param file the file recorded in each code target
 * @param startInPureMscript true to start in pure mscript mode; false to start
 * in the non-pure mode
 */
LexerObject(String config, File file, boolean startInPureMscript) {
    String normalized = config.replaceAll("\r\n", "\n");
    this.config = normalized + "\n";
    this.file = file;
    this.usingNonPure = !startInPureMscript;
    this.state_in_pure_mscript = startInPureMscript;
    //Start out with a fresh, empty buffer
    clearBuffer();
}
/**
 * Appends the given value to the pending text buffer.
 *
 * @param s the value to append. A {@code char[]} is appended as its characters;
 * anything else is appended via its string representation.
 */
private void buffer(Object s) {
    if (s instanceof char[]) {
        //StringBuilder.append(Object) would append the array's identity string
        //(e.g. "[C@1b2c3d") rather than its contents, so pick the char[] overload.
        buffer.append((char[]) s);
    } else {
        buffer.append(s);
    }
}
/**
 * Empties the pending text buffer and, if the trimmed text is not empty,
 * identifies it and appends the resulting token to the token list.
 */
private void parseBuffer() {
    String pending = clearBuffer().trim();
    if (pending.isEmpty()) {
        //Nothing but whitespace was buffered
        return;
    }
    Token identified = identifyToken(pending);
    append(identified);
}
/**
 * Replaces the pending text buffer with a new, empty one.
 *
 * @return the text the old buffer held, or an empty string if there was no buffer yet
 */
private String clearBuffer() {
    String previous = (buffer == null) ? "" : buffer.toString();
    buffer = new StringBuilder(32);
    return previous;
}
/**
 * Classifies a piece of unquoted text and wraps it in a token located at the
 * current target. The text is trimmed once up front, and the trimmed form is
 * used both for classification and as the token's value.
 *
 * @param item the raw text to classify
 * @return an INTEGER, DOUBLE, FINAL_VAR, VARIABLE or IVARIABLE token if the text
 * matches one of those forms, otherwise a BARE_STRING token
 */
private Token identifyToken(String item) {
    final String text = item.trim();
    try {
        Long.parseLong(text);
        return new Token(Token.TType.INTEGER, text, target);
    } catch (NumberFormatException ignored) {
        //Not an integer, try the next form
    }
    try {
        //NOTE(review): Double.parseDouble also accepts forms such as "NaN" or "1d";
        //confirm whether those should really lex as DOUBLE.
        Double.parseDouble(text);
        return new Token(Token.TType.DOUBLE, text, target);
    } catch (NumberFormatException ignored) {
        //Not a double, try the next form
    }
    if (text.equals("$")) {
        return new Token(Token.TType.FINAL_VAR, "$", target);
    }
    if (text.matches("\\$[a-zA-Z0-9]+")) {
        return new Token(Token.TType.VARIABLE, text, target);
    }
    if (text.matches("@[a-zA-Z0-9]+")) {
        return new Token(Token.TType.IVARIABLE, text, target);
    }
    //else it's a bare string
    return new Token(Token.TType.BARE_STRING, text, target);
}
/**
 * Builds a token of the given type from the value, located at the current
 * target, and appends it to the token list.
 */
private void append(String value, Token.TType type) {
    Token created = new Token(type, value, target);
    append(created);
}

/**
 * Appends the given token to the end of the token list.
 */
private void append(Token t) {
    token_list.add(t);
}
/**
 * Lexes the source text into a token stream. The first call does the work; later
 * calls return a copy of the token list built by the first call, together with
 * the collected file options.
 *
 * The text is scanned one character at a time with up to two characters of
 * lookahead. In order, each character is checked against: file options
 * ({@code <! ... >}), comments ({@code /* *}{@code /}, {@code /** *}{@code /} and
 * {@code #}), quoted strings and their escapes, the multiline markers
 * ({@code >>>} / {@code <<<}), whitespace, the non-pure mode rules, newlines,
 * and finally the registered symbol tokens.
 *
 * @return the token stream, carrying the tokens and the file options text
 * @throws ConfigCompileException if the text contains an invalid escape, a double
 * quoted string, mismatched multiline or optional variable markers, or file
 * options that do not come first
 */
public TokenStream lex() throws ConfigCompileException {
    if (token_list != null) {
        //Already lexed. Return the file options too, not an empty string.
        return new TokenStream(new ArrayList<Token>(token_list), fileopts.toString());
    }
    token_list = new ArrayList<Token>();
    final int length = config.length();
    for (int i = 0; i < length; i++) {
        final char c = config.charAt(i);
        //Lookahead; '\0' stands in for "past the end of the input"
        final char c2 = (i + 1 < length) ? config.charAt(i + 1) : '\0';
        final char c3 = (i + 2 < length) ? config.charAt(i + 2) : '\0';

        //Keep the code target in step with the stream position
        column += i - lastColumn;
        lastColumn = i;
        if (c == '\n') {
            line_num++;
            column = 1;
        }
        target = new Target(line_num, file, column);

        //First, lets identify our stateful parameters
        //File Options
        if (state_in_fileopts) {
            if (c == '\\' && c2 == '>') {
                //literal >
                fileopts.append('>');
                i++;
            } else if (c == '>') {
                state_in_fileopts = false;
            } else {
                fileopts.append(c);
            }
            continue;
        }

        //Comments are only applicable if we are not inside a string
        if (!state_in_double_quote && !state_in_single_quote) {
            if (!state_in_block_comment && !state_in_line_comment) {
                //We aren't already in a comment, but we might be starting one here
                if (c == '/' && c2 == '*') {
                    parseBuffer();
                    state_in_block_comment = true;
                    start_block_comment = line_num;
                    //A third character of '*' makes it a smart block comment
                    state_in_smart_block_comment = (c3 == '*');
                    i += state_in_smart_block_comment ? 2 : 1;
                    continue;
                }
                if (c == '#') {
                    parseBuffer();
                    state_in_line_comment = true;
                    continue;
                }
            } else if (state_in_block_comment) {
                //We might be ending the block comment
                if (c == '*' && c2 == '/') {
                    state_in_block_comment = false;
                    i++;
                    if (state_in_smart_block_comment) {
                        //TODO: We need to process the block comment here
                    }
                    clearBuffer();
                    continue;
                }
            } else if (c == '\n') {
                //End of the line comment. No continue here: the newline itself must
                //still be handled below, so the line is terminated like any other.
                state_in_line_comment = false;
                clearBuffer();
            }
        }

        //If we are (still) in a comment, nothing else applies to this character
        if (state_in_block_comment || state_in_line_comment) {
            if (state_in_smart_block_comment) {
                buffer(c);
            }
            continue;
        }

        //Now we need to check for strings
        if (!state_in_double_quote && !state_in_single_quote) {
            //A quote can only open a string when we are not already inside one;
            //the other kind of quote inside a string is just a literal character.
            if (c == '"') {
                //Start of smart string
                parseBuffer();
                state_in_double_quote = true;
                start_double_quote = line_num;
                continue;
            }
            if (c == '\'') {
                //Start of string
                parseBuffer();
                state_in_single_quote = true;
                start_single_quote = line_num;
                continue;
            }
        } else {
            if (c == '\\') {
                //It's an escaped something or another
                i += bufferEscape(i, c2);
                continue;
            }
            if (state_in_double_quote) {
                if (c == '"') {
                    state_in_double_quote = false;
                    append(clearBuffer(), Token.TType.SMART_STRING);
                    //This is currently an error, but won't be forever
                    error("Double quotes are currently unsupported");
                } else {
                    buffer(c);
                }
            } else {
                if (c == '\'') {
                    state_in_single_quote = false;
                    append(clearBuffer(), Token.TType.STRING);
                } else {
                    buffer(c);
                }
            }
            continue;
        }

        //Now deal with multiline states
        if (c == '>' && c2 == '>' && c3 == '>') {
            //Multiline start
            if (state_in_multiline) {
                error("Found multiline start symbol while already in multiline!");
            }
            state_in_multiline = true;
            start_multiline = line_num;
            i += 2;
            continue;
        }
        if (c == '<' && c2 == '<' && c3 == '<') {
            if (!state_in_multiline) {
                error("Found multiline end symbol while not in multiline!");
            }
            state_in_multiline = false;
            i += 2;
            continue;
        }

        //Whitespace (other than newlines) ends whatever was being buffered
        if (Character.isWhitespace(c) && c != '\n') {
            parseBuffer();
            continue;
        }

        if (c == '<' && c2 == '!') {
            if (!token_list.isEmpty()) {
                throw new ConfigCompileException("File options must come first in the file.", target);
            }
            state_in_fileopts = true;
            i++;
            continue;
        }

        //To simplify token processing later, we will go ahead and do special handling if we're
        //not in pure mscript. Therefore, = will
        //get special handling up here, as well as square brackets
        if (!state_in_pure_mscript) {
            if (c == '[') {
                if (state_in_opt_var) {
                    error("Found [ symbol, but a previous optional variable had already been started");
                }
                state_in_opt_var = true;
                parseBuffer();
                append("[", Token.TType.LSQUARE_BRACKET);
                continue;
            }
            if (c == ']') {
                if (!state_in_opt_var) {
                    error("Found ] symbol, but no optional variable had been started");
                }
                state_in_opt_var = false;
                parseBuffer();
                append("]", Token.TType.RSQUARE_BRACKET);
                continue;
            }
            if (c == '=') {
                parseBuffer();
                if (state_in_opt_var) {
                    //This is an optional variable declaration
                    append("=", Token.TType.OPT_VAR_ASSIGN);
                } else {
                    //Everything after this = is pure mscript
                    state_in_pure_mscript = true;
                    append("=", Token.TType.ALIAS_END);
                }
                continue;
            }
            if (c == ':') {
                parseBuffer();
                append(":", Token.TType.LABEL);
                continue;
            }
            if (c == '\n') {
                parseBuffer();
                //Don't add duplicates
                if (token_list.isEmpty() || token_list.get(token_list.size() - 1).type != TType.NEWLINE) {
                    append("\n", TType.NEWLINE);
                }
                continue;
            }
            //At this point, all other tokens are to be taken literally
            buffer(c);
            continue;
        }

        //Newlines are handled differently if it's in multiline or not.
        //In multiline mode (or when the whole input is pure mscript) the newline is simply
        //dropped, otherwise it is kept (except duplicate ones).
        if (c == '\n') {
            //Always flush first, so text on either side of the newline is never glued together
            parseBuffer();
            if (!state_in_multiline && usingNonPure) {
                if (!token_list.isEmpty() && token_list.get(token_list.size() - 1).type != TType.NEWLINE) {
                    //Don't add duplicates
                    append("\n", Token.TType.NEWLINE);
                }
                //This also signals the end of pure mscript
                state_in_pure_mscript = false;
            }
            continue;
        }

        //Handle decimal place vs concat
        if (c == '.' && Character.isDigit(c2)) {
            //It'll get identified automatically in a bit
            buffer(c);
            continue;
        }

        //We need to handle /cmd vs division
        if (c == '/' && (c2 == '/' || Character.isLetter(c2))) {
            //It'll be registered as a bare string later
            buffer(c);
            continue;
        }

        //Now we are in pure mscript mode; see if a symbol token starts here
        int skip = identifySymbol(i);
        if (skip != -1) {
            //Cool, it found one. Jump ahead.
            i += skip;
            continue;
        }
        buffer(c);
    }
    parseBuffer();
    //NOTE(review): a string, block comment or multiline still open at this point is not
    //reported, although the start_* line numbers are recorded - confirm if that is intended.
    return new TokenStream(new ArrayList<Token>(token_list), fileopts.toString());
}

/**
 * Buffers the character denoted by the escape sequence whose backslash is at
 * {@code backslashAt}, and reports how much of the stream the escape used.
 *
 * @param backslashAt index in the source text of the backslash
 * @param escaped the character directly after the backslash
 * @return the number of characters after the backslash that belong to the escape
 * @throws ConfigCompileException if the escape is unknown, is an incomplete or
 * malformed unicode escape, or escapes the wrong kind of quote for the current string
 */
private int bufferEscape(int backslashAt, char escaped) throws ConfigCompileException {
    switch (escaped) {
        case 'n':
            buffer("\n");
            return 1;
        case 't':
            buffer("\t");
            return 1;
        case '\\':
            //Literal backslash
            buffer("\\");
            return 1;
        case 'u':
            //Four hex digits must follow the 'u'
            if (backslashAt + 5 >= config.length()) {
                //They didn't put enough characters in the stream
                error("Incomplete unicode escape");
            }
            int codePoint = 0;
            for (int offset = 2; offset <= 5; offset++) {
                int digit = Character.digit(config.charAt(backslashAt + offset), 16);
                if (digit < 0) {
                    error("Unrecognized unicode escape sequence");
                }
                codePoint = (codePoint << 4) | digit;
            }
            //Buffer the character(s) themselves, not the char[] object
            buffer(new String(Character.toChars(codePoint)));
            //'u' plus four digits, so that none of the digits is lexed a second time
            return 5;
        case '\'':
            if (state_in_double_quote) {
                //It's an error if we're in double quotes to escape a single quote
                error("Invalid escape found. It is an error to escape single quotes inside a double quote.");
            }
            buffer("'");
            return 1;
        case '"':
            if (state_in_single_quote) {
                //It's an error if we're in single quotes to escape a double quote
                error("Invalid escape found. It is an error to escape double quotes inside a single quote.");
            }
            buffer("\"");
            return 1;
        default:
            //It's invalid, so throw an exception
            error("The escape sequence \\" + escaped + " is not a recognized escape sequence");
            return 0;
    }
}
/**
 * If a symbol token starts at the given position in the stream, any pending
 * buffered text is flushed as its own token, the symbol is pushed onto the
 * token_list, and the number of additional characters to advance the stream
 * is returned (so 0 means a one character symbol was found). If no symbol
 * starts there, -1 is returned and no changes will have been made.
 * @param startAt index in the source text at which to look for a symbol
 * @return the symbol's length minus one, or -1 if no symbol was identified
 */
private int identifySymbol(int startAt) {
    //The token set is ordered longest first, so the first hit is the longest match
    for (TokenMap tm : tokenMap) {
        if (config.startsWith(tm.token, startAt)) {
            //Found it. Whatever was buffered before the symbol is its own token.
            parseBuffer();
            append(tm.token, tm.type);
            return tm.token.length() - 1;
        }
    }
    return -1;
}
/**
 * Aborts lexing by throwing an exception that carries the given message and
 * the current code target.
 *
 * @param message the description of the problem
 * @throws ConfigCompileException always
 */
private void error(String message) throws ConfigCompileException {
    ConfigCompileException failure = new ConfigCompileException(message, target);
    throw failure;
}
}