package com.laytonsmith.core;
import com.laytonsmith.annotations.breakable;
import com.laytonsmith.annotations.nolinking;
import com.laytonsmith.annotations.unbreakable;
import com.laytonsmith.core.Optimizable.OptimizationOption;
import com.laytonsmith.core.compiler.FileOptions;
import com.laytonsmith.core.compiler.KeywordList;
import com.laytonsmith.core.constructs.CDouble;
import com.laytonsmith.core.constructs.CFunction;
import com.laytonsmith.core.constructs.CIdentifier;
import com.laytonsmith.core.constructs.CInt;
import com.laytonsmith.core.constructs.CKeyword;
import com.laytonsmith.core.constructs.CLabel;
import com.laytonsmith.core.constructs.CNull;
import com.laytonsmith.core.constructs.CPreIdentifier;
import com.laytonsmith.core.constructs.CSlice;
import com.laytonsmith.core.constructs.CString;
import com.laytonsmith.core.constructs.CSymbol;
import com.laytonsmith.core.constructs.CVoid;
import com.laytonsmith.core.constructs.Construct;
import com.laytonsmith.core.constructs.IVariable;
import com.laytonsmith.core.constructs.Target;
import com.laytonsmith.core.constructs.Token;
import com.laytonsmith.core.constructs.Token.TType;
import com.laytonsmith.core.constructs.Variable;
import com.laytonsmith.core.environments.Environment;
import com.laytonsmith.core.environments.GlobalEnv;
import com.laytonsmith.core.exceptions.ConfigCompileException;
import com.laytonsmith.core.exceptions.ConfigCompileGroupException;
import com.laytonsmith.core.exceptions.ConfigRuntimeException;
import com.laytonsmith.core.exceptions.ProgramFlowManipulationException;
import com.laytonsmith.core.functions.Compiler;
import com.laytonsmith.core.functions.DataHandling;
import com.laytonsmith.core.functions.Function;
import com.laytonsmith.core.functions.FunctionBase;
import com.laytonsmith.core.functions.FunctionList;
import com.laytonsmith.core.functions.IncludeCache;
import com.laytonsmith.persistence.DataSourceException;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EmptyStackException;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
/**
* The MethodScriptCompiler class handles the various stages of compilation and
* provides helper methods for execution of the compiled trees.
*/
public final class MethodScriptCompiler {
/**
 * An empty set of optimization options (created with {@link EnumSet#noneOf}).
 */
private final static EnumSet<Optimizable.OptimizationOption> NO_OPTIMIZATIONS = EnumSet.noneOf(Optimizable.OptimizationOption.class);
/**
 * File options built from an empty option map. {@code compile} passes this
 * instance to the {@code ParseTree} nodes it creates.
 */
private final static FileOptions fileOptions = new FileOptions(new HashMap<String, String>());
/**
 * Private constructor: the members visible here are all static, so the class
 * is not meant to be instantiated.
 */
private MethodScriptCompiler() {
}
/**
 * Matches a {@code $} followed by one or more letters, digits or underscores.
 * {@code lex} uses it to classify tokens as {@code VARIABLE}.
 */
private static final Pattern VAR_PATTERN = Pattern.compile("\\$[\\p{L}0-9_]+");
/**
 * Compiled form of {@code IVariable.VARIABLE_NAME_REGEX}. {@code lex} uses it
 * to classify tokens as {@code IVARIABLE}.
 */
private static final Pattern IVAR_PATTERN = Pattern.compile(IVariable.VARIABLE_NAME_REGEX);
/**
 * Fixed-text symbols recognised by {@link #lex(String, File, boolean)} outside
 * of quotes, in matching priority order. Where one symbol is a prefix of
 * another (or they share a first character), the entry listed first wins, so
 * e.g. {@code "+="} and {@code "++"} must stay ahead of {@code "+"}.
 * Single {@code &}, {@code |} and {@code ^} are intentionally absent: bitwise
 * symbols are not tokenized, and those characters end up in literals.
 * The plain {@code =} and {@code (} symbols are context dependent and are
 * handled directly in {@code lex}.
 */
private static final String[] LEX_SYMBOL_TEXT = {
	"+=", "-=", "*=", "/=", ".=",
	"->", "++", "--", "%",
	"**", "*", "+", "-", "/",
	">=", "<=", "<<<", ">>>", "<", ">",
	"===", "!==", "==", "!=",
	"&&&", "|||", "&&", "||", "!",
	"{", "}",
	"..", ".", "::",
	"[", "]", ":", ",", ")", ";"
};
/**
 * Token types for {@link #LEX_SYMBOL_TEXT}; entry {@code k} here is the type
 * emitted for the symbol at index {@code k} there.
 */
private static final TType[] LEX_SYMBOL_TYPES = {
	TType.PLUS_ASSIGNMENT, TType.MINUS_ASSIGNMENT, TType.MULTIPLICATION_ASSIGNMENT, TType.DIVISION_ASSIGNMENT, TType.CONCAT_ASSIGNMENT,
	TType.DEREFERENCE, TType.INCREMENT, TType.DECREMENT, TType.MODULO,
	TType.EXPONENTIAL, TType.MULTIPLICATION, TType.PLUS, TType.MINUS, TType.DIVISION,
	TType.GTE, TType.LTE, TType.MULTILINE_END, TType.MULTILINE_START, TType.LT, TType.GT,
	TType.STRICT_EQUALS, TType.STRICT_NOT_EQUALS, TType.EQUALS, TType.NOT_EQUALS,
	TType.DEFAULT_AND, TType.DEFAULT_OR, TType.LOGICAL_AND, TType.LOGICAL_OR, TType.LOGICAL_NOT,
	TType.LCURLY_BRACKET, TType.RCURLY_BRACKET,
	TType.SLICE, TType.DOT, TType.DEREFERENCE,
	TType.LSQUARE_BRACKET, TType.RSQUARE_BRACKET, TType.LABEL, TType.COMMA, TType.FUNC_END, TType.SEMICOLON
};
/**
 * Finds the first entry of {@link #LEX_SYMBOL_TEXT} that occurs in the script
 * at the given offset.
 * @param script The (normalized) script being lexed
 * @param offset The index of the character currently being examined
 * @return The index into the symbol tables, or -1 if no symbol starts here. A
 * {@code /} that is immediately followed by a letter is not a division symbol
 * (it is the start of a command such as {@code /foo}), so -1 is returned for it.
 */
private static int lexMatchSymbol(String script, int offset) {
	for (int k = 0; k < LEX_SYMBOL_TEXT.length; k++) {
		if (!script.startsWith(LEX_SYMBOL_TEXT[k], offset)) {
			continue;
		}
		if (LEX_SYMBOL_TYPES[k] == TType.DIVISION
				&& offset + 1 < script.length()
				&& Character.isLetter(script.charAt(offset + 1))) {
			//Protect against commands. No later table entry starts with '/'.
			return -1;
		}
		return k;
	}
	return -1;
}
/**
 * If the buffer has pending characters, emits them as a single token of type
 * UNKNOWN (they are classified in the second lexer pass) and clears the buffer.
 * @param buf The pending literal characters
 * @param tokens The token list to append to
 * @param target The code target to attach to the emitted token
 */
private static void lexFlushBuffer(StringBuilder buf, List<Token> tokens, Target target) {
	if (buf.length() > 0) {
		tokens.add(new Token(TType.UNKNOWN, buf.toString(), target));
		buf.setLength(0);
	}
}
/**
 * Lexes the script, and turns it into a token stream. This looks through the script
 * character by character.
 * <p>
 * The first pass splits the text into symbols, strings, whitespace, newlines and
 * untyped (UNKNOWN) literals, discarding comments. The second pass classifies the
 * UNKNOWN tokens (command, variable, ivariable, final var, keyword or literal),
 * absorbs unary plus/minus signs into the following literal, and, in pure
 * MethodScript, rejects symbols in improper locations.
 * @param script The script to lex
 * @param file The file this script came from, or potentially null if the code is from
 * a dynamic source
 * @param inPureMScript If the script is in pure MethodScript, this should be true. Pure
 * MethodScript is defined as code that doesn't have command alias wrappers.
 * @return A stream of tokens. An empty script yields an empty list.
 * @throws ConfigCompileException If compilation fails due to bad syntax: an unended
 * string or block comment, an unrecognized or malformed escape sequence, an invalid
 * ivariable name, or (in pure MethodScript) an unexpected symbol
 */
@SuppressWarnings("UnnecessaryContinue")
public static List<Token> lex(String script, File file, boolean inPureMScript) throws ConfigCompileException {
	if(script.isEmpty()){
		return new ArrayList<>();
	}
	if(script.charAt(0) == 0xFEFF){
		// Remove the UTF-8 Byte Order Mark, if present.
		script = script.substring(1);
	}
	script = script.replaceAll("\r\n", "\n");
	// The trailing newline guarantees that every character examined below, other
	// than the last one, has a successor, and that line comments always terminate.
	script = script + "\n";
	Set<String> keywords = KeywordList.getKeywordNames();
	List<Token> token_list = new ArrayList<>();
	//Set our state variables
	//state_in_quote is true inside both quote styles; in_smart_quote is only ever
	//true while state_in_quote is also true, and marks a double quoted string.
	boolean state_in_quote = false;
	int quoteLineNumberStart = 1;
	boolean in_smart_quote = false;
	int smartQuoteLineNumberStart = 1;
	//comment_is_block is only ever true while in_comment is also true.
	boolean in_comment = false;
	int commentLineNumberStart = 1;
	boolean comment_is_block = false;
	boolean in_opt_var = false;
	boolean inCommand = (!inPureMScript);
	boolean inMultiline = false;
	StringBuilder buf = new StringBuilder();
	int line_num = 1;
	int column = 1;
	int lastColumn = 0;
	Target target = Target.UNKNOWN;
	//first we lex
	for (int i = 0; i < script.length(); i++) {
		char c = script.charAt(i);
		//i may have advanced by more than one since the last iteration (multi character symbols)
		column += i - lastColumn;
		lastColumn = i;
		if (c == '\n') {
			line_num++;
			column = 1;
			if(!inMultiline && !inPureMScript){
				inCommand = true;
			}
		}
		target = new Target(line_num, file, column);
		//Comment handling. If we're inside a string, bypass this though
		if (!state_in_quote) {
			if (!in_comment) {
				//Block comment start
				if (script.startsWith("/*", i)) {
					in_comment = true;
					comment_is_block = true;
					commentLineNumberStart = line_num;
					i++;
					continue;
				}
				//Line comment start, either # or //
				if (c == '#') {
					in_comment = true;
					comment_is_block = false;
					continue;
				}
				if (script.startsWith("//", i)) {
					in_comment = true;
					comment_is_block = false;
					i++;
					continue;
				}
			} else if (comment_is_block) {
				//Block comment end. A "*/" inside a line comment is just part of that comment.
				if (script.startsWith("*/", i)) {
					in_comment = false;
					comment_is_block = false;
					i++;
					continue;
				}
			} else if (c == '\n') {
				//Line comment end.
				//NOTE(review): the newline that ends a line comment is consumed here, so no
				//NEWLINE token is emitted for it and the pending buffer is not flushed.
				//Confirm that this is intended.
				in_comment = false;
				continue;
			}
		}
		//Currently, if they are in a comment, we completely throw this away. Eventually block
		//comments that were started with /** will be kept and applied to the next identifier, but for the time
		//being, nothing.
		if (in_comment) {
			continue;
		}
		if (!state_in_quote) {
			//Fixed-text symbols. Math symbols must come after comment parsing, due to
			//the /* and */ block comments, which are caught above.
			int symbolIndex = lexMatchSymbol(script, i);
			if (symbolIndex >= 0) {
				String symbolText = LEX_SYMBOL_TEXT[symbolIndex];
				TType symbolType = LEX_SYMBOL_TYPES[symbolIndex];
				lexFlushBuffer(buf, token_list, target);
				// Dots are resolved later, because order of operations actually matters there, depending on
				// the previous construct, so DOT is simply passed along to the compiler stage like any other symbol.
				token_list.add(new Token(symbolType, symbolText, target));
				if (symbolType == TType.MULTILINE_START) {
					inMultiline = true;
				} else if (symbolType == TType.MULTILINE_END) {
					inMultiline = false;
				} else if (symbolType == TType.LSQUARE_BRACKET) {
					in_opt_var = true;
				} else if (symbolType == TType.RSQUARE_BRACKET) {
					in_opt_var = false;
				}
				//Skip the remaining characters of a multi character symbol
				i += symbolText.length() - 1;
				continue;
			}
			//A lone =. "==" and "===" were already matched by the symbol table above.
			if (c == '=') {
				lexFlushBuffer(buf, token_list, target);
				if(inCommand){
					if (in_opt_var) {
						token_list.add(new Token(TType.OPT_VAR_ASSIGN, "=", target));
					} else {
						token_list.add(new Token(TType.ALIAS_END, "=", target));
						inCommand = false;
					}
				} else {
					token_list.add(new Token(TType.ASSIGNMENT, "=", target));
				}
				continue;
			}
			if (c == '(') {
				if (buf.length() > 0) {
					token_list.add(new Token(TType.FUNC_NAME, buf.toString(), target));
					buf.setLength(0);
				} else {
					//The previous non-whitespace token, if unknown, should be changed to a FUNC_NAME. If it's not
					//unknown (or there is none), we may be doing standalone parenthesis, so auto tack on
					//the __autoconcat__ function
					int nameIndex = token_list.size() - 1;
					while (nameIndex >= 0 && token_list.get(nameIndex).type == TType.WHITESPACE) {
						nameIndex--;
					}
					if (nameIndex >= 0 && token_list.get(nameIndex).type == TType.UNKNOWN) {
						token_list.get(nameIndex).type = TType.FUNC_NAME;
						//Go ahead and remove the whitespace here too, it breaks things
						while (token_list.size() - 1 > nameIndex) {
							token_list.remove(token_list.size() - 1);
						}
					} else {
						token_list.add(new Token(TType.FUNC_NAME, "__autoconcat__", target));
					}
				}
				token_list.add(new Token(TType.FUNC_START, "(", target));
				continue;
			}
			if (Character.isWhitespace(c) && c != '\n') {
				//keep the whitespace, but end the previous token, unless the last character
				//was also whitespace. All whitespace is added as a single space.
				lexFlushBuffer(buf, token_list, target);
				if (token_list.size() > 0
						&& token_list.get(token_list.size() - 1).type != TType.WHITESPACE) {
					token_list.add(new Token(TType.WHITESPACE, " ", target));
				}
				continue;
			}
		}
		if (c == '\'') {
			if (state_in_quote && !in_smart_quote) {
				//End of a single quoted string. Emitted even if empty.
				token_list.add(new Token(TType.STRING, buf.toString(), target));
				buf.setLength(0);
				state_in_quote = false;
				continue;
			} else if (!state_in_quote) {
				state_in_quote = true;
				quoteLineNumberStart = line_num;
				in_smart_quote = false;
				lexFlushBuffer(buf, token_list, target);
				continue;
			} else {
				//we're in a smart quote
				buf.append("'");
			}
		} else if (c == '"') {
			if (state_in_quote && in_smart_quote) {
				//End of a double quoted string. Emitted even if empty.
				token_list.add(new Token(TType.SMART_STRING, buf.toString(), target));
				buf.setLength(0);
				state_in_quote = false;
				in_smart_quote = false;
				continue;
			} else if (!state_in_quote) {
				state_in_quote = true;
				in_smart_quote = true;
				smartQuoteLineNumberStart = line_num;
				lexFlushBuffer(buf, token_list, target);
				continue;
			} else {
				//we're in normal quotes
				buf.append('"');
			}
		} else if (c == '\\') {
			//escaped characters
			if (state_in_quote) {
				//Safe: a backslash can never be the final character, because of the appended newline.
				char c2 = script.charAt(i + 1);
				if (c2 == '\\') {
					buf.append("\\");
				} else if (c2 == '\'') {
					buf.append("'");
				} else if (c2 == '"') {
					buf.append('"');
				} else if (c2 == 'n') {
					buf.append("\n");
				} else if (c2 == 'r'){
					buf.append("\r");
				} else if(c2 == 't'){
					buf.append("\t");
				} else if(c2 == '@' && in_smart_quote){
					//The backslash is kept, so the escaped @ can still be told apart later
					buf.append("\\@");
				} else if (c2 == 'u') {
					//Grab the next 4 characters, and check to see if they are a hex number.
					//If the script ends before that, it's a malformed escape, not a crash.
					if (i + 6 > script.length()) {
						throw new ConfigCompileException("Unrecognized unicode escape sequence", target);
					}
					String unicode = script.substring(i + 2, i + 6);
					int codePoint;
					try {
						codePoint = Integer.parseInt(unicode, 16);
					} catch (NumberFormatException e) {
						throw new ConfigCompileException("Unrecognized unicode escape sequence", target);
					}
					if (codePoint < 0) {
						//parseInt accepts a leading minus sign, but that is not a code point
						throw new ConfigCompileException("Unrecognized unicode escape sequence", target);
					}
					buf.append(Character.toChars(codePoint));
					i += 4;
				} else {
					//Since we might expand this list later, don't let them
					//use unescaped backslashes
					throw new ConfigCompileException("The escape sequence \\" + c2 + " is not a recognized escape sequence", target);
				}
				//Skip the character that followed the backslash
				i++;
				continue;
			} else {
				//Control character backslash
				//NOTE(review): the pending buffer is not flushed before this token is added,
				//so preceding literal characters are emitted after it. Confirm this is intended.
				token_list.add(new Token(TType.SEPERATOR, "\\", target));
			}
		} else if (state_in_quote) {
			buf.append(c);
			continue;
		} else if (c == '\n') {
			//We can't be in a comment here, comments were skipped above
			lexFlushBuffer(buf, token_list, target);
			token_list.add(new Token(TType.NEWLINE, "\n", target));
			continue;
		} else { //in a literal
			buf.append(c);
			continue;
		}
	} //end lexing
	if (state_in_quote) {
		if (in_smart_quote) {
			throw new ConfigCompileException("Unended string literal. You started the last double quote on line " + smartQuoteLineNumberStart, target);
		} else {
			throw new ConfigCompileException("Unended string literal. You started the last single quote on line " + quoteLineNumberStart, target);
		}
	}
	//Line comments always end at the appended newline, so only a block comment can still be open
	if (in_comment) {
		throw new ConfigCompileException("Unended block comment. You started the comment on line " + commentLineNumberStart, target);
	}
	//look at the tokens, and get meaning from them. Also, look for improper symbol locations,
	//and go ahead and absorb unary +- into the token
	for (int i = 0; i < token_list.size(); i++) {
		Token t = token_list.get(i);
		Token prev2 = i - 2 >= 0 ? token_list.get(i - 2) : new Token(TType.UNKNOWN, "", t.target);
		Token prev1 = i - 1 >= 0 ? token_list.get(i - 1) : new Token(TType.UNKNOWN, "", t.target);
		Token next = i + 1 < token_list.size() ? token_list.get(i + 1) : new Token(TType.UNKNOWN, "", t.target);
		if (t.type == TType.UNKNOWN && prev1.type.isPlusMinus() && !prev2.type.isIdentifier()
				&& !prev2.type.equals(TType.FUNC_END)
				&& !IVAR_PATTERN.matcher(t.val()).matches()
				&& !VAR_PATTERN.matcher(t.val()).matches()) { // Last boolean makes -@b equal to - @b, instead of a string.
			//It is a negative/positive number. Absorb the sign
			t.value = prev1.value + t.value;
			token_list.remove(i - 1);
			i--;
		}
		if (t.type.equals(TType.UNKNOWN)) {
			if (t.val().charAt(0) == '/' && t.val().length() > 1) {
				t.type = TType.COMMAND;
			} else if (VAR_PATTERN.matcher(t.val()).matches()) {
				t.type = TType.VARIABLE;
			} else if (IVAR_PATTERN.matcher(t.val()).matches()) {
				t.type = TType.IVARIABLE;
			} else if (t.val().charAt(0) == '@') {
				//Report the error at the offending token, not at the end of the script
				throw new ConfigCompileException("IVariables must match the regex: " + IVAR_PATTERN, t.getTarget());
			} else if (t.val().equals("$")) {
				t.type = TType.FINAL_VAR;
			} else if(keywords.contains(t.val())){
				t.type = TType.KEYWORD;
			} else {
				t.type = TType.LIT;
			}
		}
		//Skip this check if we're not in pure mscript
		if(inPureMScript){
			if (t.type.isSymbol() && !t.type.isUnary() && !next.type.isUnary()) {
				if (prev1.type.equals(TType.FUNC_START) || prev1.type.equals(TType.COMMA)
						|| next.type.equals(TType.FUNC_END) || next.type.equals(TType.COMMA)
						|| prev1.type.isSymbol() || next.type.isSymbol()) {
					throw new ConfigCompileException("Unexpected symbol (" + t.val() + ")", t.getTarget());
				}
			}
		}
	}
	return token_list;
}
/**
 * This function breaks the token stream into parts, separating the
 * aliases/MethodScript from the command triggers.
 * <p>
 * The stream is processed in four steps: runs of newlines are collapsed into one
 * and whitespace tokens are dropped; newlines between the multiline start
 * ({@code >>>}) and end ({@code <<<}) symbols are removed along with the symbols
 * themselves; a newline directly after a separator backslash is removed; and
 * finally the stream is split on the remaining newlines, with the tokens before
 * the alias end ({@code =}) forming the left side of a script and the tokens
 * after it forming the right side.
 *
 * @param tokenStream The token stream to split. May be null or empty.
 * @return The scripts found in the stream, in order. If {@code tokenStream} is
 * null or empty, an empty list is returned. A right side is only turned into a
 * script once the newline that ends it has been seen.
 * @throws ConfigCompileException If the multiline symbols are mismatched or
 * misplaced, or if the left side of a script contains a newline that is
 * preceded by another token
 */
public static List<Script> preprocess(List<Token> tokenStream) throws ConfigCompileException {
	if(tokenStream == null || tokenStream.isEmpty()){
		return new ArrayList<>();
	}
	//First, pull out the duplicate newlines, and drop whitespace tokens
	List<Token> deduped = new ArrayList<>();
	int pos = 0;
	while (pos < tokenStream.size()) {
		Token current = tokenStream.get(pos);
		if (current.type.equals(TType.NEWLINE)) {
			deduped.add(new Token(TType.NEWLINE, "\n", current.target));
			//Skip the rest of this run of newlines
			do {
				pos++;
			} while (pos < tokenStream.size() && tokenStream.get(pos).type.equals(TType.NEWLINE));
			if (pos >= tokenStream.size()) {
				break;
			}
			current = tokenStream.get(pos);
		}
		if (current.type != TType.WHITESPACE) {
			deduped.add(current);
		}
		pos++;
	}
	if (!deduped.isEmpty() && deduped.get(0).type.equals(TType.NEWLINE)) {
		deduped.remove(0);
	}
	//Handle multiline constructs
	List<Token> joined = new ArrayList<>();
	boolean insideMultiline = false;
	//The last token visited, used as the location of the "missing multiline end" error.
	//It stays null only if there were no tokens at all, in which case we can't be inside a multiline.
	Token lastToken = null;
	for (int i = 0; i < deduped.size(); i++) {
		Token prevToken = i - 1 >= 0 ? deduped.get(i - 1) : new Token(TType.UNKNOWN, "", Target.UNKNOWN);
		Token thisToken = deduped.get(i);
		lastToken = thisToken;
		Token nextToken = i + 1 < deduped.size() ? deduped.get(i + 1) : new Token(TType.UNKNOWN, "", Target.UNKNOWN);
		//take out newlines between the = >>> and <<< tokens (also the tokens)
		if (thisToken.type.equals(TType.ALIAS_END) && nextToken.val().equals(">>>")) {
			insideMultiline = true;
			joined.add(thisToken);
			//Skip over the >>> token as well
			i++;
			continue;
		}
		if (thisToken.val().equals("<<<")) {
			if (!insideMultiline) {
				throw new ConfigCompileException("Found multiline end symbol, and no multiline start found",
						thisToken.target);
			}
			insideMultiline = false;
			continue;
		}
		if (thisToken.val().equals(">>>") && insideMultiline) {
			throw new ConfigCompileException("Did not expect a multiline start symbol here, are you missing a multiline end symbol above this line?", thisToken.target);
		}
		//A >>> directly after the alias end was consumed above, so any >>> that
		//reaches this point is misplaced.
		if (thisToken.val().equals(">>>") && !prevToken.type.equals(TType.ALIAS_END)) {
			throw new ConfigCompileException("Multiline symbol must follow the alias_end (=) symbol", thisToken.target);
		}
		//If we're not in a multiline construct, or we are in it and it's not a newline, add
		//it
		if (!insideMultiline || !thisToken.type.equals(TType.NEWLINE)) {
			joined.add(thisToken);
		}
	}
	if (insideMultiline) {
		throw new ConfigCompileException("Expecting a multiline end symbol, but your last multiline alias appears to be missing one.", lastToken.target);
	}
	//take out newlines that are behind a \
	List<Token> continued = new ArrayList<>();
	for (int i = 0; i < joined.size(); i++) {
		Token current = joined.get(i);
		continued.add(current);
		//A quoted string that merely contains a backslash is not a line continuation
		boolean isStringLiteral = current.type.equals(TType.STRING) || current.type.equals(TType.SMART_STRING);
		if (!isStringLiteral && current.val().equals("\\") && i + 1 < joined.size()
				&& joined.get(i + 1).type.equals(TType.NEWLINE)) {
			//Skip the newline that follows the backslash
			i++;
		}
	}
	//Now that we have all lines minified, we should be able to split
	//on newlines, and easily find the left and right sides
	List<Token> left = new ArrayList<>();
	List<Token> right = new ArrayList<>();
	List<Script> scripts = new ArrayList<>();
	boolean inLeft = true;
	for (Token t : continued) {
		if (inLeft) {
			if (t.type == TType.ALIAS_END) {
				inLeft = false;
			} else {
				left.add(t);
			}
			continue;
		}
		if (t.type != TType.NEWLINE) {
			right.add(t);
			continue;
		}
		//A newline on the right side ends the current script
		inLeft = true;
		// Check for spurious symbols, which indicate an issue with the
		// script, but ignore any whitespace.
		for(int j = left.size() - 1; j >= 0; j--){
			if(left.get(j).type == TType.NEWLINE){
				if(j > 0 && left.get(j - 1).type != TType.WHITESPACE){
					throw new ConfigCompileException("Unexpected token: " + left.get(j - 1).val(), left.get(j - 1).getTarget());
				}
			}
		}
		scripts.add(new Script(left, right));
		left = new ArrayList<>();
		right = new ArrayList<>();
	}
	return scripts;
}
/**
* Compiles the token stream into a valid ParseTree. This also includes optimization
* and reduction.
* @param stream The token stream, as generated by {@link #lex(String, File, boolean) lex}
* @return A fully compiled, optimized, and reduced parse tree. If {@code stream} is
* null or empty, null is returned.
* @throws ConfigCompileException If the script contains syntax errors. Additionally,
* during optimization, certain methods may cause compile errors. Any function that
* can optimize static occurrences and throws a {@link ConfigRuntimeException} will
* have that exception converted to a ConfigCompileException.
*/
@SuppressWarnings("UnnecessaryContinue")
public static ParseTree compile(List<Token> stream) throws ConfigCompileException, ConfigCompileGroupException {
Set<ConfigCompileException> compilerErrors = new HashSet<>();
if(stream == null || stream.isEmpty()){
return null;
}
Target unknown;
try {
//Instead of using Target.UNKNOWN, we can at least set the file.
unknown = new Target(0, stream.get(0).target.file(), 0);
} catch (Exception e) {
unknown = Target.UNKNOWN;
}
List<Token> tempStream = new ArrayList<>(stream.size());
for (Token t : stream) {
if(!t.type.isWhitespace()){
tempStream.add(t);
}
}
stream = tempStream;
ParseTree tree = new ParseTree(fileOptions);
tree.setData(CNull.NULL);
Stack<ParseTree> parents = new Stack<>();
/**
* constructCount is used to determine if we need to use autoconcat
* when reaching a FUNC_END. The previous constructs, if the count
* is greater than 1, will be moved down into an autoconcat.
*/
Stack<AtomicInteger> constructCount = new Stack<>();
constructCount.push(new AtomicInteger(0));
parents.push(tree);
tree.addChild(new ParseTree(new CFunction("__autoconcat__", unknown), fileOptions));
parents.push(tree.getChildAt(0));
tree = tree.getChildAt(0);
constructCount.push(new AtomicInteger(0));
/**
* The array stack is used to keep track of the number
* of square braces in use.
*/
Stack<AtomicInteger> arrayStack = new Stack<>();
arrayStack.add(new AtomicInteger(-1));
Stack<AtomicInteger> minusArrayStack = new Stack<>();
Stack<AtomicInteger> minusFuncStack = new Stack<>();
int parens = 0;
Token t = null;
int bracketCount = 0;
for (int i = 0; i < stream.size(); i++) {
t = stream.get(i);
//Token prev2 = i - 2 >= 0 ? stream.get(i - 2) : new Token(TType.UNKNOWN, "", t.target);
Token prev1 = i - 1 >= 0 ? stream.get(i - 1) : new Token(TType.UNKNOWN, "", t.target);
Token next1 = i + 1 < stream.size() ? stream.get(i + 1) : new Token(TType.UNKNOWN, "", t.target);
Token next2 = i + 2 < stream.size() ? stream.get(i + 2) : new Token(TType.UNKNOWN, "", t.target);
Token next3 = i + 3 < stream.size() ? stream.get(i + 3) : new Token(TType.UNKNOWN, "", t.target);
// Brace handling
if(t.type == TType.LCURLY_BRACKET){
ParseTree b = new ParseTree(new CFunction("__cbrace__", t.getTarget()), fileOptions);
tree.addChild(b);
tree = b;
parents.push(b);
bracketCount++;
constructCount.push(new AtomicInteger(0));
continue;
}
if(t.type == TType.RCURLY_BRACKET){
bracketCount--;
if (constructCount.peek().get() > 1) {
//We need to autoconcat some stuff
int stacks = constructCount.peek().get();
int replaceAt = tree.getChildren().size() - stacks;
ParseTree c = new ParseTree(new CFunction("__autoconcat__", tree.getTarget()), fileOptions);
List<ParseTree> subChildren = new ArrayList<>();
for (int b = replaceAt; b < tree.numberOfChildren(); b++) {
subChildren.add(tree.getChildAt(b));
}
c.setChildren(subChildren);
if (replaceAt > 0) {
List<ParseTree> firstChildren = new ArrayList<>();
for (int d = 0; d < replaceAt; d++) {
firstChildren.add(tree.getChildAt(d));
}
tree.setChildren(firstChildren);
} else {
tree.removeChildren();
}
tree.addChild(c);
}
parents.pop();
tree = parents.peek();
constructCount.pop();
try {
constructCount.peek().incrementAndGet();
} catch (EmptyStackException e) {
throw new ConfigCompileException("Unexpected end curly brace", t.target);
}
continue;
}
//Associative array/label handling
if(t.type == TType.LABEL && tree.getChildren().size() > 0){
//If it's not an atomic identifier it's an error.
if(!prev1.type.isAtomicLit() && prev1.type != TType.IVARIABLE && prev1.type != TType.KEYWORD){
ConfigCompileException error = new ConfigCompileException("Invalid label specified", t.getTarget());
if(prev1.type == TType.FUNC_END){
// This is a fairly common mistake, so we have special handling for this,
// because otherwise we would get a "Mismatched parenthesis" warning (which doesn't make sense),
// and potentially lots of other invalid errors down the line, so we go ahead
// and stop compilation at this point.
throw error;
}
compilerErrors.add(error);
}
// Wrap previous construct in a CLabel
ParseTree cc = tree.getChildren().get(tree.getChildren().size() - 1);
tree.removeChildAt(tree.getChildren().size() - 1);
tree.addChild(new ParseTree(new CLabel(cc.getData()), fileOptions));
continue;
}
//Array notation handling
if (t.type.equals(TType.LSQUARE_BRACKET)) {
arrayStack.push(new AtomicInteger(tree.getChildren().size() - 1));
continue;
} else if (t.type.equals(TType.RSQUARE_BRACKET)) {
boolean emptyArray = false;
if (prev1.type.equals(TType.LSQUARE_BRACKET)) {
emptyArray = true;
}
if (arrayStack.size() == 1) {
throw new ConfigCompileException("Mismatched square bracket", t.target);
}
//array is the location of the array
int array = arrayStack.pop().get();
//index is the location of the first node with the index
int index = array + 1;
if (!tree.hasChildren()) {
throw new ConfigCompileException("Brackets are illegal here", t.target);
}
ParseTree myArray = tree.getChildAt(array);
ParseTree myIndex;
if (!emptyArray) {
myIndex = new ParseTree(new CFunction("__autoconcat__", myArray.getTarget()), fileOptions);
for (int j = index; j < tree.numberOfChildren(); j++) {
myIndex.addChild(tree.getChildAt(j));
}
} else {
myIndex = new ParseTree(new CSlice("0..-1", t.target), fileOptions);
}
tree.setChildren(tree.getChildren().subList(0, array));
ParseTree arrayGet = new ParseTree(new CFunction("array_get", t.target), fileOptions);
arrayGet.addChild(myArray);
arrayGet.addChild(myIndex);
// Check if the @var[...] had a negating "-" in front. If so, add a neg().
if (minusArrayStack.size() != 0 && arrayStack.size() + 1 == minusArrayStack.peek().get()) {
if (!next1.type.equals(TType.LSQUARE_BRACKET)) { // Wait if there are more array_get's comming.
ParseTree negTree = new ParseTree(new CFunction("neg", unknown), fileOptions);
negTree.addChild(arrayGet);
tree.addChild(negTree);
minusArrayStack.pop();
} else {
// Negate the next array_get instead, so just add this one to the tree.
tree.addChild(arrayGet);
}
} else {
tree.addChild(arrayGet);
}
constructCount.peek().set(constructCount.peek().get() - myIndex.numberOfChildren());
continue;
}
//Smart strings
if (t.type == TType.SMART_STRING) {
if(t.val().contains("@")) {
ParseTree function = new ParseTree(fileOptions);
function.setData(new CFunction(new Compiler.smart_string().getName(), t.target));
ParseTree string = new ParseTree(fileOptions);
string.setData(new CString(t.value, t.target));
function.addChild(string);
tree.addChild(function);
} else {
tree.addChild(new ParseTree(new CString(t.val(), t.target), fileOptions));
}
constructCount.peek().incrementAndGet();
continue;
}
if (t.type == TType.DEREFERENCE) {
//Currently unimplemented, but going ahead and making it strict
compilerErrors.add(new ConfigCompileException("The '" + t.val() + "' symbol is not currently allowed in raw strings. You must quote all"
+ " symbols.", t.target));
}
if (t.type.equals(TType.FUNC_NAME)) {
CFunction func = new CFunction(t.val(), t.target);
ParseTree f = new ParseTree(func, fileOptions);
tree.addChild(f);
constructCount.push(new AtomicInteger(0));
tree = f;
parents.push(f);
} else if (t.type.equals(TType.FUNC_START)) {
if (!prev1.type.equals(TType.FUNC_NAME)) {
throw new ConfigCompileException("Unexpected parenthesis", t.target);
}
parens++;
} else if (t.type.equals(TType.FUNC_END)) {
if (parens <= 0) {
throw new ConfigCompileException("Unexpected parenthesis", t.target);
}
parens--;
ParseTree function = parents.pop();
if (constructCount.peek().get() > 1) {
//We need to autoconcat some stuff
int stacks = constructCount.peek().get();
int replaceAt = tree.getChildren().size() - stacks;
ParseTree c = new ParseTree(new CFunction("__autoconcat__", tree.getTarget()), fileOptions);
List<ParseTree> subChildren = new ArrayList<>();
for (int b = replaceAt; b < tree.numberOfChildren(); b++) {
subChildren.add(tree.getChildAt(b));
}
c.setChildren(subChildren);
if (replaceAt > 0) {
List<ParseTree> firstChildren = new ArrayList<>();
for (int d = 0; d < replaceAt; d++) {
firstChildren.add(tree.getChildAt(d));
}
tree.setChildren(firstChildren);
} else {
tree.removeChildren();
}
tree.addChild(c);
}
constructCount.pop();
try {
constructCount.peek().incrementAndGet();
} catch (EmptyStackException e) {
throw new ConfigCompileException("Unexpected end parenthesis", t.target);
}
try {
tree = parents.peek();
} catch (EmptyStackException e) {
throw new ConfigCompileException("Unexpected end parenthesis", t.target);
}
// Handle "-func(args)" and "-func(args)[index]".
if (minusFuncStack.size() != 0 && minusFuncStack.peek().get() == parens + 1) {
if(next1.type.equals(TType.LSQUARE_BRACKET)) {
// Move the negation to the array_get which contains this function.
minusArrayStack.push(new AtomicInteger(arrayStack.size() + 1)); // +1 because the bracket isn't counted yet.
} else {
// Negate this function.
ParseTree negTree = new ParseTree(new CFunction("neg", unknown), fileOptions);
negTree.addChild(tree.getChildAt(tree.numberOfChildren() - 1));
tree.removeChildAt(tree.numberOfChildren() - 1);
tree.addChildAt(tree.numberOfChildren(), negTree);
}
minusFuncStack.pop();
}
} else if (t.type.equals(TType.COMMA)) {
if (constructCount.peek().get() > 1) {
int stacks = constructCount.peek().get();
int replaceAt = tree.getChildren().size() - stacks;
ParseTree c = new ParseTree(new CFunction("__autoconcat__", unknown), fileOptions);
List<ParseTree> subChildren = new ArrayList<>();
for (int b = replaceAt; b < tree.numberOfChildren(); b++) {
subChildren.add(tree.getChildAt(b));
}
c.setChildren(subChildren);
if (replaceAt > 0) {
List<ParseTree> firstChildren = new ArrayList<>();
for (int d = 0; d < replaceAt; d++) {
firstChildren.add(tree.getChildAt(d));
}
tree.setChildren(firstChildren);
} else {
tree.removeChildren();
}
tree.addChild(c);
}
constructCount.peek().set(0);
continue;
}
if(t.type == TType.SLICE){
//We got here because the previous token isn't being ignored, because it's
//actually a control character, instead of whitespace, but this is a
//"empty first" slice notation. Compare this to the code below.
try{
CSlice slice;
String value = next1.val();
if(next1.type == TType.MINUS || next1.type == TType.PLUS){
value = next1.val() + next2.val();
i++;
}
slice = new CSlice(".." + value, t.getTarget());
i++;
tree.addChild(new ParseTree(slice, fileOptions));
constructCount.peek().incrementAndGet();
continue;
} catch(ConfigRuntimeException ex){
//CSlice can throw CREs, but at this stage, we have to
//turn them into a CCE.
throw new ConfigCompileException(ex);
}
}
if (next1.type.equals(TType.SLICE)) {
//Slice notation handling
try {
CSlice slice;
if (t.type.isSeparator() || (t.type.isWhitespace() && prev1.type.isSeparator()) || t.type.isKeyword()) {
//empty first
String value = next2.val();
i++;
if(next2.type == TType.MINUS || next2.type == TType.PLUS){
value = next2.val() + next3.val();
i++;
}
slice = new CSlice(".." + value, next1.getTarget());
if(t.type.isKeyword()){
tree.addChild(new ParseTree(new CKeyword(t.val(), t.getTarget()), fileOptions));
constructCount.peek().incrementAndGet();
}
} else if (next2.type.isSeparator() || next2.type.isKeyword()) {
//empty last
String modifier = "";
if(prev1.type == TType.MINUS || prev1.type == TType.PLUS){
//The negative would have already been inserted into the tree
modifier = prev1.val();
tree.removeChildAt(tree.getChildren().size() - 1);
}
slice = new CSlice(modifier + t.value + "..", t.target);
} else {
//both are provided
String modifier1 = "";
if(prev1.type == TType.MINUS || prev1.type == TType.PLUS){
//It's a negative, incorporate that here, and remove the
//minus from the tree
modifier1 = prev1.val();
tree.removeChildAt(tree.getChildren().size() - 1);
}
Token first = t;
if(first.type.isWhitespace()){
first = prev1;
}
Token second = next2;
i++;
String modifier2 = "";
if(next2.type == TType.MINUS || next2.type == TType.PLUS){
modifier2 = next2.val();
second = next3;
i++;
}
slice = new CSlice(modifier1 + first.value + ".." + modifier2 + second.value, t.target);
}
i++;
tree.addChild(new ParseTree(slice, fileOptions));
constructCount.peek().incrementAndGet();
continue;
} catch(ConfigRuntimeException ex){
//CSlice can throw CREs, but at this stage, we have to
//turn them into a CCE.
throw new ConfigCompileException(ex);
}
} else if (t.type == TType.LIT) {
Construct c = Static.resolveConstruct(t.val(), t.target);
if(c instanceof CString && fileOptions.isStrict()){
compilerErrors.add(new ConfigCompileException("Bare strings are not allowed in strict mode", t.target));
} else if(c instanceof CInt && next1.type == TType.DOT && next2.type == TType.LIT) {
// make CDouble here because otherwise Long.parseLong() will remove
// minus zero before decimals and leading zeroes after decimals
try {
c = new CDouble(Double.parseDouble(t.val() + '.' + next2.val()), t.target);
i += 2;
} catch (NumberFormatException e) {
// Not a double
}
}
tree.addChild(new ParseTree(c, fileOptions));
constructCount.peek().incrementAndGet();
} else if (t.type.equals(TType.STRING) || t.type.equals(TType.COMMAND)) {
tree.addChild(new ParseTree(new CString(t.val(), t.target), fileOptions));
constructCount.peek().incrementAndGet();
} else if (t.type.equals(TType.IDENTIFIER)) {
tree.addChild(new ParseTree(new CPreIdentifier(t.val(), t.target), fileOptions));
constructCount.peek().incrementAndGet();
} else if(t.type.isKeyword()){
tree.addChild(new ParseTree(new CKeyword(t.val(), t.getTarget()), fileOptions));
constructCount.peek().incrementAndGet();
} else if (t.type.equals(TType.IVARIABLE)) {
tree.addChild(new ParseTree(new IVariable(t.val(), t.target), fileOptions));
constructCount.peek().incrementAndGet();
} else if (t.type.equals(TType.UNKNOWN)) {
tree.addChild(new ParseTree(Static.resolveConstruct(t.val(), t.target), fileOptions));
constructCount.peek().incrementAndGet();
} else if (t.type.isSymbol()) { //Logic and math symbols
// Attempt to find "-@var" and change it to "neg(@var)" if it's not @a - @b. Else just add the symbol.
// Also handles "-function()" and "-@var[index]".
if (t.type.equals(TType.MINUS) && !prev1.type.isAtomicLit() && !prev1.type.equals(TType.IVARIABLE)
&& !prev1.type.equals(TType.VARIABLE) && !prev1.type.equals(TType.RCURLY_BRACKET)
&& !prev1.type.equals(TType.RSQUARE_BRACKET) && !prev1.type.equals(TType.FUNC_END)
&& (next1.type.equals(TType.IVARIABLE) || next1.type.equals(TType.VARIABLE) || next1.type.equals(TType.FUNC_NAME))) {
// Check if we are negating a value from an array, function or variable.
if (next2.type.equals(TType.LSQUARE_BRACKET)) {
minusArrayStack.push(new AtomicInteger(arrayStack.size() + 1)); // +1 because the bracket isn't counted yet.
} else if (next1.type.equals(TType.FUNC_NAME)) {
minusFuncStack.push(new AtomicInteger(parens + 1)); // +1 because the function isn't counted yet.
} else {
ParseTree negTree = new ParseTree(new CFunction("neg", unknown), fileOptions);
negTree.addChild(new ParseTree(new IVariable(next1.value, next1.target), fileOptions));
tree.addChild(negTree);
constructCount.peek().incrementAndGet();
i++; // Skip the next variable as we've just handled it.
}
} else {
tree.addChild(new ParseTree(new CSymbol(t.val(), t.type, t.target), fileOptions));
constructCount.peek().incrementAndGet();
}
} else if (t.type == TType.DOT){
// Check for doubles that start with a decimal, otherwise concat
Construct c = null;
if(next1.type == TType.LIT && prev1.type != TType.STRING && prev1.type != TType.SMART_STRING) {
try {
c = new CDouble(Double.parseDouble('.' + next1.val()), t.target);
i++;
} catch (NumberFormatException e) {
// Not a double
}
}
if(c == null) {
c = new CSymbol(".", TType.CONCAT, t.target);
}
tree.addChild(new ParseTree(c, fileOptions));
constructCount.peek().incrementAndGet();
} else if (t.type.equals(TType.VARIABLE) || t.type.equals(TType.FINAL_VAR)) {
tree.addChild(new ParseTree(new Variable(t.val(), null, false, t.type.equals(TType.FINAL_VAR), t.target), fileOptions));
constructCount.peek().incrementAndGet();
//right_vars.add(new Variable(t.val(), null, t.line_num));
}
}
assert t != null;
if (arrayStack.size() != 1) {
throw new ConfigCompileException("Mismatched square brackets", t.target);
}
if (parens != 0) {
throw new ConfigCompileException("Mismatched parenthesis", t.target);
}
if (bracketCount != 0){
throw new ConfigCompileException("Mismatched curly braces", t.target);
}
Stack<List<Procedure>> procs = new Stack<>();
procs.add(new ArrayList<Procedure>());
processKeywords(tree);
optimizeAutoconcats(tree, compilerErrors);
optimize(tree, procs, compilerErrors);
link(tree, compilerErrors);
checkLabels(tree, compilerErrors);
checkBreaks(tree, compilerErrors);
if(!compilerErrors.isEmpty()){
if(compilerErrors.size() == 1){
// Just throw the one CCE
for(ConfigCompileException e : compilerErrors){
throw e;
}
} else {
throw new ConfigCompileGroupException(compilerErrors);
}
}
parents.pop();
tree = parents.pop();
return tree;
}
/**
 * Entry point of the break() validation pass. Starts the recursive walk at the
 * given node with a loop depth of zero and no enclosing unbreakable construct,
 * so that breaks which would bubble up past procedures or the root code tree
 * are detected. Problems are added to the supplied set, not thrown.
 * @param tree The root of the tree to check
 * @param compilerExceptions The set that detected errors are added to
 */
private static void checkBreaks(ParseTree tree, Set<ConfigCompileException> compilerExceptions) {
	// At the root we are inside no loop, and no unbreakable function has been seen yet.
	final long initialLoopDepth = 0;
	final String enclosingUnbreakable = null;
	checkBreaks0(tree, initialLoopDepth, enclosingUnbreakable, compilerExceptions);
}
/**
 * Recursive worker for the break() validation pass. Walks the function nodes of
 * the tree while tracking how many breakable functions (loops) currently enclose
 * the node, and reports every break() whose count exceeds that depth.
 * Errors are added to {@code compilerErrors}; nothing is thrown from here.
 * @param tree The node to check
 * @param currentLoops The number of breakable functions enclosing this node
 * @param lastUnbreakable The name of the closest enclosing function annotated as
 * unbreakable, or null if there is none. Only used to build the error message.
 * @param compilerErrors The set that detected errors are added to
 */
private static void checkBreaks0(ParseTree tree, long currentLoops, String lastUnbreakable, Set<ConfigCompileException> compilerErrors) {
	if(!(tree.getData() instanceof CFunction)){
		//Don't care about these
		return;
	}
	if(tree.getData().val().startsWith("_")){
		//It's a proc. We need to recurse, but not check this "function"
		for(ParseTree child : tree.getChildren()){
			checkBreaks0(child, currentLoops, lastUnbreakable, compilerErrors);
		}
		return;
	}
	Function func;
	try {
		func = ((CFunction)tree.getData()).getFunction();
	} catch (ConfigCompileException ex) {
		compilerErrors.add(ex);
		return;
	}
	if(func.getClass().getAnnotation(nolinking.class) != null){
		// Don't link here
		return;
	}
	// We have special handling for procs and closures, and of course break and the loops.
	// If any of these are here, we kick into special handling mode. Otherwise, we recurse.
	if(func instanceof DataHandling._break){
		// First grab the counter in the break function. If the break function doesn't
		// have any children, then 1 is implied.
		long breakCounter = 1;
		if(tree.getChildren().size() == 1){
			// Compile errors are collected rather than thrown, so this pass still runs
			// after an earlier stage has rejected the argument. A blind cast would then
			// surface as a ClassCastException instead of a proper compile error, so
			// verify the type first.
			Construct breakArgument = tree.getChildAt(0).getData();
			if(!(breakArgument instanceof CInt)){
				compilerErrors.add(new ConfigCompileException(
						"The argument to break() must be a hard coded integer.", tree.getTarget()));
				return;
			}
			breakCounter = ((CInt) breakArgument).getInt();
		}
		if(breakCounter > currentLoops){
			// Throw an exception, as this would break above a loop. Different error messages
			// are applied to different cases
			if(currentLoops == 0){
				compilerErrors.add(new ConfigCompileException("The break() function can only break out of loops" + (lastUnbreakable == null ? "." :
						", but an attempt to break out of a " + lastUnbreakable + " was detected."), tree.getTarget()));
			} else {
				compilerErrors.add(new ConfigCompileException("Too many breaks"
						+ " detected. Check your loop nesting, and set the break count to an appropriate value.", tree.getTarget()));
			}
		}
		return;
	}
	if(func.getClass().getAnnotation(unbreakable.class) != null){
		// Parse the children like normal, but reset the counter to 0.
		for(ParseTree child : tree.getChildren()){
			checkBreaks0(child, 0, func.getName(), compilerErrors);
		}
		return;
	}
	if(func.getClass().getAnnotation(breakable.class) != null){
		// Don't break yet, still recurse, but up our current loops counter.
		currentLoops++;
	}
	for(ParseTree child : tree.getChildren()){
		checkBreaks0(child, currentLoops, lastUnbreakable, compilerErrors);
	}
}
/**
 * Replaces __autoconcat__ nodes with whatever the __autoconcat__ function's
 * optimizeDynamic produces for them. This should happen early, and regardless
 * of whether optimizations are on or off, which is why it is a separate pass
 * from the normal optimizer. Children are handled before their parent.
 * @param root The node to process; it is modified in place
 * @param compilerExceptions The set that compile errors are added to
 */
private static void optimizeAutoconcats(ParseTree root, Set<ConfigCompileException> compilerExceptions){
	// Depth first: only descend into nodes that actually have children.
	for(ParseTree child : root.getChildren()){
		if(!child.hasChildren()){
			continue;
		}
		optimizeAutoconcats(child, compilerExceptions);
	}
	boolean isAutoconcat = root.getData() instanceof CFunction
			&& root.getData().val().equals(__autoconcat__);
	if(!isAutoconcat){
		return;
	}
	try {
		CFunction autoconcatCall = (CFunction) root.getData();
		Compiler.__autoconcat__ autoconcat = (Compiler.__autoconcat__) autoconcatCall.getFunction();
		ParseTree replacement = autoconcat.optimizeDynamic(root.getTarget(), root.getChildren(), root.getFileOptions());
		// Graft the optimized node onto the existing one, so parents keep their reference.
		root.setData(replacement.getData());
		root.setChildren(replacement.getChildren());
	} catch (ConfigCompileException ex) {
		compilerExceptions.add(ex);
	}
}
/**
 * Recurses down the tree and ensures that there are no dynamic labels. This has
 * to finish completely after optimization, because the optimizer has no
 * good hook to know when optimization for a unit is fully completed, until
 * ALL units are fully complete, so this happens separately after optimization,
 * but as a part of the normal compile process.
 * <p>
 * NOTE: the check itself is currently commented out, so this method does
 * nothing and never adds to {@code compilerErrors}.
 * @param tree The root of the tree to check
 * @param compilerErrors The set that detected errors would be added to (currently unused)
 * @throws ConfigCompileException Declared by the signature; not thrown by the current (empty) body
 */
private static void checkLabels(ParseTree tree, Set<ConfigCompileException> compilerErrors) throws ConfigCompileException {
// Disabled implementation, kept for reference:
// for(ParseTree t : tree.getChildren()){
// if(t.getData() instanceof CLabel){
// if(((CLabel)t.getData()).cVal() instanceof IVariable){
// throw new ConfigCompileException("Variables may not be used as labels", t.getTarget());
// }
// }
// checkLabels(t);
// }
}
/**
 * Recurses down the tree and
 * <ul><li>Links functions</li>
 * <li>Checks function arguments</li></ul>
 * This is a separate process from optimization, because optimization
 * ignores any missing functions.
 * Errors are added to {@code compilerErrors}; nothing is thrown from here.
 * @param tree The node to link
 * @param compilerErrors The set that detected errors are added to
 */
private static void link(ParseTree tree, Set<ConfigCompileException> compilerErrors) {
	FunctionBase treeFunction = null;
	try {
		treeFunction = FunctionList.getFunction(tree.getData());
		if(treeFunction.getClass().getAnnotation(nolinking.class) != null){
			//Don't link children of a nolinking function.
			return;
		}
	} catch(ConfigCompileException ex){
		//This can happen if the treeFunction isn't a function, is a proc, etc,
		//but we don't care, we just want to continue.
	}
	// Check the argument count, and do any custom linking the function may have
	if(treeFunction != null){
		List<Integer> allowedArgCounts = Arrays.asList(treeFunction.numArgs());
		// Integer.MAX_VALUE in the list marks a function that accepts any number of arguments.
		if (!allowedArgCounts.contains(Integer.MAX_VALUE) &&
				!allowedArgCounts.contains(tree.getChildren().size())) {
			compilerErrors.add(new ConfigCompileException("Incorrect number of arguments passed to "
					+ tree.getData().val(), tree.getData().getTarget()));
		}
		if(treeFunction instanceof Optimizable){
			Optimizable op = (Optimizable) treeFunction;
			if(op.optimizationOptions().contains(OptimizationOption.CUSTOM_LINK)){
				try {
					op.link(tree.getData().getTarget(), tree.getChildren());
				} catch(ConfigCompileException ex){
					compilerErrors.add(ex);
				}
			}
		}
	}
	// Walk the children
	for(ParseTree child : tree.getChildren()){
		if(child.getData() instanceof CFunction){
			String name = child.getData().val();
			// A single leading underscore marks a proc call, which cannot be looked up in the
			// function list. A double underscore marks a compiler function, which can.
			// startsWith is used instead of charAt so that an empty name or a lone "_"
			// cannot cause a StringIndexOutOfBoundsException.
			if (!name.startsWith("_") || name.startsWith("__")) {
				// This will throw an exception if the function doesn't exist.
				try {
					FunctionList.getFunction(child.getData());
				} catch(ConfigCompileException ex){
					compilerErrors.add(ex);
				}
			}
			link(child, compilerErrors);
		}
	}
}
/**
 * The name of the __autoconcat__ compiler function, as reported by the function
 * itself. Used to recognize and to create __autoconcat__ nodes by name.
 */
private static final String __autoconcat__ = new Compiler.__autoconcat__().getName();
/**
 * Recurses down into the tree, attempting to optimize where possible. A few
 * things have strong coupling, for information on these items, see the
 * documentation included in the source.
 * <p>
 * The node is modified in place. Roughly, the steps are:
 * <ol>
 * <li>Run priority optimizations, which must see the children before they are optimized.</li>
 * <li>Truncate code that follows a terminal function.</li>
 * <li>Optimize the children.</li>
 * <li>Try to resolve proc calls and proc definitions statically.</li>
 * <li>Run the function's dynamic optimization, and if all arguments are static,
 * its constant optimization.</li>
 * </ol>
 * Errors are added to {@code compilerErrors}; nothing is thrown from here.
 *
 * @param tree The node to optimize
 * @param procs The stack of procedure scopes; each layer lists the procedures
 * that were successfully resolved at compile time in that scope
 * @param compilerErrors The set that detected errors are added to
 */
private static void optimize(ParseTree tree, Stack<List<Procedure>> procs, Set<ConfigCompileException> compilerErrors) {
	if (tree.isOptimized()) {
		return; //Don't need to re-run this
	}
	if (!(tree.getData() instanceof CFunction)) {
		//There's no way to optimize something that's not a function
		return;
	}
	//If it is a proc definition, we need to go ahead and see if we can add it to the const proc stack
	if (tree.getData().val().equals("proc")) {
		procs.push(new ArrayList<Procedure>());
	}
	CFunction cFunction = (CFunction) tree.getData();
	Function func;
	try {
		func = (Function) FunctionList.getFunction(cFunction);
	} catch (ConfigCompileException e) {
		// Not a known function. It is treated as a proc call further down.
		func = null;
	}
	if (func != null) {
		if (func.getClass().getAnnotation(nolinking.class) != null) {
			//It's an unlinking function, so we need to stop at this point
			return;
		}
	}
	if (cFunction instanceof CIdentifier) {
		//Add the child to the identifier
		ParseTree c = ((CIdentifier) cFunction).contained();
		tree.addChild(c);
		c.getData().setWasIdentifier(true);
	}

	List<ParseTree> children = tree.getChildren();
	if(func instanceof Optimizable && ((Optimizable)func).optimizationOptions().contains(OptimizationOption.PRIORITY_OPTIMIZATION)){
		// This is a priority optimization function, meaning it needs to be optimized before its children are.
		// This is required when optimization of the children could cause different internal behavior, for instance
		// if this function is expecting the presence of some code element, but the child gets optimized out, this
		// would cause an error, even though the user did in fact provide code in that section.
		try {
			// Use the node's own file options, as the dynamic optimization further down does.
			((Optimizable)func).optimizeDynamic(tree.getTarget(), children, tree.getFileOptions());
		} catch (ConfigCompileException ex){
			// If an error occurs, we will skip the rest of this element
			compilerErrors.add(ex);
			return;
		} catch (ConfigRuntimeException ex) {
			compilerErrors.add(new ConfigCompileException(ex));
			return;
		}
	}

	//Loop through the children, and if any of them are functions that are terminal, truncate.
	//To explain this further, consider the following:
	//For the code: concat(die(), msg('')), this diagram shows the abstract syntax tree:
	//         (concat)
	//         /      \
	//        /        \
	//     (die)      (msg)
	//By looking at the code, we can tell that msg() will never be called, because die() will run first,
	//and since it is a "terminal" function, any code after it will NEVER run. However, consider a more complex condition:
	// if(@input){ die() msg('1') } else { msg('2') msg('3') }
	//                  if(@input)
	//          [true]/            \[false]
	//               /              \
	//         (sconcat)          (sconcat)
	//          /     \            /     \
	//         /       \          /       \
	//      (die)  (msg[1])  (msg[2])  (msg[3])
	//In this case, only msg('1') is guaranteed not to run, msg('2') and msg('3') will still run in some cases.
	//So, we can optimize out msg('1') in this case, which would cause the tree to become much simpler, therefore a worthwhile optimization:
	//                  if(@input)
	//          [true]/            \[false]
	//               /              \
	//            (die)           (sconcat)
	//                             /     \
	//                            /       \
	//                       (msg[2])  (msg[3])
	//We do have to be careful though, because of functions like if, which actually work like this:
	//if(@var){ die() } else { msg('') }
	//              (if)
	//            /  |  \
	//           /   |   \
	//        @var (die) (msg)
	//We can't get rid of the msg() here, because it is actually in another branch.
	//For the time being, we will simply say that if a function uses execs, it
	//is a branch (branches always use execs, though using execs doesn't strictly
	//mean you are a branch type function).
	for (int i = 0; i < children.size(); i++) {
		ParseTree t = children.get(i);
		if (t.getData() instanceof CFunction) {
			if (t.getData().val().startsWith("_") || (func != null && func.useSpecialExec())) {
				continue;
			}
			Function f;
			try {
				f = (Function) FunctionList.getFunction(t.getData());
			} catch (ConfigCompileException ex) {
				compilerErrors.add(ex);
				return;
			}
			Set<OptimizationOption> options = NO_OPTIMIZATIONS;
			if (f instanceof Optimizable) {
				options = ((Optimizable) f).optimizationOptions();
			}
			if (options.contains(OptimizationOption.TERMINAL)) {
				if (children.size() > i + 1) {
					//First, a compiler warning
					CHLog.GetLogger().Log(CHLog.Tags.COMPILER, LogLevel.WARNING, "Unreachable code. Consider removing this code.", children.get(i + 1).getTarget());
					//Now, truncate the children
					for (int j = children.size() - 1; j > i; j--) {
						children.remove(j);
					}
					break;
				}
			}
		}
	}
	boolean fullyStatic = true;
	boolean hasIVars = false;
	for (ParseTree node : children) {
		if (node.getData() instanceof CFunction) {
			optimize(node, procs, compilerErrors);
		}
		if (node.getData().isDynamic() && !(node.getData() instanceof IVariable)) {
			fullyStatic = false;
		}
		if (node.getData() instanceof IVariable) {
			hasIVars = true;
		}
	}
	//In all cases, at this point, we are either unable to optimize, or we will
	//optimize, so set our optimized variable at this point.
	tree.setOptimized(true);
	if (func == null) {
		//It's a proc call. Let's see if we can optimize it
		Procedure p = null;
		// The labeled break leaves both loops as soon as a procedure with a matching name is found.
		loop:
		for (List<Procedure> proc : procs) {
			for (Procedure pp : proc) {
				if (pp.getName().equals(cFunction.val())) {
					p = pp;
					break loop;
				}
			}
		}
		if (p != null) {
			try {
				Construct c = DataHandling.proc.optimizeProcedure(p.getTarget(), p, children);
				if (c != null) {
					tree.setData(c);
					tree.removeChildren();
					return;
				}//else Nope, couldn't optimize.
			} catch (ConfigRuntimeException ex) {
				//Cool. Caught a runtime error at compile time :D
				compilerErrors.add(new ConfigCompileException(ex));
			}
		}
		//else this procedure isn't listed yet. Maybe a compiler error, maybe not, depends,
		//so we can't for sure say, but we do know we can't optimize this
		return;
	}
	if (tree.getData().val().equals("proc")) {
		//We just went out of scope, so we need to pop the layer of Procedures that
		//are internal to us. This is done before the argument check below, so that the
		//scope stack stays balanced even when the definition is rejected.
		procs.pop();
		//Check for too few arguments
		if (children.size() < 2) {
			compilerErrors.add(new ConfigCompileException("Incorrect number of arguments passed to proc",
					tree.getData().getTarget()));
			return;
		}
		//However, as a special function, we *might* be able to get a const proc out of this
		//Let's see.
		try {
			ParseTree root = new ParseTree(new CFunction(__autoconcat__, Target.UNKNOWN), tree.getFileOptions());
			Script fakeScript = Script.GenerateScript(root, "*");
			Environment env = null;
			try {
				env = Static.GenerateStandaloneEnvironment();
			} catch (IOException | DataSourceException | URISyntaxException | Profiles.InvalidProfileException e) {
				// Deliberately ignored: env stays null, and the attempt below is best effort.
			}
			Procedure myProc = DataHandling.proc.getProcedure(tree.getTarget(), env, fakeScript, children.toArray(new ParseTree[children.size()]));
			procs.peek().add(myProc); //Yep. So, we can move on with our lives now, and if it's used later, it could possibly be static.
		} catch (ConfigRuntimeException e) {
			//Well, they have an error in there somewhere
			compilerErrors.add(new ConfigCompileException(e));
		} catch (NullPointerException e) {
			//Nope, can't optimize.
			return;
		}
	}

	//the compiler trick functions know how to deal with it specially, even if everything isn't
	//static, so do this first.
	String oldFunctionName = func.getName();
	Set<OptimizationOption> options = NO_OPTIMIZATIONS;
	if (func instanceof Optimizable) {
		options = ((Optimizable) func).optimizationOptions();
	}
	if (options.contains(OptimizationOption.OPTIMIZE_DYNAMIC)) {
		try {
			ParseTree tempNode;
			try {
				tempNode = ((Optimizable) func).optimizeDynamic(tree.getData().getTarget(), tree.getChildren(), tree.getFileOptions());
			} catch (ConfigRuntimeException e) {
				//Turn it into a compile exception, then rethrow
				throw new ConfigCompileException(e);
			}
			if (tempNode == Optimizable.PULL_ME_UP) {
				tempNode = tree.getChildAt(0);
			}
			if (tempNode == Optimizable.REMOVE_ME) {
				tree.setData(new CFunction("p", Target.UNKNOWN));
				tree.removeChildren();
			} else if (tempNode != null) {
				tree.setData(tempNode.getData());
				tree.setOptimized(tempNode.isOptimized());
				tree.setChildren(tempNode.getChildren());
				tree.getData().setWasIdentifier(tempNode.getData().wasIdentifier());
				optimize(tree, procs, compilerErrors);
				tree.setOptimized(true);
				//Some functions can actually make static the arguments, for instance, by pulling up a hardcoded
				//array, so if they have reversed this, make note of that now
				if (tempNode.hasBeenMadeStatic()) {
					fullyStatic = true;
				}
			} //else it wasn't an optimization, but a compile check
		} catch (ConfigCompileException ex) {
			compilerErrors.add(ex);
		}
	}
	if (!fullyStatic) {
		return;
	}
	//Otherwise, everything is static, or an IVariable and we can proceed.
	//Note since we could still have IVariables, we have to handle those
	//specially from here forward
	if (func.preResolveVariables() && hasIVars) {
		//Well, this function isn't equipped to deal with IVariables.
		return;
	}
	//It could have optimized by changing the name, in that case, we
	//don't want to run this now
	if (tree.getData().getValue().equals(oldFunctionName)
			&& (options.contains(OptimizationOption.OPTIMIZE_CONSTANT) || options.contains(OptimizationOption.CONSTANT_OFFLINE))) {
		Construct[] constructs = new Construct[tree.getChildren().size()];
		for (int i = 0; i < tree.getChildren().size(); i++) {
			constructs[i] = tree.getChildAt(i).getData();
		}
		try {
			try {
				Construct result;
				if (options.contains(OptimizationOption.CONSTANT_OFFLINE)) {
					List<Integer> numArgsList = Arrays.asList(func.numArgs());
					if (!numArgsList.contains(Integer.MAX_VALUE) &&
							!numArgsList.contains(tree.getChildren().size())) {
						compilerErrors.add(new ConfigCompileException("Incorrect number of arguments passed to "
								+ tree.getData().val(), tree.getData().getTarget()));
						result = null;
					} else {
						result = func.exec(tree.getData().getTarget(), null, constructs);
					}
				} else {
					result = ((Optimizable) func).optimize(tree.getData().getTarget(), constructs);
				}

				//If the result is null, it was just a check, it can't optimize further.
				if (result != null) {
					result.setWasIdentifier(tree.getData().wasIdentifier());
					tree.setData(result);
					tree.removeChildren();
				}
			} catch (ConfigRuntimeException e) {
				//Turn this into a ConfigCompileException, then rethrow
				throw new ConfigCompileException(e);
			}
		} catch (ConfigCompileException ex) {
			compilerErrors.add(ex);
		}
	}

	//It doesn't know how to optimize. Oh well.
}
/**
 * Runs keyword processing on the tree. Note that this is run before optimization, and is
 * a depth first process: each child is processed before the keyword handling of
 * the level that contains it.
 * @param tree The node whose children are to be processed
 * @throws ConfigCompileException Propagated from the recursive calls and from the keyword's own processing
 */
private static void processKeywords(ParseTree tree) throws ConfigCompileException {
	List<ParseTree> siblings = tree.getChildren();
	int index = 0;
	while(index < siblings.size()){
		ParseTree current = siblings.get(index);
		processKeywords(current);
		// Keywords can be standalone (optionally wrapped in a label), or a function can
		// double as a keyword, so all three forms have to be recognized.
		boolean isKeywordNode;
		if(current.getData() instanceof CKeyword){
			isKeywordNode = true;
		} else if(current.getData() instanceof CLabel
				&& ((CLabel) current.getData()).cVal() instanceof CKeyword){
			isKeywordNode = true;
		} else {
			isKeywordNode = current.getData() instanceof CFunction
					&& KeywordList.getKeywordByName(current.getData().val()) != null;
		}
		if(isKeywordNode){
			// Finish the subtrees of everything that follows first, so the keyword sees
			// fully processed siblings. Only their children are handled here, not this level.
			for(int rest = index + 1; rest < siblings.size(); rest++){
				processKeywords(siblings.get(rest));
			}
			// The keyword does the processing of this level, and reports where to continue from.
			index = KeywordList.getKeywordByName(current.getData().val()).process(siblings, index);
		}
		index++;
	}
}
/**
 * Convenience method that lexes, compiles, and then executes a script in one call.
 * @param script The textual script to execute
 * @param file The file it was located in
 * @param inPureMScript If it is pure MScript, or aliases
 * @param env The execution environment
 * @param done The MethodScriptComplete callback (may be null)
 * @param s A script object (may be null)
 * @param vars Any $vars (may be null)
 * @return The value returned by executing the compiled script
 * @throws ConfigCompileException If the script fails to compile
 * @throws com.laytonsmith.core.exceptions.ConfigCompileGroupException This indicates
 * that a group of compile errors occurred.
 */
public static Construct execute(String script, File file, boolean inPureMScript, Environment env, MethodScriptComplete done, Script s, List<Variable> vars) throws ConfigCompileException, ConfigCompileGroupException{
	// Lex and compile first; execution only starts once the whole script compiled.
	ParseTree compiled = compile(lex(script, file, inPureMScript));
	return execute(compiled, env, done, s, vars);
}
/**
 * Executes a pre-compiled MethodScript, given the specified Script
 * environment. Both done and script may be null, and if so, reasonable
 * defaults will be provided. The value sent to done will also be returned,
 * as a Construct, so this one function may be used synchronously also.
 * This overload supplies no variables; it simply delegates with a null
 * variable list.
 *
 * @param root The compiled tree to run
 * @param env The execution environment
 * @param done The completion callback (may be null)
 * @param script The script object (may be null)
 * @return The result of the delegated execution
 */
public static Construct execute(ParseTree root, Environment env, MethodScriptComplete done, Script script) {
	final List<Variable> noVars = null;
	return execute(root, env, done, script, noVars);
}
/**
 * Executes a pre-compiled MethodScript, given the specified Script
 * environment, but also provides a method to set the constants in the
 * script.
 * <p>
 * Each child of root is evaluated in order. The non-blank string values of
 * the results are joined with single spaces; that joined, trimmed text is
 * what gets passed to done.
 *
 * @param root The compiled tree to run; if null, nothing is run
 * @param env The execution environment
 * @param done The completion callback (may be null, in which case it is skipped)
 * @param script The script object (may be null, in which case a new Script(null, null) is used)
 * @param vars Variables whose defaults are copied onto same-named Variable nodes in the tree
 * (may be null, in which case the tree's variables are left untouched)
 * @return CVoid.VOID if root is null; the single child's result if root has exactly
 * one child; otherwise the construct resolved from the joined output text
 */
public static Construct execute(ParseTree root, Environment env, MethodScriptComplete done, Script script, List<Variable> vars) {
	if (root == null) {
		return CVoid.VOID;
	}
	if (script == null) {
		script = new Script(null, null);
	}
	if (vars != null) {
		// Index the supplied variables by name for lookup while walking the tree.
		Map<String, Variable> byName = new HashMap<>();
		for (Variable supplied : vars) {
			byName.put(supplied.getVariableName(), supplied);
		}
		for (Construct data : root.getAllData()) {
			if (!(data instanceof Variable)) {
				continue;
			}
			Variable target = (Variable) data;
			Variable match = byName.get(target.getVariableName());
			if (match == null) {
				//The variable is unset. I'm not quite sure what cases would cause this
				target.setVal("");
			} else {
				target.setVal(match.getDefault());
			}
		}
	}
	StringBuilder joined = new StringBuilder();
	Construct soleResult = null;
	for (ParseTree child : root.getChildren()) {
		// The label is re-read from the environment before every evaluation.
		script.setLabel(env.getEnv(GlobalEnv.class).GetLabel());
		Construct result = script.eval(child, env);
		if (root.numberOfChildren() == 1) {
			soleResult = result;
		}
		String text;
		if (result instanceof CNull) {
			text = "null";
		} else {
			text = result.val();
		}
		if (text == null || text.trim().isEmpty()) {
			continue;
		}
		joined.append(text).append(" ");
	}
	String output = joined.toString().trim();
	if (done != null) {
		done.done(output);
	}
	if (soleResult == null) {
		return Static.resolveConstruct(output, Target.UNKNOWN);
	}
	return soleResult;
}
/**
 * Executes every auto include file known to the alias core in the given environment.
 * Runtime failures are passed to the uncaught exception handler rather than propagated,
 * and the loop then moves on to the next file.
 *
 * @param env The environment the includes are executed in
 * @param s The script object handed to each execution
 */
public static void registerAutoIncludes(Environment env, Script s) {
	for (File include : Static.getAliasCore().autoIncludes) {
		try {
			Target origin = new Target(0, include, 0);
			ParseTree tree = IncludeCache.get(include, origin);
			MethodScriptCompiler.execute(tree, env, null, s);
		} catch (ProgramFlowManipulationException flow) {
			// Flow control escaping an auto include is reported as an uncatchable error at its target.
			ConfigRuntimeException.HandleUncaughtException(
					ConfigRuntimeException.CreateUncatchableException(
							"Cannot break program flow in auto include files.", flow.getTarget()),
					env);
		} catch (ConfigRuntimeException failure) {
			ConfigRuntimeException.HandleUncaughtException(failure, env);
		}
	}
}
}