package com.coverity.ps.sac.parser.as;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* The MIT License (MIT)
* Copyright (c) 2007 Randy Hollines - jhttphtml project
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
* IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* ActionScript 3.0 scanner
* @author rhollines
*
*/
/**
 * Hand-written scanner (lexer) for ActionScript 3.0 source text.
 *
 * <p>The scanner walks the input one character at a time with a single
 * character of look-ahead and produces a flat list of {@link Token}s.
 * Comments are reported as {@code COMMENT} tokens; non-ASCII characters that
 * are not letters are turned into {@code OTHER} tokens, which {@link #scan()}
 * drops. The list always ends with an {@code EOS} token.
 */
public class ActionScriptScanner {
    /** Sentinel character used to signal the end of the input. */
    private static final char EOS = '\0';

    /** Reserved words and reserved literals mapped to their token types. */
    private static final Map<String, Token.Type> KEYWORDS = new HashMap<String, Token.Type>();

    // keywords
    static {
        KEYWORDS.put("as", Token.Type.AS);
        KEYWORDS.put("case", Token.Type.CASE);
        KEYWORDS.put("catch", Token.Type.CATCH);
        KEYWORDS.put("class", Token.Type.CLASS);
        KEYWORDS.put("const", Token.Type.CONST);
        KEYWORDS.put("default", Token.Type.DEFAULT);
        KEYWORDS.put("delete", Token.Type.DELETE);
        KEYWORDS.put("do", Token.Type.DO);
        KEYWORDS.put("dynamic", Token.Type.DYNAMIC);
        KEYWORDS.put("each", Token.Type.EACH);
        KEYWORDS.put("else", Token.Type.ELSE);
        KEYWORDS.put("__END__", Token.Type.END);
        KEYWORDS.put("extends", Token.Type.EXTENDS);
        KEYWORDS.put("final", Token.Type.FINAL);
        KEYWORDS.put("finally", Token.Type.FINALLY);
        KEYWORDS.put("for", Token.Type.FOR);
        KEYWORDS.put("function", Token.Type.FUNCTION);
        KEYWORDS.put("get", Token.Type.GET);
        KEYWORDS.put("if", Token.Type.IF);
        KEYWORDS.put("implements", Token.Type.IMPLEMENTS);
        KEYWORDS.put("import", Token.Type.IMPORT);
        KEYWORDS.put("in", Token.Type.IN);
        KEYWORDS.put("include", Token.Type.INCLUDE);
        // NOTE: identifiers never start with '#', so this entry is not reachable
        // through lookupIdent(); "#include" is scanned as POUND followed by INCLUDE.
        KEYWORDS.put("#include", Token.Type.INCLUDE2);
        KEYWORDS.put("instanceof", Token.Type.INSTANCEOF);
        KEYWORDS.put("interface", Token.Type.INTERFACE);
        KEYWORDS.put("internal", Token.Type.INTERNAL);
        KEYWORDS.put("intrinsic", Token.Type.INTRINSIC);
        KEYWORDS.put("is", Token.Type.IS);
        KEYWORDS.put("namespace", Token.Type.NAMESPACE);
        KEYWORDS.put("new", Token.Type.NEW);
        KEYWORDS.put("override", Token.Type.OVERRIDE);
        KEYWORDS.put("package", Token.Type.PACKAGE);
        KEYWORDS.put("private", Token.Type.PRIVATE);
        KEYWORDS.put("protected", Token.Type.PROTECTED);
        KEYWORDS.put("public", Token.Type.PUBLIC);
        KEYWORDS.put("return", Token.Type.RETURN);
        KEYWORDS.put("set", Token.Type.SET);
        KEYWORDS.put("static", Token.Type.STATIC);
        KEYWORDS.put("super", Token.Type.SUPER);
        KEYWORDS.put("switch", Token.Type.SWITCH);
        KEYWORDS.put("try", Token.Type.TRY);
        KEYWORDS.put("typeof", Token.Type.TYPEOF);
        KEYWORDS.put("use", Token.Type.USE);
        KEYWORDS.put("var", Token.Type.VAR);
        KEYWORDS.put("void", Token.Type.VOID);
        KEYWORDS.put("while", Token.Type.WHILE);
        // reserved literals
        KEYWORDS.put("false", Token.Type.FALSE);
        KEYWORDS.put("null", Token.Type.NULL);
        KEYWORDS.put("true", Token.Type.TRUE);
    }

    /** The source text handed to the constructor (package-visible, as before). */
    String code;

    /** Character view of the source text; token text is sliced from this array. */
    private final char[] codeChars;

    /**
     * Index of {@link #currentChar} in {@link #codeChars}. Equals
     * {@code codeChars.length} once the input is exhausted, so it is always a
     * valid exclusive end index for slicing token text.
     */
    private int currentIndex = -1;

    /** Line (1-based) of the character currently being scanned. */
    private int lineNumber = 1;

    /** Character under the cursor, or {@link #EOS} past the end of input. */
    private char currentChar;

    /** Character after the cursor, or {@link #EOS} if there is none. */
    private char peekChar;

    /**
     * Creates a scanner positioned on the first character of {@code code}.
     *
     * @param code
     *            code to scan represented as a string; must not be {@code null}
     * @throws NullPointerException
     *             if {@code code} is {@code null}
     */
    public ActionScriptScanner(String code) {
        if (code == null) {
            throw new NullPointerException("code");
        }
        this.code = code;
        this.codeChars = code.toCharArray();
        advance();
    }

    /**
     * Scans the remaining input and returns every token except those of type
     * {@code OTHER}.
     *
     * @return the tokens in source order; the last element is the {@code EOS}
     *         token
     * @throws IllegalStateException
     *             if an ASCII character that starts no known token is found
     */
    public List<Token> scan() {
        List<Token> tokens = new ArrayList<Token>();
        Token token;
        do {
            token = getNextToken();
            if (token.getType() != Token.Type.OTHER) {
                tokens.add(token);
            }
        } while (token.getType() != Token.Type.EOS);
        return tokens;
    }

    /**
     * Returns the current line number, i.e. the number of lines seen so far
     * (the total line count once {@link #scan()} has finished).
     */
    public int getLineCount() {
        return this.lineNumber;
    }

    /**
     * Moves the cursor one character forward and refreshes the current and
     * look-ahead characters. At the end of the input the cursor stays put and
     * both characters read as {@link #EOS}.
     */
    private void advance() {
        if (currentIndex < codeChars.length) {
            currentIndex++;
        }
        currentChar = charAt(currentIndex);
        peekChar = charAt(currentIndex + 1);
    }

    /** Returns the character at {@code index}, or {@link #EOS} past the end. */
    private char charAt(int index) {
        return index < codeChars.length ? codeChars[index] : EOS;
    }

    /** Returns the source text from {@code start} up to (not including) the cursor. */
    private String textFrom(int start) {
        return new String(codeChars, start, currentIndex - start);
    }

    /**
     * Skips blanks, tabs, carriage returns and newlines, counting each newline.
     */
    private void whiteSpace() {
        while (currentChar == ' ' || currentChar == '\t'
                || currentChar == '\n' || currentChar == '\r') {
            if (currentChar == '\n') {
                lineNumber++;
            }
            advance();
        }
    }

    /**
     * Gets the next token, dispatching on the first character after any
     * leading white space.
     */
    private Token getNextToken() {
        whiteSpace();

        if (currentChar == '/' && peekChar == '*') {
            return scanBlockComment();
        }
        if (currentChar == '/' && peekChar == '/') {
            return scanLineComment();
        }
        if (currentChar == '"') {
            return scanQuoted('"');
        }
        if (currentChar == '\'') {
            return scanSingleQuoted();
        }
        if (isIdentifierStart(currentChar)) {
            return scanIdentifier();
        }
        if (Character.isDigit(currentChar)
                || (currentChar == '.' && Character.isDigit(peekChar))) {
            return scanNumber();
        }
        return scanOperator();
    }

    /**
     * Consumes a block comment. The token carries the line on which the
     * comment ends; an unterminated comment runs to the end of the input.
     */
    private Token scanBlockComment() {
        // consume the opening "/*"
        advance();
        advance();
        boolean closed = false;
        while (currentChar != EOS && !closed) {
            if (currentChar == '\n') {
                lineNumber++;
            }
            if (currentChar == '*' && peekChar == '/') {
                closed = true;
                advance();
            }
            advance();
        }
        return new Token(lineNumber, Token.Type.COMMENT, "/* */");
    }

    /**
     * Consumes a {@code //} comment including its terminating newline. The
     * token carries the line the comment is on.
     */
    private Token scanLineComment() {
        // consume the opening "//"
        advance();
        advance();
        while (currentChar != EOS && currentChar != '\n') {
            advance();
        }
        int commentLine = lineNumber;
        // Only count a line break that is really there; a comment that ends at
        // the end of the input must not inflate the line count.
        if (currentChar == '\n') {
            lineNumber++;
            advance();
        }
        return new Token(commentLine, Token.Type.COMMENT, "//");
    }

    /**
     * Scans a literal delimited by {@code quote}. A backslash escapes the
     * character that follows it, whatever it is, so {@code "\\"} ends at its
     * second quote. An unterminated literal runs to the end of the input.
     *
     * @return a {@code STRING} token whose value is the raw source text,
     *         delimiters included
     */
    private Token scanQuoted(char quote) {
        int start = currentIndex;
        advance(); // opening quote
        while (currentChar != EOS && currentChar != quote) {
            if (currentChar == '\\' && peekChar != EOS) {
                advance(); // step onto the escaped character; it is consumed below
            }
            if (currentChar == '\n') {
                lineNumber++;
            }
            advance();
        }
        if (currentChar == quote) {
            advance(); // closing quote
        }
        return new Token(lineNumber, Token.Type.STRING, textFrom(start));
    }

    /**
     * Scans a single-quoted literal. A literal holding exactly one character
     * yields a {@code CHAR} token with that character as its value; any other
     * single-quoted literal yields a {@code STRING} token with the raw text,
     * quotes included.
     */
    private Token scanSingleQuoted() {
        Token literal = scanQuoted('\'');
        String raw = literal.getValue();
        if (raw.length() == 3 && raw.charAt(2) == '\'') {
            return new Token(literal.getLineNumber(), Token.Type.CHAR, raw.substring(1, 2));
        }
        return literal;
    }

    /** Scans an identifier and classifies it as keyword or plain identifier. */
    private Token scanIdentifier() {
        int start = currentIndex;
        while (isIdentifierPart(currentChar)) {
            advance();
        }
        return lookupIdent(textFrom(start));
    }

    /**
     * Scans a run of digits and dots. The token is {@code NUM} when at least
     * one dot was seen and {@code INTEGER} otherwise; the run is not validated
     * any further.
     */
    private Token scanNumber() {
        int start = currentIndex;
        boolean foundDot = false;
        while (Character.isDigit(currentChar) || currentChar == '.') {
            // could check for scan error here
            if (currentChar == '.') {
                foundDot = true;
            }
            advance();
        }
        Token.Type type = foundDot ? Token.Type.NUM : Token.Type.INTEGER;
        return new Token(lineNumber, type, textFrom(start));
    }

    /**
     * Scans punctuation, operators and the end-of-stream marker.
     *
     * @throws IllegalStateException
     *             if the current character is ASCII but starts no known token
     */
    private Token scanOperator() {
        switch (currentChar) {
        // TODO: >>=, >>>=, <<=, <<<=, ===, !==, &&=, ||=, ...
        case EOS:
            return oneChar(Token.Type.EOS, "");
        case ':':
            return peekChar == ':'
                    ? twoChars(Token.Type.NAME_QUAL, "::")
                    : oneChar(Token.Type.TYPE, ":");
        case ';':
            return oneChar(Token.Type.SEMI_COLON, ";");
        case '{':
            return oneChar(Token.Type.OCBR, "{");
        case '}':
            return oneChar(Token.Type.CCBR, "}");
        case '[':
            return oneChar(Token.Type.OBR, "[");
        case ']':
            return oneChar(Token.Type.CBR, "]");
        case '.':
            return oneChar(Token.Type.DOT, ".");
        case '#':
            return oneChar(Token.Type.POUND, "#");
        case '(':
            return oneChar(Token.Type.OPRN, "(");
        case ')':
            return oneChar(Token.Type.CPRN, ")");
        case '@':
            return oneChar(Token.Type.AT, "@");
        case '=':
            return oneChar(Token.Type.EQL, "=");
        case ',':
            return oneChar(Token.Type.COMMA, ",");
        case '~':
            return oneChar(Token.Type.NOT, "~");
        case '|':
            return peekChar == '|'
                    ? twoChars(Token.Type.OR_OR, "||")
                    : oneChar(Token.Type.OR, "|");
        case '?':
            return oneChar(Token.Type.QUESTION, "?");
        case '&':
            return peekChar == '&'
                    ? twoChars(Token.Type.AND_AND, "&&")
                    : oneChar(Token.Type.AND, "&");
        case '+':
            if (peekChar == '=') {
                return twoChars(Token.Type.ADD_EQL, "+=");
            }
            if (peekChar == '+') {
                return twoChars(Token.Type.INC, "++");
            }
            return oneChar(Token.Type.ADD, "+");
        case '-':
            if (peekChar == '-') {
                return twoChars(Token.Type.DECL, "--");
            }
            if (peekChar == '=') {
                return twoChars(Token.Type.MINUS_EQL, "-=");
            }
            return oneChar(Token.Type.MINUS, "-");
        case '*':
            return peekChar == '='
                    ? twoChars(Token.Type.MUL_EQL, "*=")
                    : oneChar(Token.Type.MUL, "*");
        case '/':
            return peekChar == '='
                    ? twoChars(Token.Type.DIV_EQL, "/=")
                    : oneChar(Token.Type.DIV, "/");
        case '%':
            return peekChar == '='
                    ? twoChars(Token.Type.MOD_EQL, "%=")
                    : oneChar(Token.Type.MOD, "%");
        case '!':
            // The "NEG" value is kept as-is in case consumers compare against it.
            return peekChar == '='
                    ? twoChars(Token.Type.NEQL, "!=")
                    : oneChar(Token.Type.NEG, "NEG");
        case '<':
            return peekChar == '<'
                    ? twoChars(Token.Type.LEFT_SHIFT, "<<")
                    : oneChar(Token.Type.LESS, "<");
        case '>':
            return peekChar == '='
                    ? twoChars(Token.Type.GTR_EQL, ">=")
                    : oneChar(Token.Type.GTR, ">");
        default:
            // we only care about ASCII characters
            if (currentChar < 128) {
                // Fail with an exception rather than terminating the JVM so the
                // caller decides how to report the problem.
                throw new IllegalStateException(
                        "Unknown token: " + currentChar + " line=" + lineNumber);
            }
            return oneChar(Token.Type.OTHER, "*OTHER* " + currentChar);
        }
    }

    /** Builds a token on the current line and consumes one character. */
    private Token oneChar(Token.Type type, String text) {
        Token token = new Token(lineNumber, type, text);
        advance();
        return token;
    }

    /** Builds a token on the current line and consumes two characters. */
    private Token twoChars(Token.Type type, String text) {
        Token token = new Token(lineNumber, type, text);
        advance();
        advance();
        return token;
    }

    /** Tells whether {@code c} may begin an identifier (letter, '_' or '$'). */
    private static boolean isIdentifierStart(char c) {
        return Character.isLetter(c) || c == '_' || c == '$';
    }

    /** Tells whether {@code c} may continue an identifier (letter, digit, '_' or '$'). */
    private static boolean isIdentifierPart(char c) {
        return Character.isLetterOrDigit(c) || c == '_' || c == '$';
    }

    /**
     * Checks to see if an identifier is a keyword.
     *
     * @param ident
     *            the scanned identifier text
     * @return a token of the keyword's type if {@code ident} is reserved,
     *         otherwise an {@code IDENT} token; both carry {@code ident} as value
     */
    private Token lookupIdent(String ident) {
        Token.Type keywordType = KEYWORDS.get(ident);
        Token.Type type = (keywordType != null) ? keywordType : Token.Type.IDENT;
        return new Token(lineNumber, type, ident);
    }

    /**
     * Immutable token: a type, the line it was found on and its text value.
     */
    public static class Token {
        /** Token kinds: reserved words first, then lexical categories and operators. */
        public enum Type {
            AS,
            CASE,
            CATCH,
            CLASS,
            CONST,
            DEFAULT,
            DELETE,
            DO,
            DYNAMIC,
            EACH,
            ELSE,
            END,
            EXTENDS,
            FINAL,
            FINALLY,
            FOR,
            FUNCTION,
            GET,
            IF,
            IMPLEMENTS,
            IMPORT,
            IN,
            INCLUDE,
            INCLUDE2,
            INSTANCEOF,
            INTERFACE,
            INTERNAL,
            INTRINSIC,
            IS,
            NAMESPACE,
            NEW,
            OVERRIDE,
            PACKAGE,
            PRIVATE,
            PROTECTED,
            PUBLIC,
            RETURN,
            SET,
            STATIC,
            SUPER,
            SWITCH,
            TRY,
            TYPEOF,
            USE,
            VAR,
            VOID,
            WHILE,
            // tokens
            OTHER,
            EOS,
            COMMENT,
            STRING,
            IDENT,
            CHAR,
            GTR,
            NUM,
            LESS,
            GTR_EQL,
            LEFT_SHIFT,
            NEG,
            NEQL,
            MOD,
            MOD_EQL,
            DIV,
            DIV_EQL,
            MUL,
            MUL_EQL,
            MINUS,
            MINUS_EQL,
            DECL,
            INTEGER,
            NAME_QUAL,
            TYPE,
            ADD,
            INC,
            SEMI_COLON,
            ADD_EQL,
            OCBR,
            AND,
            DOT,
            AND_AND,
            POUND,
            OR_OR,
            NOT,
            CBR,
            CCBR,
            AT,
            OBR,
            QUESTION,
            EQL,
            OR,
            OPRN,
            COMMA,
            CPRN,
            FALSE,
            NULL,
            TRUE
        }

        private final Type type;
        private final String value;
        private final int lineNumber;

        /**
         * Creates a token with an empty value.
         *
         * @param lineNumber
         *            line the token was found on
         * @param type
         *            token type
         */
        public Token(int lineNumber, Type type) {
            this(lineNumber, type, "");
        }

        /**
         * Creates a token.
         *
         * @param lineNumber
         *            line the token was found on
         * @param type
         *            token type
         * @param value
         *            token text
         */
        public Token(int lineNumber, Type type, String value) {
            this.lineNumber = lineNumber;
            this.type = type;
            this.value = value;
        }

        /** Returns the token type. */
        public Type getType() {
            return this.type;
        }

        /** Returns the line the token was found on. */
        public int getLineNumber() {
            return this.lineNumber;
        }

        /** Returns the token text. */
        public String getValue() {
            return value;
        }

        /** Formats the token as {@code TYPE:line-> 'value'}. */
        @Override
        public String toString() {
            return type + ":" + lineNumber + "-> '" + value + "'";
        }
    }
}