package com.coverity.ps.sac.parser.java;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* The MIT License (MIT)
* Copyright (c) 2007 Randy Hollines - jhttphtml project
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
* IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Java language scanner
* @author rhollines
*
*/
public class JavaScanner {
  // Known limitations (kept for compatibility with existing token consumers):
  //  - "==" and "<=" are emitted as two single-character tokens.
  //  - '^' and other unsupported ASCII punctuation abort via System.exit(1).
  //  - Numeric literals are digits and dots only (no hex, exponent or suffix).
  //  - Token.Type.CHAR is used for both the 'char' keyword and char literals.

  /** Sentinel marking the end of the input; an embedded NUL character also ends scanning. */
  private static final char EOS = '\0';

  /** Reserved words and reserved literals mapped to their token types. */
  private static final Map<String, Token.Type> KEYWORDS = new HashMap<String, Token.Type>();

  static {
    // keywords
    KEYWORDS.put("abstract", Token.Type.ABSTRACT);
    KEYWORDS.put("continue", Token.Type.CONTINUE);
    KEYWORDS.put("for", Token.Type.FOR);
    KEYWORDS.put("new", Token.Type.NEW);
    KEYWORDS.put("switch", Token.Type.SWITCH);
    KEYWORDS.put("assert", Token.Type.ASSERT);
    KEYWORDS.put("default", Token.Type.DEFAULT);
    KEYWORDS.put("goto", Token.Type.GOTO);
    KEYWORDS.put("package", Token.Type.PACKAGE);
    KEYWORDS.put("synchronized", Token.Type.SYNCHRONIZED);
    KEYWORDS.put("boolean", Token.Type.BOOLEAN);
    KEYWORDS.put("do", Token.Type.DO);
    KEYWORDS.put("if", Token.Type.IF);
    KEYWORDS.put("private", Token.Type.PRIVATE);
    KEYWORDS.put("this", Token.Type.THIS);
    KEYWORDS.put("break", Token.Type.BREAK);
    KEYWORDS.put("double", Token.Type.DOUBLE);
    KEYWORDS.put("implements", Token.Type.IMPLEMENTS);
    KEYWORDS.put("protected", Token.Type.PROTECTED);
    KEYWORDS.put("throw", Token.Type.THROW);
    KEYWORDS.put("byte", Token.Type.BYTE);
    KEYWORDS.put("else", Token.Type.ELSE);
    KEYWORDS.put("import", Token.Type.IMPORT);
    KEYWORDS.put("public", Token.Type.PUBLIC);
    KEYWORDS.put("throws", Token.Type.THROWS);
    KEYWORDS.put("case", Token.Type.CASE);
    KEYWORDS.put("enum", Token.Type.ENUM);
    KEYWORDS.put("instanceof", Token.Type.INSTANCEOF);
    KEYWORDS.put("return", Token.Type.RETURN);
    KEYWORDS.put("transient", Token.Type.TRANSIENT);
    KEYWORDS.put("catch", Token.Type.CATCH);
    KEYWORDS.put("extends", Token.Type.EXTENDS);
    KEYWORDS.put("int", Token.Type.INT);
    KEYWORDS.put("short", Token.Type.SHORT);
    KEYWORDS.put("try", Token.Type.TRY);
    KEYWORDS.put("char", Token.Type.CHAR);
    KEYWORDS.put("final", Token.Type.FINAL);
    KEYWORDS.put("interface", Token.Type.INTERFACE);
    KEYWORDS.put("static", Token.Type.STATIC);
    KEYWORDS.put("void", Token.Type.VOID);
    KEYWORDS.put("class", Token.Type.CLASS);
    KEYWORDS.put("finally", Token.Type.FINALLY);
    KEYWORDS.put("long", Token.Type.LONG);
    KEYWORDS.put("strictfp", Token.Type.STRICTFP);
    KEYWORDS.put("volatile", Token.Type.VOLATILE);
    KEYWORDS.put("const", Token.Type.CONST);
    KEYWORDS.put("float", Token.Type.FLOAT);
    KEYWORDS.put("native", Token.Type.NATIVE);
    KEYWORDS.put("super", Token.Type.SUPER);
    KEYWORDS.put("while", Token.Type.WHILE);
    // reserved literals
    KEYWORDS.put("false", Token.Type.FALSE);
    KEYWORDS.put("null", Token.Type.NULL);
    KEYWORDS.put("true", Token.Type.TRUE);
  }

  /** Source text being scanned (package-private, as before). */
  String code;
  /** The source text as a character array. */
  private char[] codeChars;
  /** Index of the next unread character in {@code codeChars}. */
  private int scanPosition = 0;
  /** Index of {@code currentChar}; equals {@code codeChars.length} once input is exhausted. */
  private int currentIndex = 0;
  /** 1-based number of the line currently being scanned. */
  private int lineNumber = 1;
  /** Character under the cursor and the one-character look-ahead; EOS past the end. */
  private char currentChar, nextChar;

  /**
   * Creates a scanner positioned on the first character of the given source.
   *
   * @param code
   *          code to scan represented as a string; must not be null
   * @throws NullPointerException
   *           if {@code code} is null
   */
  public JavaScanner(String code) {
    if (code == null) {
      throw new NullPointerException("code must not be null");
    }
    this.code = code;
    codeChars = code.toCharArray();
    nextChar();
  }

  /**
   * Scans the whole input.
   *
   * @return the tokens in source order, ending with an EOS token; tokens of
   *         type OTHER (non-ASCII characters outside literals) are dropped
   */
  public List<Token> scan() {
    List<Token> tokens = new ArrayList<Token>();
    Token token;
    do {
      token = getNextToken();
      if (token.getType() != Token.Type.OTHER) {
        tokens.add(token);
      }
    } while (token.getType() != Token.Type.EOS);
    return tokens;
  }

  /**
   * @return the current line number, i.e. one plus the number of newlines
   *         consumed so far
   */
  public int getLineCount() {
    return this.lineNumber;
  }

  /**
   * Advances the cursor by one character, refreshing the look-ahead. Past the
   * end of the input both characters become EOS and the call is a no-op.
   */
  private void nextChar() {
    if (scanPosition < codeChars.length) {
      currentIndex = scanPosition;
      currentChar = codeChars[scanPosition++];
      nextChar = scanPosition < codeChars.length ? codeChars[scanPosition] : EOS;
    } else {
      currentIndex = codeChars.length;
      currentChar = EOS;
      nextChar = EOS;
    }
  }

  /**
   * Skips spaces, tabs, form feeds and line terminators, counting newlines.
   */
  private void whiteSpace() {
    while (currentChar == ' ' || currentChar == '\t' || currentChar == '\f'
        || currentChar == '\n' || currentChar == '\r') {
      if (currentChar == '\n') {
        lineNumber++;
      }
      nextChar();
    }
  }

  /**
   * Gets the next token, dispatching on the first significant character.
   */
  private Token getNextToken() {
    whiteSpace();
    if (currentChar == '/' && nextChar == '*') {
      return blockComment();
    }
    if (currentChar == '/' && nextChar == '/') {
      return lineComment();
    }
    if (currentChar == '"') {
      return stringLiteral();
    }
    if (isIdentStart(currentChar)) {
      return identifier();
    }
    if (Character.isDigit(currentChar)
        || (currentChar == '.' && Character.isDigit(nextChar))) {
      return number();
    }
    return punctuation();
  }

  /** Consumes a block comment; the token carries the line the comment ends on. */
  private Token blockComment() {
    nextChar();
    nextChar();
    boolean done = false;
    while (currentChar != EOS && !done) {
      if (currentChar == '\n') {
        lineNumber++;
      }
      if (currentChar == '*' && nextChar == '/') {
        done = true;
        nextChar();
      }
      nextChar();
    }
    return new Token(lineNumber, Token.Type.COMMENT, "/* */");
  }

  /** Consumes a line comment together with its terminating newline, if any. */
  private Token lineComment() {
    int line = lineNumber;
    nextChar();
    nextChar();
    while (currentChar != EOS && currentChar != '\n') {
      nextChar();
    }
    // Only count a line when a newline is really there; a comment may end the input.
    if (currentChar == '\n') {
      lineNumber++;
      nextChar();
    }
    return new Token(line, Token.Type.COMMENT, "//");
  }

  /**
   * Scans a string literal. The token value is the raw source text including
   * the quotes; an unterminated string extends to the end of the input.
   */
  private Token stringLiteral() {
    int line = lineNumber;
    int start = currentIndex;
    nextChar(); // opening quote
    while (currentChar != EOS && currentChar != '"') {
      if (currentChar == '\\') {
        // Skip the escaped character whatever it is, so that "\\" ends correctly.
        nextChar();
      }
      if (currentChar == '\n') {
        lineNumber++;
      }
      nextChar();
    }
    if (currentChar == '"') {
      nextChar(); // closing quote
    }
    return new Token(line, Token.Type.STRING, code.substring(start, currentIndex));
  }

  /**
   * Scans a character literal. The token value is the raw text between the
   * quotes (e.g. {@code a} or {@code \n}); scanning stops at a newline so an
   * unterminated literal cannot swallow the rest of the file.
   */
  private Token charLiteral() {
    int line = lineNumber;
    nextChar(); // opening quote
    int start = currentIndex;
    while (currentChar != EOS && currentChar != '\'' && currentChar != '\n') {
      if (currentChar == '\\' && nextChar != '\n') {
        nextChar(); // escaped character, possibly a quote
      }
      nextChar();
    }
    String value = code.substring(start, currentIndex);
    if (currentChar == '\'') {
      nextChar(); // closing quote
    }
    return new Token(line, Token.Type.CHAR, value);
  }

  /** Scans an identifier or keyword. */
  private Token identifier() {
    int start = currentIndex;
    while (isIdentPart(currentChar)) {
      nextChar();
    }
    return lookupIdent(code.substring(start, currentIndex));
  }

  /**
   * Scans a run of digits and dots: NUM if a dot was seen, INTEGER otherwise.
   */
  private Token number() {
    int start = currentIndex;
    boolean foundDot = false;
    while (Character.isDigit(currentChar) || currentChar == '.') {
      // could check for scan error here
      if (currentChar == '.') {
        foundDot = true;
      }
      nextChar();
    }
    Token.Type type = foundDot ? Token.Type.NUM : Token.Type.INTEGER;
    return new Token(lineNumber, type, code.substring(start, currentIndex));
  }

  /**
   * Scans operators, separators, character literals and the end-of-stream
   * marker. Unsupported ASCII characters terminate the JVM (unchanged legacy
   * behaviour); non-ASCII characters yield an OTHER token.
   */
  private Token punctuation() {
    // TODO: ==, <=, ^, >>=, >>>=, <<=, &=, |=, ...
    switch (currentChar) {
    case EOS:
      return symbol(Token.Type.EOS, "", 1);
    case ':':
      if (nextChar == ':') {
        return symbol(Token.Type.NAME_QUAL, "::", 2);
      }
      return symbol(Token.Type.TYPE, ":", 1);
    case ';':
      return symbol(Token.Type.SEMI_COLON, ";", 1);
    case '{':
      return symbol(Token.Type.OCBR, "{", 1);
    case '}':
      return symbol(Token.Type.CCBR, "}", 1);
    case '[':
      return symbol(Token.Type.OBR, "[", 1);
    case ']':
      return symbol(Token.Type.CBR, "]", 1);
    case '.':
      return symbol(Token.Type.DOT, ".", 1);
    case '#':
      return symbol(Token.Type.POUND, "#", 1);
    case '@':
      return symbol(Token.Type.AT, "@", 1);
    case '(':
      return symbol(Token.Type.OPRN, "(", 1);
    case ')':
      return symbol(Token.Type.CPRN, ")", 1);
    case '=':
      return symbol(Token.Type.EQL, "=", 1);
    case ',':
      return symbol(Token.Type.COMMA, ",", 1);
    case '~':
      return symbol(Token.Type.NOT, "~", 1);
    case '|':
      if (nextChar == '|') {
        return symbol(Token.Type.OR_OR, "||", 2);
      }
      return symbol(Token.Type.OR, "|", 1);
    case '?':
      return symbol(Token.Type.QUESTION, "?", 1);
    case '&':
      if (nextChar == '&') {
        return symbol(Token.Type.AND_AND, "&&", 2);
      }
      return symbol(Token.Type.AND, "&", 1);
    case '+':
      if (nextChar == '=') {
        return symbol(Token.Type.ADD_EQL, "+=", 2);
      }
      if (nextChar == '+') {
        return symbol(Token.Type.INC, "++", 2);
      }
      return symbol(Token.Type.ADD, "+", 1);
    case '-':
      if (nextChar == '-') {
        return symbol(Token.Type.DECL, "--", 2);
      }
      if (nextChar == '=') {
        return symbol(Token.Type.MINUS_EQL, "-=", 2);
      }
      return symbol(Token.Type.MINUS, "-", 1);
    case '*':
      if (nextChar == '=') {
        return symbol(Token.Type.MUL_EQL, "*=", 2);
      }
      return symbol(Token.Type.MUL, "*", 1);
    case '/':
      if (nextChar == '=') {
        return symbol(Token.Type.DIV_EQL, "/=", 2);
      }
      return symbol(Token.Type.DIV, "/", 1);
    case '%':
      if (nextChar == '=') {
        return symbol(Token.Type.MOD_EQL, "%=", 2);
      }
      return symbol(Token.Type.MOD, "%", 1);
    case '!':
      if (nextChar == '=') {
        return symbol(Token.Type.NEQL, "!=", 2);
      }
      // The value "NEG" (not "!") is kept for compatibility with existing consumers.
      return symbol(Token.Type.NEG, "NEG", 1);
    case '<':
      if (nextChar == '<') {
        return symbol(Token.Type.LEFT_SHIFT, "<<", 2);
      }
      return symbol(Token.Type.LESS, "<", 1);
    case '>':
      // ">>" is intentionally not combined here.
      if (nextChar == '=') {
        return symbol(Token.Type.GTR_EQL, ">=", 2);
      }
      return symbol(Token.Type.GTR, ">", 1);
    case '\'':
      return charLiteral();
    default:
      // we only care about ASCII characters
      if (currentChar < 128) {
        System.err.print("Unknown token: " + currentChar + " line=" + lineNumber);
        System.exit(1);
      }
      return symbol(Token.Type.OTHER, "*OTHER* " + currentChar, 1);
    }
  }

  /**
   * Builds a token on the current line and then consumes {@code width}
   * characters of input.
   */
  private Token symbol(Token.Type type, String text, int width) {
    Token token = new Token(lineNumber, type, text);
    for (int i = 0; i < width; i++) {
      nextChar();
    }
    return token;
  }

  /** @return true if {@code c} may start an identifier (letter, '_' or '$') */
  private static boolean isIdentStart(char c) {
    return Character.isLetter(c) || c == '_' || c == '$';
  }

  /** @return true if {@code c} may continue an identifier (letter, digit, '_' or '$') */
  private static boolean isIdentPart(char c) {
    return Character.isLetterOrDigit(c) || c == '_' || c == '$';
  }

  /**
   * Checks to see if an identifier is a keyword
   *
   * @return a keyword/reserved-literal token if {@code ident} is reserved,
   *         otherwise an IDENT token
   */
  private Token lookupIdent(String ident) {
    Token.Type tokenType = KEYWORDS.get(ident);
    if (tokenType == null) {
      tokenType = Token.Type.IDENT;
    }
    return new Token(lineNumber, tokenType, ident);
  }

  /**
   * Immutable lexical token: a type, the associated text and the line it was
   * reported on.
   */
  public static class Token {
    public enum Type {
      // tokens
      OTHER,
      EOS,
      COMMENT,
      STRING,
      IDENT,
      CHAR,
      GTR,
      NUM,
      LESS,
      GTR_EQL,
      LEFT_SHIFT,
      NEG,
      NEQL,
      MOD,
      MOD_EQL,
      DIV,
      DIV_EQL,
      MUL,
      MUL_EQL,
      MINUS,
      MINUS_EQL,
      DECL,
      INTEGER,
      NAME_QUAL,
      TYPE,
      ADD,
      INC,
      SEMI_COLON,
      ADD_EQL,
      OCBR,
      AND,
      DOT,
      AND_AND,
      POUND,
      OR_OR,
      NOT,
      CBR,
      CCBR,
      AT,
      OBR,
      QUESTION,
      EQL,
      OR,
      OPRN,
      COMMA,
      CPRN,
      FALSE,
      NULL,
      TRUE,
      ABSTRACT,
      CONTINUE,
      FOR,
      PACKAGE,
      TRANSIENT,
      ASSERT,
      WHILE,
      SUPER,
      NATIVE,
      FLOAT,
      NEW,
      SWITCH,
      GOTO,
      DEFAULT,
      CONST,
      VOLATILE,
      THROWS,
      INSTANCEOF,
      RETURN,
      STATIC,
      PROTECTED,
      EXTENDS,
      VOID,
      ELSE,
      ENUM,
      BOOLEAN,
      THIS,
      IF,
      PUBLIC,
      FINALLY,
      STRICTFP,
      LONG,
      SYNCHRONIZED,
      DO,
      PRIVATE,
      DOUBLE,
      TRY,
      CLASS,
      THROW,
      INT,
      FINAL,
      INTERFACE,
      BREAK,
      SHORT,
      CATCH,
      CASE,
      IMPLEMENTS,
      BYTE,
      IMPORT
    }

    private final Type type;
    private final String value;
    private final int lineNumber;

    /**
     * Creates a token with an empty value.
     */
    public Token(int lineNumber, Type type) {
      this(lineNumber, type, "");
    }

    /**
     * Creates a token.
     *
     * @param lineNumber
     *          line the token is reported on
     * @param type
     *          token type
     * @param value
     *          text associated with the token
     */
    public Token(int lineNumber, Type type, String value) {
      this.lineNumber = lineNumber;
      this.type = type;
      this.value = value;
    }

    public Type getType() {
      return this.type;
    }

    public int getLineNumber() {
      return this.lineNumber;
    }

    public String getValue() {
      return value;
    }

    /** Formats the token as {@code TYPE:line-> 'value'}. */
    @Override
    public String toString() {
      StringBuilder buffer = new StringBuilder();
      buffer.append(type);
      buffer.append(":");
      buffer.append(lineNumber);
      buffer.append("-> '");
      buffer.append(value);
      buffer.append("'");
      return buffer.toString();
    }
  }
}