/*
* Copyright (C) 2013 Sasha Vasko <sasha at aftercode dot net>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.wifiafterconnect.html;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.nodes.Element;
import com.wifiafterconnect.Constants;
import android.util.Log;
public class JavaScript {
// Enables the diagnostic Log.d output in parse() and matchCode(List).
private boolean enableDebugOuput = false;
// Value of the element's "src" attribute; empty string when built from a plain script string.
private String src;
// Raw script text handed to parse(); stays null when nothing was parsed.
private String script = null;
// Every token produced by tokenize(), including whitespace and comments.
private List<Token> tokens;
// The same sequence with WhiteSpace and Comment tokens removed by clean(); line terminators are kept.
private List<Token> tokensClean;
// Not populated yet: extractFunctions() is still an empty stub.
@SuppressWarnings("unused") // TODO :
private Map<String, String> functions = new HashMap<String,String>();// bodies of the functions
// Not populated yet: extractJQueries() is still an empty stub.
@SuppressWarnings("unused") // TODO :
private List<String> jqueries = new ArrayList<String>(); // top level JQueries without leading $
/**
 * Tells whether a code point is a JavaScript line terminator: line feed,
 * carriage return, LINE SEPARATOR (U+2028) or PARAGRAPH SEPARATOR (U+2029).
 *
 * @param c the code point to test
 * @return true when c terminates a line
 */
public static boolean isEOLChar(int c) {
    switch (c) {
    case '\n':
    case '\r':
    case 0x2028: // hexadecimal: the separators are U+2028/U+2029, not decimal 2028/2029
    case 0x2029:
        return true;
    default:
        return false;
    }
}
/*
* JavaScript could be Unicode/UTF-8, hence the mess with CodePoints
*/
/**
 * Base class of all lexical tokens. A token remembers where it starts in the
 * source, both as a char index and as a code point index, and how many code
 * points it spans. Subclasses decide, one code point at a time, whether the
 * token continues (see checkChar).
 */
public static abstract class Token {
    protected int start; //index (UTF-16 chars)
    protected int startCP; //codepoints
    protected int count; //codepoints
    protected String token = " ";

    /**
     * Creates an empty token positioned in a source that is about to be parsed.
     *
     * @param start char index of the first character of the token
     * @param startCodePoints the same position counted in code points
     */
    public Token (int start, int startCodePoints) {
        this.start = start;
        this.startCP = startCodePoints;
        this.count = 0;
    }

    /**
     * Creates a stand-alone token from its text (no source position).
     *
     * @param token the token text
     */
    public Token (String token) {
        this.start = 0;
        this.token = token;
        this.count = token.codePointCount(0, token.length());
    }

    /*
     * length may differ,
     * for example whitespace, comments and eol's all have effective length of 1
     * so this returns the count of codepoints in the source
     */
    public int getCodePointsCount() {
        return count;
    }

    /**
     * Stores the source text covered by this token. Subclasses whose text is
     * irrelevant or derived elsewhere override this with a no-op.
     */
    protected void saveToken (String source) {
        if (count > 0)
            token = source.substring(start, source.offsetByCodePoints(start, count));
    }

    /**
     * Consumes code points from the source for as long as checkChar accepts them.
     *
     * @param source the complete source text
     * @param codePointCount total number of code points in source
     * @return char index just past the token, or start when nothing was consumed
     */
    public int parse(String source, int codePointCount) {
        int maxCount = codePointCount - startCP;
        // String.codePointAt expects a char index, so the walk is done in chars
        // while count keeps track of the number of code points consumed.
        int pos = (count == 0) ? start : source.offsetByCodePoints(start, count);
        while (count < maxCount) {
            int c = source.codePointAt(pos);
            if (!checkChar(c))
                break;
            pos += Character.charCount(c);
            ++count;
        }
        if (count == 0)
            return start;
        saveToken(source);
        return pos;
    }

    /**
     * Decides whether the code point c extends this token. Called with count
     * still holding the number of code points accepted so far.
     */
    public abstract boolean checkChar(int c);

    @Override
    public String toString() {
        return token;
    }

    /** Returns a verbose description of the token for debug logging. */
    public String toDiagString() {
        return "Token class ["+getClass().getSimpleName() + "]{ start = " + start + "; count = " + count + "; token = [" + toString() + "]}";
    }

    /** Tokens are equal when they have the same class and the same text; position is ignored. */
    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        if (o == null || !o.getClass().equals(this.getClass()))
            return false;
        return this.toString().equals(o.toString());
    }

    /** Consistent with equals: derived from the class and the token text. */
    @Override
    public int hashCode() {
        return 31 * getClass().hashCode() + toString().hashCode();
    }
}
/**
 * A run of whitespace that contains no line terminators. Its text is never
 * captured from the source; it always reads as the default single space.
 */
public static class WhiteSpace extends Token {
    /** Builds a stand-alone whitespace token whose text is a single space. */
    public WhiteSpace() {
        super(" ");
    }

    /** Builds a whitespace token located at the given source position. */
    public WhiteSpace(int start, int startCodePoints) {
        super(start, startCodePoints);
    }

    /** Accepts whitespace code points, leaving line terminators to LineTerminator. */
    @Override
    public boolean checkChar(int c) {
        if (isEOLChar(c)) {
            return false;
        }
        return Character.isWhitespace(c);
    }

    /** Intentionally empty: the source text is not stored. */
    @Override
    protected void saveToken (String source) {
    }
}
/**
 * A run of consecutive line terminator characters. The source text is not
 * captured; the token always reads as the default single space.
 */
public static class LineTerminator extends Token {
    /** Builds a stand-alone line terminator token. */
    public LineTerminator() {
        super(" ");
    }

    /** Builds a line terminator token located at the given source position. */
    public LineTerminator(int start, int startCodePoints) {
        super(start, startCodePoints);
    }

    /** Accepts exactly the code points recognised by isEOLChar. */
    @Override
    public boolean checkChar(int c) {
        final boolean endOfLine = isEOLChar(c);
        return endOfLine;
    }

    /** Intentionally empty: the source text is not stored. */
    @Override
    protected void saveToken (String source) {
    }
}
/**
 * A single-line (//) or multi-line comment. The comment text is not
 * captured; the token always reads as the default single space.
 */
public static class Comment extends Token {
    private boolean multiline = false;
    // Multi-line end detector: 0 = nothing seen, 1 = '*' seen, 2 = closing "*/" seen.
    private int maybeEnd = 0;

    /** Builds a comment token located at the given source position. */
    public Comment(int start, int startCodePoints) {
        super(start, startCodePoints);
    }

    /** Builds a stand-alone comment token. */
    public Comment() {
        super(" ");
    }

    /** Intentionally empty: the source text is not stored. */
    @Override
    protected void saveToken (String source) {}

    /**
     * Accepts '/' followed by '/' or '*', then everything up to (not including)
     * the end of line for single-line comments, or up to and including the
     * closing star-slash for multi-line comments.
     */
    @Override
    public boolean checkChar(int c) {
        switch (count) {
        case 0 : return c == '/';
        case 1 : multiline = (c == '*');
            return c == '/' || c == '*';
        }
        if (!multiline)
            return !isEOLChar(c);
        switch (maybeEnd) {
        case 0:
            maybeEnd = (c == '*') ? 1 : 0;
            break;
        case 1:
            if (c == '/')
                maybeEnd = 2;
            else if (c != '*')
                maybeEnd = 0;
            // another '*' keeps the detector armed, so a run of stars before
            // the closing slash still terminates the comment
            break;
        case 2:
            return false;
        }
        return true;
    }
}
/**
 * A single- or double-quoted string literal, quotes included in the token
 * text. Parsing stops at the matching unescaped quote or at a line terminator.
 */
public static class StringLiteral extends Token {
    // Opening quote; a space means "not seen yet".
    private int startChar = ' ';
    // True when the previous character was an unescaped backslash.
    private boolean escaped = false;
    // True once the closing quote has been consumed.
    private boolean complete = false;

    /** Builds a string literal token located at the given source position. */
    public StringLiteral(int start, int startCodePoints) {
        super(start, startCodePoints);
    }

    /**
     * Builds a stand-alone literal from its text, quotes included.
     *
     * @param sl literal text; its first code point is taken as the quote character
     */
    public StringLiteral(String sl) {
        super(sl);
        final int lastCodePoint = sl.codePointBefore(sl.length());
        startChar = sl.codePointAt(0);
        complete = (lastCodePoint == startChar);
    }

    @Override
    public boolean checkChar(int c) {
        if (complete || isEOLChar(c)) {
            return false;
        }
        if (startChar == ' ') {
            // first character: remember it and require it to be a quote
            startChar = c;
            return c == '"' || c == '\'';
        }
        if (escaped) {
            // whatever follows a backslash is taken literally
            escaped = false;
            return true;
        }
        if (c == startChar) {
            complete = true;
            return true;
        }
        escaped = (c == '\\');
        return true;
    }
}
/**
 * A numeric literal: decimal digits with at most one decimal point, or a
 * hexadecimal literal introduced by 0x / 0X. Exponents are not recognised.
 */
public static class Number extends Token {
    private boolean isHex = false;
    private boolean hasDecimalPoint = false;

    /** Builds a number token located at the given source position. */
    public Number(int start, int startCodePoints) {
        super(start, startCodePoints);
    }

    /** Builds a stand-alone number token from its text. */
    public Number(String token) {
        super(token);
        hasDecimalPoint = token.indexOf('.') >= 0;
        isHex = token.startsWith ("0x") || token.startsWith ("0X");
    }

    @Override
    public boolean checkChar(int c) {
        if (count == 0) {
            // a leading zero may introduce a hex prefix
            isHex = (c == '0');
        } else if (count == 1) {
            final boolean hexMarker = (c == 'x' || c == 'X');
            if (hexMarker) {
                return isHex;
            }
            isHex = false;
        }
        if (c == '.') {
            // only the first decimal point belongs to the number
            final boolean firstPoint = !hasDecimalPoint;
            hasDecimalPoint = true;
            return firstPoint;
        }
        // TODO need to handle exponent somehow
        if (Character.isDigit(c)) {
            return true;
        }
        return isHex && "abcdefABCDEF".indexOf(c) >= 0;
    }
}
/**
 * A regular expression literal: the opening slash, the body and the closing
 * slash. A slash inside a character class or after a backslash does not end
 * the literal. Parsing also stops at a line terminator.
 */
public static class RegularExpression extends Token {
    // True when the previous character was an unescaped backslash.
    private boolean escaped = false;
    // Parser state: negative = inside a [...] class, 0 = in the body, positive = closing slash consumed.
    private int complete = 0;

    /** Builds a regular expression token located at the given source position. */
    public RegularExpression(int start, int startCodePoints) {
        super(start, startCodePoints);
    }

    /** Builds a stand-alone, already complete regular expression token from its text. */
    public RegularExpression(String token) {
        super(token);
        complete = 1;
    }

    @Override
    public boolean checkChar(int c) {
        if (complete > 0 || isEOLChar(c))
            return false;
        if (count == 0)
            return c == '/';
        if (escaped) {
            // the escaped character is taken literally
            escaped = false;
        } else if (c == '\\') {
            escaped = true;
        } else if (complete < 0) {
            // Inside a character class only ']' is special; a '[' here is a
            // literal and must not open a nested class.
            if (c == ']')
                complete = 0;
        } else if (c == '[') {
            complete = -1;
        } else if (c == '/') {
            complete = 1;
        }
        return true;
    }
}
/**
 * An identifier or keyword, recognised with Java's identifier character
 * classes (Character.isJavaIdentifierStart / isJavaIdentifierPart).
 */
public static class Identifier extends Token {
    /** Builds a stand-alone identifier token from its text. */
    public Identifier(String token) {
        super(token);
    }

    /** Builds an identifier token located at the given source position. */
    public Identifier(int start, int startCodePoints) {
        super(start, startCodePoints);
    }

    /** The first code point must be able to start an identifier; the rest must be identifier parts. */
    @Override
    public boolean checkChar(int c) {
        return (count == 0)
                ? Character.isJavaIdentifierStart(c)
                : Character.isJavaIdentifierPart(c);
    }
}
// Punctuator tables. Punctuator.checkChar() depends on the ORDER of the entries:
//  - PUNCTUATORS_SINGLE_STR and PUNCTUATORS_SINGLE list the same characters in the same order;
//    the first six (indexes 0..5: > < & | + -) are the ones checked for a doubled form or a '=' suffix;
//  - PUNCTUATORS_DOUBLE[0..3] (>> << == !=) extend with '=' to PUNCTUATORS_TRIPPLE[0..3];
//  - PUNCTUATORS_DOUBLE[0] (>>) extends with '>' to PUNCTUATORS_TRIPPLE[4] (>>>),
//    which extends with '=' to PUNCTUATORS_QUAD[0] (>>>=).
// Keep these relationships intact when adding or reordering entries.
public static final String PUNCTUATORS_SINGLE_STR = "><&|+-=!.;(),{}:[]?*/%^~";
public static final String PUNCTUATORS_SINGLE[] = {">","<","&","|","+","-","=","!",".",";","(",")",",","{","}",":","[","]","?","*","/","%","^","~"};
public static final String PUNCTUATORS_DOUBLE[] = { ">>","<<","==","!=","+=",">=","<=","-=","|=","*=","/=","&=","^=","%=", "&&", "||","++","--"};
public static final String PUNCTUATORS_TRIPPLE[] = {">>=", "<<=", "===", "!==",">>>"};
public static final String PUNCTUATORS_QUAD[] = {">>>="};
/**
 * Tells whether a code point is one of the single-character punctuators
 * listed in PUNCTUATORS_SINGLE_STR.
 */
public static boolean isPunct (int c) {
    final int position = PUNCTUATORS_SINGLE_STR.indexOf(c);
    return position != -1;
}
/**
 * An operator or punctuation token of one to four characters. The token is
 * tracked as indexes into the PUNCTUATORS_* tables rather than as captured
 * text; toString() returns the longest form recognised.
 */
public static class Punctuator extends Token {
    int singleIdx = -1;
    int doubleIdx = -1;
    int trippleIdx = -1;
    int quadIdx = -1;

    /** Builds a punctuator token located at the given source position. */
    public Punctuator(int start, int startCodePoints) {
        super(start, startCodePoints);
    }

    /**
     * Builds a stand-alone punctuator by feeding every character of the text
     * through checkChar.
     */
    public Punctuator(String token) {
        super(token);
        int maxCount = token.codePointCount(0, token.length());
        for (count = 0 ; count < maxCount ; ++count)
            checkChar (token.codePointAt(count));
    }

    /** Builds a stand-alone single-character punctuator. */
    public Punctuator(char c) {
        super(Character.toString(c));
        singleIdx = PUNCTUATORS_SINGLE_STR.indexOf(c);
        count = 1;
    }

    /** Intentionally empty: the text is derived from the table indexes. */
    @Override
    protected void saveToken (String source) {}

    /**
     * Extends the punctuator by one character when that yields a longer known
     * punctuator; which table is consulted depends on how many characters
     * have been accepted so far.
     */
    @Override
    public boolean checkChar(int c) {
        switch (count) {
        case 0 : return (singleIdx = PUNCTUATORS_SINGLE_STR.indexOf(c)) >= 0;
        case 1 :
            // Only > < & | + - (indexes 0..5) have a doubled form; every other
            // two-character punctuator ends with '='.
            if (singleIdx <= 5 || c == '=') {
                char c1 = PUNCTUATORS_SINGLE_STR.charAt(singleIdx);
                for (int i = 0 ; i < PUNCTUATORS_DOUBLE.length ; ++i){
                    if (PUNCTUATORS_DOUBLE[i].charAt(1) == c &&
                        PUNCTUATORS_DOUBLE[i].charAt(0) == c1 ){
                        doubleIdx = i;
                        break;
                    }
                }
            }
            return doubleIdx >= 0;
        case 2 :
            // ">>" + '>' gives ">>>"; the first four doubles + '=' give the matching triple
            if (c == '>' && doubleIdx == 0) trippleIdx = 4;
            else if (c == '=' && doubleIdx < 4) trippleIdx = doubleIdx;
            return trippleIdx >= 0;
        case 3 :
            if (c == '=' && trippleIdx == 4)
                quadIdx = 0;
            // Accept the fourth character only when it really completes ">>>=";
            // otherwise the character following a triple would be swallowed.
            return quadIdx >= 0;
        }
        return false;
    }

    /** Returns the text of the longest punctuator recognised, or a single space when none was. */
    @Override
    public String toString() {
        if (quadIdx >= 0) return PUNCTUATORS_QUAD[quadIdx];
        if (trippleIdx >= 0) return PUNCTUATORS_TRIPPLE[trippleIdx];
        if (doubleIdx >= 0) return PUNCTUATORS_DOUBLE[doubleIdx];
        if (singleIdx >= 0) return PUNCTUATORS_SINGLE[singleIdx];
        return " ";
    }
}
/**
 * Builds the object from a script element. The element data is parsed only
 * when the "src" attribute is empty; otherwise nothing is tokenized.
 */
public JavaScript (Element e){
    this.src = e.attr("src");
    if (!this.src.isEmpty()) {
        return;
    }
    parse (e.data());
}

/** Parses the given script text with debug output disabled. */
public JavaScript (String script){
    this(script, false);
}

/**
 * Parses the given script text.
 *
 * @param script the script source
 * @param enableDebugOuput whether token dumps are written to the log while parsing
 */
public JavaScript (String script, boolean enableDebugOuput){
    this.enableDebugOuput = enableDebugOuput;
    this.src = "";
    parse (script);
}
/**
 * Stores the script text, tokenizes it, derives the cleaned token list and,
 * when debug output is enabled, dumps both lists to the log.
 */
private void parse(String data) {
    this.script = data;
    this.tokens = tokenize (data);
    this.tokensClean = clean(this.tokens);
    if (enableDebugOuput) {
        logTokens(this.tokens, "of parsed tokens: ");
        logTokens(this.tokensClean, "of clean tokens: ");
    }
    extractFunctions();
    extractJQueries();
}

/** Logs the list size followed by the caption, then one diagnostic line per token. */
private static void logTokens(List<Token> list, String caption) {
    Log.d(Constants.TAG, list.size() + caption);
    for (Token each : list) {
        Log.d(Constants.TAG, each.toDiagString());
    }
}
/**
 * Returns a new list holding the given tokens minus WhiteSpace and Comment
 * tokens. Line terminators are kept.
 *
 * @param tokensAll tokens to filter; null yields an empty list
 */
public static List<Token> clean(List<Token> tokensAll) {
    final List<Token> kept = new ArrayList <Token>();
    if (tokensAll == null) {
        return kept;
    }
    for (Token candidate : tokensAll) {
        boolean noise = (candidate instanceof WhiteSpace) || (candidate instanceof Comment);
        if (!noise) {
            kept.add(candidate);
        }
    }
    return kept;
}
/* Keywords after which a '/' starts a regular expression rather than a division. */
private static final String[] REGEX_PRECEDING_KEYWORDS = {
    "return", "typeof", "instanceof", "in", "of", "new", "delete", "void", "throw", "case", "do", "else", "yield"
};

/**
 * Splits JavaScript source into tokens, whitespace and comments included.
 * Tokenizing stops, with an error logged, at the first character that no
 * token class accepts; the tokens collected up to that point are returned.
 *
 * @param code the script source; null yields an empty list
 * @return the tokens in source order
 */
public static List<Token> tokenize(String code) {
    List<Token> result = new ArrayList <Token>();
    if (code == null)
        return result;
    int lengthCP = code.codePointCount(0, code.length());
    int curr = 0;   // position as a char index
    int currCP = 0; // the same position counted in code points
    Token lastSignificant = null; // last token that is not whitespace, comment or line terminator
    while (currCP < lengthCP) {
        // String.codePointAt takes a char index, so look up by curr, not currCP
        int c1 = code.codePointAt(curr);
        int next = curr + Character.charCount(c1);
        int c2 = next < code.length() ? code.codePointAt(next) : 0;
        Token t = null;
        if (isEOLChar (c1))
            t = new LineTerminator(curr, currCP);
        else if (Character.isWhitespace(c1))
            t = new WhiteSpace (curr, currCP);
        else if (c1 == '/' && (c2 == '/' || c2 == '*'))
            t = new Comment(curr, currCP);
        else if (c1 == '\'' || c1 == '"')
            t = new StringLiteral(curr, currCP);
        else if ((c1 >= '0' && c1 <= '9') || (c1 == '.' && c2 >= '0' && c2 <= '9'))
            t = new Number(curr, currCP);
        else if (c1 == '/' && regexMayFollow(lastSignificant))
            t = new RegularExpression(curr, currCP);
        else if (isPunct(c1))
            t = new Punctuator(curr, currCP); // includes '/' and "/=" used as division
        else
            t = new Identifier(curr, currCP);
        curr = t.parse(code, lengthCP);
        int count = t.getCodePointsCount();
        if (count == 0) {
            Log.e(Constants.TAG, "unrecognized JavaScript token encountered");
            break;
        }
        currCP += count;
        result.add(t);
        if (!(t instanceof WhiteSpace || t instanceof LineTerminator || t instanceof Comment))
            lastSignificant = t;
    }
    return result;
}

/*
 * Heuristic for the regex/division ambiguity of '/': after an operand (a
 * number, string, regular expression, non-keyword identifier, ')' or ']')
 * the slash is a division operator; everywhere else it opens a regex literal.
 */
private static boolean regexMayFollow(Token prev) {
    if (prev == null)
        return true;
    if (prev instanceof Number || prev instanceof StringLiteral || prev instanceof RegularExpression)
        return false;
    String text = prev.toString();
    if (prev instanceof Identifier) {
        for (int i = 0 ; i < REGEX_PRECEDING_KEYWORDS.length ; ++i) {
            if (REGEX_PRECEDING_KEYWORDS[i].equals(text))
                return true;
        }
        return false;
    }
    if (prev instanceof Punctuator)
        return !(")".equals(text) || "]".equals(text));
    return true;
}
/* Placeholder called at the end of parse(); it does nothing yet. */
private void extractJQueries() {
// TODO Auto-generated method stub
}
/* Placeholder called at the end of parse(); it does nothing yet. */
private void extractFunctions() {
// TODO Auto-generated method stub
}
/**
 * Not implemented yet.
 *
 * @return always null
 */
public String getDocumentReadyFunc() {
    final String readyFunc = null;
    return readyFunc;
}
/**
 * Tokenizes the given code fragment, drops its whitespace and comments and
 * searches the parsed script for it.
 *
 * @param code the code fragment to look for
 * @return the result of matchCode(List) for the cleaned tokens of the fragment
 */
public int matchCode (String code) {
    final List<Token> allTokens = tokenize (code);
    return matchCode (clean(allTokens));
}
/**
 * Searches the cleaned script tokens for the given token sequence. Line
 * terminators are ignored on both sides, so a match may span lines.
 *
 * @param tokenizedCode the token sequence to look for
 * @return index into the cleaned token list where the first match starts,
 *         or -1 when there is no match, nothing was parsed, or the argument is null
 */
public int matchCode (List<Token> tokenizedCode) {
    if (tokensClean == null || tokenizedCode == null)
        return -1;
    final int patternSize = tokenizedCode.size();
    final int scriptSize = tokensClean.size();
    for (int i = 0 ; i < scriptSize ; ++i) {
        // a match never starts on a line terminator
        if (tokensClean.get(i) instanceof LineTerminator)
            continue;
        int curr_t1 = i;
        int curr_t2 = 0 ;
        boolean match = true;
        while (match && curr_t2 < patternSize && curr_t1 < scriptSize) {
            Token t2 = tokenizedCode.get(curr_t2++);
            if (!(t2 instanceof LineTerminator)) {
                Token t1 = tokensClean.get(curr_t1++);
                if (t1 instanceof LineTerminator){
                    // skip the script's line terminator and retry the same pattern token
                    curr_t2--;
                }else{
                    match = t1.equals(t2);
                    if (enableDebugOuput && match) {
                        Log.d(Constants.TAG, "tok#" + curr_t1 + " " + t1.toDiagString() + " matches tok#" + curr_t2 + " " + t2.toDiagString());
                    }
                }
            }
        }
        // Line terminators left at the end of the pattern need no counterpart in the script.
        while (match && curr_t2 < patternSize && tokenizedCode.get(curr_t2) instanceof LineTerminator)
            ++curr_t2;
        // Running out of script tokens before the pattern is used up is not a match.
        if (match && curr_t2 >= patternSize)
            return i;
    }
    return -1;
}
/**
 * Looks for the first occurrence of "varName =" in the script and returns the
 * text of the token that follows the assignment sign.
 *
 * @param varName name of the variable to look for
 * @return text of the token after '=', or an empty string when there is none
 */
public String findAssignedValue (String varName) {
    final List<Token> pattern = new ArrayList <Token>(2);
    pattern.add (new Identifier(varName));
    pattern.add (new Punctuator("="));
    final int found = matchCode (pattern);
    if (found < 0) {
        return "";
    }
    // TODO implement proper evaluation of expressions :
    final int valueIdx = found + 2;
    if (valueIdx >= tokensClean.size()) {
        return "";
    }
    return tokensClean.get(valueIdx).toString();
}
/**
 * Renders tokens back into text: a newline for every line terminator, and the
 * token text followed by one space for everything else.
 *
 * @param tokens tokens to render; null yields an empty string
 */
public static String scriptFromTokens (List<Token> tokens) {
    if (tokens == null) {
        return "";
    }
    final StringBuilder out = new StringBuilder();
    for (Token each : tokens) {
        if (!(each instanceof LineTerminator)) {
            out.append(each.toString());
            out.append(' ');
        } else {
            out.append('\n');
        }
    }
    return out.toString();
}
/** Returns the script rebuilt from the cleaned token list (see scriptFromTokens). */
public String getClean() {
    final String rendered = scriptFromTokens (this.tokensClean);
    return rendered;
}
/**
 * Switches the diagnostic log output on or off.
 *
 * @param enableDebugOuput true to enable the output
 */
public void setEnableDebugOuput(boolean enableDebugOuput) {
    final boolean requested = enableDebugOuput;
    this.enableDebugOuput = requested;
}
private static final Punctuator punctSemicolon = new Punctuator (';');
private static final Punctuator punctAdd = new Punctuator ('+');
private static final Punctuator punctAssign = new Punctuator ('=');

/**
 * Evaluates, very roughly, the string expression that starts at the given
 * index of the cleaned token list and runs up to the next semicolon. Tokens
 * are read as (operator, operand) pairs. Operands that are string literals
 * or identifiers (resolved through evalStringVar) are concatenated when the
 * operator is '=' or '+'; every other operator is ignored.
 *
 * @param idx index of the first token to evaluate, normally the '=' sign
 * @return the concatenated string; empty when nothing was parsed or idx is negative
 */
public String eval (int idx) {
    if (tokensClean == null || idx < 0)
        return "";
    final int size = tokensClean.size();
    StringBuilder result = new StringBuilder();
    while (idx < size) {
        Token tok = tokensClean.get(idx++);
        if (enableDebugOuput)
            Log.d(Constants.TAG, "eval(" + idx + "): tok = " + tok.toString());
        if (tok.equals(punctSemicolon))
            break;
        if (!(tok instanceof Punctuator))
            continue;
        // the operand may sit on the next line
        while (idx < size && tokensClean.get(idx) instanceof LineTerminator)
            ++idx;
        if (idx >= size)
            break;
        Token tokOp = tokensClean.get(idx++);
        if (enableDebugOuput)
            Log.d(Constants.TAG, "eval(" + idx + "): tokOp = " + tokOp.toString());
        String strOp = "";
        if (tokOp instanceof StringLiteral) {
            strOp = unquote(tokOp.toString());
        }else if (tokOp instanceof Identifier) {
            strOp = evalStringVar(tokOp.toString());
        }
        if (enableDebugOuput)
            Log.d(Constants.TAG, "eval(" + idx + "): strOp = " + strOp);
        // only support addition for now
        if (tok.equals(punctAssign) || tok.equals(punctAdd))
            result.append(strOp);
    }
    return result.toString();
}

/*
 * Strips the opening quote and, when present, the matching closing quote
 * from a string literal. Unterminated literals keep all of their content.
 */
private static String unquote(String literal) {
    int len = literal.length();
    if (len == 0)
        return "";
    boolean closed = len >= 2 && literal.charAt(len - 1) == literal.charAt(0);
    return literal.substring(1, closed ? len - 1 : len);
}
/* Names currently being resolved by evalStringVar; guards against endless recursion. */
private final List<String> varsBeingEvaluated = new ArrayList<String>();

/**
 * Finds the first "varName =" in the script and evaluates the expression
 * assigned there with eval.
 *
 * @param varName name of the variable to resolve
 * @return the evaluated string; empty when the name is null, is not assigned
 *         anywhere, or is already being resolved further up the call chain
 */
public String evalStringVar(String varName) {
    // A variable defined in terms of itself (x = x + "a"), directly or through
    // other variables, would otherwise recurse until the stack overflows.
    if (varName == null || varsBeingEvaluated.contains(varName))
        return "";
    List<Token> tokenizedCode = new ArrayList <Token>();
    tokenizedCode.add (new Identifier(varName));
    tokenizedCode.add (new Punctuator("="));
    int idx = matchCode (tokenizedCode);
    if (enableDebugOuput)
        Log.d(Constants.TAG, "evalStringVar(" + varName + "): idx = " + idx);
    if (idx < 0)
        return "";
    varsBeingEvaluated.add(varName);
    try {
        // idx points at the identifier, so evaluation starts at the token after it
        return eval (idx+1);
    } finally {
        varsBeingEvaluated.remove(varName);
    }
}
}