JavaScript.java example

Explorer
WiFiAfterConnect-master
- src
  - com
    - wifiafterconnect
/*
 * Copyright (C) 2013 Sasha Vasko <sasha at aftercode dot net> 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.wifiafterconnect.html;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.jsoup.nodes.Element;

import com.wifiafterconnect.Constants;

import android.util.Log;

public class JavaScript {

	// Enables the Log.d diagnostics that are guarded by this flag (see parse() and matchCode()).
	private boolean enableDebugOuput = false;
	// Value of the element's "src" attribute, or "" when the script text was passed in directly.
	private String src;
	// Raw script text given to parse(); stays null when parse() was never called (external "src").
	private String script = null;
	// Every token produced by tokenize(), including whitespace and comments.
	private List<Token> tokens;
	// tokens with WhiteSpace and Comment entries removed by clean(); LineTerminators are kept.
	private List<Token> tokensClean;
	@SuppressWarnings("unused") // TODO : never read or written anywhere in this class yet
	private Map<String, String> functions = new HashMap<String,String>();// bodies of the functions
	@SuppressWarnings("unused") // TODO : never read or written anywhere in this class yet
	private List<String> jqueries = new ArrayList<String>(); // top level JQueries without leading $

	/**
	 * Tells whether a code point is a JavaScript line terminator: LINE FEED,
	 * CARRIAGE RETURN, LINE SEPARATOR (U+2028) or PARAGRAPH SEPARATOR (U+2029).
	 *
	 * @param c Unicode code point to test
	 * @return {@code true} when {@code c} terminates a line
	 */
	public static boolean isEOLChar(int c) {
		// The separators are U+2028 / U+2029, i.e. hexadecimal values. The decimal
		// literals 2028 / 2029 denote unrelated code points (U+07EC / U+07ED).
		return c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029;
	}
	
	/*
	 * JavaScript could be Unicode/UTF-8, hence the mess with CodePoints
	 */
	
	/**
	 * Base class of all lexical tokens. A token remembers where it starts in the
	 * source (both as a UTF-16 char index and as a code point index), how many
	 * source code points it covers, and the text it stands for.
	 * Subclasses decide, one code point at a time, whether a character still
	 * belongs to the token by implementing {@link #checkChar(int)}.
	 */
	public static abstract class Token {
		protected int start;  // char (UTF-16) index of the first character in the source
		protected int startCP;  // the same position counted in code points
		protected int count;  	// number of source code points accepted so far
		protected String token = " ";
		
		/**
		 * Creates an empty token positioned in a source string; {@link #parse} fills it in.
		 *
		 * @param start char index of the token's first character
		 * @param startCodePoints the same position expressed in code points
		 */
		public Token (int start, int startCodePoints) {
			this.start = start;
			this.startCP = startCodePoints;
			this.count = 0;
		}

		/**
		 * Creates a stand-alone token (not tied to a source position) from its text.
		 *
		 * @param token text of the token
		 */
		public Token (String token) {
			this.start = 0;
			this.token = token;
			this.count = token.codePointCount(0, token.length());
		}
		/* 
		 * length may differ, 
		 * for example whitespace, comments and eol's all have effective length of 1
		 * so this return count of codepoints in the source
		 */
		public int getCodePointsCount() {
			return count;
		}
		
		/**
		 * Stores the accepted source text as the token text; does nothing for an empty token.
		 *
		 * @param source the string the token was parsed from
		 */
		protected void saveToken (String source) {
			if (count > 0)
				token = source.substring(start, source.offsetByCodePoints(start, count));
		}
		
		/**
		 * Consumes characters from {@code source}, beginning at this token's start,
		 * for as long as {@link #checkChar(int)} accepts them.
		 *
		 * @param source the text being tokenized
		 * @param codePointCount total number of code points in {@code source}
		 * @return char index of the first character after the token, or the
		 *         token's own start index when nothing was accepted
		 */
		public int parse(String source, int codePointCount) {
			int maxPos = codePointCount - startCP;
			// String.codePointAt() expects a char index, so walk the source with a
			// char position that advances by 1 or 2 chars per accepted code point.
			int pos = source.offsetByCodePoints(start, count);

			while (count < maxPos && pos < source.length()) {
				int c = source.codePointAt(pos);
				// checkChar() implementations look at 'count', so it must be
				// incremented only after the character has been accepted.
				if (!checkChar(c))
					break;
				pos += Character.charCount(c);
				++count;
			}

			if (count == 0) 
				return start;

			saveToken(source);
			return pos;
		}
		
		/**
		 * Decides whether the next source character belongs to this token.
		 * Called with {@code count} equal to the number of characters accepted so far.
		 *
		 * @param c code point to examine
		 * @return {@code true} to accept the character, {@code false} to end the token
		 */
		public abstract boolean checkChar(int c);
		
		@Override
		public String toString() {
			return token;
		}

		/** Returns a verbose description (class, start, count, text) for debug logs. */
		public String toDiagString() {
			return "Token class ["+getClass().getSimpleName() + "]{ start = " + start + "; count = " + count + "; token = [" + toString() + "]}";
		}

		/**
		 * Two tokens are equal when they are of exactly the same class and have the
		 * same text; source position is ignored. {@code null} is never equal.
		 */
		@Override
		public boolean equals(Object o) {
			if (o == this)
				return true;
			if (o == null || !o.getClass().equals(this.getClass()))
				return false;
			return this.toString().equals(((Token)o).toString());
		}

		/** Consistent with {@link #equals(Object)}: derived from class and text only. */
		@Override
		public int hashCode() {
			return 31 * getClass().hashCode() + toString().hashCode();
		}
	}
	
	/**
	 * A run of whitespace characters that are not line terminators.
	 * The token text is always a single blank, whatever was in the source.
	 */
	public static class WhiteSpace extends Token {

		/** Creates an empty whitespace token at the given source position. */
		public WhiteSpace(int start, int startCodePoints) {
			super(start, startCodePoints);
		}
		
		/** Creates a stand-alone whitespace token. */
		public WhiteSpace() {
			super(" ");
		}

		@Override
		protected void saveToken (String source) {
			// deliberately empty: the text stays " "
		}

		@Override
		public boolean checkChar(int c) {
			if (isEOLChar(c))
				return false;
			return Character.isWhitespace(c);
		}
	} 
	/**
	 * A run of one or more consecutive line-terminator characters.
	 * The token text is always a single blank, whatever was in the source.
	 */
	public static class LineTerminator  extends Token {

		/** Creates an empty line-terminator token at the given source position. */
		public LineTerminator(int start, int startCodePoints) {
			super(start, startCodePoints);
		}

		/** Creates a stand-alone line-terminator token. */
		public LineTerminator() {
			super(" ");
		}
		
		@Override
		protected void saveToken (String source) {
			// deliberately empty: the text stays " "
		}

		@Override
		public boolean checkChar(int c) {
			final boolean endOfLine = isEOLChar(c);
			return endOfLine;
		}
	}
	/**
	 * A single-line ("// ...") or multi-line ("/* ... *&#47;") comment.
	 * The comment text is not kept; the token text is always a single blank.
	 */
	public static class Comment extends Token {
		
		// States of the end-of-block-comment detector kept in maybeEnd.
		private static final int END_NONE = 0;	// last character was not '*'
		private static final int END_STAR = 1;	// last character was '*', a '/' would close the comment
		private static final int END_DONE = 2;	// closing "*/" has been consumed

		private boolean multiline = false;
		private int maybeEnd = END_NONE;

		/** Creates an empty comment token at the given source position. */
		public Comment(int start, int startCodePoints) {
			super(start, startCodePoints);
		}
		
		/** Creates a stand-alone comment token. */
		public Comment() {
			super(" ");
		}
		
		@Override
		protected void saveToken (String source) {
			// deliberately empty: the text stays " "
		}

		/**
		 * Accepts "//" up to (not including) the end of line, or "/*" up to and
		 * including the closing "*&#47;".
		 */
		@Override
		public boolean checkChar(int c) {
			switch (count) {
				case 0 : 	return c== '/';
				case 1 : 	multiline = (c == '*');
							return c== '/' || c == '*';
			}
			if (!multiline)
				return !isEOLChar(c);
			switch (maybeEnd) {
				case END_NONE:
					maybeEnd = (c == '*') ? END_STAR : END_NONE;
					break;
				case END_STAR:
					// A further '*' keeps us in the "star seen" state; resetting
					// here would make comments closed by "**/" run on forever.
					if (c == '/')
						maybeEnd = END_DONE;
					else if (c != '*')
						maybeEnd = END_NONE;
					break;
				case END_DONE:
					return false;
			}
			return true;
		}
	}
	/**
	 * A string literal delimited by single or double quotes. The token text
	 * includes the quotes. Scanning stops at the matching unescaped quote or
	 * at a line terminator, whichever comes first.
	 */
	public static class StringLiteral extends Token {
		
		private int startChar = ' ';		// opening quote; ' ' until the first character is seen
		private boolean escaped = false;	// previous character was an unescaped backslash
		private boolean complete = false;	// closing quote already consumed

		/** Creates an empty literal token at the given source position. */
		public StringLiteral(int start, int startCodePoints) {
			super(start, startCodePoints);
		}
		
		/**
		 * Creates a stand-alone literal from its text, quotes included.
		 *
		 * @param sl literal text; must not be empty
		 */
		public StringLiteral(String sl) {
			super(sl);
			startChar = sl.codePointAt(0);
			final int lastChar = sl.codePointBefore(sl.length());
			complete = (lastChar == startChar);
		}


		@Override
		public boolean checkChar(int c) {
			if (complete)
				return false;
			if (isEOLChar(c))
				return false;

			if (startChar == ' ') {
				// first character: remember it and require a quote
				startChar = c;
				return c == '"' || c == '\'';
			}

			final boolean wasEscaped = escaped;
			if (!wasEscaped && c == startChar) {
				complete = true;
				return true;
			}
			escaped = !wasEscaped && c == '\\';
			return true;
		}
	}

	/**
	 * A numeric literal: decimal digits with at most one decimal point, or a
	 * hexadecimal literal introduced by "0x" / "0X". Exponents are not handled.
	 */
	public static class Number extends Token {

		private boolean isHex = false;
		private boolean hasDecimalPoint = false;
		
		/** Creates an empty number token at the given source position. */
		public Number(int start, int startCodePoints) {
			super(start, startCodePoints);
		}

		/** Creates a stand-alone number token from its text. */
		public Number(String token) {
			super(token);
			final boolean lowerPrefix = token.startsWith ("0x");
			isHex = lowerPrefix || token.startsWith ("0X");
			hasDecimalPoint = token.indexOf('.') != -1;
		}

		@Override
		public boolean checkChar(int c) {
			if (count == 0) {
				// a leading zero may turn out to be the start of "0x"
				isHex = (c == '0');
			} else if (count == 1) {
				if (c == 'x' || c == 'X')
					return isHex;
				isHex = false;
			}

			if (c == '.') {
				if (hasDecimalPoint)
					return false;
				hasDecimalPoint = true;
				return true;
			}

			// TODO need to handle exponent somehow
			if (Character.isDigit(c))
				return true;
			return isHex && "abcdefABCDEF".indexOf(c) >= 0;
		}
	}

	/**
	 * A regular expression literal: everything from the opening '/' up to and
	 * including the closing unescaped '/' that is not inside a character class.
	 * Scanning also stops at a line terminator. Flags after the closing '/' are
	 * not part of this token.
	 */
	public static class RegularExpression extends Token {

		// Values of 'complete': negative while inside a [...] character class,
		// 0 while scanning the regular body, positive once the closing '/' was consumed.
		private static final int IN_CLASS = -1;
		private static final int IN_BODY = 0;
		private static final int CLOSED = 1;

		private boolean escaped = false;
		private int complete = IN_BODY;

		/** Creates an empty regular expression token at the given source position. */
		public RegularExpression(int start, int startCodePoints) {
			super(start, startCodePoints);
		}

		/** Creates a stand-alone, already complete, regular expression token from its text. */
		public RegularExpression(String token) {
			super(token);
			complete = CLOSED;
		}

		@Override
		public boolean checkChar(int c) {
			if (complete > 0 || isEOLChar(c))
				return false;
			if (count == 0)
				return c == '/';

			if (escaped) {
				// the character after a backslash is taken literally
				escaped = false;
				return true;
			}

			if (c == '\\') {
				escaped = true;
			} else if (complete < 0) {
				// Inside a character class only ']' is special. A '[' is a plain
				// character here; counting it as nesting would keep a pattern
				// such as /[[]/ open until the end of the line.
				if (c == ']')
					complete = IN_BODY;
			} else if (c == '[') {
				complete = IN_CLASS;
			} else if (c == '/') {
				complete = CLOSED;
			}
			return true;
		}
	} 
	/**
	 * An identifier or keyword. Java's identifier character classes are used
	 * to decide which characters may start and continue the name.
	 */
	public static class Identifier extends Token {

		/** Creates an empty identifier token at the given source position. */
		public Identifier(int start, int startCodePoints) {
			super(start, startCodePoints);
		}

		/** Creates a stand-alone identifier token from its text. */
		public Identifier(String token) {
			super(token);
		}

		@Override
		public boolean checkChar(int c) {
			final boolean first = (count == 0);
			return first ? Character.isJavaIdentifierStart(c)
						 : Character.isJavaIdentifierPart(c);
		}
	} 
	

	// Punctuator tables. The ORDER of the entries is significant, because
	// Punctuator.checkChar() works with positions in these tables:
	//  - positions 0..5 of PUNCTUATORS_SINGLE_STR (> < & | + -) are the characters
	//    that may be followed by something other than '=' to form a double punctuator;
	//  - PUNCTUATORS_SINGLE must list the same characters in the same order as
	//    PUNCTUATORS_SINGLE_STR (both are indexed with the same position);
	//  - entries 0..3 of PUNCTUATORS_DOUBLE followed by '=' give entries 0..3 of
	//    PUNCTUATORS_TRIPPLE, and entry 0 (">>") followed by '>' gives entry 4 (">>>");
	//  - entry 4 of PUNCTUATORS_TRIPPLE followed by '=' gives entry 0 of PUNCTUATORS_QUAD.
	public static final String PUNCTUATORS_SINGLE_STR = "><&|+-=!.;(),{}:[]?*/%^~";
	public static final String PUNCTUATORS_SINGLE[] = {">","<","&","|","+","-","=","!",".",";","(",")",",","{","}",":","[","]","?","*","/","%","^","~"};
	public static final String PUNCTUATORS_DOUBLE[] = {	">>","<<","==","!=","+=",">=","<=","-=","|=","*=","/=","&=","^=","%=", "&&", "||","++","--"};
	public static final String PUNCTUATORS_TRIPPLE[] = {">>=", "<<=", "===", "!==",">>>"};
	public static final String PUNCTUATORS_QUAD[] = {">>>="};	
	
	/**
	 * Tells whether a code point is one of the single-character punctuators.
	 *
	 * @param c code point to test
	 * @return {@code true} when {@code c} occurs in {@link #PUNCTUATORS_SINGLE_STR}
	 */
	public static boolean isPunct (int c) {
		final int position = PUNCTUATORS_SINGLE_STR.indexOf(c);
		return position != -1;
	}
	
	/**
	 * A punctuator of one to four characters (for example "+", "==", "!==", ">>>=").
	 * The recognised punctuator is remembered as a position in the PUNCTUATORS_*
	 * tables, and {@link #toString()} returns the longest one that was matched.
	 */
	public static class Punctuator extends Token {
		int singleIdx = -1;
		int doubleIdx = -1;
		int trippleIdx = -1;
		int quadIdx = -1;
		
		/** Creates an empty punctuator token at the given source position. */
		public Punctuator(int start, int startCodePoints) {
			super(start, startCodePoints);
		}

		/**
		 * Creates a stand-alone punctuator by feeding the text through
		 * {@link #checkChar(int)} one character at a time.
		 *
		 * @param token punctuator text
		 */
		public Punctuator(String token) {
			super(token);
			int maxCount = token.codePointCount(0, token.length());
			for (count = 0 ; count < maxCount ; ++count)
				checkChar (token.codePointAt(count));
		}

		/** Creates a stand-alone single-character punctuator. */
		public Punctuator(char c) {
			super(Character.toString(c));
			singleIdx = PUNCTUATORS_SINGLE_STR.indexOf(c);
			count = 1;
		}

		@Override
		protected void saveToken (String source) {
			// deliberately empty: the text is derived from the table positions
		}

		/**
		 * Accepts {@code c} only when it extends the punctuator matched so far
		 * into a longer known punctuator.
		 */
		@Override
		public boolean checkChar(int c) {
			
			switch (count) {
				case 0 : return (singleIdx = PUNCTUATORS_SINGLE_STR.indexOf(c)) >= 0;
				case 1 : 
					// singleIdx can be -1 here when the String constructor was given
					// text that does not start with a punctuator; charAt(-1) would throw.
					if (singleIdx >= 0 && (singleIdx <= 5 || c == '=')) {
						char c1 =  PUNCTUATORS_SINGLE_STR.charAt(singleIdx);
						for (int i = 0 ; i < PUNCTUATORS_DOUBLE.length ; ++i){
							if (PUNCTUATORS_DOUBLE[i].charAt(1) == c && 
									PUNCTUATORS_DOUBLE[i].charAt(0) == c1 ){
								doubleIdx = i;
								break;
							}
						}
					}
					return doubleIdx >= 0;
				case 2 : 
					if (c == '>' && doubleIdx == 0)	trippleIdx = 4;
					else if (c == '=' && doubleIdx >= 0 && doubleIdx < 4) trippleIdx = doubleIdx;
					return trippleIdx >= 0;
				case 3 : 
					if (c == '=' && trippleIdx == 4)
						quadIdx = 0;
					// Only ">>>=" has four characters. Answering with trippleIdx here
					// (always >= 0 at this point) would swallow whatever character
					// follows a three-character punctuator such as "===".
					return quadIdx >= 0;
			}
			return false;	
		}

		@Override
		public String toString() {
			if (quadIdx >= 0) return PUNCTUATORS_QUAD[quadIdx];
			if (trippleIdx >= 0) return PUNCTUATORS_TRIPPLE[trippleIdx];
			if (doubleIdx >= 0) return PUNCTUATORS_DOUBLE[doubleIdx];
			if (singleIdx >= 0) return PUNCTUATORS_SINGLE[singleIdx];
			return " ";
		}
		
	}
	
	public JavaScript (Element e){
		src = e.attr("src");
		if (src.isEmpty()) {
			parse (e.data());
		}
	}

	/**
	 * Builds the object from script text, with debug output disabled.
	 *
	 * @param script JavaScript source text to parse
	 */
	public JavaScript (String script){
		this (script, false);
	}

	/**
	 * Builds the object from script text.
	 *
	 * @param script JavaScript source text to parse
	 * @param enableDebugOuput when {@code true}, diagnostics are logged while parsing
	 */
	public JavaScript (String script, boolean enableDebugOuput){
		this.src = "";
		this.enableDebugOuput = enableDebugOuput;
		parse (script);
	}
	
	/**
	 * Tokenizes the script, derives the cleaned token list and, when debug
	 * output is enabled, logs both lists.
	 *
	 * @param data JavaScript source text
	 */
	private void parse(String data) {
		script = data;
		tokens = tokenize (script);
		tokensClean = clean(tokens);
		if (enableDebugOuput) {
			// the count and the label used to run together ("12of parsed tokens")
			Log.d(Constants.TAG, tokens.size() + " parsed tokens: ");
			for (Token t : tokens)
				Log.d(Constants.TAG, t.toDiagString());
			Log.d(Constants.TAG, tokensClean.size() + " clean tokens: ");
			for (Token t : tokensClean)
				Log.d(Constants.TAG, t.toDiagString());
		}
		extractFunctions();
		extractJQueries();
	}
	
	/**
	 * Returns a new list holding the given tokens without WhiteSpace and
	 * Comment entries. All other tokens, LineTerminators included, are kept
	 * in their original order.
	 *
	 * @param tokensAll tokens to filter; may be {@code null}
	 * @return a new list, empty when {@code tokensAll} is {@code null}
	 */
	public static List<Token> clean(List<Token> tokensAll) {
		final List<Token> kept = new ArrayList <Token>();
		if (tokensAll == null)
			return kept;

		for (Token candidate : tokensAll) {
			final boolean insignificant = candidate instanceof WhiteSpace || candidate instanceof Comment;
			if (!insignificant)
				kept.add(candidate);
		}
		return kept;
	}
	
	/**
	 * Splits JavaScript source text into tokens. The kind of each token is
	 * chosen from its first one or two characters; the token object then
	 * consumes as many characters as it accepts. Tokenizing stops early (with
	 * an error log entry) at the first character no token type accepts.
	 *
	 * @param code JavaScript source text; {@code null} is treated as empty
	 * @return the tokens in source order, whitespace and comments included
	 */
	public static List<Token> tokenize(String code) {
		List<Token> result = new ArrayList <Token>();
		if (code == null)
			return result;

		int lengthCP = code.codePointCount(0, code.length());
		int curr = 0;		// char (UTF-16) index into code
		int currCP = 0;		// the same position counted in code points
		
		while (currCP < lengthCP) {
			// String.codePointAt() takes a char index, so the look-ahead must use
			// curr; currCP differs from it after any supplementary character.
			int c1 = code.codePointAt(curr);
			int next = curr + Character.charCount(c1);
			int c2 = next < code.length() ? code.codePointAt(next) : 0;
			Token t = null;
			if (isEOLChar (c1))
				t = new LineTerminator(curr, currCP);
			else if (Character.isWhitespace(c1))
				t = new WhiteSpace (curr, currCP);
			else if (c1 == '/' && (c2 == '/' || c2 == '*'))
				t = new Comment(curr, currCP);
			else if (c1 == '\'' || c1 == '"')
				t = new StringLiteral(curr, currCP);
			else if ((c1 >= '0' && c1 <= '9') || (c1 == '.' && c2 >= '0' && c2 <= '9'))
				t = new Number(curr, currCP);
			else if (/*TODO parseRegex &&*/ c1 == '/')
				// NOTE: every remaining '/' is taken as the start of a regular
				// expression; division is not distinguished yet.
				t = new RegularExpression(curr, currCP);
			else if (isPunct(c1))
				t = new Punctuator(curr, currCP);
			else
				t = new Identifier(curr, currCP);
			
			curr = t.parse(code, lengthCP);
			int count = t.getCodePointsCount();
			if (count == 0) {
				Log.e(Constants.TAG, "unrecognized JavaScript token encountered");
				break;
			}
			
			currCP += count;
			result.add(t);
		}
		
		return result;
	}

	/**
	 * Not implemented yet: currently does nothing. Called at the end of parse().
	 * Presumably meant to fill the jqueries list - TODO confirm when implementing.
	 */
	private void extractJQueries() {
		// TODO Auto-generated method stub
		
	}

	/**
	 * Not implemented yet: currently does nothing. Called from parse() right
	 * after the token lists are built.
	 * Presumably meant to fill the functions map - TODO confirm when implementing.
	 */
	private void extractFunctions() {
		// TODO Auto-generated method stub
		
	}


	/**
	 * Not implemented yet.
	 *
	 * @return always {@code null} in the current implementation
	 */
	public String getDocumentReadyFunc() {
			
		return null;
	}
	
	/**
	 * Tokenizes {@code code}, drops its whitespace and comments, and searches
	 * the parsed script for that token sequence.
	 *
	 * @param code JavaScript fragment to look for
	 * @return index into the cleaned token list where the match starts, or -1
	 */
	public int matchCode (String code) {
		return matchCode (clean(tokenize (code)));
	}
	
	/**
	 * Searches the cleaned tokens of the parsed script for a token sequence.
	 * Line terminators are ignored on both sides, so the pattern matches even
	 * when the script breaks lines differently.
	 *
	 * @param tokenizedCode token sequence to look for; may be {@code null}
	 * @return index into the cleaned token list of the first token of the
	 *         first match, or -1 when there is no match or nothing was parsed
	 */
	public int matchCode (List<Token> tokenizedCode) {
		if (tokensClean == null || tokenizedCode == null)
			return -1;
		for (int i = 0 ; i < tokensClean.size() ; ++i) {
			
			// a match never starts on a line terminator
			if (tokensClean.get(i) instanceof LineTerminator)
				continue;
			
			int curr_t1 = i;
			int curr_t2 = 0 ;
			boolean match = true;
			
			while (match && curr_t2 < tokenizedCode.size() && curr_t1 < tokensClean.size()) {
				Token t2 = tokenizedCode.get(curr_t2++);
				if (!(t2 instanceof LineTerminator)) { 
					Token t1 = tokensClean.get(curr_t1++);
					if (t1 instanceof LineTerminator){ 
						// skip the script's line break and retry the same pattern token
						curr_t2--;
					}else{
						match = t1.equals(t2);
						if (enableDebugOuput && match) {
							Log.d(Constants.TAG, "tok#" + curr_t1 + " " + t1.toDiagString() + " matches tok#" + curr_t2 + " " + t2.toDiagString());
						}
					}
				}
			}
			
			// The loop also ends when the script runs out of tokens. That is a
			// match only if nothing but line terminators is left in the pattern.
			while (match && curr_t2 < tokenizedCode.size()
					&& tokenizedCode.get(curr_t2) instanceof LineTerminator)
				++curr_t2;

			if (match && curr_t2 >= tokenizedCode.size())
				return i;
		}
		
		return -1;
	}
	
	/**
	 * Finds the first "varName =" in the parsed script and returns the text of
	 * the token that follows the '='. No expression evaluation is done.
	 *
	 * @param varName name of the variable to look for
	 * @return text of the token after the '=', or "" when the assignment is
	 *         not found or nothing follows it
	 */
	public String findAssignedValue (String varName) {
		List<Token> tokenizedCode = new ArrayList <Token>();
		tokenizedCode.add (new Identifier(varName));
		tokenizedCode.add (new Punctuator("=")); 

		int idx = matchCode (tokenizedCode);
		if (idx < 0)
			return "";

		// matchCode() ignores line terminators, so the identifier, the '=' and
		// the value need not be adjacent in tokensClean: walk forward and count
		// only the significant tokens instead of using a fixed offset.
		// TODO implement proper evaluation of expressions :
		int significantSeen = 0;
		for ( ; idx < tokensClean.size() ; ++idx) {
			Token t = tokensClean.get(idx);
			if (t instanceof LineTerminator)
				continue;
			if (significantSeen == 2)	// identifier and '=' are behind us
				return t.toString();
			++significantSeen;
		}
		return "";
	}
		
	/**
	 * Renders tokens back into text: each LineTerminator becomes a newline,
	 * every other token is written followed by a single blank.
	 *
	 * @param tokens tokens to render; may be {@code null}
	 * @return the rendered text, empty when {@code tokens} is {@code null}
	 */
	public static String scriptFromTokens (List<Token> tokens) {
		final StringBuilder out = new StringBuilder();
		if (tokens == null)
			return out.toString();

		for (Token current : tokens) {
			if (current instanceof LineTerminator) {
				out.append('\n');
				continue;
			}
			out.append(current.toString());
			out.append(' ');
		}
		return out.toString();
	}
	
	/**
	 * Returns the parsed script rendered from its cleaned tokens, i.e. without
	 * comments and with whitespace normalised by {@link #scriptFromTokens(List)}.
	 *
	 * @return the rendered text; empty when nothing was parsed
	 */
	public String getClean() {
		final List<Token> significant = tokensClean;
		return scriptFromTokens (significant);
	}

	/**
	 * Switches the diagnostic log output on or off.
	 *
	 * @param enableDebugOuput {@code true} to enable the output
	 */
	public void setEnableDebugOuput(boolean enableDebugOuput) {
		this.enableDebugOuput = enableDebugOuput;
	}
	
	private static final Punctuator punctSemicolon = new Punctuator (';');
	private static final Punctuator punctAdd = new Punctuator ('+');
	private static final Punctuator punctAssign = new Punctuator ('=');
	
	/**
	 * Evaluates, very roughly, the string expression that starts at a position
	 * in the cleaned token list. Tokens are read up to the next ';'. The
	 * operand following each '=' or '+' is appended to the result: a string
	 * literal contributes its text without the quotes, an identifier
	 * contributes the value found by {@link #evalStringVar(String)}, anything
	 * else contributes nothing. Only concatenation is supported.
	 *
	 * @param idx index into the cleaned token list to start at (normally the '=')
	 * @return the concatenated string; "" when nothing was parsed or found
	 */
	public String eval (int idx) {
		return eval (idx, new ArrayList <String>());
	}

	/**
	 * Worker for {@link #eval(int)}.
	 *
	 * @param pendingVars names of the variables whose assignments are being
	 *        evaluated further up the call chain; used to stop cyclic references
	 */
	private String eval (int idx, List<String> pendingVars) {
		StringBuilder result = new StringBuilder();
		if (tokensClean == null)
			return result.toString();

		while (idx  < tokensClean.size()) {
			Token tok = tokensClean.get(idx++);
			logEval ("eval(" + idx + "): tok = " + tok.toString());
			if (tok.equals(punctSemicolon))
				break;
			if (tok instanceof Punctuator && idx < tokensClean.size()) {
				// the token after any punctuator is consumed as its operand
				Token tokOp = tokensClean.get(idx++);
				logEval ("eval(" + idx + "): tokOp = " + tokOp.toString());

				// only support addition for now
				boolean contributes = tok.equals(punctAssign) || tok.equals(punctAdd);
				if (!contributes)
					continue;

				String strOp = "";
				if (tokOp instanceof StringLiteral) {
					strOp = stripQuotes (tokOp.toString());
				}else if (tokOp instanceof Identifier) {
					strOp = evalStringVar (tokOp.toString(), pendingVars);
				}
				logEval ("eval(" + idx + "): strOp = " + strOp);
				result.append(strOp);
			}
		}
		return result.toString();
	}

	/**
	 * Finds the first "varName =" in the parsed script and evaluates the
	 * expression assigned there with {@link #eval(int)}.
	 *
	 * @param varName name of the variable
	 * @return the evaluated string; "" when no assignment is found
	 */
	public String evalStringVar(String varName) {
		return evalStringVar (varName, new ArrayList <String>());
	}

	/**
	 * Worker for {@link #evalStringVar(String)}. A variable that is already
	 * being evaluated further up the call chain evaluates to "", so that
	 * "a = a + 'x'" or "a = b; b = a" cannot recurse without end.
	 */
	private String evalStringVar(String varName, List<String> pendingVars) {
		if (pendingVars.contains(varName))
			return "";

		List<Token> tokenizedCode = new ArrayList <Token>();
		tokenizedCode.add (new Identifier(varName));
		tokenizedCode.add (new Punctuator("=")); 

		int idx = matchCode (tokenizedCode);
		logEval ("evalStringVar(" + varName + "): idx = " + idx);
		if (idx < 0)
			return "";

		pendingVars.add(varName);
		try {
			return eval (idx+1, pendingVars);
		} finally {
			pendingVars.remove(varName);
		}
	}

	/**
	 * Removes the opening quote of a string literal and, when present, the
	 * matching closing quote. An unterminated literal keeps all of its content.
	 */
	private static String stripQuotes (String literal) {
		int length = literal.length();
		if (length == 0)
			return literal;
		boolean terminated = length >= 2 && literal.charAt(length-1) == literal.charAt(0);
		return terminated ? literal.substring(1, length-1) : literal.substring(1);
	}

	/** Writes an evaluation trace line to the log when debug output is enabled. */
	private void logEval (String message) {
		if (enableDebugOuput)
			Log.d(Constants.TAG, message);
	}
	
}