package de.neuland.jade4j.util;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class CharacterParser {
private Pattern pattern = Pattern.compile("^\\w+\\b");
// function parse(src, state, options) {
// options = options || {};
// state = state || exports.defaultState();
// var start = options.start || 0;
// var end = options.end || src.length;
// var index = start;
// while (index < end) {
// if (state.roundDepth < 0 || state.curlyDepth < 0 || state.squareDepth < 0) {
// throw new SyntaxError('Mismatched Bracket: ' + src[index - 1]);
// }
// exports.parseChar(src[index++], state);
// }
// return state;
// }
public static class SyntaxError extends Exception{
/**
* Constructs a new exception with the specified detail message. The
* cause is not initialized, and may subsequently be initialized by
* a call to {@link #initCause}.
*
* @param message the detail message. The detail message is saved for
* later retrieval by the {@link #getMessage()} method.
*/
public SyntaxError(String message) {
super(message);
}
}
public State parse(String src) throws SyntaxError {
Options options = new Options();
options.setEnd(src.length());
return this.parse(src,this.defaultState(),options);
}
public State parse(String src,State state) throws SyntaxError {
Options options = new Options();
options.setEnd(src.length());
return this.parse(src,state,options);
}
public State parse(String src,State state,Options options) throws SyntaxError {
if(options == null) {
options = new Options();
options.setEnd(src.length());
}
if(state == null)
state = this.defaultState();
int start = options.getStart();
int end = options.getEnd();
int index = start;
while (index < end) {
if (state.getRoundDepth() < 0 || state.getCurlyDepth() < 0 || state.getSquareDepth() < 0) {
throw new SyntaxError("Mismatched Bracket: " + src.charAt(index - 1));
}
this.parseChar(src.charAt(index++), state);
}
return state;
}
// function parseMax(src, options) {
// options = options || {};
// var start = options.start || 0;
// var index = start;
// var state = exports.defaultState();
// while (state.roundDepth >= 0 && state.curlyDepth >= 0 && state.squareDepth >= 0) {
// if (index >= src.length) {
// throw new Error('The end of the string was reached with no closing bracket found.');
// }
// exports.parseChar(src[index++], state);
// }
// var end = index - 1;
// return {
// start: start,
// end: end,
// src: src.substring(start, end)
// };
// }
public Match parseMax(String src) throws SyntaxError {
Options options = new Options();
return this.parseMax(src, options);
}
public Match parseMax(String src,Options options) throws SyntaxError {
if(options == null)
options = new Options();
int start = options.getStart();
int index = start;
State state = this.defaultState();
while (state.getRoundDepth() >= 0 && state.getCurlyDepth() >= 0 && state.getSquareDepth() >= 0) {
if (index >= src.length()) {
throw new SyntaxError("The end of the string was reached with no closing bracket found.");
}
this.parseChar(src.charAt(index++), state);
}
int end = index - 1;
return new Match(start,end,src.substring(start,end));
}
// var bracketToProp = {
// ')': 'roundDepth',
// '}': 'curlyDepth',
// ']': 'squareDepth'
// };
//
// function parseMaxBracket(src, bracket, options) {
// options = options || {};
// var start = options.start || 0;
// var index = start;
// var state = exports.defaultState();
// var prop = bracketToProp[bracket];
// if (prop === undefined) {
// throw new Error('Bracket specified (' + JSON.stringify(bracket) + ') is not one of ")", "]", or "}";');
// }
// while (state[prop] >= 0) {
// if (index >= src.length) {
// throw new Error('The end of the string was reached with no closing bracket "' + bracket + '" found.');
// }
// exports.parseChar(src[index++], state);
// }
// var end = index - 1;
// return {
// start: start,
// end: end,
// src: src.substring(start, end)
// };
// }
private int getStateProp(State state, char bracket){
if(')' == bracket)
return state.getRoundDepth();
if('}' == bracket)
return state.getCurlyDepth();
if(']' == bracket)
return state.getSquareDepth();
return -1;
}
public Match parseMaxBracket(String src,char bracket) throws SyntaxError {
return this.parseMaxBracket(src,bracket,new Options());
}
public Match parseMaxBracket(String src,char bracket,Options options) throws SyntaxError {
if (options == null)
options = new Options();
int start = options.getStart();
int index = start;
State state = this.defaultState();
if (bracket != ')' && bracket != '}' && bracket != ']') {
throw new SyntaxError("Bracket specified (" + String.valueOf(bracket) + ") is not one of \")\", \"]\", or \"}\"");
}
while (getStateProp(state,bracket) >= 0) {
if (index >= src.length()) {
throw new SyntaxError("The end of the string was reached with no closing bracket \"" + bracket + "\" found.");
}
this.parseChar(src.charAt(index++), state);
}
int end = index - 1;
return new Match(start, end, src.substring(start, end));
}
// function parseUntil(src, delimiter, options) {
// options = options || {};
// var includeLineComment = options.includeLineComment || false;
// var start = options.start || 0;
// var index = start;
// var state = exports.defaultState();
// while (state.isString() || state.regexp || state.blockComment ||
// (!includeLineComment && state.lineComment) || !startsWith(src, delimiter, index)) {
// exports.parseChar(src[index++], state);
// }
// var end = index;
// return {
// start: start,
// end: end,
// src: src.substring(start, end)
// };
// }
public Match parseUntil(String src,String delimiter) {
return this.parseUntil(src,delimiter,new Options());
}
public Match parseUntil(String src,String delimiter,Options options){
if (options == null)
options = new Options();
boolean includeLineComment = options.isIncludeLineComment();
int start = options.getStart();
int index = start;
State state = this.defaultState();
while (state.isString() || state.isRegexp() || state.isBlockComment() ||
(!includeLineComment && state.isLineComment()) || !startsWith(src, delimiter, index)) {
this.parseChar(src.charAt(index++), state);
}
int end = index;
return new Match(start, end, src.substring(start, end));
}
// function parseChar(character, state) {
// if (character.length !== 1) throw new Error('Character must be a string of length 1');
// state = state || exports.defaultState();
// state.src = state.src || '';
// state.src += character;
// var wasComment = state.blockComment || state.lineComment;
// var lastChar = state.history ? state.history[0] : '';
//
// if (state.regexpStart) {
// if (character === '/' || character == '*') {
// state.regexp = false;
// }
// state.regexpStart = false;
// }
// if (state.lineComment) {
// if (character === '\n') {
// state.lineComment = false;
// }
// } else if (state.blockComment) {
// if (state.lastChar === '*' && character === '/') {
// state.blockComment = false;
// }
// } else if (state.singleQuote) {
// if (character === '\'' && !state.escaped) {
// state.singleQuote = false;
// } else if (character === '\\' && !state.escaped) {
// state.escaped = true;
// } else {
// state.escaped = false;
// }
// } else if (state.doubleQuote) {
// if (character === '"' && !state.escaped) {
// state.doubleQuote = false;
// } else if (character === '\\' && !state.escaped) {
// state.escaped = true;
// } else {
// state.escaped = false;
// }
// } else if (state.regexp) {
// if (character === '/' && !state.escaped) {
// state.regexp = false;
// } else if (character === '\\' && !state.escaped) {
// state.escaped = true;
// } else {
// state.escaped = false;
// }
// } else if (lastChar === '/' && character === '/') {
// state.history = state.history.substr(1);
// state.lineComment = true;
// } else if (lastChar === '/' && character === '*') {
// state.history = state.history.substr(1);
// state.blockComment = true;
// } else if (character === '/' && isRegexp(state.history)) {
// state.regexp = true;
// state.regexpStart = true;
// } else if (character === '\'') {
// state.singleQuote = true;
// } else if (character === '"') {
// state.doubleQuote = true;
// } else if (character === '(') {
// state.roundDepth++;
// } else if (character === ')') {
// state.roundDepth--;
// } else if (character === '{') {
// state.curlyDepth++;
// } else if (character === '}') {
// state.curlyDepth--;
// } else if (character === '[') {
// state.squareDepth++;
// } else if (character === ']') {
// state.squareDepth--;
// }
// if (!state.blockComment && !state.lineComment && !wasComment) state.history = character + state.history;
// state.lastChar = character; // store last character for ending block comments
// return state;
// }
public State parseChar(char character,State state){
// if (character.length !== 1) throw new Error('Character must be a string of length 1');
if(state == null)
state = this.defaultState();
state.setSrc(state.getSrc() + character);
boolean wasComment = state.isBlockComment() || state.isLineComment();
Character lastChar = !state.getHistory().isEmpty() ? state.getHistory().charAt(0) : null;
if (state.isRegexpStart()) {
if ('/' == character || '*'==character) {
state.setRegexp(false);
}
state.setRegexpStart(false);
}
if (state.isLineComment()) {
if ('\n' == character) {
state.setLineComment(false);
}
} else if (state.isBlockComment()) {
if ('*' == state.getLastChar() && '/'==character) {
state.setBlockComment(false);
}
} else if (state.isSingleQuote()) {
if ('\''==character && !state.isEscaped()) {
state.setSingleQuote(false);
} else if ('\\'==character && !state.isEscaped()) {
state.setEscaped(true);
} else {
state.setEscaped(false);
}
} else if (state.isDoubleQuote()) {
if ('"'==character && !state.isEscaped()) {
state.setDoubleQuote(false);
} else if ('\\'==character && !state.isEscaped()) {
state.setEscaped(true);
} else {
state.setEscaped(false);
}
} else if (state.isRegexp()) {
if ('/'==character && !state.isEscaped()) {
state.setRegexp(false);
} else if ('\\'==character && !state.isEscaped()) {
state.setRegexp(true);
} else {
state.setEscaped(false);
}
} else if (lastChar!=null && '/' == lastChar && '/'==character) {
state.setHistory(state.getHistory().substring(1));
state.setLineComment(true);
} else if (lastChar!=null && '/'==lastChar && '*'==character) {
state.setHistory(state.getHistory().substring(1));
state.setBlockComment(true);
} else if ('/'==character && !state.getHistory().isEmpty() &&isRegexp(state.getHistory())) {
state.setRegexp(true);
state.setRegexpStart(true);
} else if ('\''==character) {
state.setSingleQuote(true);
} else if (character == '"') {
state.setDoubleQuote(true);
} else if (character == '(') {
state.setRoundDepth(state.getRoundDepth()+1);
} else if (character == ')') {
state.setRoundDepth(state.getRoundDepth()-1);
} else if (character == '{') {
state.setCurlyDepth(state.getCurlyDepth()+1);
} else if (character == '}') {
state.setCurlyDepth(state.getCurlyDepth()-1);
} else if (character == '[') {
state.setSquareDepth(state.getSquareDepth()+1);
} else if (character == ']') {
state.setSquareDepth(state.getSquareDepth()-1);
}
if (!state.isBlockComment() && !state.isLineComment() && !wasComment) state.setHistory(character + state.getHistory());
state.setLastChar(character); // store last character for ending block comments
return state;
}
// exports.defaultState = function () { return new State() };
public State defaultState(){
return new State();
}
public static class State{
private boolean lineComment = false;
private boolean blockComment = false;
private boolean singleQuote = false;
private boolean doubleQuote = false;
private boolean regexp = false;
private boolean regexpStart = false;
private boolean escaped = false;
private int roundDepth = 0;
private int curlyDepth = 0;
private int squareDepth = 0;
private String history = "";
private Character lastChar = null;
private String src = "";
public boolean isString(){
return this.singleQuote || this.doubleQuote;
}
public boolean isComment(){
return this.lineComment || this.blockComment;
}
public boolean isNesting(){
return this.isString() || this.isComment() || this.regexp || this.roundDepth > 0 || this.curlyDepth > 0 || this.squareDepth > 0;
}
public String getSrc() {
return src;
}
public boolean isLineComment() {
return lineComment;
}
public void setLineComment(boolean lineComment) {
this.lineComment = lineComment;
}
public boolean isBlockComment() {
return blockComment;
}
public void setBlockComment(boolean blockComment) {
this.blockComment = blockComment;
}
public boolean isSingleQuote() {
return singleQuote;
}
public void setSingleQuote(boolean singleQuote) {
this.singleQuote = singleQuote;
}
public boolean isDoubleQuote() {
return doubleQuote;
}
public void setDoubleQuote(boolean doubleQuote) {
this.doubleQuote = doubleQuote;
}
public boolean isRegexp() {
return regexp;
}
public void setRegexp(boolean regexp) {
this.regexp = regexp;
}
public boolean isRegexpStart() {
return regexpStart;
}
public void setRegexpStart(boolean regexpStart) {
this.regexpStart = regexpStart;
}
public boolean isEscaped() {
return escaped;
}
public void setEscaped(boolean escaped) {
this.escaped = escaped;
}
public int getRoundDepth() {
return roundDepth;
}
public void setRoundDepth(int roundDepth) {
this.roundDepth = roundDepth;
}
public int getCurlyDepth() {
return curlyDepth;
}
public void setCurlyDepth(int curlyDepth) {
this.curlyDepth = curlyDepth;
}
public int getSquareDepth() {
return squareDepth;
}
public void setSquareDepth(int squareDepth) {
this.squareDepth = squareDepth;
}
public String getHistory() {
return history;
}
public void setHistory(String history) {
this.history = history;
}
public Character getLastChar() {
return lastChar;
}
public void setLastChar(Character lastChar) {
this.lastChar = lastChar;
}
public void setSrc(String src) {
this.src = src;
}
}
private boolean startsWith(String str, String start, int i){
return start.equals(str.substring(i,i+start.length()));
}
// exports.isPunctuator = isPunctuator
// function isPunctuator(c) {
// if (!c) return true; // the start of a string is a punctuator
// var code = c.charCodeAt(0)
//
// switch (code) {
// case 46: // . dot
// case 40: // ( open bracket
// case 41: // ) close bracket
// case 59: // ; semicolon
// case 44: // , comma
// case 123: // { open curly brace
// case 125: // } close curly brace
// case 91: // [
// case 93: // ]
// case 58: // :
// case 63: // ?
// case 126: // ~
// case 37: // %
// case 38: // &
// case 42: // *:
// case 43: // +
// case 45: // -
// case 47: // /
// case 60: // <
// case 62: // >
// case 94: // ^
// case 124: // |
// case 33: // !
// case 61: // =
// return true;
// default:
// return false;
// }
// }
public boolean isPunctuator(Character character){
Integer code = Character.codePointAt(character.toString(),0);
switch (code) {
case 46: // . dot
case 40: // ( open bracket
case 41: // ) close bracket
case 59: // ; semicolon
case 44: // , comma
case 123: // { open curly brace
case 125: // } close curly brace
case 91: // [
case 93: // ]
case 58: // :
case 63: // ?
case 126: // ~
case 37: // %
case 38: // &
case 42: // *:
case 43: // +
case 45: // -
case 47: // /
case 60: // <
case 62: // >
case 94: // ^
case 124: // |
case 33: // !
case 61: // =
return true;
default:
return false;
}
}
public boolean isKeyword(String id) {
return ("if".equals(id)) || ("in".equals(id)) || ("do".equals(id)) || ("var".equals(id)) || ("for".equals(id)) || ("new".equals(id)) ||
("try".equals(id)) || ("let".equals(id)) || ("this".equals(id)) || ("else".equals(id)) || ("case".equals(id)) ||
("void".equals(id)) || ("with".equals(id)) || ("enum".equals(id)) || ("while".equals(id)) || ("break".equals(id)) || ("catch".equals(id)) ||
("throw".equals(id)) || ("const".equals(id)) || ("yield".equals(id)) || ("class".equals(id)) || ("super".equals(id)) ||
("return".equals(id)) || ("typeof".equals(id)) || ("delete".equals(id)) || ("switch".equals(id)) || ("export".equals(id)) ||
("import".equals(id)) || ("default".equals(id)) || ("finally".equals(id)) || ("extends".equals(id)) || ("function".equals(id)) ||
("continue".equals(id)) || ("debugger".equals(id)) || ("package".equals(id)) || ("private".equals(id)) || ("interface".equals(id)) ||
("instanceof".equals(id)) || ("implements".equals(id)) || ("protected".equals(id)) || ("public".equals(id)) || ("static".equals(id));
}
// function isRegexp(history) {
// //could be start of regexp or divide sign
//
// history = history.replace(/^\s*/, '');
//
// //unless its an `if`, `while`, `for` or `with` it's a divide, so we assume it's a divide
// if (history[0] === ')') return false;
// //unless it's a function expression, it's a regexp, so we assume it's a regexp
// if (history[0] === '}') return true;
// //any punctuation means it's a regexp
// if (isPunctuator(history[0])) return true;
// //if the last thing was a keyword then it must be a regexp (e.g. `typeof /foo/`)
// if (/^\w+\b/.test(history) && isKeyword(/^\w+\b/.exec(history)[0].split('').reverse().join(''))) return true;
//
// return false;
// }
public boolean isRegexp(String history){
//could be start of regexp or divide sign
history = history.replace("^\\s*", "");
//unless its an `if`, `while`, `for` or `with` it's a divide, so we assume it's a divide
if (history.charAt(0) == ')') return false;
//unless it's a function expression, it's a regexp, so we assume it's a regexp
if (history.charAt(0) == '}') return true;
//any punctuation means it's a regexp
if (isPunctuator(history.charAt(0))) return true;
//if the last thing was a keyword then it must be a regexp (e.g. `typeof /foo/`)
Matcher matcher = pattern.matcher(history);
if (matcher.matches() && isKeyword(new StringBuilder(matcher.group(0)).reverse().toString())){
return true;
}
return false;
}
public class Match {
private int start;
private int end;
private String src;
public Match(int start, int end, String src) {
this.start = start;
this.end = end;
this.src = src;
}
public int getStart() {
return start;
}
public int getEnd() {
return end;
}
public String getSrc() {
return src;
}
}
}