/**
 * Copyright 2010 Bing Ran<bing_ran@hotmail.com>
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cn.bran.japid.compiler;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.regex.Pattern;

/**
 * Template parser: a hand-written, state-driven tokenizer that walks the page
 * source one character at a time and splits it into {@link Token} segments.
 *
 * <p>
 * Typical use is to construct a parser and then either call
 * {@link #nextToken()} / {@link #getToken()} repeatedly until
 * {@link Token#EOF} is returned, or call {@link #allTokens()}.
 *
 * @author Bing Ran<bing_ran@hotmail.com>
 * @author Play! framework original authors
 */
public class JapidParser {
    /** The character that starts a script line; either '`' or '@'. */
    private char MARKER_CHAR = '`';
    /** {@link #MARKER_CHAR} as a one-character string. */
    private String MARKER_STRING = "`";
    /** Keyword that, right after the marker, opens a verbatim section. */
    private static final String VERBATIM2 = "verbatim";

    /** The (pre-processed) template source being tokenized. */
    private String pageSource;

    /**
     * Creates a parser for the given template source.
     *
     * <p>
     * Every <code>$[expr]$</code> placeholder in the source is first rewritten
     * to <code>"+expr+"</code>, then the script marker is detected with
     * {@link #detectMarker(String)}.
     *
     * @param pageSource
     *            the template text; must not be null
     */
    public JapidParser(String pageSource) {
        // hack: allow $[] string interpolation in String literals in script
        // block.
        // see: https://github.com/branaway/Japid/issues/19
        pageSource = PLACE_HOLDER_PATTERN.matcher(pageSource).replaceAll(JapidParser.SUB_PATTERN_S);

        // detect marker
        // the logic is to find the first line that starts with either ` or @
        char mar = detectMarker(pageSource);
        setMarker(mar);
        this.pageSource = pageSource;
        this.len = pageSource.length();
    }

    /**
     * Finds the script marker used by a template: the first line whose trimmed
     * text starts with '@' selects '@'; the first line that starts with a
     * single back-quote (but not with "``" or "`@") selects '`'.
     *
     * @param pageSource
     *            the template text to scan
     * @return '@' or '`'; '`' when no line decides
     */
    public static char detectMarker(String pageSource) {
        BufferedReader br = new BufferedReader(new StringReader(pageSource));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();
                if (line.startsWith("@")) {
                    return '@';
                }
                if (line.startsWith("`") && !line.startsWith("``") && !line.startsWith("`@")) {
                    return '`';
                }
            }
        } catch (IOException ignored) {
            // reading from an in-memory StringReader; fall back to the default
        } finally {
            try {
                br.close();
            } catch (IOException ignored) {
                // nothing useful can be done about a failed close here
            }
        }
        // default
        return '`';
    }

    /**
     * Sets the script marker character.
     *
     * @param m
     *            the new marker
     */
    public void setMarker(char m) {
        MARKER_CHAR = m;
        MARKER_STRING = String.valueOf(m);
    }

    /** Restores the default back-quote marker. */
    public void resetMarker() {
        MARKER_CHAR = '`';
        MARKER_STRING = String.valueOf(MARKER_CHAR);
    }

    // bran:
    // keep track of nested state tokens, eg. nested function calls in
    // expressions
    // what inside is not used for now, we only are interested in the depth
    Stack<JapidParser.Token> methodCallStackInExpr = new Stack<JapidParser.Token>();

    /** The kinds of segment the parser can be inside of / can return. */
    public enum Token {
        EOF, //
        PLAIN, //
        // PLAIN_LEADINGSPACE, // the part after a new line and before any
        // none-space characters
        SCRIPT, // %{...}% or {%...%} bran: or ~{}~, ~[]~ the open wings
                // directives
        SCRIPT_LINE, // line started with marker. will turn the rest of the
                     // line into script.
        EXPR, // ${...}
        EXPR_ESCAPED, // ~{}
        START_TAG, // #{...}
        END_TAG, // #{/...}
        MESSAGE, // &{...}
        ACTION, // @{...}
        ABS_ACTION, // @@{...}
        // bran: to indicate { in action arguments
        ACTION_CURLY, // @@{...}
        COMMENT, // *{...}*
        // bran expression without using {}, such as ~_;
        EXPR_WING, // ~{...}
        EXPR_NATURAL, // $xxx
        EXPR_NATURAL_ESCAPED, // ~xxx
        TEMPLATE_ARGS, // bran ~( )
        CLOSING_BRACE, // a closing curly brace after leading space. Used? Good
                       // idea?
        VERBATIM, // raw text until another marker
    }

    // end2/begin2: for mark the current returned token
    // begin is the start pos of next token
    // end is the next pos pointer
    private int end, begin, end2, begin2, len;

    /** State of the segment currently being scanned. */
    private JapidParser.Token state = Token.PLAIN;
    /** State of the segment most recently completed by {@link #found}. */
    private JapidParser.Token lastState;

    public boolean verbatim;

    /** Replacement applied to {@link #PLACE_HOLDER_PATTERN_S} matches. */
    public static final String SUB_PATTERN_S = "\"\\+$1\\+\"";
    /** Regex for <code>$[...]$</code> placeholders. */
    public static final String PLACE_HOLDER_PATTERN_S = "\\$\\[(.+?)\\]\\$"; // none-greedy match
    /** Compiled once; the constructor runs it on every template. */
    private static final Pattern PLACE_HOLDER_PATTERN = Pattern.compile(PLACE_HOLDER_PATTERN_S);

    /**
     * Closes the segment being scanned and switches to a new state.
     *
     * @param newState
     *            the state to scan next
     * @param skip
     *            number of delimiter characters to step over
     * @return the state of the segment just closed (EXPR_NATURAL is reported
     *         as EXPR)
     */
    private JapidParser.Token found(JapidParser.Token newState, int skip) {
        begin2 = begin;
        end2 = --end;
        begin = end += skip;
        lastState = state == Token.EXPR_NATURAL ? Token.EXPR : state;
        state = newState;
        return lastState;
    }

    /**
     * Steps back onto the current character and then forward by
     * <code>skip</code> characters, staying in the current segment.
     */
    private void skip(int skip) {
        end2 = --end;
        end += skip;
    }

    /**
     * @return the 1-based line number on which the most recently returned
     *         token starts
     */
    public Integer getLineNumber() {
        // Count line feeds directly: String.split drops trailing empty
        // strings and so under-counts when the token follows blank lines or
        // starts right after a line break.
        int line = 1;
        for (int i = 0; i < begin2; i++) {
            if (pageSource.charAt(i) == '\n') {
                line++;
            }
        }
        return line;
    }

    /**
     * @return the text of the most recently returned token, with escape
     *         sequences and line-continuation signs resolved
     */
    public String getToken() {
        String tokenString = pageSource.substring(begin2, end2);
        if (lastState == Token.PLAIN) {
            // un-escape special sequence
            tokenString = tokenString.replace("``", "`").replace("`@", "@");
            tokenString = escapeSpecialWith(tokenString, '~');
        } else {
            tokenString = escapeSpecialWith(tokenString, (char) 0);
        }
        return tokenString;
    }

    /**
     * Resolves escape sequences and line continuations in a token.
     *
     * <p>
     * <code>marker</code> followed by one of <code>` ~ @ # $ % &amp; *</code>
     * is replaced by that character alone; a marker followed by anything else
     * is kept. A backslash immediately followed by a line break (\n, \r\n or
     * a lone \r) is a line-continuation sign: both the backslash and the line
     * break are removed.
     *
     * @param src
     *            the raw token text
     * @param marker
     *            the escape character
     * @return the processed text; <code>src</code> itself when it is shorter
     *         than two characters
     */
    public static String escapeSpecialWith(String src, char marker) {
        int len = src.length();
        if (len < 2) {
            return src;
        }
        StringBuilder buf = new StringBuilder(len);
        int i = 0;
        while (i < len) {
            char c = src.charAt(i);
            boolean hasNext = i + 1 < len;
            char c1 = hasNext ? src.charAt(i + 1) : 0;
            if (c == marker) {
                if (hasNext && isEscapable(c1)) {
                    // emit the escaped character exactly once
                    buf.append(c1);
                    i += 2;
                } else {
                    buf.append(marker);
                    i++;
                }
            } else if (c == '\\' && hasNext && c1 == '\r') {
                // line continue sign: \ CR [LF]
                boolean crlf = i + 2 < len && src.charAt(i + 2) == '\n';
                i += crlf ? 3 : 2;
            } else if (c == '\\' && hasNext && c1 == '\n') {
                // line continue sign: \ LF
                i += 2;
            } else {
                buf.append(c);
                i++;
            }
        }
        return buf.toString();
    }

    /** Tells whether a character can be escaped with the escape marker. */
    private static boolean isEscapable(char ch) {
        switch (ch) {
        case '`':
        case '~':
        case '@':
        case '#':
        case '$':
        case '%':
        case '&':
        case '*':
            return true;
        default:
            return false;
        }
    }

    /**
     * @return the character right after the last returned token as a string,
     *         or "" when the token ends at the end of the source
     */
    public String checkNext() {
        if (end2 < pageSource.length()) {
            return pageSource.charAt(end2) + "";
        }
        return "";
    }

    /**
     * Scans forward to the end of the current segment.
     *
     * @return the type of the segment just completed; its text is available
     *         from {@link #getToken()}. {@link Token#EOF} is returned once the
     *         last segment has been delivered.
     */
    public JapidParser.Token nextToken() {
        for (;;) {
            // how many more chars to be processed
            int left = len - end;
            if (left == 0) {
                end++;
                return found(Token.EOF, 0);
            }

            // c is the current char; c1/c2 are look-ahead (0 past the end)
            char c = pageSource.charAt(end++);
            char c1 = left > 1 ? pageSource.charAt(end) : 0;
            char c2 = left > 2 ? pageSource.charAt(end + 1) : 0;

            // detect line continue sign
            if (c == '\\') {
                if (c1 == '\r') {
                    if (c2 == '\n') {
                        skip(3);
                        continue;
                    } else {
                        skip(2);
                        continue;
                    }
                } else if (c1 == '\n') {
                    skip(2);
                    continue;
                }
            }

            switch (state) {
            case PLAIN:
                if (c == '%' && c1 == '{') {
                    return found(Token.SCRIPT, 2);
                }
                if (c == '{' && c1 == '%') {
                    return found(Token.SCRIPT, 2);
                }
                // bran open wings
                // breaking changes: ~{ is now used as escaped expression
                // rather than as a script opener.
                if (c == '~') {
                    // escaped special character: stays part of the plain text
                    if (c1 == '`' || c1 == '~' || c1 == '@' || c1 == '#' || c1 == '$' || c1 == '%' || c1 == '&'
                            || c1 == '*') {
                        skip(2);
                        break;
                    }
                }
                if (c == '~' && c1 == '[') {
                    return found(Token.SCRIPT, 2);
                }
                if (c == '$' && c1 == '{') {
                    return found(Token.EXPR, 2);
                }
                if (c == '~' && c1 == '{') {
                    return found(Token.EXPR_ESCAPED, 2);
                }
                if (c == '~' && c1 == '(') {
                    // deprecated in favor of args directive in a script
                    return found(Token.TEMPLATE_ARGS, 2);
                }
                // bran: shell like expression: ~_, ~user.name (this one is
                // diff from sh, which requires ${user.name}
                if (c == '~' && c1 != '~'
                        && (Character.isJavaIdentifierStart(c1) || '\'' == c1 || '\"' == c1)) {
                    return found(Token.EXPR_NATURAL_ESCAPED, 1);
                }
                if (c == '$' && c1 != '$'
                        && (Character.isJavaIdentifierStart(c1) || '\'' == c1 || '\"' == c1)) {
                    return found(Token.EXPR_NATURAL, 1);
                }
                if (c == '#' && c1 == '{' && c2 == '/') {
                    return found(Token.END_TAG, 3);
                }
                if (c == '#' && c1 == '{') {
                    return found(Token.START_TAG, 2);
                }
                if (c == '&' && c1 == '{') {
                    return found(Token.MESSAGE, 2);
                }
                if (c == '@' && c1 == '@' && c2 == '{') {
                    return found(Token.ABS_ACTION, 3);
                }
                if (c == '@' && c1 == '{') {
                    return found(Token.ACTION, 2);
                }
                if (c == '*' && c1 == '{') {
                    return found(Token.COMMENT, 2);
                }
                if (c == '`') {
                    // back-quote escaped special character
                    if (c1 == '`' || c1 == '~' || c1 == '@' || c1 == '#' || c1 == '$' || c1 == '%' || c1 == '&'
                            || c1 == '*') {
                        skip(2);
                        break;
                    }
                }
                if (c == MARKER_CHAR) {
                    if (c1 == MARKER_CHAR) {
                        // doubled marker: an escaped marker in plain text
                        skip(2);
                    } else if (c1 == '(') {
                        return found(Token.TEMPLATE_ARGS, 2);
                    } else if (nextMatch(VERBATIM2)) {
                        return found(Token.VERBATIM, VERBATIM2.length() + 1);
                    } else {
                        return found(Token.SCRIPT_LINE, 1);
                    }
                }
                // was trying to implement an escape-less }, but it may be too
                // confusing with json, javascript syntax etc.
                // so it's disabled for now.
                break;
            case CLOSING_BRACE:
                if (c == '\n') {
                    return found(Token.PLAIN, 1);
                } else {
                    return found(Token.SCRIPT_LINE, 1);
                }
            case SCRIPT:
                // the parsing is fragile
                if (c == '}' && c1 == '%') {
                    return found(Token.PLAIN, 2);
                }
                if (c == '%' && c1 == '}') {
                    return found(Token.PLAIN, 2);
                }
                // bran
                if (c == '}' && c1 == '~') {
                    return found(Token.PLAIN, 2);
                }
                if (c == ']' && c1 == '~') {
                    return found(Token.PLAIN, 2);
                }
                break;
            case VERBATIM:
                if (c == MARKER_CHAR) {
                    // a line holding nothing but the marker ends the section
                    String currentLine = getCurrentLine();
                    if (currentLine.trim().equals(MARKER_STRING)) {
                        int skip = currentLine.length() - currentLine.indexOf(MARKER_CHAR);
                        return found(Token.PLAIN, skip);
                    }
                }
                break;
            case SCRIPT_LINE:
                if (c == '\r') {
                    if (c1 == '\n') {
                        return found(Token.PLAIN, 2);
                    } else {
                        return found(Token.PLAIN, 1);
                    }
                } else if (c == '\n') {
                    return found(Token.PLAIN, 1);
                } else if (c == MARKER_CHAR) {
                    return found(Token.PLAIN, 1);
                }
                break;
            case COMMENT:
                if (c == '}' && c1 == '*') {
                    return found(Token.PLAIN, 2);
                }
                break;
            case START_TAG:
                if (c == '}') {
                    return found(Token.PLAIN, 1);
                }
                if (c == '/' && c1 == '}') {
                    return found(Token.END_TAG, 1);
                }
                break;
            case END_TAG:
                if (c == '}') {
                    return found(Token.PLAIN, 1);
                }
                break;
            case EXPR:
            case EXPR_ESCAPED:
                if (c == '}') {
                    return found(Token.PLAIN, 1);
                }
                break;
            case TEMPLATE_ARGS:
                if (c == ')') {
                    String seg = getCurrentPartialToken();
                    if (JavaSyntaxTool.isValidParamList(seg)) {
                        return found(Token.PLAIN, 1);
                    }
                }
                break;
            // bran
            // special characters considered an expression: '?.()
            // break characters: space, other punctuations, new lines,
            // returns
            case EXPR_NATURAL:
            case EXPR_NATURAL_ESCAPED:
                // using syntax tool to find the end of expression smartly
                String restline = c + getRestLine();
                String longestExpr = JavaSyntaxTool.matchLongestPossibleExpr(restline);
                skip(longestExpr.length() + 1);
                return found(Token.PLAIN, 0);
            case ACTION:
                if (c == '}') {
                    return found(Token.PLAIN, 1);
                } else if (c == '{') {
                    // bran: weak logic: assuming no "{" in string literals
                    skipAhead(Token.ACTION_CURLY, 1);
                }
                break;
            case ABS_ACTION:
                if (c == '}') {
                    return found(Token.PLAIN, 1);
                } else if (c == '{') {
                    skipAhead(Token.ACTION_CURLY, 1);
                }
                break;
            case ACTION_CURLY:
                if (c == '}') {
                    // leave one nesting level
                    state = this.methodCallStackInExpr.pop();
                    skip(1);
                } else if (c == '{') {
                    skipAhead(Token.ACTION_CURLY, 1);
                }
                break;
            case MESSAGE:
                if (c == '}') {
                    return found(Token.PLAIN, 1);
                }
                break;
            }
        }
    }

    /**
     * @author Bing Ran (bing.ran@gmail.com)
     * @return the text of the segment being scanned, up to but excluding the
     *         current character
     */
    private String getCurrentPartialToken() {
        return pageSource.substring(begin, end - 1);
    }

    /** @return the line that contains the current character */
    private String getCurrentLine() {
        return getCurrentLine(pageSource, end - 1);
    }

    /** @return the text after the current character up to the line end */
    String getRestLine() {
        return getRestLine(pageSource, end - 1);
    }

    /**
     * @return the text after the current character up to and including the
     *         line break(s) that end the line
     */
    String getRestLineIncludingLineBreaks() {
        return getRestLineIncludingLineBreaks(pageSource, end - 1);
    }

    /**
     * get the rest of the current line
     *
     * @param src
     *            the text
     * @param pos
     *            index of the current character
     * @return the characters after <code>pos</code> up to, but excluding, the
     *         next '\n' or '\r' (or the end of <code>src</code>)
     */
    static String getRestLine(String src, int pos) {
        int length = src.length();
        int endp = pos + 1;
        while (endp < length) {
            char charAt = src.charAt(endp);
            if (charAt == '\n' || charAt == '\r') {
                // got the end
                break;
            }
            endp++;
        }
        return src.substring(pos + 1, endp);
    }

    /**
     * Like {@link #getRestLine(String, int)} but the returned text also
     * contains the run of '\n' / '\r' characters that follows the line.
     *
     * @param src
     *            the text
     * @param pos
     *            index of the current character
     * @return the characters after <code>pos</code> through the end of the
     *         following run of line-break characters
     */
    static String getRestLineIncludingLineBreaks(String src, int pos) {
        int length = src.length();
        int endp = pos + 1;
        boolean breakFound = false;
        while (endp < length) {
            char charAt = src.charAt(endp);
            if (charAt == '\n' || charAt == '\r') {
                breakFound = true;
            } else if (breakFound) {
                // first char of the next line: good to go
                break;
            }
            endp++;
        }
        return src.substring(pos + 1, endp);
    }

    /**
     * Gets the line around a position. Only '\n' is treated as a line
     * boundary, and the character at <code>pos</code> itself is not
     * inspected.
     *
     * @param src
     *            the text
     * @param pos
     *            index of a character on the wanted line
     * @return the text between the closest '\n' before <code>pos</code> and
     *         the closest '\n' after it, both excluded
     */
    static String getCurrentLine(String src, final int pos) {
        int length = src.length();
        int begin = pos - 1;
        while (begin >= 0 && src.charAt(begin) != '\n') {
            begin--;
        }
        int endp = pos + 1;
        while (endp < length && src.charAt(endp) != '\n') {
            endp++;
        }
        return src.substring(begin + 1, endp);
    }

    /**
     * @param s
     *            the text to look for
     * @return true when the source, starting at the next unread position,
     *         continues with <code>s</code>
     */
    private boolean nextMatch(String s) {
        for (int i = 0; i < s.length(); i++) {
            int index = end + i;
            if (index >= pageSource.length() || s.charAt(i) != pageSource.charAt(index)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether the last line of a string holds only spaces and tabs.
     *
     * @param curToken
     *            the text to inspect
     * @return true when every character after the last '\n' (or every
     *         character, when there is no '\n') is a space or a tab
     */
    static boolean allLeadingSpaceInline(String curToken) {
        for (int i = curToken.length() - 1; i > -1; i--) {
            char ch = curToken.charAt(i);
            if (ch == '\n') {
                break;
            }
            if (ch != ' ' && ch != '\t') {
                return false;
            }
        }
        return true;
    }

    /**
     * push a nested token to the stack
     *
     * @param token
     *            the nested state to enter
     * @param i
     *            number of chars to skip
     */
    private void skipAhead(JapidParser.Token token, int i) {
        this.methodCallStackInExpr.push(state);
        state = token;
        skip(i);
    }

    /** Rewinds the position pointers and returns to the PLAIN state. */
    void reset() {
        end = begin = end2 = begin2 = 0;
        state = Token.PLAIN;
    }

    /**
     * get all the token and content in an ordered list. EOF is not included.
     *
     * @return the remaining tokens paired with their text
     */
    public List<TokenPair> allTokens() {
        List<TokenPair> result = new ArrayList<TokenPair>();
        for (Token token = nextToken(); token != Token.EOF; token = nextToken()) {
            result.add(new TokenPair(token, getToken()));
        }
        return result;
    }
}