/**
 * Copyright (c) 2011-2017, James Zhan 詹波 (jfinal@126.com).
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.jfinal.template.stat;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

/**
 * DKFF (Dynamic Key Feature Forward) Lexer.
 *
 * <p>Splits template source into a flat list of {@link Token}s: plain text,
 * directives with their parameter text, and output expressions. Comments are
 * dropped and "no parse" blocks are emitted as plain text.
 *
 * <p>The scanner is a hand-written state machine driven by the {@code state}
 * field. State ranges select which scan method is active:
 * {@code 0..99} directives, {@code 100..199} single-line comments,
 * {@code 200..299} multi-line comments, {@code 300..} "no parse" blocks.
 */
class Lexer {

    /** Sentinel appended after the last character of the input. */
    static final char EOF = (char)-1;
    /** State assigned by {@link #fail()} once every '#' scan method has failed. */
    static final int TEXT_STATE_DIAGRAM = 999;

    /** Input characters followed by one trailing {@link #EOF}. */
    char[] buf;
    /** Current state of the scanning state machine. */
    int state = 0;
    /** Index in {@code buf} where the lexeme currently being scanned starts. */
    int lexemeBegin = 0;
    /** Index in {@code buf} of the character currently under inspection. */
    int forward = 0;
    /** Row (1-based) of {@code lexemeBegin}. */
    int beginRow = 1;
    /** Row (1-based) of {@code forward}. */
    int forwardRow = 1;
    /** Text token that adjacent text is merged into; null right after a directive. */
    TextToken previousTextToken = null;

    List<Token> tokens = new ArrayList<Token>();
    /** Tracks unclosed '(' while scanning a directive parameter. */
    LinkedList<String> stack = new LinkedList<String>();
    /** Template file name, used only for error locations. */
    String fileName;

    /**
     * Creates a lexer over a copy of {@code content}.
     *
     * @param content  template source; copied into an internal buffer
     * @param fileName name reported in {@link ParseException} locations
     */
    public Lexer(StringBuilder content, String fileName) {
        int len = content.length();
        buf = new char[len + 1];
        content.getChars(0, len, buf, 0);
        buf[len] = EOF;
        this.fileName = fileName;
    }

    /**
     * Scans the whole buffer and returns the resulting tokens.
     *
     * <p>Conventions shared by all scan methods:
     * <ul>
     * <li>On entry {@code peek()} is already usable; no {@code next()} is needed.</li>
     * <li>Whether a scan method calls {@code next()} internally depends on the case.</li>
     * <li>Before returning successfully, a scan method leaves {@code forward} at the
     *     position the next scan has to handle, so that scan need not call {@code next()}.</li>
     * <li>The scan method tried immediately before {@code scanText()} must keep
     *     {@code forward} where it is when it fails; this is what guarantees that
     *     {@code scanText()} always moves forward.</li>
     * </ul>
     *
     * @return the token list accumulated by this lexer
     */
    public List<Token> scan() {
        while (peek() != EOF) {
            if (peek() == '#') {
                // Each attempt that fails advances `state` to the next range via fail().
                if (scanDire()) {
                    continue ;
                }
                if (scanSingleLineComment()) {
                    continue ;
                }
                if (scanMultiLineComment()) {
                    continue ;
                }
                if (scanNoParse()) {
                    continue ;
                }
            }

            scanText();
        }
        return tokens;
    }

    /**
     * Scans a directive. Directive patterns and parsing rules:
     *
     * <pre>
     * 1: directive patterns
     *    #(p)
     *    #id(p)
     *    #define id(p)
     *    #@id(p) / #@id?(p)
     *    #else / #end
     *
     * 2: once a keyword directive's keyword has been read, the rest must parse
     *    correctly, otherwise an exception is thrown
     *
     * 3: a non-keyword directive is only required to parse correctly after the
     *    sequence "# id (" has appeared on the line; otherwise it is treated as
     *    ordinary text
     * </pre>
     *
     * @return true if a directive was recognised and its tokens were added
     * @throws ParseException if a keyword directive or #define is malformed
     */
    boolean scanDire() {
        String id = null;
        StringBuilder para = null;
        Token idToken = null;
        Token paraToken = null;
        while (true) {
            switch (state) {
            case 0:
                if (peek() == '#') {                    // #
                    next();
                    skipBlanks();
                    state = 1;
                    continue ;
                }
                return fail();
            case 1:
                if (peek() == '(') {                    // # (
                    para = scanPara("");
                    idToken = new Token(Symbol.OUTPUT, beginRow);
                    paraToken = new ParaToken(para, beginRow);
                    return addOutputToken(idToken, paraToken);
                }
                if (CharTable.isLetter(peek())) {       // # id
                    state = 10;
                    continue ;
                }
                if (peek() == '@') {                    // # @
                    next();
                    skipBlanks();
                    if (CharTable.isLetter(peek())) {   // # @ id
                        state = 20;
                        continue ;
                    }
                }
                return fail();
            // -----------------------------------------------------
            case 10:    // # id
                id = scanId();
                Symbol symbol = Symbol.getKeywordSym(id);
                // Not a keyword: user-defined directive
                if (symbol == null) {
                    state = 11;
                    continue ;
                }

                // #define directive
                if (symbol == Symbol.DEFINE) {
                    state = 12;
                    continue ;
                }

                // Keyword directive without parameters
                if (symbol.noPara()) {
                    return addNoParaToken(new Token(symbol, id, beginRow));
                }

                // Keyword directive with parameters
                skipBlanks();
                if (peek() == '(') {
                    para = scanPara(id);
                    idToken = new Token(symbol, beginRow);
                    paraToken = new ParaToken(para, beginRow);
                    return addIdParaToken(idToken, paraToken);
                }
                throw new ParseException("#" + id + " directive requires parentheses \"()\"", new Location(fileName, beginRow));
            case 11:    // A user-defined directive must have parameters
                skipBlanks();
                if (peek() == '(') {
                    para = scanPara(id);
                    idToken = new Token(Symbol.ID, id, beginRow);
                    paraToken = new ParaToken(para, beginRow);
                    return addIdParaToken(idToken, paraToken);
                }
                return fail();  // Without a left parenthesis it is treated as ordinary text
            case 12:    // Handles the "# define id (para)" directive
                skipBlanks();
                if (CharTable.isLetter(peek())) {
                    id = scanId();  // Template function name
                    skipBlanks();
                    if (peek() == '(') {
                        para = scanPara("define " + id);
                        idToken = new Token(Symbol.DEFINE, id, beginRow);
                        paraToken = new ParaToken(para, beginRow);
                        return addIdParaToken(idToken, paraToken);
                    }
                    throw new ParseException("#define " + id + " : template function definition requires parentheses \"()\"", new Location(fileName, beginRow));
                }
                throw new ParseException("#define directive requires identifier as a function name", new Location(fileName, beginRow));
            case 20:    // # @ id
                id = scanId();
                skipBlanks();
                boolean hasQuestionMark = peek() == '?';
                if (hasQuestionMark) {
                    next();
                    skipBlanks();
                }
                if (peek() == '(') {
                    para = scanPara(hasQuestionMark ? "@" + id + "?" : "@" + id);
                    idToken = new Token(hasQuestionMark ? Symbol.CALL_IF_DEFINED : Symbol.CALL, id, beginRow);
                    paraToken = new ParaToken(para, beginRow);
                    return addIdParaToken(idToken, paraToken);
                }
                return fail();
            default :
                return fail();
            }
        }
    }

    /**
     * Reads an identifier starting at {@code forward}.
     *
     * <p>The caller has already established that the current character starts an
     * identifier, so an id value can always be obtained. On return {@code forward}
     * is on the first character after the identifier.
     *
     * @return the identifier text
     */
    String scanId() {
        int idStart = forward;
        while (CharTable.isLetterOrDigit(next())) {
            // advancing is the only work; next() moves forward
        }
        return subBuf(idStart, forward - 1).toString();
    }

    /**
     * Scans a directive parameter; returns on success, otherwise throws.
     *
     * <p>On entry {@code forward} is on the opening '('. On return it is on the
     * character after the matching ')'. Parentheses inside quoted strings are
     * not counted.
     *
     * @param id directive name, used only in error messages
     * @return the text between the outer parentheses, or null when it is empty
     * @throws ParseException on an illegal character, an unterminated string,
     *         or a missing closing ')'
     */
    StringBuilder scanPara(String id) {
        stack.clear();
        char quotes = '"';
        int localState = 0;     // 0: outside a string literal, 1: inside one
        stack.push("(");
        next();
        int paraStart = forward;
        while (true) {
            switch (localState) {
            case 0:
                for (char c = peek(); true; c = next()) {
                    if (c == ')') {
                        stack.pop();
                        if (stack.size() == 0) {
                            next();
                            // forward is now one past ')', so the parameter ends at forward - 2
                            return subBuf(paraStart, forward - 2);
                        }
                        continue ;
                    }

                    if (c == '(') {
                        stack.push("(");
                        continue ;
                    }

                    if (c == '"' || c == '\'') {
                        quotes = c;
                        localState = 1;
                        break ;
                    }

                    if (CharTable.isExprChar(c)) {
                        continue ;
                    }

                    if (c == EOF) {
                        throw new ParseException("#" + id + " parameter can not match the end char ')'", new Location(fileName, beginRow));
                    }

                    throw new ParseException("#" + id + " parameter exists illegal char: '" + c + "'", new Location(fileName, beginRow));
                }
                break ;
            case 1:
                for (char c = next(); true; c = next()) {
                    if (c == quotes) {
                        // NOTE(review): only the single preceding char is inspected, so a
                        // closing quote after an escaped backslash ("a\\") is still seen as
                        // escaped. Confirm the expression lexer's rule before changing this.
                        if (buf[forward - 1] == '\\') {
                            continue ;  // escaped quote, still inside the string
                        }
                        next();
                        localState = 0;
                        break ;
                    }

                    if (c == EOF) {
                        throw new ParseException("#" + id + " parameter error, the string parameter not ending", new Location(fileName, beginRow));
                    }
                }
                break ;
            }
        }
    }

    /**
     * Scans a single-line comment ("###" up to the end of the line).
     * Start state is 100; watches for a line feed and EOF.
     *
     * @return true if a comment was consumed
     */
    boolean scanSingleLineComment() {
        while (true) {
            switch (state) {
            case 100:
                if (peek() == '#' && next() == '#' && next() == '#') {
                    state = 101;
                    continue ;
                }
                return fail();
            case 101:
                for (char c = next(); true; c = next()) {
                    if (c == '\n') {
                        // The line feed is swallowed only when the blank tail of the
                        // preceding text could be deleted as well.
                        return prepareNextScan(deletePreviousTextTokenBlankTails() ? 1 : 0);
                    }
                    if (c == EOF) {
                        deletePreviousTextTokenBlankTails();
                        return prepareNextScan(0);
                    }
                }
            default :
                return fail();
            }
        }
    }

    /**
     * Scans a multi-line comment ("#--" ... "--#").
     * Start state is 200; watches for the end marker and EOF.
     *
     * @return true if a comment was consumed
     * @throws ParseException if the end marker is missing
     */
    boolean scanMultiLineComment() {
        while (true) {
            switch (state) {
            case 200:
                if (peek() == '#' && next() == '-' && next() == '-') {
                    state = 201;
                    continue ;
                }
                return fail();
            case 201:
                for (char c = next(); true; c = next()) {
                    if (c == '-' && buf[forward + 1] == '-' && buf[forward + 2] == '#') {
                        // Jump over "--#" directly; the marker holds no line feed,
                        // so forwardRow needs no adjustment.
                        forward = forward + 3;
                        return prepareNextScanSkippingLineFeed();
                    }
                    if (c == EOF) {
                        throw new ParseException("The multiline comment start block \"#--\" can not match the end block: \"--#\"", new Location(fileName, beginRow));
                    }
                }
            default :
                return fail();
            }
        }
    }

    /**
     * Scans a "no parse" block ("#[[" ... "]]#") and emits its content as text.
     * Start state is 300; watches for the end marker and EOF.
     *
     * @return true if a block was consumed
     * @throws ParseException if the end marker is missing
     */
    boolean scanNoParse() {
        while (true) {
            switch (state) {
            case 300:
                if (peek() == '#' && next() == '[' && next() == '[') {
                    state = 301;
                    continue ;
                }
                return fail();
            case 301:
                for (char c = next(); true; c = next()) {
                    if (c == ']' && buf[forward + 1] == ']' && buf[forward + 2] == '#') {
                        // The content of a no-parse block is emitted as a TextToken;
                        // lexemeBegin + 3 skips the opening "#[[".
                        addTextToken(subBuf(lexemeBegin + 3, forward - 1));
                        return prepareNextScan(3);
                    }
                    if (c == EOF) {
                        throw new ParseException("The \"no parse\" start block \"#[[\" can not match the end block: \"]]#\"", new Location(fileName, beginRow));
                    }
                }
            default :
                return fail();
            }
        }
    }

    /**
     * Consumes plain text up to (not including) the next '#' or EOF and emits
     * everything since {@code lexemeBegin} as text.
     *
     * @return always true
     */
    boolean scanText() {
        for (char c = peek(); true; c = next()) {
            if (c == '#' || c == EOF) {
                addTextToken(subBuf(lexemeBegin, forward - 1));
                return prepareNextScan(0);
            }
        }
    }

    /**
     * Records a failed scan attempt and moves {@code state} to the start state
     * of the next scan method.
     *
     * <p>For states below 300 the position is rewound to {@code lexemeBegin}.
     * For states from 300 on, {@code forward} is deliberately left where it is
     * so that {@code scanText()} continues from there.
     *
     * @return always false
     */
    boolean fail() {
        if (state < 300) {
            forward = lexemeBegin;
            forwardRow = beginRow;
        }

        if (state < 100) {
            state = 100;
        } else if (state < 200) {
            state = 200;
        } else if (state < 300) {
            state = 300;
        } else {
            state = TEXT_STATE_DIAGRAM;
        }
        return false;
    }

    /**
     * Advances {@code forward} by one and returns the new current character.
     * Increments {@code forwardRow} when the character being left is a line feed.
     */
    char next() {
        if (buf[forward] == '\n') {
            forwardRow++;
        }
        return buf[++forward];
    }

    /** Returns the current character without moving. */
    char peek() {
        return buf[forward];
    }

    /** Advances past every character that {@code CharTable.isBlank} accepts. */
    void skipBlanks() {
        while (CharTable.isBlank(buf[forward])) {
            next();
        }
    }

    /**
     * Copies {@code buf[start..end]} (both inclusive) into a new builder.
     *
     * <p>scanPara and scanNoParse can produce {@code start > end} (empty content).
     *
     * @return the copied characters, or null when {@code start > end}
     */
    StringBuilder subBuf(int start, int end) {
        if (start > end) {
            return null;
        }
        int count = end - start + 1;
        return new StringBuilder(count).append(buf, start, count);
    }

    /**
     * Finishes the current lexeme: advances {@code moveForward} characters, then
     * resets the state machine and marks the new lexeme start.
     *
     * @param moveForward number of characters to skip first
     * @return always true
     */
    boolean prepareNextScan(int moveForward) {
        for (int i = 0; i < moveForward; i++) {
            next();
        }

        state = 0;
        lexemeBegin = forward;
        beginRow = forwardRow;
        return true;
    }

    /**
     * Shared tail handling for constructs that may sit alone on a line.
     *
     * <p>When only blanks follow up to a line feed or EOF and the blank tail of
     * the preceding text could be deleted, one following line feed is swallowed
     * too (nothing is skipped at EOF). Otherwise scanning resumes at the current
     * position.
     *
     * @return always true
     */
    private boolean prepareNextScanSkippingLineFeed() {
        boolean standalone = lookForwardLineFeedAndEof() && deletePreviousTextTokenBlankTails();
        return prepareNextScan(standalone && peek() != EOF ? 1 : 0);
    }

    /**
     * Emits text, merging it into the preceding text token when there is one.
     * Null or empty text is ignored.
     */
    void addTextToken(StringBuilder text) {
        if (text == null || text.length() == 0) {
            return ;
        }

        if (previousTextToken != null) {
            previousTextToken.append(text);
        } else {
            previousTextToken = new TextToken(text, beginRow);
            tokens.add(previousTextToken);
        }
    }

    /**
     * Adds the tokens of an output directive. Output directives do no processing
     * of surrounding blanks or line feeds; the tokens are added directly.
     *
     * @return always true
     */
    boolean addOutputToken(Token idToken, Token paraToken) {
        tokens.add(idToken);
        tokens.add(paraToken);
        previousTextToken = null;
        return prepareNextScan(0);
    }

    /**
     * Looks ahead to see whether only blanks followed by a line feed, or blanks
     * followed by EOF, come next; if so nothing useful follows the current
     * directive on this line.
     *
     * <p>On true, {@code forward} is left on the line feed / EOF (the blanks are
     * consumed). On false, {@code forward} and {@code forwardRow} are restored.
     */
    boolean lookForwardLineFeedAndEof() {
        int forwardBak = forward;
        int forwardRowBak = forwardRow;
        for (char c = peek(); true; c = next()) {
            if (CharTable.isBlank(c)) {
                continue ;
            }
            if (c == '\n' || c == EOF) {
                return true;
            }

            forward = forwardBak;
            forwardRow = forwardRowBak;
            return false;
        }
    }

    /**
     * Adds the tokens of a directive that carries parameters.
     *
     * <p>When such a directive sits alone on its line, the blanks around it are
     * removed and one following line feed is removed as well. "Alone on its line"
     * means nothing useful follows on the line (checked first, via
     * {@code lookForwardLineFeedAndEof()}, which consumes the trailing blanks as
     * {@code forward} moves) and nothing useful precedes it (checked second, via
     * {@code deletePreviousTextTokenBlankTails()}).
     *
     * @return always true
     */
    boolean addIdParaToken(Token idToken, Token paraToken) {
        tokens.add(idToken);
        tokens.add(paraToken);

        prepareNextScanSkippingLineFeed();
        previousTextToken = null;
        return true;
    }

    /**
     * Adds the token of a directive without parameters. Blank handling is the
     * same as in {@code addIdParaToken()}, except that one blank immediately
     * following the directive is skipped first.
     *
     * @return always true
     */
    boolean addNoParaToken(Token noParaToken) {
        tokens.add(noParaToken);
        if (CharTable.isBlank(peek())) {
            next(); // A single blank right after the directive is only a separator
        }

        prepareNextScanSkippingLineFeed();
        previousTextToken = null;
        return true;
    }

    /**
     * Tries to delete the blank tail of the preceding text token.
     *
     * <pre>
     * 1: when the current directive is preceded by another directive
     *    (previousTextToken is null), returns true directly
     * 2: when it is preceded by a TextToken, both the processing and the return
     *    value are delegated entirely to TextToken.deleteBlankTails()
     * </pre>
     */
    boolean deletePreviousTextTokenBlankTails() {
        return previousTextToken == null || previousTextToken.deleteBlankTails();
    }
}