/**********************************************************************
* Copyright (c) 2014 HubSpot Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**********************************************************************/

package com.hubspot.jinjava.tree.parse;

import static com.hubspot.jinjava.tree.parse.TokenScannerSymbols.TOKEN_EXPR_END;
import static com.hubspot.jinjava.tree.parse.TokenScannerSymbols.TOKEN_EXPR_START;
import static com.hubspot.jinjava.tree.parse.TokenScannerSymbols.TOKEN_FIXED;
import static com.hubspot.jinjava.tree.parse.TokenScannerSymbols.TOKEN_NEWLINE;
import static com.hubspot.jinjava.tree.parse.TokenScannerSymbols.TOKEN_NOTE;
import static com.hubspot.jinjava.tree.parse.TokenScannerSymbols.TOKEN_POSTFIX;
import static com.hubspot.jinjava.tree.parse.TokenScannerSymbols.TOKEN_PREFIX;
import static com.hubspot.jinjava.tree.parse.TokenScannerSymbols.TOKEN_TAG;
import static com.hubspot.jinjava.util.CharArrayUtils.charArrayRegionMatches;

import com.google.common.collect.AbstractIterator;
import com.hubspot.jinjava.JinjavaConfig;
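
/**
 * Splits a Jinja template into a stream of {@link Token}s: fixed text,
 * expressions ({{ ... }}), tags ({% ... %}) and comments ({# ... #}).
 * The scanner walks the input one character at a time and hands tokens out
 * lazily through Guava's {@link AbstractIterator}.
 *
 * <p>A minimal usage sketch (illustrative only; in practice the scanner is
 * driven by the tree parser rather than used directly):
 *
 * <pre>{@code
 * TokenScanner scanner = new TokenScanner("Hello {{ name }}!", new JinjavaConfig());
 * while (scanner.hasNext()) {
 *   Token token = scanner.next();
 *   // token.getType() is one of the TokenScannerSymbols constants
 * }
 * }</pre>
 */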
public class TokenScanner extends AbstractIterator<Token> {
  private final JinjavaConfig config;
  private final char[] is; // the template source as a char array
  private final int length; // number of characters in the template

  private int currPost = 0; // current scan position within the char array
  private int tokenStart = 0; // start offset of the token being accumulated
  private int tokenLength = 0; // length of the token about to be emitted
  private int tokenKind = -1; // symbol of the token type currently open, -1 when none
  private int lastStart = 0; // start offset of the text emitted by newToken()
  private int inComment = 0; // 1 while scanning a comment ({# ... #})
  private int inRaw = 0; // 1 while inside a {% raw %} block
  private int inBlock = 0; // greater than zero while inside a tag or expression block
  private char inQuote = 0; // quote character currently open inside a block, 0 when none
  private int currLine = 1; // 1-based line number reported on emitted tokens

  public TokenScanner(String input, JinjavaConfig config) {
this.config = config;
is = input.toCharArray();
length = is.length;
currPost = 0;
tokenStart = 0;
tokenKind = -1;
lastStart = 0;
inComment = 0;
inRaw = 0;
inBlock = 0;
inQuote = 0;
currLine = 1;
}
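
  /**
   * Scans forward from the current position and returns the next token, or
   * {@code null} when the input is exhausted. Plain text between delimiters is
   * returned as a fixed token; when a delimiter pair closes, the whole tag,
   * expression or comment is returned as a single token.
   */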
private Token getNextToken() {
char c = 0;
while (currPost < length) {
c = is[currPost++];
if (currPost == length) {
return getEndToken();
}
if (inBlock > 0) {
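        // inside a tag or expression block: skip over quoted strings so that
        // delimiter characters within string literals do not end the block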
if (inQuote != 0) {
if (inQuote == c) {
inQuote = 0;
continue;
} else if (c == '\\') {
++currPost;
continue;
} else {
continue;
}
        } else if (c == '\'' || c == '"') {
inQuote = c;
continue;
}
}
switch (c) {
case TOKEN_PREFIX:
if (currPost < length) {
c = is[currPost];
switch (c) {
case TOKEN_NOTE:
if (inComment == 1 || inRaw == 1) {
continue;
}
inComment = 1;
tokenLength = currPost - tokenStart - 1;
if (tokenLength > 0) {
                  // emit the fixed text gathered so far; the comment prefix is rescanned on the next call
lastStart = tokenStart;
tokenStart = --currPost;
tokenKind = c;
inComment = 0;
return newToken(TOKEN_FIXED);
} else {
tokenKind = c;
}
break;
case TOKEN_TAG:
case TOKEN_EXPR_START:
if (inComment > 0) {
continue;
}
if (inRaw > 0 && (c == TOKEN_EXPR_START || !isEndRaw())) {
continue;
}
                // ignore this start symbol if it does not pair with the token type already open
if (!matchToken(c) && tokenKind > 0) {
continue;
}
if (inBlock++ > 0) {
continue;
}
tokenLength = currPost - tokenStart - 1;
if (tokenLength > 0) {
                  // emit the fixed text gathered so far; the block prefix is rescanned on the next call
lastStart = tokenStart;
tokenStart = --currPost;
tokenKind = c;
return newToken(TOKEN_FIXED);
} else {
tokenKind = c;
}
break;
default:
break;
}
}
          // the input ended immediately after the prefix
else {
return getEndToken();
}
break;
        // possible closing delimiter for the token currently open
case TOKEN_TAG:
case TOKEN_EXPR_END:
if (inComment > 0) {
continue;
}
if (!matchToken(c)) {
continue;
}
if (currPost < length) {
c = is[currPost];
if (c == TOKEN_POSTFIX) {
inBlock = 0;
tokenLength = currPost - tokenStart + 1;
if (tokenLength > 0) {
                // emit the completed tag or expression token; fixed text resumes after it
lastStart = tokenStart;
tokenStart = ++currPost;
int kind = tokenKind;
tokenKind = TOKEN_FIXED;
return newToken(kind);
}
}
} else {
return getEndToken();
}
break;
case TOKEN_NOTE:
if (!matchToken(c)) {
continue;
}
if (currPost < length) {
c = is[currPost];
if (c == TOKEN_POSTFIX) {
inComment = 0;
tokenLength = currPost - tokenStart + 1;
if (tokenLength > 0) {
                // emit the completed comment token; fixed text resumes after it
lastStart = tokenStart;
tokenStart = ++currPost;
tokenKind = TOKEN_FIXED;
return newToken(TOKEN_NOTE);
}
}
} else {
return getEndToken();
}
break;
case TOKEN_NEWLINE:
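          // count lines for token positions; a newline inside a block or comment
          // simply remains part of the token being scanned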
currLine++;
if (inComment > 0 || inBlock > 0) {
continue;
}
tokenKind = TOKEN_FIXED;
break;
default:
if (tokenKind == -1) {
tokenKind = TOKEN_FIXED;
}
}
}
return null;
}
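
  /**
   * Peeks ahead of the current position to check whether the tag being opened
   * is {@code endraw}, i.e. whether the current raw block is ending.
   */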
private boolean isEndRaw() {
int pos = currPost + 1;
while (pos < length) {
if (!Character.isWhitespace(is[pos++])) {
break;
}
}
if (pos + 5 >= length) {
return false;
}
return charArrayRegionMatches(is, pos - 1, "endraw");
}
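
  /**
   * Returns everything from the current token start up to the current position
   * (the end of the input) as a final token: comment text if a comment is
   * still open, otherwise fixed text.
   */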
private Token getEndToken() {
tokenLength = currPost - tokenStart;
int type = TOKEN_FIXED;
if (inComment > 0) {
type = TOKEN_NOTE;
}
return Token.newToken(type, String.valueOf(is, tokenStart, tokenLength), currLine);
}
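
  /**
   * Builds a token of the given kind from the last recorded start offset. Also
   * applies trim_blocks handling after tags, flips the raw-block state on raw
   * and endraw tags, and downgrades non-fixed tokens scanned inside a raw
   * block to fixed text.
   */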
private Token newToken(int kind) {
Token t = Token.newToken(kind, String.valueOf(is, lastStart, tokenLength), currLine);
if (t instanceof TagToken) {
if (config.isTrimBlocks() && currPost < length && is[currPost] == '\n') {
++currPost;
++tokenStart;
}
TagToken tt = (TagToken) t;
if ("raw".equals(tt.getTagName())) {
inRaw = 1;
return tt;
} else if ("endraw".equals(tt.getTagName())) {
inRaw = 0;
return tt;
}
}
if (inRaw > 0 && t.getType() != TOKEN_FIXED) {
return Token.newToken(TOKEN_FIXED, t.image, currLine);
}
return t;
}
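
  /**
   * Checks whether the given delimiter symbol corresponds to the token type
   * currently being scanned: expression braces pair with each other, while tag
   * and comment symbols must match exactly.
   */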
private boolean matchToken(char kind) {
if (kind == TOKEN_EXPR_START) {
return tokenKind == TOKEN_EXPR_END;
} else if (kind == TOKEN_EXPR_END) {
return tokenKind == TOKEN_EXPR_START;
} else {
return kind == tokenKind;
}
}
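
  /** Bridges {@code getNextToken()} into Guava's {@link AbstractIterator} contract. */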
@Override
protected Token computeNext() {
Token t = getNextToken();
if (t == null) {
return endOfData();
}
return t;
}
}