/** * Alipay.com Inc. * Copyright (c) 2004-2012 All Rights Reserved. */ package com.alipay.zdal.parser.sql.parser; import static com.alipay.zdal.parser.sql.parser.CharTypes.isFirstIdentifierChar; import static com.alipay.zdal.parser.sql.parser.CharTypes.isIdentifierChar; import static com.alipay.zdal.parser.sql.parser.CharTypes.isWhitespace; import static com.alipay.zdal.parser.sql.parser.LayoutCharacters.EOI; import static com.alipay.zdal.parser.sql.parser.Token.COLONEQ; import static com.alipay.zdal.parser.sql.parser.Token.COMMA; import static com.alipay.zdal.parser.sql.parser.Token.EOF; import static com.alipay.zdal.parser.sql.parser.Token.ERROR; import static com.alipay.zdal.parser.sql.parser.Token.LBRACE; import static com.alipay.zdal.parser.sql.parser.Token.LBRACKET; import static com.alipay.zdal.parser.sql.parser.Token.LITERAL_ALIAS; import static com.alipay.zdal.parser.sql.parser.Token.LITERAL_CHARS; import static com.alipay.zdal.parser.sql.parser.Token.LPAREN; import static com.alipay.zdal.parser.sql.parser.Token.RBRACE; import static com.alipay.zdal.parser.sql.parser.Token.RBRACKET; import static com.alipay.zdal.parser.sql.parser.Token.RPAREN; import java.math.BigDecimal; import java.math.BigInteger; /** * * @author ���� * @version $Id: Lexer.java, v 0.1 2012-11-17 ����3:53:44 Exp $ */ public class Lexer { protected final char[] buf; protected int bp; protected int buflen; // QS_TODO what is the purpose? protected int eofPos; /** The current character. */ protected char ch; /** The token's position, 0-based offset from beginning of text. */ protected int tokenPos; /** A character buffer for literals. */ protected final static ThreadLocal<char[]> SBUFREF = new ThreadLocal<char[]>(); protected char[] sbuf; /** string point as size */ protected int sp; /** string point as offset */ protected int np; protected SymbolTable symbolTable = new SymbolTable(); /** * The token, set by nextToken(). */ protected Token token; protected Keywords keywods = Keywords.DEFAULT_KEYWORDS; protected String stringVal; protected boolean skipComment = true; private SavePoint savePoint = null; /* * anti sql injection */ private boolean allowComment = true; private int varIndex = -1; public Lexer(String input) { this(input, true); } public boolean isAllowComment() { return allowComment; } public void setAllowComment(boolean allowComment) { this.allowComment = allowComment; } public int nextVarIndex() { return ++varIndex; } private static class SavePoint { int bp; int sp; int np; char ch; Token token; } public Keywords getKeywods() { return keywods; } public void mark() { SavePoint savePoint = new SavePoint(); savePoint.bp = bp; savePoint.sp = sp; savePoint.np = np; savePoint.ch = ch; savePoint.token = token; this.savePoint = savePoint; } public void reset() { this.bp = savePoint.bp; this.sp = savePoint.sp; this.np = savePoint.np; this.ch = savePoint.ch; this.token = savePoint.token; } public Lexer(String input, boolean skipComment) { this(input.toCharArray(), input.length(), skipComment); } public Lexer(char[] input, int inputLength, boolean skipComment) { this.skipComment = skipComment; this.sbuf = SBUFREF.get(); // new char[1024]; if (this.sbuf == null) { this.sbuf = new char[1024]; SBUFREF.set(sbuf); } this.eofPos = inputLength; // QS_TODO ? if (inputLength == input.length) { if (input.length > 0 && isWhitespace(input[input.length - 1])) { inputLength--; } else { char[] newInput = new char[inputLength + 1]; System.arraycopy(input, 0, newInput, 0, input.length); input = newInput; } } this.buf = input; this.buflen = inputLength; this.buf[this.buflen] = EOI; this.bp = -1; scanChar(); } protected final void scanChar() { ch = buf[++bp]; } protected void unscan() { ch = buf[--bp]; } /** * Report an error at the given position using the provided arguments. */ protected void lexError(int pos, String key, Object... args) { token = ERROR; } /** * Report an error at the current token position using the provided arguments. */ public void lexError(String key, Object... args) { lexError(tokenPos, key, args); } /** * Return the current token, set by nextToken(). */ public final Token token() { return token; } public String info() { return this.token + " " + this.stringVal(); } public final void nextToken() { sp = 0; for (;;) { tokenPos = bp; if (isWhitespace(ch)) { scanChar(); continue; } if (ch == '$' && buf[bp + 1] == '{') { scanVariable(); return; } if (isFirstIdentifierChar(ch)) { if (ch == 'N') { if (buf[bp + 1] == '\'') { ++bp; ch = '\''; scanString(); token = Token.LITERAL_NCHARS; return; } } scanIdentifier(); return; } switch (ch) { case '0': if (buf[bp + 1] == 'x') { scanChar(); scanChar(); scanHexaDecimal(); } else { scanNumber(); } return; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': scanNumber(); return; case ',': scanChar(); token = COMMA; return; case '(': scanChar(); token = LPAREN; return; case ')': scanChar(); token = RPAREN; return; case '[': scanChar(); token = LBRACKET; return; case ']': scanChar(); token = RBRACKET; return; case '{': scanChar(); token = LBRACE; return; case '}': scanChar(); token = RBRACE; return; case ':': scanChar(); if (ch == '=') { scanChar(); token = COLONEQ; } else { if (isDigit(ch)) { unscan(); scanVariable(); } else { unscan(); scanVariable(); } } return; case '#': scanVariable(); return; case '.': scanChar(); if (isDigit(ch)) { unscan(); scanNumber(); return; } else if (ch == '.') { scanChar(); if (ch == '.') { scanChar(); token = Token.DOTDOTDOT; } else { token = Token.DOTDOT; } } else { token = Token.DOT; } return; case '\'': scanString(); return; case '\"': scanAlias(); return; case '*': scanChar(); token = Token.STAR; return; case '?': scanChar(); token = Token.QUES; return; case ';': scanChar(); token = Token.SEMI; return; case '`': throw new SQLParserException("TODO"); // TODO case '@': scanVariable(); return; case '-': int subNextChar = buf[bp + 1]; if (subNextChar == '-') { scanComment(); if ((token() == Token.LINE_COMMENT || token() == Token.MULTI_LINE_COMMENT) && skipComment) { sp = 0; continue; } } else { scanOperator(); } return; case '/': int nextChar = buf[bp + 1]; if (nextChar == '/' || nextChar == '*') { scanComment(); if ((token() == Token.LINE_COMMENT || token() == Token.MULTI_LINE_COMMENT) && skipComment) { sp = 0; continue; } } else { token = Token.SLASH; scanChar(); } return; default: if (Character.isLetter(ch)) { scanIdentifier(); return; } if (isOperator(ch)) { scanOperator(); return; } // QS_TODO ? if (bp == buflen || ch == EOI && bp + 1 == buflen) { // JLS token = EOF; tokenPos = bp = eofPos; } else { lexError("illegal.char", String.valueOf((int) ch)); scanChar(); } return; } } } private final void scanOperator() { switch (ch) { case '+': scanChar(); token = Token.PLUS; break; case '-': scanChar(); token = Token.SUB; break; case '*': scanChar(); token = Token.STAR; break; case '/': scanChar(); token = Token.SLASH; break; case '&': scanChar(); if (ch == '&') { scanChar(); token = Token.AMPAMP; } else { token = Token.AMP; } break; case '|': scanChar(); if (ch == '|') { scanChar(); token = Token.BARBAR; } else { token = Token.BAR; } break; case '^': scanChar(); token = Token.CARET; break; case '%': scanChar(); token = Token.PERCENT; break; case '=': scanChar(); if (ch == '=') { scanChar(); token = Token.EQEQ; } else { token = Token.EQ; } break; case '>': scanChar(); if (ch == '=') { scanChar(); token = Token.GTEQ; } else if (ch == '>') { scanChar(); token = Token.GTGT; } else { token = Token.GT; } break; case '<': scanChar(); if (ch == '=') { scanChar(); if (ch == '>') { token = Token.LTEQGT; scanChar(); } else { token = Token.LTEQ; } } else if (ch == '>') { scanChar(); token = Token.LTGT; } else if (ch == '<') { scanChar(); token = Token.LTLT; } else { token = Token.LT; } break; case '!': scanChar(); if (ch == '=') { scanChar(); token = Token.BANGEQ; } else if (ch == '>') { scanChar(); token = Token.BANGGT; } else if (ch == '<') { scanChar(); token = Token.BANGLT; } else { token = Token.BANG; } break; case '?': scanChar(); token = Token.QUES; break; case '~': scanChar(); token = Token.TILDE; break; default: throw new SQLParserException("TODO"); } } protected void scanString() { np = bp; boolean hasSpecial = false; for (;;) { if (bp >= buflen) { lexError(tokenPos, "unclosed.str.lit"); return; } ch = buf[++bp]; if (ch == '\'') { scanChar(); if (ch != '\'') { token = LITERAL_CHARS; break; } else { if (!hasSpecial) { System.arraycopy(buf, np + 1, sbuf, 0, sp); hasSpecial = true; } putChar('\''); continue; } } if (!hasSpecial) { sp++; continue; } if (sp == sbuf.length) { putChar(ch); } else { sbuf[sp++] = ch; } } if (!hasSpecial) { stringVal = new String(buf, np + 1, sp); } else { stringVal = new String(sbuf, 0, sp); } } private final void scanAlias() { np = bp; for (;;) { if (bp >= buflen) { lexError(tokenPos, "unclosed.str.lit"); return; } ch = buf[++bp]; if (ch == '\"') { scanChar(); token = LITERAL_ALIAS; break; } if (sp == sbuf.length) { putChar(ch); } else { sbuf[sp++] = ch; } } stringVal = new String(buf, np + 1, sp); } public void scanVariable() { final char first = ch; if (ch != '@' && ch != ':' && ch != '#' && ch != '$') { throw new SQLParserException("illegal variable"); } int hash = first; np = bp; sp = 1; char ch; boolean mybatisFlag = false; if (buf[bp + 1] == '@') { ch = buf[++bp]; hash = 31 * hash + ch; sp++; } else if (buf[bp + 1] == '{') { hash = 31 * hash + '"'; bp++; sp++; mybatisFlag = true; } for (;;) { ch = buf[++bp]; if (!isIdentifierChar(ch)) { break; } hash = 31 * hash + ch; sp++; continue; } if (mybatisFlag) { if (ch != '}') { throw new SQLParserException("syntax error"); } hash = 31 * hash + '"'; ++bp; sp++; } this.ch = buf[bp]; stringVal = symbolTable.addSymbol(buf, np, sp, hash); token = Token.VARIANT; } public void scanComment() { if (!allowComment) { throw new NotAllowCommentException(); } if (ch != '/') { throw new IllegalStateException(); } np = bp; sp = 0; scanChar(); if (ch == '*') { scanChar(); sp++; for (;;) { if (ch == '*' && buf[bp + 1] == '/') { sp += 2; scanChar(); scanChar(); break; } scanChar(); sp++; } stringVal = new String(buf, np, sp); token = Token.MULTI_LINE_COMMENT; return; } if (ch == '/') { scanChar(); sp++; for (;;) { if (ch == '\r') { if (buf[bp + 1] == '\n') { sp += 2; scanChar(); break; } sp++; break; } if (ch == '\n') { scanChar(); sp++; break; } scanChar(); sp++; } stringVal = new String(buf, np + 1, sp); token = Token.LINE_COMMENT; return; } } public void scanIdentifier() { final char first = ch; final boolean firstFlag = isFirstIdentifierChar(first); if (!firstFlag) { throw new SQLParserException("illegal identifier"); } int hash = first; np = bp; sp = 1; char ch; for (;;) { ch = buf[++bp]; if (!isIdentifierChar(ch)) { break; } hash = 31 * hash + ch; sp++; continue; } this.ch = buf[bp]; stringVal = symbolTable.addSymbol(buf, np, sp, hash); Token tok = keywods.getKeyword(stringVal); if (tok != null) { token = tok; } else { token = Token.IDENTIFIER; } } public void scanNumber() { np = bp; if (ch == '-') { sp++; ch = buf[++bp]; } for (;;) { if (ch >= '0' && ch <= '9') { sp++; } else { break; } ch = buf[++bp]; } boolean isDouble = false; if (ch == '.') { if (buf[bp + 1] == '.') { token = Token.LITERAL_INT; return; } sp++; ch = buf[++bp]; isDouble = true; for (;;) { if (ch >= '0' && ch <= '9') { sp++; } else { break; } ch = buf[++bp]; } } if (ch == 'e' || ch == 'E') { sp++; ch = buf[++bp]; if (ch == '+' || ch == '-') { sp++; ch = buf[++bp]; } for (;;) { if (ch >= '0' && ch <= '9') { sp++; } else { break; } ch = buf[++bp]; } isDouble = true; } if (isDouble) { token = Token.LITERAL_FLOAT; } else { token = Token.LITERAL_INT; } } public void scanHexaDecimal() { np = bp; if (ch == '-') { sp++; ch = buf[++bp]; } for (;;) { if (CharTypes.isHex(ch)) { sp++; } else { break; } ch = buf[++bp]; } token = Token.LITERAL_HEX; } public String hexString() throws NumberFormatException { return new String(buf, np, sp); } public final boolean isDigit(char ch) { return ch >= '0' && ch <= '9'; } /** * Append a character to sbuf. */ protected final void putChar(char ch) { if (sp == sbuf.length) { char[] newsbuf = new char[sbuf.length * 2]; System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length); sbuf = newsbuf; } sbuf[sp++] = ch; } /** * Return the current token's position: a 0-based offset from beginning of the raw input stream (before unicode * translation) */ public final int pos() { return tokenPos; } /** * The value of a literal token, recorded as a string. For integers, leading 0x and 'l' suffixes are suppressed. */ public final String stringVal() { return stringVal; } private boolean isOperator(char ch) { switch (ch) { case '!': case '%': case '&': case '*': case '+': case '-': case '<': case '=': case '>': case '^': case '|': case '~': case ';': return true; default: return false; } } private static final long MULTMIN_RADIX_TEN = Long.MIN_VALUE / 10; private static final long N_MULTMAX_RADIX_TEN = -Long.MAX_VALUE / 10; private final static int[] digits = new int['9' + 1]; static { for (int i = '0'; i <= '9'; ++i) { digits[i] = i - '0'; } } // QS_TODO negative number is invisible for lexer public Number integerValue() throws NumberFormatException { long result = 0; boolean negative = false; int i = np, max = np + sp; long limit; long multmin; int digit; if (buf[np] == '-') { negative = true; limit = Long.MIN_VALUE; i++; } else { limit = -Long.MAX_VALUE; } multmin = negative ? MULTMIN_RADIX_TEN : N_MULTMAX_RADIX_TEN; if (i < max) { digit = digits[buf[i++]]; result = -digit; } while (i < max) { // Accumulating negatively avoids surprises near MAX_VALUE digit = digits[buf[i++]]; if (result < multmin) { return new BigInteger(numberString()); } result *= 10; if (result < limit + digit) { return new BigInteger(numberString()); } result -= digit; } if (negative) { if (i > np + 1) { if (result >= Integer.MIN_VALUE) { return (int) result; } return result; } else { /* Only got "-" */ throw new NumberFormatException(numberString()); } } else { result = -result; if (result <= Integer.MAX_VALUE) { return (int) result; } return result; } } public int bp() { return this.bp; } public char current() { return this.ch; } public void reset(int mark, char mark_ch, Token token) { this.bp = mark; this.ch = mark_ch; this.token = token; } public final String numberString() { return new String(buf, np, sp); } public BigDecimal decimalValue() { return new BigDecimal(buf, np, sp); } }