/*
 * Copyright 2014 NAVER Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.navercorp.pinpoint.common.util;

import java.util.LinkedList;
import java.util.List;
import java.util.Queue;

/**
 * Hand-written single-pass SQL scanner with three operations:
 * <ul>
 * <li>{@link #normalizedSql(String)} replaces numeric literals with {@code <index>#} and the
 * content of single-quoted literals with {@code <index>$}, collecting the removed values into a
 * {@code ','}-separated parameter string;</li>
 * <li>{@link #combineOutputParams(String, List)} performs the reverse substitution;</li>
 * <li>{@link #combineBindValues(String, List)} substitutes {@code ?} placeholders with quoted
 * bind values.</li>
 * </ul>
 * Comments ({@code /* ... *}{@code /}, {@code // ...}, {@code -- ...}) are always copied verbatim.
 * The class declares no instance fields.
 *
 * @author emeroad
 */
public class DefaultSqlParser implements SqlParser {

    /** Separator between values in the parsed-parameter string. */
    public static final char SEPARATOR = ',';
    /** Marker that follows the index of a replaced quoted literal, e.g. {@code '0$'}. */
    public static final char SYMBOL_REPLACE = '$';
    /** Marker that follows the index of a replaced numeric literal, e.g. {@code 0#}. */
    public static final char NUMBER_REPLACE = '#';

    /** Value returned by {@link #lookAhead1(String, int)} when there is no next character. */
    private static final int NEXT_TOKEN_NOT_EXIST = -1;
    /** Extra capacity reserved for the normalized SQL on top of the input length. */
    private static final int NORMALIZED_SQL_BUFFER = 32;

    /** Shared result returned for a {@code null} SQL string. */
    private static final NormalizedSql NULL_OBJECT = new DefaultNormalizedSql("", "");

    public DefaultSqlParser() {
    }

    /**
     * Normalizes the given SQL by replacing literals with indexed markers.
     * <p>
     * Numeric literals become {@code <index>#}; the content of a single-quoted literal becomes
     * {@code <index>$} (quotes are kept). The replaced values are appended, in order, to a
     * parameter string separated by {@link #SEPARATOR}; a {@code ','} inside a quoted literal is
     * written as {@code ",,"}. Empty literals ({@code ''}) and comments are copied unchanged.
     *
     * @param sql the SQL text, may be {@code null}
     * @return the normalized SQL and its parameter string; an empty result for {@code null}; a
     *         result wrapping the original {@code sql} instance when nothing was replaced
     */
    @Override
    public NormalizedSql normalizedSql(final String sql) {
        if (sql == null) {
            return NULL_OBJECT;
        }

        final int length = sql.length();
        final StringBuilder normalized = new StringBuilder(length + NORMALIZED_SQL_BUFFER);
        final StringBuilder parsedParameter = new StringBuilder(32);
        boolean change = false;
        int replaceIndex = 0;
        // false while inside an identifier-like token so that "col1" is not treated as a number
        boolean numberTokenStartEnable = true;

        for (int i = 0; i < length; i++) {
            final char ch = sql.charAt(i);
            switch (ch) {
                // COMMENT start check
                case '/': {
                    final int next = lookAhead1(sql, i);
                    if (next == '*') {
                        // multi line comment and oracle hint /*+ */
                        normalized.append("/*");
                        i = readBlockComment(sql, normalized, i + 2);
                    } else if (next == '/') {
                        // single line comment
                        normalized.append("//");
                        i = readLine(sql, normalized, i + 2);
                    } else {
                        // plain operator
                        numberTokenStartEnable = true;
                        normalized.append(ch);
                    }
                    break;
                }
                // case '#': # is a single line comment in mysql (not handled)
                case '-':
                    if (lookAhead1(sql, i) == '-') {
                        // single line comment
                        normalized.append("--");
                        i = readLine(sql, normalized, i + 2);
                    } else {
                        // plain operator
                        numberTokenStartEnable = true;
                        normalized.append(ch);
                    }
                    break;

                // SYMBOL start check
                case '\'':
                    if (lookAhead1(sql, i) == '\'') {
                        // Empty literal: nothing to extract, so no $ marker is produced.
                        normalized.append("''");
                        // Step onto the closing quote only; the loop increment then moves to the
                        // character after it (advancing by 2 here would drop that character).
                        i++;
                        break;
                    }
                    change = true;
                    normalized.append('\'');
                    i++;
                    appendOutputSeparator(parsedParameter);
                    for (; i < length; i++) {
                        final char stateCh = sql.charAt(i);
                        if (stateCh == '\'') {
                            if (lookAhead1(sql, i) == '\'') {
                                // a doubled ' is an escaped quote and stays part of the value
                                i++;
                                appendOutputParam(parsedParameter, "''");
                                continue;
                            }
                            // closing quote: emit the marker in place of the literal content
                            normalized.append(replaceIndex++);
                            normalized.append(SYMBOL_REPLACE);
                            normalized.append('\'');
                            break;
                        }
                        appendSeparatorCheckOutputParam(parsedParameter, stateCh);
                    }
                    break;

                // number start check
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9': {
                    // http://www.h2database.com/html/grammar.html look at the state machine more
                    if (!numberTokenStartEnable) {
                        // digit that belongs to an identifier-like token
                        normalized.append(ch);
                        break;
                    }
                    change = true;
                    normalized.append(replaceIndex++);
                    normalized.append(NUMBER_REPLACE);
                    appendOutputSeparator(parsedParameter);
                    appendOutputParam(parsedParameter, ch);
                    int numberEnd = i + 1;
                    while (numberEnd < length && isNumberChar(sql.charAt(numberEnd))) {
                        appendOutputParam(parsedParameter, sql.charAt(numberEnd));
                        numberEnd++;
                    }
                    // leave i on the last consumed character; the loop increment moves past it
                    i = numberEnd - 1;
                    break;
                }

                // empty space
                case ' ':
                case '\t':
                case '\n':
                case '\r':
                // operators, see http://msdn.microsoft.com/en-us/library/ms174986.aspx
                case '*':
                case '+':
                case '%':
                case '=':
                case '<':
                case '>':
                case '&':
                case '|':
                case '^':
                case '~':
                case '!':
                // punctuation
                case '(':
                case ')':
                case ',':
                case ';':
                    numberTokenStartEnable = true;
                    normalized.append(ch);
                    break;

                case '.':
                case '_':
                case '@': // Assignment Operator
                case ':': // Oracle's bind variable is possible with :bindvalue
                    numberTokenStartEnable = false;
                    normalized.append(ch);
                    break;

                default:
                    // what if it's in a different language?? only ASCII letters start an identifier
                    numberTokenStartEnable = !isAsciiLetter(ch);
                    normalized.append(ch);
                    break;
            }
        }

        if (!change) {
            // Reuse the input if not modified:
            // 1. new strings are not generated
            // 2. reuse hashcodes
            return new DefaultNormalizedSql(sql, "");
        }
        final String parsedParameterString;
        if (parsedParameter.length() > 0) {
            parsedParameterString = parsedParameter.toString();
        } else {
            parsedParameterString = "";
        }
        return new DefaultNormalizedSql(normalized.toString(), parsedParameterString);
    }

    /**
     * Copies the rest of the current line, including the terminating {@code '\n'} if present.
     *
     * @param sql        the SQL text
     * @param normalized the buffer to copy into
     * @param index      index of the first character to copy
     * @return index of the copied {@code '\n'}, or {@code sql.length()} if the text ended first
     */
    private int readLine(String sql, StringBuilder normalized, int index) {
        final int length = sql.length();
        for (; index < length; index++) {
            final char ch = sql.charAt(index);
            normalized.append(ch);
            if (ch == '\n') {
                break;
            }
        }
        return index;
    }

    /**
     * Copies the body of a block comment up to and including the closing {@code *}{@code /}.
     * The opening {@code /*} must already have been written by the caller.
     *
     * @param sql   the SQL text
     * @param out   the buffer to copy into
     * @param index index of the first character after the opening {@code /*}
     * @return index of the closing {@code '/'}, or {@code sql.length()} if the comment is
     *         unterminated
     */
    private int readBlockComment(String sql, StringBuilder out, int index) {
        final int length = sql.length();
        for (; index < length; index++) {
            final char ch = sql.charAt(index);
            if (ch == '*' && lookAhead1(sql, index) == '/') {
                out.append("*/");
                return index + 1;
            }
            out.append(ch);
        }
        return index;
    }

    /** Appends {@link #SEPARATOR} unless {@code output} is still empty (first parameter). */
    private void appendOutputSeparator(StringBuilder output) {
        if (output.length() == 0) {
            // first parameter
            return;
        }
        output.append(SEPARATOR);
    }

    /** Appends {@code str} to the parameter buffer as-is. */
    private void appendOutputParam(StringBuilder output, String str) {
        output.append(str);
    }

    /** Appends {@code ch}, writing a {@code ','} as {@code ",,"} so it is not read as a separator. */
    private void appendSeparatorCheckOutputParam(StringBuilder output, char ch) {
        if (ch == ',') {
            output.append(",,");
        } else {
            output.append(ch);
        }
    }

    /** Appends {@code ch} to the parameter buffer as-is. */
    private void appendOutputParam(StringBuilder output, char ch) {
        output.append(ch);
    }

    /** Returns whether {@code ch} is an ASCII decimal digit. */
    private static boolean isDigit(char ch) {
        return ch >= '0' && ch <= '9';
    }

    /** Returns whether {@code ch} may continue a numeric literal: a digit, {@code '.'}, {@code 'E'} or {@code 'e'}. */
    private static boolean isNumberChar(char ch) {
        return isDigit(ch) || ch == '.' || ch == 'E' || ch == 'e';
    }

    /** Returns whether {@code ch} is an ASCII letter. */
    private static boolean isAsciiLetter(char ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    }

    /**
     * Looks up the character following {@code index}.
     *
     * @param sql   the SQL text
     * @param index index of the current character
     * @return the next character, or {@link #NEXT_TOKEN_NOT_EXIST} if {@code index} is the last one
     */
    private int lookAhead1(String sql, int index) {
        index++;
        if (index < sql.length()) {
            return sql.charAt(index);
        } else {
            return NEXT_TOKEN_NOT_EXIST;
        }
    }

    /**
     * Substitutes {@code <index>#} and {@code <index>$} markers with the corresponding entries of
     * {@code outputParams}.
     * <p>
     * A run of digits that is not followed by a marker is copied unchanged. A marker whose index
     * does not fit an {@code int} or is out of range for {@code outputParams} is copied unchanged
     * as well. Comments are copied verbatim and never substituted.
     *
     * @param sql          normalized SQL, must not be {@code null}
     * @param outputParams values indexed by the marker numbers
     * @return the SQL with all resolvable markers substituted
     */
    @Override
    public String combineOutputParams(String sql, List<String> outputParams) {
        final int length = sql.length();
        final StringBuilder normalized = new StringBuilder(length + 16);
        for (int i = 0; i < length; i++) {
            final char ch = sql.charAt(i);
            switch (ch) {
                // COMMENT start check
                case '/': {
                    final int next = lookAhead1(sql, i);
                    if (next == '*') {
                        // multi line comment and oracle hint /*+ */
                        normalized.append("/*");
                        i = readBlockComment(sql, normalized, i + 2);
                    } else if (next == '/') {
                        // single line comment
                        normalized.append("//");
                        i = readLine(sql, normalized, i + 2);
                    } else {
                        normalized.append(ch);
                    }
                    break;
                }
                // case '#': # is a single line comment in mysql (not handled)
                case '-':
                    if (lookAhead1(sql, i) == '-') {
                        // single line comment
                        normalized.append("--");
                        i = readLine(sql, normalized, i + 2);
                    } else {
                        normalized.append(ch);
                    }
                    break;

                // number start check
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9': {
                    // collect the whole digit run, then decide by the character that follows it
                    int tokenEnd = i + 1;
                    while (tokenEnd < length && isDigit(sql.charAt(tokenEnd))) {
                        tokenEnd++;
                    }
                    final String indexToken = sql.substring(i, tokenEnd);
                    final int marker = (tokenEnd < length) ? sql.charAt(tokenEnd) : NEXT_TOKEN_NOT_EXIST;
                    if (marker == NUMBER_REPLACE || marker == SYMBOL_REPLACE) {
                        appendReplacedParam(normalized, indexToken, (char) marker, outputParams);
                        // consume the marker character as well
                        i = tokenEnd;
                    } else {
                        // plain digits: copy and let the main loop handle the following character
                        normalized.append(indexToken);
                        i = tokenEnd - 1;
                    }
                    break;
                }

                default:
                    normalized.append(ch);
                    break;
            }
        }

        return normalized.toString();
    }

    /**
     * Appends the output parameter addressed by {@code indexToken}. If the index cannot be
     * resolved, the original {@code indexToken + marker} text is appended instead (and nothing
     * else).
     *
     * @param out          the buffer to append to
     * @param indexToken   the decimal digits preceding the marker
     * @param marker       {@link #NUMBER_REPLACE} or {@link #SYMBOL_REPLACE}
     * @param outputParams values indexed by the marker numbers
     */
    private void appendReplacedParam(StringBuilder out, String indexToken, char marker, List<String> outputParams) {
        try {
            final int paramIndex = Integer.parseInt(indexToken);
            final String value = outputParams.get(paramIndex);
            out.append(value);
        } catch (NumberFormatException ignored) {
            // digits only, so this means the index overflows int: keep the text as written
            out.append(indexToken).append(marker);
        } catch (IndexOutOfBoundsException ignored) {
            // no such parameter: keep the text as written
            out.append(indexToken).append(marker);
        }
    }

    /**
     * Substitutes each {@code ?} placeholder outside of quotes and comments with the next bind
     * value, trimmed and wrapped in single quotes.
     * <p>
     * Text inside {@code '...'} or {@code "..."} (including doubled, i.e. escaped, quote
     * characters) and inside comments is copied verbatim. When there are more placeholders than
     * bind values, the remaining placeholders are left as {@code ?}.
     *
     * @param sql        the SQL text; returned as-is when empty according to {@code StringUtils.isEmpty}
     * @param bindValues the values to substitute in order; {@code sql} is returned as-is when this
     *                   is empty according to {@code CollectionUtils.isEmpty}
     * @return the SQL with bind values substituted
     */
    public String combineBindValues(String sql, List<String> bindValues) {
        if (StringUtils.isEmpty(sql)) {
            return sql;
        }
        if (CollectionUtils.isEmpty(bindValues)) {
            return sql;
        }

        final Queue<String> bindValueQueue = new LinkedList<String>();
        for (String value : bindValues) {
            // trim
            bindValueQueue.add(value.trim());
        }

        final int length = sql.length();
        final StringBuilder result = new StringBuilder(length + 16);

        boolean inQuotes = false;
        char quoteChar = 0;
        for (int i = 0; i < length; i++) {
            final char ch = sql.charAt(i);

            if (inQuotes) {
                if (ch == quoteChar) {
                    if (lookAhead1(sql, i) == quoteChar) {
                        // Doubled quote = escaped quote inside the literal. Copy BOTH characters
                        // so the literal is reproduced unchanged.
                        result.append(ch).append(ch);
                        i++;
                        continue;
                    }
                    // closing quote
                    inQuotes = false;
                    quoteChar = 0;
                }
                result.append(ch);
                continue;
            }

            if (ch == '/') {
                // COMMENT start check
                final int next = lookAhead1(sql, i);
                if (next == '*') {
                    // multi line comment and oracle hint /*+ */
                    result.append("/*");
                    i = readBlockComment(sql, result, i + 2);
                } else if (next == '/') {
                    // single line comment
                    result.append("//");
                    i = readLine(sql, result, i + 2);
                } else {
                    result.append(ch);
                }
            } else if (ch == '-') {
                if (lookAhead1(sql, i) == '-') {
                    // single line comment
                    result.append("--");
                    i = readLine(sql, result, i + 2);
                } else {
                    result.append(ch);
                }
            } else if (ch == '\'' || ch == '"') {
                inQuotes = true;
                quoteChar = ch;
                result.append(ch);
            } else if (ch == '?') {
                if (bindValueQueue.isEmpty()) {
                    // more placeholders than values: keep the placeholder instead of dropping it
                    result.append(ch);
                } else {
                    result.append('\'').append(bindValueQueue.poll()).append('\'');
                }
            } else {
                result.append(ch);
            }
        }

        return result.toString();
    }
}