/* This file is part of VoltDB.
 * Copyright (C) 2008-2017 VoltDB Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
 */

package org.voltdb.parser;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Provides an API for performing various lexing operations on SQL/DML/DDL text.
 * Ideally it shouldn't be doing "parsing", i.e. language-aware token processing.
 * In reality the code is not split cleanly and lexing and parsing overlap a bit.
 *
 * Keep the regular expressions private and just expose methods needed for parsing.
 *
 * Avoid external dependencies since this is linked with the client.
 */
public class SQLLexer extends SQLPatternFactory
{
    //===== Fundamental (not derived) parsing data

    /** A statement-leading verb and whether statements starting with it are supported. */
    private static class VerbToken
    {
        final String token;
        final boolean supported;

        VerbToken(String token, boolean supported)
        {
            this.token = token;
            this.supported = supported;
        }
    }

    private final static VerbToken[] VERB_TOKENS = {
        // Supported verbs
        new VerbToken("alter", true),
        new VerbToken("create", true),
        new VerbToken("drop", true),
        new VerbToken("export", true),
        new VerbToken("partition", true),
        new VerbToken("dr", true),
        new VerbToken("set", true),
        // Unsupported verbs
        new VerbToken("import", false)
    };

    /** An object type keyword and whether ALTER/RENAME messaging treats it as rename-able. */
    private static class ObjectToken
    {
        final String token;
        final boolean renameable;

        ObjectToken(String token, boolean renameable)
        {
            this.token = token;
            this.renameable = renameable;
        }
    }

    private final static ObjectToken[] OBJECT_TOKENS = {
        // Rename-able objects
        new ObjectToken("table", true),
        new ObjectToken("stream", true),
        new ObjectToken("column", true),
        new ObjectToken("index", true),
        // Non-rename-able objects
        new ObjectToken("view", false),
        new ObjectToken("procedure", false),
        new ObjectToken("role", false),
        new ObjectToken("function", false)
    };

    private final static String[] MODIFIER_TOKENS = {
        "assumeunique", "unique"
    };

    static final char   BLOCK_DELIMITER_CHAR = '#';
    static final String BLOCK_DELIMITER = "###";

    //===== Special non-DDL/DML/SQL patterns

    // Match single-line comments
    private static final Pattern PAT_SINGLE_LINE_COMMENT = Pattern.compile(
            "^\\s*" + // start of line, 0 or more whitespace
            "--" +    // start of comment
            ".*$");   // everything to end of line

    // Match C-style comments, including ones that span several lines.
    // DOTALL makes '.' match every line terminator, so a comment containing
    // carriage returns (Windows line endings) is matched as well; the former
    // "(.|\n)" form could not match a carriage return and left such comments in place.
    private static final Pattern PAT_STRIP_CSTYLE_COMMENTS = Pattern.compile(
            "/\\*.*?\\*/", Pattern.DOTALL);

    //===== Derived parsing data (assigned exactly once in the static block below)

    // Simplest possible SQL DDL token lexer. (set in static block)
    private static final Pattern PAT_ANY_DDL_FIRST_TOKEN;

    // All handled patterns. (set in static block)
    private static final CheckedPattern[] WHITELISTS;

    // All rejected patterns. (set in static block)
    private static final CheckedPattern[] BLACKLISTS;

    // Extracts the table or stream name for DDL batch conflicting command checks.
    private static final Pattern PAT_TABLE_DDL_PREAMBLE =
        SPF.statementLeader(
            SPF.capture(SPF.tokenAlternatives("create", "drop")),   // DDL commands we're looking for
            SPF.tokenAlternatives("table", "stream"),               // target is table or stream
            SPF.capture(SPF.databaseObjectName())                   // table name (captured)
        ).compile("PAT_TABLE_DDL_PREAMBLE");

    // Matches the start of a SELECT statement
    private static final Pattern PAT_SELECT_STATEMENT_PREAMBLE =
        SPF.statementLeader(
            SPF.token("select")
        ).compile("PAT_SELECT_STATEMENT_PREAMBLE");

    // Capture group number defns for regex below.
    // Don't use capture labels because it is not supported in 1.6
    // and this class needs to compile in 1.6.
    private static final int PARENTTYPE_GROUP=1;
    @SuppressWarnings("unused") // We don't get this group as of now
    private static final int PARENTNAME_GROUP=2;
    private static final int CHILDTYPE_GROUP=3;
    @SuppressWarnings("unused") // We don't get this group as of now
    private static final int CHILDNAME_GROUP=4;

    // Pattern for plausible ALTER...RENAME statements.
    // Keep the matching loose in order to support clear messaging.
    private static final Pattern PAT_ALTER_RENAME =
        SPF.statementLeader(
            SPF.token("alter"),
            SPF.capture(SPF.databaseObjectTypeName()),
            SPF.capture(SPF.databaseObjectName()),
            SPF.optional(
                SPF.clause(
                    SPF.token("alter"),
                    SPF.capture(SPF.databaseObjectTypeName()),
                    SPF.capture(SPF.databaseObjectName())
                )
            ),
            SPF.token("rename"), SPF.token("to")
        ).compile("PAT_ALTER_RENAME");

    //========== Public Methods ==========

    /**
     * Check if a SQL string is a comment.
     * Only a single line consisting of optional whitespace followed by "--" qualifies.
     * @param sql SQL string
     * @return true if it's a comment
     */
    public static boolean isComment(String sql)
    {
        return PAT_SINGLE_LINE_COMMENT.matcher(sql).matches();
    }

    /**
     * Test if character is block delimiter
     * @param c character to test
     * @return true if c is block delimiter
     */
    public static boolean isBlockDelimiter(char c)
    {
        return c == BLOCK_DELIMITER_CHAR;
    }

    /**
     * Get the DDL token, if any, at the start of this statement.
     * The token is lower-cased independently of the default locale so that
     * keywords compare reliably on any platform.
     * @param sql statement text to inspect
     * @return returns token, or null if it wasn't DDL
     */
    public static String extractDDLToken(String sql)
    {
        Matcher ddlMatcher = PAT_ANY_DDL_FIRST_TOKEN.matcher(sql);
        if (!ddlMatcher.find()) {
            return null;
        }
        return ddlMatcher.group(1).toLowerCase(Locale.ROOT);
    }

    /**
     * Remove c-style comments globally and -- comments from the end of lines.
     * The input is split on newline characters and each resulting line is
     * followed by a single space in the output.
     * @param ddl DDL text
     * @return the text with comments removed
     */
    public static String stripComments(String ddl)
    {
        String withoutBlockComments = removeCStyleComments(ddl);
        StringBuilder sb = new StringBuilder();
        for (String ddlLine : withoutBlockComments.split("\n")) {
            sb.append(stripCommentFromLine(ddlLine)).append(' ');
        }
        return sb.toString();
    }

    /**
     * Strip -- comments from the end of a single line.
     * A "--" that appears inside a single or double quoted string is left alone.
     * @param ddlLine one line of DDL text
     * @return the line up to, but not including, the first unquoted "--",
     *         or the unchanged line if there is none
     */
    public static String stripCommentFromLine(String ddlLine)
    {
        boolean inQuote = false;
        char quoteChar = ' '; // will be written before use
        boolean lastCharWasDash = false;
        int length = ddlLine.length();

        for (int i = 0; i < length; i++) {
            char c = ddlLine.charAt(i);
            if (inQuote) {
                // Only the matching quote character closes the string.
                if (quoteChar == c) {
                    inQuote = false;
                }
                continue;
            }
            if (c == '-') {
                if (lastCharWasDash) {
                    // Second consecutive dash: drop both dashes and the rest of the line.
                    return ddlLine.substring(0, i - 1);
                }
                lastCharWasDash = true;
                continue;
            }
            lastCharWasDash = false;
            if (c == '\"' || c == '\'') {
                inQuote = true;
                quoteChar = c;
            }
        }
        return ddlLine;
    }

    /**
     * Get the table (or stream) name for a CREATE or DROP DDL statement.
     * The name is lower-cased independently of the default locale.
     * @param sql statement text to inspect
     * @return returns token, or null if the DDL isn't (CREATE|DROP) (TABLE|STREAM)
     */
    public static String extractDDLTableName(String sql)
    {
        Matcher matcher = PAT_TABLE_DDL_PREAMBLE.matcher(sql);
        if (matcher.find()) {
            return matcher.group(2).toLowerCase(Locale.ROOT);
        }
        return null;
    }

    /**
     * Naive filtering for stuff we haven't implemented yet.
     * Hopefully this gets whittled away and eventually disappears.
     *
     * @param sql  statement to check
     * @return     rejection explanation string or null if accepted
     */
    public static String checkPermitted(String sql)
    {
        /*
         * IMPORTANT: Black-lists are checked first because they know more about
         * what they don't like about a statement and can provide a better message.
         * It requires that black-lists patterns be very selective and that they
         * don't mind seeing statements that wouldn't pass the white-lists.
         */

        //=== Check against blacklists, must not be rejected by any.

        for (CheckedPattern cp : BLACKLISTS) {
            CheckedPattern.Result result = cp.check(sql);
            if (result.matcher != null) {
                return String.format("%s, in statement: %s", result.explanation, sql);
            }
        }

        //=== Check against whitelists, must be accepted by at least one.

        for (CheckedPattern cp : WHITELISTS) {
            if (cp.matches(sql)) {
                // The statement is permitted.
                return null;
            }
        }

        return String.format("AdHoc DDL contains an unsupported statement: %s", sql);
    }

    /**
     * Split SQL statements on semi-colons with quoted string and comment support.
     *
     * Degenerate formats such as escape as the last character or unclosed strings are ignored and
     * left to the SQL parser to complain about. This is a simple string splitter that errs on the
     * side of not splitting.
     *
     * Regexes are avoided.
     *
     * Handle single and double quoted strings and backslash escapes. Backslashes escape a single
     * character.
     *
     * Handle double-dash (single line) and C-style (multi-line) comments. Nested C-style comments
     * are not supported.
     *
     * A comment that starts a statement is returned as a statement of its own.
     *
     * @param sql raw SQL text to split
     * @return list of individual SQL statements, each trimmed and none empty
     */
    public static List<String> splitStatements(final String sql)
    {
        List<String> statements = new ArrayList<>();

        // Use a character array for efficient character-at-a-time scanning.
        char[] buf = sql.toCharArray();

        // Quote state: inQuote is true inside a quoted segment, quoteChar is its delimiter.
        boolean inQuote = false;
        char quoteChar = ' '; // will be written before use

        // Set to null outside of comments or to the string that ends the comment.
        String sCommentEnd = null;

        // Index to start of current statement.
        int iStart = 0;

        // True while scanning a comment that began exactly at the statement start.
        boolean statementIsComment = false;

        // False while skipping the whitespace that precedes a statement.
        boolean inStatement = false;

        // Index to current character.
        // IMPORTANT: The loop is structured in a way that requires all if/else/... blocks to bump
        // iCur appropriately. Failure of a corner case to bump iCur will cause an infinite loop.
        int iCur = 0;
        while (iCur < buf.length) {
            if (!inStatement) {
                // Eat up whitespace outside of a statement
                if (Character.isWhitespace(buf[iCur])) {
                    iCur++;
                    iStart = iCur;
                }
                else {
                    // Re-examine this character on the next pass as part of a statement.
                    inStatement = true;
                }
            }
            else if (sCommentEnd != null) {
                // Processing the interior of a comment. Check if at the comment or buffer end.
                if (iCur >= buf.length - sCommentEnd.length()) {
                    // Exit; whatever remains is collected as the last statement below.
                    iCur = buf.length;
                }
                else if (matchesAt(buf, iCur, sCommentEnd)) {
                    // Move past the comment end.
                    iCur += sCommentEnd.length();
                    sCommentEnd = null;
                    // If the comment is the whole of the statement so far, terminate it
                    if (statementIsComment) {
                        addStatement(statements, buf, iStart, iCur);
                        iStart = iCur;
                        statementIsComment = false;
                        inStatement = false;
                    }
                }
                else {
                    // Keep going inside the comment.
                    iCur++;
                }
            }
            else if (inQuote) {
                // Processing the interior of a quoted string.
                if (buf[iCur] == '\\') {
                    // Skip the '\' escape and the trailing single escaped character.
                    // iCur may land one past the end of the buffer; the tail
                    // collection after the loop is bounded by buf.length, not iCur.
                    iCur += 2;
                }
                else if (buf[iCur] == quoteChar) {
                    // Look at the next character to distinguish a double escaped quote
                    // from the end of the quoted string.
                    iCur++;
                    if (iCur < buf.length) {
                        if (buf[iCur] != quoteChar) {
                            // Not a double escaped quote - end of quoted string.
                            inQuote = false;
                        }
                        else {
                            // Move past the double escaped quote.
                            iCur++;
                        }
                    }
                }
                else {
                    // Move past an ordinary character.
                    iCur++;
                }
            }
            else {
                // Outside of a quoted string - watch for the next separator, quote or comment.
                if (buf[iCur] == ';') {
                    // Add terminated statement (if not empty after trimming).
                    addStatement(statements, buf, iStart, iCur);
                    iStart = iCur + 1;
                    iCur = iStart;
                    inStatement = false;
                }
                else if (buf[iCur] == '"' || buf[iCur] == '\'') {
                    // Start of quoted string.
                    inQuote = true;
                    quoteChar = buf[iCur];
                    iCur++;
                }
                else if (iCur <= buf.length - 2) {
                    // Comment (double-dash or C-style)?
                    if (buf[iCur] == '-' && buf[iCur+1] == '-') {
                        // One line double-dash comment start.
                        sCommentEnd = "\n"; // Works for *IX (\n) and Windows (\r\n).
                        if (iCur == iStart) {
                            statementIsComment = true;
                        }
                        iCur += 2;
                    }
                    else if (buf[iCur] == '/' && buf[iCur+1] == '*') {
                        // Multi-line C-style comment start.
                        sCommentEnd = "*/";
                        if (iCur == iStart) {
                            statementIsComment = true;
                        }
                        iCur += 2;
                    }
                    else {
                        // Not a comment start, move past this character.
                        iCur++;
                    }
                }
                else {
                    // Move past a non-quote/non-separator character.
                    iCur++;
                }
            }
        }

        // Get the last statement, if any. The loop only exits once iCur has reached
        // (or, after a trailing backslash escape, passed) the end of the buffer, so
        // the tail always runs to buf.length. Using iCur here would overrun the array.
        if (iStart < buf.length) {
            addStatement(statements, buf, iStart, buf.length);
        }

        return statements;
    }

    /**
     * Check if a statement is a SELECT.
     * @param statement  statement to check
     * @return           true if it's a SELECT statement
     */
    public static boolean isSelect(String statement)
    {
        return PAT_SELECT_STATEMENT_PREAMBLE.matcher(statement).matches();
    }

    //========== Private ==========

    /**
     * Initialize derived data
     */
    static {
        // Simplest possible SQL DDL token lexer
        String[] verbsAll = new String[VERB_TOKENS.length];
        for (int i = 0; i < VERB_TOKENS.length; ++i) {
            verbsAll[i] = VERB_TOKENS[i].token;
        }
        PAT_ANY_DDL_FIRST_TOKEN =
            SPF.statementLeader(
                SPF.capture(SPF.tokenAlternatives(verbsAll)),
                SPF.anyClause()
            ).compile("PAT_ANY_DDL_FIRST_TOKEN");

        // Whitelists for acceptable statement preambles.
        WHITELISTS = new CheckedPattern[] {
            new WhitelistSupportedPreamblePattern(),
            new CheckedPattern(SQLParser.SET_GLOBAL_PARAM_FOR_WHITELIST) {
                @Override
                String explainMatch(Matcher matcher) {
                    return null;
                }
            }
        };

        // Blacklists for statements that are rejected with an explanation.
        BLACKLISTS = new CheckedPattern[] {
            new BlacklistUnsupportedPreamblePattern(),
            new BlacklistRenamePattern()
        };
    }

    /**
     * Trim buf[start, end) and append it to the statement list unless it is empty.
     * @param statements  list receiving the statement
     * @param buf         full SQL text
     * @param start       inclusive start index
     * @param end         exclusive end index, must not exceed buf.length
     */
    private static void addStatement(List<String> statements, char[] buf, int start, int end)
    {
        String statement = String.copyValueOf(buf, start, end - start).trim();
        if (!statement.isEmpty()) {
            statements.add(statement);
        }
    }

    /**
     * Check whether the characters of text appear in buf starting at offset.
     * @param buf     characters to look in
     * @param offset  index of the first character to compare
     * @param text    expected character sequence
     * @return true if text fits inside buf at offset and every character matches
     */
    private static boolean matchesAt(char[] buf, int offset, String text)
    {
        int length = text.length();
        if (offset < 0 || offset + length > buf.length) {
            return false;
        }
        for (int i = 0; i < length; i++) {
            if (buf[offset + i] != text.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Collect the verb tokens having the requested supported flag, in declaration order.
     * @param supported  true for supported verbs, false for unsupported ones
     * @return matching verb token strings (possibly an empty array)
     */
    private static String[] verbTokens(boolean supported)
    {
        List<String> verbs = new ArrayList<>();
        for (VerbToken verb : VERB_TOKENS) {
            if (verb.supported == supported) {
                verbs.add(verb.token);
            }
        }
        return verbs.toArray(new String[verbs.size()]);
    }

    /** Remove c-style comments from a string aggressively (quoted strings are not respected) */
    private static String removeCStyleComments(String ddl)
    {
        // Plain java.util.regex is used to minimize client dependencies.
        return PAT_STRIP_CSTYLE_COMMENTS.matcher(ddl).replaceAll("");
    }

    /**
     * Find information about an object type token, if it's a known object type.
     * The lookup ignores case.
     * @param objectTypeName  object type name to look up
     * @return                object token information or null if it wasn't found
     */
    private static ObjectToken findObjectToken(String objectTypeName)
    {
        if (objectTypeName == null) {
            return null;
        }
        for (ObjectToken ot : OBJECT_TOKENS) {
            if (ot.token.equalsIgnoreCase(objectTypeName)) {
                return ot;
            }
        }
        return null;
    }

    /**
     * Abstract base for whitelists and blacklists
     */
    private static abstract class CheckedPattern
    {
        final Pattern pattern;

        CheckedPattern(Pattern pattern)
        {
            this.pattern = pattern;
        }

        /** Outcome of a check; both fields stay null when the statement didn't match. */
        static class Result
        {
            // non-null Matcher with groups() set if it matched
            Matcher matcher = null;
            // optional explanation, e.g. blacklist rejection message, or null if it didn't match
            String explanation = null;
        }

        /**
         * Check if statement matches.
         * The whole statement must match the pattern.
         * @param statement  statement to match against
         * @return           result object with the matcher and explanation set on a match
         */
        Result check(String statement)
        {
            Result result = new Result();
            Matcher matcher = this.pattern.matcher(statement);
            if (matcher.matches()) {
                result.matcher = matcher;
                result.explanation = this.explainMatch(matcher);
            }
            return result;
        }

        /**
         * Simplified yes/no match check
         * @param statement  statement to match against
         * @return           true if it matches
         */
        boolean matches(String statement)
        {
            return this.check(statement).matcher != null;
        }

        // Override to provide an explanation, e.g. for blacklist rejection.
        abstract String explainMatch(Matcher matcher);
    }

    /**
     * Whitelist matcher for supported two token preambles.
     *
     * Provides no explanation.
     */
    private static class WhitelistSupportedPreamblePattern extends CheckedPattern
    {
        private static Pattern initPattern()
        {
            // All handled (white-listed) patterns.
            String[] secondTokens = new String[OBJECT_TOKENS.length + MODIFIER_TOKENS.length];
            for (int i = 0; i < OBJECT_TOKENS.length; ++i) {
                secondTokens[i] = OBJECT_TOKENS[i].token;
            }
            // Modifier tokens are supported in the place of object tokens following
            // a verb to allow the "verb modifier object" pattern like "CREATE UNIQUE INDEX".
            // For simplicity, "CREATE UNIQUE" et al. are considered sufficient evidence that
            // the statement is a permitted white-listed DDL statement.
            // We seem to be more concerned about accidentally permitting
            //     "CREATE <non-permitted-object> ..."
            // than "CREATE UNIQUE <non-permitted-object> ...".
            // Otherwise, we'd require the modifiers to be part of a nested
            // "modifier object" subpattern.
            for (int j = 0; j < MODIFIER_TOKENS.length; ++j) {
                secondTokens[OBJECT_TOKENS.length + j] = MODIFIER_TOKENS[j];
            }

            String[] verbsSupported = verbTokens(true);

            return SPF.statementLeader(
                    SPF.clause(
                        SPF.tokenAlternatives(verbsSupported),
                        SPF.tokenAlternatives(secondTokens)
                    )
                ).compile("PAT_WHITELISTS-PREAMBLES");
        }

        WhitelistSupportedPreamblePattern()
        {
            super(initPattern());
        }

        // Whitelist match provides no explanation.
        @Override
        String explainMatch(Matcher matcher)
        {
            return null;
        }
    }

    /**
     * Blacklists known unsupported statement preambles and explains rejections.
     */
    private static class BlacklistUnsupportedPreamblePattern extends CheckedPattern
    {
        private static Pattern initPattern()
        {
            String[] verbsNotSupported = verbTokens(false);

            return SPF.statementLeader(
                    SPF.capture(SPF.tokenAlternatives(verbsNotSupported))
                ).compile("PAT_BLACKLISTS-PREAMBLES");
        }

        BlacklistUnsupportedPreamblePattern()
        {
            super(initPattern());
        }

        /**
         * Provide a match explanation, assuming it's a rejection.
         * The offending verb is upper-cased independently of the default locale.
         */
        @Override
        String explainMatch(Matcher matcher)
        {
            return String.format("Statement is not supported: %s",
                                 matcher.group(1).toUpperCase(Locale.ROOT));
        }
    }

    /**
     * Blacklists ALTER/RENAME and provides focused rejection explanations.
     */
    private static class BlacklistRenamePattern extends CheckedPattern
    {
        BlacklistRenamePattern()
        {
            super(PAT_ALTER_RENAME);
        }

        /**
         * (Internal)
         * See if there's something to say about a parent or child target object type.
         * @param typeName  object type name to check
         * @param isParent  true when a child object is available and this is the parent
         * @return          explanation string or null when a child still needs checking
         */
        private static String getExplanation(String typeName, boolean isParent)
        {
            assert typeName != null;
            ObjectToken token = findObjectToken(typeName);
            if (token == null) {
                return String.format("AdHoc DDL ALTER/RENAME refers to an unknown object type '%s'", typeName);
            }
            if (isParent) {
                // The parent is okay, still need to check the child.
                return null;
            }
            if (!token.renameable) {
                return String.format("AdHoc DDL ALTER/RENAME is not supported for object type '%s'", typeName);
            }
            return "AdHoc DDL ALTER/RENAME is not yet supported";
        }

        /**
         * Provide a match explanation, assuming it's a rejection.
         */
        @Override
        String explainMatch(Matcher matcher)
        {
            String parentType = matcher.group(PARENTTYPE_GROUP);
            String childType = matcher.group(CHILDTYPE_GROUP);
            // See if there's something to say about the parent object type.
            String explanation = getExplanation(parentType, childType != null);
            // If not see if there's something to say about the child type, when applicable.
            // A null explanation here implies the child type was captured.
            if (explanation == null) {
                explanation = getExplanation(childType, false);
            }
            return explanation;
        }
    }
}