/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tajo.cli.tsql;

import org.apache.tajo.cli.tsql.ParsedResult.StatementType;

import java.util.ArrayList;
import java.util.List;

/**
 * A parser used in tsql to split multiple SQL lines into SQL statements and meta commands.
 * It recognizes the termination of each SQL statement and single-quoted strings (') while
 * being fed separate lines, keeping its state between calls to {@link #parseLines(String)}.
 *
 * <h2>State Machine</h2>
 * Whitespace in front of a statement or meta command (i.e., in the TOK_START state) is skipped.
 *
 * <pre>
 * (start) TOK_START --> META ---------------------> META_EOS
 *                    |
 *                    |
 *                    |
 *                    |-----------> STATEMENT ----------> STMT_EOS
 *                                  \       ^
 *                                  \      /
 *                                  \-> WITHIN_QUOTE
 *                                        \    ^
 *                                        \---/
 * </pre>
 */
public class SimpleParser {

  public enum ParsingState {
    TOK_START,     // Start State
    META,          // Meta Command
    STATEMENT,     // Statement
    WITHIN_QUOTE,  // Within Quote
    INVALID,       // Invalid Statement
    STATEMENT_EOS, // End State (End of Statement)
    META_EOS       // End State (End of Meta Command)
  }

  public static final ParsingState START_STATE = ParsingState.TOK_START;

  ParsingState state = START_STATE;

  /**
   * Counter used in error messages. It is reset to zero whenever parsing is in TOK_START and incremented once per
   * iteration of the main loop of {@link #parseLines(String)}.
   */
  int lineNum;

  /**
   * It will be used to store a query statement into Jline history.
   * The query statement for history does not include unnecessary white spaces and new lines.
   */
  private final StringBuilder historyAppender = new StringBuilder();

  /**
   * It will be used to submit a query statement to the TajoMaster. It just contains a raw query statement string.
   */
  private final StringBuilder rawAppender = new StringBuilder();

  /**
   * Parses a whole script with a fresh parser and then signals the end of file, so that a trailing
   * statement without a terminating semicolon is returned as well.
   *
   * @param str script text
   * @return parsed results in the order in which they appear in the script
   * @throws InvalidStatementException if the script ends within a quoted string
   */
  public static List<ParsedResult> parseScript(String str) throws InvalidStatementException {
    SimpleParser parser = new SimpleParser();
    List<ParsedResult> parsedResults = new ArrayList<>();
    parsedResults.addAll(parser.parseLines(str));
    parsedResults.addAll(parser.EOF());
    return parsedResults;
  }

  /**
   * Feeds one or more lines into the parser. Statements and meta commands completed by this input are returned;
   * an incomplete statement is kept in the parser and continued by the next call or finished by {@link #EOF()}.
   *
   * @param str text to be parsed
   * @return results completed by this input (possibly empty)
   * @throws InvalidStatementException if the parser is in the INVALID state
   */
  public List<ParsedResult> parseLines(String str) throws InvalidStatementException {
    List<ParsedResult> statements = new ArrayList<>();
    int lineStartIdx;
    int idx = 0;
    char[] chars = str.toCharArray();

    // if parsing continues, it means that the previous line is broken by '\n'.
    // So, we should add new line to rawAppender.
    int appenderLen = rawAppender.length();
    if (appenderLen != 0 && rawAppender.charAt(appenderLen - 1) != '\n' && isStatementContinue()) {
      rawAppender.append("\n");
    }

    while (idx < chars.length) {

      // initialization for new statement
      if (state == ParsingState.TOK_START) {
        lineNum = 0;

        // ignore all whitespace before start
        if (Character.isWhitespace(chars[idx])) {
          idx++;
          continue;
        }
      }

      lineStartIdx = idx;

      if (state == ParsingState.TOK_START && chars[idx] == '\\') {
        ////////////////////////////
        // TOK_START --> META
        ////////////////////////////
        state = ParsingState.META;

        ////////////////////////////
        // META --> META_EOS
        ////////////////////////////
        while (state != ParsingState.META_EOS && idx < chars.length) {
          if (isEndOfMeta(chars[idx++])) {
            state = ParsingState.META_EOS;
          }
        }

        // When the meta command is terminated, the terminator (';' or '\n') itself is excluded.
        int metaEndIdx = (state == ParsingState.META_EOS) ? idx - 1 : idx;
        String metaCommand = str.substring(lineStartIdx, metaEndIdx);
        historyAppender.append(metaCommand);
        appendToRawStatement(metaCommand, true);

      } else if (isInlineCommentStart(chars, idx)) {
        // A comment line: it is kept in the raw statement only.
        idx = consumeInlineComment(chars, idx);
        appendToRawStatement(str.substring(lineStartIdx, idx), true);

      } else if (isStatementContinue() || isStatementStart(chars[idx])) {
        /////////////////////////////////
        //    TOK_START     -> STATEMENT
        // or TOK_STATEMENT -> STATEMENT
        ////////////////////////////////
        if (!isStatementContinue()) { // TOK_START -> STATEMENT
          state = ParsingState.STATEMENT;
          rawAppender.append("\n");
        }

        while (!isTerminateState(state) && idx < chars.length) {
          char character = chars[idx++];

          ///////////////////////////////////////////////////////
          // in-statement loop BEGIN
          ///////////////////////////////////////////////////////
          if (state != ParsingState.WITHIN_QUOTE && isEndOfStatement(character)) {
            // A semicolon terminates the statement only outside of a quoted string. The state can be
            // WITHIN_QUOTE here when a quoted string continues from the input of a previous call.
            state = ParsingState.STATEMENT_EOS;

          } else if (state == ParsingState.STATEMENT && character == '\n') {
            appendToBothStatements(chars, lineStartIdx, idx, 1); // omit new line character '\n' from history statement
            lineStartIdx = idx;

          } else if (state == ParsingState.STATEMENT && character == '\'') { // TOK_STATEMENT -> WITHIN_QUOTE
            state = ParsingState.WITHIN_QUOTE;

            if (idx < chars.length) {
              character = chars[idx++];
            } else {
              continue;
            }

            // idx points the characters followed by the current character. So, we should use 'idx - 1'
            // in order to point the current character.
          } else if (state == ParsingState.STATEMENT && idx < chars.length && isInlineCommentStart(chars, idx - 1)) {
            idx++; // now idx points the character right after the two dashes
            appendToBothStatements(chars, lineStartIdx, idx, 2); // omit two dash characters '--' from history statement
            int commentStartIdx = idx;
            idx = consumeInlineComment(chars, idx);
            appendToRawStatement(str.substring(commentStartIdx, idx), false);
            lineStartIdx = idx;
          }
          ///////////////////////////////////////////////////////
          // in-statement loop END
          ///////////////////////////////////////////////////////

          if (state == ParsingState.WITHIN_QUOTE) {
            while (idx < chars.length) {
              ///////////////////////////////
              // WITHIN_QUOTE --> STATEMENT
              ///////////////////////////////
              if (character == '\'') {
                state = ParsingState.STATEMENT;
                break;
              }
              character = chars[idx++];
            }
            // The closing quote may be the very last character of the input.
            if (state == ParsingState.WITHIN_QUOTE && character == '\'') {
              state = ParsingState.STATEMENT;
            }
          }
        }

        // After all characters are consumed
        if (state == ParsingState.STATEMENT_EOS) { // If one query statement is terminated
          appendToBothStatements(chars, lineStartIdx, idx - 1); // skip semicolon (;)
        } else {
          appendToBothStatements(chars, lineStartIdx, idx);

          // if it is not within quote and there is no space between lines, adds a space.
          if (state == ParsingState.STATEMENT && (historyAppender.charAt(historyAppender.length() - 1) != ' ')) {
            historyAppender.append(" ");
            rawAppender.append("\n");
          }
        }

      } else { // skip unknown character
        idx++;
      }

      lineNum++;
      // A meta command which reaches the end of the input without a terminator is complete as well.
      statements.addAll(doProcessEndOfStatement(state == ParsingState.META));
    }

    return statements;
  }

  /**
   * Append the range of characters into a given StringBuilder instance.
   *
   * @param builder StringBuilder to which the characters are appended
   * @param chars Characters
   * @param fromIdx start character index (inclusive)
   * @param toIdx end character index (exclusive)
   */
  private void appendToStatement(StringBuilder builder, char[] chars, int fromIdx, int toIdx) {
    builder.append(chars, fromIdx, toIdx - fromIdx);
  }

  /**
   * Append the range of characters into both history and raw appenders. It omits the number of characters
   * specified by <code>omitCharNums</code> from the end of the range for the history appender, and makes sure that
   * the history appender ends with a space afterwards.
   *
   * @param chars Characters
   * @param fromIdx start character index (inclusive)
   * @param toIdx end character index (exclusive)
   * @param omitCharNums how many characters will be omitted from history statement
   */
  private void appendToBothStatements(char[] chars, int fromIdx, int toIdx, int omitCharNums) {
    appendToStatement(historyAppender, chars, fromIdx, toIdx - omitCharNums);
    if (historyAppender.charAt(historyAppender.length() - 1) != ' ') {
      historyAppender.append(" ");
    }
    appendToStatement(rawAppender, chars, fromIdx, toIdx);
  }

  /**
   * Append the range of characters into both history and raw appenders.
   *
   * @param chars Characters
   * @param fromIdx start character index (inclusive)
   * @param toIdx end character index (exclusive)
   */
  private void appendToBothStatements(char[] chars, int fromIdx, int toIdx) {
    appendToStatement(historyAppender, chars, fromIdx, toIdx);
    appendToStatement(rawAppender, chars, fromIdx, toIdx);
  }

  /**
   * Skips the remaining characters of an inline comment.
   *
   * It never skips a new line character and never moves beyond the end of <code>chars</code>. So, an empty
   * comment at the end of a line or at the end of the input is handled properly.
   *
   * @param chars Characters
   * @param currentIdx an index within the comment; it may be the index of a dash or the index right after them
   * @return the index of the new line character terminating the comment, or <code>chars.length</code> if none
   */
  private int consumeInlineComment(char[] chars, int currentIdx) {
    while (currentIdx < chars.length && !isNewLine(chars[currentIdx])) {
      currentIdx++;
    }
    return currentIdx;
  }

  /**
   * Append a string into the raw appender.
   *
   * @param str string to be appended
   * @param addLF if TRUE, a new line is inserted in front of <code>str</code> unless <code>str</code> is empty or a
   *              single new line, or the raw appender is empty or already ends with a new line.
   */
  private void appendToRawStatement(String str, boolean addLF) {
    boolean needsLineFeed = addLF
        && !str.isEmpty()
        && !"\n".equals(str)
        && rawAppender.length() > 0
        && rawAppender.charAt(rawAppender.length() - 1) != '\n';

    if (needsLineFeed) {
      rawAppender.append("\n");
    }
    rawAppender.append(str);
  }

  private static boolean isEndOfMeta(char character) {
    return character == ';' || character == '\n';
  }

  private static boolean isEndOfStatement(char character) {
    return character == ';';
  }

  /**
   * It checks if inline comment '--' begins at <code>idx</code>. Inline comments are recognized only in the
   * TOK_START and STATEMENT states.
   */
  private boolean isInlineCommentStart(char[] chars, int idx) {
    if (idx >= chars.length - 1) {
      return false;
    }
    return (state == ParsingState.STATEMENT || state == ParsingState.TOK_START) &&
        (chars[idx] == '-' && chars[idx + 1] == '-');
  }

  private boolean isNewLine(char character) {
    return character == '\n';
  }

  /**
   * In the TOK_START state, a statement begins with a letter, a digit, or an opening parenthesis.
   */
  private boolean isStatementStart(char character) {
    return state == ParsingState.TOK_START && (Character.isLetterOrDigit(character) || character == '(');
  }

  private boolean isStatementContinue() {
    return state == ParsingState.WITHIN_QUOTE || state == ParsingState.STATEMENT;
  }

  /**
   * process all parsed statements so far and return a list of parsed results.
   *
   * @param endOfFile TRUE if the end of file.
   * @return the list of parsed results, each of result contains one query statement or meta command.
   * @throws InvalidStatementException if the parser is in the INVALID state, e.g., the end of file is reached
   *                                   within a quoted string
   */
  private List<ParsedResult> doProcessEndOfStatement(boolean endOfFile) throws InvalidStatementException {
    List<ParsedResult> parsedResults = new ArrayList<>();
    String errorMessage = "";

    if (endOfFile) {
      // The end of file terminates whatever is in progress.
      if (state == ParsingState.META) {
        state = ParsingState.META_EOS;
      } else if (state == ParsingState.STATEMENT) {
        state = ParsingState.STATEMENT_EOS;
      } else if (state == ParsingState.WITHIN_QUOTE) {
        state = ParsingState.INVALID;
        errorMessage = "unterminated quoted string at LINE " + lineNum;
      }
    }

    if (isTerminateState(state)) {
      String historyStatement = historyAppender.toString();
      String rawStatement = rawAppender.toString();

      if (state == ParsingState.META_EOS) {
        parsedResults.add(new ParsedResult(StatementType.META, rawStatement, historyStatement));
      } else if (state == ParsingState.STATEMENT_EOS) {
        parsedResults.add(new ParsedResult(StatementType.STATEMENT, rawStatement, historyStatement));
      } else {
        // NOTE(review): the parser is left in the INVALID state with its buffers untouched, so subsequent calls
        // throw again. Confirm whether callers rely on this before resetting here.
        throw new InvalidStatementException("ERROR: " + errorMessage);
      }

      // reset all states
      historyAppender.delete(0, historyAppender.length());
      rawAppender.delete(0, rawAppender.length());
      state = START_STATE;
    }

    return parsedResults;
  }

  /**
   * It manually triggers the end of file.
   *
   * @return the list of parsed results, each of result contains one query statement or meta command.
   * @throws InvalidStatementException if the end of file is reached within a quoted string
   */
  public List<ParsedResult> EOF() throws InvalidStatementException {
    return doProcessEndOfStatement(true);
  }

  private static boolean isTerminateState(ParsingState state) {
    return (state == ParsingState.META_EOS || state == ParsingState.STATEMENT_EOS || state == ParsingState.INVALID);
  }

  /**
   * @return the current state of the parser
   */
  public ParsingState getState() {
    return state;
  }

  @Override
  public String toString() {
    return "[" + state.name() + "]: " + historyAppender.toString();
  }
}