/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.cli;
import java.util.ArrayList;
import java.util.List;
import static org.apache.tajo.cli.ParsedResult.StatementType.META;
import static org.apache.tajo.cli.ParsedResult.StatementType.STATEMENT;
/**
* This is a parser used in tsql to split multiple lines of input into SQL statements.
* It helps tsql recognize the termination of each SQL statement and quotation marks (')
* while parsing multiple separate lines.
*/
public class SimpleParser {

  /** States of the statement-splitting state machine. */
  public enum ParsingState {
    TOK_START,     // Start State
    META,          // Meta Command
    STATEMENT,     // Statement
    WITHIN_QUOTE,  // Within Quote
    COMMENT,
    INVALID,       // Invalid Statement
    STATEMENT_EOS, // End State (End of Statement)
    META_EOS       // End State (End of Meta Command)
  }

  /** State the parser starts in and returns to after each completed statement. */
  public static final ParsingState START_STATE = ParsingState.TOK_START;

  /** Current state; it is kept between {@link #parseLines(String)} calls. */
  ParsingState state = START_STATE;

  /**
   * Counter reported in the "unterminated quoted string" error. It is reset to zero while the
   * parser is in the start state and incremented once per scanned chunk.
   */
  int lineNum;

  /** Text of the statement collected so far; it is kept between {@link #parseLines(String)} calls. */
  StringBuilder appender = new StringBuilder();

  /**
   * Parses a whole script and returns every meta command and statement found in it.
   *
   * <h2>State Machine</h2>
   * Whitespace in front of a statement is ignored.
   *
   * <pre>
   * (start) TOK_START --> META ----------------> META_EOS
   *             |
   *             |-------> STATEMENT -----------> STATEMENT_EOS
   *                         \     ^
   *                          \   /
   *                       WITHIN_QUOTE
   * </pre>
   *
   * @param str the script text
   * @return the parsed results in the order in which they appear in the script
   * @throws InvalidStatementException if the script ends inside a quoted string
   */
  public static List<ParsedResult> parseScript(String str) throws InvalidStatementException {
    SimpleParser parser = new SimpleParser();
    List<ParsedResult> parsedResults = new ArrayList<ParsedResult>();
    parsedResults.addAll(parser.parseLines(str));
    parsedResults.addAll(parser.EOF());
    return parsedResults;
  }

  /**
   * Feeds more input to the parser and returns the statements completed by it. An unfinished
   * statement (or quoted string) stays buffered and is continued by the next call or flushed
   * by {@link #EOF()}. A meta command that is still open when the input is exhausted is
   * emitted immediately.
   *
   * @param str the next piece of input
   * @return the meta commands and statements completed by this input, possibly empty
   * @throws InvalidStatementException if the parser is in an invalid state
   */
  public List<ParsedResult> parseLines(String str) throws InvalidStatementException {
    List<ParsedResult> statements = new ArrayList<ParsedResult>();
    char[] chars = str.toCharArray();
    int idx = 0;

    while (idx < chars.length) {
      // initialization for new statement
      if (state == ParsingState.TOK_START) {
        lineNum = 0;

        // ignore all whitespace before start
        if (Character.isWhitespace(chars[idx])) {
          idx++;
          continue;
        }
      }

      if (state == ParsingState.TOK_START && chars[idx] == '\\') {
        // TOK_START --> META
        idx = consumeMeta(str, chars, idx);
      } else if (isCommentStart(chars[idx])) {
        idx = skipComment(chars, idx);
      } else if (isStatementContinue() || isStatementStart(chars[idx])) {
        // TOK_START --> STATEMENT, or continuation of STATEMENT / WITHIN_QUOTE
        idx = consumeStatement(str, chars, idx);
      } else {
        // skip unknown character
        idx++;
      }

      lineNum++;
      // A meta command that ran to the end of the input is treated as finished.
      statements.addAll(doProcessEndOfStatement(state == ParsingState.META));
    }

    return statements;
  }

  /**
   * Scans a meta command starting at {@code startIdx} and appends its text, without the
   * terminator, to the buffer. The terminator is tested before anything else so that a
   * newline ends the command.
   *
   * @return the index of the first character after the consumed text
   */
  private int consumeMeta(String str, char[] chars, int startIdx) {
    int idx = startIdx;
    state = ParsingState.META;

    // META --> META_EOS
    while (state != ParsingState.META_EOS && idx < chars.length) {
      if (isEndOfMeta(chars[idx++])) {
        state = ParsingState.META_EOS;
      }
    }

    int endIdx = (state == ParsingState.META_EOS) ? idx - 1 : idx;
    appender.append(str, startIdx, endIdx);
    return idx;
  }

  /**
   * Skips a comment starting at {@code idx} up to, but not including, the next line end.
   * The bound is checked before the array is read, so a comment that reaches the end of the
   * input is handled without an out-of-bounds access.
   *
   * @return the index of the line end, or {@code chars.length} if there is none
   */
  private int skipComment(char[] chars, int idx) {
    int pos = idx + 1;
    while (pos < chars.length && !isLineEnd(chars[pos])) {
      pos++;
    }
    return pos;
  }

  /**
   * Scans statement text starting at {@code startIdx} until the statement terminator or the
   * end of the input, tracking quoted strings, and appends the scanned text (without the
   * terminator) to the buffer.
   *
   * @return the index of the first character after the consumed text
   */
  private int consumeStatement(String str, char[] chars, int startIdx) {
    int idx = startIdx;
    int endIdx = 0;

    if (!isStatementContinue()) { // TOK_START -> STATEMENT
      state = ParsingState.STATEMENT;
    }

    while (!isTerminateState(state) && idx < chars.length) {
      char character = chars[idx++];

      if (state == ParsingState.WITHIN_QUOTE) {
        // Inside a quote only the closing quote matters; ';' is plain text here, even when
        // the quoted string is continued from a previous call.
        if (character == '\'') { // WITHIN_QUOTE --> STATEMENT
          state = ParsingState.STATEMENT;
        }
      } else if (isEndOfStatement(character)) {
        state = ParsingState.STATEMENT_EOS;
        endIdx = idx - 1;
      } else if (character == '\'') { // STATEMENT --> WITHIN_QUOTE
        state = ParsingState.WITHIN_QUOTE;
      }
    }

    if (state == ParsingState.STATEMENT_EOS) {
      appender.append(str, startIdx, endIdx);
    } else {
      appender.append(str, startIdx, idx);

      // if it is not within quote and there is no space between lines, add a space.
      if (state == ParsingState.STATEMENT && appender.charAt(appender.length() - 1) != ' ') {
        appender.append(' ');
      }
    }
    return idx;
  }

  /** Returns true if the character terminates a meta command (';' or a newline). */
  private static boolean isEndOfMeta(char character) {
    return character == ';' || character == '\n';
  }

  /** Returns true if the character terminates a statement (';'). */
  private static boolean isEndOfStatement(char character) {
    return character == ';';
  }

  /** Returns true if the parser is in the start state and the character is '-'. */
  private boolean isCommentStart(char character) {
    return state == ParsingState.TOK_START && character == '-';
  }

  /** Returns true if the character is a newline. */
  private boolean isLineEnd(char character) {
    return character == '\n';
  }

  /** Returns true if the parser is in the start state and the character is a letter or digit. */
  private boolean isStatementStart(char character) {
    return state == ParsingState.TOK_START && (Character.isLetterOrDigit(character));
  }

  /** Returns true if an unfinished statement or quoted string is being continued. */
  private boolean isStatementContinue() {
    return state == ParsingState.WITHIN_QUOTE || state == ParsingState.STATEMENT;
  }

  /**
   * Emits the buffered statement if the parser has reached a terminal state, then resets the
   * buffer and the state.
   *
   * @param endOfFile if true, an open meta command or statement is treated as finished and an
   *                  open quoted string is treated as an error
   * @return a list holding the completed result, or an empty list if nothing is complete
   * @throws InvalidStatementException if the parser is in the invalid state; NOTE: the state
   *                                   and the buffer are not reset in that case
   */
  private List<ParsedResult> doProcessEndOfStatement(boolean endOfFile) throws InvalidStatementException {
    List<ParsedResult> parsedResults = new ArrayList<ParsedResult>();
    String errorMessage = "";

    if (endOfFile) {
      if (state == ParsingState.META) {
        state = ParsingState.META_EOS;
      } else if (state == ParsingState.STATEMENT) {
        state = ParsingState.STATEMENT_EOS;
      } else if (state == ParsingState.WITHIN_QUOTE) {
        state = ParsingState.INVALID;
        errorMessage = "unterminated quoted string at LINE " + lineNum;
      }
    }

    if (isTerminateState(state)) {
      String statement = appender.toString();
      if (state == ParsingState.META_EOS) {
        parsedResults.add(new ParsedResult(META, statement));
      } else if (state == ParsingState.STATEMENT_EOS) {
        parsedResults.add(new ParsedResult(STATEMENT, statement));
      } else {
        throw new InvalidStatementException("ERROR: " + errorMessage);
      }

      // reset all states
      appender.setLength(0);
      state = START_STATE;
    }

    return parsedResults;
  }

  /**
   * Signals the end of the input and flushes whatever is still buffered.
   *
   * @return the final meta command or statement, or an empty list if nothing was buffered
   * @throws InvalidStatementException if the input ended inside a quoted string
   */
  public List<ParsedResult> EOF() throws InvalidStatementException {
    return doProcessEndOfStatement(true);
  }

  /** Returns true if the given state ends the current statement (normally or with an error). */
  private static boolean isTerminateState(ParsingState state) {
    return (state == ParsingState.META_EOS || state == ParsingState.STATEMENT_EOS || state == ParsingState.INVALID);
  }

  /** Returns the current state of the parser. */
  public ParsingState getState() {
    return state;
  }

  /** Returns the current state name followed by the buffered text. */
  @Override
  public String toString() {
    return "[" + state.name() + "]: " + appender.toString();
  }
}