/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2014 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.core.database;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang.StringUtils;
/**
* This class represents a splitter of SQL script into separate statements. It respects the notion of a string
* literal and comments, such that if a separator appears in a string literal or comment, it is treated as
* part of the string or comment instead of a separator.
*
* @author Alexander Buloichik
*/
public class SqlScriptParser {
enum MODE {
SQL, LINE_COMMENT, BLOCK_COMMENT, STRING
};
private static final SqlScriptParser INSTANCE = new SqlScriptParser();
public static SqlScriptParser getInstance() {
return INSTANCE;
}
/**
* Private constructor to enforce static access
*/
private SqlScriptParser() {
}
/**
* This method splits script into separate statements.
*
* @param script a string representing the SQL script to parse
* @return the list of statements
*/
public List<String> split( String script ) {
if ( script == null ) {
return Collections.emptyList();
}
List<String> result = new ArrayList<String>();
MODE mode = MODE.SQL;
char currentStringChar = 0;
int statementStart = 0;
for ( int i = 0; i < script.length(); i++ ) {
char ch = script.charAt( i );
char nextCh = i < script.length() - 1 ? script.charAt( i + 1 ) : 0;
switch ( mode ) {
case SQL:
switch ( ch ) {
case '/':
if ( nextCh == '*' ) {
mode = MODE.BLOCK_COMMENT;
i++;
}
break;
case '-':
if ( nextCh == '-' ) {
mode = MODE.LINE_COMMENT;
i++;
}
break;
case '\'':
case '"':
mode = MODE.STRING;
currentStringChar = ch;
break;
case ';':
String st = script.substring( statementStart, i );
if ( StringUtils.isNotBlank( st ) ) {
result.add( st );
}
statementStart = i + 1;
break;
}
break;
case BLOCK_COMMENT:
if ( ch == '*' ) {
if ( nextCh == '/' ) {
mode = MODE.SQL;
i++;
}
}
break;
case LINE_COMMENT:
if ( ch == '\n' || ch == '\r' ) {
mode = MODE.SQL;
}
break;
case STRING:
if ( ch == '\\' && nextCh == '\\' ) {
/*
* The user is hard-coding a backslash into the string.
* Pass the hard-coded backslash through, and skip over the real backslash on the next loop
*/
i++;
} else if ( ch == '\\' && nextCh == currentStringChar ) {
/*
* The user is hard-coding a quote character into the string.
* Pass the hard-coded quote character through, and skip over the quote on next loop
*/
i++;
} else if ( ch == currentStringChar ) {
mode = MODE.SQL;
}
break;
}
}
if ( statementStart < script.length() ) {
String st = script.substring( statementStart );
if ( StringUtils.isNotBlank( st ) ) {
result.add( st );
}
}
return result;
}
/**
* This method removes comments from one statement.
*
* @param script a string representing the SQL script to parse
* @return script without comments
*/
public String removeComments( String script ) {
if ( script == null ) {
return null;
}
StringBuilder result = new StringBuilder();
MODE mode = MODE.SQL;
char currentStringChar = 0;
for ( int i = 0; i < script.length(); i++ ) {
char ch = script.charAt( i );
char nextCh = i < script.length() - 1 ? script.charAt( i + 1 ) : 0;
char nextPlusOneCh = i < script.length() - 2 ? script.charAt( i + 2 ) : 0;
switch ( mode ) {
case SQL:
switch ( ch ) {
case '/':
if ( nextCh == '*' && nextPlusOneCh != '+' ) {
mode = MODE.BLOCK_COMMENT;
i++;
ch = 0;
}
break;
case '-':
if ( nextCh == '-' ) {
mode = MODE.LINE_COMMENT;
i++;
ch = 0;
}
break;
case '\'':
case '"':
mode = MODE.STRING;
currentStringChar = ch;
break;
}
break;
case BLOCK_COMMENT:
if ( ch == '*' ) {
if ( nextCh == '/' ) {
mode = MODE.SQL;
i++;
}
}
ch = 0;
break;
case LINE_COMMENT:
if ( ch == '\n' || ch == '\r' ) {
mode = MODE.SQL;
} else {
ch = 0;
}
break;
case STRING:
if ( ch == '\\' && nextCh == currentStringChar ) {
/*
* The user is hard-coding a quote character into the string.
* Pass the hard-coded quote character through, and skip over the quote on next loop
*/
result.append( ch );
result.append( nextCh );
ch = 0;
i++;
} else if ( ch == currentStringChar ) {
mode = MODE.SQL;
}
break;
}
if ( ch != 0 ) {
result.append( ch );
}
}
return result.toString();
}
}