/*! ******************************************************************************
*
* Pentaho Data Integration
*
* Copyright (C) 2002-2013 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.core.database;
import java.io.IOException;
import java.io.StringReader;
/**
 * This class represents a parser that will remove SQL comments (both multi-line and single-line) from a string
 * representing a SQL query. It respects the notion of a string literal, such that if a comment appears in a string
 * literal, it is treated as part of the string instead of a comment. Both single-quoted and double-quoted string
 * literals are supported, including nested quotes (whether the SQL dialect supports them or not).
 *
 * @author Matt Burgess
 *
 * @deprecated Use SqlScriptParser instead.
 */
@Deprecated
public class SqlCommentScrubber {

  /** End-of-File (EOF) indicator, as returned by {@link StringReader#read()} **/
  public static final int EOF = -1;

  /** End-of-Line (EOL) indicator (line feed) **/
  public static final int EOL = 10;

  /** List of characters that can signify a string literal **/
  private static final int[] QUOTE_CHARS = { '\'', '"' };

  /**
   * Private constructor to enforce static access
   */
  private SqlCommentScrubber() {
  }

  /**
   * Checks to see whether the character is a quote character
   *
   * @param ch
   *          the input character to check
   *
   * @return true if the input character is a quote character, false if not
   */
  private static boolean isQuoteChar( int ch ) {
    for ( int c : QUOTE_CHARS ) {
      if ( ch == c ) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns the next character of the reader without consuming it.
   *
   * @param reader
   *          the reader to look ahead in
   *
   * @return the next character, or {@link #EOF} if the end of the input has been reached
   *
   * @throws IOException
   *           if the reader has been closed
   */
  private static int peek( StringReader reader ) throws IOException {
    reader.mark( 1 );
    int next = reader.read();
    reader.reset();
    return next;
  }

  /**
   * This method will remove SQL comments (both multi-line and single-line) from a string representing a SQL query. It
   * respects the notion of a string literal, such that if a comment appears in a string literal, it is treated as part
   * of the string instead of a comment. A simple state machine is implemented, keeping track of whether the current
   * character is inside a line comment, inside a multi-line comment, inside a single- or double-quoted string literal,
   * or in plain SQL text. String literals take precedence over comment constructs. In other words, comments inside
   * strings are not actually comments, they are part of the string literal.
   * <p>
   * Additional rules:
   * <ul>
   * <li>Multi-line comments starting with <code>/*+</code> (Oracle hints, PDI-13054) are kept verbatim.</li>
   * <li>The end-of-line character that terminates a line comment is removed together with the comment.</li>
   * <li>Look-ahead never consumes input, so constructs such as <code>/**&#47;</code> or a comment ending in
   * <code>**&#47;</code> are closed correctly, and a trailing <code>/</code> or <code>-</code> is passed through
   * unchanged.</li>
   * </ul>
   *
   * @param text
   *          a string representing the SQL query to parse and from which to remove comments
   *
   * @return the input string with SQL comments removed, or null if the input string is null
   */
  public static String removeComments( String text ) {
    if ( text == null ) {
      return null;
    }

    StringBuilder queryWithoutComments = new StringBuilder( text.length() );

    boolean blkComment = false;
    boolean lineComment = false;
    boolean inString = false;
    // Quote character that opened the current string literal; only meaningful while inString is true
    char currentStringChar = (char) QUOTE_CHARS[0];

    StringReader buffer = new StringReader( text );
    try {
      int ch;
      while ( ( ch = buffer.read() ) != EOF ) {
        if ( lineComment ) {
          // A line comment runs up to and including the EOL that terminates it
          if ( ch == EOL ) {
            lineComment = false;
          }
        } else if ( blkComment ) {
          // Only the two-character sequence "*/" ends a multi-line comment. Peeking (rather than reading)
          // keeps a run such as "**/" intact so the second '*' can still pair with the '/'.
          if ( ch == '*' && peek( buffer ) == '/' ) {
            buffer.read();
            blkComment = false;
          }
        } else if ( inString ) {
          // Everything inside a string literal is passed through; only the opening quote character closes it
          if ( ch == currentStringChar ) {
            inString = false;
          }
          queryWithoutComments.append( (char) ch );
        } else if ( isQuoteChar( ch ) ) {
          inString = true;
          currentStringChar = (char) ch;
          queryWithoutComments.append( (char) ch );
        } else if ( ch == '/' && peek( buffer ) == '*' ) {
          buffer.read(); // consume the '*'
          if ( peek( buffer ) == '+' ) {
            // "/*+" identifies a hint in Oracle (PDI-13054): keep the opener and let the rest pass through as text
            buffer.read();
            queryWithoutComments.append( "/*+" );
          } else {
            blkComment = true;
          }
        } else if ( ch == '-' && peek( buffer ) == '-' ) {
          buffer.read(); // consume the second '-'
          lineComment = true;
        } else {
          queryWithoutComments.append( (char) ch );
        }
      }
    } catch ( IOException e ) {
      // A StringReader only throws once it has been closed, which never happens here. Keep the best-effort
      // contract anyway: exit gracefully with the query as scrubbed thus far.
    }

    return queryWithoutComments.toString();
  }
}