/* * This program is free software; you can redistribute it and/or modify it under the * terms of the GNU Lesser General Public License, version 2.1 as published by the Free Software * Foundation. * * You should have received a copy of the GNU Lesser General Public License along with this * program; if not, you can obtain a copy at http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html * or from the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Lesser General Public License for more details. * * Copyright (c) 2006 - 2009 Pentaho Corporation.. All rights reserved. */ package org.pentaho.reporting.libraries.base.util; import java.io.IOException; import java.io.StringReader; /** * This class represents a parser that will remove SQL comments (both multi-line and single-line) from a string * representing a SQL query. It respects the notion of a string literal, such that if a comment appears in a string * literal, it is treated as part of the string instead of a comment. Both single-quoted and double-quoted string * literals are supported, including nested quotes (whether the SQL dialect supports them or not). * * @author Matt Burgess */ public class SqlCommentScrubber { /** * End-of-File (EOF) indicator * */ public static final int EOF = -1; /** * End-of-Line (EOL) indicator * */ public static final int EOL = 10; /** * List of characters that can signify a string literal * */ private static final int[] QUOTE_CHARS = { '\'', '"' }; /** * Private constructor to enforce static access */ private SqlCommentScrubber() { } /** * Checks to see whether the character is a quote character * * @param ch the input character to check * @return true if the input character is a quote character, false if not */ private static boolean isQuoteChar( final int ch ) { for ( final int c : QUOTE_CHARS ) { if ( ch == c ) { return true; } } return false; } /** * This method will remove SQL comments (both multi-line and single-line) from a string representing a SQL query. It * respects the notion of a string literal, such that if a comment appears in a string literal, it is treated as part * of the string instead of a comment. A simple state machine is implemented, keeping track of whether the current * character is starting, ending, or inside a comment construct. The state machine also checks to see if the current * character is starting, ending, or inside a single-quoted string literal, as this takes precedence over comment * constructs. In other words, comments inside strings are not actually comments, they are part of the string * literal. * * @param text a string representing the SQL query to parse and from which to remove comments * @return the input string with SQL comments removed, or null if the input string is null */ public static String removeComments( String text ) { if ( text == null ) { return null; } StringBuilder queryWithoutComments = new StringBuilder(); boolean blkComment = false; boolean lineComment = false; boolean inString = false; StringReader buffer = new StringReader( text ); int ch; char currentStringChar = (char) QUOTE_CHARS[ 0 ]; boolean done = false; try { while ( !done ) { switch( ch = buffer.read() ) { case EOF: { // End Of File done = true; break; } case '\'': // NOTE: Add cases for any other quote characters in QUOTE_CHARS case '"': { // String literals // If we're not in a comment, we're either entering or leaving a string if ( !lineComment && !blkComment ) { char cch = (char) ch; if ( inString ) { if ( currentStringChar == cch ) { inString = false; } } else { inString = true; currentStringChar = cch; } queryWithoutComments.append( cch ); } break; } case '/': { // multi-line comments // If we're not in a line comment, we might be entering a line or multi-line comment if ( !lineComment ) { ch = buffer.read(); // If we see a multi-line comment starter (/*) and we're not in a string or // multi-line comment, then we have started a multi-line comment. if ( ( ch == '*' ) && ( !blkComment ) && ( !inString ) ) { blkComment = true; } else { // Otherwise if we aren't already in a block comment, pass the chars through if ( !blkComment ) { queryWithoutComments.append( '/' ); queryWithoutComments.append( (char) ch ); } } } break; } case '*': { // multi-line comments // If we're in a multi-line comment, look ahead to see if we're about to exit if ( blkComment ) { ch = buffer.read(); if ( ch == '/' ) { blkComment = false; } } else { // if we're not in a multi-line or line comment, pass the char through if ( !lineComment ) { queryWithoutComments.append( '*' ); } } break; } case '-': { // single-line comment // if we're not in a multi-line or line comment, we might be entering a line comment if ( !blkComment && !lineComment ) { ch = buffer.read(); // If we look ahead to see another dash and we're not in a string, we're entering a line comment if ( ch == '-' && !inString ) { lineComment = true; } else { queryWithoutComments.append( '-' ); queryWithoutComments.append( (char) ch ); // If it's a quote character, we're entering or leaving a string if ( isQuoteChar( ch ) ) { char cch = (char) ch; if ( inString ) { if ( currentStringChar == cch ) { inString = false; } } else { inString = true; currentStringChar = cch; } } } } break; } case EOL: { // End Of Line // If we're not in a comment, pass the EOL through if ( !blkComment && !lineComment ) { queryWithoutComments.append( (char) ch ); } lineComment = false; break; } default: { // if we're not in a comment, pass the character through if ( !blkComment && !lineComment ) { queryWithoutComments.append( (char) ch ); } break; } } } } catch ( IOException e ) { // break on error, exit gracefully with altered query thus far } return queryWithoutComments.toString(); } }