/*
* $Id$
*
* Copyright 2006, The jCoderZ.org Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
* * Neither the name of the jCoderZ.org Project nor the names of
* its contributors may be used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.jcoderz.phoenix.sqlparser;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.jcoderz.commons.util.Constants;
/**
 * Simple SQL Scanner.
 *
 * <p>Reads single characters from a buffered input stream and groups
 * them into {@link Token}s: newlines, whitespace, parentheses,
 * semicolons, commas, slashes, comments (<code>--</code> and
 * <code>/* ... *&#47;</code>), string literals, numeric literals,
 * operators, keywords and identifiers. The scanner keeps track of the
 * current line and column while reading.</p>
 *
 * <p>Look-ahead is implemented with {@link #mark()} / {@link #reset()}
 * on the underlying {@link BufferedInputStream}.</p>
 *
 * @author Michael Griffel
 */
public final class SqlScanner
    implements ScannerInterface
{
    /** The stream the characters are read from. */
    private final BufferedInputStream mInputStream;
    /** Number of characters read on the current line. */
    private int mColumn = 0;
    /** Current line number, starting with 1. */
    private int mLine = 1;
    /** Whether newline and whitespace tokens are returned to the caller. */
    private boolean mReportWhitespace = true;
    /** Column value remembered by the last call to {@link #mark()}. */
    private int mSaveColumn = 0;

    /**
     * Creates a new SQL Scanner.
     * @param input the input stream to read SQL data from
     */
    public SqlScanner (InputStream input)
    {
        mInputStream = new BufferedInputStream(input);
    }

    /**
     * Returns the reportWhitespace flag.
     * @return <code>true</code> if newline and whitespace tokens are
     *      returned by {@link #nextToken()}; <code>false</code> if they
     *      are skipped.
     */
    public boolean isSetReportWhitespace ()
    {
        return mReportWhitespace;
    }

    /**
     * Sets the reportWhitespace to given <code>reportWhitespace</code>.
     * @param reportWhitespace <code>true</code> to get newline and
     *      whitespace tokens reported, <code>false</code> to skip them.
     */
    public void setReportWhitespace (boolean reportWhitespace)
    {
        mReportWhitespace = reportWhitespace;
    }

    /**
     * Returns the current line.
     * @return the current line (the first line is 1).
     */
    public int getLine ()
    {
        return mLine;
    }

    /**
     * Returns the current column.
     * @return the number of characters read on the current line.
     */
    public int getColumn ()
    {
        return mColumn;
    }

    /**
     * Returns the next token from the input.
     * This is a thin wrapper around the private scanning method.
     * @return the next token
     * @throws ParseException if a syntax error is encountered
     * @see org.jcoderz.phoenix.sqlparser.ScannerInterface#nextToken()
     */
    public Token nextToken ()
        throws ParseException
    {
        return getNextToken();
    }

    /**
     * Scans the next token. Newline and whitespace tokens are skipped
     * unless whitespace reporting is enabled.
     * @return the next token; a token of type EOF at the end of input.
     * @throws ParseException if the input cannot be read or tokenized.
     */
    private Token getNextToken ()
        throws ParseException
    {
        for (;;)
        {
            mark();
            final int c = read();
            if (c == -1) // EOF
            {
                return new Token(TokenType.EOF);
            }
            if (isNewlineChar((char) c))
            {
                final Token newline = eatNewline(c);
                if (mReportWhitespace)
                {
                    return newline;
                }
                continue;
            }
            if (Character.isWhitespace((char) c))
            {
                final Token whitespace = eatWhitespaces(c);
                if (mReportWhitespace)
                {
                    return whitespace;
                }
                continue;
            }
            if (c == '(')
            {
                return new Token(TokenType.OPEN_PAREN, asString(c));
            }
            if (c == ')')
            {
                return new Token(TokenType.CLOSE_PAREN, asString(c));
            }
            if (c == ';')
            {
                return new Token(TokenType.SEMICOLON, asString(c));
            }
            if (c == ',')
            {
                return new Token(TokenType.COMMA, asString(c));
            }
            if (c == '/') // maybe block comment or single slash
            {
                mark();
                if (read() == '*') // a block comment
                {
                    return new Token(TokenType.COMMENT, eatBlockComment());
                }
                reset();
                return new Token(TokenType.SLASH, asString(c));
            }
            if (c == '-') // comment, numeric or operator
            {
                return scanDash(c);
            }
            if (c == '"' || c == '\'') // literal
            {
                return new Token(TokenType.STRING_LITERAL,
                    readStringLiteral(c));
            }
            // keywords, numerics, identifier
            return wordToken(readWord(c));
        }
    }

    /**
     * Scans the token that starts with a '-' character: a line comment
     * ('--'), a negative numeric ('-' followed by a digit) or the minus
     * operator ('- ', '-(' or '-function').
     * @param c the already consumed '-' character.
     * @return the comment, numeric literal or operator token.
     * @throws ParseException if the character after the '-' is not
     *      allowed, or the input ends right after the '-'.
     */
    private Token scanDash (int c)
        throws ParseException
    {
        mark();
        final int d = read();
        if (d == '-') // -> comment
        {
            return new Token(TokenType.COMMENT, readLineComment());
        }
        if (d == -1)
        {
            // Report EOF explicitly instead of an unprintable character.
            throw new ParseException(
                "Unexpected end of input after '-'.", mLine, mColumn);
        }
        if (Character.isDigit((char) d)) // (negative) numeric
        {
            return readNegativeNumeric(d);
        }
        // operator '- ', '-(' or '-function'
        if (d == '(' || Character.isLetter((char) d)
            || Character.isWhitespace((char) d))
        {
            reset(); // the look-ahead char belongs to the next token
            return new Token(TokenType.OPERATOR, asString(c));
        }
        throw new ParseException("Unexpected char '" + (char) d
            + "', expected '-' or digit.", mLine, mColumn);
    }

    /**
     * Reads the rest of a line comment; the leading '--' has already
     * been consumed. The terminating line break is not consumed so that
     * it is reported as a separate newline token.
     * @return the comment text including the leading '--'.
     * @throws ParseException if the input cannot be read.
     */
    private String readLineComment ()
        throws ParseException
    {
        final StringBuffer sb = new StringBuffer();
        sb.append("--");
        for (;;)
        {
            mark();
            final int e = read();
            // Stop at CR as well as LF, so that the CR of a CR LF pair
            // stays part of the newline token, not the comment.
            if (e == -1 || isNewlineChar((char) e))
            {
                reset();
                break;
            }
            sb.append((char) e);
        }
        return sb.toString();
    }

    /**
     * Reads a negative integer literal; the '-' and the first digit have
     * already been consumed.
     * @param firstDigit the first digit following the '-'.
     * @return the numeric literal token, e.g. for "-42".
     * @throws ParseException if the input cannot be read or the text is
     *      not a well-formed number.
     */
    private Token readNegativeNumeric (int firstDigit)
        throws ParseException
    {
        final StringBuffer sb = new StringBuffer();
        sb.append('-');
        sb.append((char) firstDigit);
        for (;;)
        {
            mark();
            final int e = read();
            if (! Character.isDigit((char) e)) // also true for EOF
            {
                reset();
                break;
            }
            sb.append((char) e);
        }
        final String negativeNumeric = sb.toString();
        try
        {
            // Validate with BigDecimal, as done for non-negative
            // numerics, so that the accepted range does not depend on
            // the sign (Integer.parseInt rejected large values).
            new BigDecimal(negativeNumeric);
        }
        catch (NumberFormatException shouldNotOccur)
        {
            throw new ParseException("Cannot parse negative numberic '"
                + negativeNumeric
                + "'", shouldNotOccur, mLine, mColumn);
        }
        return new Token(TokenType.NUMERIC_LITERAL, negativeNumeric);
    }

    /**
     * Classifies a word as keyword, numeric literal or identifier.
     * @param word the word to classify.
     * @return a token with the type known for the (lower-cased) word,
     *      a numeric literal token if the word is a well-formed decimal
     *      number, or an identifier token otherwise.
     */
    private Token wordToken (String word)
    {
        try
        {
            // FIXME: prefix keyword? otherwise 'comma' will be a keyword
            if (!TokenType.OPERATOR.toString().equalsIgnoreCase(word))
            {
                final TokenType tokenType
                    = TokenType.fromString(
                        word.toLowerCase(Constants.SYSTEM_LOCALE));
                return new Token(tokenType, word);
            }
        }
        catch (IllegalArgumentException ignore)
        {
            // not a known keyword
        }
        // numeric literal?
        try
        {
            new BigDecimal(word); // well-formed?
            return new Token(TokenType.NUMERIC_LITERAL, word);
        }
        catch (NumberFormatException ignore)
        {
            // not a numeric
        }
        // otherwise it must be a identifier (hopefully)
        return new Token(TokenType.IDENTIFIER, word);
    }

    /**
     * Reads a block comment up to and including the closing
     * '*&#47;'; the opening '/*' has already been consumed.
     * @return the full comment text including both delimiters.
     * @throws ParseException if the input ends before the comment is
     *      closed, or the input cannot be read.
     */
    private String eatBlockComment ()
        throws ParseException
    {
        final StringBuffer sb = new StringBuffer();
        sb.append("/*");
        for (;;)
        {
            mark();
            final int d = read();
            if (d == -1)
            {
                // Without this check the loop would never terminate.
                throw new ParseException(
                    "Unterminated block comment, expected '*/'.",
                    mLine, mColumn);
            }
            if (d == '*') // maybe end of block comment
            {
                mark();
                if (read() == '/')
                {
                    sb.append("*/");
                    break;
                }
                reset(); // not the end; re-scan the look-ahead char
                sb.append((char) d);
            }
            else if (isNewlineChar((char) d))
            {
                appendNewline(d, sb);
            }
            else
            {
                sb.append((char) d);
            }
        }
        return sb.toString();
    }

    /**
     * Reads a word, i.e. all characters up to (but not including) the
     * next special character or the end of input.
     * @param c the already consumed first character of the word.
     * @return the word.
     * @throws ParseException if the input cannot be read.
     */
    private String readWord (int c)
        throws ParseException
    {
        final StringBuffer sb = new StringBuffer();
        sb.append((char) c);
        for (;;)
        {
            mark();
            final int d = read();
            // EOF must end the word; otherwise the loop never terminates
            // for a word at the very end of the input.
            if (d == -1 || isSpecialCharacter((char) d))
            {
                reset();
                break;
            }
            sb.append((char) d);
        }
        return sb.toString();
    }

    /**
     * Reads a string literal up to and including the closing quote,
     * which must be the same character as the opening quote.
     * @param c the already consumed opening quote character.
     * @return the literal including both quote characters.
     * @throws ParseException if the input ends before the literal is
     *      closed, or the input cannot be read.
     */
    private String readStringLiteral (int c)
        throws ParseException
    {
        final StringBuffer sb = new StringBuffer();
        sb.append((char) c);
        for (;;)
        {
            final int d = read();
            if (d == -1)
            {
                // Without this check the loop would never terminate.
                throw new ParseException(
                    "Unterminated string literal starting with "
                    + (char) c, mLine, mColumn);
            }
            if (isNewlineChar((char) d))
            {
                appendNewline(d, sb); // keep the line counter correct
                continue;
            }
            sb.append((char) d);
            if (d == c) // only the opening quote char closes the literal
            {
                break;
            }
        }
        return sb.toString();
    }

    /**
     * Appends a line break found inside a multi-line token to the given
     * buffer and advances the line counter by one. A line feed directly
     * following a carriage return is consumed as part of the same line
     * break.
     * @param c the already consumed newline character.
     * @param sb the buffer collecting the token text.
     * @throws ParseException if the input cannot be read.
     */
    private void appendNewline (int c, StringBuffer sb)
        throws ParseException
    {
        sb.append((char) c);
        if (c == Constants.CARRIAGE_RETURN_CHAR)
        {
            mark();
            final int next = read();
            if (next == Constants.LINE_FEED_CHAR)
            {
                sb.append((char) next);
            }
            else
            {
                reset();
            }
        }
        ++mLine;
        mColumn = 0;
    }

    /**
     * Checks whether the given character terminates a word.
     * @param c the character to check.
     * @return <code>true</code> for whitespace, '(', ')', ';', ',' and
     *      '-'; <code>false</code> otherwise.
     */
    private static boolean isSpecialCharacter (char c)
    {
        return (Character.isWhitespace(c) || c == '(' || c == ')'
            || c == ';' || c == ',' || c == '-');
    }

    /**
     * Consumes one line break (LF, CR or CR LF) and advances the line
     * counter.
     * @param c the already consumed newline character.
     * @return a newline token whose value holds exactly the characters
     *      that were consumed.
     * @throws ParseException if <code>c</code> is not a newline
     *      character, or the input cannot be read.
     */
    private Token eatNewline (int c)
        throws ParseException
    {
        final String value;
        if (c == Constants.LINE_FEED_CHAR) // UNIX newline?
        {
            value = asString(Constants.LINE_FEED_CHAR);
        }
        else if (c == Constants.CARRIAGE_RETURN_CHAR) // WINDOWS newline?
        {
            mark();
            if (read() == Constants.LINE_FEED_CHAR) // eat LF
            {
                value = asString(Constants.CARRIAGE_RETURN_CHAR)
                    + asString(Constants.LINE_FEED_CHAR);
            }
            else
            {
                // Lone CR: do not invent a LF that was not in the input.
                reset();
                value = asString(Constants.CARRIAGE_RETURN_CHAR);
            }
        }
        else
        {
            throw new ParseException("Unexpected newline char '"
                + (char) c + "'", mLine, mColumn);
        }
        ++mLine;
        mColumn = 0;
        return new Token(TokenType.NEWLINE, value);
    }

    /**
     * Consumes a run of whitespace characters that are not newline
     * characters; newlines are reported separately.
     * @param c the already consumed first whitespace character.
     * @return a whitespace token holding the consumed characters.
     * @throws ParseException if the input cannot be read.
     */
    private Token eatWhitespaces (int c)
        throws ParseException
    {
        final StringBuffer sb = new StringBuffer();
        sb.append((char) c); // TODO: assertTrue(isWhitespace(c));
        for (;;)
        {
            mark();
            final int d = read();
            if (! Character.isWhitespace((char) d)
                || isNewlineChar((char) d))
            {
                // not a whitespace (or EOF), or a newline which must be
                // reported separately
                reset();
                break;
            }
            sb.append((char) d);
        }
        return new Token(TokenType.WHITESPACE, sb.toString());
    }

    /**
     * Moves the stream back to the position of the last {@link #mark()}
     * call and restores the column that was current at that time.
     * @throws ParseException if the stream cannot be reset.
     */
    private void reset ()
        throws ParseException
    {
        try
        {
            mInputStream.reset();
            mColumn = mSaveColumn;
        }
        catch (IOException e)
        {
            final ParseException pe
                = new ParseException(e, mLine, mColumn);
            // NOTE(review): Throwable.initCause throws an
            // IllegalStateException if the constructor above already
            // recorded the cause -- confirm against ParseException;
            // read() relies on the constructor alone.
            pe.initCause(e);
            throw pe;
        }
    }

    /**
     * Remembers the current stream position and column so that a later
     * {@link #reset()} can return to it.
     */
    private void mark ()
    {
        mSaveColumn = mColumn;
        mInputStream.mark(Integer.MAX_VALUE);
    }

    /**
     * Converts a character code into a one character string.
     * @param c the character code.
     * @return the string holding the character.
     */
    private static String asString (int c)
    {
        return Character.toString((char) c);
    }

    /**
     * Reads the next byte from the stream and advances the column; the
     * column is advanced even if the end of the stream is reached.
     * @return the byte read, or -1 at the end of the stream.
     * @throws ParseException if the underlying read fails.
     */
    private int read ()
        throws ParseException
    {
        int c = -1;
        try
        {
            ++mColumn;
            c = mInputStream.read();
        }
        catch (IOException e)
        {
            throw new ParseException(e, mLine, mColumn);
        }
        return c;
    }

    /**
     * Checks whether the given character is a line feed or a carriage
     * return.
     * @param c the character to check.
     * @return <code>true</code> if the character is a newline character.
     */
    private static boolean isNewlineChar (char c)
    {
        return (c == Constants.LINE_FEED_CHAR
            || c == Constants.CARRIAGE_RETURN_CHAR);
    }

    /**
     * Simple SQL Scanner that reads the file given at argument 1 and dumps
     * the tokens to <code>stderr</code> and the content on <code>stdout</code>.
     *
     * @param args command line arguments; the first one is the name of
     *      the file to scan.
     * @throws IllegalArgumentException if no file name is given.
     * @throws Exception An error occurred
     */
    public static void main (String[] args)
        throws Exception
    {
        if (args.length < 1)
        {
            throw new IllegalArgumentException(
                "Usage: SqlScanner <sql-file>");
        }
        final List tokens = new ArrayList();
        final InputStream in = new FileInputStream(args[0]);
        try
        {
            final SqlScanner scanner = new SqlScanner(in);
            for (;;)
            {
                final Token t = scanner.nextToken();
                System.err.println(scanner.getLine() + ": "
                    + scanner.getColumn() + " = " + t);
                tokens.add(t);
                if (t.getType() == TokenType.EOF)
                {
                    break;
                }
            }
        }
        finally
        {
            in.close(); // release the file handle even on parse errors
        }
        for (final Iterator iterator = tokens.iterator(); iterator.hasNext();)
        {
            final Token t = (Token) iterator.next();
            System.out.print(t.getValue());
        }
        System.out.flush();
    }
}