/*
* This file or a portion of this file is licensed under the terms of
* the Globus Toolkit Public License, found in file GTPL, or at
* http://www.globus.org/toolkit/download/license.html. This notice must
* appear in redistributions of this file, with or without modification.
*
* Redistributions of this Software, with or without modification, must
* reproduce the GTPL in: (1) the Software, or (2) the Documentation or
* some other similar material which is provided with the Software (if
* any).
*
* Copyright 1999-2004 University of Chicago and The University of
* Southern California. All rights reserved.
*/
package org.griphyn.vdl.parser;
import java.io.Reader;
import java.io.IOException;
import java.io.LineNumberReader;
/**
* Implements the scanner for reserved words and other tokens that are
* generated from the input stream. This class is module-local on
* purpose.
*
* @author Jens-S. Vöckler
* @version $Revision$
*
*/
class VDLtScanner
{
  /**
   * Value returned by the reader when the end of the input is reached.
   */
  private static final int EOF = -1;

  /**
   * stores the stream from which we are currently scanning.
   */
  private LineNumberReader m_in;

  /**
   * captures the look-ahead character, or {@link #EOF} at end of input.
   */
  private int m_lookAhead;

  /**
   * Starts to scan the given stream. Line counting starts at 1, and
   * the first character is read into the look-ahead immediately.
   *
   * @param reader is the character source to tokenize.
   * @throws IOException if reading the first character fails.
   */
  public VDLtScanner( Reader reader )
    throws IOException
  {
    this.m_in = new LineNumberReader(reader);
    this.m_in.setLineNumber(1);
    this.m_lookAhead = m_in.read();
  }

  /**
   * Obtains the current line number in the input stream from the outside.
   * @return the current line number.
   */
  public int getLineNumber()
  {
    return m_in.getLineNumber();
  }

  /**
   * Skips any white space and comments in the input. A comment starts
   * with a hash sign and extends to the end of the line. This method
   * stops either at the end of file, or at any input character that is
   * neither whitespace nor the start of a comment.
   *
   * Implemented as a loop rather than by recursion, so that an
   * arbitrarily long run of comment lines cannot exhaust the stack.
   *
   * @throws IOException if something went wrong while reading.
   */
  private void skipWhitespace()
    throws IOException
  {
    while ( m_lookAhead != EOF ) {
      if ( Character.isWhitespace((char) m_lookAhead) ) {
        m_lookAhead = m_in.read();
      } else if ( m_lookAhead == '#' ) {
        // discard the remainder of the comment line, then look again
        m_in.readLine();
        m_lookAhead = m_in.read();
      } else {
        return;
      }
    }
  }

  /**
   * Checks for the availability of more input. Leading whitespace and
   * comments are consumed as a side effect.
   *
   * @return true, if there is more to read, false for EOF.
   * @throws IOException if something went wrong while reading.
   */
  public boolean hasMoreTokens()
    throws IOException
  {
    skipWhitespace();
    return ( this.m_lookAhead != EOF );
  }

  /**
   * Reads the next character from the input into the look-ahead.
   *
   * @throws IOException if something went wrong while reading.
   */
  private void advance()
    throws IOException
  {
    m_lookAhead = m_in.read();
  }

  /**
   * Consumes the current character, and any whitespace and comments
   * that follow it.
   *
   * @throws IOException if something went wrong while reading.
   */
  private void advanceAndSkip()
    throws IOException
  {
    advance();
    skipWhitespace();
  }

  /**
   * Enforces that the token just consumed is directly followed by
   * further material, i.e. neither by whitespace nor by end of file.
   *
   * @param what names the token just consumed, for the error message.
   * @throws VDLtScannerException if the look-ahead is whitespace or EOF.
   */
  private void requireNoWhitespaceAfter( String what )
    throws VDLtScannerException
  {
    if ( m_lookAhead == EOF || Character.isWhitespace((char) m_lookAhead) )
      throw new VDLtScannerException( m_in, "no whitespace allowed after " + what );
  }

  /**
   * Determines if a character may start an identifier or reserved word.
   *
   * @param c is the character to test.
   * @return true for letters, digits, underscore and slash.
   */
  private static boolean isWordStart( int c )
  {
    return Character.isLetterOrDigit((char) c) || c == '_' || c == '/';
  }

  /**
   * Determines if a character may continue an identifier or reserved
   * word.
   *
   * @param c is the character to test, or {@link #EOF}.
   * @return true for letters, digits, underscore, hyphen, slash and
   * period; false for anything else, including EOF.
   */
  private static boolean isWordPart( int c )
  {
    return c != EOF &&
      ( Character.isLetterOrDigit((char) c) ||
        c == '_' ||
        c == '-' ||
        c == '/' ||
        c == '.' );
  }

  /**
   * Scans a quoted string. On entry, the look-ahead is the opening
   * quote. A backslash takes the character following it literally.
   * On exit, the closing quote and any whitespace and comments after
   * it have been consumed.
   *
   * @return the token carrying the string contents without the quotes.
   * @throws IOException if something went wrong while reading.
   * @throws VDLtScannerException if the string or an escape inside it
   * is unterminated.
   */
  private VDLtToken scanQuotedString()
    throws IOException, VDLtScannerException
  {
    StringBuffer result = new StringBuffer(16);
    for (;;) {
      advance();
      if ( m_lookAhead == EOF || m_lookAhead == '\r' || m_lookAhead == '\n' ) {
        // eof or end of line is an unterminated string
        throw new VDLtScannerException( m_in, "unterminated quoted string" );
      }
      if ( m_lookAhead == '"' ) break;

      if ( m_lookAhead == '\\' ) {
        // always add whatever is after the backslash. The look-ahead
        // stays at the backslash, thus an escaped quote does not
        // terminate the string.
        int escaped = m_in.read();
        if ( escaped == EOF )
          throw new VDLtScannerException( m_in, "unterminated escape in quoted string" );
        result.append((char) escaped);
      } else {
        result.append((char) m_lookAhead);
      }
    }

    // skip over final quote
    advanceAndSkip();
    return new VDLtQuotedString( result.toString() );
  }

  /**
   * Scans an identifier or reserved word. On entry, the look-ahead is
   * the first character of the word. On exit, the word and any
   * whitespace and comments after it have been consumed.
   *
   * @return the transformation or derivation token for the reserved
   * words "tr" and "dv" (case-insensitive), an identifier otherwise.
   * @throws IOException if something went wrong while reading.
   */
  private VDLtToken scanWord()
    throws IOException
  {
    StringBuffer identifier = new StringBuffer(8);
    identifier.append( (char) m_lookAhead );
    advance();

    while ( isWordPart(m_lookAhead) ) {
      if ( m_lookAhead == '-' ) {
        // A hyphen may be part of the identifier, but "->" is the
        // start of the next token. Peek at one more character to
        // tell the two apart.
        m_in.mark(2);
        advance();
        if ( m_lookAhead == '>' ) {
          // this is part of the next token, reset stream
          m_in.reset();
          m_lookAhead = '-';
          break;
        }
        // plain hyphen; the look-ahead already holds its successor
        identifier.append('-');
      } else {
        identifier.append( (char) m_lookAhead );
        advance();
      }
    }

    // done parsing identifier or reserved word
    skipWhitespace();
    String word = identifier.toString();
    if ( word.equalsIgnoreCase("tr") ) return new VDLtTransformation();
    if ( word.equalsIgnoreCase("dv") ) return new VDLtDerivation();
    return new VDLtIdentifier(word);
  }

  /**
   * Obtains the next token from the input stream.
   *
   * @return an instance conforming to the token interface, or null for eof.
   * @throws IOException if something went wrong while reading
   * @throws VDLtScannerException if a lexical error was encountered.
   */
  public VDLtToken nextToken()
    throws IOException, VDLtScannerException
  {
    skipWhitespace();
    if ( m_lookAhead == EOF ) return null;

    switch ( m_lookAhead ) {
    // tokens which must be followed directly by more material
    case '$':
      advance();
      requireNoWhitespaceAfter( "dollar" );
      return new VDLtDollar();

    case '.':
      advance();
      requireNoWhitespaceAfter( "period" );
      return new VDLtPeriod();

    case '@':
      advance();
      requireNoWhitespaceAfter( "at" );
      return new VDLtAt();

    case ':':
      advance();
      if ( m_lookAhead == ':' ) {
        advance();
        requireNoWhitespaceAfter( "double colon" );
        return new VDLtDoubleColon();
      }
      requireNoWhitespaceAfter( "colon" );
      return new VDLtColon();

    // a hyphen is only valid as the first half of an arrow
    case '-':
      advance();
      if ( m_lookAhead != '>' )
        throw new VDLtScannerException( m_in, "a sole hyphen is not permitted" );
      advanceAndSkip();
      return new VDLtArrow();

    // single character tokens which may be followed by whitespace
    case ',':
      advanceAndSkip();
      return new VDLtComma();

    case '|':
      advanceAndSkip();
      return new VDLtVBar();

    case '=':
      advanceAndSkip();
      return new VDLtEquals();

    case ';':
      advanceAndSkip();
      return new VDLtSemicolon();

    case '(':
      advanceAndSkip();
      return new VDLtOpenParenthesis();

    case ')':
      advanceAndSkip();
      return new VDLtCloseParenthesis();

    case '{':
      advanceAndSkip();
      return new VDLtOpenBrace();

    case '}':
      advanceAndSkip();
      return new VDLtCloseBrace();

    case '[':
      advanceAndSkip();
      return new VDLtOpenBracket();

    case ']':
      advanceAndSkip();
      return new VDLtCloseBracket();

    case '"':
      return scanQuotedString();

    default:
      // are we parsing a reserved word or identifier
      if ( isWordStart(m_lookAhead) ) return scanWord();

      // unknown material. Note that the look-ahead is an int, thus the
      // message carries the numeric character code.
      throw new VDLtScannerException( m_in, "unknown character " + m_lookAhead );
    } // switch
  }
}