/*
* JacORB - a free Java ORB
*
* Copyright (C) 1997-2014 Gerald Brose / The JacORB Team.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package org.jacorb.idl;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Stack;
import java.util.logging.Level;
import org.jacorb.idl.runtime.char_token;
import org.jacorb.idl.runtime.float_token;
import org.jacorb.idl.runtime.int_token;
import org.jacorb.idl.runtime.long_token;
import org.jacorb.idl.runtime.token;
/**
* This class implements a scanner (aka lexical analyzer or
* lexer) for IDL. The scanner reads characters from a global input
* stream and returns integers corresponding to the terminal number
* of the next token. Once the end of input is reached the EOF token
* is returned on every subsequent call.<p>
*
* All symbol constants are defined in sym.java which is generated by
* JavaCup from parser.cup.<p>
*
* In addition to the scanner proper (called first via init() then
* with next_token() to get each token) this class provides simple
* error and warning routines and keeps a count of errors and
* warnings that is publicly accessible. It also provides basic
* preprocessing facilities, i.e. it does handle preprocessor
* directives such as #define, #undef, #include, etc. although it
* does not provide full C++ preprocessing.
*
* This class is "static" (i.e., it has only static members and methods).
*
* @author Gerald Brose
*
*/
public class lexer
{
/** First character of lookahead: the character the scanner examines next. */
protected static int next_char;
/** Second character of lookahead: the character following {@link #next_char}. */
protected static int next_char2;
/** Lookahead value that marks the end of the input. */
protected static final int EOF_CHAR = -1;
/**
* Table of keywords. Keywords are initially treated as
* identifiers. Just before they are returned we look them up in
* this table to see if they match one of the keywords. The
* string of the name is the key here, which indexes Integer
* objects holding the symbol number. init() also registers the
* multi-character symbols "::", "&lt;&lt;", "&gt;&gt;" and L" here.
*/
protected static Hashtable keywords = new Hashtable();
/** Table of keywords, stored in lower case. Keys are the
* lower case version of the keywords used as keys for the keywords
* hash above, and the values are the case sensitive versions of
* the keywords. This table is used for detecting collisions of
* identifiers with keywords.
*/
protected static Hashtable<String, String> keywords_lower_case = new Hashtable<String, String>();
/** Set of Java reserved names. init() fills it with the Java keywords
* and literals plus the names of the java.lang.Object methods
* (clone, equals, finalize, ...).
*/
protected static HashSet<String> java_keywords = new HashSet<String>();
/** Table of single character symbols. For ease of implementation, we
* store all unambiguous single character tokens in this table of Integer
* objects keyed by Integer objects with the numerical value of the
* appropriate char (the original author notes that Character objects
* could not be used as keys at the time this was written).
*/
protected static Hashtable<Integer, Integer> char_symbols = new Hashtable<Integer, Integer>( 25 );
/** Symbols defined by the preprocessor: symbol name mapped to its replacement text. */
protected static Hashtable<String, String> defines = new Hashtable<String, String>();
/** False while the scanner is skipping input inside a conditional
* section (#if/#ifdef/#ifndef/#else) whose condition did not hold.
*/
protected static boolean conditionalCompilation = true;
/** Nested #ifdefs are pushed on this stack by the "preprocessor": each
* #if/#ifdef/#ifndef pushes the value of conditionalCompilation in
* effect at that point and #endif pops it again. init() pushes an
* initial "true" entry.
*/
private static Stack<Boolean> ifStack = new Stack<Boolean>();
/** Tokens remembered from a previous scan; real_next_token() returns
* these before reading any further input.
*/
private static Stack<token> tokenStack = new Stack<token>();
/** Current line number for use in error messages. */
protected static int current_line = 1;
/** Text of the current line consumed so far, for use in error messages. */
protected static StringBuffer line = new StringBuffer();
/** Character position in current line. */
protected static int current_position = 1;
/** Have we already read a '"' ? */
protected static boolean in_string = false;
/** Are we processing a wide char or string ? */
protected static boolean wide = false;
/** Count of total errors detected so far. */
static int error_count = 0;
/** Count of warnings issued so far */
public static int warning_count = 0;
/** Currently active pragma prefix (set by "#pragma prefix"). */
public static String currentPragmaPrefix = "";
/** current file name */
public static String currentFile = "";
/**
 * Reset the scanner state so that a new input can be scanned.
 * <p>
 * Clears the error and warning counters, the position bookkeeping,
 * the pragma prefix, the preprocessor state (defined symbols, the
 * conditional-compilation flag and its stack) and any pending tokens.
 * The string/wide-literal flags and the conditional-compilation flag
 * are reset as well, so that a previous run that stopped inside a
 * string literal or inside a skipped <code>#if</code> section cannot
 * influence the next run.
 * <p>
 * The keyword tables and the lookahead characters are not touched
 * here; they are (re)established by {@link #init()}.
 */
public static void reset()
{
    /* position bookkeeping */
    current_line = 1;
    current_position = 1;
    line = new StringBuffer();

    /* diagnostics */
    error_count = 0;
    warning_count = 0;

    /* literal scanning state */
    in_string = false;
    wide = false;

    /* preprocessor state */
    currentPragmaPrefix = "";
    conditionalCompilation = true;
    ifStack.removeAllElements();
    defines.clear();

    /* pending tokens */
    tokenStack.removeAllElements();
}
/**
 * Initialize the scanner. This sets up the keywords, char_symbols and
 * Java reserved-name tables, pushes the initial entry on the
 * conditional-compilation stack and reads the first two characters of
 * lookahead.
 * <p>
 * "Object" is listed as reserved in the OMG spec and is therefore part
 * of the keyword table. "int" is not an IDL keyword; it only appears in
 * the table of Java reserved names.
 * <p>
 * If the first character cannot be read, a fatal error naming the
 * current input file is reported through the parser.
 *
 * @throws java.io.IOException if reading the second lookahead
 *         character fails
 */
public static void init()
    throws java.io.IOException
{
    /* set up standard symbols */
    defines.put( "JACORB_IDL_1_4", "" );

    /* set up the keyword table */
    putKeyword( "abstract", sym.ABSTRACT );
    putKeyword( "any", sym.ANY );
    putKeyword( "attribute", sym.ATTRIBUTE );
    putKeyword( "boolean", sym.BOOLEAN );
    putKeyword( "case", sym.CASE );
    putKeyword( "char", sym.CHAR );
    putKeyword( "const", sym.CONST );
    putKeyword( "context", sym.CONTEXT );
    putKeyword( "custom", sym.CUSTOM );
    putKeyword( "default", sym.DEFAULT );
    putKeyword( "double", sym.DOUBLE );
    putKeyword( "enum", sym.ENUM );
    putKeyword( "exception", sym.EXCEPTION );
    putKeyword( "factory", sym.FACTORY );
    putKeyword( "FALSE", sym.FALSE );
    putKeyword( "fixed", sym.FIXED );
    putKeyword( "float", sym.FLOAT );
    putKeyword( "getraises", sym.GETRAISES );
    putKeyword( "in", sym.IN );
    putKeyword( "inout", sym.INOUT );
    putKeyword( "interface", sym.INTERFACE );
    putKeyword( "local", sym.LOCAL );
    putKeyword( "long", sym.LONG );
    putKeyword( "module", sym.MODULE );
    putKeyword( "native", sym.NATIVE );
    putKeyword( "Object", sym.OBJECT );
    putKeyword( "octet", sym.OCTET );
    putKeyword( "oneway", sym.ONEWAY );
    putKeyword( "out", sym.OUT );
    putKeyword( "private", sym.PRIVATE );
    putKeyword( "public", sym.PUBLIC );
    putKeyword( "pseudo", sym.PSEUDO );
    putKeyword( "raises", sym.RAISES );
    putKeyword( "readonly", sym.READONLY );
    putKeyword( "sequence", sym.SEQUENCE );
    putKeyword( "setraises", sym.SETRAISES );
    putKeyword( "short", sym.SHORT );
    putKeyword( "string", sym.STRING );
    putKeyword( "struct", sym.STRUCT );
    putKeyword( "supports", sym.SUPPORTS );
    putKeyword( "switch", sym.SWITCH );
    putKeyword( "TRUE", sym.TRUE );
    putKeyword( "truncatable", sym.TRUNCATABLE );
    putKeyword( "typedef", sym.TYPEDEF );
    putKeyword( "typeprefix", sym.TYPEPREFIX );
    putKeyword( "unsigned", sym.UNSIGNED );
    putKeyword( "union", sym.UNION );
    putKeyword( "ValueBase", sym.VALUEBASE );
    putKeyword( "valuetype", sym.VALUETYPE );
    putKeyword( "void", sym.VOID );
    putKeyword( "wchar", sym.WCHAR );
    putKeyword( "wstring", sym.WSTRING );
    putKeyword( "::", sym.DBLCOLON );
    putKeyword( "<<", sym.LSHIFT );
    putKeyword( ">>", sym.RSHIFT );
    putKeyword( "L\"", sym.LDBLQUOTE );

    // setup the mapping of lower case keywords to case sensitive
    // keywords
    for( Enumeration<String> e = keywords.keys(); e.hasMoreElements(); )
    {
        String keyword = e.nextElement();
        keywords_lower_case.put( keyword.toLowerCase(), keyword );
    }

    /* set up the table of single character symbols */
    putCharSymbol( ';', sym.SEMI );
    putCharSymbol( ',', sym.COMMA );
    putCharSymbol( '*', sym.STAR );
    putCharSymbol( '.', sym.DOT );
    putCharSymbol( ':', sym.COLON );
    putCharSymbol( '=', sym.EQUALS );
    putCharSymbol( '+', sym.PLUS );
    putCharSymbol( '-', sym.MINUS );
    putCharSymbol( '{', sym.LCBRACE );
    putCharSymbol( '}', sym.RCBRACE );
    putCharSymbol( '(', sym.LPAREN );
    putCharSymbol( ')', sym.RPAREN );
    putCharSymbol( '[', sym.LSBRACE );
    putCharSymbol( ']', sym.RSBRACE );
    putCharSymbol( '<', sym.LESSTHAN );
    putCharSymbol( '>', sym.GREATERTHAN );
    putCharSymbol( '\'', sym.QUOTE );
    putCharSymbol( '\"', sym.DBLQUOTE );
    putCharSymbol( '\\', sym.BSLASH );
    putCharSymbol( '^', sym.CIRCUM );
    putCharSymbol( '&', sym.AMPERSAND );
    putCharSymbol( '/', sym.SLASH );
    putCharSymbol( '%', sym.PERCENT );
    putCharSymbol( '~', sym.TILDE );
    putCharSymbol( '|', sym.BAR );
    putCharSymbol( ' ', sym.SPACE );

    /* set up reserved Java names: keywords and literals first, then
       the names of the java.lang.Object methods */
    final String[] reservedJavaNames = {
        "abstract", "assert", "boolean", "break", "byte", "case",
        "catch", "char", "class", "const", "continue", "default",
        "do", "double", "else", "enum", "extends", "false", "final",
        "finally", "float", "for", "goto", "if", "implements",
        "import", "instanceof", "int", "interface", "long", "native",
        "new", "null", "package", "private", "protected", "public",
        "return", "short", "static", "strictfp", "super", "switch",
        "synchronized", "true", "this", "throw", "throws",
        "transient", "try", "void", "volatile", "while",
        "clone", "equals", "finalize", "getClass", "hashCode",
        "notify", "notifyAll", "toString", "wait"
    };
    for( String reserved : reservedJavaNames )
    {
        java_keywords.add( reserved );
    }

    /* stack needs a topmost value */
    ifStack.push( Boolean.TRUE );

    /* read two characters of lookahead */
    try
    {
        next_char = GlobalInputStream.read();
    }
    catch( Exception e )
    {
        org.jacorb.idl.parser.fatal_error( "Cannot read from file " +
                                           GlobalInputStream.currentFile().getAbsolutePath() +
                                           ", please check file name.", null );
    }

    if( next_char == EOF_CHAR )
    {
        next_char2 = EOF_CHAR;
    }
    else
    {
        next_char2 = GlobalInputStream.read();
    }
}

/** Register one entry of the keyword table: keyword text mapped to its symbol number. */
private static void putKeyword( String keyword, int symbol )
{
    keywords.put( keyword, Integer.valueOf( symbol ) );
}

/** Register one single-character token: character code mapped to its symbol number. */
private static void putCharSymbol( char c, int symbol )
{
    char_symbols.put( Integer.valueOf( (int)c ), Integer.valueOf( symbol ) );
}
/**
 * Define a preprocessor symbol, replacing any earlier definition of
 * the same name.
 *
 * @param symbol the name of the symbol
 * @param value the replacement text stored for the symbol
 */
public static void define( String symbol, String value )
{
    final boolean tracing = parser.logger.isLoggable( Level.ALL );
    if( tracing )
    {
        final String note = "Defining: " + symbol + " as " + value;
        parser.logger.log( Level.ALL, note );
    }

    defines.put( symbol, value );
}
/**
 * Remove a preprocessor symbol. Removing a symbol that is not defined
 * has no effect.
 *
 * @param symbol the name of the symbol
 */
public static void undefine( String symbol )
{
    final boolean tracing = parser.logger.isLoggable( Level.ALL );
    if( tracing )
    {
        final String note = "Un-defining: " + symbol;
        parser.logger.log( Level.ALL, note );
    }

    defines.remove( symbol );
}
/**
 * Look up a preprocessor symbol.
 *
 * @param symbol the name of the symbol
 * @return the replacement text stored for the symbol, or null if the
 *         symbol is not defined
 */
public static String defined( String symbol )
{
    final String replacement = defines.get( symbol );
    return replacement;
}
/**
* Return the number of the line the scanner is currently reading.
*
* @return the current value of the line counter used in error and
*         warning messages
*/
public static int currentLine()
{
return current_line;
}
/**
 * Capture the current reading position.
 *
 * @return a PositionInfo built from the current line number, the
 *         position within the line, the active pragma prefix, the text
 *         of the current line read so far and the current input file
 */
public static PositionInfo getPosition()
{
    final String lineSoFar = line.toString();

    return new PositionInfo( current_line, current_position, currentPragmaPrefix,
                             lineSoFar, GlobalInputStream.currentFile() );
}
/**
 * Restore the line number and pragma prefix recorded in a
 * PositionInfo. The position within the line is not taken from the
 * PositionInfo; it is set to 0.
 *
 * @param p the position to restore
 */
public static void restorePosition( PositionInfo p )
{
    final int savedLine = p.line_no;
    final String savedPrefix = p.pragma_prefix;

    current_line = savedLine;
    currentPragmaPrefix = savedPrefix;
    current_position = 0;
}
/**
 * Consume one character of input. The second lookahead character
 * becomes the first, a new second lookahead character is read from
 * the global input stream, and the consumed character is recorded in
 * the line/position bookkeeping used for diagnostics.
 *
 * @throws java.io.IOException if reading from the input stream fails
 */
protected static void advance()
    throws java.io.IOException
{
    final int consumed = next_char;

    /* shift the lookahead window; the read happens before any bookkeeping */
    next_char = next_char2;
    next_char2 = GlobalInputStream.read();

    line.append( (char)consumed );

    if( consumed != '\n' )
    {
        /* ordinary character: just count it */
        current_position++;
        return;
    }

    /* a newline starts a fresh line */
    current_line++;
    current_position = 1;
    line = new StringBuffer();
}
/**
 * Emit an error message and count it. When an input file is current,
 * the message is prefixed with the file's absolute path, the current
 * line number and the position in the line, and followed by the text
 * of the current line. The message is logged at level SEVERE through
 * the parser's logger; the error counter is incremented whether or not
 * that level is enabled.
 *
 * @param message the message to log.
 */
public static void emit_error( String message )
{
    if( parser.logger.isLoggable( Level.SEVERE ) )
    {
        final String report;
        if( GlobalInputStream.currentFile() == null )
        {
            // error probably occurred before parsing
            report = message;
        }
        else
        {
            report = GlobalInputStream.currentFile().getAbsolutePath() +
                ", line: " + current_line +
                "(" + current_position + "): " +
                message + "\n\t" +
                line.toString();
        }
        parser.logger.log( Level.SEVERE, report );
    }

    error_count++;
}
/**
 * Emit an error message located at a given token and count it. The
 * file name, line number, character position and line text are taken
 * from the token. Without a token this behaves like
 * emit_error( message ).
 *
 * @param message the message to log.
 * @param t the token the error refers to, or null to use the
 *          scanner's current position.
 */
public static void emit_error( String message, str_token t )
{
    if( t == null )
    {
        emit_error( message );
        return;
    }

    if( parser.logger.isLoggable( Level.SEVERE ) )
    {
        final String report = t.fileName + ", line:" + t.line_no +
            "(" + t.char_pos + "): " + message +
            "\n\t" + t.line_val;
        parser.logger.log( Level.SEVERE, report );
    }

    error_count++;
}
/**
 * Emit a warning message and count it. The message is followed by the
 * current line number, the position in the line and the quoted text of
 * the current line, and is logged at level WARNING through the
 * parser's logger. The warning counter is incremented whether or not
 * that level is enabled.
 *
 * @param message the message to log.
 */
public static void emit_warn( String message )
{
    final boolean enabled = parser.logger.isLoggable( Level.WARNING );
    if( enabled )
    {
        final String report = message + " at " + current_line + "(" + current_position +
            "): \"" + line.toString() + "\"";
        parser.logger.log( Level.WARNING, report );
    }

    warning_count++;
}
/**
 * Emit a warning message located at a given token and count it. The
 * file name, line number, character position and line text are taken
 * from the token. Without a token this behaves like
 * emit_warn( message ).
 *
 * @param message the message to log.
 * @param t the token the warning refers to, or null to use the
 *          scanner's current position.
 */
public static void emit_warn( String message, str_token t )
{
    if( t == null )
    {
        emit_warn( message );
        return;
    }

    if( parser.logger.isLoggable( Level.WARNING ) )
    {
        final String report = " at " + t.fileName + ", line:" + t.line_no + "(" +
            t.char_pos + "): " + message + "\n\t" + t.line_val;
        parser.logger.log( Level.WARNING, report );
    }

    warning_count++;
}
/**
 * Determine if a character may start an identifier: an ASCII letter
 * or an underscore.
 *
 * @param ch the character in question.
 * @return true if ch is 'a'..'z', 'A'..'Z' or '_'.
 */
protected static boolean id_start_char( int ch )
{
    if( ch == '_' )
    {
        return true;
    }

    final boolean lowerCase = 'a' <= ch && ch <= 'z';
    final boolean upperCase = 'A' <= ch && ch <= 'Z';
    return lowerCase || upperCase;
}
/**
 * Determine if a character may appear after the first character of an
 * identifier: anything accepted by id_start_char (ASCII letters and
 * underscore) or an ASCII digit.
 *
 * @param ch the character in question.
 * @return true if ch may continue an identifier.
 */
protected static boolean id_char( int ch )
{
    final boolean digit = '0' <= ch && ch <= '9';
    return digit || id_start_char( ch );
}
/**
 * Try to look up a single character symbol.
 *
 * @param ch the character in question.
 * @return the symbol number registered for the character in the
 *         char_symbols table, or -1 if there is none.
 */
protected static int find_single_char( int ch )
{
    // The value is narrowed to char before the lookup, exactly as before,
    // so EOF (-1) is looked up as 0xFFFF, for which init() registers no symbol.
    final Integer symbol = char_symbols.get( Integer.valueOf( (char)ch ) );
    return ( symbol == null ) ? -1 : symbol.intValue();
}
/**
 * Skip over a comment. Called with next_char on the leading '/'.
 * A traditional comment is skipped up to and including its closing
 * delimiter; an error is reported if the input ends first. A C++ style
 * comment is skipped up to, but not including, the next '\n', '\f',
 * '\r' or EOF. Anything else after the '/' is reported as a malformed
 * comment and the '/' is skipped.
 *
 * @throws java.io.IOException if reading from the input stream fails
 */
protected static void swallow_comment()
    throws java.io.IOException
{
    switch( next_char2 )
    {
        case '*':
            /* traditional comment: skip the opener */
            advance();
            advance();

            /* skip until the closer is in the lookahead window */
            while( !( next_char == '*' && next_char2 == '/' ) )
            {
                if( next_char == EOF_CHAR )
                {
                    emit_error( "Specification file ends inside a comment", null );
                    return;
                }
                advance();
            }

            /* skip the closer */
            advance();
            advance();
            return;

        case '/':
            /* new style comment: skip the opener */
            advance();
            advance();

            /* skip the rest of the line */
            while( !( next_char == '\n' || next_char == '\f' ||
                      next_char == '\r' || next_char == EOF_CHAR ) )
            {
                advance();
            }
            return;

        default:
            /* shouldn't get here, but... if we get here we have an error */
            emit_error( "Malformed comment in specification -- ignored", null );
            advance();
    }
}
/**
 * Handle one preprocessor directive. Called with next_char on the
 * '#' that introduces the directive.
 * <p>
 * Recognized directives are include, define, error, undef, if, elif,
 * ifdef, ifndef, else, endif and pragma (prefix, version, ID and
 * inhibit_code_generation). Directives other than the conditional
 * ones are ignored while a conditional section is being skipped.
 * Except where a branch returns early, the remainder of the directive
 * line is skipped up to (not including) the next '\n', '\f', '\r' or
 * EOF.
 * <p>
 * Malformed input is reported through emit_error: this includes an
 * <code>#else</code> or <code>#endif</code> for which the
 * conditional stack holds no entry, and a <code>#define</code> whose
 * body is cut off by the end of the input.
 *
 * @throws java.io.IOException if reading from the input stream fails
 */
protected static void preprocess()
    throws java.io.IOException
{
    /* if its EOF we have an error */
    if( next_char == EOF_CHAR )
    {
        emit_error( "Specification file ends inside a preprocessor directive", null );
        return;
    }
    else if( next_char != '#' )
    {
        emit_error( "expected #, got " + (char)next_char + " instead!", null );
    }
    else
    {
        advance(); // skip '#'
    }

    // the following is done to allow for # ifdef sloppiness
    while( ( ' ' == next_char ) || ( '\t' == next_char ) )
    {
        advance();
    }

    String dir = get_string();

    if( dir.equals( "include" ) )
    {
        if( !conditionalCompilation )
            return;

        // Swallow between include and < or "
        swallow_whitespace();
        boolean useIncludePath = ( next_char == '<' );
        advance(); // skip `\"' or '<'
        String fname = get_string();

        if( useIncludePath && ( next_char != '>' ) )
            emit_error( "Syntax error in #include directive, expecting '>'" );
        else if( !useIncludePath && ( next_char != '\"' ) )
            emit_error( "Syntax error in #include directive, expecting \"" );

        // Don't skip forward here and swallow \n,\f etc just in case the included
        // file does not end on a newline. The newline from the include will ensure
        // the included file is terminated.
        GlobalInputStream.include( fname, next_char2, useIncludePath );
        current_line = 0;
        advance();
        advance();
        return;
    }
    else if( dir.equals( "define" ) )
    {
        if( !conditionalCompilation )
            return;

        swallow_whitespace();
        String name = get_string();
        StringBuffer text = new StringBuffer();
        if( next_char == ' ' )
        {
            advance();
        }

        // Collect the replacement text up to the end of the line. The EOF
        // test keeps a #define on an unterminated last line from looping
        // forever.
        while( next_char != '\n' && next_char != EOF_CHAR )
        {
            if( next_char == '\\' )
            {
                // skip the backslash and the character following it
                advance();
                advance();
            }
            text.append( (char)next_char );
            advance();
        }
        define( name, text.toString() );
    }
    else if( dir.equals( "error" ) )
    {
        if( !conditionalCompilation )
            return;

        advance(); // skip ' '
        String name = get_string();
        emit_error( name );
    }
    else if( dir.equals( "undef" ) )
    {
        // Undefining symbol
        if( !conditionalCompilation )
            return;

        swallow_whitespace();
        String name = get_string();
        undefine( name );
    }
    else if( dir.equals( "if" ) || dir.equals( "elif" ) )
    {
        if (! dir.equals( "elif" ) )
        {
            ifStack.push( Boolean.valueOf( conditionalCompilation ) );
            if( !conditionalCompilation )
                return;
        }

        swallow_whitespace();

        // the following snippet distinguishes between #if defined
        // and #if !defined
        boolean straightDefined = true;
        if( '!' == next_char )
        {
            advance();
            straightDefined = false;
        }

        String defineStr = get_string_no_paren();

        if (defineStr.equals ("defined"))
        {
            swallow_whitespace();

            boolean brackets = ( '(' == next_char );
            if( brackets )
            {
                advance(); // skip '('
                swallow_whitespace(); // any whitespace after ( ? skip it
            }

            String name = get_string_no_paren();

            if( brackets )
            {
                swallow_whitespace();
                if( parser.logger.isLoggable(Level.ALL) )
                    parser.logger.log(Level.ALL, "next char: " + next_char);

                if( ')' != next_char )
                {
                    emit_error( "Expected ) terminating #if defined", null );
                    return;
                }
                advance();
            }

            if( straightDefined )
                conditionalCompilation = ( null != defined( name ) );
            else
                conditionalCompilation = ( null == defined( name ) );
        }
        else if (defineStr.equals("0"))
        {
            conditionalCompilation = false;
        }
        else if (defineStr.equals("1"))
        {
            conditionalCompilation = true;
        }
        else
        {
            emit_error( "Expected \"defined\" following #if: " +
                        dir, null );
            return;
        }
    }
    else if( dir.equals( "ifdef" ) )
    {
        ifStack.push( Boolean.valueOf( conditionalCompilation ) );
        if( !conditionalCompilation )
            return;

        swallow_whitespace();
        String name = get_string();
        conditionalCompilation = ( defined( name ) != null );
    }
    else if( dir.equals( "ifndef" ) )
    {
        ifStack.push( Boolean.valueOf( conditionalCompilation ) );
        if( !conditionalCompilation )
            return;

        swallow_whitespace();
        String name = get_string();
        conditionalCompilation = ( defined( name ) == null );
    }
    else if( dir.equals( "else" ) )
    {
        if( ifStack.empty() )
        {
            // nothing to consult: report it instead of failing with
            // an EmptyStackException
            emit_error( "#else without matching #if", null );
        }
        else if( ifStack.peek().booleanValue() )
        {
            // only flip when the enclosing section is itself active
            conditionalCompilation = !conditionalCompilation;
        }
    }
    else if( dir.equals( "endif" ) )
    {
        if( ifStack.empty() )
        {
            // nothing to restore: report it instead of failing with
            // an EmptyStackException
            emit_error( "#endif without matching #if", null );
        }
        else
        {
            conditionalCompilation = ifStack.pop().booleanValue();
        }
    }
    else if( dir.equals( "pragma" ) )
    {
        if( !conditionalCompilation )
            return;

        swallow_whitespace();

        String name = get_string();
        if( name.equals( "prefix" ) )
        {
            swallow_whitespace();
            currentPragmaPrefix = get_string();
        }
        else if( name.equals( "version" ) )
        {
            advance(); // skip ' '
            String vname = get_string();
            advance(); // skip ' '
            String version = get_string();

            String existingVersion = (String) parser.currentScopeData().versionMap.get (vname);
            if (existingVersion == null)
            {
                // Set version
                parser.currentScopeData().versionMap.put (vname, version);
            }
            else
            {
                // Check for version change
                if (! existingVersion.equals (version))
                {
                    emit_error
                    (
                        "Version re-declaration with different value: #pragma version " +
                        version,
                        null
                    );
                }
            }

            // the version must agree with the version part of an
            // already declared repository ID (text after the last ':')
            String iname = (String)parser.currentScopeData().idMap.get (vname);
            if (iname != null)
            {
                if (version.equals (iname.substring (1 + iname.lastIndexOf (':'))) == false)
                {
                    emit_error ("Declaring version with different version to already declared ID for " + vname, null);
                }
            }
        }
        else if( name.equals( "ID" ) )
        {
            advance(); // skip ' '
            String iname = get_string();
            advance(); // skip ' '
            String id = get_string();

            String existingID = (String) parser.currentScopeData().idMap.get (iname);
            if (existingID == null)
            {
                // Set id
                parser.currentScopeData().idMap.put (iname, id);
            }
            else
            {
                // Check for id change
                if (! existingID.equals (id))
                {
                    emit_error
                    (
                        "ID re-declaration with different value: #pragma id " +
                        id,
                        null
                    );
                }
            }

            // the version part of the ID (text after the last ':') must
            // agree with an already declared version
            if( parser.currentScopeData().versionMap.get( iname ) != null )
            {
                if( ((String)parser.currentScopeData().versionMap.get( iname )).equals
                    ( id.substring (1 + id.lastIndexOf (':'))) == false )
                {
                    emit_error ("Declaring ID with different version to already declared version for " + iname, null);
                }
            }
        }
        else if( name.equals( "inhibit_code_generation" ) )
        {
            /* proprietary pragma of the JacORB IDL compiler */
            parser.setInhibitionState( true );
        }
        else
        {
            emit_warn( "Unknown pragma, ignoring: #pragma " + name, null );
        }
    }
    else
    {
        emit_error( "Unrecognized preprocessor directive " + dir, null );
    }

    /* swallow to '\n', '\f', '\r' or EOF */
    while( next_char != '\n' && next_char != '\f' && next_char != '\r' &&
           next_char != EOF_CHAR )
    {
        advance();
    }
}
/**
 * Read a word for the <code>#if defined(...)</code> construct. Reading
 * starts at next_char and stops in front of the first whitespace
 * character ('&nbsp;', '\t', '\r', '\n', '\f'), double quote, '&lt;',
 * '&gt;', '(' or ')', or at the end of the input. The terminating
 * character is not consumed.
 *
 * @return the characters read; empty if next_char already is a
 *         terminator
 * @throws java.io.IOException if reading from the input stream fails
 */
private static String get_string_no_paren()
    throws java.io.IOException
{
    StringBuffer sb = new StringBuffer();

    // Test the int lookahead directly: once narrowed to char, the value can
    // never equal EOF_CHAR (-1) and the loop would not stop at end of input.
    while( next_char != EOF_CHAR &&
           next_char != ' ' && next_char != '\t' && next_char != '\r' &&
           next_char != '\n' && next_char != '\f' &&
           next_char != '\"' && next_char != '<' && next_char != '>' &&
           next_char != '(' && next_char != ')' )
    {
        sb.append( (char)next_char );
        advance();
    }
    return sb.toString();
}
/**
 * Read a word or a quoted string for the preprocessor.
 * <p>
 * If next_char is a double quote, the opening quote is skipped and
 * everything up to the closing quote is returned; the closing quote
 * is left in next_char. If the input ends before the closing quote,
 * an error is reported once and the text read so far is returned.
 * <p>
 * Otherwise reading stops in front of the first whitespace character
 * ('&nbsp;', '\t', '\r', '\n', '\f'), double quote, '&lt;' or '&gt;',
 * or at the end of the input.
 *
 * @return the characters read, possibly empty
 * @throws java.io.IOException if reading from the input stream fails
 */
private static String get_string()
    throws java.io.IOException
{
    StringBuffer sb = new StringBuffer( "" );

    if( next_char == '\"' )
    {
        advance(); // skip the opening quote
        while( next_char != '\"' )
        {
            if( next_char == EOF_CHAR )
            {
                // stop here: advancing past EOF only yields EOF again
                emit_error( "Unexpected EOF in string" );
                break;
            }
            sb.append( (char)next_char );
            advance();
        }
    }
    else
    {
        while( next_char != ' ' && next_char != '\t' && next_char != '\r' &&
               next_char != '\n' && next_char != '\f' && next_char != EOF_CHAR &&
               next_char != '\"' && next_char != '<' && next_char != '>' )
        {
            sb.append( (char)next_char );
            advance();
        }
    }
    return sb.toString();
}
/**
 * Process an identifier, keyword or #defined symbol.
 * <P>
 * The character in next_char is taken as the first character; further
 * characters are collected for as long as id_char accepts them
 * (ASCII letters, digits and underscores).
 * <P>
 * If the collected word is a #defined symbol, its replacement text
 * followed by the two lookahead characters is inserted into the input
 * stream, the lookahead is re-read and null is returned. If it is a
 * keyword, a plain token with the keyword's symbol number is returned,
 * after a new scope has been opened for scope-introducing keywords.
 * Otherwise the word is run through checkIdentifier and returned as a
 * str_token of kind sym.ID, or null if checkIdentifier rejected it.
 *
 * @return the token, or null if nothing is to be returned to the
 *         parser for this word
 * @throws java.io.IOException if reading from the input stream fails
 */
protected static token do_symbol()
    throws java.io.IOException
{
    final StringBuffer collected = new StringBuffer();

    /* next_char holds the first character of the id and is always taken */
    do
    {
        collected.append( (char)next_char );
        advance();
    }
    while( id_char( next_char ) );

    final String word = collected.toString();

    /* a defined symbol is expanded in place */
    final String replacement = defined( word );
    if( replacement != null )
    {
        final char[] lookahead = { (char)next_char, (char)next_char2 };
        GlobalInputStream.insert( replacement + ( new String( lookahead ) ) );
        advance(); // restore lookahead
        advance(); // restore lookahead
        return null;
    }

    /* a keyword is returned as a plain token */
    final Integer keywordSymbol = (Integer)keywords.get( word );
    if( keywordSymbol != null )
    {
        if( isScope( word ) )
        {
            parser.openScope();
        }
        return new token( keywordSymbol.intValue() );
    }

    /* anything else is an identifier: verify the case sensitivity rules
       and escape it if it collides with a Java reserved name */
    final String identifier = checkIdentifier( word );
    if( identifier == null )
    {
        return null;
    }
    return new str_token( sym.ID, identifier, getPosition(),
                          GlobalInputStream.currentFile().getName() );
}
/**
 * Tell whether a keyword is one of those for which do_symbol opens a
 * new scope: module, interface, struct, exception or union.
 *
 * @param keyword the keyword text
 * @return true if the keyword is one of the five listed above
 */
private static boolean isScope( String keyword )
{
    final String[] scopeKeywords =
        { "module", "interface", "struct", "exception", "union" };

    for( int i = 0; i < scopeKeywords.length; i++ )
    {
        if( keyword.equals( scopeKeywords[ i ] ) )
        {
            return true;
        }
    }
    return false;
}
/**
 * Checks whether the identifier str is legal and returns the name to
 * use for it.
 * <P>
 * An identifier escaped with a leading underscore has that underscore
 * removed and is not checked against the IDL keywords. Any other
 * identifier must not collide with an IDL keyword: with strict names
 * the comparison ignores case, otherwise it is case sensitive. If the
 * resulting name clashes with a Java reserved name, an underscore is
 * prepended.
 *
 * @param str the IDL identifier; must not be empty
 * @return the checked identifier, or null if it collides with an IDL
 *         keyword, in which case an error has been emitted
 */
public static String checkIdentifier( String str )
{
    if( parser.logger.isLoggable(Level.FINEST) )
        parser.logger.log(Level.FINEST, "checking identifier " + str);

    if( str.charAt( 0 ) == '_' )
    {
        /* escaped identifier: drop the escape, no keyword check */
        str = str.substring( 1 );
    }
    else
    {
        String colliding_keyword = null;

        if (org.jacorb.idl.parser.strict_names)
        {
            // check for name clashes strictly (i.e. case insensitive)
            colliding_keyword =
                keywords_lower_case.get(str.toLowerCase());
        }
        else
        {
            // check for name clashes only loosely (i.e. case sensitive).
            // The keyword table maps names to Integer symbol numbers, so
            // its values cannot be cast to String; on an exact match the
            // colliding keyword is the identifier itself.
            if( keywords.containsKey( str ) )
            {
                colliding_keyword = str;
            }
        }

        if( colliding_keyword != null )
        {
            emit_error( "Identifier " + str + " collides with keyword " +
                        colliding_keyword + "." );
            return null;
        }
    }

    /* clashes with a Java reserved word? */
    if( needsJavaEscape( str ) )
    {
        str = "_" + str;
    }

    return str;
}
/**
 * Tell whether a name is contained in the table of Java reserved
 * names. Only the most general name clashes with Java are caught
 * here; identifiers need to be checked again at other places in the
 * compiler.
 *
 * @param s the name to check
 * @return true if the name is in the table of Java reserved names
 */
private static boolean needsJavaEscape( String s )
{
    final boolean reserved = java_keywords.contains( s );
    return reserved;
}
/**
 * Tell whether a name ends with, but is not identical to, one of the
 * suffixes "Helper", "Holder", "Operations", "Package", "POA" or
 * "POATie".
 *
 * @param s the name to check
 * @return true if the name is longer than one of the suffixes and
 *         ends with it
 */
public static boolean strictJavaEscapeCheck( String s )
{
    final String[] suffixes =
        { "Helper", "Holder", "Operations", "Package", "POA", "POATie" };

    for( int i = 0; i < suffixes.length; i++ )
    {
        final String suffix = suffixes[ i ];
        /* "ends with the suffix but is not the suffix itself" */
        if( s.length() > suffix.length() && s.endsWith( suffix ) )
        {
            return true;
        }
    }
    return false;
}
/**
 * Apply strictJavaEscapeCheck to the package name of a module.
 *
 * @param m the module whose pack_name is checked
 * @return the result of strictJavaEscapeCheck for the module's
 *         package name
 */
public static boolean needsJavaEscape( Module m )
{
    final String packageName = m.pack_name;

    if( parser.logger.isLoggable( Level.ALL ) )
    {
        parser.logger.log( Level.ALL, "checking module name " + packageName );
    }

    return strictJavaEscapeCheck( packageName );
}
/**
 * Return one token. This is the main external interface to the
 * scanner. Before scanning, the parser is told the include state of
 * the global input stream; the token itself is produced by
 * real_next_token().
 *
 * @return the next token
 * @throws java.io.IOException if reading from the input stream fails
 */
public static token next_token()
    throws java.io.IOException
{
    parser.set_included( GlobalInputStream.includeState() );
    return real_next_token();
}
/**
 * Skip white space: advance for as long as next_char is a blank, tab,
 * newline, form feed or carriage return.
 *
 * @throws java.io.IOException if reading from the input stream fails
 */
private static void swallow_whitespace()
    throws java.io.IOException
{
    for( ; ; )
    {
        switch( next_char )
        {
            case ' ':
            case '\t':
            case '\n':
            case '\f':
            case '\r':
                /* white space: skip it and look at the next character */
                advance();
                break;

            default:
                return;
        }
    }
}
/**
* The actual routine to return one token.
*
* @return token
* @throws java.io.IOException
*/
protected static token real_next_token()
throws java.io.IOException
{
int sym_num;
/* if we found more than a single token last time, these
tokens were remembered on the tokenStack - return the first
one here */
if( !tokenStack.empty() )
return tokenStack.pop();
/* else */
for( ; ; )
{
/* scan input until we return something */
if( !in_string )
{
swallow_whitespace();
/* look for preprocessor directives */
if( (char)next_char == '#' )
{
preprocess();
continue;
}
/* look for a comment */
if( next_char == '/' && ( next_char2 == '*' || next_char2 == '/' ) )
{
/* swallow then continue the scan */
swallow_comment();
continue;
}
if( !conditionalCompilation )
{
advance();
if( next_char == EOF_CHAR )
{
emit_error( "EOF in conditional compilation!", null );
return null;
}
else
continue;
}
/* look for COLON or DBLCOLON */
if( next_char == ':' )
{
if( next_char2 == ':' )
{
advance();
advance();
return new token( sym.DBLCOLON );
}
else
{
advance();
return new token( sym.COLON );
}
}
/* leading L for wide strings */
if( next_char == 'L' && ( next_char2 =='\"' || next_char2 =='\'') )
{
wide = true;
advance();
if( next_char2 == '\"' )
{
advance();
in_string = true;
return new token( sym.LDBLQUOTE );
}
// wide char literal may follow, but detecting that
// is done below.
}
/* look for Shifts */
if( next_char == '<' )
{
if( next_char2 == '<' )
{
advance();
advance();
return new token( sym.LSHIFT );
}
else
{
advance();
return new token( sym.LESSTHAN );
}
}
if( next_char == '>' )
{
if( next_char2 == '>' )
{
advance();
advance();
return new token( sym.RSHIFT );
}
else
{
advance();
return new token( sym.GREATERTHAN );
}
}
/* leading 0: */
/* Try to scan octal/hexadecimal numbers, might even find a float */
if( next_char == '0' )
{
int radix = 8;
int digit = 0;
advance();
if( next_char == '.' )
{
StringBuffer f_string = new StringBuffer( "0." );
advance();
while( next_char >= '0' && next_char <= '9' )
{
f_string.append( (char)next_char );
advance();
}
float f_val = ( new Float( f_string.toString() ) ).floatValue();
return new float_token( sym.FLOAT_NUMBER, f_val );
}
else
{
// See if hexadecimal value
if( next_char == 'x' || next_char == 'X' )
{
advance();
radix = 16;
}
StringBuffer val = new StringBuffer( "0" );
digit = Character.digit( (char)next_char, radix );
while( digit != -1 )
{
val.append( (char)next_char );
advance();
digit = Character.digit( (char)next_char, radix );
}
String str = val.toString();
try
{
return new int_token( sym.NUMBER,
Integer.parseInt( str, radix ) );
}
catch( NumberFormatException ex )
{
try
{
return new long_token( sym.LONG_NUMBER,
Long.parseLong( str, radix ) );
}
catch( NumberFormatException ex2 )
{
emit_error( "Invalid octal/hex value: " + str );
}
}
return null;
}
}
/* Try to scan integer, floating point or fixed point literals */
if (isDigit (((char)next_char)) ||
next_char == '.' ||
(next_char == '-' && isDigit (((char)next_char2))))
{
StringBuffer value = new StringBuffer();
StringBuffer fraction = null;
if ( next_char == '-' )
{
value.append( (char)next_char );
advance();
}
// Read integer part
while( next_char >= '0' && next_char <= '9' )
{
value.append( (char)next_char );
advance();
}
// Read fraction
if( next_char == '.' )
{
fraction = new StringBuffer();
advance();
while( next_char >= '0' && next_char <= '9' )
{
fraction.append( (char)next_char );
advance();
}
}
// Read exponent
if( next_char == 'e' || next_char == 'E' )
{
if( fraction == null )
fraction = new StringBuffer();
fraction.append( 'e' );
advance();
if( next_char == '-' || next_char == '+' )
{
fraction.append( (char)next_char );
advance();
}
while( next_char >= '0' && next_char <= '9' )
{
fraction.append( (char)next_char );
advance();
}
if( fraction.length() == 1 )
{
emit_error( "Empty exponent in float/double." );
continue;
}
return new float_token( sym.FLOAT_NUMBER,
Float.valueOf( value.toString() +
"." +
fraction.toString() ).floatValue() );
}
if( next_char == 'd' || next_char == 'D' )
{
advance();
if( fraction == null )
fraction = new StringBuffer();
java.math.BigDecimal bi =
new java.math.BigDecimal( value.toString() + "." +
fraction.toString() );
return new fixed_token( sym.FIXED_NUMBER, bi );
}
if( fraction == null )
{
/* integer or long */
token tok = null;
String str = value.toString();
try
{
tok = new int_token( sym.NUMBER, Integer.parseInt( str ) );
}
catch( NumberFormatException ex )
{
try
{
tok = new long_token
( sym.LONG_NUMBER, Long.parseLong( str ) );
}
catch( NumberFormatException ex2 )
{
try
{
// Not quite critical yet - lets try stuffing it into
// a bigdecimal for later checking.
tok = new fixed_token
(sym.FIXED_NUMBER, new java.math.BigDecimal (str));
}
catch (NumberFormatException ex3)
{
emit_error( "Invalid long value: " + str );
}
}
}
return tok;
}
else
{
try
{
float f =
Float.valueOf( value.toString() + "." +
fraction.toString() ).floatValue();
return new float_token( sym.FLOAT_NUMBER, f );
}
catch( NumberFormatException nf )
{
emit_error( "Unexpected symbol: " +
value.toString() + "." +
fraction.toString() );
}
}
}
/* look for a single character symbol */
sym_num = find_single_char( next_char );
/* upon an opening double quote, return the
sym.DBLQUOTE token and continue scanning in the
in_string branch */
if( (char)next_char == '\"' )
{
in_string = true;
advance();
return new token( sym.DBLQUOTE );
}
if( (char)next_char == '\'' )
{
advance();
token t = null;
if( next_char == '\\' )
{
// Now need to process escaped character.
advance();
if( isDigit( (char)next_char ) )
{
// Octal character
char octal1 = '0';
char octal2 = '0';
char octal3 = (char)next_char;
if( isDigit( (char)next_char2 ) )
{
advance();
octal2 = octal3;
octal3 = (char)next_char;
if( isDigit( (char)next_char2 ) )
{
advance();
octal1 = octal2;
octal2 = octal3;
octal3 = (char)next_char;
}
}
t = new char_token
(
sym.CH,
(char)Integer.parseInt
( new String
( new char[]{octal1, octal2, octal3} ),
8
)
);
}
else if( (char)next_char == 'x' )
{
// Hexadecimal character
advance();
char hex1 = '0';
char hex2 = (char)next_char;
if( isHexLetterOrDigit( (char)next_char2 ) )
{
advance();
hex1 = hex2;
hex2 = (char)next_char;
}
else if( (char)next_char2 != '\'' )
{
emit_error( "Illegal hex character" );
return null;
}
t = new char_token
(
sym.CH,
(char)Integer.parseInt
( new String
( new char[]{hex1, hex2} ),
16
)
);
}
else if( (char)next_char == 'u' )
{
if( wide == false )
{
emit_error( "Unicode characters are only legal with wide character" );
return null;
}
else
{
// Hexadecimal character
advance();
char uni1 = '0';
char uni2 = '0';
char uni3 = '0';
char uni4 = (char)next_char;
if( isHexLetterOrDigit( (char)next_char2 ) )
{
advance();
uni3 = uni4;
uni4 = (char)next_char;
if( isHexLetterOrDigit( (char)next_char2 ) )
{
advance();
uni2 = uni3;
uni3 = uni4;
uni4 = (char)next_char;
if( isHexLetterOrDigit( (char)next_char2 ) )
{
advance();
uni1 = uni2;
uni2 = uni3;
uni3 = uni4;
uni4 = (char)next_char;
}
else if( (char)next_char2 != '\'' )
{
emit_error( "Illegal unicode character" );
return null;
}
}
else if( (char)next_char2 != '\'' )
{
emit_error( "Illegal unicode character" );
return null;
}
}
else if( (char)next_char2 != '\'' )
{
emit_error( "Illegal unicode character" );
return null;
}
t = new char_token
(
sym.CH,
(char)Integer.parseInt
( new String
( new char[]{uni1, uni2, uni3, uni4} ),
16
)
);
}
}
else
{
switch( next_char )
{
case 'n':
{
t = new char_token( sym.CH, '\n' );
break;
}
case 't':
{
t = new char_token( sym.CH, '\t' );
break;
}
case 'v':
{
t = new char_token( sym.CH, '\013' );
break;
}
case 'b':
{
t = new char_token( sym.CH, '\b' );
break;
}
case 'r':
{
t = new char_token( sym.CH, '\r' );
break;
}
case 'f':
{
t = new char_token( sym.CH, '\f' );
break;
}
case 'a':
{
t = new char_token( sym.CH, '\007' );
break;
}
case '\\':
{
t = new char_token( sym.CH, '\\' );
break;
}
case '?':
{
t = new char_token( sym.CH, '?' );
break;
}
case '0':
{
t = new char_token( sym.CH, '\0' );
break;
}
case '\'':
{
t = new char_token( sym.CH, '\'' );
break;
}
case '\"':
{
t = new char_token( sym.CH, '\"' );
break;
}
default:
{
emit_error( "Invalid escape symbol \'" );
return null;
}
}
}
}
else
{
t = new char_token( sym.CH, (char)next_char );
}
advance();
if( (char)next_char == '\'' )
{
tokenStack.push( new token( sym.QUOTE ) );
tokenStack.push( t );
advance();
}
else
{
emit_error( "Expecting closing \'" );
return null;
}
wide = false;
return new token( sym.QUOTE );
}
if( sym_num != -1 )
{
/* found one -- advance past it and return a token for it */
advance();
return new token( sym_num );
}
/* look for an id or keyword */
if( id_start_char( next_char ) )
{
token t = do_symbol();
if( t != null )
return t;
else
continue;
}
/* look for EOF */
if( next_char == EOF_CHAR )
{
return new token( sym.EOF );
}
}
else // in_string
{
/* empty string ? */
if( (char)next_char == '\"' )
{
in_string = false;
advance();
return new token( sym.DBLQUOTE );
}
StringBuffer result = new StringBuffer();
char previous = ' ';
/* collect up characters while they fit in id */
while( true )
{
if( next_char == '\\' )
{
// Remap those characters that have no equivilant in java
switch( next_char2 )
{
case 'a':
{
result.append( "\\007" );
previous = 'a';
advance();
break;
}
case 'v':
{
result.append( "\\013" );
previous = 'v';
advance();
break;
}
case '?':
{
result.append( "?" );
previous = '?';
advance();
break;
}
// Replace \xA0 by octal equivilant
case 'x':
{
advance();
advance();
// Now next_char will be A and next_char2 will be 0
String octal = Integer.toOctalString
(
Integer.parseInt
(
new String
(
new char[]{
(char)next_char,
(char)next_char2}
),
16
)
);
if( octal.length() != 3 )
{
if( octal.length() == 1 )
{
octal = "0" + octal;
}
octal = "0" + octal;
}
result.append( "\\" + octal );
previous = (char)next_char2;
advance();
break;
}
case 'u':
{
if( wide == false )
{
emit_error( "Unicode characters are only legal with wide strings" );
return null;
}
else
{
result.append( (char)next_char );
result.append( (char)next_char2 );
advance();
advance();
char uni1 = (char)next_char;
char uni2 = '0';
char uni3 = '0';
char uni4 = '0';
if( isHexLetterOrDigit( (char)next_char2 ) )
{
advance();
uni2 = (char)next_char;
if( isHexLetterOrDigit( (char)next_char2 ) )
{
advance();
uni3 = (char)next_char;
if( isHexLetterOrDigit( (char)next_char2 ) )
{
advance();
uni4 = (char)next_char;
}
else
{
emit_error( "Illegal unicode character" );
return null;
}
}
else
{
emit_error( "Illegal unicode character" );
return null;
}
}
else
{
emit_error( "Illegal unicode character" );
return null;
}
previous = uni4;
result.append( uni1 );
result.append( uni2 );
result.append( uni3 );
result.append( uni4 );
}
break;
}
default:
{
previous = (char)next_char;
result.append( (char)next_char );
}
}
}
else
{
previous = (char)next_char;
result.append( (char)next_char );
}
advance();
// Handle backslash quote but exit if just quote
if( ( (char)next_char ) == '\"' && previous != '\\' )
{
break;
}
}
wide = false;
String s = result.toString();
/* build and return an id token with an attached string */
return new org.jacorb.idl.str_token( sym.ID, s,
getPosition(),
GlobalInputStream.currentFile().getName() );
}
/* if we get here, we have an unrecognized character */
emit_warn( "Unrecognized character '" +
new Character( (char)next_char ) + "'(" + next_char + ") -- ignored" );
/* advance past it */
advance();
}
}
/**
 * Tests whether a character is one of the ten US ASCII decimal
 * digits, i.e. lies between '0' (U+0030) and '9' (U+0039) inclusive.
 * Digits belonging to other scripts are not accepted.
 *
 * @param c the character to test
 * @return <code>true</code> if <code>c</code> is in the range '0'..'9',
 *         <code>false</code> otherwise
 */
static boolean isDigit( char c )
{
    return '0' <= c && c <= '9';
}
/**
 * Tests whether a character is a US ASCII hexadecimal digit: a
 * decimal digit '0'..'9', an upper case letter 'A'..'F' or a lower
 * case letter 'a'..'f'. Every other character is rejected.
 *
 * @param c the character to test
 * @return <code>true</code> if <code>c</code> is in one of the three
 *         ranges above, <code>false</code> otherwise
 */
private static boolean isHexLetterOrDigit( char c )
{
    // U+0030 .. U+0039
    final boolean decimalDigit = '0' <= c && c <= '9';
    // U+0041 .. U+0046
    final boolean upperHexLetter = 'A' <= c && c <= 'F';
    // U+0061 .. U+0066
    final boolean lowerHexLetter = 'a' <= c && c <= 'f';

    return decimalDigit || upperHexLetter || lowerHexLetter;
}
}