/* * JacORB - a free Java ORB * * Copyright (C) 1997-2014 Gerald Brose / The JacORB Team. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package org.jacorb.idl; import java.util.Enumeration; import java.util.HashSet; import java.util.Hashtable; import java.util.Stack; import java.util.logging.Level; import org.jacorb.idl.runtime.char_token; import org.jacorb.idl.runtime.float_token; import org.jacorb.idl.runtime.int_token; import org.jacorb.idl.runtime.long_token; import org.jacorb.idl.runtime.token; /** * This class implements a scanner (aka lexical analyzer or * lexer) for IDL. The scanner reads characters from a global input * stream and returns integers corresponding to the terminal number * of the next token. Once the end of input is reached the EOF token * is returned on every subsequent call.<p> * * All symbol constants are defined in sym.java which is generated by * JavaCup from parser.cup.<p> * * In addition to the scanner proper (called first via init() then * with next_token() to get each token) this class provides simple * error and warning routines and keeps a count of errors and * warnings that is publicly accessible. It also provides basic * preprocessing facilities, i.e. it does handle preprocessor * directives such as #define, #undef, #include, etc. 
although it * does not provide full C++ preprocessing * * This class is "static" (i.e., it has only static members and methods). * * @author Gerald Brose * */ public class lexer { /** First and second character of lookahead. */ protected static int next_char; protected static int next_char2; /** EOF constant. */ protected static final int EOF_CHAR = -1; /** * Table of keywords. Keywords are initially treated as * identifiers. Just before they are returned we look them up in * this table to see if they match one of the keywords. The * string of the name is the key here, which indexes Integer * objects holding the symbol number. */ protected static Hashtable keywords = new Hashtable(); /** Table of keywords, stored in lower case. Keys are the * lower case version of the keywords used as keys for the keywords * hash above, and the values are the case sensitive versions of * the keywords. This table is used for detecting collisions of * identifiers with keywords. */ protected static Hashtable<String, String> keywords_lower_case = new Hashtable<String, String>(); /** Table of Java reserved names. */ protected static HashSet<String> java_keywords = new HashSet<String>(); /** Table of single character symbols. For ease of implementation, we * store all unambiguous single character tokens in this table of Integer * objects keyed by Integer objects with the numerical value of the * appropriate char (currently Character objects have a bug which precludes * their use in tables). 
*/
    protected static Hashtable<Integer, Integer> char_symbols =
        new Hashtable<Integer, Integer>( 25 );

    /** Defined symbols (preprocessor): symbol name mapped to its replacement text. */
    protected static Hashtable<String, String> defines =
        new Hashtable<String, String>();

    /** False while the scanner is inside a conditional section that is skipped. */
    protected static boolean conditionalCompilation = true;

    /** nested #ifdefs are pushed on this stack by the "preprocessor" */
    private static Stack<Boolean> ifStack = new Stack<Boolean>();

    /** Tokens remembered from an earlier scan; handed out before new input is read. */
    private static Stack<token> tokenStack = new Stack<token>();

    /** Current line number for use in error messages. */
    protected static int current_line = 1;

    /** Current line for use in error messages. */
    protected static StringBuffer line = new StringBuffer();

    /** Character position in current line. */
    protected static int current_position = 1;

    /** Have we already read a '"' ? */
    protected static boolean in_string = false;

    /** Are we processing a wide char or string ? */
    protected static boolean wide = false;

    /** Count of total errors detected so far. */
    static int error_count = 0;

    /** Count of warnings issued so far */
    public static int warning_count = 0;

    /** currently active pragma prefix */
    public static String currentPragmaPrefix = "";

    /** current file name */
    public static String currentFile = "";

    /**
     * Reset the scanner state so that a new scan starts from the same
     * values the static initializers establish.
     * <p>
     * Besides the counters, pragma prefix, line buffer, stacks and
     * #define table this also restores the line number and the
     * string / wide-literal / conditional-compilation flags; leaving
     * those untouched would carry the previous run's line number and
     * skip state into the next one.  The keyword tables are not
     * touched; init() refills them and pushes the top-most ifStack
     * entry again.
     */
    public static void reset()
    {
        /* position bookkeeping */
        current_line = 1;
        current_position = 1;
        line = new StringBuffer();

        /* scanner mode flags */
        in_string = false;
        wide = false;
        conditionalCompilation = true;

        /* diagnostics and pragma state */
        error_count = 0;
        warning_count = 0;
        currentPragmaPrefix = "";

        /* preprocessor state */
        ifStack.removeAllElements();
        tokenStack.removeAllElements();
        defines.clear();
    }

    /**
     * Initialize the scanner.  This sets up the keywords and char_symbols
     * tables and reads the first two characters of lookahead.
     *
     * "Object" is listed as reserved in the OMG spec.
     * "int" is not, but I reserved it to bar its usage as a legal integer
     * type.
*/ public static void init() throws java.io.IOException { /* set up standard symbols */ defines.put( "JACORB_IDL_1_4", "" ); /* set up the keyword table */ keywords.put( "abstract", new Integer( sym.ABSTRACT ) ); keywords.put( "any", new Integer( sym.ANY ) ); keywords.put( "attribute", new Integer( sym.ATTRIBUTE ) ); keywords.put( "boolean", new Integer( sym.BOOLEAN ) ); keywords.put( "case", new Integer( sym.CASE ) ); keywords.put( "char", new Integer( sym.CHAR ) ); keywords.put( "const", new Integer( sym.CONST ) ); keywords.put( "context", new Integer( sym.CONTEXT ) ); keywords.put( "custom", new Integer( sym.CUSTOM ) ); keywords.put( "default", new Integer( sym.DEFAULT ) ); keywords.put( "double", new Integer( sym.DOUBLE ) ); keywords.put( "enum", new Integer( sym.ENUM ) ); keywords.put( "exception", new Integer( sym.EXCEPTION ) ); keywords.put( "factory", new Integer( sym.FACTORY ) ); keywords.put( "FALSE", new Integer( sym.FALSE ) ); keywords.put( "fixed", new Integer( sym.FIXED ) ); keywords.put( "float", new Integer( sym.FLOAT ) ); keywords.put( "getraises", new Integer( sym.GETRAISES ) ); keywords.put( "in", new Integer( sym.IN ) ); keywords.put( "inout", new Integer( sym.INOUT ) ); keywords.put( "interface", new Integer( sym.INTERFACE ) ); keywords.put( "local", new Integer( sym.LOCAL ) ); keywords.put( "long", new Integer( sym.LONG ) ); keywords.put( "module", new Integer( sym.MODULE ) ); keywords.put( "native", new Integer( sym.NATIVE ) ); keywords.put( "Object", new Integer( sym.OBJECT ) ); keywords.put( "octet", new Integer( sym.OCTET ) ); keywords.put( "oneway", new Integer( sym.ONEWAY ) ); keywords.put( "out", new Integer( sym.OUT ) ); keywords.put( "private", new Integer( sym.PRIVATE ) ); keywords.put( "public", new Integer( sym.PUBLIC ) ); keywords.put( "pseudo", new Integer( sym.PSEUDO ) ); keywords.put( "raises", new Integer( sym.RAISES ) ); keywords.put( "readonly", new Integer( sym.READONLY ) ); keywords.put( "sequence", new Integer( 
sym.SEQUENCE ) ); keywords.put( "setraises", new Integer( sym.SETRAISES ) ); keywords.put( "short", new Integer( sym.SHORT ) ); keywords.put( "string", new Integer( sym.STRING ) ); keywords.put( "struct", new Integer( sym.STRUCT ) ); keywords.put( "supports", new Integer( sym.SUPPORTS ) ); keywords.put( "switch", new Integer( sym.SWITCH ) ); keywords.put( "TRUE", new Integer( sym.TRUE ) ); keywords.put( "truncatable", new Integer( sym.TRUNCATABLE ) ); keywords.put( "typedef", new Integer( sym.TYPEDEF ) ); keywords.put( "typeprefix", new Integer( sym.TYPEPREFIX ) ); keywords.put( "unsigned", new Integer( sym.UNSIGNED ) ); keywords.put( "union", new Integer( sym.UNION ) ); keywords.put( "ValueBase", new Integer( sym.VALUEBASE ) ); keywords.put( "valuetype", new Integer( sym.VALUETYPE ) ); keywords.put( "void", new Integer( sym.VOID ) ); keywords.put( "wchar", new Integer( sym.WCHAR ) ); keywords.put( "wstring", new Integer( sym.WSTRING ) ); keywords.put( "::", new Integer( sym.DBLCOLON ) ); keywords.put( "<<", new Integer( sym.LSHIFT ) ); keywords.put( ">>", new Integer( sym.RSHIFT ) ); keywords.put( "L\"", new Integer( sym.LDBLQUOTE ) ); // setup the mapping of lower case keywords to case sensitive // keywords for( Enumeration<String> e = keywords.keys(); e.hasMoreElements(); ) { String keyword = e.nextElement(); String keyword_lower_case = keyword.toLowerCase(); keywords_lower_case.put( keyword_lower_case, keyword ); } /* set up the table of single character symbols */ char_symbols.put( new Integer( ';' ), new Integer( sym.SEMI ) ); char_symbols.put( new Integer( ',' ), new Integer( sym.COMMA ) ); char_symbols.put( new Integer( '*' ), new Integer( sym.STAR ) ); char_symbols.put( new Integer( '.' 
), new Integer( sym.DOT ) ); char_symbols.put( new Integer( ':' ), new Integer( sym.COLON ) ); char_symbols.put( new Integer( '=' ), new Integer( sym.EQUALS ) ); char_symbols.put( new Integer( '+' ), new Integer( sym.PLUS ) ); char_symbols.put( new Integer( '-' ), new Integer( sym.MINUS ) ); char_symbols.put( new Integer( '{' ), new Integer( sym.LCBRACE ) ); char_symbols.put( new Integer( '}' ), new Integer( sym.RCBRACE ) ); char_symbols.put( new Integer( '(' ), new Integer( sym.LPAREN ) ); char_symbols.put( new Integer( ')' ), new Integer( sym.RPAREN ) ); char_symbols.put( new Integer( '[' ), new Integer( sym.LSBRACE ) ); char_symbols.put( new Integer( ']' ), new Integer( sym.RSBRACE ) ); char_symbols.put( new Integer( '<' ), new Integer( sym.LESSTHAN ) ); char_symbols.put( new Integer( '>' ), new Integer( sym.GREATERTHAN ) ); char_symbols.put( new Integer( '\'' ), new Integer( sym.QUOTE ) ); char_symbols.put( new Integer( '\"' ), new Integer( sym.DBLQUOTE ) ); char_symbols.put( new Integer( '\\' ), new Integer( sym.BSLASH ) ); char_symbols.put( new Integer( '^' ), new Integer( sym.CIRCUM ) ); char_symbols.put( new Integer( '&' ), new Integer( sym.AMPERSAND ) ); char_symbols.put( new Integer( '/' ), new Integer( sym.SLASH ) ); char_symbols.put( new Integer( '%' ), new Integer( sym.PERCENT ) ); char_symbols.put( new Integer( '~' ), new Integer( sym.TILDE ) ); char_symbols.put( new Integer( '|' ), new Integer( sym.BAR ) ); char_symbols.put( new Integer( ' ' ), new Integer( sym.SPACE ) ); /* set up reserved Java names */ java_keywords.add( "abstract" ); java_keywords.add( "assert"); java_keywords.add( "boolean" ); java_keywords.add( "break" ); java_keywords.add( "byte" ); java_keywords.add( "case" ); java_keywords.add( "catch" ); java_keywords.add( "char" ); java_keywords.add( "class" ); java_keywords.add( "const" ); java_keywords.add( "continue" ); java_keywords.add( "default" ); java_keywords.add( "do" ); java_keywords.add( "double" ); java_keywords.add( "else" ); 
java_keywords.add( "enum"); java_keywords.add( "extends" ); java_keywords.add( "false" ); java_keywords.add( "final" ); java_keywords.add( "finally" ); java_keywords.add( "float" ); java_keywords.add( "for" ); java_keywords.add( "goto" ); java_keywords.add( "if" ); java_keywords.add( "implements" ); java_keywords.add( "import" ); java_keywords.add( "instanceof" ); java_keywords.add( "int" ); java_keywords.add( "interface" ); java_keywords.add( "long" ); java_keywords.add( "native" ); java_keywords.add( "new" ); java_keywords.add( "null" ); java_keywords.add( "package" ); java_keywords.add( "private" ); java_keywords.add( "protected" ); java_keywords.add( "public" ); java_keywords.add( "return" ); java_keywords.add( "short" ); java_keywords.add( "static" ); java_keywords.add( "strictfp"); java_keywords.add( "super" ); java_keywords.add( "switch" ); java_keywords.add( "synchronized" ); java_keywords.add( "true" ); java_keywords.add( "this" ); java_keywords.add( "throw" ); java_keywords.add( "throws" ); java_keywords.add( "transient" ); java_keywords.add( "try" ); java_keywords.add( "void" ); java_keywords.add( "volatile" ); java_keywords.add( "while" ); java_keywords.add( "clone" ); java_keywords.add( "equals" ); java_keywords.add( "finalize" ); java_keywords.add( "getClass" ); java_keywords.add( "hashCode" ); java_keywords.add( "notify" ); java_keywords.add( "notifyAll" ); java_keywords.add( "toString" ); java_keywords.add( "wait" ); /* stack needs a topmost value */ ifStack.push( new Boolean( true ) ); /* read two characters of lookahead */ try { next_char = GlobalInputStream.read(); } catch( Exception e ) { org.jacorb.idl.parser.fatal_error( "Cannot read from file " + GlobalInputStream.currentFile().getAbsolutePath() + ", please check file name.", null ); } if( next_char == EOF_CHAR ) next_char2 = EOF_CHAR; else next_char2 = GlobalInputStream.read(); } public static void define( String symbol, String value ) { if( parser.logger.isLoggable(Level.ALL) ) 
org.jacorb.idl.parser.logger.log( Level.ALL, "Defining: " + symbol + " as " + value ); defines.put( symbol, value ); } public static void undefine( String symbol ) { if( parser.logger.isLoggable(Level.ALL) ) parser.logger.log(Level.ALL, "Un-defining: " + symbol); defines.remove( symbol ); } public static String defined( String symbol ) { return defines.get( symbol ); } /** * record information about the last lexical scope so that it can be * restored later */ public static int currentLine() { return current_line; } /** * return the current reading position */ public static PositionInfo getPosition() { return new PositionInfo( current_line, current_position, currentPragmaPrefix, line.toString(), GlobalInputStream.currentFile() ); } public static void restorePosition( PositionInfo p ) { current_line = p.line_no; currentPragmaPrefix = p.pragma_prefix; current_position = 0; } /** * Advance the scanner one character in the input stream. This moves * next_char2 to next_char and then reads a new next_char2. */ protected static void advance() throws java.io.IOException { int old_char; old_char = next_char; next_char = next_char2; next_char2 = GlobalInputStream.read(); line.append( (char)old_char ); /* count this */ current_position++; if( old_char == '\n' ) { current_line++; current_position = 1; line = new StringBuffer(); } } /** * Emit an error message. The message will be marked with both the * current line number and the position in the line. Error messages * are printed on standard error (System.err). * @param message the message to print. 
*/ public static void emit_error( String message ) { if (parser.logger.isLoggable(Level.SEVERE)) { if (GlobalInputStream.currentFile() != null) { parser.logger.log(Level.SEVERE, GlobalInputStream.currentFile().getAbsolutePath() + ", line: " + current_line + "(" + current_position + "): " + message + "\n\t" + line.toString()); } else { //error probably ocurred before parsing parser.logger.log(Level.SEVERE, message); } } error_count++; } public static void emit_error( String message, str_token t ) { if( t == null ) { emit_error( message ); } else { if (parser.logger.isLoggable(Level.SEVERE)) { parser.logger.log(Level.SEVERE, t.fileName + ", line:" + t.line_no + "(" + t.char_pos + "): " + message + "\n\t" + t.line_val); } error_count++; } } /** * Emit a warning message. The message will be marked with both the * current line number and the position in the line. Messages are * printed on standard error (System.err). * * @param message the message to print. */ public static void emit_warn( String message ) { if (parser.logger.isLoggable(Level.WARNING)) { parser.logger.log(Level.WARNING, message + " at " + current_line + "(" + current_position + "): \"" + line.toString() + "\""); } warning_count++; } public static void emit_warn( String message, str_token t ) { if( t == null ) { emit_warn( message ); } else { if (parser.logger.isLoggable(Level.WARNING)) { parser.logger.log(Level.WARNING, " at " + t.fileName + ", line:" + t.line_no + "(" + t.char_pos + "): " + message + "\n\t" + t.line_val); } warning_count++; } } /** * Determine if a character is ok to start an id. * @param ch the character in question. */ protected static boolean id_start_char( int ch ) { return ( ch >= 'a' && ch <= 'z' ) || ( ch >= 'A' && ch <= 'Z' ) || ( ch == '_' ); } /** * Determine if a character is ok for the middle of an id. * @param ch the character in question. 
*/ protected static boolean id_char( int ch ) { return id_start_char( ch ) || ( ch == '_' ) || ( ch >= '0' && ch <= '9' ); } /** * Try to look up a single character symbol, returns -1 for not found. * @param ch the character in question. */ protected static int find_single_char( int ch ) { Integer result; result = char_symbols.get( new Integer( (char)ch ) ); if( result == null ) return -1; else return result.intValue(); } /** * Handle swallowing up a comment. Both old style C and new style C++ * comments are handled. */ protected static void swallow_comment() throws java.io.IOException { /* next_char == '/' at this point */ /* is it a traditional comment */ if( next_char2 == '*' ) { /* swallow the opener */ advance(); advance(); /* swallow the comment until end of comment or EOF */ for( ; ; ) { /* if its EOF we have an error */ if( next_char == EOF_CHAR ) { emit_error( "Specification file ends inside a comment", null ); return; } /* if we can see the closer we are done */ if( next_char == '*' && next_char2 == '/' ) { advance(); advance(); return; } /* otherwise swallow char and move on */ advance(); } } /* is its a new style comment */ if( next_char2 == '/' ) { /* swallow the opener */ advance(); advance(); /* swallow to '\n', '\f', or EOF */ while( next_char != '\n' && next_char != '\f' && next_char != '\r' && next_char != EOF_CHAR ) { advance(); } return; } /* shouldn't get here, but... if we get here we have an error */ emit_error( "Malformed comment in specification -- ignored", null ); advance(); } /** * Preprocessor directives are handled here. 
*/ protected static void preprocess() throws java.io.IOException { for( ; ; ) { /* if its EOF we have an error */ if( next_char == EOF_CHAR ) { emit_error( "Specification file ends inside a preprocessor directive", null ); return; } else if( next_char != '#' ) { emit_error( "expected #, got " + (char)next_char + " instead!", null ); } else advance(); // skip '#' // the following is done to allow for # ifdef sloppiness while( ( ' ' == next_char ) || ( '\t' == next_char ) ) advance(); String dir = get_string(); if( dir.equals( "include" ) ) { if( !conditionalCompilation ) return; // Swallow between include and < or " swallow_whitespace(); boolean useIncludePath = ( next_char == '<' ); advance(); // skip `\"' or '<' String fname = get_string(); if( useIncludePath && ( next_char != '>' ) ) emit_error( "Syntax error in #include directive, expecting '>'" ); else if( !useIncludePath && ( next_char != '\"' ) ) emit_error( "Syntax error in #include directive, expecting \"" ); // Don't skip forward here and swallow \n,\f etc just in case the included // file does not end on a newline. The newline from the include will ensure // the included file is terminated. 
GlobalInputStream.include( fname, next_char2, useIncludePath ); current_line = 0; advance(); advance(); return; } else if( dir.equals( "define" ) ) { if( !conditionalCompilation ) return; swallow_whitespace(); String name = get_string(); StringBuffer text = new StringBuffer(); if( next_char == ' ' ) { advance(); } while( next_char != '\n' ) { if( next_char == '\\' ) { advance(); advance(); } text.append( (char)next_char ); advance(); } define( name, text.toString() ); } else if( dir.equals( "error" ) ) { if( !conditionalCompilation ) return; advance(); // skip ' ' String name = get_string(); emit_error( name ); } else if( dir.equals( "undef" ) ) { // Undefining symbol if( !conditionalCompilation ) return; swallow_whitespace(); String name = get_string(); undefine( name ); } else if( dir.equals( "if" ) || dir.equals( "elif" ) ) { if (! dir.equals( "elif" ) ) { ifStack.push( new Boolean( conditionalCompilation ) ); if( !conditionalCompilation ) return; } swallow_whitespace(); // the following snippet distinguishes between #if defined // and #if !defined boolean straightDefined = true; if( '!' == next_char ) { advance(); straightDefined = false; } String defineStr = get_string_no_paren(); if (defineStr.equals ("defined")) { swallow_whitespace(); boolean brackets = ( '(' == next_char ); if( brackets ) { advance(); // skip '(' swallow_whitespace(); // any whitespace after ( ? 
skip it } String name = get_string_no_paren(); if( brackets ) { swallow_whitespace(); if( parser.logger.isLoggable(Level.ALL) ) parser.logger.log(Level.ALL, "next char: " + next_char); if( ')' != next_char ) { emit_error( "Expected ) terminating #if defined", null ); return; } advance(); } if( straightDefined ) conditionalCompilation = ( null != defined( name ) ); else conditionalCompilation = ( null == defined( name ) ); } else if (defineStr.equals("0")) { conditionalCompilation = false; } else if (defineStr.equals("1")) { conditionalCompilation = true; } else { emit_error( "Expected \"defined\" following #if: " + dir, null ); return; } } else if( dir.equals( "ifdef" ) ) { ifStack.push( new Boolean( conditionalCompilation ) ); if( !conditionalCompilation ) return; swallow_whitespace(); String name = get_string(); conditionalCompilation = ( defined( name ) != null ); } else if( dir.equals( "ifndef" ) ) { ifStack.push( new Boolean( conditionalCompilation ) ); if( !conditionalCompilation ) return; swallow_whitespace(); String name = get_string(); conditionalCompilation = ( defined( name ) == null ); } else if( dir.equals( "else" ) ) { if( ifStack.peek().booleanValue() ) conditionalCompilation = !conditionalCompilation; } else if( dir.equals( "endif" ) ) { boolean b = ifStack.pop().booleanValue(); conditionalCompilation = b; } else if( dir.equals( "pragma" ) ) { if( !conditionalCompilation ) return; swallow_whitespace(); String name = get_string(); if( name.equals( "prefix" ) ) { swallow_whitespace(); currentPragmaPrefix = get_string(); } else if( name.equals( "version" ) ) { advance(); // skip ' ' String vname = get_string(); advance(); // skip ' ' String version = get_string(); String existingVersion = (String) parser.currentScopeData().versionMap.get (vname); if (existingVersion == null) { // Set version parser.currentScopeData().versionMap.put (vname, version); } else { // Check for version change if (! 
existingVersion.equals (version)) { emit_error ( "Version re-declaration with different value: #pragma version " + version, null ); } } String iname = (String)parser.currentScopeData().idMap.get (vname); if (iname != null) { if (version.equals (iname.substring (1 + iname.lastIndexOf (':'))) == false) { emit_error ("Declaring version with different version to already declared ID for " + name, null); } } } else if( name.equals( "ID" ) ) { advance(); // skip ' ' String iname = get_string(); advance(); // skip ' ' String id = get_string(); String existingID = (String) parser.currentScopeData().idMap.get (iname); if (existingID == null) { // Set id parser.currentScopeData().idMap.put (iname, id); } else { // Check for id change if (! existingID.equals (id)) { emit_error ( "ID re-declaration with different value: #pragma id " + id, null ); } } if( parser.currentScopeData().versionMap.get( iname ) != null ) { if( ((String)parser.currentScopeData().versionMap.get( iname )).equals ( id.substring (1 + id.lastIndexOf (':'))) == false ) { emit_error ("Declaring ID with different version to already declared version for " + name, null); } } } else if( name.equals( "inhibit_code_generation" ) ) { /* proprietary pragma of the JacORB IDL compiler */ parser.setInhibitionState( true ); // do something with it } else { emit_warn( "Unknown pragma, ignoring: #pragma " + name, null ); } } else { emit_error( "Unrecognized preprocessor directive " + dir, null ); } /* swallow to '\n', '\f', or EOF */ while( next_char != '\n' && next_char != '\f' && next_char != '\r' && next_char != EOF_CHAR ) { advance(); } return; } } // the following is used for parsing the #if defined(...) 
construct private static String get_string_no_paren() throws java.io.IOException { StringBuffer sb = new StringBuffer(); char c = (char)next_char; while( c != ' ' && c != '\t' && c != '\r' && c != '\n' && c != '\f' && c != EOF_CHAR && c != '\"' && c != '<' && c != '>' && c != '(' && c != ')' ) { sb.append( c ); advance(); c = (char)next_char; } return sb.toString(); } private static String get_string() throws java.io.IOException { StringBuffer sb = new StringBuffer( "" ); if( next_char == '\"' ) { advance(); while( next_char != '\"' ) { if( next_char == EOF_CHAR ) emit_error( "Unexpected EOF in string" ); sb.append( (char)next_char ); advance(); } } else { while( next_char != ' ' && next_char != '\t' && next_char != '\r' && next_char != '\n' && next_char != '\f' && next_char != EOF_CHAR && next_char != '\"' && next_char != '<' && next_char != '>') { sb.append( (char)next_char ); advance(); } } return sb.toString(); } /** * Process an identifier. * <P> * Identifiers begin with a letter, underscore, or dollar sign, * which is followed by zero or more letters, numbers, * underscores or dollar signs. This routine returns a str_token * suitable for return by the scanner or null, if the string that * was read expanded to a symbol that was #defined. In this case, * the symbol is expanded in place */ protected static token do_symbol() throws java.io.IOException { StringBuffer result = new StringBuffer(); String result_str; Integer keyword_num = null; char buffer[] = new char[ 1 ]; /* next_char holds first character of id */ buffer[ 0 ] = (char)next_char; result.append( buffer, 0, 1 ); advance(); /* collect up characters while they fit in id */ while( id_char( next_char ) ) { buffer[ 0 ] = (char)next_char; result.append( buffer, 0, 1 ); advance(); } /* extract a string */ result_str = result.toString(); /* try to look it up as a defined symbol... 
*/ String text = defined( result_str ); if( text != null ) { char[] next = {(char)next_char, (char)next_char2}; GlobalInputStream.insert( text + ( new String( next ) ) ); advance(); // restore lookahead advance(); // restore lookahead return null; } // check if it's a keyword keyword_num = (Integer)keywords.get( result_str ); if( keyword_num != null ) { if( isScope( result_str ) ) { parser.openScope(); } return new token( keyword_num.intValue() ); } // not a keyword, so treat as identifier after verifying // case sensitivity rules and prefacing with an _ // if it collides with a Java keyword. result_str = checkIdentifier( result_str ); if( null != result_str ) return new str_token( sym.ID, result_str, getPosition(), GlobalInputStream.currentFile().getName() ); else return null; } private static boolean isScope( String keyword ) { return ( keyword.equals( "module" ) || keyword.equals( "interface" ) || keyword.equals( "struct" ) || keyword.equals( "exception" ) || keyword.equals( "union" ) // keyword.equals("valuetype") ); } /** * Checks whether Identifier str is legal and returns it. If the * identifier is escaped with a leading underscore, that * underscore is removed. If a the legal IDL identifier clashes * with a Java reserved word, an underscore is prepended. * <BR> * @param str - the IDL identifier <BR> * <BR> * Prints an error msg if the identifier collides with an IDL * keyword. */ public static String checkIdentifier( String str ) { if( parser.logger.isLoggable(Level.FINEST) ) parser.logger.log(Level.FINEST, "checking identifier " + str); /* if it is an escaped identifier, look it up as a keyword, otherwise remove the underscore. */ if( str.charAt( 0 ) == '_' ) { str = str.substring( 1 ); } else { String colliding_keyword = null; if (org.jacorb.idl.parser.strict_names) { // check for name clashes strictly (i.e. case insensitive) colliding_keyword = keywords_lower_case.get(str.toLowerCase()); } else { // check for name clashes only loosely (i.e. 
case sensitive) colliding_keyword = (String)keywords.get(str); } if( colliding_keyword != null ) { emit_error( "Identifier " + str + " collides with keyword " + colliding_keyword + "." ); return null; } } /* clashes with a Java reserved word? */ if( needsJavaEscape( str ) ) { str = "_" + str; } return str; } /** * Only the most general name clashes with Java keywords * are caught here. Identifiers need to be checked again * at different other places in the compiler! */ private static boolean needsJavaEscape( String s ) { return ( java_keywords.contains( s ) ); } /** * called during the parse phase to catch clashes with * Java reserved words. */ public static boolean strictJavaEscapeCheck( String s ) { return ( ( !s.equals( "Helper" ) && s.endsWith( "Helper" ) ) || ( !s.equals( "Holder" ) && s.endsWith( "Holder" ) ) || ( !s.equals( "Operations" ) && s.endsWith( "Operations" ) ) || ( !s.equals( "Package" ) && s.endsWith( "Package" ) ) || ( !s.equals( "POA" ) && s.endsWith( "POA" ) ) || ( !s.equals( "POATie" ) && s.endsWith( "POATie" ) ) ); } public static boolean needsJavaEscape( Module m ) { String s = m.pack_name; if( parser.logger.isLoggable(Level.ALL) ) parser.logger.log(Level.ALL, "checking module name " + s); return ( strictJavaEscapeCheck( s ) ); } /** * Return one token. This is the main external interface to the scanner. * It consumes sufficient characters to determine the next input token * and returns it. */ public static token next_token() throws java.io.IOException { parser.set_included( GlobalInputStream.includeState() ); token result = real_next_token(); return result; } private static void swallow_whitespace() throws java.io.IOException { /* look for white space */ while( next_char == ' ' || next_char == '\t' || next_char == '\n' || next_char == '\f' || next_char == '\r' ) { /* advance past it and try the next character */ advance(); } } /** * The actual routine to return one token. 
* * @return token * @throws java.io.IOException */ protected static token real_next_token() throws java.io.IOException { int sym_num; /* if we found more than a single token last time, these tokens were remembered on the tokenStack - return the first one here */ if( !tokenStack.empty() ) return tokenStack.pop(); /* else */ for( ; ; ) { /* scan input until we return something */ if( !in_string ) { swallow_whitespace(); /* look for preprocessor directives */ if( (char)next_char == '#' ) { preprocess(); continue; } /* look for a comment */ if( next_char == '/' && ( next_char2 == '*' || next_char2 == '/' ) ) { /* swallow then continue the scan */ swallow_comment(); continue; } if( !conditionalCompilation ) { advance(); if( next_char == EOF_CHAR ) { emit_error( "EOF in conditional compilation!", null ); return null; } else continue; } /* look for COLON or DBLCOLON */ if( next_char == ':' ) { if( next_char2 == ':' ) { advance(); advance(); return new token( sym.DBLCOLON ); } else { advance(); return new token( sym.COLON ); } } /* leading L for wide strings */ if( next_char == 'L' && ( next_char2 =='\"' || next_char2 =='\'') ) { wide = true; advance(); if( next_char2 == '\"' ) { advance(); in_string = true; return new token( sym.LDBLQUOTE ); } // wide char literal may follow, but detecting that // is done below. } /* look for Shifts */ if( next_char == '<' ) { if( next_char2 == '<' ) { advance(); advance(); return new token( sym.LSHIFT ); } else { advance(); return new token( sym.LESSTHAN ); } } if( next_char == '>' ) { if( next_char2 == '>' ) { advance(); advance(); return new token( sym.RSHIFT ); } else { advance(); return new token( sym.GREATERTHAN ); } } /* leading 0: */ /* Try to scan octal/hexadecimal numbers, might even find a float */ if( next_char == '0' ) { int radix = 8; int digit = 0; advance(); if( next_char == '.' ) { StringBuffer f_string = new StringBuffer( "0." 
); advance(); while( next_char >= '0' && next_char <= '9' ) { f_string.append( (char)next_char ); advance(); } float f_val = ( new Float( f_string.toString() ) ).floatValue(); return new float_token( sym.FLOAT_NUMBER, f_val ); } else { // See if hexadecimal value if( next_char == 'x' || next_char == 'X' ) { advance(); radix = 16; } StringBuffer val = new StringBuffer( "0" ); digit = Character.digit( (char)next_char, radix ); while( digit != -1 ) { val.append( (char)next_char ); advance(); digit = Character.digit( (char)next_char, radix ); } String str = val.toString(); try { return new int_token( sym.NUMBER, Integer.parseInt( str, radix ) ); } catch( NumberFormatException ex ) { try { return new long_token( sym.LONG_NUMBER, Long.parseLong( str, radix ) ); } catch( NumberFormatException ex2 ) { emit_error( "Invalid octal/hex value: " + str ); } } return null; } } /* Try to scan integer, floating point or fixed point literals */ if (isDigit (((char)next_char)) || next_char == '.' || (next_char == '-' && isDigit (((char)next_char2)))) { StringBuffer value = new StringBuffer(); StringBuffer fraction = null; if ( next_char == '-' ) { value.append( (char)next_char ); advance(); } // Read integer part while( next_char >= '0' && next_char <= '9' ) { value.append( (char)next_char ); advance(); } // Read fraction if( next_char == '.' ) { fraction = new StringBuffer(); advance(); while( next_char >= '0' && next_char <= '9' ) { fraction.append( (char)next_char ); advance(); } } // Read exponent if( next_char == 'e' || next_char == 'E' ) { if( fraction == null ) fraction = new StringBuffer(); fraction.append( 'e' ); advance(); if( next_char == '-' || next_char == '+' ) { fraction.append( (char)next_char ); advance(); } while( next_char >= '0' && next_char <= '9' ) { fraction.append( (char)next_char ); advance(); } if( fraction.length() == 1 ) { emit_error( "Empty exponent in float/double." 
); continue; } return new float_token( sym.FLOAT_NUMBER, Float.valueOf( value.toString() + "." + fraction.toString() ).floatValue() ); } if( next_char == 'd' || next_char == 'D' ) { advance(); if( fraction == null ) fraction = new StringBuffer(); java.math.BigDecimal bi = new java.math.BigDecimal( value.toString() + "." + fraction.toString() ); return new fixed_token( sym.FIXED_NUMBER, bi ); } if( fraction == null ) { /* integer or long */ token tok = null; String str = value.toString(); try { tok = new int_token( sym.NUMBER, Integer.parseInt( str ) ); } catch( NumberFormatException ex ) { try { tok = new long_token ( sym.LONG_NUMBER, Long.parseLong( str ) ); } catch( NumberFormatException ex2 ) { try { // Not quite critical yet - lets try stuffing it into // a bigdecimal for later checking. tok = new fixed_token (sym.FIXED_NUMBER, new java.math.BigDecimal (str)); } catch (NumberFormatException ex3) { emit_error( "Invalid long value: " + str ); } } } return tok; } else { try { float f = Float.valueOf( value.toString() + "." + fraction.toString() ).floatValue(); return new float_token( sym.FLOAT_NUMBER, f ); } catch( NumberFormatException nf ) { emit_error( "Unexpected symbol: " + value.toString() + "." + fraction.toString() ); } } } /* look for a single character symbol */ sym_num = find_single_char( next_char ); /* upon an opening double quote, return the sym.DBLQUOTE token and continue scanning in the in_string branch */ if( (char)next_char == '\"' ) { in_string = true; advance(); return new token( sym.DBLQUOTE ); } if( (char)next_char == '\'' ) { advance(); token t = null; if( next_char == '\\' ) { // Now need to process escaped character. 
advance(); if( isDigit( (char)next_char ) ) { // Octal character char octal1 = '0'; char octal2 = '0'; char octal3 = (char)next_char; if( isDigit( (char)next_char2 ) ) { advance(); octal2 = octal3; octal3 = (char)next_char; if( isDigit( (char)next_char2 ) ) { advance(); octal1 = octal2; octal2 = octal3; octal3 = (char)next_char; } } t = new char_token ( sym.CH, (char)Integer.parseInt ( new String ( new char[]{octal1, octal2, octal3} ), 8 ) ); } else if( (char)next_char == 'x' ) { // Hexadecimal character advance(); char hex1 = '0'; char hex2 = (char)next_char; if( isHexLetterOrDigit( (char)next_char2 ) ) { advance(); hex1 = hex2; hex2 = (char)next_char; } else if( (char)next_char2 != '\'' ) { emit_error( "Illegal hex character" ); return null; } t = new char_token ( sym.CH, (char)Integer.parseInt ( new String ( new char[]{hex1, hex2} ), 16 ) ); } else if( (char)next_char == 'u' ) { if( wide == false ) { emit_error( "Unicode characters are only legal with wide character" ); return null; } else { // Hexadecimal character advance(); char uni1 = '0'; char uni2 = '0'; char uni3 = '0'; char uni4 = (char)next_char; if( isHexLetterOrDigit( (char)next_char2 ) ) { advance(); uni3 = uni4; uni4 = (char)next_char; if( isHexLetterOrDigit( (char)next_char2 ) ) { advance(); uni2 = uni3; uni3 = uni4; uni4 = (char)next_char; if( isHexLetterOrDigit( (char)next_char2 ) ) { advance(); uni1 = uni2; uni2 = uni3; uni3 = uni4; uni4 = (char)next_char; } else if( (char)next_char2 != '\'' ) { emit_error( "Illegal unicode character" ); return null; } } else if( (char)next_char2 != '\'' ) { emit_error( "Illegal unicode character" ); return null; } } else if( (char)next_char2 != '\'' ) { emit_error( "Illegal unicode character" ); return null; } t = new char_token ( sym.CH, (char)Integer.parseInt ( new String ( new char[]{uni1, uni2, uni3, uni4} ), 16 ) ); } } else { switch( next_char ) { case 'n': { t = new char_token( sym.CH, '\n' ); break; } case 't': { t = new char_token( sym.CH, '\t' ); 
break; } case 'v': { t = new char_token( sym.CH, '\013' ); break; } case 'b': { t = new char_token( sym.CH, '\b' ); break; } case 'r': { t = new char_token( sym.CH, '\r' ); break; } case 'f': { t = new char_token( sym.CH, '\f' ); break; } case 'a': { t = new char_token( sym.CH, '\007' ); break; } case '\\': { t = new char_token( sym.CH, '\\' ); break; } case '?': { t = new char_token( sym.CH, '?' ); break; } case '0': { t = new char_token( sym.CH, '\0' ); break; } case '\'': { t = new char_token( sym.CH, '\'' ); break; } case '\"': { t = new char_token( sym.CH, '\"' ); break; } default: { emit_error( "Invalid escape symbol \'" ); return null; } } } } else { t = new char_token( sym.CH, (char)next_char ); } advance(); if( (char)next_char == '\'' ) { tokenStack.push( new token( sym.QUOTE ) ); tokenStack.push( t ); advance(); } else { emit_error( "Expecting closing \'" ); return null; } wide = false; return new token( sym.QUOTE ); } if( sym_num != -1 ) { /* found one -- advance past it and return a token for it */ advance(); return new token( sym_num ); } /* look for an id or keyword */ if( id_start_char( next_char ) ) { token t = do_symbol(); if( t != null ) return t; else continue; } /* look for EOF */ if( next_char == EOF_CHAR ) { return new token( sym.EOF ); } } else // in_string { /* empty string ? */ if( (char)next_char == '\"' ) { in_string = false; advance(); return new token( sym.DBLQUOTE ); } StringBuffer result = new StringBuffer(); char previous = ' '; /* collect up characters while they fit in id */ while( true ) { if( next_char == '\\' ) { // Remap those characters that have no equivilant in java switch( next_char2 ) { case 'a': { result.append( "\\007" ); previous = 'a'; advance(); break; } case 'v': { result.append( "\\013" ); previous = 'v'; advance(); break; } case '?': { result.append( "?" 
); previous = '?'; advance(); break; } // Replace \xA0 by octal equivilant case 'x': { advance(); advance(); // Now next_char will be A and next_char2 will be 0 String octal = Integer.toOctalString ( Integer.parseInt ( new String ( new char[]{ (char)next_char, (char)next_char2} ), 16 ) ); if( octal.length() != 3 ) { if( octal.length() == 1 ) { octal = "0" + octal; } octal = "0" + octal; } result.append( "\\" + octal ); previous = (char)next_char2; advance(); break; } case 'u': { if( wide == false ) { emit_error( "Unicode characters are only legal with wide strings" ); return null; } else { result.append( (char)next_char ); result.append( (char)next_char2 ); advance(); advance(); char uni1 = (char)next_char; char uni2 = '0'; char uni3 = '0'; char uni4 = '0'; if( isHexLetterOrDigit( (char)next_char2 ) ) { advance(); uni2 = (char)next_char; if( isHexLetterOrDigit( (char)next_char2 ) ) { advance(); uni3 = (char)next_char; if( isHexLetterOrDigit( (char)next_char2 ) ) { advance(); uni4 = (char)next_char; } else { emit_error( "Illegal unicode character" ); return null; } } else { emit_error( "Illegal unicode character" ); return null; } } else { emit_error( "Illegal unicode character" ); return null; } previous = uni4; result.append( uni1 ); result.append( uni2 ); result.append( uni3 ); result.append( uni4 ); } break; } default: { previous = (char)next_char; result.append( (char)next_char ); } } } else { previous = (char)next_char; result.append( (char)next_char ); } advance(); // Handle backslash quote but exit if just quote if( ( (char)next_char ) == '\"' && previous != '\\' ) { break; } } wide = false; String s = result.toString(); /* build and return an id token with an attached string */ return new org.jacorb.idl.str_token( sym.ID, s, getPosition(), GlobalInputStream.currentFile().getName() ); } /* if we get here, we have an unrecognized character */ emit_warn( "Unrecognized character '" + new Character( (char)next_char ) + "'(" + next_char + ") -- ignored" ); /* 
advance past it */ advance(); } } /** * Returns true if character is US ASCII 0-9 * * @param c a value of type 'char' * @return a value of type 'boolean' */ static boolean isDigit( char c ) { boolean result = false; if( c >= '\u0030' ) { if( c <= '\u0039' ) { // Range 0030 [0] -> 0039 [9] result = true; } } return result; } /** * Returns true if character is US ASCII 0-9, a-f, A-F * * @param c a value of type 'char' * @return a value of type 'boolean' */ private static boolean isHexLetterOrDigit( char c ) { boolean result = false; if( c >= '\u0030' ) { if( c <= '\u0039' ) { // Range 0030 [0] -> 0039 [9] result = true; } else { if( c >= '\u0041' ) { if( c <= '\u0046' ) { // Range 0041 [A] -> 0046 [F] result = true; } if( c >= '\u0061' ) { if( c <= '\u0066' ) { // Range 0061 [a] -> 0066 [f] result = true; } } } } } return result; } }