/** Modifications Copyright (C) 1999 Raja Vallee-Rai (rvalleerai@sable.mcgill.ca) All rights reserved. Changes: - Added \\ to the list of possible escape characters for Strings. - March 15, 1999: $ does no longer significant substitution */ /* --- Copyright Jonathan Meyer 1996. All rights reserved. ----------------- > File: jasmin/src/jasmin/Scanner.java > Purpose: Tokenizer for Jasmin > Author: Jonathan Meyer, 10 July 1996 */ /* Scanner.java - class for tokenizing Jasmin files. This is rather * cheap and cheerful. */ package jasmin; import jas.*; import java_cup.runtime.*; import java.util.*; import java.io.InputStream; class Scanner implements java_cup.runtime.Scanner { InputStream inp; // single lookahead character int next_char; // temporary buffer char chars[]; char secondChars[]; char[] unicodeBuffer; // true if we have not yet emitted a SEP ('\n') token. This is a bit // of a hack so to strip out multiple newlines at the start of the file // and replace them with a single SEP token. (for some reason I can't // write the CUP grammar to accept multiple newlines at the start of the // file) boolean is_first_sep; // Whitespace characters static final String WHITESPACE = " \n\t\r"; // Separator characters static final String SEPARATORS = WHITESPACE + ":="; // used for error reporting to print out where an error is on the line public int line_num, char_num, token_line_num; public StringBuffer line; // used by the .set directive to define new variables. public Hashtable dict = new Hashtable(); // // returns true if a character code is a whitespace character // protected static boolean whitespace(int c) { return (WHITESPACE.indexOf(c) != -1); } // // returns true if a character code is a separator character // protected static boolean separator(int c) { return (SEPARATORS.indexOf(c) != -1); } // // Advanced the input by one character // protected void advance() throws java.io.IOException { next_char = inp.read(); if (next_char == '\n') { // a new line line_num++; char_num = 0; line.setLength(0); } else { line.append((char)next_char); char_num++; } } // // initialize the scanner // final static int BIGNUM=65000; public Scanner(InputStream i) throws java.io.IOException { inp = i; line_num = 1; char_num = 0; line = new StringBuffer(); chars = new char[BIGNUM]; secondChars = new char[BIGNUM]; unicodeBuffer = new char[4]; is_first_sep = true; advance(); } int readOctal(int firstChar) throws java.io.IOException { int d1, d2, d3; d1 = firstChar; advance(); d2 = next_char; advance(); d3 = next_char; return ((d1-'0')&7) * 64 + ((d2-'0')&7) * 8 + ((d3-'0')&7); } // // recognize and return the next complete symbol // public Symbol next_token() throws java.io.IOException, jasError { token_line_num = line_num; for (;;) { switch (next_char) { case ';': // a comment do { advance(); } while (next_char != '\n'); case '\n': // return single SEP token (skip multiple newlines // interspersed with whitespace or comments) for (;;) { do { advance(); } while (whitespace(next_char)); if (next_char == ';') { do { advance(); } while (next_char != '\n'); } else { break; } } if (is_first_sep) { return next_token(); } token_line_num = line_num; return new Symbol(sym.SEP); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': case '+': case '.': // a number { int pos = 0; // record that we have found first item is_first_sep = false; chars[0] = (char)next_char; pos++; for (;;) { advance(); if (separator(next_char)) { break; } try { chars[pos] = (char)next_char; } catch (ArrayIndexOutOfBoundsException abe) { char[] tmparray = new char[chars.length*2]; System.arraycopy(chars, 0, tmparray, 0, chars.length); chars = tmparray; chars[pos] = (char)next_char; } pos++; } String str = new String(chars, 0, pos); Symbol tok; if(str.equals("+DoubleInfinity")) return new Symbol(sym.Num, new Double(1.0/0.0)); if(str.equals("+DoubleNaN")) return new Symbol(sym.Num, new Double(0.0d/0.0)); if(str.equals("+FloatNaN")) return new Symbol(sym.Num, new Float(0.0f/0.0)); if(str.equals("-DoubleInfinity")) return new Symbol(sym.Num, new Double(-1.0/0.0)); if(str.equals("+FloatInfinity")) return new Symbol(sym.Num, new Float(1.0f/0.0f)); if(str.equals("-FloatInfinity")) return new Symbol(sym.Num, new Float(-1.0f/0.0f)); // This catches directives like ".method" if ((tok = ReservedWords.get(str)) != null) { return tok; } Number num; try { num = ScannerUtils.convertNumber(str); } catch (NumberFormatException e) { if (chars[0] == '.') { throw new jasError("Unknown directive or badly formed number."); } else { throw new jasError("Badly formatted number"); } } if (num instanceof Integer) { return new Symbol(sym.Int, new Integer(num.intValue())); } else { return new Symbol(sym.Num, num); } } case '"': // quoted strings { int pos = 0; is_first_sep = false; for (;;) { advance(); if (next_char == '\\') { advance(); switch (next_char) { case 'n': next_char = '\n'; break; case 'r': next_char = '\r'; break; case 't': next_char = '\t'; break; case 'f': next_char = '\f'; break; case 'b': next_char = '\b'; break; case 'u': { advance(); unicodeBuffer[0] = (char) next_char; advance(); unicodeBuffer[1] = (char) next_char; advance(); unicodeBuffer[2] = (char) next_char; advance(); unicodeBuffer[3] = (char) next_char; // System.out.println(unicodeBuffer[0] + ":" + unicodeBuffer[1] + ":" + unicodeBuffer[2] + ":" + unicodeBuffer[3] + ":"); next_char = (char) Integer.parseInt(new String(unicodeBuffer, 0, 4), 16); // System.out.println("value: " + next_char); break; } case '"': next_char = '"'; break; case '\'': next_char = '\''; break; case '\\': next_char = '\\'; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': next_char = readOctal(next_char); break; default: throw new jasError("Bad backslash escape sequence"); } } else if (next_char == '"') { break; } try { chars[pos] = (char)next_char; } catch (ArrayIndexOutOfBoundsException abe) { char[] tmparray = new char[chars.length*2]; System.arraycopy(chars, 0, tmparray, 0, chars.length); chars = tmparray; chars[pos] = (char)next_char; } pos++; } advance(); // skip close quote return new Symbol(sym.Str, new String(chars, 0, pos)); } case ' ': case '\t': case '\r': // whitespace advance(); break; case '=': // EQUALS token advance(); is_first_sep = false; return new Symbol(sym.EQ); case ':': // COLON token advance(); is_first_sep = false; return new Symbol(sym.COLON); case -1: // EOF token is_first_sep = false; char_num = -1; line.setLength(0); return new Symbol(sym.EOF); default: { // read up until a separatorcharacter int pos = 0; int secondPos = 0; chars[0] = (char)next_char; is_first_sep = false; pos++; for (;;) { advance(); if (separator(next_char)) { break; } try { chars[pos] = (char)next_char; } catch (ArrayIndexOutOfBoundsException abe) { char[] tmparray = new char[chars.length*2]; System.arraycopy(chars, 0, tmparray, 0, chars.length); chars = tmparray; chars[pos] = (char)next_char; } pos++; } secondPos = 0; // Parse all the unicode escape sequences for(int i = 0; i < pos; i++) { if(chars[i] == '\\' && (i + 5) < pos && chars[i+1] == 'u') { int intValue = Integer.parseInt(new String(chars, i+2, 4), 16); try { secondChars[secondPos] = (char) intValue; } catch (ArrayIndexOutOfBoundsException abe) { char[] tmparray = new char[secondChars.length*2]; System.arraycopy(secondChars, 0, tmparray, 0, secondChars.length); secondChars = tmparray; secondChars[secondPos] = (char)intValue; } secondPos++; i += 5; } else { try { secondChars[secondPos] = chars[i]; } catch (ArrayIndexOutOfBoundsException abe) { char[] tmparray = new char[secondChars.length*2]; System.arraycopy(secondChars, 0, tmparray, 0, secondChars.length); secondChars = tmparray; secondChars[secondPos] = chars[i]; } secondPos++; } } // convert the byte array into a String String str = new String(secondChars, 0, secondPos); Symbol tok; if ((tok = ReservedWords.get(str)) != null) { // Jasmin keyword or directive return tok; } else if (InsnInfo.contains(str)) { // its a JVM instruction return new Symbol(sym.Insn, str); } /*else if (str.charAt(0) == '$') { // Perform variable substitution Object v; if ((v = dict.get(str.substring(1))) != null) { return ((Symbol)v); } } */ else { // Unrecognized string token (e.g. a classname) return new Symbol(sym.Word, str); } } /* default */ } /* switch */ } /* for */ } }; /* --- Revision History --------------------------------------------------- --- Jonathan Meyer, Feb 8 1997 Converted to be non-static --- Jonathan Meyer, Oct 30 1996 Added support for more \ escapes in quoted strings (including octals). --- Jonathan Meyer, Oct 1 1996 Added .interface and .implements --- Jonathan Meyer, July 25 1996 changed IN to IS. Added token_line_num, which is the line number of the last token returned by next_token(). --- Jonathan Meyer, July 24 1996 added mods to recognize '\r' as whitespace. */