// Scanner.java example
//
// Explorer
// apkinspector-master
/** 
    Modifications Copyright (C) 1999 Raja Vallee-Rai (rvalleerai@sable.mcgill.ca)
    All rights reserved.                                              
   
    Changes:
        - Added \\ to the list of possible escape characters for Strings.
        - March 15, 1999: $ no longer signifies variable substitution.
*/

/* --- Copyright Jonathan Meyer 1996. All rights reserved. -----------------
 > File:        jasmin/src/jasmin/Scanner.java
 > Purpose:     Tokenizer for Jasmin
 > Author:      Jonathan Meyer, 10 July 1996
 */

/* Scanner.java - class for tokenizing Jasmin files. This is rather
 * cheap and cheerful.
*/

package jasmin;

import jas.*;
import java_cup.runtime.*;
import java.util.*;
import java.io.InputStream;

/**
 * Tokenizer for Jasmin source files, implementing the CUP runtime
 * {@code Scanner} interface.
 *
 * <p>The scanner reads its {@link InputStream} one byte at a time, keeping a
 * single character of lookahead in {@link #next_char}. End of input is
 * represented by {@code -1} (the value {@code InputStream.read()} returns);
 * every scanning loop treats it as a terminator so that a file whose last
 * line lacks a trailing newline still tokenizes instead of looping forever.
 */
class Scanner implements java_cup.runtime.Scanner {
    InputStream inp;

    // single lookahead character, or -1 once the end of input was reached
    int next_char;

    // scratch buffers: raw token text, token text after \\uXXXX decoding,
    // and the four hex digits of a \\uXXXX escape inside a quoted string
    char chars[];
    char secondChars[];
    char[] unicodeBuffer;

    // true if we have not yet emitted a SEP ('\n') token. This is a bit
    // of a hack so to strip out multiple newlines at the start of the file
    // and replace them with a single SEP token. (for some reason I can't
    // write the CUP grammar to accept multiple newlines at the start of the
    // file)
    boolean is_first_sep;

    // Whitespace characters
    static final String WHITESPACE = " \n\t\r";

    // Separator characters
    static final String SEPARATORS = WHITESPACE + ":=";

    // used for error reporting to print out where an error is on the line
    public int line_num, char_num, token_line_num;
    public StringBuffer line;

    // used by the .set directive to define new variables.
    public Hashtable dict = new Hashtable();

    // initial size of the scratch buffers; they are doubled on demand
    final static int BIGNUM=65000;

    /**
     * Returns true if a character code is a whitespace character.
     *
     * @param c character code to test
     * @return true when {@code c} occurs in {@link #WHITESPACE}
     */
    protected static boolean whitespace(int c) {
        return (WHITESPACE.indexOf(c) != -1);
    }

    /**
     * Returns true if a character code is a separator character.
     * End of input (-1) is not a separator; callers check it separately.
     *
     * @param c character code to test
     * @return true when {@code c} occurs in {@link #SEPARATORS}
     */
    protected static boolean separator(int c) {
        return (SEPARATORS.indexOf(c) != -1);
    }

    /**
     * Advances the input by one character, updating the position fields
     * used for error reporting.
     *
     * <p>A newline starts a new line: {@code line_num} is incremented and
     * {@code char_num} / {@code line} are reset. Any other real character is
     * appended to {@code line}. At end of input nothing is recorded, so the
     * current line text is not polluted with a bogus character.
     *
     * @throws java.io.IOException if reading the underlying stream fails
     */
    protected void advance() throws java.io.IOException
    {
        next_char = inp.read();
        if (next_char == '\n') {
            // a new line
            line_num++;
            char_num = 0;
            line.setLength(0);
        } else if (next_char != -1) {
            line.append((char)next_char);
            char_num++;
        }
    }

    /**
     * Initializes the scanner and reads the first lookahead character.
     *
     * @param i stream to tokenize
     * @throws java.io.IOException if the first read fails
     */
    public Scanner(InputStream i) throws java.io.IOException
    {
        inp = i;
        line_num = 1;
        char_num = 0;
        line = new StringBuffer();
        chars = new char[BIGNUM];
        secondChars = new char[BIGNUM];
        unicodeBuffer = new char[4];
        is_first_sep = true;
        advance();
    }

    /**
     * Reads a three-digit octal escape whose first digit has already been
     * read, consuming exactly two further characters.
     *
     * <p>NOTE(review): the two following characters are not checked to be
     * octal digits (only their low three bits are used), so a shorter
     * escape such as {@code \0} directly before the closing quote consumes
     * that quote. Left unchanged because reporting the problem would need a
     * new checked exception in this method's signature.
     *
     * @param firstChar the first octal digit
     * @return the combined value of the three digits
     * @throws java.io.IOException if reading the underlying stream fails
     */
    int readOctal(int firstChar) throws java.io.IOException {
        int d1, d2, d3;
        d1 = firstChar;
        advance();
        d2 = next_char;
        advance();
        d3 = next_char;
        return ((d1-'0')&7) * 64 + ((d2-'0')&7) * 8 + ((d3-'0')&7);
    }

    /**
     * Recognizes and returns the next complete symbol.
     *
     * @return the next token; {@code sym.EOF} once the input is exhausted
     * @throws java.io.IOException if reading the underlying stream fails
     * @throws jasError on a malformed number, directive, escape sequence or
     *         unterminated quoted string
     */
    public Symbol next_token()
                throws java.io.IOException, jasError
    {
        token_line_num = line_num;

        for (;;) {
            switch (next_char) {

            case ';':
                // a comment runs to the end of the line and then counts
                // as a line break
                skipComment();
                // fall through

            case '\n':
                // return single SEP token (skip multiple newlines
                // interspersed with whitespace or comments)
                skipBlankLines();
                if (is_first_sep) {
                    // leading newlines of the file produce no SEP token
                    return next_token();
                }
                token_line_num = line_num;
                return new Symbol(sym.SEP);

            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            case '-': case '+':
            case '.':                       // a number (or a directive)
                return scanNumberOrDirective();

            case '"':                       // quoted strings
                return scanString();

            case ' ':
            case '\t':
            case '\r':                      // whitespace
                advance();
                break;

            case '=':                       // EQUALS token
                advance();
                is_first_sep = false;
                return new Symbol(sym.EQ);

            case ':':                       // COLON token
                advance();
                is_first_sep = false;
                return new Symbol(sym.COLON);

            case -1:                        // EOF token
                is_first_sep = false;
                char_num = -1;
                line.setLength(0);
                return new Symbol(sym.EOF);

            default:
                return scanWord();
            }
        }
    }

    // Skips the rest of a ';' comment, stopping on the newline or at EOF.
    private void skipComment() throws java.io.IOException {
        do {
            advance();
        } while (next_char != '\n' && next_char != -1);
    }

    // Consumes the current line break plus any following whitespace,
    // blank lines and comment lines.
    private void skipBlankLines() throws java.io.IOException {
        for (;;) {
            do {
                advance();
            } while (whitespace(next_char));
            if (next_char != ';') {
                return;
            }
            skipComment();
        }
    }

    // Stores c at buf[index], doubling the buffer first when it is full;
    // returns the (possibly reallocated) buffer.
    private static char[] store(char[] buf, int index, char c) {
        if (index >= buf.length) {
            char[] grown = new char[Math.max(buf.length * 2, index + 1)];
            System.arraycopy(buf, 0, grown, 0, buf.length);
            buf = grown;
        }
        buf[index] = c;
        return buf;
    }

    // Copies the token starting at the lookahead character into 'chars',
    // up to (not including) the next separator or EOF; returns its length.
    private int readRawToken() throws java.io.IOException {
        int length = 0;
        chars = store(chars, length++, (char)next_char);
        for (;;) {
            advance();
            if (next_char == -1 || separator(next_char)) {
                return length;
            }
            chars = store(chars, length++, (char)next_char);
        }
    }

    // Decodes four hex digits at buf[off..off+3]; returns -1 if any of
    // them is not a hex digit.
    private static int hex4(char[] buf, int off) {
        int value = 0;
        for (int k = 0; k < 4; k++) {
            int digit = Character.digit(buf[off + k], 16);
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }

    // Scans a token that starts like a number: a numeric literal, one of
    // the named infinities/NaNs, or a reserved word such as ".method".
    private Symbol scanNumberOrDirective()
                throws java.io.IOException, jasError
    {
        // record that we have found first item
        is_first_sep = false;

        String str = new String(chars, 0, readRawToken());

        if (str.equals("+DoubleInfinity"))
            return new Symbol(sym.Num, new Double(Double.POSITIVE_INFINITY));
        if (str.equals("-DoubleInfinity"))
            return new Symbol(sym.Num, new Double(Double.NEGATIVE_INFINITY));
        if (str.equals("+DoubleNaN"))
            return new Symbol(sym.Num, new Double(Double.NaN));
        if (str.equals("+FloatInfinity"))
            return new Symbol(sym.Num, new Float(Float.POSITIVE_INFINITY));
        if (str.equals("-FloatInfinity"))
            return new Symbol(sym.Num, new Float(Float.NEGATIVE_INFINITY));
        if (str.equals("+FloatNaN"))
            return new Symbol(sym.Num, new Float(Float.NaN));

        // This catches directives like ".method"
        Symbol tok = ReservedWords.get(str);
        if (tok != null) {
            return tok;
        }

        Number num;
        try {
            num = ScannerUtils.convertNumber(str);
        } catch (NumberFormatException e) {
            if (str.charAt(0) == '.') {
                throw new jasError("Unknown directive or badly formed number.");
            } else {
                throw new jasError("Badly formatted number");
            }
        }

        if (num instanceof Integer) {
            return new Symbol(sym.Int, new Integer(num.intValue()));
        } else {
            return new Symbol(sym.Num, num);
        }
    }

    // Scans a double-quoted string; the lookahead is the opening quote.
    private Symbol scanString() throws java.io.IOException, jasError {
        int pos = 0;

        is_first_sep = false;

        for (;;) {
            advance();
            if (next_char == -1) {
                // without this check an unclosed quote would loop forever
                throw new jasError("Unterminated string constant");
            }

            int c;
            if (next_char == '\\') {
                advance();
                c = decodeEscape();
            } else if (next_char == '"') {
                break;
            } else {
                c = next_char;
            }
            chars = store(chars, pos++, (char)c);
        }
        advance(); // skip close quote
        return new Symbol(sym.Str, new String(chars, 0, pos));
    }

    // Decodes the backslash escape whose selector character is the current
    // lookahead and returns the character it stands for.
    private int decodeEscape() throws java.io.IOException, jasError {
        switch (next_char) {
        case 'n': return '\n';
        case 'r': return '\r';
        case 't': return '\t';
        case 'f': return '\f';
        case 'b': return '\b';
        case 'u':
            {
                for (int k = 0; k < 4; k++) {
                    advance();
                    // EOF becomes '\uffff' here, which is rejected below
                    unicodeBuffer[k] = (char)next_char;
                }
                int value = hex4(unicodeBuffer, 0);
                if (value < 0) {
                    throw new jasError("Bad unicode escape sequence");
                }
                return value;
            }
        case '"': return '"';
        case '\'': return '\'';
        case '\\': return '\\';

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7':
            return readOctal(next_char);

        default:
            // also reached when the input ends right after the backslash
            throw new jasError("Bad backslash escape sequence");
        }
    }

    // Scans any other token: a reserved word, a JVM instruction or a plain
    // word (e.g. a class name), decoding \\uXXXX escapes in its text.
    private Symbol scanWord() throws java.io.IOException, jasError {
        is_first_sep = false;

        // read up until a separator character
        int length = readRawToken();

        // Parse all the unicode escape sequences
        int secondPos = 0;
        for (int i = 0; i < length; i++) {
            char c = chars[i];
            // a \\u needs four more characters inside the token to count
            // as an escape; otherwise the backslash is kept literally
            if (c == '\\' && (i + 5) < length && chars[i+1] == 'u') {
                int value = hex4(chars, i + 2);
                if (value < 0) {
                    throw new jasError("Bad unicode escape sequence");
                }
                c = (char)value;
                i += 5;
            }
            secondChars = store(secondChars, secondPos++, c);
        }

        String str = new String(secondChars, 0, secondPos);

        // Note: "$name" variable substitution through 'dict' is disabled;
        // such tokens are returned as ordinary words.
        Symbol tok = ReservedWords.get(str);
        if (tok != null) {
            // Jasmin keyword or directive
            return tok;
        } else if (InsnInfo.contains(str)) {
            // its a JVM instruction
            return new Symbol(sym.Insn, str);
        } else {
            // Unrecognized string token (e.g. a classname)
            return new Symbol(sym.Word, str);
        }
    }

}

/* --- Revision History ---------------------------------------------------
--- Jonathan Meyer, Feb 8 1997
    Converted to be non-static
--- Jonathan Meyer, Oct 30 1996
    Added support for more \ escapes in quoted strings (including octals).
--- Jonathan Meyer, Oct 1 1996
    Added .interface and .implements
--- Jonathan Meyer, July 25 1996
    changed IN to IS. Added token_line_num, which is the line number of the
    last token returned by next_token().
--- Jonathan Meyer, July 24 1996 added mods to recognize '\r' as whitespace.
*/