// InputReader.java example
//
// Explorer
// jvmnotebook-master
// - jvmnotebook
// File: InputReader.java
// Date: 7/4/2008
package org.bot.jscheme;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Locale;
import java.util.Stack;

/** 
 * Generic input reader class for most scheme functions.  The
 * InputReader also includes the token parsing functionality.
 *  
 * @author Berlin Brown (refactoring modifications)
 * @author Peter Norvig, peter@norvig.com http://www.norvig.com  
 * Copyright 1998 Peter Norvig, see http://www.norvig.com/license.html 
 */
public class InputReader {

	/** Sentinel object returned by {@link #read()} once the input is exhausted. */
	public static final String EOF = "#!EOF";

	public static final char TOK_LEFT_PAREN = '('; 
	public static final char TOK_RIGHT_PAREN = ')'; 
	public static final char TOK_SINGLE_QUOT = '\''; 
	public static final char TOK_SEMICOLON = ';';
	public static final char TOK_DOUBLE_QUOT = '"'; 
	public static final char TOK_COMMA = ',';
	public static final char TOK_BACK_QUOT = '`';

	/** Value returned by {@link Reader#read()} when the stream is exhausted. */
	private static final int END_OF_STREAM = -1;

	/**
	 * Bit set (bit n stands for code point n) of the control characters that
	 * count as whitespace: U+0009 through U+000D and U+001C through U+001F.
	 */
	private static final int CONTROL_WHITESPACE_BITS =
			(1 << '\t')
			| (1 << '\n')
			| (1 << '\u000B')
			| (1 << '\u000C')
			| (1 << '\r')
			| (1 << '\u001C')
			| (1 << '\u001D')
			| (1 << '\u001E')
			| (1 << '\u001F');

	private final Reader inputReader;

	/** Scratch buffer reused for every string literal and atom. */
	private final StringBuffer buff = new StringBuffer();

	/**
	 * Pushed-back character codes. The raw <code>int</code> returned by
	 * {@link Reader#read()} is stored (not a <code>char</code>) so that the
	 * end-of-stream marker -1 survives the round trip.
	 */
	private final Stack<Integer> charStack = new Stack<Integer>();

	/** Pushed-back tokens, re-delivered by the next call to nextToken(). */
	private final Stack<Object> tokenStack = new Stack<Object>();

	/**
	 * Creates a reader over the given byte stream. Bytes are decoded with the
	 * platform default charset, because no charset is passed to
	 * {@link InputStreamReader}.
	 *
	 * @param in stream to read Scheme source text from
	 */
	public InputReader(InputStream in) {
		this.inputReader = new InputStreamReader(new BufferedInputStream(in));
	}

	/**
	 * (This code is only included to show you a lighter implementation of this
	 * method)
	 * 
	 * Determines if a character is Java whitespace. This includes Unicode
	 * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
	 * PARAGRAPH_SEPARATOR) except the non-breaking spaces (U+00A0, U+2007 and
	 * U+202F); and these characters: U+0009, U+000A, U+000B, U+000C, U+000D,
	 * U+001C, U+001D, U+001E and U+001F.
	 * <br>
	 * 
	 * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
	 * 
	 * The control-character bit test follows the implementation found in GNU
	 * Classpath (GNU Classpath is free software; you can redistribute it
	 * and/or modify it under the terms of the GNU General Public License).
	 * The general category is looked up with {@link Character#getType(char)}
	 * because the Classpath attribute table is not available here; using the
	 * character code itself as the attribute word misclassified characters
	 * (for example 'L' as whitespace and ' ' as non-whitespace).
	 *
	 * @param ch character to test
	 * @return true if ch is Java whitespace, else false
	 * @see Character#isWhitespace(char)
	 */
	public static boolean isWhitespaceGNUClasspath(char ch) {
		final int category = Character.getType(ch);
		final boolean separator = category == Character.SPACE_SEPARATOR
				|| category == Character.LINE_SEPARATOR
				|| category == Character.PARAGRAPH_SEPARATOR;
		final boolean nonBreaking = ch == '\u00A0' || ch == '\u2007' || ch == '\u202F';
		if (separator && !nonBreaking) {
			return true;
		}
		// Only code points 0..31 fit in the 32-bit control mask.
		return ch <= '\u001F' && ((1 << ch) & CONTROL_WHITESPACE_BITS) != 0;
	}

	/**
	 * Lightweight whitespace test covering only the ASCII range: tab, newline,
	 * vertical tab, form feed, carriage return, the four separator control
	 * characters (codes 28-31) and the space character.
	 *
	 * @param   c the character to be tested.
	 * @return  <code>true</code> if the character is one of the ten
	 *          characters listed above; <code>false</code> otherwise.
	 * @see     java.lang.Character#isWhitespace(char)
	 */
	public static boolean isWhitespace(char c) {
		switch (c) {
		case 9:		// Horizontal Tab 
		case 10:	// Newline
		case 11:	// Vertical Tab
		case 12:	// New Page
		case 13:	// Carriage Return
		case 28:	// File separator
		case 29:	// Group separator
		case 30:	// Record separator
		case 31:	// Unit separator	
		case 32:	// Space
			return true;
		default:
			return false;
		}
	}

	/**
	 * Read and return a Scheme expression, or EOF.
	 * 
	 * A "(" token starts a list that is completed by readTail(); stray ")"
	 * tokens are reported on standard output and skipped; any other token is
	 * returned as-is.
	 *
	 * @return the expression read, or {@link #EOF} at end of input or when an
	 *         {@link IOException} occurs
	 * @throws RuntimeException if the input ends inside an unfinished list
	 */
	public Object read() {
		try {
			Object token = nextToken();
			// Skip any number of unbalanced closing parentheses.
			while (")".equals(token)) {
				System.out.println("WARN: Extra ')' ignored.");
				token = nextToken();
			}
			if ("(".equals(token)) {
				return readTail();
			}
			return token;
		} catch (IOException e) {
			System.out.println("WARN: On input, exception: " + e);
			return EOF;
		} // End try - catch
	}

	/**
	 * Reads the remainder of a list whose opening parenthesis has already been
	 * consumed.
	 *
	 * @return <code>null</code> for the empty tail, the expression after a
	 *         "." for a dotted tail, otherwise the result of
	 *         <code>SchemeUtil.cons</code> applied to the next expression and
	 *         the rest of the list
	 * @throws IOException if the underlying reader fails
	 * @throws RuntimeException if end of input is reached before the list closes
	 */
	private Object readTail() throws IOException {
		Object token = nextToken();
		System.out.println("trace: readTail(): " + token);
		if (EOF.equals(token)) {
			final String msg = "ERROR: readTail() - EOF during read.";
			System.err.println(msg);
			throw new RuntimeException(msg);
		}
		if (")".equals(token)) {
			return null;
		}
		// Atom tokens are freshly built strings, so they must be compared
		// with equals(); an identity test against the literal never matches.
		if (".".equals(token)) {
			Object result = read();
			token = nextToken();
			if (!")".equals(token)) {
				System.out.println("WARN: Missing ')'? Received " + token + " after .");
			}
			return result;
		}
		// Hand the token back so that read() sees it as the start of the
		// next element.
		tokenStack.push(token);
		return SchemeUtil.cons(read(), readTail());
	}
	
	/**
	 * Collect the set of characters from the input stream until whitespace,
	 * end of stream or one of the language tokens is found. The collected
	 * characters are appended to the shared buffer; the terminating character
	 * code (possibly -1) is pushed back for the next call to nextToken().
	 * 
	 * @param o_ch first character of the token; always appended
	 * @throws IOException if the underlying reader fails
	 */
	private void buildGenericToken(final int o_ch) throws IOException {
		int ch = o_ch;
		do {
			// Build alpha numeric, atom/symbol characters/tokens into the buffer
			buff.append((char) ch);
			ch = inputReader.read();
		} while (!isTokenTerminator(ch)); // End of do - while
		
		// Keep the int code: narrowing -1 to char would turn end-of-stream
		// into U+FFFF and EOF would never be detected afterwards.
		charStack.push(Integer.valueOf(ch));
	}

	/** Tells whether the given character code ends a generic (atom) token. */
	private static boolean isTokenTerminator(final int ch) {
		switch (ch) {
			case END_OF_STREAM:
			case TOK_LEFT_PAREN:
			case TOK_RIGHT_PAREN:
			case TOK_SINGLE_QUOT:
			case TOK_SEMICOLON:
			case TOK_DOUBLE_QUOT:
			case TOK_COMMA:
			case TOK_BACK_QUOT:
				return true;
			default:
				return Character.isWhitespace((char) ch);
		}
	}
	
	/**
	 * Returns the next token: a previously pushed-back token if there is one,
	 * otherwise the next token scanned from the pushed-back character or the
	 * input stream.
	 *
	 * @return {@link #EOF}, one of the strings "(", ")", "'", "`", a
	 *         <code>char[]</code> for a string literal, a {@link Boolean} for
	 *         #t / #f, a {@link Double} for a number, or a lower-cased
	 *         {@link String} for any other atom
	 * @throws IOException if the underlying reader fails
	 */
	private Object nextToken() throws IOException {
		int ch;

		// See if we should re-use a pushed char or token
		// Task 1: Pop the token and character stacks
		if (!tokenStack.empty()) {
			return tokenStack.pop();
		} else if (!charStack.empty()) {
			ch = charStack.pop().intValue();
		} else {
			ch = inputReader.read();
		}

		// Ignore whitespace
		// Task 2: Check for and ignore whitespace. The same predicate ends an
		// atom in isTokenTerminator(), so a separator that terminated one
		// token can never become the first character of the next.
		while (ch != END_OF_STREAM && Character.isWhitespace((char) ch)) {
			ch = inputReader.read();
		}
		System.out.println("trace: nextToken() -> " + (char) ch + " $" + ch);

		// See what kind of non-white character we got
		// Task 3: Check if the character is of various token types.
		switch (ch) {
			case END_OF_STREAM:
				return EOF;
			case TOK_LEFT_PAREN:
				return "(";
			case TOK_RIGHT_PAREN:
				return ")";
			case TOK_SINGLE_QUOT:
				return "'";
			case TOK_BACK_QUOT:
				return "`";
			case TOK_SEMICOLON:
				// Comment: skip to end of line and then read next token
				while (ch != END_OF_STREAM && ch != '\n' && ch != '\r') {
					ch = inputReader.read();
				}
				return nextToken();
			case TOK_DOUBLE_QUOT:
				return readStringLiteral();
			case '#':
				return readHashToken();
			default:
				return readAtom(ch);
		} // End of the Switch
	}

	/**
	 * Reads a string literal whose opening quote has been consumed. A
	 * backslash causes the character after it to be taken literally.
	 *
	 * @return the literal's characters; strings are represented as char[]
	 * @throws IOException if the underlying reader fails
	 */
	private Object readStringLiteral() throws IOException {
		buff.setLength(0);
		int ch;
		while ((ch = inputReader.read()) != TOK_DOUBLE_QUOT && ch != END_OF_STREAM) {
			if (ch == '\\') {
				ch = inputReader.read();
				if (ch == END_OF_STREAM) {
					// A dangling backslash at end of input must not add a
					// bogus U+FFFF character to the literal.
					break;
				}
			}
			buff.append((char) ch);
		}
		if (ch == END_OF_STREAM) {
			System.out.println("WARN: EOF inside of a string.");
		}
		return buff.toString().toCharArray();
	}

	/**
	 * Reads the character following a '#'. Only #t / #T and #f / #F are
	 * recognised; anything else is reported and the following token is
	 * returned instead.
	 *
	 * @throws IOException if the underlying reader fails
	 */
	private Object readHashToken() throws IOException {
		final int ch = inputReader.read();
		switch (ch) {
			case 't':
			case 'T':
				return Boolean.TRUE;
			case 'f':
			case 'F':
				return Boolean.FALSE;
			default:
				System.out.println("WARN: #" + ((char) ch)
						+ " not recognized, ignored.");
				return nextToken();
		} // End of Switch
	}

	/**
	 * Reads a number or symbol starting with the given character.
	 *
	 * @param first first character of the atom
	 * @return a {@link Double} when the text starts like a number and parses
	 *         as one, otherwise the text converted to lower case
	 * @throws IOException if the underlying reader fails
	 */
	private Object readAtom(final int first) throws IOException {
		buff.setLength(0);
		buildGenericToken(first);
		final String text = buff.toString();
		// Try potential numbers, but catch any format errors.
		if (first == '.' || first == '+' || first == '-'
				|| (first >= '0' && first <= '9')) {
			try {
				return Double.valueOf(text);
			} catch (NumberFormatException ignored) {
				// Not a number after all (e.g. "+", "-" or "."): it is a symbol.
			}
		} // End of If
		// Locale.ROOT keeps symbol names independent of the default locale.
		return text.toLowerCase(Locale.ROOT);
	}

}