// File: InputReader.java
// Date: 7/4/2008
package org.bot.jscheme;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Stack;
/**
* Generic input reader class for most scheme functions. The
* InputReader also includes the token parsing functionality.
*
* @author Berlin Brown (refactoring modifications)
* @author Peter Norvig, peter@norvig.com http://www.norvig.com
* Copyright 1998 Peter Norvig, see http://www.norvig.com/license.html
*/
public class InputReader {

    /** Sentinel token returned once the underlying stream is exhausted. */
    public static final String EOF = "#!EOF";

    // Characters that are tokens in their own right and that also end a
    // symbol/number token (see buildGenericToken).
    public static final char TOK_LEFT_PAREN = '(';
    public static final char TOK_RIGHT_PAREN = ')';
    public static final char TOK_SINGLE_QUOT = '\'';
    public static final char TOK_SEMICOLON = ';';
    public static final char TOK_DOUBLE_QUOT = '"';
    public static final char TOK_COMMA = ',';
    public static final char TOK_BACK_QUOT = '`';

    /**
     * Marks the push-back slot as empty. It must differ from every value
     * <code>Reader.read()</code> can return (-1 and 0..65535), so that a
     * pushed-back end-of-stream (-1) is not mistaken for "nothing pushed".
     */
    private static final int NO_PUSHED_CHAR = -2;

    /**
     * Bit set of the control characters (code points up to U+001F) that count
     * as Java whitespace: tab, newline, vertical tab, form feed, carriage
     * return and the file/group/record/unit separators.
     */
    private static final int CONTROL_WHITESPACE_MASK =
            (1 << '\t')
            | (1 << '\n')
            | (1 << '\u000B')
            | (1 << '\u000C')
            | (1 << '\r')
            | (1 << '\u001C')
            | (1 << '\u001D')
            | (1 << '\u001E')
            | (1 << '\u001F');

    private Reader inputReader;

    /** Scratch buffer reused for every string literal and symbol/number token. */
    private StringBuffer buff = new StringBuffer();

    /**
     * The single character read past the end of a symbol/number token, kept
     * as an int so that end-of-stream (-1) survives the push-back.
     */
    private int pushedChar = NO_PUSHED_CHAR;

    /** Tokens pushed back by readTail() so that read() can see them again. */
    private Stack tokenStack = new Stack();

    /**
     * Creates a reader over the given stream. Bytes are decoded with the
     * platform default charset (the one-argument InputStreamReader constructor).
     *
     * @param in the stream to read Scheme source from
     */
    public InputReader(InputStream in) {
        this.inputReader = new InputStreamReader(new BufferedInputStream(in));
    }

    /**
     * (This code is only included to show you a lighter implementation of this
     * method)
     *
     * Determines if a character is Java whitespace. This includes Unicode
     * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
     * PARAGRAPH_SEPARATOR) except the non-breaking spaces
     * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
     * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
     * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
     * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
     * and <code>'\u001F'</code>.
     * <br>
     *
     * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
     *
     * The control-character mask follows the implementation in GNU classpath:
     *
     * GNU Classpath is free software; you can redistribute it and/or modify
     * it under the terms of the GNU General Public License
     *
     * The Unicode category is obtained from
     * <code>Character.getType(char)</code>; the character code itself carries
     * no category information.
     *
     * @param ch character to test
     * @return true if ch is Java whitespace, else false
     * @see java.lang.Character#isWhitespace(char)
     * @since 1.1
     */
    public static boolean isWhitespaceGNUClasspath(char ch) {
        if (ch <= '\u001F') {
            // Control range: only the nine characters in the mask qualify.
            return ((1 << ch) & CONTROL_WHITESPACE_MASK) != 0;
        }
        if (ch == '\u00A0' || ch == '\u2007' || ch == '\u202F') {
            // Non-breaking spaces are space separators but not whitespace.
            return false;
        }
        int type = Character.getType(ch);
        return type == Character.SPACE_SEPARATOR
                || type == Character.LINE_SEPARATOR
                || type == Character.PARAGRAPH_SEPARATOR;
    }

    /**
     * Determines if the specified character is one of the ASCII whitespace
     * characters: the control characters 9-13 and 28-31, or the space
     * character (32). Unicode separators above that range are not recognized
     * by this method.
     *
     * @param c the character to be tested.
     * @return <code>true</code> if the character is one of the characters
     *         listed above; <code>false</code> otherwise.
     * @see java.lang.Character#isSpaceChar(char)
     */
    public static boolean isWhitespace(char c) {
        int ci = (int) c;
        switch (ci) {
        case 9: // Horizontal Tab
        case 10: // Newline
        case 11: // Vertical Tab
        case 12: // New Page
        case 13: // Carriage Return
        case 28: // File separator
        case 29: // Group separator
        case 30: // Record separator
        case 31: // Unit separator
        case 32: // Space
            return true;
        default:
            return false;
        }
    }

    /**
     * Tells whether a token is the given structural token. Tokens are compared
     * by value because symbol tokens are built at run time from the character
     * buffer and are therefore not the same object as a string literal with
     * the same contents.
     *
     * @param token    the token returned by nextToken(); may be any object
     * @param expected the token text to compare against
     * @return true if token is a String equal to expected
     */
    private static boolean isToken(Object token, String expected) {
        return expected.equals(token);
    }

    /**
     * Read and return a Scheme expression, or EOF.
     *
     * A list is returned as the structure built by readTail(); any other
     * token is returned as produced by nextToken(). A stray ')' is reported
     * on standard output and skipped. An IOException from the underlying
     * reader is reported on standard output and turned into EOF.
     *
     * @return the expression read, or {@link #EOF}
     */
    public Object read() {
        try {
            Object token = nextToken();
            if (isToken(token, "(")) {
                return readTail();
            } else if (isToken(token, ")")) {
                System.out.println("WARN: Extra ')' ignored.");
                return read();
            } else {
                return token;
            } // End of the if - else
        } catch (IOException e) {
            System.out.println("WARN: On input, exception: " + e);
            return EOF;
        } // End try - catch
    }

    /**
     * Reads the remainder of a list whose opening '(' has already been
     * consumed.
     *
     * @return null for the end of the list, the expression after a "." for a
     *         dotted tail, otherwise SchemeUtil.cons of the next expression
     *         and the rest of the list
     * @throws IOException      if the underlying reader fails
     * @throws RuntimeException if the input ends before the list is closed
     */
    private Object readTail() throws IOException {
        Object token = nextToken();
        System.out.println("trace: readTail(): " + token);
        if (isToken(token, EOF)) {
            final String msg = "ERROR: readTail() - EOF during read.";
            System.err.println(msg);
            throw (new RuntimeException(msg));
        } else if (isToken(token, ")")) {
            return null;
        } else if (isToken(token, ".")) {
            // Dotted tail: exactly one expression, then the closing ')'.
            Object result = read();
            token = nextToken();
            if (!isToken(token, ")")) {
                System.out.println("WARN: Missing ')'? Received " + token + " after .");
            }
            return result;
        } else {
            // Not structural: hand the token back so read() consumes it as
            // the first element, then continue with the rest of the list.
            tokenStack.push(token);
            return SchemeUtil.cons(read(), readTail());
        }
    }

    /**
     * Collect the set of characters from the input stream until whitespace or
     * one of the language tokens is found. The characters are appended to the
     * shared buffer; the character that ended the token (possibly -1 for end
     * of stream) is stored in the push-back slot for the next nextToken() call.
     *
     * @param o_ch the first character of the token, already read by the caller
     * @throws IOException if the underlying reader fails
     */
    private void buildGenericToken(final int o_ch) throws IOException {
        int ch = o_ch;
        do {
            // Build alpha numeric, atom/symbol characters/tokens into the buffer
            buff.append((char) ch);
            ch = inputReader.read();
        } while ((ch != -1)
                && !Character.isWhitespace((char) ch)
                && (ch != TOK_LEFT_PAREN) && (ch != TOK_RIGHT_PAREN)
                && (ch != TOK_SINGLE_QUOT) && (ch != TOK_SEMICOLON)
                && (ch != TOK_DOUBLE_QUOT) && (ch != TOK_COMMA)
                && (ch != TOK_BACK_QUOT)); // End of do - while
        // Keep the terminator as an int: narrowing -1 to char would turn
        // end-of-stream into the ordinary character '\uFFFF'.
        pushedChar = ch;
    }

    /**
     * Returns the next token from the input.
     *
     * Possible results: {@link #EOF}; the strings "(", ")", "'" and "`";
     * a char[] for a string literal; Boolean.TRUE / Boolean.FALSE for #t / #f;
     * a Double for a token that parses as a number; otherwise the token text
     * converted to lower case.
     *
     * @return the next token, never null
     * @throws IOException if the underlying reader fails
     */
    private Object nextToken() throws IOException {
        // Task 1: re-use a pushed-back token, else a pushed-back character,
        // else read a fresh character.
        if (!this.tokenStack.empty() && (this.tokenStack.peek() != null)) {
            return this.tokenStack.pop();
        }
        int ch;
        if (this.pushedChar != NO_PUSHED_CHAR) {
            ch = this.pushedChar;
            this.pushedChar = NO_PUSHED_CHAR;
        } else {
            ch = inputReader.read();
        }
        // Task 2: skip whitespace, using the same predicate that ends a token
        // in buildGenericToken so both agree on what separates tokens.
        while (ch != -1 && Character.isWhitespace((char) ch)) {
            ch = inputReader.read();
        }
        System.out.println("trace: nextToken() -> " + (char) ch + " $" + ch);
        // Task 3: Check if the character is of various token types.
        switch (ch) {
        case -1:
            return EOF;
        case TOK_LEFT_PAREN:
            return "(";
        case TOK_RIGHT_PAREN:
            return ")";
        case TOK_SINGLE_QUOT:
            return "'";
        case TOK_BACK_QUOT:
            return "`";
        case TOK_SEMICOLON:
            // Comment: skip to end of line and then read next token
            while (ch != -1 && ch != '\n' && ch != '\r') {
                ch = inputReader.read();
            }
            return nextToken();
        case TOK_DOUBLE_QUOT:
            // Strings are represented as char[]
            buff.setLength(0);
            while ((ch = inputReader.read()) != TOK_DOUBLE_QUOT && ch != -1) {
                if (ch == '\\') {
                    // A backslash makes the following character literal.
                    ch = inputReader.read();
                    if (ch == -1) {
                        // Input ended right after the backslash.
                        break;
                    }
                }
                buff.append((char) ch);
            }
            if (ch == -1) {
                System.out.println("WARN: EOF inside of a string.");
            }
            return buff.toString().toCharArray();
        case '#':
            // Begin new switch statement, next set of characters
            ch = inputReader.read();
            switch (ch) {
            case 't':
            case 'T':
                return Boolean.TRUE;
            case 'f':
            case 'F':
                return Boolean.FALSE;
            default:
                System.out.println("WARN: #" + ((char) ch)
                        + " not recognized, ignored.");
                return nextToken();
            } // End of Switch
        default:
            buff.setLength(0);
            int first = ch;
            buildGenericToken(ch);
            // Try potential numbers, but catch any format errors.
            if (first == '.' || first == '+' || first == '-'
                    || (first >= '0' && first <= '9')) {
                try {
                    // Number type is currently in the buffer queue
                    return Double.valueOf(buff.toString());
                } catch (NumberFormatException ignored) {
                    // Not a number after all (e.g. "+", "-", "."): fall
                    // through and return the text as a symbol.
                }
            } // End of If
            return buff.toString().toLowerCase();
        } // End of the Switch
    }
}