// -*- mode: Java; c-basic-offset: 3; tab-width: 8; indent-tabs-mode: nil -*-
// Copyright (C) 2007, 2008 Andreas Krey, Ulm, Germany <a.krey@gmx.de>
package gloop;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Vector;
final public class Tokenizer {
// This is identified by identity so it does not matter that
// stupid people may modify the actual vector.
final public static Vector<Tokenizer.Token> empty_tokens =
new Vector<Tokenizer.Token> ();
final public static String
EOF = "<eof>",
// TYP = "<typ>",
NUM = "<num>",
ASGN = "=",
SEMI = ";",
COMMA = ",",
LPAR = "(",
RPAR = ")",
LBRC = "{",
RBRC = "}",
AST = "*",
STR = "<str>",
SYM = "<sym>";
public static class Token {
public String tok;
public String val;
public int line;
public Token (String t, String v, int l) {
tok = t;
val = v;
line = l;
// System.out.println ("Token(" + tok + ":" + val + ")");
}
public Token (String t, int l) {
tok = t;
line = l;
// System.out.println ("Token(" + tok + ")");
}
public boolean is (String t) {
return tok == t;
}
public String toString () {
if (val != null) {
return tok + "(" + val + ")";
} else {
return tok;
}
}
}
public static class TokEx extends Exception {
TokEx (String s) {
super (s);
}
}
private final Reader rd;
private int c;
private int line;
private static int lpos = 0;
public static void flush () {
if (lpos > 0) {
System.out.println ();
lpos = 0;
}
}
public static void println (String s) {
flush ();
System.out.println (s);
}
public Tokenizer (Reader r) throws IOException {
line = 1;
rd = r;
getc ();
}
private final int getc () throws IOException {
if (c == '\n') {
line ++;
}
c = rd.read ();
if (c == '\n') {
if (lpos > 0) {
lpos = 0;
System.out.println ();
}
} else if (c != -1) {
System.out.print ((char)c);
lpos ++;
}
// System.out.println ("<" + (char)c + ">");
return c;
}
public void push (Vector<Token> toklist) {
int i = 0;
for (Token t: toklist) {
backlist.insertElementAt (t, i ++);
}
}
Vector<Token> backlist = new Vector<Token> ();
public Token get () throws IOException, TokEx {
if (backlist.size () > 0) {
Token t = backlist.elementAt (0);
backlist.removeElementAt (0);
System.out.println ("Backlist: " + t);
return t;
}
while (true) {
ignb ();
if (c == -1) {
return new Token (EOF, line);
}
if (c == ';') {
getc ();
return new Token (SEMI, line);
}
if (c == ',') {
getc ();
return new Token (COMMA, line);
}
if (c == '(') {
getc ();
return new Token (LPAR, line);
}
if (c == ')') {
getc ();
return new Token (RPAR, line);
}
if (c == '{') {
getc ();
return new Token (LBRC, line);
}
if (c == '}') {
getc ();
return new Token (RBRC, line);
}
if (c == '*') {
getc ();
return new Token (AST, line);
}
if (c == '=') {
getc ();
return new Token (ASGN, line);
}
// if (isup (c)) {
// StringBuffer coll = new StringBuffer ();
// while (isid (c) || isnum (c)) {
// coll.append ((char)c);
// getc ();
// }
// return new Token (TYP, coll.toString (), line);
// }
if (isid (c)) {
StringBuffer coll = new StringBuffer ();
while (isid (c) || isnum (c)) {
coll.append ((char)c);
getc ();
}
return new Token (SYM, coll.toString (), line);
}
if (isnum (c)) {
StringBuffer coll = new StringBuffer ();
while (isnum (c)) {
coll.append ((char)c);
getc ();
}
return new Token (NUM, coll.toString (), line);
}
if (c == '"') {
StringBuffer coll = new StringBuffer ();
getc ();
while (c != '"') {
if (c == -1) throw new TokEx ("unterminated string");
pchr (coll);
}
getc ();
return new Token (STR, coll.toString (), line);
}
if (c >= ' ' && c <= '~') {
throw new TokEx ("bad char '" + (char)c + "'");
}
throw new TokEx ("bad char " + c);
}
}
private final void ignb () throws IOException {
while (true) {
if (isblnk (c)) {
getc ();
} else if (c == '#') {
while (c != -1 && c != '\n' && c != '\r') getc ();
} else {
break;
}
}
}
private static boolean isblnk (int c) {
return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
}
private static boolean isnum (int c) {
return (c >= '0' && c <= '9');
}
private static int hexof (int c) {
if (c >= 'a' && c <= 'f') return c - 'a' + 10;
if (c >= 'A' && c <= 'F') return c - 'A' + 10;
if (c >= '0' && c <= '9') return c - '0';
return -1;
}
private static boolean isup (int c) {
return (c >= 'A' && c <= 'Z');
}
private static boolean isid (int c) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
c == '_' || c == '$';
}
private static boolean isopchar (int c) {
return c == '-' || c == '+' || c == '_' || c == '&' ||
c == '/' || c == '*' || c == '!' || c == '@' ||
c == '<' || c == '>' || c == '=' || c == '%' ||
c == '$' || c == '.';
}
private void pchr (StringBuffer sb) throws IOException, TokEx {
if (c != '\\') {
sb.append ((char) c);
getc ();
return;
}
getc ();
if (c == -1) {
throw new TokEx ("eof in name/string");
}
switch (c) {
case 'n': sb.append ('\n'); break;
case 'r': sb.append ('\r'); break;
case 't': sb.append ('\t'); break;
case 'f': sb.append ('\f'); break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
int v = c - '0';
getc ();
if (c >= '0' && c <= '9') {
v = 8 * v + c - '0';
getc ();
if (c >= '0' && c <= '9') {
v = 8 * v + c - '0';
getc ();
}
}
sb.append ((char) v);
return;
case 'u':
v = 0;
getc ();
for (int i = 0; i < 4; i++) {
int z = hexof (c);
if (z >= 0) {
v = 16 * v + z - '0';
} else {
break;
}
getc ();
}
sb.append ((char) v);
return;
default:
sb.append ((char) c);
break;
}
}
}