/*
* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
* Version 1.0, and under the Eclipse Public License, Version 1.0
* (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.test.coverage;
import java.io.EOFException;
import java.io.IOException;
import java.io.Reader;
/**
 * Helper class for the java file parser. It splits the characters supplied by
 * a {@link Reader} into words, quoted literals and single-character tokens,
 * skipping whitespace and comments. While doing so it records the raw
 * characters that were read (see {@link #getToken()}) and counts lines (see
 * {@link #getLine()}).
 */
public class Tokenizer {

    /**
     * This token type means no more tokens are available.
     */
    static final int TYPE_EOF = -1;

    /** Token type returned for a run of word characters. */
    private static final int TYPE_WORD = -2;

    /** Initial state: nothing has been read from the reader yet. */
    private static final int TYPE_NOTHING = -3;

    // character class flags stored in charTypes
    private static final byte WHITESPACE = 1;
    private static final byte ALPHA = 4;
    private static final byte QUOTE = 8;

    /**
     * Raw characters read since the last initToken(). It is created eagerly so
     * that nextToken() also works when the caller never called initToken().
     */
    private StringBuilder buffer = new StringBuilder();

    /**
     * Whether the most recent read() appended a character to the buffer. After
     * a token has been returned that character is the lookahead, which
     * getToken() must not include.
     */
    private boolean lookaheadAppended;

    private final Reader reader;

    /** Scratch space for the decoded characters of the current token. */
    private char[] chars = new char[20];

    /** The character read after the current token, or a negative value. */
    private int peekChar;

    private int line = 1;

    /** Character class flags for the characters 0..255. */
    private final byte[] charTypes = new byte[256];

    private int type = TYPE_NOTHING;

    private String value;

    /**
     * Create a tokenizer that reads from the given reader.
     *
     * @param r the reader that supplies the characters
     */
    Tokenizer(Reader r) {
        wordChars('a', 'z');
        wordChars('A', 'Z');
        wordChars('0', '9');
        wordChars('.', '.');
        wordChars('+', '+');
        wordChars('-', '-');
        wordChars('_', '_');
        wordChars(128 + 32, 255);
        whitespaceChars(0, ' ');
        charTypes['"'] = QUOTE;
        charTypes['\''] = QUOTE;
        reader = r;
    }

    /**
     * Get the decoded value of the last token.
     *
     * @return the word or the unescaped content of a quoted literal, or null
     *         if the last token was a single character or the end of input
     */
    String getString() {
        return value;
    }

    /**
     * Mark all characters in the given range as word characters.
     *
     * @param low the first character (inclusive)
     * @param hi the last character (inclusive)
     */
    private void wordChars(int low, int hi) {
        for (int i = low; i <= hi; i++) {
            charTypes[i] |= ALPHA;
        }
    }

    /**
     * Mark all characters in the given range as whitespace, replacing any
     * flags previously set for them.
     *
     * @param low the first character (inclusive)
     * @param hi the last character (inclusive)
     */
    private void whitespaceChars(int low, int hi) {
        for (int i = low; i <= hi; i++) {
            charTypes[i] = WHITESPACE;
        }
    }

    /**
     * Read one character from the reader and record it in the raw buffer.
     *
     * @return the character, or -1 at the end of the input
     */
    private int read() throws IOException {
        int i = reader.read();
        lookaheadAppended = i != -1;
        if (lookaheadAppended) {
            append(i);
        }
        return i;
    }

    /**
     * Initialize the tokenizer.
     */
    void initToken() {
        buffer = new StringBuilder();
        lookaheadAppended = false;
    }

    /**
     * Get the raw text recorded since the buffer was last reset, without the
     * lookahead character that was read past the end of the token. Nothing is
     * removed if the last read hit the end of the input (there is no lookahead
     * character in that case) or if the buffer is empty.
     *
     * @return the raw text
     */
    String getToken() {
        int length = buffer.length();
        if (lookaheadAppended && length > 0) {
            buffer.setLength(length - 1);
            // the lookahead is gone now; do not strip it a second time
            lookaheadAppended = false;
        }
        return buffer.toString();
    }

    private void append(int i) {
        buffer.append((char) i);
    }

    /**
     * Get the character class flags of a character.
     *
     * @param c the character, or a negative value for the end of input
     * @return WHITESPACE for the end of input, the table entry for characters
     *         below 256, and ALPHA for all other characters
     */
    private int charTypeOf(int c) {
        if (c < 0) {
            return WHITESPACE;
        }
        return c < 256 ? charTypes[c] : ALPHA;
    }

    /**
     * Store a decoded character in the scratch array, doubling the array when
     * it is full.
     *
     * @param index the position to write to
     * @param c the character to store
     */
    private void store(int index, int c) {
        if (index >= chars.length) {
            char[] nb = new char[chars.length * 2];
            System.arraycopy(chars, 0, nb, 0, chars.length);
            chars = nb;
        }
        chars[index] = (char) c;
    }

    /**
     * Read the next token and get the token type.
     *
     * @return the token type: TYPE_EOF at the end of the input, a negative
     *         internal constant for a word, the quote character for a quoted
     *         literal, or otherwise the character itself
     */
    int nextToken() throws IOException {
        int c;
        value = null;
        if (type == TYPE_NOTHING) {
            c = read();
            if (c >= 0) {
                type = c;
            }
        } else {
            c = peekChar;
            if (c < 0) {
                try {
                    c = read();
                    if (c >= 0) {
                        type = c;
                    }
                } catch (EOFException e) {
                    // treated the same as a regular end of input
                    c = -1;
                }
            }
        }
        if (c < 0) {
            return type = TYPE_EOF;
        }
        int charType = charTypeOf(c);
        while ((charType & WHITESPACE) != 0) {
            if (c == '\r') {
                line++;
                c = read();
                // "\r\n" counts as one line break
                if (c == '\n') {
                    c = read();
                }
            } else {
                if (c == '\n') {
                    line++;
                }
                c = read();
            }
            if (c < 0) {
                return type = TYPE_EOF;
            }
            charType = charTypeOf(c);
        }
        if ((charType & ALPHA) != 0) {
            return readWord(c);
        }
        if ((charType & QUOTE) != 0) {
            return readQuoted(c);
        }
        if (c == '/') {
            c = read();
            if (c == '*') {
                if (!skipBlockComment()) {
                    return type = TYPE_EOF;
                }
                peekChar = read();
                return nextToken();
            } else if (c == '/') {
                // line comment: the line break itself is left in peekChar so
                // that the whitespace loop of the next call counts the line
                do {
                    c = read();
                } while (c != '\n' && c != '\r' && c >= 0);
                peekChar = c;
                return nextToken();
            } else {
                peekChar = c;
                return type = '/';
            }
        }
        peekChar = read();
        return type = c;
    }

    /**
     * Read a run of word characters. The raw buffer is restarted at the first
     * character of the word.
     *
     * @param c the first character of the word
     * @return TYPE_WORD
     */
    private int readWord(int c) throws IOException {
        initToken();
        append(c);
        int i = 0;
        do {
            store(i++, c);
            c = read();
        } while ((charTypeOf(c) & ALPHA) != 0);
        peekChar = c;
        value = String.copyValueOf(chars, 0, i);
        return type = TYPE_WORD;
    }

    /**
     * Read a quoted literal up to the matching quote, a line break, or the end
     * of the input, decoding backslash escapes. The raw buffer is restarted at
     * the opening quote.
     *
     * @param quote the opening quote character
     * @return the quote character
     */
    private int readQuoted(int quote) throws IOException {
        initToken();
        append(quote);
        type = quote;
        int i = 0;
        // \octal needs a lookahead
        peekChar = read();
        while (peekChar >= 0 && peekChar != quote && peekChar != '\n'
                && peekChar != '\r') {
            int c;
            if (peekChar == '\\') {
                c = read();
                if (c < 0) {
                    // a backslash directly before the end of the input:
                    // there is nothing to decode, so stop here
                    peekChar = c;
                    break;
                }
                // to allow \377, but not \477
                int first = c;
                if (c >= '0' && c <= '7') {
                    c = c - '0';
                    int c2 = read();
                    if ('0' <= c2 && c2 <= '7') {
                        c = (c << 3) + (c2 - '0');
                        c2 = read();
                        if ('0' <= c2 && c2 <= '7' && first <= '3') {
                            c = (c << 3) + (c2 - '0');
                            peekChar = read();
                        } else {
                            peekChar = c2;
                        }
                    } else {
                        peekChar = c2;
                    }
                } else {
                    switch (c) {
                    case 'b':
                        c = '\b';
                        break;
                    case 'f':
                        c = '\f';
                        break;
                    case 'n':
                        c = '\n';
                        break;
                    case 'r':
                        c = '\r';
                        break;
                    case 't':
                        c = '\t';
                        break;
                    default:
                        // any other escaped character stands for itself
                    }
                    peekChar = read();
                }
            } else {
                c = peekChar;
                peekChar = read();
            }
            store(i++, c);
        }
        if (peekChar == quote) {
            // keep \n or \r intact in peekChar
            peekChar = read();
        }
        value = String.copyValueOf(chars, 0, i);
        return quote;
    }

    /**
     * Skip the remainder of a block comment whose opening has already been
     * read, counting the line breaks it contains. "\r\n", a lone "\r" and a
     * lone "\n" each count as one line break.
     *
     * @return true if the end of the comment was found, false if the input
     *         ended first
     */
    private boolean skipBlockComment() throws IOException {
        int prevChar = 0;
        while (true) {
            int c = read();
            if (c < 0) {
                return false;
            }
            if (c == '/' && prevChar == '*') {
                return true;
            }
            if (c == '\r') {
                line++;
            } else if (c == '\n' && prevChar != '\r') {
                line++;
            }
            prevChar = c;
        }
    }

    /**
     * Get the current line number, starting at 1.
     *
     * @return the line number
     */
    int getLine() {
        return line;
    }
}