/*
* $Id$
*
* Copyright (C) 2003-2015 JNode.org
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; If not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.jnode.shell.bjorne;
import static org.jnode.shell.bjorne.BjorneToken.RULE_1_CONTEXT;
import static org.jnode.shell.bjorne.BjorneToken.RULE_5_CONTEXT;
import static org.jnode.shell.bjorne.BjorneToken.RULE_6_CONTEXT;
import static org.jnode.shell.bjorne.BjorneToken.RULE_7a_CONTEXT;
import static org.jnode.shell.bjorne.BjorneToken.RULE_7b_CONTEXT;
import static org.jnode.shell.bjorne.BjorneToken.RULE_8_CONTEXT;
import static org.jnode.shell.bjorne.BjorneToken.TOK_AMP;
import static org.jnode.shell.bjorne.BjorneToken.TOK_AND_IF;
import static org.jnode.shell.bjorne.BjorneToken.TOK_ASSIGNMENT;
import static org.jnode.shell.bjorne.BjorneToken.TOK_BANG;
import static org.jnode.shell.bjorne.BjorneToken.TOK_BAR;
import static org.jnode.shell.bjorne.BjorneToken.TOK_CASE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_CLOBBER;
import static org.jnode.shell.bjorne.BjorneToken.TOK_DGREAT;
import static org.jnode.shell.bjorne.BjorneToken.TOK_DLESS;
import static org.jnode.shell.bjorne.BjorneToken.TOK_DLESSDASH;
import static org.jnode.shell.bjorne.BjorneToken.TOK_DO;
import static org.jnode.shell.bjorne.BjorneToken.TOK_DONE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_DSEMI;
import static org.jnode.shell.bjorne.BjorneToken.TOK_ELIF;
import static org.jnode.shell.bjorne.BjorneToken.TOK_ELSE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_END_OF_LINE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_END_OF_STREAM;
import static org.jnode.shell.bjorne.BjorneToken.TOK_ESAC;
import static org.jnode.shell.bjorne.BjorneToken.TOK_FI;
import static org.jnode.shell.bjorne.BjorneToken.TOK_FOR;
import static org.jnode.shell.bjorne.BjorneToken.TOK_GREAT;
import static org.jnode.shell.bjorne.BjorneToken.TOK_GREATAND;
import static org.jnode.shell.bjorne.BjorneToken.TOK_IF;
import static org.jnode.shell.bjorne.BjorneToken.TOK_IN;
import static org.jnode.shell.bjorne.BjorneToken.TOK_IO_NUMBER;
import static org.jnode.shell.bjorne.BjorneToken.TOK_LBRACE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_LESS;
import static org.jnode.shell.bjorne.BjorneToken.TOK_LESSAND;
import static org.jnode.shell.bjorne.BjorneToken.TOK_LESSGREAT;
import static org.jnode.shell.bjorne.BjorneToken.TOK_LPAREN;
import static org.jnode.shell.bjorne.BjorneToken.TOK_NAME;
import static org.jnode.shell.bjorne.BjorneToken.TOK_OR_IF;
import static org.jnode.shell.bjorne.BjorneToken.TOK_RBRACE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_RPAREN;
import static org.jnode.shell.bjorne.BjorneToken.TOK_SEMI;
import static org.jnode.shell.bjorne.BjorneToken.TOK_THEN;
import static org.jnode.shell.bjorne.BjorneToken.TOK_UNTIL;
import static org.jnode.shell.bjorne.BjorneToken.TOK_WHILE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_WORD;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.jnode.shell.ShellFailureException;
import org.jnode.shell.ShellSyntaxException;
/**
 * Lexical analyser for the bjorne shell. It turns a character stream into
 * {@link BjorneToken} objects and offers a one-token lookahead
 * ({@link #peek()}), a one-token pushback ({@link #backup()}) and
 * context-sensitive reinterpretation of WORD tokens
 * ({@link #peek(int)}, {@link #next(int)}).
 * <p>
 * Words are returned in "raw" form: quote characters and the backslashes
 * that escape ordinary characters are kept in the token text.
 */
public class BjorneTokenizer {

    /** Character code returned by the reader at the end of the stream. */
    private static final int EOS = -1;

    /** Marker meaning "no character is currently held in this slot". */
    private static final int INVALID = -2;

    private final Reader reader;
    private final boolean debug;

    /**
     * Token window: {@code prev} is the token most recently returned by
     * {@link #next()}, {@code current} is the lookahead token (if any) and
     * {@code next} holds a token displaced by {@link #backup()}.
     */
    private BjorneToken prev, current, next;

    /** Number of characters consumed so far through {@link #nextCh()}. */
    private int pos = 0;

    /** The character most recently consumed, or {@link #INVALID}. */
    private int lastCh = INVALID;

    /** The one-character lookahead, or {@link #INVALID} if none is held. */
    private int nextCh = INVALID;

    /**
     * Create a tokenizer for the supplied shell input text.
     *
     * @param text the text to be tokenized
     * @throws ShellSyntaxException declared for API compatibility
     */
    public BjorneTokenizer(String text)
        throws ShellSyntaxException {
        this(new StringReader(text), false);
    }

    /**
     * Create a tokenizer for the supplied shell input reader.
     *
     * @param reader the reader to be tokenized.
     * @throws ShellSyntaxException declared for API compatibility
     */
    public BjorneTokenizer(Reader reader)
        throws ShellSyntaxException {
        this(reader, false);
    }

    /**
     * Create a tokenizer for the supplied shell input reader, optionally
     * tracing its activity on {@code System.err}.
     *
     * @param reader the reader to be tokenized.
     * @param debug if {@code true}, produce debug output
     * @throws ShellSyntaxException declared for API compatibility
     */
    public BjorneTokenizer(Reader reader, boolean debug)
        throws ShellSyntaxException {
        this.reader = reader;
        this.debug = debug;
    }

    /**
     * Get the next token without advancing. The default tokenization
     * rules are used.
     *
     * @return the next token
     */
    public BjorneToken peek() {
        if (current == null) {
            current = advance();
        }
        if (debug) {
            System.err.println("peek() -> " + current);
        }
        return current;
    }

    /**
     * Get the next token without advancing, using the tokenization
     * rules corresponding to the supplied 'context'.
     *
     * @param context gives the tokenization rules
     * @return the next token
     */
    public BjorneToken peek(int context) {
        BjorneToken res = reinterpret(peek(), context);
        if (debug) {
            System.err.println("peek(" + context + ") --> " + res);
        }
        return res;
    }

    /**
     * Test if {@link #next()} will return something other than EOS.
     *
     * @return <code>true</code> if there are more tokens to be delivered.
     */
    public boolean hasNext() {
        return peek().getTokenType() != TOK_END_OF_STREAM;
    }

    /**
     * Get the next token and advance. The default tokenization
     * rules are used.
     *
     * @return the next token
     */
    public BjorneToken next() {
        if (current == null) {
            prev = advance();
        } else {
            // Deliver the lookahead token and promote any token that was
            // displaced by an earlier backup().
            prev = current;
            current = next;
            next = null;
        }
        if (debug) {
            System.err.println("next() -> " + prev);
        }
        return prev;
    }

    /**
     * Backup one token in the token sequence. Calling this method twice without
     * an intervening {@link #next()} call is invalid.
     *
     * @throws ShellFailureException if there is no token to back up over
     */
    public void backup() {
        if (prev == null) {
            throw new ShellFailureException("incorrect backup");
        }
        if (debug) {
            System.err.println("backup() ... {" + prev + "," + current + ","
                + next + "}");
        }
        next = current;
        current = prev;
        prev = null;
    }

    /**
     * Get the next token and advance, using the tokenization
     * rules corresponding to the supplied 'context'.
     *
     * @param context gives the tokenization rules
     * @return the next token
     */
    public BjorneToken next(int context) {
        BjorneToken res = reinterpret(next(), context);
        if (debug) {
            System.err.println("next(" + context + ") --> " + res);
        }
        return res;
    }

    /**
     * This operation is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    public void remove() {
        throw new UnsupportedOperationException("remove not supported");
    }

    /**
     * This method bypasses normal tokenization and reads a raw line of
     * text up to the next NL (or the end of stream). Characters are taken
     * straight from the character stream; tokens already held as lookahead
     * are not consulted.
     *
     * @return the line read without the terminating NL. If we got an
     *         end of stream immediately, return {@code null}.
     */
    public String readRawLine() {
        StringBuilder sb = new StringBuilder(40);
        while (true) {
            int ch = nextCh();
            switch (ch) {
                case '\n':
                    return sb.toString();
                case EOS:
                    return (sb.length() > 0) ? sb.toString() : null;
                default:
                    sb.append((char) ch);
            }
        }
    }

    /**
     * Parse and return the next token from the character stream.
     * Leading blanks and tabs are skipped, a {@code #} comment is consumed
     * up to its terminating newline (or the end of stream), and input that
     * consists only of backslash-newline line continuations yields no
     * token at all: scanning simply resumes after it.
     *
     * @return the next token; never {@code null}
     */
    private BjorneToken advance() {
        if (debug) {
            System.err.print("advance() ... {" + prev + "," + current + ","
                + next + "} ...");
        }
        while (true) {
            int ch = peekCh();
            while (ch == '\t' || ch == ' ') {
                nextCh();
                ch = peekCh();
            }
            // NOTE(review): 'ch' has only been peeked here, so this value is
            // one less than the number of characters consumed so far. It is
            // kept as-is because consumers of the token offsets may rely on it.
            int start = getPos() - 1;
            switch (ch) {
                case EOS:
                    return makeToken(TOK_END_OF_STREAM, getPos());
                case '\n':
                    nextCh();
                    return makeToken(TOK_END_OF_LINE, start);
                case '#':
                    // A comment runs to the end of the line (or stream).
                    while ((ch = nextCh()) != EOS) {
                        if (ch == '\n') {
                            return makeToken(TOK_END_OF_LINE, start);
                        }
                    }
                    return makeToken(TOK_END_OF_STREAM, start);
                case '(':
                    nextCh();
                    return makeToken(TOK_LPAREN, start);
                case ')':
                    nextCh();
                    return makeToken(TOK_RPAREN, start);
                case '<':
                case '>':
                case ';':
                case '&':
                case '|':
                    return parseOperator();
                default: {
                    BjorneToken word = parseWord();
                    if (word != null) {
                        return word;
                    }
                    // Only line continuations (or a trailing backslash)
                    // were consumed: rescan from the current position
                    // instead of delivering an empty token.
                    break;
                }
            }
        }
    }

    /**
     * Make a token with empty text that ends at the current position.
     *
     * @param tokenType the token type
     * @param start the start offset recorded in the token
     * @return the new token
     */
    private BjorneToken makeToken(int tokenType, int start) {
        return new BjorneToken(tokenType, "", start, getPos());
    }

    /**
     * Make a token with the given text that ends at the current position.
     *
     * @param tokenType the token type
     * @param value the token text
     * @param start the start offset recorded in the token
     * @return the new token
     */
    private BjorneToken makeToken(int tokenType, String value, int start) {
        return new BjorneToken(tokenType, value, start, getPos());
    }

    /**
     * Parse a word starting at the current lookahead character. The word
     * ends at an unquoted operator character, blank or tab, or at any
     * newline or end of stream. Quote characters and escaping backslashes
     * are retained in the token text; backslash-newline pairs are dropped.
     * A word made up entirely of digits that is immediately followed by
     * {@code <} or {@code >} is returned as an IO_NUMBER.
     *
     * @return a WORD or IO_NUMBER token, or {@code null} if no word
     *         characters were collected (only line continuations or a
     *         backslash at the end of stream were consumed)
     */
    private BjorneToken parseWord() {
        int quoteChar = 0;
        StringBuilder sb = new StringBuilder();
        int ch = peekCh();
        int start = getPos() - 1;
    LOOP:
        while (true) {
            switch (ch) {
                case EOS:
                case '\n':
                    break LOOP;
                case '(':
                case ')':
                case '<':
                case '>':
                case ';':
                case '&':
                case '|':
                case ' ':
                case '\t':
                    // These only terminate the word outside of quotes.
                    if (quoteChar == 0) {
                        break LOOP;
                    }
                    break;
                case '"':
                case '\'':
                case '`':
                    // Open a quotation, or close it if this is the matching
                    // quote character; other quote characters are literal.
                    if (quoteChar == 0) {
                        quoteChar = ch;
                    } else if (quoteChar == ch) {
                        quoteChar = 0;
                    }
                    break;
                case '\\':
                    nextCh();
                    ch = peekCh();
                    if (ch == '\n') {
                        // A '\\' followed by a newline is a line continuation:
                        // the two characters are skipped.
                        nextCh();
                        ch = peekCh();
                        continue;
                    } else if (ch == EOS) {
                        // Silently eat a '\\' at the end of stream position.
                        nextCh();
                        break LOOP;
                    } else {
                        // The '\\' is included in the (raw) word.
                        sb.append('\\');
                    }
                    break;
                default:
                    // include anything else in the word.
                    break;
            }
            sb.append((char) ch);
            nextCh();
            ch = peekCh();
        }
        if (sb.length() == 0) {
            // Nothing but line continuations was seen; there is no word.
            return null;
        }
        // Line continuations were already dropped by the loop above, so the
        // digit test only has to look at the collected characters.
        if ((ch == '<' || ch == '>') && isAllDigits(sb)) {
            return makeToken(TOK_IO_NUMBER, sb.toString(), start);
        }
        return makeToken(TOK_WORD, sb.toString(), start);
    }

    /**
     * Test whether every character of a sequence is an ASCII decimal digit.
     *
     * @param chars the characters to be tested
     * @return {@code true} if no character lies outside '0'..'9'
     */
    private static boolean isAllDigits(CharSequence chars) {
        for (int i = 0; i < chars.length(); i++) {
            char c = chars.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Parse an operator token. The lookahead character must be one of
     * {@code <}, {@code >}, {@code ;}, {@code &} or {@code |}; the longest
     * operator starting with that character is consumed.
     *
     * @return the operator token
     * @throws ShellFailureException if the lookahead is not an operator character
     */
    private BjorneToken parseOperator() {
        int start = getPos() - 1;
        switch (nextCh()) {
            case '<':
                switch (peekCh()) {
                    case '<':
                        nextCh();
                        if (peekCh() == '-') {
                            nextCh();
                            return makeToken(TOK_DLESSDASH, start);
                        }
                        return makeToken(TOK_DLESS, start);
                    case '>':
                        nextCh();
                        return makeToken(TOK_LESSGREAT, start);
                    case '&':
                        nextCh();
                        return makeToken(TOK_LESSAND, start);
                    default:
                        return makeToken(TOK_LESS, start);
                }
            case '>':
                switch (peekCh()) {
                    case '|':
                        nextCh();
                        return makeToken(TOK_CLOBBER, start);
                    case '>':
                        nextCh();
                        return makeToken(TOK_DGREAT, start);
                    case '&':
                        nextCh();
                        return makeToken(TOK_GREATAND, start);
                    default:
                        return makeToken(TOK_GREAT, start);
                }
            case ';':
                if (peekCh() == ';') {
                    nextCh();
                    return makeToken(TOK_DSEMI, start);
                }
                return makeToken(TOK_SEMI, start);
            case '&':
                if (peekCh() == '&') {
                    nextCh();
                    return makeToken(TOK_AND_IF, start);
                }
                return makeToken(TOK_AMP, start);
            case '|':
                if (peekCh() == '|') {
                    nextCh();
                    return makeToken(TOK_OR_IF, start);
                }
                return makeToken(TOK_BAR, start);
        }
        throw new ShellFailureException("bad lexer state");
    }

    /**
     * Consume and return the next character. The lookahead character is
     * used if one is held; otherwise a character is read from the reader,
     * unless the end of stream has already been consumed.
     *
     * @return the character consumed, or {@link #EOS}
     * @throws ShellFailureException if the reader throws an IOException
     */
    private int nextCh() throws ShellFailureException {
        try {
            if (nextCh == INVALID) {
                if (lastCh != EOS) {
                    lastCh = reader.read();
                    pos++;
                }
            } else {
                lastCh = nextCh;
                nextCh = INVALID;
                pos++;
            }
            return lastCh;
        } catch (IOException ex) {
            throw new ShellFailureException("Unexpected exception", ex);
        }
    }

    /**
     * Return the next character without consuming it, reading it from the
     * reader if no lookahead character is currently held.
     *
     * @return the lookahead character, or {@link #EOS}
     * @throws ShellFailureException if the reader throws an IOException
     */
    private int peekCh() {
        try {
            if (nextCh == INVALID) {
                nextCh = reader.read();
            }
            return nextCh;
        } catch (IOException ex) {
            throw new ShellFailureException("Unexpected exception", ex);
        }
    }

    /**
     * @return the number of characters consumed so far
     */
    private int getPos() {
        return pos;
    }

    /**
     * Reinterpret a token according to the context-sensitive tokenization rule
     * for a given context. WORD tokens may be mapped to reserved words, NAME or
     * ASSIGNMENT. Other tokens are left alone.
     *
     * @param token the token to be reinterpreted
     * @param context one of the {@code RULE_*_CONTEXT} constants; any other
     *        value leaves the token unchanged
     * @return the reinterpreted token, or {@code token} itself if the rule
     *         does not apply to it
     */
    private BjorneToken reinterpret(BjorneToken token, int context) {
        if (token.getTokenType() != TOK_WORD) {
            return token;
        }
        switch (context) {
            case RULE_1_CONTEXT: {
                // Reserved words are recognised; anything else stays a WORD.
                BjorneToken reserved = toReservedWordToken(token);
                return (reserved != null) ? reserved : token;
            }
            case RULE_5_CONTEXT:
                return token.isName() ? remakeToken(TOK_NAME, token) : token;
            case RULE_6_CONTEXT:
                return token.getText().equals("in")
                    ? remakeToken(TOK_IN, token) : token;
            case RULE_7a_CONTEXT:
                if (token.getText().indexOf('=') == -1) {
                    return reinterpret(token, RULE_1_CONTEXT);
                }
                // DROP THROUGH TO RULE 7b
            case RULE_7b_CONTEXT: {
                // An ASSIGNMENT needs a non-empty, valid name before the '='.
                String text = token.getText();
                int eqIndex = text.indexOf('=');
                if (eqIndex <= 0
                    || !BjorneToken.isName(text.substring(0, eqIndex))) {
                    return token;
                }
                return remakeToken(TOK_ASSIGNMENT, token);
            }
            case RULE_8_CONTEXT: {
                // Reserved word first, then NAME, then the assignment rule.
                BjorneToken reserved = toReservedWordToken(token);
                if (reserved != null) {
                    return reserved;
                }
                if (token.isName()) {
                    return remakeToken(TOK_NAME, token);
                }
                return reinterpret(token, RULE_7b_CONTEXT);
            }
            default:
                return token;
        }
    }

    /**
     * Make a copy of a token with a different token type.
     *
     * @param tokenType the type for the new token
     * @param token supplies the text and the start / end offsets
     * @return the new token
     */
    private BjorneToken remakeToken(int tokenType, BjorneToken token) {
        return new BjorneToken(tokenType, token.getText(), token.start,
            token.end);
    }

    /**
     * Map a token whose text is a shell reserved word to the matching
     * reserved word token.
     *
     * @param token the token to be examined
     * @return the reserved word token, or {@code null} if the token's text
     *         is not a reserved word
     */
    private BjorneToken toReservedWordToken(BjorneToken token) {
        String str = token.getText();
        final int tokenType;
        if (str.equals("for")) {
            tokenType = TOK_FOR;
        } else if (str.equals("while")) {
            tokenType = TOK_WHILE;
        } else if (str.equals("until")) {
            tokenType = TOK_UNTIL;
        } else if (str.equals("do")) {
            tokenType = TOK_DO;
        } else if (str.equals("done")) {
            tokenType = TOK_DONE;
        } else if (str.equals("if")) {
            tokenType = TOK_IF;
        } else if (str.equals("then")) {
            tokenType = TOK_THEN;
        } else if (str.equals("else")) {
            tokenType = TOK_ELSE;
        } else if (str.equals("elif")) {
            tokenType = TOK_ELIF;
        } else if (str.equals("fi")) {
            tokenType = TOK_FI;
        } else if (str.equals("case")) {
            tokenType = TOK_CASE;
        } else if (str.equals("esac")) {
            tokenType = TOK_ESAC;
        } else if (str.equals("{")) {
            tokenType = TOK_LBRACE;
        } else if (str.equals("}")) {
            tokenType = TOK_RBRACE;
        } else if (str.equals("!")) {
            tokenType = TOK_BANG;
        } else {
            return null;
        }
        return remakeToken(tokenType, token);
    }
}