/*
 * $Id$
 *
 * Copyright (C) 2003-2015 JNode.org
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; If not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

package org.jnode.shell.bjorne;

import static org.jnode.shell.bjorne.BjorneToken.RULE_1_CONTEXT;
import static org.jnode.shell.bjorne.BjorneToken.RULE_5_CONTEXT;
import static org.jnode.shell.bjorne.BjorneToken.RULE_6_CONTEXT;
import static org.jnode.shell.bjorne.BjorneToken.RULE_7a_CONTEXT;
import static org.jnode.shell.bjorne.BjorneToken.RULE_7b_CONTEXT;
import static org.jnode.shell.bjorne.BjorneToken.RULE_8_CONTEXT;
import static org.jnode.shell.bjorne.BjorneToken.TOK_AMP;
import static org.jnode.shell.bjorne.BjorneToken.TOK_AND_IF;
import static org.jnode.shell.bjorne.BjorneToken.TOK_ASSIGNMENT;
import static org.jnode.shell.bjorne.BjorneToken.TOK_BANG;
import static org.jnode.shell.bjorne.BjorneToken.TOK_BAR;
import static org.jnode.shell.bjorne.BjorneToken.TOK_CASE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_CLOBBER;
import static org.jnode.shell.bjorne.BjorneToken.TOK_DGREAT;
import static org.jnode.shell.bjorne.BjorneToken.TOK_DLESS;
import static org.jnode.shell.bjorne.BjorneToken.TOK_DLESSDASH;
import static org.jnode.shell.bjorne.BjorneToken.TOK_DO;
import static org.jnode.shell.bjorne.BjorneToken.TOK_DONE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_DSEMI;
import static org.jnode.shell.bjorne.BjorneToken.TOK_ELIF;
import static org.jnode.shell.bjorne.BjorneToken.TOK_ELSE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_END_OF_LINE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_END_OF_STREAM;
import static org.jnode.shell.bjorne.BjorneToken.TOK_ESAC;
import static org.jnode.shell.bjorne.BjorneToken.TOK_FI;
import static org.jnode.shell.bjorne.BjorneToken.TOK_FOR;
import static org.jnode.shell.bjorne.BjorneToken.TOK_GREAT;
import static org.jnode.shell.bjorne.BjorneToken.TOK_GREATAND;
import static org.jnode.shell.bjorne.BjorneToken.TOK_IF;
import static org.jnode.shell.bjorne.BjorneToken.TOK_IN;
import static org.jnode.shell.bjorne.BjorneToken.TOK_IO_NUMBER;
import static org.jnode.shell.bjorne.BjorneToken.TOK_LBRACE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_LESS;
import static org.jnode.shell.bjorne.BjorneToken.TOK_LESSAND;
import static org.jnode.shell.bjorne.BjorneToken.TOK_LESSGREAT;
import static org.jnode.shell.bjorne.BjorneToken.TOK_LPAREN;
import static org.jnode.shell.bjorne.BjorneToken.TOK_NAME;
import static org.jnode.shell.bjorne.BjorneToken.TOK_OR_IF;
import static org.jnode.shell.bjorne.BjorneToken.TOK_RBRACE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_RPAREN;
import static org.jnode.shell.bjorne.BjorneToken.TOK_SEMI;
import static org.jnode.shell.bjorne.BjorneToken.TOK_THEN;
import static org.jnode.shell.bjorne.BjorneToken.TOK_UNTIL;
import static org.jnode.shell.bjorne.BjorneToken.TOK_WHILE;
import static org.jnode.shell.bjorne.BjorneToken.TOK_WORD;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

import org.jnode.shell.ShellFailureException;
import org.jnode.shell.ShellSyntaxException;

/**
 * Splits shell input into {@link BjorneToken}s.
 * <p>
 * The tokenizer reads characters from a {@link Reader} with one character of
 * lookahead, and exposes the resulting tokens through {@link #peek()},
 * {@link #next()} and {@link #backup()}. Raw tokens are produced context-free
 * (words, operators, line ends, end of stream); the {@code peek(int)} and
 * {@code next(int)} variants then reinterpret WORD tokens as reserved words,
 * NAMEs or ASSIGNMENTs according to the supplied context rule.
 */
public class BjorneTokenizer {

    /** Character value denoting the end of the input stream. */
    private static final int EOS = -1;

    /** Marker meaning "no character is held in this slot". */
    private static final int INVALID = -2;

    private final Reader reader;
    private final boolean debug;

    /** Token window: the last token returned, the pending token, and one pushed-back token. */
    private BjorneToken prev, current, next;

    /** Number of characters consumed so far via {@link #nextCh()}. */
    private int pos = 0;

    /** The character most recently consumed, or INVALID if none has been. */
    private int lastCh = INVALID;

    /** The lookahead character, or INVALID if none has been read ahead. */
    private int nextCh = INVALID;

    /**
     * Create a tokenizer for the supplied shell input text.
     *
     * @param text the text to be tokenized
     * @throws ShellSyntaxException declared for callers; the constructor
     *         itself only stores its arguments
     */
    public BjorneTokenizer(String text) throws ShellSyntaxException {
        this(new StringReader(text), false);
    }

    /**
     * Create a tokenizer for the supplied shell input reader.
     *
     * @param reader the reader to be tokenized.
     * @throws ShellSyntaxException declared for callers; the constructor
     *         itself only stores its arguments
     */
    public BjorneTokenizer(Reader reader) throws ShellSyntaxException {
        this(reader, false);
    }

    /**
     * Create a tokenizer for the supplied shell input reader.
     *
     * @param reader the reader to be tokenized.
     * @param debug if {@code true}, produce debug output on {@code System.err}
     * @throws ShellSyntaxException declared for callers; the constructor
     *         itself only stores its arguments
     */
    public BjorneTokenizer(Reader reader, boolean debug) throws ShellSyntaxException {
        this.reader = reader;
        this.debug = debug;
    }

    /**
     * Get the next token without advancing. The default tokenization
     * rules are used.
     *
     * @return the next token
     */
    public BjorneToken peek() {
        if (current == null) {
            current = advance();
        }
        if (debug) {
            System.err.println("peek() -> " + current);
        }
        return current;
    }

    /**
     * Get the next token without advancing, using the tokenization
     * rules corresponding to the supplied 'context'.
     *
     * @param context gives the tokenization rules
     * @return the next token
     */
    public BjorneToken peek(int context) {
        BjorneToken res = reinterpret(peek(), context);
        if (debug) {
            System.err.println("peek(" + context + ") --> " + res);
        }
        return res;
    }

    /**
     * Test if {@link #next()} will return something other than EOS.
     *
     * @return <code>true</code> if there are more tokens to be delivered.
     */
    public boolean hasNext() {
        return peek().getTokenType() != TOK_END_OF_STREAM;
    }

    /**
     * Get the next token and advance. The default tokenization
     * rules are used.
     *
     * @return the next token
     */
    public BjorneToken next() {
        if (current == null) {
            prev = advance();
        } else {
            // Shift the window: the pending token is delivered, and any
            // token pushed aside by backup() becomes the pending one.
            prev = current;
            current = next;
            next = null;
        }
        if (debug) {
            System.err.println("next() -> " + prev);
        }
        return prev;
    }

    /**
     * Backup one token in the token sequence. Calling this method twice without
     * an intervening {@link #next()} call is invalid.
     *
     * @throws ShellFailureException if there is no previous token to back up to
     */
    public void backup() {
        if (prev == null) {
            throw new ShellFailureException("incorrect backup");
        }
        if (debug) {
            System.err.println("backup() ... {" + prev + "," + current + "," + next + "}");
        }
        next = current;
        current = prev;
        prev = null;
    }

    /**
     * Get the next token and advance, using the tokenization
     * rules corresponding to the supplied 'context'.
     *
     * @param context gives the tokenization rules
     * @return the next token
     */
    public BjorneToken next(int context) {
        BjorneToken res = reinterpret(next(), context);
        if (debug) {
            System.err.println("next(" + context + ") --> " + res);
        }
        return res;
    }

    /**
     * This operation is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    public void remove() {
        throw new UnsupportedOperationException("remove not supported");
    }

    /**
     * This method bypasses normal tokenization and reads a raw line of
     * text up to the next NL (or the end of stream). Only the character
     * stream is consulted; tokens already held by {@link #peek()} are not
     * taken into account.
     *
     * @return the line read without the terminating NL. If we got an
     *         end of stream immediately, return {@code null}.
     */
    public String readRawLine() {
        StringBuilder sb = new StringBuilder(40);
        while (true) {
            int ch = nextCh();
            switch (ch) {
                case '\n':
                    return sb.toString();
                case EOS:
                    return (sb.length() > 0) ? sb.toString() : null;
                default:
                    sb.append((char) ch);
            }
        }
    }

    /**
     * Parse and return the next token from the character stream.
     * Leading spaces and tabs are skipped; a '#' at the start of a token
     * begins a comment that runs to the end of the line.
     *
     * @return the next raw (context-free) token
     */
    private BjorneToken advance() {
        if (debug) {
            System.err.print("advance() ... {" + prev + "," + current + "," + next + "} ...");
        }
        int ch = peekCh();
        while (ch == '\t' || ch == ' ') {
            nextCh();
            ch = peekCh();
        }
        // NOTE(review): 'start' is taken as getPos() - 1 while the token's
        // first character is still only peeked, whereas the end-of-stream
        // token below uses getPos(). Confirm which convention the consumers
        // of BjorneToken positions expect before changing either.
        int start = getPos() - 1;
        switch (ch) {
            case EOS:
                return makeToken(TOK_END_OF_STREAM, getPos());
            case '\n':
                nextCh();
                return makeToken(TOK_END_OF_LINE, start);
            case '#':
                // Discard the comment. The newline that ends it is consumed
                // and reported as the end-of-line token.
                while ((ch = nextCh()) != EOS) {
                    if (ch == '\n') {
                        return makeToken(TOK_END_OF_LINE, start);
                    }
                }
                return makeToken(TOK_END_OF_STREAM, start);
            case '(':
                nextCh();
                return makeToken(TOK_LPAREN, start);
            case ')':
                nextCh();
                return makeToken(TOK_RPAREN, start);
            case '<':
            case '>':
            case ';':
            case '&':
            case '|':
                return parseOperator();
            default:
                return parseWord();
        }
    }

    /**
     * Make a token with empty text that ends at the current position.
     */
    private BjorneToken makeToken(int tokenType, int start) {
        return new BjorneToken(tokenType, "", start, getPos());
    }

    /**
     * Make a token with the given text that ends at the current position.
     */
    private BjorneToken makeToken(int tokenType, String value, int start) {
        return new BjorneToken(tokenType, value, start, getPos());
    }

    /**
     * Parse a word. The word's text is kept raw: quote characters and
     * backslashes (other than line continuations) are retained in it.
     * A word consisting solely of digits that is immediately followed by
     * '<' or '>' is returned as an IO_NUMBER.
     * <p>
     * If only line continuations (or a lone backslash at the end of the
     * stream) were consumed, there is no word at this point and tokenization
     * simply resumes with whatever follows.
     *
     * @return the word or IO_NUMBER token, or the following token if no
     *         word characters were found
     */
    private BjorneToken parseWord() {
        int quoteChar = 0;
        StringBuilder sb = new StringBuilder();
        int ch = peekCh();
        int start = getPos() - 1;
        LOOP:
        while (true) {
            switch (ch) {
                case EOS:
                case '\n':
                    break LOOP;
                case '(':
                case ')':
                case '<':
                case '>':
                case ';':
                case '&':
                case '|':
                case ' ':
                case '\t':
                    // These only terminate the word when unquoted.
                    if (quoteChar == 0) {
                        break LOOP;
                    }
                    break;
                case '"':
                case '\'':
                case '`':
                    if (quoteChar == 0) {
                        quoteChar = ch;
                    } else if (quoteChar == ch) {
                        quoteChar = 0;
                    }
                    break;
                case '\\':
                    nextCh();
                    ch = peekCh();
                    if (ch == '\n') {
                        // A '\\' followed by a newline is a line continuation:
                        // the two characters are skipped.
                        nextCh();
                        ch = peekCh();
                        continue;
                    } else if (ch == EOS) {
                        // Silently eat a '\\' at the end of stream position.
                        nextCh();
                        break LOOP;
                    } else {
                        // The '\\' is included in the (raw) word.
                        sb.append('\\');
                    }
                    break;
                default:
                    // include anything else in the word.
                    break;
            }
            sb.append((char) ch);
            nextCh();
            ch = peekCh();
        }
        if (sb.length() == 0) {
            // Nothing but line continuations (or a trailing '\\') was
            // consumed. Emitting a token here would hand the parser a
            // spurious empty WORD (e.g. for "echo \<NL> foo") or an empty
            // IO_NUMBER (when '<' or '>' follows), so carry on with the
            // next real token instead.
            return advance();
        }
        if ((ch == '<' || ch == '>') && isAllDigits(sb)) {
            return makeToken(TOK_IO_NUMBER, sb.toString(), start);
        }
        return makeToken(TOK_WORD, sb.toString(), start);
    }

    /**
     * Test whether every character of the supplied text is an ASCII digit.
     */
    private static boolean isAllDigits(CharSequence text) {
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Parse an operator token starting at one of '<', '>', ';', '&' or '|',
     * preferring the longest operator that matches.
     *
     * @return the operator token
     * @throws ShellFailureException if the current character does not start
     *         an operator
     */
    private BjorneToken parseOperator() {
        int start = getPos() - 1;
        switch (nextCh()) {
            case '<':
                switch (peekCh()) {
                    case '<':
                        nextCh();
                        if (peekCh() == '-') {
                            nextCh();
                            return makeToken(TOK_DLESSDASH, start);
                        }
                        return makeToken(TOK_DLESS, start);
                    case '>':
                        nextCh();
                        return makeToken(TOK_LESSGREAT, start);
                    case '&':
                        nextCh();
                        return makeToken(TOK_LESSAND, start);
                    default:
                        return makeToken(TOK_LESS, start);
                }
            case '>':
                switch (peekCh()) {
                    case '|':
                        nextCh();
                        return makeToken(TOK_CLOBBER, start);
                    case '>':
                        nextCh();
                        return makeToken(TOK_DGREAT, start);
                    case '&':
                        nextCh();
                        return makeToken(TOK_GREATAND, start);
                    default:
                        return makeToken(TOK_GREAT, start);
                }
            case ';':
                if (peekCh() == ';') {
                    nextCh();
                    return makeToken(TOK_DSEMI, start);
                }
                return makeToken(TOK_SEMI, start);
            case '&':
                if (peekCh() == '&') {
                    nextCh();
                    return makeToken(TOK_AND_IF, start);
                }
                return makeToken(TOK_AMP, start);
            case '|':
                if (peekCh() == '|') {
                    nextCh();
                    return makeToken(TOK_OR_IF, start);
                }
                return makeToken(TOK_BAR, start);
        }
        throw new ShellFailureException("bad lexer state");
    }

    /**
     * Consume and return the next character. Once the end of stream has
     * been consumed, the reader is not read again and EOS keeps being
     * returned.
     *
     * @return the character consumed, or EOS
     * @throws ShellFailureException if the underlying reader fails
     */
    private int nextCh() throws ShellFailureException {
        try {
            if (nextCh == INVALID) {
                if (lastCh != EOS) {
                    lastCh = reader.read();
                    pos++;
                }
            } else {
                // Hand over the character that was read ahead by peekCh().
                lastCh = nextCh;
                nextCh = INVALID;
                pos++;
            }
            return lastCh;
        } catch (IOException ex) {
            throw new ShellFailureException("Unexpected exception", ex);
        }
    }

    /**
     * Return the next character without consuming it. Like
     * {@link #nextCh()}, this does not touch the reader again once the end
     * of stream has been consumed.
     *
     * @return the lookahead character, or EOS
     * @throws ShellFailureException if the underlying reader fails
     */
    private int peekCh() {
        try {
            if (nextCh == INVALID) {
                nextCh = (lastCh == EOS) ? EOS : reader.read();
            }
            return nextCh;
        } catch (IOException ex) {
            throw new ShellFailureException("Unexpected exception", ex);
        }
    }

    /**
     * Get the number of characters consumed so far.
     */
    private int getPos() {
        return pos;
    }

    /**
     * Reinterpret a token according to the context-sensitive tokenization rule
     * for a given context. WORD tokens may be mapped to reserved words, NAME or
     * ASSIGNMENT. Other tokens are left alone.
     *
     * @param token the token to be reinterpreted
     * @param context one of the RULE_*_CONTEXT values; any other value leaves
     *        the token unchanged
     * @return the reinterpreted token, or the original token if the rule
     *         does not apply to it
     */
    private BjorneToken reinterpret(BjorneToken token, int context) {
        if (token.getTokenType() != TOK_WORD) {
            return token;
        }
        switch (context) {
            case RULE_1_CONTEXT: {
                BjorneToken reserved = toReservedWordToken(token);
                return (reserved != null) ? reserved : token;
            }
            case RULE_5_CONTEXT:
                if (token.isName()) {
                    return remakeToken(TOK_NAME, token);
                }
                return token;
            case RULE_6_CONTEXT:
                if (token.getText().equals("in")) {
                    return remakeToken(TOK_IN, token);
                }
                return token;
            case RULE_7a_CONTEXT:
                if (token.getText().indexOf('=') == -1) {
                    return reinterpret(token, RULE_1_CONTEXT);
                }
                // DROP THROUGH TO RULE 7b
            case RULE_7b_CONTEXT: {
                // An assignment needs a non-empty, valid name before the '='.
                String text = token.getText();
                int eqPos = text.indexOf('=');
                if (eqPos <= 0 || !BjorneToken.isName(text.substring(0, eqPos))) {
                    return token;
                }
                return remakeToken(TOK_ASSIGNMENT, token);
            }
            case RULE_8_CONTEXT: {
                BjorneToken reserved = toReservedWordToken(token);
                if (reserved != null) {
                    return reserved;
                }
                if (token.isName()) {
                    return remakeToken(TOK_NAME, token);
                }
                return reinterpret(token, RULE_7b_CONTEXT);
            }
            default:
                return token;
        }
    }

    /**
     * Make a copy of a token with the same text and position but a
     * different token type.
     */
    private BjorneToken remakeToken(int tokenType, BjorneToken token) {
        return new BjorneToken(tokenType, token.getText(), token.start, token.end);
    }

    /**
     * Map a token whose text is a reserved word to the corresponding
     * reserved word token.
     *
     * @param token the token to be examined
     * @return the reserved word token, or {@code null} if the token's text
     *         is not a reserved word
     */
    private BjorneToken toReservedWordToken(BjorneToken token) {
        String str = token.getText();
        if (str.equals("for")) {
            return remakeToken(TOK_FOR, token);
        } else if (str.equals("while")) {
            return remakeToken(TOK_WHILE, token);
        } else if (str.equals("until")) {
            return remakeToken(TOK_UNTIL, token);
        } else if (str.equals("do")) {
            return remakeToken(TOK_DO, token);
        } else if (str.equals("done")) {
            return remakeToken(TOK_DONE, token);
        } else if (str.equals("if")) {
            return remakeToken(TOK_IF, token);
        } else if (str.equals("then")) {
            return remakeToken(TOK_THEN, token);
        } else if (str.equals("else")) {
            return remakeToken(TOK_ELSE, token);
        } else if (str.equals("elif")) {
            return remakeToken(TOK_ELIF, token);
        } else if (str.equals("fi")) {
            return remakeToken(TOK_FI, token);
        } else if (str.equals("case")) {
            return remakeToken(TOK_CASE, token);
        } else if (str.equals("esac")) {
            return remakeToken(TOK_ESAC, token);
        } else if (str.equals("{")) {
            return remakeToken(TOK_LBRACE, token);
        } else if (str.equals("}")) {
            return remakeToken(TOK_RBRACE, token);
        } else if (str.equals("!")) {
            return remakeToken(TOK_BANG, token);
        }
        return null;
    }
}