/******************************************************************************* * openDLX - A DLX/MIPS processor simulator. * Copyright (C) 2013 The openDLX project, University of Augsburg, Germany * Project URL: <https://sourceforge.net/projects/opendlx> * Development branch: <https://github.com/smetzlaff/openDLX> * * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program, see <LICENSE>. If not, see * <http://www.gnu.org/licenses/>. ******************************************************************************/ package openDLX.asm.tokenizer; import java.io.BufferedReader; import java.io.IOException; import java.util.Vector; import openDLX.asm.fsm.CharacterList; import openDLX.asm.fsm.FunctionTransition; import openDLX.asm.fsm.InverseCharacterList; import openDLX.asm.fsm.Procedure; import openDLX.asm.fsm.State; import openDLX.asm.fsm.Transition; import openDLX.asm.instruction.Instructions; import openDLX.asm.instruction.Registers; /* * TODO: * character literal * Error detection e.g. * - 012abc * - abc.+ * very ugly code but I learned much from it */ /** * This class reads Tokens from a BufferedReader * */ public class Tokenizer { private BufferedCharReader reader_; private Token token_; private int char_; // ================* states *================ private State startState; private State identifierState; private State labelState; private State directiveState; private State operatorState; private State separatorState; private State hexOctalConstantState; private State octalConstantState; private State hexConstantState; private State decimalConstantState; private State stringLiteralState; private State stringBackslashState; private State stringEndState; private State characterLiteralState; /** * create new Tokenizer and initialize state machine * * @param reader * @throws IOException */ public Tokenizer() { startState = new State(false, "start"); identifierState = new State(true, "identifier"); labelState = new State(true, "label"); directiveState = new State(false, "directive"); operatorState = new State(true, "operator"); separatorState = new State(true, "separator"); hexOctalConstantState = new State(true, "hex or octal constant"); octalConstantState = new State(true, "octal constant"); hexConstantState = new State(true, "hex constant"); decimalConstantState = new State(true, "decimal constant"); stringLiteralState = new State(false, "string literal"); stringBackslashState = new State(false, "string backslash"); stringEndState = new State(true, "string literal end"); characterLiteralState = new State(false, "character literal"); // ================* startState *================ //leading whitespace startState.addTransition(new Transition(startState, new CharacterList( Properties.T_WHITESPACE))); //identifier startState.addTransition(new FunctionTransition(identifierState, new CharacterList( Properties.T_IDENTIFIER_START), new Procedure() { public void procedure(Object o) { startToken(); setTokenType(TokenType.Identifier); appendChar(); } })); //directive startState.addTransition(new FunctionTransition(directiveState, new Character('.'), new Procedure() { public void procedure(Object o) { startToken(); setTokenType(TokenType.Directive); appendChar(); } })); //operator startState.addTransition(new FunctionTransition(operatorState, new CharacterList( Properties.T_OPERATOR), new Procedure() { public void procedure(Object o) { startToken(); setTokenType(TokenType.Operator); appendChar(); } })); //separator startState.addTransition(new FunctionTransition(separatorState, new CharacterList( Properties.T_SEPARATOR), new Procedure() { public void procedure(Object o) { startToken(); setTokenType(TokenType.Separator); appendChar(); } })); //decimal constant startState.addTransition(new FunctionTransition(decimalConstantState, new CharacterList( Properties.T_DECIMAL_DIGIT_BEGIN), new Procedure() { public void procedure(Object o) { startToken(); setTokenType(TokenType.IntegerConstant); appendChar(); } })); //octal constant startState.addTransition(new FunctionTransition(hexOctalConstantState, new Character('0'), new Procedure() { public void procedure(Object o) { startToken(); setTokenType(TokenType.IntegerConstant); appendChar(); } })); //string literal startState.addTransition(new FunctionTransition(stringLiteralState, new Character('"'), new Procedure() { public void procedure(Object o) { startToken(); setTokenType(TokenType.StringLiteral); } })); //character literal startState.addTransition(new FunctionTransition(characterLiteralState, new Character('\''), new Procedure() { public void procedure(Object o) { startToken(); setTokenType(TokenType.CharacterLiteral); } })); // ================* identifierState *================ identifierState.addTransition(new FunctionTransition(identifierState, new CharacterList( Properties.T_IDENTIFIER_PART), new Procedure() { public void procedure(Object o) { appendChar(); } })); //label identifierState.addTransition(new FunctionTransition(labelState, new Character(':'), new Procedure() { public void procedure(Object o) { setTokenType(TokenType.Label); appendChar(); } })); // ================* directiveState *================ directiveState.addTransition(new FunctionTransition(identifierState, new CharacterList( Properties.T_IDENTIFIER_START), new Procedure() { public void procedure(Object o) { appendChar(); } })); // ================* hexOctalConstantState *================ hexOctalConstantState.addTransition(new FunctionTransition(octalConstantState, new CharacterList(Properties.T_OCTAL_DIGIT), new Procedure() { public void procedure(Object o) { appendChar(); } })); char[] x = { 'X', 'x' }; hexOctalConstantState.addTransition(new FunctionTransition(hexConstantState, new CharacterList(x), new Procedure() { public void procedure(Object o) { appendChar(); } })); // ================* decimalConstantState *================ decimalConstantState.addTransition(new FunctionTransition(decimalConstantState, new CharacterList(Properties.T_DECIMAL_DIGIT), new Procedure() { public void procedure(Object o) { appendChar(); } })); // ================* octalConstantState *================ octalConstantState.addTransition(new FunctionTransition(octalConstantState, new CharacterList(Properties.T_OCTAL_DIGIT), new Procedure() { public void procedure(Object o) { appendChar(); } })); // ================* hexConstantState *================ hexConstantState.addTransition(new FunctionTransition(hexConstantState, new CharacterList( Properties.T_HEX_DIGIT), new Procedure() { public void procedure(Object o) { appendChar(); } })); // ================* stringLiteralState *================ stringLiteralState.addTransition(new Transition(stringEndState, new Character('"'))); stringLiteralState.addTransition(new Transition(stringBackslashState, new Character('\\'))); char[] negChars = { '"', '\\' }; stringLiteralState.addTransition(new FunctionTransition(stringLiteralState, new InverseCharacterList(negChars), new Procedure() { public void procedure(Object o) { appendChar(); } })); stringBackslashState.addTransition(new FunctionTransition(stringLiteralState, new Character('"'), new Procedure() { public void procedure(Object o) { appendChar('"'); } })); stringBackslashState.addTransition(new FunctionTransition(stringLiteralState, new InverseCharacterList('"'), new Procedure() { public void procedure(Object o) { appendChar('\\'); appendChar(); } })); // ================* characterLiteralState *================ //TODO: evaluating character literal } public void setReader(BufferedReader reader) throws IOException { if (reader == null) reader_ = null; else reader_ = new BufferedCharReader(reader); } /** * * @return array of tokens of one line * @throws IOException * @throws TokenizerException */ public Token[] readLine() throws IOException, TokenizerException { if (reader_ == null || !reader_.readLine()) return null; char_ = reader_.next(); Vector<Token> tokens = new Vector<Token>(); Token t = nextToken(); while (t != null) { tokens.add(t); t = nextToken(); } return tokens.toArray(new Token[0]); } /** * * @return next token in line * @throws TokenizerException */ private Token nextToken() throws TokenizerException { if (char_ == -1) return null; State currentState = startState; State lastState = null; token_ = null; lastState = currentState; currentState = currentState.doTransition(new Character((char) char_)); if(currentState != null) char_ = reader_.next(); while (currentState != null && char_ != -1) { lastState = currentState; currentState = currentState.doTransition(new Character((char) char_)); if (currentState == null) break; char_ = reader_.next(); } //TODO: problems with exceptions? if (token_ != null && currentState != null && !currentState.isAccepting()) { throw new TokenizerException("unexpected end of token'", reader_.position()); } if (char_ != -1 && !lastState.isAccepting()) throw new TokenizerException("not expected character: '" + (char) char_ + "'", reader_.position()); if (token_ != null && token_.getTokenType() == TokenType.Identifier) setIdentifierType(token_); if (token_ != null && token_.getTokenType() == TokenType.Label && token_.getString().charAt(0) == '.') { throw new TokenizerException("Label cannot start with a period", new Position( reader_.position().line, 0)); } return token_; } private void setIdentifierType(Token t) { if (Registers.instance().getInteger(t.getString()) != null) t.setTokenType(TokenType.Register); else if (Instructions.instance().getInstruction(t.getString()) != null) { t.setTokenType(TokenType.Mnemonic); } } /* * ===============================* Wrapper *=============================== */ private void startToken() { token_ = new Token(reader_.position()); } private void appendChar() { token_.append((char) char_); } private void appendChar(char c) { token_.append(c); } private void setTokenType(TokenType t) { token_.setTokenType(t); } }