import java.util.List;
import java.util.ArrayList;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;

/**
 * Converts script source text into a flat list of {@link Token}s using a small
 * character-driven state machine (see {@link #Tokenize(String)}).
 *
 * <p>The classification helpers ({@code IsOp}, {@code FindOpType}, ...) are public and
 * stateless; they only inspect their arguments.
 */
public class Tokenizer {

    /**
     * Tells whether {@code chr} is one of the operator characters:
     * {@code +}, {@code -}, {@code *}, {@code /}, {@code <}, {@code >}, {@code =},
     * {@code !}, {@code |} or {@code &}.
     *
     * @param chr the character to classify
     * @return {@code true} if {@code chr} is an operator character
     */
    public boolean IsOp(char chr) {
        switch (chr) {
            case '+':
            case '-':
            case '*':
            case '/':
            case '<':
            case '>':
            case '=':
            case '!':
            case '|':
            case '&':
                return true;
            default:
                return false;
        }
    }

    /**
     * Maps an operator character, plus the character that follows it, to a token type.
     *
     * <p>Only {@code <}, {@code >}, {@code =} and {@code !} look at {@code nextChar}: when it
     * is {@code '='} the compound type (LESSEQUAL, GREATEREQUAL, EQUAL, NOTEQUAL) is returned.
     * For every other operator {@code nextChar} is ignored.
     *
     * @param firstOperator the first operator character
     * @param nextChar the character after it, or any non-{@code '='} value (e.g. {@code '\0'})
     *     when there is none
     * @return the matching type, or {@code TokenType.UNKNOWN} if {@code firstOperator} is not
     *     a recognised operator character
     */
    public TokenType FindOpType(char firstOperator, char nextChar) {
        boolean followedByEquals = nextChar == '=';
        switch (firstOperator) {
            case '+':
                return TokenType.ADD;
            case '-':
                return TokenType.SUBTRACT;
            case '*':
                return TokenType.MULTIPLY;
            case '/':
                return TokenType.DIVIDE;
            case '<':
                return followedByEquals ? TokenType.LESSEQUAL : TokenType.LESS;
            case '>':
                return followedByEquals ? TokenType.GREATEREQUAL : TokenType.GREATER;
            case '=':
                return followedByEquals ? TokenType.EQUAL : TokenType.ASSIGNMENT;
            case '!':
                return followedByEquals ? TokenType.NOTEQUAL : TokenType.NOT;
            case '|':
                return TokenType.OR;
            case '&':
                return TokenType.AND;
            default:
                return TokenType.UNKNOWN;
        }
    }

    /**
     * Tells whether {@code chr} is a round or square bracket.
     *
     * @param chr the character to classify
     * @return {@code true} for {@code (}, {@code )}, {@code [} and {@code ]}
     */
    public boolean IsParen(char chr) {
        switch (chr) {
            case '(':
            case ')':
            case '[':
            case ']':
                return true;
            default:
                return false;
        }
    }

    /**
     * Maps a bracket character to its token type.
     *
     * @param chr the character to classify
     * @return LEFT_PAREN, RIGHT_PAREN, LEFT_BRACKET or RIGHT_BRACKET, or
     *     {@code TokenType.UNKNOWN} for any other character
     */
    public TokenType FindParenType(char chr) {
        switch (chr) {
            case '(':
                return TokenType.LEFT_PAREN;
            case ')':
                return TokenType.RIGHT_PAREN;
            case '[':
                return TokenType.LEFT_BRACKET;
            case ']':
                return TokenType.RIGHT_BRACKET;
            default:
                return TokenType.UNKNOWN;
        }
    }

    /**
     * Tells whether {@code chr} is a punctuation character; currently only the comma.
     *
     * @param chr the character to classify
     * @return {@code true} if {@code chr} is {@code ','}
     */
    public boolean IsPunc(char chr) {
        return chr == ',';
    }

    /**
     * Maps a punctuation character to its token type.
     *
     * @param firstOperator the character to classify
     * @return {@code TokenType.COMMA} for {@code ','}, otherwise {@code TokenType.UNKNOWN}
     */
    public TokenType FindPuncType(char firstOperator) {
        if (firstOperator == ',') {
            return TokenType.COMMA;
        }
        return TokenType.UNKNOWN;
    }

    /**
     * Maps a word to its statement token type.
     *
     * @param str the word to classify; must not be {@code null} (switching on a
     *     {@code null} string throws {@link NullPointerException})
     * @return SCRIPT, END, WHILE, IF, ELSE or DEF for the matching reserved word
     *     (case-sensitive), and {@code TokenType.KEYWORD} for every other word
     */
    public TokenType FindStatementType(String str) {
        switch (str) {
            case "script":
                return TokenType.SCRIPT;
            case "end":
                return TokenType.END;
            case "while":
                return TokenType.WHILE;
            case "if":
                return TokenType.IF;
            case "else":
                return TokenType.ELSE;
            case "def":
                return TokenType.DEF;
            default:
                return TokenType.KEYWORD;
        }
    }

    /**
     * Splits {@code source} into tokens.
     *
     * <p>Scanning rules, applied character by character:
     * <ul>
     *   <li>An operator character starts an operator token. If the very next character is
     *       also an operator character, the two are merged into a single two-character
     *       token typed by {@link #FindOpType(char, char)}; otherwise a one-character
     *       token is emitted.</li>
     *   <li>A digit starts a NUMBER token that runs while digits follow.</li>
     *   <li>Brackets and commas are emitted immediately as single-character tokens.</li>
     *   <li>A double quote starts a STRING token whose text is everything up to (not
     *       including) the next double quote; there is no escape handling.</li>
     *   <li>{@code #} starts a comment that runs up to and including the next newline and
     *       produces no token.</li>
     *   <li>A letter starts a word that runs while letters or digits follow; its type
     *       comes from {@link #FindStatementType(String)}.</li>
     *   <li>Any other character (whitespace included) is skipped.</li>
     * </ul>
     *
     * <p>A number, word or operator that is still being read when the input ends is
     * emitted. A string literal that is never closed, and a comment that is never
     * terminated, emit nothing.
     *
     * @param source the text to tokenize; must not be {@code null}
     * @return the tokens in source order; empty if {@code source} contains none
     * @throws NullPointerException if {@code source} is {@code null}
     */
    public List<Token> Tokenize(String source) {
        if (source == null) {
            throw new NullPointerException("source must not be null");
        }

        List<Token> tokens = new ArrayList<Token>();
        StringBuilder tokenText = new StringBuilder();
        char firstOperator = '\0';
        TokenizeState state = TokenizeState.DEFAULT;

        for (int index = 0; index < source.length(); index++) {
            char chr = source.charAt(index);
            switch (state) {
                case DEFAULT:
                    if (IsOp(chr)) {
                        // Defer emitting until we know whether a second operator char follows.
                        firstOperator = chr;
                        state = TokenizeState.OPERATOR;
                    } else if (Character.isDigit(chr)) {
                        tokenText.append(chr);
                        state = TokenizeState.NUMBER;
                    } else if (IsParen(chr)) {
                        tokens.add(new Token(Character.toString(chr), FindParenType(chr)));
                    } else if (IsPunc(chr)) {
                        tokens.add(new Token(Character.toString(chr), FindPuncType(chr)));
                    } else if (chr == '"') {
                        state = TokenizeState.STRING;
                    } else if (chr == '#') {
                        state = TokenizeState.COMMENT;
                    } else if (Character.isLetter(chr)) {
                        tokenText.append(chr);
                        state = TokenizeState.KEYWORD;
                    }
                    break;

                case OPERATOR:
                    if (IsOp(chr)) {
                        // Emit the two-character operator right away. Staying in this state
                        // would let a third operator char overwrite the pending token with
                        // firstOperator + thirdChar and lose the middle character.
                        String text = Character.toString(firstOperator) + Character.toString(chr);
                        tokens.add(new Token(text, FindOpType(firstOperator, chr)));
                    } else {
                        tokens.add(singleOperatorToken(firstOperator));
                        index--; // re-scan this character in the DEFAULT state
                    }
                    state = TokenizeState.DEFAULT;
                    break;

                case NUMBER:
                    if (Character.isDigit(chr)) {
                        tokenText.append(chr);
                    } else {
                        tokens.add(new Token(tokenText.toString(), TokenType.NUMBER));
                        tokenText.setLength(0);
                        state = TokenizeState.DEFAULT;
                        index--; // re-scan this character in the DEFAULT state
                    }
                    break;

                case STRING:
                    if (chr == '"') {
                        tokens.add(new Token(tokenText.toString(), TokenType.STRING));
                        tokenText.setLength(0);
                        state = TokenizeState.DEFAULT;
                    } else {
                        tokenText.append(chr);
                    }
                    break;

                case COMMENT:
                    if (chr == '\n') {
                        state = TokenizeState.DEFAULT;
                    }
                    break;

                case KEYWORD:
                    if (Character.isLetterOrDigit(chr)) {
                        tokenText.append(chr);
                    } else {
                        String word = tokenText.toString();
                        tokens.add(new Token(word, FindStatementType(word)));
                        tokenText.setLength(0);
                        state = TokenizeState.DEFAULT;
                        index--; // re-scan this character in the DEFAULT state
                    }
                    break;
            }
        }

        // The loop only emits a number/word/operator when it sees the character AFTER it,
        // so whatever is pending at end of input has to be flushed here.
        switch (state) {
            case OPERATOR:
                tokens.add(singleOperatorToken(firstOperator));
                break;
            case NUMBER:
                tokens.add(new Token(tokenText.toString(), TokenType.NUMBER));
                break;
            case KEYWORD:
                String word = tokenText.toString();
                tokens.add(new Token(word, FindStatementType(word)));
                break;
            default:
                // Nothing pending, or an unterminated string/comment: emit nothing.
                break;
        }

        return tokens;
    }

    /** Builds the token for an operator character that stands on its own. */
    private Token singleOperatorToken(char operator) {
        return new Token(Character.toString(operator), FindOpType(operator, '\0'));
    }
}