/**
*
*/
package org.sinnlabs.dbvim.evaluator;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.sinnlabs.dbvim.evaluator.exceptions.ParseException;
/**
 * Splits a condition expression string into a flat sequence of string tokens.
 *
 * <p>The tokenizer is a small state machine that recognises the following
 * token kinds:
 * <ul>
 * <li>parentheses: {@code (} and {@code )}, each emitted as its own token;</li>
 * <li>field names enclosed in single quotes ({@code 'name'});</li>
 * <li>join field names enclosed in backticks;</li>
 * <li>values enclosed in double quotes ({@code "text"});</li>
 * <li>literals: a Java identifier start character followed by Java
 * identifier part characters;</li>
 * <li>numbers: a digit followed by digits and/or the decimal separator;</li>
 * <li>operators: any other non-whitespace characters, matched greedily
 * against the configured delimiter list.</li>
 * </ul>
 * Quoted tokens are emitted including their surrounding quote characters.
 * Inside a quoted token a backslash makes the following character part of the
 * token verbatim (the backslash itself is dropped).
 *
 * @author peter.liverovsky
 */
public class ConditionTokenizer {

    protected static final int FIELD_STATE = 0;
    protected static final int DELIMETER_STATE = 1;
    protected static final int VALUE_STATE = 2;
    protected static final int LITERAL_STATE = 3;
    protected static final int NUMBER_STATE = 4;
    protected static final int OPERATOR_STATE = 5;
    protected static final int JOIN_FIELD_STATE = 6;

    /** Character accepted inside a number token in addition to digits. */
    private final char decimalSeparator;

    /** Known operator strings; used to decide how far an operator token extends. */
    private final List<String> delimeters;

    /**
     * Creates a tokenizer.
     *
     * @param decimalSeparator the character allowed inside number tokens
     * @param delimeters the operator strings used for greedy operator
     *        matching; the list is stored by reference (not copied). A
     *        {@code null} list is treated as "no multi-character operators".
     */
    public ConditionTokenizer(char decimalSeparator, List<String> delimeters) {
        this.decimalSeparator = decimalSeparator;
        this.delimeters = delimeters;
    }

    /** Converts a string into tokens.
     *
     * @param string The string to be split into tokens
     * @return The tokens, in the order they appear in {@code string}
     * @throws ParseException if a quoted token (single quote, backtick or
     *         double quote) is not closed before the end of the string, or
     *         if a backslash inside a quoted token is the last character
     * @throws NullPointerException if {@code string} is {@code null}
     */
    public Iterator<String> tokenize(String string) throws ParseException {
        List<String> tokens = new ArrayList<String>();
        StringBuilder token = new StringBuilder();
        int state = DELIMETER_STATE;
        final int length = string.length();

        for (int i = 0; i < length; i++) {
            char c = string.charAt(i);
            switch (state) {
            case DELIMETER_STATE:
                if (c == '(' || c == ')') {
                    tokens.add(String.valueOf(c));
                } else if (!Character.isWhitespace(c)) {
                    // First character of a new token decides which state we enter.
                    token.setLength(0);
                    token.append(c);
                    state = initialStateFor(c);
                }
                break;

            case FIELD_STATE:
            case JOIN_FIELD_STATE:
            case VALUE_STATE: {
                char quote = closingQuoteFor(state);
                if (c == quote) {
                    token.append(c);
                    tokens.add(token.toString());
                    token.setLength(0);
                    state = DELIMETER_STATE;
                } else if (c == '\\') {
                    if (i >= length - 1) {
                        // Nothing left to escape, so the closing quote is missing too.
                        throw new ParseException("Expected: " + quote + ".");
                    }
                    // Take the escaped character verbatim and drop the backslash.
                    i++;
                    token.append(string.charAt(i));
                } else {
                    token.append(c);
                }
                break;
            }

            case LITERAL_STATE:
                if (Character.isJavaIdentifierPart(c)) {
                    token.append(c);
                } else {
                    tokens.add(token.toString());
                    token.setLength(0);
                    state = DELIMETER_STATE;
                    if (!Character.isWhitespace(c)) {
                        i--; // re-read c as the start of the next token
                    }
                }
                break;

            case NUMBER_STATE:
                if (Character.isDigit(c) || c == decimalSeparator) {
                    token.append(c);
                } else {
                    tokens.add(token.toString());
                    token.setLength(0);
                    state = DELIMETER_STATE;
                    if (!Character.isWhitespace(c)) {
                        i--; // re-read c as the start of the next token
                    }
                }
                break;

            case OPERATOR_STATE:
                // Extend the operator only while the extended text is still the
                // beginning of some known delimiter. Checking the extended text
                // (not just the current token) keeps "a<b" from producing "<b"
                // when both "<" and "<=" are delimiters.
                if (!Character.isWhitespace(c)
                        && isDelimiterPrefix(token.toString() + c)) {
                    token.append(c);
                } else {
                    tokens.add(token.toString());
                    token.setLength(0);
                    state = DELIMETER_STATE;
                    if (!Character.isWhitespace(c)) {
                        i--; // re-read c as the start of the next token
                    }
                }
                break;

            default:
                break;
            }
        }

        // Flush or reject whatever token was still open when the input ended.
        if (state == LITERAL_STATE || state == NUMBER_STATE
                || state == OPERATOR_STATE) {
            tokens.add(token.toString());
        } else if (state == FIELD_STATE) {
            throw new ParseException("Unexpected end of statement. Expected: \'");
        } else if (state == JOIN_FIELD_STATE) {
            throw new ParseException("Unexpected end of statement. Expected: `");
        } else if (state == VALUE_STATE) {
            throw new ParseException("Unexpected end of statement. Expected: \"");
        }
        return tokens.iterator();
    }

    /** Returns the state entered when {@code c} is the first character of a token. */
    private static int initialStateFor(char c) {
        if (c == '\'') {
            return FIELD_STATE;
        }
        if (c == '`') {
            return JOIN_FIELD_STATE;
        }
        if (c == '"') {
            return VALUE_STATE;
        }
        if (Character.isJavaIdentifierStart(c)) {
            return LITERAL_STATE;
        }
        if (Character.isDigit(c)) {
            return NUMBER_STATE;
        }
        return OPERATOR_STATE;
    }

    /** Returns the quote character that terminates a token in the given quoted state. */
    private static char closingQuoteFor(int state) {
        switch (state) {
        case FIELD_STATE:
            return '\'';
        case JOIN_FIELD_STATE:
            return '`';
        default:
            return '"';
        }
    }

    /** Tells whether any configured delimiter begins with {@code candidate}. */
    private boolean isDelimiterPrefix(String candidate) {
        if (delimeters == null) {
            return false;
        }
        for (String d : delimeters) {
            if (d != null && d.startsWith(candidate)) {
                return true;
            }
        }
        return false;
    }
}