/*
* Copyright (C) 2009 JavaRosa
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.openrosa.client.jr.xpath.parser;
import java.util.Vector;
import org.openrosa.client.jr.xpath.expr.XPathQName;
/**
 * Hand-written tokenizer that turns an XPath expression string into a
 * {@link Vector} of {@code Token} objects.
 *
 * The lexer keeps a small piece of state (the "context") recording whether
 * the previous token ended an operand. Several characters and words are
 * ambiguous without it: {@code *} is either a wildcard or multiplication,
 * {@code -} is either unary or binary minus, and {@code and}/{@code or}/
 * {@code div}/{@code mod} are only treated as operators directly after an
 * operand.
 */
public class Lexer {
    /** Context in which a value (name, literal, wildcard, ...) is expected next. */
    public static final int LEX_CONTEXT_VAL = 1;
    /** Context in which an operator is expected next (the previous token ended an operand). */
    public static final int LEX_CONTEXT_OP = 2;

    /**
     * Splits an expression into tokens.
     *
     * @param expr the expression text to tokenize
     * @return a Vector holding the {@code Token} objects in source order;
     *         whitespace produces no token
     * @throws XPathSyntaxException if a {@code $} is not followed by a name,
     *         a quoted string is not terminated, or a character cannot start
     *         any token in the current context
     */
    public static Vector lex (String expr) throws XPathSyntaxException {
        Vector tokens = new Vector();
        int i = 0;
        int context = LEX_CONTEXT_VAL;

        while (i < expr.length()) {
            int c = expr.charAt(i);
            int d = getChar(expr, i + 1); // one character of lookahead, -1 at end of input

            Token token = null;
            int skip = 1; // number of characters consumed by this iteration

            if (" \n\t\f\r".indexOf(c) >= 0) {
                /* whitespace; do nothing */
            } else if (c == '=') {
                token = new Token(Token.EQ);
            } else if (c == '!' && d == '=') {
                token = new Token(Token.NEQ);
                skip = 2;
            } else if (c == '<') {
                if (d == '=') {
                    token = new Token(Token.LTE);
                    skip = 2;
                } else {
                    token = new Token(Token.LT);
                }
            } else if (c == '>') {
                if (d == '=') {
                    token = new Token(Token.GTE);
                    skip = 2;
                } else {
                    token = new Token(Token.GT);
                }
            } else if (c == '+') {
                token = new Token(Token.PLUS);
            } else if (c == '-') {
                // where a value is expected the minus is unary, otherwise binary
                // (original author's note: not sure this is entirely correct)
                token = new Token(context == LEX_CONTEXT_VAL ? Token.UMINUS : Token.MINUS);
            } else if (c == '*') {
                // where a value is expected '*' is a name wildcard, otherwise multiplication
                token = new Token(context == LEX_CONTEXT_VAL ? Token.WILDCARD : Token.MULT);
            } else if (c == '|') {
                token = new Token(Token.UNION);
            } else if (c == '/') {
                if (d == '/') {
                    token = new Token(Token.DBL_SLASH);
                    skip = 2;
                } else {
                    token = new Token(Token.SLASH);
                }
            } else if (c == '[') {
                token = new Token(Token.LBRACK);
            } else if (c == ']') {
                token = new Token(Token.RBRACK);
            } else if (c == '(') {
                token = new Token(Token.LPAREN);
            } else if (c == ')') {
                token = new Token(Token.RPAREN);
            } else if (c == '.') {
                if (d == '.') {
                    token = new Token(Token.DBL_DOT);
                    skip = 2;
                } else if (isAsciiDigit(d)) {
                    // number written with a leading decimal point, e.g. ".5"
                    skip = matchNumeric(expr, i);
                    token = new Token(Token.NUM, Double.valueOf(expr.substring(i, i + skip)));
                } else {
                    token = new Token(Token.DOT);
                }
            } else if (c == '@') {
                token = new Token(Token.AT);
            } else if (c == ',') {
                token = new Token(Token.COMMA);
            } else if (c == ':' && d == ':') {
                token = new Token(Token.DBL_COLON);
                skip = 2;
            } else if (context == LEX_CONTEXT_OP && expr.startsWith("and", i)) {
                token = new Token(Token.AND);
                skip = 3;
            } else if (context == LEX_CONTEXT_OP && expr.startsWith("or", i)) {
                token = new Token(Token.OR);
                skip = 2;
            } else if (context == LEX_CONTEXT_OP && expr.startsWith("div", i)) {
                token = new Token(Token.DIV);
                skip = 3;
            } else if (context == LEX_CONTEXT_OP && expr.startsWith("mod", i)) {
                token = new Token(Token.MOD);
                skip = 3;
            } else if (c == '$') {
                // variable reference: '$' immediately followed by a qualified name
                int len = matchQName(expr, i + 1);
                if (len == 0) {
                    throw new XPathSyntaxException();
                } else {
                    token = new Token(Token.VAR, new XPathQName(expr.substring(i + 1, i + len + 1)));
                    skip = len + 1;
                }
            } else if (c == '\'' || c == '\"') {
                // string literal: runs to the next occurrence of the same quote character
                int end = expr.indexOf(c, i + 1);
                if (end == -1) {
                    throw new XPathSyntaxException();
                } else {
                    token = new Token(Token.STR, expr.substring(i + 1, end));
                    skip = (end - i) + 1;
                }
            } else if (isAsciiDigit(c)) {
                // Only ASCII digits start a number: Double.valueOf rejects any other
                // digit characters with an unchecked NumberFormatException.
                skip = matchNumeric(expr, i);
                token = new Token(Token.NUM, Double.valueOf(expr.substring(i, i + skip)));
            } else if (context == LEX_CONTEXT_VAL && (isAlpha(c) || c == '_')) {
                int len = matchQName(expr, i);
                String name = expr.substring(i, i + len);
                if (name.indexOf(':') == -1 && getChar(expr, i + len) == ':' && getChar(expr, i + len + 1) == '*') {
                    // unprefixed name followed by ":*" is a namespace wildcard
                    token = new Token(Token.NSWILDCARD, name);
                    skip = len + 2;
                } else {
                    token = new Token(Token.QNAME, new XPathQName(name));
                    skip = len;
                }
            } else {
                throw new XPathSyntaxException();
            }

            if (token != null) {
                context = endsOperand(token) ? LEX_CONTEXT_OP : LEX_CONTEXT_VAL;
                tokens.addElement(token);
            }
            i += skip;
        }

        return tokens;
    }

    /**
     * Tells whether the given token switches the lexer to operator context,
     * i.e. whether it is one of the token types that can end an operand.
     */
    private static boolean endsOperand (Token token) {
        return token.type == Token.WILDCARD ||
               token.type == Token.NSWILDCARD ||
               token.type == Token.QNAME ||
               token.type == Token.VAR ||
               token.type == Token.NUM ||
               token.type == Token.STR ||
               token.type == Token.RBRACK ||
               token.type == Token.RPAREN ||
               token.type == Token.DOT ||
               token.type == Token.DBL_DOT;
    }

    /**
     * Returns the length of the numeric literal starting at index i: a run of
     * ASCII digits containing at most one decimal point.
     */
    private static int matchNumeric (String expr, int i) {
        boolean seenDecimalPoint = false;
        int start = i;

        for (; i < expr.length(); i++) {
            int c = expr.charAt(i);
            if (c == '.' && !seenDecimalPoint) {
                seenDecimalPoint = true;
            } else if (!isAsciiDigit(c)) {
                // a second '.' or any non-digit ends the literal
                break;
            }
        }

        return i - start;
    }

    /**
     * Returns the length of the qualified name starting at index i: an NCName,
     * optionally followed by ':' and a second non-empty NCName. Returns 0 when
     * no name starts at i.
     */
    private static int matchQName (String expr, int i) {
        int len = matchNCName(expr, i);

        if (len > 0 && getChar(expr, i + len) == ':') {
            int len2 = matchNCName(expr, i + len + 1);

            // the ':' is only part of the name when a local part follows it
            if (len2 > 0)
                len += len2 + 1;
        }

        return len;
    }

    /**
     * Returns the length of the name starting at index i. The first character
     * must be a letter or '_'; later characters may also be digits, '.' or '-'.
     */
    private static int matchNCName (String expr, int i) {
        int start = i;

        for (; i < expr.length(); i++) {
            int c = expr.charAt(i);
            boolean nameStart = isAlpha(c) || c == '_';
            boolean nameRest = i > start && (isDigit(c) || c == '.' || c == '-');
            if (!(nameStart || nameRest))
                break;
        }

        return i - start;
    }

    /** Gets the char at index i, or -1 when i is at or beyond the end of the string (EOF). */
    private static int getChar (String expr, int i) {
        return (i < expr.length() ? expr.charAt(i) : -1);
    }

    /** True for the characters '0' through '9' only; false for the -1 EOF marker. */
    private static boolean isAsciiDigit (int c) {
        return c >= '0' && c <= '9';
    }

    /** True when c is a digit according to {@link Character#isDigit(char)}; false for the -1 EOF marker. */
    private static boolean isDigit (int c) {
        return (c < 0 ? false : Character.isDigit((char)c));
    }

    /** True when c is a lower-case or upper-case letter; false for the -1 EOF marker. */
    private static boolean isAlpha (int c) {
        return (c < 0 ? false : Character.isLowerCase((char)c) || Character.isUpperCase((char)c));
    }
}