/* * Copyright (C) 2009 JavaRosa * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package org.openrosa.client.jr.xpath.parser; import java.util.Vector; import org.openrosa.client.jr.xpath.expr.XPathQName; public class Lexer { public static final int LEX_CONTEXT_VAL = 1; public static final int LEX_CONTEXT_OP = 2; public static Vector lex (String expr) throws XPathSyntaxException { Vector tokens = new Vector(); int i = 0; int context = LEX_CONTEXT_VAL; while (i < expr.length()) { int c = expr.charAt(i); int d = getChar(expr, i + 1); Token token = null; int skip = 1; if (" \n\t\f\r".indexOf(c) >= 0) { /* whitespace; do nothing */ } else if (c == '=') { token = new Token(Token.EQ); } else if (c == '!' && d == '=') { token = new Token(Token.NEQ); skip = 2; } else if (c == '<') { if (d == '=') { token = new Token(Token.LTE); skip = 2; } else { token = new Token(Token.LT); } } else if (c == '>') { if (d == '=') { token = new Token(Token.GTE); skip = 2; } else { token = new Token(Token.GT); } } else if (c == '+') { token = new Token(Token.PLUS); } else if (c == '-') { token = new Token(context == LEX_CONTEXT_VAL ? Token.UMINUS : Token.MINUS); //not sure this is entirely correct } else if (c == '*') { token = new Token(context == LEX_CONTEXT_VAL ? Token.WILDCARD : Token.MULT); } else if (c == '|') { token = new Token(Token.UNION); } else if (c == '/') { if (d == '/') { token = new Token(Token.DBL_SLASH); skip = 2; } else { token = new Token(Token.SLASH); } } else if (c == '[') { token = new Token(Token.LBRACK); } else if (c == ']') { token = new Token(Token.RBRACK); } else if (c == '(') { token = new Token(Token.LPAREN); } else if (c == ')') { token = new Token(Token.RPAREN); } else if (c == '.') { if (d == '.') { token = new Token(Token.DBL_DOT); skip = 2; } else if (isDigit(d)) { skip = matchNumeric(expr, i); token = new Token(Token.NUM, Double.valueOf(expr.substring(i, i + skip))); } else { token = new Token(Token.DOT); } } else if (c == '@') { token = new Token(Token.AT); } else if (c == ',') { token = new Token(Token.COMMA); } else if (c == ':' && d == ':') { token = new Token(Token.DBL_COLON); skip = 2; } else if (context == LEX_CONTEXT_OP && i + 3 <= expr.length() && "and".equals(expr.substring(i, i + 3))) { token = new Token(Token.AND); skip = 3; } else if (context == LEX_CONTEXT_OP && i + 2 <= expr.length() && "or".equals(expr.substring(i, i + 2))) { token = new Token(Token.OR); skip = 2; } else if (context == LEX_CONTEXT_OP && i + 3 <= expr.length() && "div".equals(expr.substring(i, i + 3))) { token = new Token(Token.DIV); skip = 3; } else if (context == LEX_CONTEXT_OP && i + 3 <= expr.length() && "mod".equals(expr.substring(i, i + 3))) { token = new Token(Token.MOD); skip = 3; } else if (c == '$') { int len = matchQName(expr, i + 1); if (len == 0) { throw new XPathSyntaxException(); } else { token = new Token(Token.VAR, new XPathQName(expr.substring(i + 1, i + len + 1))); skip = len + 1; } } else if (c == '\'' || c == '\"') { int end = expr.indexOf(c, i + 1); if (end == -1) { throw new XPathSyntaxException(); } else { token = new Token(Token.STR, expr.substring(i + 1, end)); skip = (end - i) + 1; } } else if (isDigit(c)) { skip = matchNumeric(expr, i); token = new Token(Token.NUM, Double.valueOf(expr.substring(i, i + skip))); } else if (context == LEX_CONTEXT_VAL && (isAlpha(c) || c == '_')) { int len = matchQName(expr, i); String name = expr.substring(i, i + len); if (name.indexOf(':') == -1 && getChar(expr, i + len) == ':' && getChar(expr, i + len + 1) == '*') { token = new Token(Token.NSWILDCARD, name); skip = len + 2; } else { token = new Token(Token.QNAME, new XPathQName(name)); skip = len; } } else { throw new XPathSyntaxException(); } if (token != null) { if (token.type == Token.WILDCARD || token.type == Token.NSWILDCARD || token.type == Token.QNAME || token.type == Token.VAR || token.type == Token.NUM || token.type == Token.STR || token.type == Token.RBRACK || token.type == Token.RPAREN || token.type == Token.DOT || token.type == Token.DBL_DOT) { context = LEX_CONTEXT_OP; } else { context = LEX_CONTEXT_VAL; } tokens.addElement(token); } i += skip; } return tokens; } private static int matchNumeric (String expr, int i) { boolean seenDecimalPoint = false; int start = i; int c; for (; i < expr.length(); i++) { c = expr.charAt(i); if (!(isDigit(c) || (!seenDecimalPoint && c == '.'))) break; if (c == '.') seenDecimalPoint = true; } return i - start; } private static int matchQName (String expr, int i) { int len = matchNCName(expr, i); if (len > 0 && getChar(expr, i + len) == ':') { int len2 = matchNCName(expr, i + len + 1); if (len2 > 0) len += len2 + 1; } return len; } private static int matchNCName (String expr, int i) { int start = i; int c; for (; i < expr.length(); i++) { c = expr.charAt(i); if (!(isAlpha(c) || c == '_' || (i > start && (isDigit(c) || c == '.' || c == '-')))) break; } return i - start; } //get char from string, return -1 for EOF private static int getChar (String expr, int i) { return (i < expr.length() ? expr.charAt(i) : -1); } private static boolean isDigit (int c) { return (c < 0 ? false : Character.isDigit((char)c)); } private static boolean isAlpha (int c) { return (c < 0 ? false : Character.isLowerCase((char)c) || Character.isUpperCase((char)c)); } }