/*
* $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/saxpath/base/XPathLexer.java,v 1.17 2006/02/05 21:47:42 elharo Exp $
* $Revision: 1.17 $
* $Date: 2006/02/05 21:47:42 $
*
* ====================================================================
*
* Copyright 2000-2002 bob mcwhirter & James Strachan.
* All rights reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name of the Jaxen Project nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
* PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
* OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ====================================================================
* This software consists of voluntary contributions made by many
* individuals on behalf of the Jaxen Project and was originally
* created by bob mcwhirter <bob@werken.com> and
* James Strachan <jstrachan@apache.org>. For more information on the
* Jaxen Project, please see <http://www.jaxen.org/>.
*
* $Id: XPathLexer.java,v 1.17 2006/02/05 21:47:42 elharo Exp $
*/
package org.orbeon.jaxen.saxpath.base;
class XPathLexer
{
private String xpath;
private int currentPosition;
private int endPosition;
private Token previousToken;
XPathLexer(String xpath)
{
setXPath( xpath );
}
private void setXPath(String xpath)
{
this.xpath = xpath;
this.currentPosition = 0;
this.endPosition = xpath.length();
}
String getXPath()
{
return this.xpath;
}
Token nextToken()
{
Token token = null;
do
{
token = null;
switch ( LA(1) )
{
case '$':
{
token = dollar();
break;
}
case '"':
case '\'':
{
token = literal();
break;
}
case '/':
{
token = slashes();
break;
}
case ',':
{
token = comma();
break;
}
case '(':
{
token = leftParen();
break;
}
case ')':
{
token = rightParen();
break;
}
case '[':
{
token = leftBracket();
break;
}
case ']':
{
token = rightBracket();
break;
}
case '+':
{
token = plus();
break;
}
case '-':
{
token = minus();
break;
}
case '<':
case '>':
{
token = relationalOperator();
break;
}
case '=':
{
token = equals();
break;
}
case '!':
{
if ( LA(2) == '=' )
{
token = notEquals();
}
break;
}
case '|':
{
token = pipe();
break;
}
case '@':
{
token = at();
break;
}
case ':':
{
if ( LA(2) == ':' )
{
token = doubleColon();
}
else
{
token = colon();
}
break;
}
case '*':
{
token = star();
break;
}
case '.':
{
switch ( LA(2) )
{
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
{
token = number();
break;
}
default:
{
token = dots();
break;
}
}
break;
}
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
{
token = number();
break;
}
case ' ':
case '\t':
case '\n':
case '\r':
{
token = whitespace();
break;
}
default:
{
if ( isIdentifierStartChar( LA(1) ) )
{
token = identifierOrOperatorName();
}
}
}
if ( token == null )
{
if (!hasMoreChars())
{
token = new Token( TokenTypes.EOF,
getXPath(),
currentPosition(),
endPosition() );
}
else
{
token = new Token( TokenTypes.ERROR,
getXPath(),
currentPosition(),
endPosition() );
}
}
}
while ( token.getTokenType() == TokenTypes.SKIP );
setPreviousToken( token );
return token;
}
private Token identifierOrOperatorName()
{
Token token = null;
if ( previousToken != null )
{
// For some reason, section 3.7, Lexical structure,
// doesn't seem to feel like it needs to mention the
// SLASH, DOUBLE_SLASH, and COLON tokens for the test
// if an NCName is an operator or not.
//
// According to section 3.7, "/foo" should be considered
// as a SLASH following by an OperatorName being 'foo'.
// Which is just simply, clearly, wrong, in my mind.
//
// -bob
switch ( previousToken.getTokenType() )
{
case TokenTypes.AT:
case TokenTypes.DOUBLE_COLON:
case TokenTypes.LEFT_PAREN:
case TokenTypes.LEFT_BRACKET:
case TokenTypes.AND:
case TokenTypes.OR:
case TokenTypes.MOD:
case TokenTypes.DIV:
case TokenTypes.COLON:
case TokenTypes.SLASH:
case TokenTypes.DOUBLE_SLASH:
case TokenTypes.PIPE:
case TokenTypes.DOLLAR:
case TokenTypes.PLUS:
case TokenTypes.MINUS:
case TokenTypes.STAR:
case TokenTypes.COMMA:
case TokenTypes.LESS_THAN_SIGN:
case TokenTypes.GREATER_THAN_SIGN:
case TokenTypes.LESS_THAN_OR_EQUALS_SIGN:
case TokenTypes.GREATER_THAN_OR_EQUALS_SIGN:
case TokenTypes.EQUALS:
case TokenTypes.NOT_EQUALS:
{
token = identifier();
break;
}
default:
{
token = operatorName();
break;
}
}
}
else
{
token = identifier();
}
return token;
}
private Token identifier()
{
Token token = null;
int start = currentPosition();
while ( hasMoreChars() )
{
if ( isIdentifierChar( LA(1) ) )
{
consume();
}
else
{
break;
}
}
token = new Token( TokenTypes.IDENTIFIER,
getXPath(),
start,
currentPosition() );
return token;
}
private Token operatorName()
{
Token token = null;
switch ( LA(1) )
{
case 'a':
{
token = and();
break;
}
case 'o':
{
token = or();
break;
}
case 'm':
{
token = mod();
break;
}
case 'd':
{
token = div();
break;
}
}
return token;
}
private Token mod()
{
Token token = null;
if ( ( LA(1) == 'm' )
&&
( LA(2) == 'o' )
&&
( LA(3) == 'd' )
)
{
token = new Token( TokenTypes.MOD,
getXPath(),
currentPosition(),
currentPosition()+3 );
consume();
consume();
consume();
}
return token;
}
private Token div()
{
Token token = null;
if ( ( LA(1) == 'd' )
&&
( LA(2) == 'i' )
&&
( LA(3) == 'v' )
)
{
token = new Token( TokenTypes.DIV,
getXPath(),
currentPosition(),
currentPosition()+3 );
consume();
consume();
consume();
}
return token;
}
private Token and()
{
Token token = null;
if ( ( LA(1) == 'a' )
&&
( LA(2) == 'n' )
&&
( LA(3) == 'd' )
)
{
token = new Token( TokenTypes.AND,
getXPath(),
currentPosition(),
currentPosition()+3 );
consume();
consume();
consume();
}
return token;
}
private Token or()
{
Token token = null;
if ( ( LA(1) == 'o' )
&&
( LA(2) == 'r' )
)
{
token = new Token( TokenTypes.OR,
getXPath(),
currentPosition(),
currentPosition()+2 );
consume();
consume();
}
return token;
}
private Token number()
{
int start = currentPosition();
boolean periodAllowed = true;
loop:
while( true )
{
switch ( LA(1) )
{
case '.':
if ( periodAllowed )
{
periodAllowed = false;
consume();
}
else
{
break loop;
}
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
consume();
break;
default:
break loop;
}
}
return new Token( TokenTypes.DOUBLE,
getXPath(),
start,
currentPosition() );
}
private Token whitespace()
{
consume();
loop:
while( hasMoreChars() )
{
switch ( LA(1) )
{
case ' ':
case '\t':
case '\n':
case '\r':
{
consume();
break;
}
default:
{
break loop;
}
}
}
return new Token( TokenTypes.SKIP,
getXPath(),
0,
0 );
}
private Token comma()
{
Token token = new Token( TokenTypes.COMMA,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
return token;
}
private Token equals()
{
Token token = new Token( TokenTypes.EQUALS,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
return token;
}
private Token minus()
{
Token token = new Token( TokenTypes.MINUS,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
return token;
}
private Token plus()
{
Token token = new Token( TokenTypes.PLUS,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
return token;
}
private Token dollar()
{
Token token = new Token( TokenTypes.DOLLAR,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
return token;
}
private Token pipe()
{
Token token = new Token( TokenTypes.PIPE,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
return token;
}
private Token at()
{
Token token = new Token( TokenTypes.AT,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
return token;
}
private Token colon()
{
Token token = new Token( TokenTypes.COLON,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
return token;
}
private Token doubleColon()
{
Token token = new Token( TokenTypes.DOUBLE_COLON,
getXPath(),
currentPosition(),
currentPosition()+2 );
consume();
consume();
return token;
}
private Token notEquals()
{
Token token = new Token( TokenTypes.NOT_EQUALS,
getXPath(),
currentPosition(),
currentPosition() + 2 );
consume();
consume();
return token;
}
private Token relationalOperator()
{
Token token = null;
switch ( LA(1) )
{
case '<':
{
if ( LA(2) == '=' )
{
token = new Token( TokenTypes.LESS_THAN_OR_EQUALS_SIGN,
getXPath(),
currentPosition(),
currentPosition() + 2 );
consume();
}
else
{
token = new Token( TokenTypes.LESS_THAN_SIGN,
getXPath(),
currentPosition(),
currentPosition() + 1);
}
consume();
break;
}
case '>':
{
if ( LA(2) == '=' )
{
token = new Token( TokenTypes.GREATER_THAN_OR_EQUALS_SIGN,
getXPath(),
currentPosition(),
currentPosition() + 2 );
consume();
}
else
{
token = new Token( TokenTypes.GREATER_THAN_SIGN,
getXPath(),
currentPosition(),
currentPosition() + 1 );
}
consume();
break;
}
}
return token;
}
private Token star()
{
Token token = new Token( TokenTypes.STAR,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
return token;
}
private Token literal()
{
Token token = null;
char match = LA(1);
consume();
int start = currentPosition();
while ( ( token == null )
&&
hasMoreChars() )
{
if ( LA(1) == match )
{
token = new Token( TokenTypes.LITERAL,
getXPath(),
start,
currentPosition() );
}
consume();
}
return token;
}
private Token dots()
{
Token token = null;
switch ( LA(2) )
{
case '.':
{
token = new Token( TokenTypes.DOT_DOT,
getXPath(),
currentPosition(),
currentPosition()+2 ) ;
consume();
consume();
break;
}
default:
{
token = new Token( TokenTypes.DOT,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
break;
}
}
return token;
}
private Token leftBracket()
{
Token token = new Token( TokenTypes.LEFT_BRACKET,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
return token;
}
private Token rightBracket()
{
Token token = new Token( TokenTypes.RIGHT_BRACKET,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
return token;
}
private Token leftParen()
{
Token token = new Token( TokenTypes.LEFT_PAREN,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
return token;
}
private Token rightParen()
{
Token token = new Token( TokenTypes.RIGHT_PAREN,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
return token;
}
private Token slashes()
{
Token token = null;
switch ( LA(2) )
{
case '/':
{
token = new Token( TokenTypes.DOUBLE_SLASH,
getXPath(),
currentPosition(),
currentPosition()+2 );
consume();
consume();
break;
}
default:
{
token = new Token( TokenTypes.SLASH,
getXPath(),
currentPosition(),
currentPosition()+1 );
consume();
}
}
return token;
}
private char LA(int i)
{
if ( currentPosition + ( i - 1 ) >= endPosition() )
{
return (char) -1;
}
return getXPath().charAt( currentPosition() + (i - 1) );
}
private void consume()
{
++this.currentPosition;
}
private int currentPosition()
{
return this.currentPosition;
}
private int endPosition()
{
return this.endPosition;
}
private void setPreviousToken(Token previousToken)
{
this.previousToken = previousToken;
}
private boolean hasMoreChars()
{
return currentPosition() < endPosition();
}
private boolean isIdentifierChar(char c)
{
return Verifier.isXMLNCNameCharacter( c );
}
private boolean isIdentifierStartChar(char c)
{
return Verifier.isXMLNCNameStartCharacter( c );
}
}