// XPathLexer.java example
//
// Explorer
// orbeon-forms-master
/*
 * $Header: /home/projects/jaxen/scm/jaxen/src/java/main/org/jaxen/saxpath/base/XPathLexer.java,v 1.17 2006/02/05 21:47:42 elharo Exp $
 * $Revision: 1.17 $
 * $Date: 2006/02/05 21:47:42 $
 *
 * ====================================================================
 *
 * Copyright 2000-2002 bob mcwhirter & James Strachan.
 * All rights reserved.
 *
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   * Neither the name of the Jaxen Project nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Jaxen Project and was originally
 * created by bob mcwhirter <bob@werken.com> and
 * James Strachan <jstrachan@apache.org>.  For more information on the
 * Jaxen Project, please see <http://www.jaxen.org/>.
 *
 * $Id: XPathLexer.java,v 1.17 2006/02/05 21:47:42 elharo Exp $
 */




package org.orbeon.jaxen.saxpath.base;

class XPathLexer
{
    private String xpath;
    private int    currentPosition;
    private int    endPosition;

    private Token  previousToken;

    XPathLexer(String xpath)
    {
        setXPath( xpath );
    }

    private void setXPath(String xpath)
    {
        this.xpath           = xpath;
        this.currentPosition = 0;
        this.endPosition     = xpath.length();
    }

    String getXPath()
    {
        return this.xpath;
    }

    Token nextToken()
    {
        Token token = null;

        do
        {
            token = null;

            switch ( LA(1) )
            {
                case '$':
                {
                    token = dollar();
                    break;
                }

                case '"':
                case '\'':
                {
                    token = literal();
                    break;
                }

                case '/':
                {
                    token = slashes();
                    break;
                }

                case ',':
                {
                    token = comma();
                    break;
                }

                case '(':
                {
                    token = leftParen();
                    break;
                }

                case ')':
                {
                    token = rightParen();
                    break;
                }

                case '[':
                {
                    token = leftBracket();
                    break;
                }

                case ']':
                {
                    token = rightBracket();
                    break;
                }

                case '+':
                {
                    token = plus();
                    break;
                }

                case '-':
                {
                    token = minus();
                    break;
                }

                case '<':
                case '>':
                {
                    token = relationalOperator();
                    break;
                }

                case '=':
                {
                    token = equals();
                    break;
                }

                case '!':
                {
                    if ( LA(2) == '=' )
                    {
                        token = notEquals();
                    }
                    break;
                }

                case '|':
                {
                    token = pipe();
                    break;
                }

                case '@':
                {
                    token = at();
                    break;
                }

                case ':':
                {
                    if ( LA(2) == ':' )
                    {
                        token = doubleColon();
                    }
                    else
                    {
                        token = colon();
                    }
                    break;
                }

                case '*':
                {
                    token = star();
                    break;
                }

                case '.':
                {
                    switch ( LA(2) )
                    {
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                        {
                            token = number();
                            break;
                        }
                        default:
                        {
                            token = dots();
                            break;
                        }
                    }
                    break;
                }

                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                {
                    token = number();
                    break;
                }

                case ' ':
                case '\t':
                case '\n':
                case '\r':
                {
                    token = whitespace();
                    break;
                }

                default:
                {
                    if ( isIdentifierStartChar( LA(1) ) )
                    {
                        token = identifierOrOperatorName();
                    }
                }
            }

            if ( token == null )
            {
                if (!hasMoreChars())
                {
                    token = new Token( TokenTypes.EOF,
                                   getXPath(),
                                   currentPosition(),
                                   endPosition() );
            }
                else
                {
                    token = new Token( TokenTypes.ERROR,
                                   getXPath(),
                                   currentPosition(),
                                   endPosition() );
                }
            }

        }
        while ( token.getTokenType() == TokenTypes.SKIP );

        setPreviousToken( token );

        return token;
    }

    private Token identifierOrOperatorName()
    {
        Token token = null;

        if ( previousToken != null )
        {
            // For some reason, section 3.7, Lexical structure,
            // doesn't seem to feel like it needs to mention the
            // SLASH, DOUBLE_SLASH, and COLON tokens for the test
            // if an NCName is an operator or not.
            //
            // According to section 3.7, "/foo" should be considered
            // as a SLASH following by an OperatorName being 'foo'.
            // Which is just simply, clearly, wrong, in my mind.
            //
            //     -bob

            switch ( previousToken.getTokenType() )
            {
                case TokenTypes.AT:
                case TokenTypes.DOUBLE_COLON:
                case TokenTypes.LEFT_PAREN:
                case TokenTypes.LEFT_BRACKET:
                case TokenTypes.AND:
                case TokenTypes.OR:
                case TokenTypes.MOD:
                case TokenTypes.DIV:
                case TokenTypes.COLON:
                case TokenTypes.SLASH:
                case TokenTypes.DOUBLE_SLASH:
                case TokenTypes.PIPE:
                case TokenTypes.DOLLAR:
                case TokenTypes.PLUS:
                case TokenTypes.MINUS:
                case TokenTypes.STAR:
                case TokenTypes.COMMA:
                case TokenTypes.LESS_THAN_SIGN:
                case TokenTypes.GREATER_THAN_SIGN:
                case TokenTypes.LESS_THAN_OR_EQUALS_SIGN:
                case TokenTypes.GREATER_THAN_OR_EQUALS_SIGN:
                case TokenTypes.EQUALS:
                case TokenTypes.NOT_EQUALS:
                {
                    token = identifier();
                    break;
                }
                default:
                {
                    token = operatorName();
                    break;
                }
            }
        }
        else
        {
            token = identifier();
        }

        return token;
    }

    private Token identifier()
    {
        Token token = null;

        int start = currentPosition();

        while ( hasMoreChars() )
        {
            if ( isIdentifierChar( LA(1) ) )
            {
                consume();
            }
            else
            {
                break;
            }
        }

        token = new Token( TokenTypes.IDENTIFIER,
                           getXPath(),
                           start,
                           currentPosition() );

        return token;
    }

    private Token operatorName()
    {
        Token token = null;

        switch ( LA(1) )
        {
            case 'a':
            {
                token = and();
                break;
            }

            case 'o':
            {
                token = or();
                break;
            }

            case 'm':
            {
                token = mod();
                break;
            }

            case 'd':
            {
                token = div();
                break;
            }
        }

        return token;
    }

    private Token mod()
    {
        Token token = null;

        if ( ( LA(1) == 'm' )
             &&
             ( LA(2) == 'o' )
             &&
             ( LA(3) == 'd' )
           )
        {
            token = new Token( TokenTypes.MOD,
                               getXPath(),
                               currentPosition(),
                               currentPosition()+3 );

            consume();
            consume();
            consume();
        }

        return token;
    }

    private Token div()
    {
        Token token = null;

        if ( ( LA(1) == 'd' )
             &&
             ( LA(2) == 'i' )
             &&
             ( LA(3) == 'v' )
            )
        {
            token = new Token( TokenTypes.DIV,
                               getXPath(),
                               currentPosition(),
                               currentPosition()+3 );

            consume();
            consume();
            consume();
        }

        return token;
    }

    private Token and()
    {
        Token token = null;

        if ( ( LA(1) == 'a' )
             &&
             ( LA(2) == 'n' )
             &&
             ( LA(3) == 'd' )
           )
        {
            token = new Token( TokenTypes.AND,
                               getXPath(),
                               currentPosition(),
                               currentPosition()+3 );

            consume();
            consume();
            consume();
        }

        return token;
    }

    private Token or()
    {
        Token token = null;

        if ( ( LA(1) == 'o' )
             &&
             ( LA(2) == 'r' )
           )
        {
            token = new Token( TokenTypes.OR,
                               getXPath(),
                               currentPosition(),
                               currentPosition()+2 );

            consume();
            consume();
        }

        return token;
    }

    private Token number()
    {
        int     start         = currentPosition();
        boolean periodAllowed = true;

      loop:
        while( true )
        {
            switch ( LA(1) )
            {
                case '.':
                    if ( periodAllowed )
                    {
                        periodAllowed = false;
                        consume();
                    }
                    else
                    {
                        break loop;
                    }
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    consume();
                    break;
                default:
                    break loop;
            }
        }

        return new Token( TokenTypes.DOUBLE,
                               getXPath(),
                               start,
                               currentPosition() );
    }

    private Token whitespace()
    {
        consume();

      loop:
        while( hasMoreChars() )
        {
            switch ( LA(1) )
            {
                case ' ':
                case '\t':
                case '\n':
                case '\r':
                {
                    consume();
                    break;
                }

                default:
                {
                    break loop;
                }
            }
        }

        return new Token( TokenTypes.SKIP,
                          getXPath(),
                          0,
                          0 );
    }

    private Token comma()
    {
        Token token = new Token( TokenTypes.COMMA,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+1 );

        consume();

        return token;
    }

    private Token equals()
    {
        Token token = new Token( TokenTypes.EQUALS,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+1 );

        consume();

        return token;
    }

    private Token minus()
    {
        Token token = new Token( TokenTypes.MINUS,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+1 );
        consume();

        return token;
    }

    private Token plus()
    {
        Token token = new Token( TokenTypes.PLUS,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+1 );
        consume();

        return token;
    }

    private Token dollar()
    {
        Token token = new Token( TokenTypes.DOLLAR,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+1 );
        consume();

        return token;
    }

    private Token pipe()
    {
        Token token = new Token( TokenTypes.PIPE,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+1 );

        consume();

        return token;
    }

    private Token at()
    {
        Token token = new Token( TokenTypes.AT,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+1 );

        consume();

        return token;
    }

    private Token colon()
    {
        Token token = new Token( TokenTypes.COLON,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+1 );
        consume();

        return token;
    }

    private Token doubleColon()
    {
        Token token = new Token( TokenTypes.DOUBLE_COLON,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+2 );

        consume();
        consume();

        return token;
    }

    private Token notEquals()
    {
        Token token = new Token( TokenTypes.NOT_EQUALS,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition() + 2 );

        consume();
        consume();

        return token;
    }

    private Token relationalOperator()
    {
        Token token = null;

        switch ( LA(1) )
        {
            case '<':
            {
                if ( LA(2) == '=' )
                {
                    token = new Token( TokenTypes.LESS_THAN_OR_EQUALS_SIGN,
                                       getXPath(),
                                       currentPosition(),
                                       currentPosition() + 2 );
                    consume();
                }
                else
                {
                    token = new Token( TokenTypes.LESS_THAN_SIGN,
                                       getXPath(),
                                       currentPosition(),
                                       currentPosition() + 1);
                }

                consume();
                break;
            }
            case '>':
            {
                if ( LA(2) == '=' )
                {
                    token = new Token( TokenTypes.GREATER_THAN_OR_EQUALS_SIGN,
                                       getXPath(),
                                       currentPosition(),
                                       currentPosition() + 2 );
                    consume();
                }
                else
                {
                    token = new Token( TokenTypes.GREATER_THAN_SIGN,
                                       getXPath(),
                                       currentPosition(),
                                       currentPosition() + 1 );
                }

                consume();
                break;
            }
        }

        return token;

    }

    private Token star()
    {
        Token token = new Token( TokenTypes.STAR,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+1 );

        consume();

        return token;
    }

    private Token literal()
    {
        Token token = null;

        char match  = LA(1);

        consume();

        int start = currentPosition();

        while ( ( token == null )
                &&
                hasMoreChars() )
        {
            if ( LA(1) == match )
            {
                token = new Token( TokenTypes.LITERAL,
                                   getXPath(),
                                   start,
                                   currentPosition() );
            }
            consume();
        }

        return token;
    }

    private Token dots()
    {
        Token token = null;

        switch ( LA(2) )
        {
            case '.':
            {
                token = new Token( TokenTypes.DOT_DOT,
                                   getXPath(),
                                   currentPosition(),
                                   currentPosition()+2 ) ;
                consume();
                consume();
                break;
            }
            default:
            {
                token = new Token( TokenTypes.DOT,
                                   getXPath(),
                                   currentPosition(),
                                   currentPosition()+1 );
                consume();
                break;
            }
        }

        return token;
    }

    private Token leftBracket()
    {
        Token token = new Token( TokenTypes.LEFT_BRACKET,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+1 );

        consume();

        return token;
    }

    private Token rightBracket()
    {
        Token token = new Token( TokenTypes.RIGHT_BRACKET,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+1 );

        consume();

        return token;
    }

    private Token leftParen()
    {
        Token token = new Token( TokenTypes.LEFT_PAREN,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+1 );

        consume();

        return token;
    }

    private Token rightParen()
    {
        Token token = new Token( TokenTypes.RIGHT_PAREN,
                                 getXPath(),
                                 currentPosition(),
                                 currentPosition()+1 );

        consume();

        return token;
    }

    private Token slashes()
    {
        Token token = null;

        switch ( LA(2) )
        {
            case '/':
            {
                token = new Token( TokenTypes.DOUBLE_SLASH,
                                   getXPath(),
                                   currentPosition(),
                                   currentPosition()+2 );
                consume();
                consume();
                break;
            }
            default:
            {
                token = new Token( TokenTypes.SLASH,
                                   getXPath(),
                                   currentPosition(),
                                   currentPosition()+1 );
                consume();
            }
        }

        return token;
    }

    private char LA(int i)
    {
        if ( currentPosition + ( i - 1 ) >= endPosition() )
        {
            return (char) -1;
        }

        return getXPath().charAt( currentPosition() + (i - 1) );
    }

    private void consume()
    {
        ++this.currentPosition;
    }

    private int currentPosition()
    {
        return this.currentPosition;
    }

    private int endPosition()
    {
        return this.endPosition;
    }

    private void setPreviousToken(Token previousToken)
    {
        this.previousToken = previousToken;
    }

    private boolean hasMoreChars()
    {
        return currentPosition() < endPosition();
    }

    private boolean isIdentifierChar(char c)
    {
        return Verifier.isXMLNCNameCharacter( c );
    }

    private boolean isIdentifierStartChar(char c)
    {
        return Verifier.isXMLNCNameStartCharacter( c );
    }

}