/** * Copyright (C) 2000 - 2009 Silverpeas * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * As a special exception to the terms and conditions of version 3.0 of * the GPL, you may redistribute this Program in connection with Free/Libre * Open Source Software ("FLOSS") applications as described in Silverpeas's * FLOSS exception. You should have received a copy of the text describing * the FLOSS exception, and it is also available here: * "http://repository.silverpeas.com/legal/licensing" * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.silverpeas.xml.xpath; import java.util.Arrays; import org.jdom.IllegalNameException; /** * Titre : Application Builder Description : implements a simplified XPath parser that retrieves the * different token composing a XPath Tokens are named * by the public fields STEP_SEPARATOR ('/') * PREDICATE_OPEN ('[') PREDICATE_CLOSE (']') EQUALITY ('=') DOT ('.' the abreviated step) * ABREV_ATTRIBAXIS ('@' the abreviated attribute axis specifier) PARENT_STEP ("..") LITERAL (string * enclosed by quotation marks or apostrophes) INTEGER (greater or equal to zero) REAL (greater or * equal to zero) NAME (a valid Name as specified in W3C XML 1.0 Recommendation) */ public class XPathTokenizer { // ######### Constructors ########## public XPathTokenizer() { } public XPathTokenizer(String xpath) { setXPath(xpath); } // ######### getters and setters ########## public void reinitRead() { // removeStates(); setIndex(0); setIndexAsPosition(); setCurrentToken(null); setCurrentTokenType(UNDEFINED); } public void setXPath(String xpath) { _xpath = xpath; reinitRead(); } public String getXPath() { return _xpath; } private String _xpath = null; // ######### lexical analyser ########## // used as lexical elements and as lexical element types public static final char STEP_SEPARATOR = '/'; public static final char PREDICATE_OPEN = '['; public static final char PREDICATE_CLOSE = ']'; public static final char EQUALITY = '='; public static final char DOT = '.'; public static final char ABREV_ATTRIBAXIS = '@'; // lexical element types public static final char PARENT_STEP = 'P'; public static final char LITERAL = 'L'; public static final char INTEGER = 'I'; public static final char REAL = 'R'; public static final char NAME = 'N'; public static final char END_OF_XPATH = 'E'; public static final char UNDEFINED = 'U'; // used to parse literals private static final char LITERAL_DELIM_SINGLE = '\''; private static final char LITERAL_DELIM_DOUBLE = '"'; // used to parse numbers private static char[] DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }; static { Arrays.sort(DIGITS); } /** * If null, the token is the element type */ public String getCurrentToken() { return _token; } /** * the token type and, for single characters, the token itself */ public char getCurrentTokenType() { return _tokenType; } /** * @return the position of the current token */ public int getCurrentTokenPosition() { return _tokenPosition; } private int _iXpath; private int _tokenPosition; private String _token = null; private char _tokenType = UNDEFINED; /** * @return the the current index */ private int getIndex() { return _iXpath; } /** * sets the index of the current token */ private void setIndex(int newIndex) { _iXpath = newIndex; } /** * sets the current token */ private void setCurrentToken(String token) { _token = token; } /** * sets the current token type */ private void setCurrentTokenType(char tokenType) { _tokenType = tokenType; } /** * sets the position of the current token */ private void setIndexAsPosition() { _tokenPosition = getIndex() + 1; } /** * Sets the token found * @return the lexical element type. If getCurrentToken() is null, it is also the element itself. */ public char readNextToken() throws XPathParseException { setIndexAsPosition(); setCurrentTokenType(UNDEFINED); // no path if (getXPath() == null) { throw new XPathParseException("someone forgot to set my XPath !"); } // empty path if (getXPath().length() == 0) { throw new XPathParseException("please give me a non empty XPath"); } // end of read if (getIndex() == getXPath().length()) { setCurrentTokenType(END_OF_XPATH); return _tokenType; } switch (getXPath().charAt(getIndex())) { // Step separator case STEP_SEPARATOR: setCurrentTokenType(STEP_SEPARATOR); setCurrentToken(null); setIndex(getIndex() + 1); break; // Abreviated steps or Number start case DOT: if (getXPath().length() > getIndex() + 1 && getXPath().charAt(getIndex() + 1) == DOT) { setCurrentTokenType(PARENT_STEP); setCurrentToken(new String(new char[] { DOT, DOT })); setIndex(getIndex() + 2); } else { int iStartStepOrNumber = getIndex(); setIndex(getIndex() + 1); if (readDigits()) { setCurrentTokenType(REAL); setCurrentToken(getXPath() .substring(iStartStepOrNumber, getIndex())); } else { setCurrentToken(null); setCurrentTokenType(DOT); } } break; case ABREV_ATTRIBAXIS: setCurrentTokenType(ABREV_ATTRIBAXIS); setCurrentToken(null); setIndex(getIndex() + 1); break; case PREDICATE_OPEN: setCurrentTokenType(PREDICATE_OPEN); setCurrentToken(null); setIndex(getIndex() + 1); break; case PREDICATE_CLOSE: setCurrentTokenType(PREDICATE_CLOSE); setCurrentToken(null); setIndex(getIndex() + 1); break; case EQUALITY: setCurrentTokenType(EQUALITY); setCurrentToken(null); setIndex(getIndex() + 1); break; case LITERAL_DELIM_SINGLE: setIndex(getIndex() + 1); int iDelimSingle = getXPath().indexOf(LITERAL_DELIM_SINGLE, getIndex()); if (iDelimSingle == -1) { throw new XPathParseException("literal not closed", getXPath(), getIndex()); } setCurrentTokenType(LITERAL); setCurrentToken(getXPath().substring(getIndex(), iDelimSingle)); setIndex(iDelimSingle + 1); break; case LITERAL_DELIM_DOUBLE: setIndex(getIndex() + 1); int iDelimDouble = getXPath().indexOf(LITERAL_DELIM_DOUBLE, getIndex()); if (iDelimDouble == -1) { throw new XPathParseException("literal not closed", getXPath(), getIndex()); } setCurrentTokenType(LITERAL); setCurrentToken(getXPath().substring(getIndex(), iDelimDouble)); setIndex(iDelimDouble + 1); break; // Number // starting with '.' : see DOT case case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': setCurrentTokenType(INTEGER); int iStartNumber = getIndex(); readDigits(); if (getIndex() < getXPath().length() && getXPath().charAt(getIndex()) == DOT) { setCurrentTokenType(REAL); setIndex(getIndex() + 1); readDigits(); } setCurrentToken(getXPath().substring(iStartNumber, getIndex())); break; // XML Name default: int iStartName = getIndex(); boolean nameFound = false; setIndex(getIndex() + 1); try { while (getIndex() <= getXPath().length() && isValidName(getXPath().substring(iStartName, getIndex()))) { nameFound = true; setIndex(getIndex() + 1); } } catch (RuntimeException re) { throw new XPathParseException("unauthorised char in XML name", getXPath(), getIndex()); } if (nameFound) { setCurrentTokenType(NAME); setIndex(getIndex() - 1); setCurrentToken(getXPath().substring(iStartName, getIndex())); } else { throw new XPathParseException("unauthorised char", getXPath(), iStartName + 1); } break; } return getCurrentTokenType(); } private boolean readDigits() { boolean found = false; while (getIndex() < getXPath().length() && Arrays.binarySearch(DIGITS, getXPath().charAt(getIndex())) >= 0) { found = true; setIndex(getIndex() + 1); } return found; } protected boolean isValidName(String name) { try { new org.jdom.Element(name); return true; } catch (IllegalNameException e) { return false; } } }