/**
*
*/
package org.zkoss.zk.ui.select.impl;
import java.util.ArrayList;
import org.zkoss.fsm.StateCtx;
import org.zkoss.fsm.StateMachine;
import org.zkoss.zk.ui.select.impl.Token.Type;
/**
* A tokenizer of selector string.
* @since 6.0.0
* @author simonpai
*/
public class Tokenizer {
private final StateMachine<State, CharClass, Character> _machine;
private ArrayList<Token> _tokens;
public Tokenizer() {
_tokens = null;
_machine = new StateMachine<State, CharClass, Character>() {
private int _anchor;
private char _prevChar;
private CharClass _prevClass;
protected boolean _inDoubleQuote;
protected boolean _inSingleQuote;
protected boolean _inParam;
protected boolean _escaped;
protected boolean _opEscaped;
protected void init() {
getState(State.MAIN).setReturningAll(true)
// B70-ZK-1829: Use Enumeration.
.addTransition(CharClass.OPEN_BRACKET, State.IN_ATTRIBUTE);
//.addMinorTransition('[', State.IN_ATTRIBUTE);
setState(State.IN_ATTRIBUTE, new StateCtx<State, CharClass, Character>() {
protected void onReturn(Character i, CharClass cls) {
if (cls != CharClass.OTHER)
return;
if (i == '"')
_inDoubleQuote = !_inDoubleQuote;
else if (i == '\'')
_inSingleQuote = !_inSingleQuote;
}
}).setReturningAll(true)
// B70-ZK-1829: Use Enumeration.
.addTransition(CharClass.CLOSE_BRACKET, State.MAIN);
//.addMinorTransition(']', State.MAIN);
// TODO: IN_PARAM
}
protected void onReset() {
_inDoubleQuote = false;
_inSingleQuote = false;
_inParam = false;
_escaped = false;
_opEscaped = false;
_anchor = 0;
_prevChar = '!';
_prevClass = null;
_tokens = new ArrayList<Token>();
}
protected void afterStep(Character input, CharClass inputClass, State origin, State destination) {
doDebug("* OP Escaped: " + _opEscaped);
if (inputClass == CharClass.ESCAPE)
return;
boolean isPrefix = origin == State.IN_ATTRIBUTE && inputClass == CharClass.OTHER
&& (input == '^' || input == '$' || input == '*');
// ZK-2944: pseudo element, two continuous colons like ::
if (_prevChar == ':' && input == ':' && !_inParam) {
flush(Type.NTN_PSDOELEM, true);
}
// ZK-2944: pseudo class, added checks to avoid confusion with "the end" of pseudo element
if (_prevChar == ':' && input != ':' && !_inParam && previousTokenIsNotPseudoElement()) {
flush(_prevChar, _prevClass, false);
}
// flush previous identifier/whitespace
if (inputClass != _prevClass && _prevClass != null && _prevClass.isMultiple())
flush(_prevChar, _prevClass, false);
// previous char is ^/$/* but input is not =
if (origin == State.IN_ATTRIBUTE && _opEscaped && input != '=')
flush(_prevChar, _prevClass, false);
// flush current
// ZK-2944: no longer handle any pseudo class or pseudo element related input
if (!inputClass.isMultiple() && !isPrefix && input != ':')
flush(input, inputClass, true);
// update status
if (input == '(')
_inParam = true;
else if (input == ')')
_inParam = false;
_prevChar = input;
_prevClass = inputClass;
_opEscaped = isPrefix;
}
protected void onStop(boolean endOfInput) {
if (!endOfInput)
return;
// flush last token if any
if (_anchor < _step)
flush(_prevChar, _prevClass, false);
}
protected CharClass getClass(Character c) {
if (_inDoubleQuote && (_escaped || c != '"'))
return CharClass.LITERAL;
if (_inSingleQuote && (_escaped || c != '\''))
return CharClass.LITERAL;
// B70-ZK-1829: Return the enumeration from corresponding character.
if (_current == State.MAIN && c == '[') {
return CharClass.OPEN_BRACKET;
}
if (_current == State.IN_ATTRIBUTE && !_inDoubleQuote && !_inSingleQuote && c == ']') {
return CharClass.CLOSE_BRACKET;
}
if (_inParam && c != ',' && c != ')')
return Character.isWhitespace(c) ? CharClass.OTHER : CharClass.LITERAL;
if (_escaped)
return Character.isWhitespace(c) ? CharClass.WHITESPACE : CharClass.LITERAL;
if (Character.isLetter(c) || Character.isDigit(c) || c == '-' || c == '_')
return CharClass.LITERAL;
if (Character.isWhitespace(c))
return CharClass.WHITESPACE;
//TODO: additional spec of a.b.c='sdf'
if ('.' == c.charValue() && _current == State.IN_ATTRIBUTE) {
return CharClass.LITERAL;
}
return c == '\\' ? CharClass.ESCAPE : CharClass.OTHER;
}
protected State getLandingState(Character input, CharClass inputClass) {
if (input == '[')
return State.IN_ATTRIBUTE;
if (super._current == State.IN_ATTRIBUTE) {
if (_inDoubleQuote || _inSingleQuote)
return State.IN_ATTRIBUTE;
}
if (inputClass == CharClass.ESCAPE)
_escaped = true;
return State.MAIN;
}
protected void onReject(Character input) {
throw new ParseException(_step, _current, input);
}
// FIXME ugly implementation
private boolean previousTokenIsNotPseudoElement() {
return _tokens.isEmpty() || !_tokens.get(_tokens.size() - 1).getType().equals(Type.NTN_PSDOELEM);
}
private void flush(char input, CharClass inputClass, boolean withCurrChar) {
int endIndex = _step + (withCurrChar ? 1 : _escaped ? -1 : 0);
_tokens.add(new Token(getTokenType(input, inputClass), _anchor, endIndex));
doDebug("! flush: [" + _anchor + ", " + endIndex + "]");
_anchor = endIndex;
}
private void flush(Type tokenType, boolean withCurrChar) {
int endIndex = _step + (withCurrChar ? 1 : _escaped ? -1 : 0);
_tokens.add(new Token(tokenType, _anchor, endIndex));
doDebug("! flush: [" + _anchor + ", " + endIndex + "]");
_anchor = endIndex;
}
private Type getTokenType(char input, CharClass inputClass) {
switch (inputClass) {
case LITERAL:
return Type.IDENTIFIER;
case WHITESPACE:
return Type.WHITESPACE;
}
switch (input) {
case ',':
return _inParam ? Type.PARAM_SEPARATOR : Type.SELECTOR_SEPARATOR;
case '*':
return Type.UNIVERSAL;
case '>':
return Type.CBN_CHILD;
case '+':
return Type.CBN_ADJACENT_SIBLING;
case '~':
return Type.CBN_GENERAL_SIBLING;
case '#':
return Type.NTN_ID;
case '.': //TODO
return (inputClass == CharClass.ATTR_GETTER_OP) ? Type.IDENTIFIER : Type.NTN_CLASS;
case ':': // ZK-2944: not handling pseudo element
return Type.NTN_PSDOCLS;
case '\'':
return Type.SINGLE_QUOTE;
case '"':
return Type.DOUBLE_QUOTE;
case '[':
return Type.OPEN_BRACKET;
case ']':
return Type.CLOSE_BRACKET;
case '(':
return Type.OPEN_PAREN;
case ')':
return Type.CLOSE_PAREN;
case '=':
switch (_prevChar) {
case '^':
return Type.OP_BEGIN_WITH;
case '$':
return Type.OP_END_WITH;
case '*':
return Type.OP_CONTAIN;
default:
return Type.OP_EQUAL;
}
default:
return Character.isWhitespace(input) ? Type.MINOR_WHITESPACE : Type.UNKNOWN_CHAR;
}
}
};
}
public ArrayList<Token> tokenize(String selector) {
_machine.start(new CharSequenceIterator(selector));
return _tokens;
}
public void setDebugMode(boolean mode) {
_machine.setDebugMode(mode);
}
// state, input class //
private enum State {
MAIN, IN_ATTRIBUTE;
}
private enum CharClass {
// B70-ZK-1829: Add additional Type.
LITERAL(true), WHITESPACE(true), ESCAPE, OTHER, ATTR_GETTER_OP, OPEN_BRACKET, CLOSE_BRACKET;
private boolean _multiple;
CharClass() {
this(false);
}
CharClass(boolean multiple) {
_multiple = multiple;
}
public boolean isMultiple() {
return _multiple;
}
}
}