/**
* Copyright (c) 2009-2015, Christer Sandberg
*/
package se.fishtank.css.selectors.parser;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import se.fishtank.css.selectors.selector.*;
import se.fishtank.css.selectors.tokenizer.Token;
import se.fishtank.css.selectors.tokenizer.TokenType;
import se.fishtank.css.selectors.tokenizer.Tokenizer;
import se.fishtank.css.selectors.util.Pair;
/**
* Selector parser.
*
* @author Christer Sandberg
*/
public class SelectorParser {

    /** Tokenizer used when parsing. */
    private final Tokenizer tokenizer;

    /**
     * Single-token push-back buffer. When a parse method reads a token it cannot
     * consume, it stores it here and {@link #nextToken()} hands it out again before
     * reading from the tokenizer. {@code null} when nothing is pushed back.
     */
    private Token savedToken;

    /**
     * Create a new selector parser.
     *
     * @param tokenizer Tokenizer used when parsing.
     */
    private SelectorParser(Tokenizer tokenizer) {
        this.tokenizer = tokenizer;
    }

    /**
     * Parse selectors from the given string.
     *
     * @param str The selectors string.
     * @return The selectors parsed.
     * @throws ParserException If an unexpected token is encountered.
     * @throws IllegalArgumentException If a compound selector contains no simple selectors.
     */
    public static List<Selector> parse(String str) {
        return parse(new Tokenizer(str));
    }

    /**
     * Parse selectors from the given tokenizer.
     *
     * @param tokenizer The tokenizer to use when parsing.
     * @return The selectors parsed.
     * @throws ParserException If an unexpected token is encountered.
     * @throws IllegalArgumentException If a compound selector contains no simple selectors.
     */
    public static List<Selector> parse(Tokenizer tokenizer) {
        return new SelectorParser(tokenizer).parseSelectorList();
    }

    /**
     * Parse a selector list, i.e. one or more selectors separated by commas.
     * <p>
     * See http://www.w3.org/TR/selectors/#grouping
     *
     * @return A list of selectors parsed.
     */
    private List<Selector> parseSelectorList() {
        List<Selector> selectors = new LinkedList<>();
        selectors.add(parseSelector());
        while (true) {
            Token token = skipWhitespace().first;
            if (token.type == TokenType.EOF) {
                return selectors;
            }
            if (token.type != TokenType.COMMA) {
                throw expected(",", token);
            }
            selectors.add(parseSelector());
        }
    }

    /**
     * Parse a selector: a chain of compound selectors joined by combinators,
     * optionally terminated by a pseudo element.
     * <p>
     * Parsing stops at EOF, at a comma (which is pushed back for the caller), or as
     * soon as a compound selector ends with a pseudo element.
     * <p>
     * See http://www.w3.org/TR/selectors/#selector-syntax
     *
     * @return The selector parsed.
     */
    private Selector parseSelector() {
        Pair<List<SimpleSelector>, PseudoElementSelector> simpleSelectors = parseSimpleSelectors();
        CompoundSelector compoundSelector = CompoundSelector.of(simpleSelectors.first);
        PseudoElementSelector pseudoElement = simpleSelectors.second;

        while (pseudoElement == null) {
            Pair<Token, Boolean> p = skipWhitespace();
            if (p.first.type == TokenType.EOF) {
                break;
            } else if (p.first.type == TokenType.COMMA) {
                // Leave the comma for parseSelectorList().
                savedToken = p.first;
                break;
            }

            Combinator combinator = null;
            if (p.first.type == TokenType.DELIM) {
                switch (p.first.value) {
                    case ">":
                        combinator = Combinator.CHILD;
                        break;
                    case "+":
                        combinator = Combinator.NEXT_SIBLING;
                        break;
                    case "~":
                        combinator = Combinator.LATER_SIBLING;
                        break;
                }
            }

            if (combinator == null) {
                // No explicit combinator: skipped whitespace acts as the descendant combinator.
                if (p.second) {
                    combinator = Combinator.DESCENDANT;
                } else {
                    throw expected("one of ' ', '>', '+', '~'", p.first);
                }
                // The token read belongs to the next compound selector.
                savedToken = p.first;
            }

            simpleSelectors = parseSimpleSelectors();
            compoundSelector = new CompoundSelector(simpleSelectors.first, new Pair<>(combinator, compoundSelector));
            // Pick up a pseudo element found on this compound so that it is kept in the
            // result and terminates the loop.
            pseudoElement = simpleSelectors.second;
        }

        return new Selector(compoundSelector, pseudoElement);
    }

    /**
     * Parse a sequence of simple selectors.
     * <p>
     * On successful parsing it returns a sequence of selectors and maybe a pseudo element selector
     * indicating if the last (or only) simple selector in the sequence is a pseudo element.
     * <p>
     * See http://www.w3.org/TR/selectors/#sequence
     *
     * @return A sequence of simple selectors parsed and a pseudo element selector or {@code null}
     * @throws IllegalArgumentException If neither a type/universal selector nor any other
     *                                  simple selector was found.
     */
    private Pair<List<SimpleSelector>, PseudoElementSelector> parseSimpleSelectors() {
        int pos = tokenizer.getPosition();
        List<SimpleSelector> selectorSequence = new LinkedList<>();
        PseudoElementSelector pseudoElement = null;
        boolean empty = true;

        // An explicit universal selector adds nothing to the sequence.
        Pair<String, Boolean> name = parseName();
        if (name.second && !"*".equals(name.first)) {
            selectorSequence.add(new LocalNameSelector(name.first));
            empty = false;
        }

        while (true) {
            SimpleSelector selector = parseOneSimpleSelector(false);
            if (selector == null) {
                break;
            }
            empty = false;
            if (selector instanceof PseudoElementSelector) {
                // A pseudo element ends the sequence and is reported separately.
                pseudoElement = (PseudoElementSelector) selector;
                break;
            }
            selectorSequence.add(selector);
        }

        if (empty && !name.second) {
            // NOTE(review): this is the only error in this class that is not a ParserException;
            // the type is kept because callers may depend on it.
            throw new IllegalArgumentException("No simple selectors found at position " + pos);
        }

        return new Pair<List<SimpleSelector>, PseudoElementSelector>(selectorSequence, pseudoElement);
    }

    /**
     * Parse the name of an element and returns the name and a boolean indicating
     * if a type selector was found or not.
     * <p>
     * Leading whitespace is skipped. If the next token is neither an identifier nor
     * {@code *}, it is pushed back and {@code ("*", false)} is returned.
     * <p>
     * See http://www.w3.org/TR/selectors/#type-selectors and http://www.w3.org/TR/selectors/#universal-selector
     *
     * @return The parsed name and whether a type selector was found or not.
     */
    private Pair<String, Boolean> parseName() {
        Token token = skipWhitespace().first;
        switch (token.type) {
            case DELIM:
                if ("*".equals(token.value)) {
                    return new Pair<>("*", true);
                }
                break;
            case IDENT:
                return new Pair<>(token.value, true);
        }

        savedToken = token;
        return new Pair<>("*", false);
    }

    /**
     * Parse one simple selector (excluding the type selector).
     * <p>
     * If the next token does not start a simple selector it is pushed back and
     * {@code null} is returned, so the caller can treat it as a combinator, comma, etc.
     * <p>
     * See http://www.w3.org/TR/selectors/#simple-selectors
     *
     * @param insideNegation If inside a negation selector.
     * @return The simple selector parsed or {@code null}
     */
    private SimpleSelector parseOneSimpleSelector(boolean insideNegation) {
        Token token = nextToken();
        switch (token.type) {
            case HASH:
                Token.Hash h = (Token.Hash) token;
                return new AttributeSelector(AttributeSelector.Match.EQUALS, "id", h.value);
            case DELIM:
                if (".".equals(token.value)) {
                    Token classToken = nextToken();
                    if (classToken.type != TokenType.IDENT) {
                        throw expected("class value", classToken);
                    }
                    return new AttributeSelector(AttributeSelector.Match.INCLUDES, "class", classToken.value);
                }
                // Any other delimiter (e.g. '>', '+', '~' written without surrounding
                // whitespace) is not part of this compound; hand it back to the caller.
                break;
            case LEFT_SQUARE_BRACKET:
                return parseAttribute();
            case COLON:
                return parsePseudo(insideNegation);
            default:
                break;
        }

        savedToken = token;
        return null;
    }

    /**
     * Parse what follows a single {@code :} - a pseudo class, a functional pseudo
     * class, or a pseudo element (either {@code ::name} or one of the legacy
     * single-colon names).
     *
     * @param insideNegation If inside a negation selector.
     * @return The pseudo class or pseudo element selector parsed.
     */
    private SimpleSelector parsePseudo(boolean insideNegation) {
        Token token = nextToken();
        switch (token.type) {
            case IDENT:
                // Locale.ROOT keeps the comparison independent of the default locale.
                switch (token.value.toLowerCase(Locale.ROOT)) {
                    case "first-line":
                    case "first-letter":
                    case "before":
                    case "after":
                        return new PseudoElementSelector(token.value);
                    default:
                        return new PseudoClassSelector(token.value);
                }
            case COLON:
                token = nextToken();
                if (token.type != TokenType.IDENT) {
                    throw expected("pseudo element value", token);
                }
                return new PseudoElementSelector(token.value);
            case FUNCTION:
                return parseFunctionalPseudoClass(token.value, insideNegation);
            default:
                // A lone ':' must not be silently dropped.
                throw expected("pseudo class or pseudo element", token);
        }
    }

    /**
     * Parse an attribute selector. The opening {@code [} has already been consumed.
     * <p>
     * See http://www.w3.org/TR/selectors/#attribute-selectors
     *
     * @return The attribute selector parsed.
     */
    private AttributeSelector parseAttribute() {
        Token token = skipWhitespace().first;
        if (token.type != TokenType.IDENT) {
            throw expected("attribute name", token);
        }
        String name = token.value;

        token = skipWhitespace().first;
        if (token.type == TokenType.RIGHT_SQUARE_BRACKET) {
            return new AttributeSelector(AttributeSelector.Match.EXISTS, name, "");
        }

        AttributeSelector.Match match;
        switch (token.type) {
            case PREFIX_MATCH:
                match = AttributeSelector.Match.BEGINS;
                break;
            case SUFFIX_MATCH:
                match = AttributeSelector.Match.ENDS;
                break;
            case SUBSTRING_MATCH:
                match = AttributeSelector.Match.CONTAINS;
                break;
            case INCLUDE_MATCH:
                match = AttributeSelector.Match.INCLUDES;
                break;
            case DASH_MATCH:
                match = AttributeSelector.Match.HYPHENS;
                break;
            case DELIM:
                if (!"=".equals(token.value)) {
                    throw expected("=", token);
                }
                match = AttributeSelector.Match.EQUALS;
                break;
            default:
                // Anything else would leave the selector without a match type.
                throw expected("attribute match operator", token);
        }

        token = skipWhitespace().first;
        if (token.type != TokenType.IDENT && token.type != TokenType.STRING) {
            throw expected("attribute value", token);
        }
        String value = token.value;

        token = skipWhitespace().first;
        if (token.type != TokenType.RIGHT_SQUARE_BRACKET) {
            throw expected("]", token);
        }

        return new AttributeSelector(match, name, value);
    }

    /**
     * Parse a functional pseudo class. The function token has already been consumed.
     * <p>
     * The {@code nth-*} functions are delegated to {@link NthParser}, {@code not} is
     * parsed as a negation, and for any other function the token values up to the
     * closing parenthesis are concatenated into the expression string.
     * <p>
     * See http://www.w3.org/TR/selectors/#structural-pseudos
     *
     * @param name The functional pseudo class name.
     * @param insideNegation If inside a negation selector.
     * @return The simple selector parsed.
     */
    private SimpleSelector parseFunctionalPseudoClass(String name, boolean insideNegation) {
        int pos = tokenizer.getPosition();
        // Locale.ROOT keeps the comparison independent of the default locale.
        switch (name.toLowerCase(Locale.ROOT)) {
            case "nth-child":
            case "nth-last-child":
            case "nth-of-type":
            case "nth-last-of-type":
                Pair<Integer, Integer> nth = NthParser.parse(tokenizer);
                return new PseudoNthSelector(name, nth.first, nth.second);
            case "not":
                if (insideNegation) {
                    throw new ParserException("Error at position " + pos + ": negations may not be nested");
                }

                PseudoNegationSelector selector;
                Pair<String, Boolean> pair = parseName();
                if (pair.second) {
                    selector = new PseudoNegationSelector(new LocalNameSelector(pair.first));
                } else {
                    SimpleSelector simpleSelector = parseOneSimpleSelector(true);
                    if (simpleSelector == null) {
                        // The offending token was pushed back; fetch it for the message.
                        throw expected("simple selector", nextToken());
                    }
                    selector = new PseudoNegationSelector(simpleSelector);
                }

                Token closing = skipWhitespace().first;
                if (closing.type != TokenType.RIGHT_PAREN) {
                    throw expected(")", closing);
                }
                return selector;
        }

        StringBuilder sb = new StringBuilder();
        while (true) {
            Token token = nextToken();
            if (token.type == TokenType.EOF) {
                throw new ParserException("EOF in function expression starting at position " + pos);
            } else if (token.type == TokenType.RIGHT_PAREN) {
                break;
            } else {
                sb.append(token.value);
            }
        }

        return new PseudoFunctionSelector(name, sb.toString());
    }

    /**
     * Returns the next token to parse: the pushed-back token if there is one
     * (clearing it), otherwise the next token from the tokenizer.
     *
     * @return The next token.
     */
    private Token nextToken() {
        if (savedToken != null) {
            Token token = savedToken;
            savedToken = null;
            return token;
        }
        return tokenizer.nextToken();
    }

    /**
     * Skips whitespace tokens and returns the next non-whitespace token and a
     * boolean indicating if some whitespace was skipped.
     *
     * @return A pair of the next non-whitespace token and whether some whitespace was skipped.
     */
    private Pair<Token, Boolean> skipWhitespace() {
        boolean skipped = false;
        while (true) {
            Token token = nextToken();
            if (token.type != TokenType.WHITESPACE) {
                return new Pair<>(token, skipped);
            }
            skipped = true;
        }
    }

    /**
     * Returns an exception of what was expected and what was unexpectedly found.
     *
     * @param what What was expected.
     * @param token The token found.
     * @return An exception.
     */
    private static ParserException expected(String what, Token token) {
        String msg = String.format("Expected %s at position %d, got %s", what, token.position, token.type);
        return new ParserException(msg);
    }
}