/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ambari.server.api.predicate;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Scans a query expression and generates an array of tokens.
 * Each token contains type and value information.
 *
 * First, the query expression is broken down into string tokens using
 * a regular expression which splits on a set of delimiters that includes
 * operators and brackets.
 *
 * Second, each string token is converted into a Token with type and value information.
 */
public class QueryLexer {

  /**
   * Query string constants.
   */
  public static final String QUERY_FIELDS    = "fields";
  public static final String QUERY_FORMAT    = "format";
  public static final String QUERY_PAGE_SIZE = "page_size";
  public static final String QUERY_TO        = "to";
  public static final String QUERY_FROM      = "from";
  public static final String QUERY_MINIMAL   = "minimal_response";
  public static final String QUERY_SORT      = "sortBy";

  /**
   * All valid delimiters.
   */
  private static final String[] ALL_DELIMS =
      {".matches\\(", ".in\\(", ".isEmpty\\(", "<=", ">=", "!=", "=", "<", ">", "&", "|", "!", "(", ")"};

  /**
   * Map of token type to list of valid handlers for the next token.
   */
  private static final Map<Token.TYPE, List<TokenHandler>> TOKEN_HANDLERS = new HashMap<>();

  /**
   * Static set of property names to ignore.
   */
  private static final Set<String> SET_IGNORE = new HashSet<>();

  /**
   * Constructor.
   * Registers the token handlers.
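   * <p>
   * Handlers are registered per preceding token type: the {@code TOKEN_HANDLERS} entry
   * for a given token type lists the handlers for the tokens that may legally follow it.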
   */
  public QueryLexer() {
    //todo: refactor handler registration
    List<TokenHandler> listHandlers = new ArrayList<>();
    listHandlers.add(new LogicalUnaryOperatorTokenHandler());
    listHandlers.add(new OpenBracketTokenHandler());
    listHandlers.add(new PropertyOperandTokenHandler());

    TOKEN_HANDLERS.put(Token.TYPE.BRACKET_OPEN, listHandlers);
    TOKEN_HANDLERS.put(Token.TYPE.LOGICAL_OPERATOR, listHandlers);
    TOKEN_HANDLERS.put(Token.TYPE.LOGICAL_UNARY_OPERATOR, listHandlers);

    listHandlers = new ArrayList<>();
    listHandlers.add(new RelationalOperatorTokenHandler());
    listHandlers.add(new RelationalOperatorFuncTokenHandler());
    TOKEN_HANDLERS.put(Token.TYPE.PROPERTY_OPERAND, listHandlers);

    listHandlers = new ArrayList<>();
    listHandlers.add(new ValueOperandTokenHandler());
    TOKEN_HANDLERS.put(Token.TYPE.RELATIONAL_OPERATOR, listHandlers);

    listHandlers = new ArrayList<>();
    listHandlers.add(new CloseBracketTokenHandler());
    listHandlers.add(new ComplexValueOperandTokenHandler());
    TOKEN_HANDLERS.put(Token.TYPE.RELATIONAL_OPERATOR_FUNC, listHandlers);

    listHandlers = new ArrayList<>();
    listHandlers.add(new CloseBracketTokenHandler());
    listHandlers.add(new LogicalOperatorTokenHandler());
    TOKEN_HANDLERS.put(Token.TYPE.BRACKET_CLOSE, listHandlers);

    listHandlers = new ArrayList<>(listHandlers);
    // complex value operands can span multiple tokens
    listHandlers.add(0, new ComplexValueOperandTokenHandler());
    TOKEN_HANDLERS.put(Token.TYPE.VALUE_OPERAND, listHandlers);
  }

  /**
   * Scan the provided query and generate a token stream to be used by the query parser.
   *
   * @param exp  the query expression to scan
   *
   * @return an array of tokens
   * @throws InvalidQueryException if the query is invalid
   */
  public Token[] tokens(String exp) throws InvalidQueryException {
    return tokens(exp, Collections.<String>emptySet());
  }

  /**
   * Scan the provided query and generate a token stream to be used by the query parser.
   *
   * @param exp               the query expression to scan
   * @param ignoreProperties  property names which should be ignored
   *
   * @return an array of tokens
   * @throws InvalidQueryException if the query is invalid
   */
  public Token[] tokens(String exp, Collection<String> ignoreProperties) throws InvalidQueryException {
    ScanContext ctx = new ScanContext();
    ctx.addPropertiesToIgnore(SET_IGNORE);
    ctx.addPropertiesToIgnore(ignoreProperties);

    for (String tok : parseStringTokens(exp)) {
      List<TokenHandler> listHandlers = TOKEN_HANDLERS.get(ctx.getLastTokenType());
      boolean processed = false;
      int idx = 0;
      while (!processed && idx < listHandlers.size()) {
        processed = listHandlers.get(idx++).handleToken(tok, ctx);
      }
      if (!processed) {
        throw new InvalidQueryException("Invalid Query Token: token='" + tok +
            "', previous token type=" + ctx.getLastTokenType());
      }
    }
    ctx.validateEndState();

    return ctx.getTokenList().toArray(new Token[ctx.getTokenList().size()]);
  }

  /**
   * Uses a regular expression to scan a query expression and produce a list of string tokens.
   * These tokens are the exact strings that exist in the original syntax.
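   * <p>
   * For example, the expression {@code foo=1&!(bar<=2)} is split into the string tokens
   * {@code "foo", "=", "1", "&", "!", "(", "bar", "<=", "2", ")"}.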
   *
   * @param exp  the query expression
   *
   * @return list of string tokens from the query expression
   */
  private List<String> parseStringTokens(String exp) {
    Pattern pattern = generatePattern();
    Matcher matcher = pattern.matcher(exp);
    List<String> listStrTokens = new ArrayList<>();
    int pos = 0;
    // while there's a delimiter in the string
    while (matcher.find()) {
      if (pos != matcher.start()) {
        // add anything between the current and previous delimiter to the token list
        listStrTokens.add(exp.substring(pos, matcher.start()));
      }
      // add the delimiter
      listStrTokens.add(matcher.group());
      // remember the end of the delimiter
      pos = matcher.end();
    }
    if (pos != exp.length()) {
      // add any chars remaining in the string after the last delimiter
      listStrTokens.add(exp.substring(pos));
    }
    return listStrTokens;
  }

  /**
   * Generate the regex pattern to tokenize the query expression.
   *
   * @return the regex pattern
   */
  private Pattern generatePattern() {
    StringBuilder sb = new StringBuilder();
    sb.append('(');
    // for each delimiter
    for (String delim : ALL_DELIMS) {
      if (sb.length() != 1) {
        sb.append('|');
      }
      sb.append('\\');
      sb.append(delim);
    }
    sb.append(')');

    return Pattern.compile(sb.toString());
  }

  /**
   * Add property names that the lexer should ignore.
   */
  static {
    // ignore values
    SET_IGNORE.add(QUERY_FIELDS);
    SET_IGNORE.add(QUERY_FORMAT);
    SET_IGNORE.add(QUERY_PAGE_SIZE);
    SET_IGNORE.add(QUERY_TO);
    SET_IGNORE.add(QUERY_FROM);
    SET_IGNORE.add(QUERY_MINIMAL);
    SET_IGNORE.add(QUERY_SORT);
    SET_IGNORE.add("_");
  }

  /**
   * Scan context. Provides contextual information related to the current scan.
   */
  private class ScanContext {
    /**
     * The last token type scanned.
     */
    private Token.TYPE m_lastType;

    /**
     * The last property operand value.
     */
    private String m_propertyName;

    /**
     * List of tokens generated by the scan.
     */
    private List<Token> m_listTokens = new ArrayList<>();

    /**
     * If non-null, ignore all tokens up to and including this token type.
     */
    private Token.TYPE m_ignoreSegmentEndToken = null;

    /**
     * Property names which are to be ignored.
     */
    private Set<String> m_propertiesToIgnore = new HashSet<>();

    /**
     * Bracket score.  This score is the difference between the number of
     * opening brackets and the number of closing brackets processed by
     * a handler.  Only handlers which process values containing brackets
     * will be interested in this information.
     */
    private int bracketScore = 0;

    /**
     * Intermediate tokens are tokens which are used by a handler which may
     * process several adjacent tokens.  A handler might push intermediate
     * tokens and then, in subsequent invocations, combine/alter/remove/etc
     * these tokens prior to adding them to the context tokens.
     */
    private Deque<Token> m_intermediateTokens = new ArrayDeque<>();

    /**
     * Constructor.
     */
    private ScanContext() {
      //init last type to the logical op type
      m_lastType = Token.TYPE.LOGICAL_OPERATOR;
    }

    /**
     * Ignore all subsequent tokens up to and including the provided token type.
     *
     * @param type  the last token type of the ignore segment
     */
    public void setIgnoreSegmentEndToken(Token.TYPE type) {
      m_ignoreSegmentEndToken = type;
    }

    /**
     * Get the type of the last token.
     *
     * @return the type of the last token
     */
    public Token.TYPE getLastTokenType() {
      return m_lastType;
    }

    /**
     * Set the type of the last token.
     *
     * @param lastType  the type of the last token
     */
    public void setLastTokenType(Token.TYPE lastType) {
      m_lastType = lastType;
    }

    /**
     * Get the current property operand value.
     * This is used to hold the property operand name until it is added, since
     * the following relational operator token is added first.
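     * <p>
     * For example, for the expression {@code foo=1} the relational operator token for
     * {@code =} is added before the property operand token for {@code foo}, followed by
     * the value operand token for {@code 1}.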
     *
     * @return the current property operand value
     */
    public String getPropertyOperand() {
      return m_propertyName;
    }

    /**
     * Set the current property operand value.
     * This is used to hold the property operand name until it is added, since
     * the following relational operator token is added first.
     */
    public void setPropertyOperand(String prop) {
      m_propertyName = prop;
    }

    /**
     * Add a token.
     *
     * @param token  the token to add
     */
    public void addToken(Token token) {
      if (m_ignoreSegmentEndToken == null) {
        m_listTokens.add(token);
      } else if (token.getType() == m_ignoreSegmentEndToken) {
        m_ignoreSegmentEndToken = null;
      }
    }

    /**
     * Get the list of generated tokens.
     *
     * @return the list of generated tokens
     */
    public List<Token> getTokenList() {
      return m_listTokens;
    }

    /**
     * Get the set of property names that are to be ignored.
     *
     * @return set of property names to ignore
     */
    public Set<String> getPropertiesToIgnore() {
      return m_propertiesToIgnore;
    }

    /**
     * Add property names to the set of property names to ignore.
     *
     * @param ignoredProperties  set of property names to ignore
     */
    public void addPropertiesToIgnore(Collection<String> ignoredProperties) {
      if (ignoredProperties != null) {
        m_propertiesToIgnore.addAll(ignoredProperties);
      }
    }

    /**
     * Add an intermediate token.
     *
     * @param token  the token to add
     */
    public void pushIntermediateToken(Token token) {
      if (m_ignoreSegmentEndToken == null) {
        m_intermediateTokens.add(token);
      } else if (token.getType() == m_ignoreSegmentEndToken) {
        m_ignoreSegmentEndToken = null;
      }
    }

    /**
     * Return the intermediate tokens, if any.
     *
     * @return the intermediate tokens; will never return null
     */
    public Deque<Token> getIntermediateTokens() {
      return m_intermediateTokens;
    }

    /**
     * Move all intermediate tokens to the context tokens.
     */
    public void addIntermediateTokens() {
      m_listTokens.addAll(m_intermediateTokens);
      m_intermediateTokens.clear();
    }

    /**
     * Obtain the bracket score.  This count is the number of outstanding opening brackets.
     * A value of 0 indicates that all opening and closing brackets are matched.
     *
     * @return the current bracket score
     */
    public int getBracketScore() {
      return bracketScore;
    }

    /**
     * Increment the bracket score by n.  This indicates that n unmatched opening brackets
     * have been encountered.
     *
     * @param n  amount to increment
     *
     * @return the new bracket score after incrementing
     */
    public int incrementBracketScore(int n) {
      return bracketScore += n;
    }

    /**
     * Decrement the bracket score.  This is done when matching a closing bracket with a
     * previously encountered opening bracket.  If the requested decrement would result in
     * a negative number, an exception is thrown as this isn't a valid state.
     *
     * @param decValue  amount to decrement
     *
     * @return the new bracket score after decrementing
     * @throws InvalidQueryException if the decrement operation would result in a negative value
     */
    public int decrementBracketScore(int decValue) throws InvalidQueryException {
      bracketScore -= decValue;
      if (bracketScore < 0) {
        throw new InvalidQueryException("Unexpected closing bracket. Last token type: " + getLastTokenType() +
            ", Current property operand: " + getPropertyOperand() +
            ", tokens: " + getTokenList());
      }
      return bracketScore;
    }

    //todo: most handlers should implement this
    /**
     * Validate the end state of the scan context.
     * Iterates over each handler associated with the final token type and asks it to validate the context.
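     * <p>
     * For example, an expression ending inside an unterminated function such as
     * {@code foo.in(one,two} leaves an outstanding opening bracket and is rejected here.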
     *
     * @throws InvalidQueryException if the context is determined to be in an invalid end state
     */
    public void validateEndState() throws InvalidQueryException {
      for (TokenHandler handler : TOKEN_HANDLERS.get(getLastTokenType())) {
        handler.validateEndState(this);
      }
    }
  }

  /**
   * Token handler base class.
   * Token handlers are responsible for processing a specific token type.
   */
  private abstract class TokenHandler {
    /**
     * Provides base token handler functionality, then delegates to the individual concrete handlers.
     *
     * @param token  the token to process
     * @param ctx    the scan context
     *
     * @return true if this handler processed the token; false otherwise
     * @throws InvalidQueryException if an invalid token is encountered
     */
    public boolean handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      if (handles(token, ctx)) {
        _handleToken(token, ctx);
        ctx.setLastTokenType(getType());
        return true;
      } else {
        return false;
      }
    }

    public void validateEndState(ScanContext ctx) throws InvalidQueryException {
      if (!ctx.getIntermediateTokens().isEmpty()) {
        throw new InvalidQueryException("Unexpected end of expression.");
      }
    }

    /**
     * Process a token.
     *
     * @param token  the token to process
     * @param ctx    the current scan context
     * @throws InvalidQueryException if an invalid token is encountered
     */
    public abstract void _handleToken(String token, ScanContext ctx) throws InvalidQueryException;

    /**
     * Get the token handler type.
     *
     * @return the token handler type
     */
    public abstract Token.TYPE getType();

    /**
     * Determine if a handler handles a specific token.
     *
     * @param token  the token
     * @param ctx    the scan context
     *
     * @return true if the handler handles the specified token; false otherwise
     */
    public abstract boolean handles(String token, ScanContext ctx);
  }

  /**
   * Property Operand token handler.
   */
  private class PropertyOperandTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      //don't add prop name token until after operator token
      if (!ctx.getPropertiesToIgnore().contains(token)) {
        ctx.setPropertyOperand(token);
      } else {
        if (!ctx.getTokenList().isEmpty()) {
          // ignore through next value operand
          ctx.setIgnoreSegmentEndToken(Token.TYPE.VALUE_OPERAND);
          // remove preceding '&' token
          ctx.getTokenList().remove(ctx.getTokenList().size() - 1);
        } else {
          // first expression.  Ignore and strip out next '&'
          ctx.setIgnoreSegmentEndToken(Token.TYPE.LOGICAL_OPERATOR);
        }
      }
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.PROPERTY_OPERAND;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("[^!&\\|<=|>=|!=|=|<|>\\(\\)]+");
    }
  }

  /**
   * Value Operand token handler.
   */
  private class ValueOperandTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.VALUE_OPERAND, token));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.VALUE_OPERAND;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("[^!&\\|<=|>=|!=|=|<|>]+");
    }
  }

  /**
   * Open Bracket token handler.
   */
  private class OpenBracketTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.BRACKET_OPEN, token));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.BRACKET_OPEN;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("\\(");
    }
  }

  /**
   * Close Bracket token handler.
   */
  private class CloseBracketTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.BRACKET_CLOSE, token));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.BRACKET_CLOSE;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("\\)");
    }
  }

  /**
   * Relational Operator token handler.
   */
  private class RelationalOperatorTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.RELATIONAL_OPERATOR, token));
      ctx.addToken(new Token(Token.TYPE.PROPERTY_OPERAND, ctx.getPropertyOperand()));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.RELATIONAL_OPERATOR;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("<=|>=|!=|=|<|>");
    }
  }

  /**
   * Relational Operator function token handler.
   */
  private class RelationalOperatorFuncTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.RELATIONAL_OPERATOR_FUNC, token));
      ctx.addToken(new Token(Token.TYPE.PROPERTY_OPERAND, ctx.getPropertyOperand()));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.RELATIONAL_OPERATOR_FUNC;
    }

    //todo: add a unary relational operator func
    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("\\.[a-zA-Z]+\\(");
    }
  }

  /**
   * Complex Value Operand token handler.
   * Supports values that span multiple tokens.
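   * <p>
   * For example, in an expression such as {@code foo.matches(bar(1,2))} the value
   * {@code bar(1,2)} spans several string tokens; those tokens are combined into a
   * single VALUE_OPERAND token once the matching closing bracket is reached.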
   */
  private class ComplexValueOperandTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      if (token.equals(")")) {
        ctx.decrementBracketScore(1);
      } else if (token.endsWith("(")) {
        // endsWith() is used because of the tokens ".matches(", ".in(" and ".isEmpty("
        ctx.incrementBracketScore(1);
      }

      String tokenValue = token;
      if (ctx.getBracketScore() > 0) {
        Deque<Token> intermediateTokens = ctx.getIntermediateTokens();
        if (intermediateTokens != null && !intermediateTokens.isEmpty()) {
          Token lastToken = intermediateTokens.peek();
          if (lastToken.getType() == Token.TYPE.VALUE_OPERAND) {
            intermediateTokens.pop();
            tokenValue = lastToken.getValue() + token;
          }
        }
        ctx.pushIntermediateToken(new Token(Token.TYPE.VALUE_OPERAND, tokenValue));
      }

      if (ctx.getBracketScore() == 0) {
        ctx.addIntermediateTokens();
        ctx.addToken(new Token(Token.TYPE.BRACKET_CLOSE, ")"));
      }
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.VALUE_OPERAND;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      Token.TYPE lastTokenType = ctx.getLastTokenType();
      if (lastTokenType == Token.TYPE.RELATIONAL_OPERATOR_FUNC) {
        ctx.incrementBracketScore(1);
        return true;
      } else {
        return ctx.getBracketScore() > 0;
      }
    }

    @Override
    public void validateEndState(ScanContext ctx) throws InvalidQueryException {
      if (ctx.getBracketScore() > 0) {
        throw new InvalidQueryException("Missing closing bracket for function: " + ctx.getTokenList());
      }
    }
  }

  /**
   * Logical Operator token handler.
   */
  private class LogicalOperatorTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.LOGICAL_OPERATOR, token));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.LOGICAL_OPERATOR;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("[!&\\|]");
    }
  }

  /**
   * Logical Unary Operator token handler.
   */
  private class LogicalUnaryOperatorTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.LOGICAL_UNARY_OPERATOR, token));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.LOGICAL_UNARY_OPERATOR;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return "!".equals(token);
    }
  }
}