/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.ambari.server.api.predicate;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Scans a query expression and generates an array of tokens.
 * Each token contains type and value information.
 *
 * First, the query expression is broken down into string tokens using
 * a regular expression which splits on a set of delimiters that includes
 * operators and brackets.
 *
 * Second, each string token is converted into a Token with type and value information.
 */
public class QueryLexer {

  /**
   * Query string constants.
   */
  public static final String QUERY_FIELDS    = "fields";
  public static final String QUERY_FORMAT    = "format";
  public static final String QUERY_PAGE_SIZE = "page_size";
  public static final String QUERY_TO        = "to";
  public static final String QUERY_FROM      = "from";
  public static final String QUERY_MINIMAL   = "minimal_response";
  public static final String QUERY_SORT      = "sortBy";

  /**
   * All valid delimiters.
   */
  private static final String[] ALL_DELIMS =
      {".matches\\(", ".in\\(", ".isEmpty\\(", "<=", ">=", "!=", "=", "<", ">", "&", "|", "!", "(", ")"};

  /**
   * Map of token type to list of valid handlers for the next token.
   */
  private static final Map<Token.TYPE, List<TokenHandler>> TOKEN_HANDLERS = new HashMap<>();

  /**
   * Static set of property names to ignore.
   */
  private static final Set<String> SET_IGNORE = new HashSet<>();

  /**
   * Constructor.
   * Registers the token handlers.
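   * <p>
   * Handlers are registered per preceding token type: the {@code TOKEN_HANDLERS} entry
   * for a given token type lists the handlers for the tokens that may legally follow it.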
   */
  public QueryLexer() {
    //todo: refactor handler registration
    List<TokenHandler> listHandlers = new ArrayList<>();
    listHandlers.add(new LogicalUnaryOperatorTokenHandler());
    listHandlers.add(new OpenBracketTokenHandler());
    listHandlers.add(new PropertyOperandTokenHandler());

    TOKEN_HANDLERS.put(Token.TYPE.BRACKET_OPEN, listHandlers);
    TOKEN_HANDLERS.put(Token.TYPE.LOGICAL_OPERATOR, listHandlers);
    TOKEN_HANDLERS.put(Token.TYPE.LOGICAL_UNARY_OPERATOR, listHandlers);

    listHandlers = new ArrayList<>();
    listHandlers.add(new RelationalOperatorTokenHandler());
    listHandlers.add(new RelationalOperatorFuncTokenHandler());
    TOKEN_HANDLERS.put(Token.TYPE.PROPERTY_OPERAND, listHandlers);

    listHandlers = new ArrayList<>();
    listHandlers.add(new ValueOperandTokenHandler());
    TOKEN_HANDLERS.put(Token.TYPE.RELATIONAL_OPERATOR, listHandlers);

    listHandlers = new ArrayList<>();
    listHandlers.add(new CloseBracketTokenHandler());
    listHandlers.add(new ComplexValueOperandTokenHandler());
    TOKEN_HANDLERS.put(Token.TYPE.RELATIONAL_OPERATOR_FUNC, listHandlers);

    listHandlers = new ArrayList<>();
    listHandlers.add(new CloseBracketTokenHandler());
    listHandlers.add(new LogicalOperatorTokenHandler());
    TOKEN_HANDLERS.put(Token.TYPE.BRACKET_CLOSE, listHandlers);

    listHandlers = new ArrayList<>(listHandlers);
    // complex value operands can span multiple tokens
    listHandlers.add(0, new ComplexValueOperandTokenHandler());
    TOKEN_HANDLERS.put(Token.TYPE.VALUE_OPERAND, listHandlers);
  }

  /**
   * Scan the provided query and generate a token stream to be used by the query parser.
   *
   * @param exp  the query expression to scan
   *
   * @return an array of tokens
   * @throws InvalidQueryException if the query is invalid
   */
  public Token[] tokens(String exp) throws InvalidQueryException {
    return tokens(exp, Collections.<String>emptySet());
  }

  /**
   * Scan the provided query and generate a token stream to be used by the query parser.
   *
   * @param exp               the query expression to scan
   * @param ignoreProperties  property names which should be ignored
   *
   * @return an array of tokens
   * @throws InvalidQueryException if the query is invalid
   */
  public Token[] tokens(String exp, Collection<String> ignoreProperties) throws InvalidQueryException {
    ScanContext ctx = new ScanContext();
    ctx.addPropertiesToIgnore(SET_IGNORE);
    ctx.addPropertiesToIgnore(ignoreProperties);

    for (String tok : parseStringTokens(exp)) {
      List<TokenHandler> listHandlers = TOKEN_HANDLERS.get(ctx.getLastTokenType());
      boolean processed = false;
      int idx = 0;
      while (!processed && idx < listHandlers.size()) {
        processed = listHandlers.get(idx++).handleToken(tok, ctx);
      }
      if (!processed) {
        throw new InvalidQueryException("Invalid Query Token: token='" + tok +
            "', previous token type=" + ctx.getLastTokenType());
      }
    }
    ctx.validateEndState();

    return ctx.getTokenList().toArray(new Token[ctx.getTokenList().size()]);
  }

  /**
   * Uses a regular expression to scan a query expression and produce a list of string tokens.
   * These tokens are the exact strings that exist in the original syntax.
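   * <p>
   * For example, the expression {@code foo=1&!(bar<=2)} is split into the string tokens
   * {@code "foo", "=", "1", "&", "!", "(", "bar", "<=", "2", ")"}.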
   *
   * @param exp  the query expression
   *
   * @return list of string tokens from the query expression
   */
  private List<String> parseStringTokens(String exp) {
    Pattern pattern = generatePattern();
    Matcher matcher = pattern.matcher(exp);
    List<String> listStrTokens = new ArrayList<>();
    int pos = 0;
    // while there's a delimiter in the string
    while (matcher.find()) {
      if (pos != matcher.start()) {
        // add anything between the current and previous delimiter to the token list
        listStrTokens.add(exp.substring(pos, matcher.start()));
      }
      // add the delimiter
      listStrTokens.add(matcher.group());
      // remember the end of the delimiter
      pos = matcher.end();
    }
    if (pos != exp.length()) {
      // add any chars remaining in the string after the last delimiter
      listStrTokens.add(exp.substring(pos));
    }
    return listStrTokens;
  }

  /**
   * Generate the regex pattern to tokenize the query expression.
   *
   * @return the regex pattern
   */
  private Pattern generatePattern() {
    StringBuilder sb = new StringBuilder();
    sb.append('(');
    // for each delimiter
    for (String delim : ALL_DELIMS) {
      if (sb.length() != 1) {
        sb.append('|');
      }
      sb.append('\\');
      sb.append(delim);
    }
    sb.append(')');

    return Pattern.compile(sb.toString());
  }

  /**
   * Add property names that the lexer should ignore.
   */
  static {
    // ignore values
    SET_IGNORE.add(QUERY_FIELDS);
    SET_IGNORE.add(QUERY_FORMAT);
    SET_IGNORE.add(QUERY_PAGE_SIZE);
    SET_IGNORE.add(QUERY_TO);
    SET_IGNORE.add(QUERY_FROM);
    SET_IGNORE.add(QUERY_MINIMAL);
    SET_IGNORE.add(QUERY_SORT);
    SET_IGNORE.add("_");
  }

  /**
   * Scan context. Provides contextual information related to the current scan.
   */
  private class ScanContext {
    /**
     * The last token type scanned.
     */
    private Token.TYPE m_lastType;

    /**
     * The last property operand value.
     */
    private String m_propertyName;

    /**
     * List of tokens generated by the scan.
     */
    private List<Token> m_listTokens = new ArrayList<>();

    /**
     * If non-null, ignore all tokens up to and including this token type.
     */
    private Token.TYPE m_ignoreSegmentEndToken = null;

    /**
     * Property names which are to be ignored.
     */
    private Set<String> m_propertiesToIgnore = new HashSet<>();

    /**
     * Bracket score.  This score is the difference between the number of
     * opening brackets and the number of closing brackets processed by
     * a handler.  Only handlers which process values containing brackets
     * will be interested in this information.
     */
    private int bracketScore = 0;

    /**
     * Intermediate tokens are tokens which are used by a handler which may
     * process several adjacent tokens.  A handler might push intermediate
     * tokens and then, in subsequent invocations, combine/alter/remove/etc
     * these tokens prior to adding them to the context tokens.
     */
    private Deque<Token> m_intermediateTokens = new ArrayDeque<>();

    /**
     * Constructor.
     */
    private ScanContext() {
      //init last type to the logical op type
      m_lastType = Token.TYPE.LOGICAL_OPERATOR;
    }

    /**
     * Ignore all subsequent tokens up to and including the provided token type.
     *
     * @param type  the last token type of the ignore segment
     */
    public void setIgnoreSegmentEndToken(Token.TYPE type) {
      m_ignoreSegmentEndToken = type;
    }

    /**
     * Get the type of the last token.
     *
     * @return the type of the last token
     */
    public Token.TYPE getLastTokenType() {
      return m_lastType;
    }

    /**
     * Set the type of the last token.
     *
     * @param lastType  the type of the last token
     */
    public void setLastTokenType(Token.TYPE lastType) {
      m_lastType = lastType;
    }

    /**
     * Get the current property operand value.
     * This is used to hold the property operand name until it is added, since
     * the following relational operator token is added first.
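     * <p>
     * For example, for the expression {@code foo=1} the relational operator token for
     * {@code =} is added before the property operand token for {@code foo}, followed by
     * the value operand token for {@code 1}.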
     *
     * @return the current property operand value
     */
    public String getPropertyOperand() {
      return m_propertyName;
    }

    /**
     * Set the current property operand value.
     * This is used to hold the property operand name until it is added, since
     * the following relational operator token is added first.
     */
    public void setPropertyOperand(String prop) {
      m_propertyName = prop;
    }

    /**
     * Add a token.
     *
     * @param token  the token to add
     */
    public void addToken(Token token) {
      if (m_ignoreSegmentEndToken == null) {
        m_listTokens.add(token);
      } else if (token.getType() == m_ignoreSegmentEndToken) {
        m_ignoreSegmentEndToken = null;
      }
    }

    /**
     * Get the list of generated tokens.
     *
     * @return the list of generated tokens
     */
    public List<Token> getTokenList() {
      return m_listTokens;
    }

    /**
     * Get the set of property names that are to be ignored.
     *
     * @return set of property names to ignore
     */
    public Set<String> getPropertiesToIgnore() {
      return m_propertiesToIgnore;
    }

    /**
     * Add property names to the set of property names to ignore.
     *
     * @param ignoredProperties  set of property names to ignore
     */
    public void addPropertiesToIgnore(Collection<String> ignoredProperties) {
      if (ignoredProperties != null) {
        m_propertiesToIgnore.addAll(ignoredProperties);
      }
    }

    /**
     * Add an intermediate token.
     *
     * @param token  the token to add
     */
    public void pushIntermediateToken(Token token) {
      if (m_ignoreSegmentEndToken == null) {
        m_intermediateTokens.add(token);
      } else if (token.getType() == m_ignoreSegmentEndToken) {
        m_ignoreSegmentEndToken = null;
      }
    }

    /**
     * Return the intermediate tokens, if any.
     *
     * @return the intermediate tokens; will never return null
     */
    public Deque<Token> getIntermediateTokens() {
      return m_intermediateTokens;
    }

    /**
     * Move all intermediate tokens to the context tokens.
     */
    public void addIntermediateTokens() {
      m_listTokens.addAll(m_intermediateTokens);
      m_intermediateTokens.clear();
    }

    /**
     * Obtain the bracket score.  This count is the number of outstanding opening brackets.
     * A value of 0 indicates that all opening and closing brackets are matched.
     *
     * @return the current bracket score
     */
    public int getBracketScore() {
      return bracketScore;
    }

    /**
     * Increment the bracket score by n.  This indicates that n unmatched opening brackets
     * have been encountered.
     *
     * @param n  amount to increment
     *
     * @return the new bracket score after incrementing
     */
    public int incrementBracketScore(int n) {
      return bracketScore += n;
    }

    /**
     * Decrement the bracket score.  This is done when matching a closing bracket with a
     * previously encountered opening bracket.  If the requested decrement would result in
     * a negative number, an exception is thrown as this isn't a valid state.
     *
     * @param decValue  amount to decrement
     *
     * @return the new bracket score after decrementing
     * @throws InvalidQueryException if the decrement operation would result in a negative value
     */
    public int decrementBracketScore(int decValue) throws InvalidQueryException {
      bracketScore -= decValue;
      if (bracketScore < 0) {
        throw new InvalidQueryException("Unexpected closing bracket. Last token type: " + getLastTokenType() +
            ", Current property operand: " + getPropertyOperand() +
            ", tokens: " + getTokenList());
      }
      return bracketScore;
    }

    //todo: most handlers should implement this
    /**
     * Validate the end state of the scan context.
     * Iterates over each handler associated with the final token type and asks it to validate the context.
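     * <p>
     * For example, an expression ending inside an unterminated function such as
     * {@code foo.in(one,two} leaves an outstanding opening bracket and is rejected here.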
     *
     * @throws InvalidQueryException if the context is determined to be in an invalid end state
     */
    public void validateEndState() throws InvalidQueryException {
      for (TokenHandler handler : TOKEN_HANDLERS.get(getLastTokenType())) {
        handler.validateEndState(this);
      }
    }
  }

  /**
   * Token handler base class.
   * Token handlers are responsible for processing a specific token type.
   */
  private abstract class TokenHandler {
    /**
     * Provides base token handler functionality, then delegates to the individual concrete handlers.
     *
     * @param token  the token to process
     * @param ctx    the scan context
     *
     * @return true if this handler processed the token; false otherwise
     * @throws InvalidQueryException if an invalid token is encountered
     */
    public boolean handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      if (handles(token, ctx)) {
        _handleToken(token, ctx);
        ctx.setLastTokenType(getType());
        return true;
      } else {
        return false;
      }
    }

    public void validateEndState(ScanContext ctx) throws InvalidQueryException {
      if (!ctx.getIntermediateTokens().isEmpty()) {
        throw new InvalidQueryException("Unexpected end of expression.");
      }
    }

    /**
     * Process a token.
     *
     * @param token  the token to process
     * @param ctx    the current scan context
     * @throws InvalidQueryException if an invalid token is encountered
     */
    public abstract void _handleToken(String token, ScanContext ctx) throws InvalidQueryException;

    /**
     * Get the token handler type.
     *
     * @return the token handler type
     */
    public abstract Token.TYPE getType();

    /**
     * Determine if a handler handles a specific token.
     *
     * @param token  the token
     * @param ctx    the scan context
     *
     * @return true if the handler handles the specified token; false otherwise
     */
    public abstract boolean handles(String token, ScanContext ctx);
  }

  /**
   * Property Operand token handler.
   */
  private class PropertyOperandTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      //don't add prop name token until after operator token
      if (!ctx.getPropertiesToIgnore().contains(token)) {
        ctx.setPropertyOperand(token);
      } else {
        if (!ctx.getTokenList().isEmpty()) {
          // ignore through next value operand
          ctx.setIgnoreSegmentEndToken(Token.TYPE.VALUE_OPERAND);
          // remove preceding '&' token
          ctx.getTokenList().remove(ctx.getTokenList().size() - 1);
        } else {
          // first expression.  Ignore and strip out next '&'
          ctx.setIgnoreSegmentEndToken(Token.TYPE.LOGICAL_OPERATOR);
        }
      }
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.PROPERTY_OPERAND;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("[^!&\\|<=|>=|!=|=|<|>\\(\\)]+");
    }
  }

  /**
   * Value Operand token handler.
   */
  private class ValueOperandTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.VALUE_OPERAND, token));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.VALUE_OPERAND;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("[^!&\\|<=|>=|!=|=|<|>]+");
    }
  }

  /**
   * Open Bracket token handler.
   */
  private class OpenBracketTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.BRACKET_OPEN, token));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.BRACKET_OPEN;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("\\(");
    }
  }

  /**
   * Close Bracket token handler.
   */
  private class CloseBracketTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.BRACKET_CLOSE, token));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.BRACKET_CLOSE;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("\\)");
    }
  }

  /**
   * Relational Operator token handler.
   */
  private class RelationalOperatorTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.RELATIONAL_OPERATOR, token));
      ctx.addToken(new Token(Token.TYPE.PROPERTY_OPERAND, ctx.getPropertyOperand()));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.RELATIONAL_OPERATOR;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("<=|>=|!=|=|<|>");
    }
  }

  /**
   * Relational Operator function token handler.
   */
  private class RelationalOperatorFuncTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.RELATIONAL_OPERATOR_FUNC, token));
      ctx.addToken(new Token(Token.TYPE.PROPERTY_OPERAND, ctx.getPropertyOperand()));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.RELATIONAL_OPERATOR_FUNC;
    }

    //todo: add a unary relational operator func
    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("\\.[a-zA-Z]+\\(");
    }
  }

  /**
   * Complex Value Operand token handler.
   * Supports values that span multiple tokens.
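   * <p>
   * For example, in an expression such as {@code foo.matches(bar(1,2))} the value
   * {@code bar(1,2)} spans several string tokens; those tokens are combined into a
   * single VALUE_OPERAND token once the matching closing bracket is reached.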
   */
  private class ComplexValueOperandTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      if (token.equals(")")) {
        ctx.decrementBracketScore(1);
      } else if (token.endsWith("(")) {
        // endsWith() is used because of the tokens ".matches(", ".in(" and ".isEmpty("
        ctx.incrementBracketScore(1);
      }

      String tokenValue = token;
      if (ctx.getBracketScore() > 0) {
        Deque<Token> intermediateTokens = ctx.getIntermediateTokens();
        if (intermediateTokens != null && !intermediateTokens.isEmpty()) {
          Token lastToken = intermediateTokens.peek();
          if (lastToken.getType() == Token.TYPE.VALUE_OPERAND) {
            intermediateTokens.pop();
            tokenValue = lastToken.getValue() + token;
          }
        }
        ctx.pushIntermediateToken(new Token(Token.TYPE.VALUE_OPERAND, tokenValue));
      }

      if (ctx.getBracketScore() == 0) {
        ctx.addIntermediateTokens();
        ctx.addToken(new Token(Token.TYPE.BRACKET_CLOSE, ")"));
      }
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.VALUE_OPERAND;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      Token.TYPE lastTokenType = ctx.getLastTokenType();
      if (lastTokenType == Token.TYPE.RELATIONAL_OPERATOR_FUNC) {
        ctx.incrementBracketScore(1);
        return true;
      } else {
        return ctx.getBracketScore() > 0;
      }
    }

    @Override
    public void validateEndState(ScanContext ctx) throws InvalidQueryException {
      if (ctx.getBracketScore() > 0) {
        throw new InvalidQueryException("Missing closing bracket for function: " + ctx.getTokenList());
      }
    }
  }

  /**
   * Logical Operator token handler.
   */
  private class LogicalOperatorTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.LOGICAL_OPERATOR, token));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.LOGICAL_OPERATOR;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return token.matches("[!&\\|]");
    }
  }

  /**
   * Logical Unary Operator token handler.
   */
  private class LogicalUnaryOperatorTokenHandler extends TokenHandler {

    @Override
    public void _handleToken(String token, ScanContext ctx) throws InvalidQueryException {
      ctx.addToken(new Token(Token.TYPE.LOGICAL_UNARY_OPERATOR, token));
    }

    @Override
    public Token.TYPE getType() {
      return Token.TYPE.LOGICAL_UNARY_OPERATOR;
    }

    @Override
    public boolean handles(String token, ScanContext ctx) {
      return "!".equals(token);
    }
  }
}