/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.optimizer.index;

import java.io.Serializable;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;

/**
 * Factory of methods used by {@link RewriteGBUsingIndex}
 * to determine if the rewrite optimization can be applied to the input query.
 */
public final class RewriteCanApplyProcFactory {

  private static RewriteCanApplyCtx canApplyCtx = null;

  private RewriteCanApplyProcFactory() {
    //this prevents the class from getting instantiated
  }
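  /*
   * Each processor below inspects a single operator type and records in the shared
   * RewriteCanApplyCtx any condition that rules out the rewrite: an aggregate function
   * other than count, missing group-by keys or predicate columns, and the column lists
   * that are later matched against the index key columns.
   */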
  /**
   * Check for conditions in FilterOperator that do not meet rewrite criteria.
   */
  private static class CheckFilterProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      FilterOperator operator = (FilterOperator) nd;
      canApplyCtx = (RewriteCanApplyCtx) ctx;
      FilterDesc conf = (FilterDesc) operator.getConf();
      //The filter operator should have a predicate of ExprNodeGenericFuncDesc type,
      //which represents the comparison operator
      ExprNodeGenericFuncDesc oldengfd = (ExprNodeGenericFuncDesc) conf.getPredicate();
      if (oldengfd == null) {
        canApplyCtx.setWhrClauseColsFetchException(true);
        //return early: dereferencing a null predicate below would throw a NullPointerException
        return null;
      }
      //The predicate should have valid left and right columns
      List<String> colList = oldengfd.getCols();
      if (colList == null || colList.size() == 0) {
        canApplyCtx.setWhrClauseColsFetchException(true);
        return null;
      }
      //Add the predicate columns to RewriteCanApplyCtx's predColRefs list to check later
      //if index keys contain all filter predicate columns and vice versa
      for (String col : colList) {
        canApplyCtx.getPredicateColumnsList().add(col);
      }
      return null;
    }
  }

  public static CheckFilterProc canApplyOnFilterOperator() {
    return new CheckFilterProc();
  }

  /**
   * Check for conditions in GroupByOperator that do not meet rewrite criteria.
   */
  private static class CheckGroupByProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      GroupByOperator operator = (GroupByOperator) nd;
      canApplyCtx = (RewriteCanApplyCtx) ctx;
      //for each group-by clause in the query, only one GroupByOperator of the
      //GBY-RS-GBY sequence is stored in getGroupOpToInputTables;
      //we need to process only this operator.
      //Also, we do not rewrite cases where the same query branch has multiple group-by constructs
      if (canApplyCtx.getParseContext().getGroupOpToInputTables().containsKey(operator) &&
          !canApplyCtx.isQueryHasGroupBy()) {

        canApplyCtx.setQueryHasGroupBy(true);
        GroupByDesc conf = (GroupByDesc) operator.getConf();
        List<AggregationDesc> aggrList = conf.getAggregators();
        if (aggrList != null && aggrList.size() > 0) {
          for (AggregationDesc aggregationDesc : aggrList) {
            canApplyCtx.setAggFuncCnt(canApplyCtx.getAggFuncCnt() + 1);
            //In the current implementation, we do not support more than one aggregate
            //function in the group-by
            if (canApplyCtx.getAggFuncCnt() > 1) {
              return false;
            }
            String aggFunc = aggregationDesc.getGenericUDAFName();
            if (!("count".equals(aggFunc))) {
              canApplyCtx.setAggFuncIsNotCount(true);
            } else {
              List<ExprNodeDesc> para = aggregationDesc.getParameters();
              //for a valid aggregation, it needs to have a non-null parameter list
              if (para == null) {
                canApplyCtx.setAggFuncColsFetchException(true);
              } else if (para.size() == 0) {
                //count(*) case
                canApplyCtx.setCountOnAllCols(true);
                canApplyCtx.setAggFunction("_count_of_all");
              } else {
                assert para.size() == 1;
                for (int i = 0; i < para.size(); i++) {
                  ExprNodeDesc expr = para.get(i);
                  if (expr instanceof ExprNodeColumnDesc) {
                    //Add the columns to RewriteCanApplyCtx's selectColumnsList
                    //to check later if index keys contain all select clause columns
                    //and vice versa. We get the 'actual' select column names only here,
                    //when an aggregate function appears along with the group-by;
                    //SelectOperator has internal names in its colList data structure
                    canApplyCtx.getSelectColumnsList().add(
                        ((ExprNodeColumnDesc) expr).getColumn());
                    //Add the columns to RewriteCanApplyCtx's aggFuncColList to check later
                    //if the columns contained in the agg func are index key columns
                    canApplyCtx.getAggFuncColList().add(
                        ((ExprNodeColumnDesc) expr).getColumn());
                    canApplyCtx.setAggFunction("_count_of_" +
                        ((ExprNodeColumnDesc) expr).getColumn());
                  } else if (expr instanceof ExprNodeConstantDesc) {
                    //count(1) case
                    canApplyCtx.setCountOfOne(true);
                    canApplyCtx.setAggFunction("_count_of_1");
                  }
                }
              }
            }
          }
        }
        //we need to have non-null group-by keys for a valid group-by operator
        List<ExprNodeDesc> keyList = conf.getKeys();
        if (keyList == null || keyList.size() == 0) {
          canApplyCtx.setGbyKeysFetchException(true);
          //return early: iterating over a null key list below would throw a NullPointerException
          return null;
        }
        for (ExprNodeDesc expr : keyList) {
          checkExpression(expr);
        }
      }
      return null;
    }
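    /**
     * Recursively collects the columns referenced by a group-by key expression so that
     * they can later be compared against the index key columns.
     */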
    private void checkExpression(ExprNodeDesc expr) {
      if (expr instanceof ExprNodeColumnDesc) {
        //Add the group-by keys to RewriteCanApplyCtx's gbKeyNameList to check later
        //if all keys are from index columns
        canApplyCtx.getGbKeyNameList().addAll(expr.getCols());
      } else if (expr instanceof ExprNodeGenericFuncDesc) {
        ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) expr;
        List<ExprNodeDesc> childExprs = funcExpr.getChildExprs();
        for (ExprNodeDesc childExpr : childExprs) {
          if (childExpr instanceof ExprNodeColumnDesc) {
            canApplyCtx.getGbKeyNameList().addAll(expr.getCols());
            canApplyCtx.getSelectColumnsList().add(((ExprNodeColumnDesc) childExpr).getColumn());
          } else if (childExpr instanceof ExprNodeGenericFuncDesc) {
            checkExpression(childExpr);
          }
        }
      }
    }
  }

  public static CheckGroupByProc canApplyOnGroupByOperator() {
    return new CheckGroupByProc();
  }

  /**
   * Check for conditions in SelectOperator that do not meet rewrite criteria.
   */
  private static class CheckSelectProc implements NodeProcessor {
    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
        Object... nodeOutputs) throws SemanticException {
      SelectOperator operator = (SelectOperator) nd;
      canApplyCtx = (RewriteCanApplyCtx) ctx;

      List<Operator<? extends Serializable>> childrenList = operator.getChildOperators();
      Operator<? extends Serializable> child = childrenList.get(0);
      if (child instanceof FileSinkOperator) {
        //get the internal-name to alias mapping from the operator's row schema
        Map<String, String> internalToAlias = new LinkedHashMap<String, String>();
        RowSchema rs = operator.getSchema();
        List<ColumnInfo> sign = rs.getSignature();
        for (ColumnInfo columnInfo : sign) {
          internalToAlias.put(columnInfo.getInternalName(), columnInfo.getAlias());
        }

        //if the FilterOperator predicate has internal column names,
        //we need to retrieve the 'actual' column names to
        //check if index keys contain all filter predicate columns and vice versa.
        //Collect the renames first and apply them after the iteration; removing from
        //the list while iterating over it would throw a ConcurrentModificationException
        Map<String, String> renamedPredCols = new LinkedHashMap<String, String>();
        Iterator<String> predItr = canApplyCtx.getPredicateColumnsList().iterator();
        while (predItr.hasNext()) {
          String predCol = predItr.next();
          String newPredCol = internalToAlias.get(predCol);
          if (newPredCol != null) {
            renamedPredCols.put(predCol, newPredCol);
          }
        }
        for (Map.Entry<String, String> entry : renamedPredCols.entrySet()) {
          canApplyCtx.getPredicateColumnsList().remove(entry.getKey());
          canApplyCtx.getPredicateColumnsList().add(entry.getValue());
        }
      }
      return null;
    }
  }

  public static CheckSelectProc canApplyOnSelectOperator() {
    return new CheckSelectProc();
  }
}