/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.impl.logicalLayer; import java.util.ArrayList; import java.util.List; import org.apache.pig.Expression; import org.apache.pig.PigException; import org.apache.pig.Expression.BinaryExpression; import org.apache.pig.Expression.OpType; import org.apache.pig.impl.plan.DepthFirstWalker; import org.apache.pig.impl.plan.VisitorException; /** * This Visitor works on the filter condition of a LOFilter which immediately * follows a LOLoad that interacts with a metadata system (currently OWL) to * read table data. The visitor looks for conditions on partition columns in the * filter condition and extracts those conditions out of the filter condition. * The condition on partition cols will be used to prune partitions of the table. * */ public class PColFilterExtractor extends LOVisitor { /** * partition columns associated with the table * present in the load on which the filter whose * inner plan is being visited is applied */ private List<String> partitionCols; /** * will contain the partition column filter conditions * accumulated during the visit - the final condition will an expression * built from these sub expressions connected with AND */ private ArrayList<Expression> pColConditions = new ArrayList<Expression>(); /** * flag used during visit to indicate if a partition key * was seen */ private boolean sawKey; private boolean sawNonKeyCol; private enum Side { LEFT, RIGHT, NONE }; private Side replaceSide = Side.NONE; private boolean filterRemovable = false; @Override public void visit() throws VisitorException { // we will visit the leaf and it will recursively walk the plan try { ExpressionOperator leaf = (ExpressionOperator)mPlan.getLeaves().get(0); // if the leaf is a unary operator it should be a FilterFunc in // which case we don't try to extract partition filter conditions if(leaf instanceof BinaryExpressionOperator) { visit((BinaryExpressionOperator)leaf); replaceChild(leaf); // if the entire expression is to be removed, then the above // replaceChild will not set sawKey to false (sawKey is set to // false only in replaceChild() if(sawKey == true) { //there are only conditions on partition columns in the filter //extract it pColConditions.add(getExpression(leaf)); filterRemovable = true; } } } catch (FrontendException e) { throw new VisitorException(e); } } /** * * @param plan logical plan corresponding the filter's comparison condition * @param partitionCols list of partition columns of the table which is * being loaded in the LOAD statement which is input to the filter */ public PColFilterExtractor(LogicalPlan plan, List<String> partitionCols) { // though we configure a DepthFirstWalker to be the walker, we will not // use it - we will visit the leaf and it will recursively walk the // plan super(plan, new DepthFirstWalker<LogicalOperator, LogicalPlan>(plan)); this.partitionCols = new ArrayList<String>(partitionCols); } @Override protected void visit(LOProject project) throws VisitorException { try { String fieldName = project.getFieldSchema().alias; if(partitionCols.contains(fieldName)) { sawKey = true; // The condition on partition column will be used to prune the // scan and removed from the filter condition. Hence the condition // on the partition column will not be re applied when data is read, // so the following cases should throw error until that changes. List<Class<?>> opsToCheckFor = new ArrayList<Class<?>>(); opsToCheckFor.add(LORegexp.class); int errCode = 1110; if(checkSuccessors(project, opsToCheckFor)) { throw new FrontendException("Unsupported query: " + "You have an partition column (" + fieldName + ") inside a regexp operator in the " + "filter condition.", errCode, PigException.INPUT); } opsToCheckFor.set(0, LOUserFunc.class); if(checkSuccessors(project, opsToCheckFor)) { throw new FrontendException("Unsupported query: " + "You have an partition column (" + fieldName + ") inside a function in the " + "filter condition.", errCode, PigException.INPUT); } opsToCheckFor.set(0, LOCast.class); if(checkSuccessors(project, opsToCheckFor)) { throw new FrontendException("Unsupported query: " + "You have an partition column (" + fieldName + ") inside a cast in the " + "filter condition.", errCode, PigException.INPUT); } opsToCheckFor.set(0, LOIsNull.class); if(checkSuccessors(project, opsToCheckFor)) { throw new FrontendException("Unsupported query: " + "You have an partition column (" + fieldName + ") inside a null check operator in the " + "filter condition.", errCode, PigException.INPUT); } opsToCheckFor.set(0, LOBinCond.class); if(checkSuccessors(project, opsToCheckFor)) { throw new FrontendException("Unsupported query: " + "You have an partition column (" + fieldName + ") inside a bincond operator in the " + "filter condition.", errCode, PigException.INPUT); } opsToCheckFor.set(0, LOAnd.class); opsToCheckFor.add(LOOr.class); if(checkSuccessors(project, opsToCheckFor)) { errCode = 1112; throw new FrontendException("Unsupported query: " + "You have an partition column (" + fieldName + " ) in a construction like: " + "(pcond and ...) or (pcond and ...) " + "where pcond is a condition on a partition column.", errCode, PigException.INPUT); } } else { sawNonKeyCol = true; } } catch (FrontendException e) { throw new VisitorException(e); } } @Override protected void visit(BinaryExpressionOperator binOp) throws VisitorException { try { boolean lhsSawKey = false; boolean rhsSawKey = false; boolean lhsSawNonKeyCol = false; boolean rhsSawNonKeyCol = false; sawKey = false; sawNonKeyCol = false; binOp.getLhsOperand().visit(this); replaceChild(binOp.getLhsOperand()); lhsSawKey = sawKey; lhsSawNonKeyCol = sawNonKeyCol; sawKey = false; sawNonKeyCol = false; binOp.getRhsOperand().visit(this); replaceChild(binOp.getRhsOperand()); rhsSawKey = sawKey; rhsSawNonKeyCol = sawNonKeyCol; // only in the case of an AND, we potentially split the AND to // remove conditions on partition columns out of the AND. For this // we set replaceSide accordingly so that when we reach a predecessor // we can trim the appropriate side. If both sides of the AND have // conditions on partition columns, we will remove the AND completely - // in this case, we will not set replaceSide, but sawKey will be // true so that as we go to higher predecessor ANDs we can trim later. if(binOp instanceof LOAnd) { if(lhsSawKey && rhsSawNonKeyCol){ replaceSide = Side.LEFT; }else if(rhsSawKey && lhsSawNonKeyCol){ replaceSide = Side.RIGHT; } } else if(lhsSawKey && rhsSawNonKeyCol || rhsSawKey && lhsSawNonKeyCol){ int errCode = 1111; String errMsg = "Use of partition column/condition with" + " non partition column/condition in filter expression is not " + "supported." ; throw new FrontendException(errMsg, errCode, PigException.INPUT); } sawKey = lhsSawKey || rhsSawKey; sawNonKeyCol = lhsSawNonKeyCol || rhsSawNonKeyCol; } catch (FrontendException e) { throw new VisitorException(e); } } /** * @return the condition on partition columns extracted from filter */ public Expression getPColCondition(){ if(pColConditions.size() == 0) return null; Expression cond = pColConditions.get(0); for(int i=1; i<pColConditions.size(); i++){ //if there is more than one condition expression // connect them using "AND"s cond = new BinaryExpression(cond, pColConditions.get(i), OpType.OP_AND); } return cond; } /** * @return the filterRemovable */ public boolean isFilterRemovable() { return filterRemovable; } //////// helper methods ///////////////////////// /** * check for the presence of a certain operator type in the Successors * @param opToStartFrom * @param opsToCheckFor operators to be checked for at each level of * Successors - the ordering in the list is the order in which the ops * will be checked. * @return true if opsToCheckFor are found * @throws FrontendException */ private boolean checkSuccessors(LogicalOperator opToStartFrom, List<Class<?>> opsToCheckFor) throws FrontendException { boolean done = checkSuccessorsHelper(opToStartFrom, opsToCheckFor); if(!done && !opsToCheckFor.isEmpty()) { // continue checking if there is more to check while(!done) { opToStartFrom = mPlan.getSuccessors(opToStartFrom).get(0); done = checkSuccessorsHelper(opToStartFrom, opsToCheckFor); } } return opsToCheckFor.isEmpty(); } private boolean checkSuccessorsHelper(LogicalOperator opToStartFrom, List<Class<?>> opsToCheckFor) throws FrontendException { List<LogicalOperator> successors = mPlan.getSuccessors( opToStartFrom); if(successors == null || successors.size() == 0) { return true; // further checking cannot be done } if(successors.size() == 1) { LogicalOperator suc = successors.get(0); if(suc.getClass().getCanonicalName().equals( opsToCheckFor.get(0).getCanonicalName())) { // trim the list of operators to check opsToCheckFor.remove(0); if(opsToCheckFor.isEmpty()) { return true; //no further checks required } } } else { throwException(); } return false; // more checking can be done } private void replaceChild(ExpressionOperator childExpr) throws FrontendException { if(replaceSide == Side.NONE) { // the child is trimmed when the appropriate // flag is set to indicate that it needs to be trimmed. return; } // eg if replaceSide == Side.LEFT // binexpop // / \ \ // child (this is the childExpr argument send in) // / \ // Lt Rt // // gets converted to // binexpop // / // Rt if(! (childExpr instanceof BinaryExpressionOperator)){ throwException(); } // child's lhs operand ExpressionOperator childLhs = ((BinaryExpressionOperator)childExpr).getLhsOperand(); // child's rhs operand ExpressionOperator childRhs = ((BinaryExpressionOperator)childExpr).getRhsOperand(); mPlan.disconnect(childLhs, childExpr); mPlan.disconnect(childRhs, childExpr); if(replaceSide == Side.LEFT) { // remove left child and replace childExpr with its right child remove(childLhs); mPlan.replace(childExpr, childRhs); } else if(replaceSide == Side.RIGHT){ // remove right child and replace childExpr with its left child remove(childRhs); mPlan.replace(childExpr, childLhs); }else { throwException(); } //reset replaceSide = Side.NONE; sawKey = false; } /** * @param op * @throws FrontendException */ private void remove(ExpressionOperator op) throws FrontendException { pColConditions.add(getExpression(op)); mPlan.trimAbove(op); mPlan.remove(op); } public static Expression getExpression(ExpressionOperator op) throws FrontendException { if(op instanceof LOConst) { return new Expression.Const(((LOConst)op).getValue()); } else if (op instanceof LOProject) { String fieldName = ((LOProject)op).getFieldSchema().alias; return new Expression.Column(fieldName); } else { if(!(op instanceof BinaryExpressionOperator)) { throwException(); } BinaryExpressionOperator binOp = (BinaryExpressionOperator)op; if(binOp instanceof LOAdd) { return getExpression(binOp, OpType.OP_PLUS); } else if(binOp instanceof LOSubtract) { return getExpression(binOp, OpType.OP_MINUS); } else if(binOp instanceof LOMultiply) { return getExpression(binOp, OpType.OP_TIMES); } else if(binOp instanceof LODivide) { return getExpression(binOp, OpType.OP_DIV); } else if(binOp instanceof LOMod) { return getExpression(binOp, OpType.OP_MOD); } else if(binOp instanceof LOAnd) { return getExpression(binOp, OpType.OP_AND); } else if(binOp instanceof LOOr) { return getExpression(binOp, OpType.OP_OR); } else if(binOp instanceof LOEqual) { return getExpression(binOp, OpType.OP_EQ); } else if(binOp instanceof LONotEqual) { return getExpression(binOp, OpType.OP_NE); } else if(binOp instanceof LOGreaterThan) { return getExpression(binOp, OpType.OP_GT); } else if(binOp instanceof LOGreaterThanEqual) { return getExpression(binOp, OpType.OP_GE); } else if(binOp instanceof LOLesserThan) { return getExpression(binOp, OpType.OP_LT); } else if(binOp instanceof LOLesserThanEqual) { return getExpression(binOp, OpType.OP_LE); } else { throwException(); } } return null; } private static Expression getExpression(BinaryExpressionOperator binOp, OpType opType) throws FrontendException { return new BinaryExpression(getExpression(binOp.getLhsOperand()) ,getExpression(binOp.getRhsOperand()), opType); } public static void throwException() throws FrontendException { int errCode = 2209; throw new FrontendException( "Internal error while processing any partition filter " + "conditions in the filter after the load" , errCode, PigException.BUG ); } // unfortunately LOVisitor today has each visit() method separately defined // so just implementing visit(BinaryExpressionOperator) will not result in // that method being call when LOAdd (say) is encountered (sigh! - we should // fix that at some point) - for now, let's define visit() on each specific // BinaryExpressionOperator that we want to visit to inturn call the // visit(BinaryExpressionOperator) method @Override public void visit(LOAdd op) throws VisitorException { visit((BinaryExpressionOperator)op); } @Override public void visit(LOAnd op) throws VisitorException { visit((BinaryExpressionOperator)op); } @Override public void visit(LODivide op) throws VisitorException { visit((BinaryExpressionOperator)op); } @Override public void visit(LOEqual op) throws VisitorException { visit((BinaryExpressionOperator)op); } @Override public void visit(LOGreaterThan op) throws VisitorException { visit((BinaryExpressionOperator)op); } @Override public void visit(LOGreaterThanEqual op) throws VisitorException { visit((BinaryExpressionOperator)op); } @Override public void visit(LOLesserThan op) throws VisitorException { visit((BinaryExpressionOperator)op); } @Override public void visit(LOLesserThanEqual op) throws VisitorException { visit((BinaryExpressionOperator)op); } @Override public void visit(LOMod op) throws VisitorException { visit((BinaryExpressionOperator)op); } @Override public void visit(LOMultiply op) throws VisitorException { visit((BinaryExpressionOperator)op); } @Override public void visit(LONotEqual op) throws VisitorException { visit((BinaryExpressionOperator)op); } @Override public void visit(LOOr op) throws VisitorException { visit((BinaryExpressionOperator)op); } @Override public void visit(LOSubtract op) throws VisitorException { visit((BinaryExpressionOperator)op); } // this might get called from some visit() - in that case, delegate to // the other visit()s which we have defined here @Override protected void visit(ExpressionOperator op) throws VisitorException { if(op instanceof LOProject) { visit((LOProject)op); } else if (op instanceof BinaryExpressionOperator) { visit((BinaryExpressionOperator)op); } else if (op instanceof LOCast) { visit((LOCast)op); } else if (op instanceof LOBinCond) { visit((LOBinCond)op); } else if (op instanceof LOUserFunc) { visit((LOUserFunc)op); } else if (op instanceof LOIsNull) { visit((LOIsNull)op); } } // some specific operators which are of interest to catch some // unsupported scenarios @Override protected void visit(LOCast cast) throws VisitorException { visit(cast.getExpression()); } @Override public void visit(LONot not) throws VisitorException { visit(not.getOperand()); } @Override protected void visit(LORegexp regexp) throws VisitorException { visit((BinaryExpressionOperator)regexp); } @Override protected void visit(LOBinCond binCond) throws VisitorException { visit(binCond.getCond()); visit(binCond.getLhsOp()); visit(binCond.getRhsOp()); } @Override protected void visit(LOUserFunc udf) throws VisitorException { for (ExpressionOperator op : udf.getArguments()) { visit(op); } } @Override public void visit(LOIsNull isNull) throws VisitorException { visit(isNull.getOperand()); } }