/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.newplan; import java.util.ArrayList; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pig.Expression; import org.apache.pig.Expression.OpType; import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.newplan.logical.expression.AddExpression; import org.apache.pig.newplan.logical.expression.AndExpression; import org.apache.pig.newplan.logical.expression.BinaryExpression; import org.apache.pig.newplan.logical.expression.ConstantExpression; import org.apache.pig.newplan.logical.expression.DivideExpression; import org.apache.pig.newplan.logical.expression.EqualExpression; import org.apache.pig.newplan.logical.expression.GreaterThanEqualExpression; import org.apache.pig.newplan.logical.expression.GreaterThanExpression; import org.apache.pig.newplan.logical.expression.LessThanEqualExpression; import org.apache.pig.newplan.logical.expression.LessThanExpression; import org.apache.pig.newplan.logical.expression.LogicalExpression; import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan; import org.apache.pig.newplan.logical.expression.ModExpression; import org.apache.pig.newplan.logical.expression.MultiplyExpression; import org.apache.pig.newplan.logical.expression.NotEqualExpression; import org.apache.pig.newplan.logical.expression.OrExpression; import org.apache.pig.newplan.logical.expression.ProjectExpression; import org.apache.pig.newplan.logical.expression.RegexExpression; import org.apache.pig.newplan.logical.expression.SubtractExpression; /** * This is a rewrite of {@code PColFilterExtractor} * * We traverse the expression plan bottom up and separate it into two plans * - pushdownExprPlan, plan that can be pushed down to the loader and * - filterExprPlan, remaining plan that needs to be evaluated by pig * */ public class FilterExtractor { private static final Log LOG = LogFactory.getLog(FilterExtractor.class); /** * partition columns associated with the table * present in the load on which the filter whose * inner plan is being visited is applied */ private List<String> partitionCols; /** * We visit this plan to create the filteredPlan */ protected LogicalExpressionPlan originalPlan; /** * Plan that is created after all pushable filters are removed */ protected LogicalExpressionPlan filteredPlan; /** * Plan that can be pushed down */ protected LogicalExpressionPlan pushdownExprPlan; /** * Final filterExpr after we are done */ private LogicalExpression filterExpr = null; /** * @{code Expression} to pushdown */ private Expression pushdownExpr = null; /** * * @param plan logical plan corresponding the filter's comparison condition * @param partitionCols list of partition columns of the table which is * being loaded in the LOAD statement which is input to the filter */ public FilterExtractor(LogicalExpressionPlan plan, List<String> partitionCols) { this.originalPlan = plan; this.partitionCols = new ArrayList<String>(partitionCols); this.filteredPlan = new LogicalExpressionPlan(); this.pushdownExprPlan = new LogicalExpressionPlan(); } public void visit() throws FrontendException { // we will visit the leaf and it will recursively walk the plan LogicalExpression leaf = (LogicalExpression)originalPlan.getSources().get( 0 ); // if the leaf is a unary operator it should be a FilterFunc in // which case we don't try to extract partition filter conditions if(leaf instanceof BinaryExpression) { // recursively traverse the tree bottom up // checkPushdown returns KeyState which is pair of LogicalExpression BinaryExpression binExpr = (BinaryExpression)leaf; KeyState finale = checkPushDown(binExpr); this.filterExpr = finale.filterExpr; this.pushdownExpr = getExpression(finale.pushdownExpr); } } /** * @return new filtered plan after pushdownable filters are removed */ public LogicalExpressionPlan getFilteredPlan() { return filteredPlan; } /** * @return true if pushdown is possible */ public boolean canPushDown() { return pushdownExpr != null; } /** * @return the filterRemovable */ public boolean isFilterRemovable() { return filterExpr == null; } /** * @return the condition on partition columns extracted from filter */ public Expression getPColCondition(){ return pushdownExpr; } private class KeyState { LogicalExpression pushdownExpr; LogicalExpression filterExpr; } private KeyState checkPushDown(LogicalExpression op) throws FrontendException { // Note: Currently, Expression interface only understands 3 Expression Types // (Look at getExpression below) BinaryExpression, ProjectExpression and ConstantExpression if(op instanceof ProjectExpression) { return checkPushDown((ProjectExpression)op); } else if (op instanceof BinaryExpression) { return checkPushDown((BinaryExpression)op); } else if (op instanceof ConstantExpression) { // Constants can be pushdown KeyState state = new KeyState(); state.pushdownExpr = op; state.filterExpr = null; return state; } else { KeyState state = new KeyState(); state.pushdownExpr = null; state.filterExpr = addToFilterPlan(op); return state; } } private LogicalExpression addToFilterPlan(LogicalExpression op) throws FrontendException { // This copies the whole tree underneath op LogicalExpression newOp = op.deepCopy(filteredPlan); return newOp; } private LogicalExpression andLogicalExpressions( LogicalExpressionPlan plan, LogicalExpression a, LogicalExpression b) { if (a == null) { return b; } if (b == null) { return a; } LogicalExpression andOp = new AndExpression(plan, a, b); return andOp; } private LogicalExpression orLogicalExpressions( LogicalExpressionPlan plan, LogicalExpression a, LogicalExpression b) { // Or 2 operators if they are not null if (a == null || b == null) { return null; } LogicalExpression orOp = new OrExpression(plan, a, b); return orOp; } private KeyState checkPushDown(BinaryExpression binExpr) throws FrontendException { KeyState state = new KeyState(); KeyState leftState = checkPushDown(binExpr.getLhs()); KeyState rightState = checkPushDown(binExpr.getRhs()); if (binExpr instanceof AndExpression) { // AND is commutative // Expression = // (leftState.pushdownExpr AND leftState.filterExpr) // AND (rightState.pushdownExpr AND leftState.filterExpr) // // pushDownExpr = (leftState.pushdownExpr AND rightState.pushdownExpr) // filterExpr = (leftState.filterExpr AND rightState.filterExpr) state.pushdownExpr = andLogicalExpressions(pushdownExprPlan, leftState.pushdownExpr, rightState.pushdownExpr); state.filterExpr = andLogicalExpressions(filteredPlan, leftState.filterExpr, rightState.filterExpr); } else if (binExpr instanceof OrExpression) { // Expression = // (leftState.pushdownExpr AND leftState.filterExpr) // OR (rightState.pushdownExpr AND leftState.filterExpr) // // This could be rewritten with distributive property as // (leftState.pushdownExpr OR rightState.pushdownExpr) // AND // ( (leftState.pushdownExpr OR rightState.filterExpr) // AND (leftState.filterExpr OR rightState.pushdownExpr) // AND (leftState.filterExpr OR rightState.filterExpr) // ) // In other words, // pushdownExpr = leftState.pushdownExpr OR rightState.pushdownExpr // filterExpr = (leftState.pushdownExpr OR rightState.filterExpr) // AND (leftState.filterExpr OR rightState.pushdownExpr) // AND (leftState.filterExpr OR rightState.filterExpr) state.pushdownExpr = orLogicalExpressions(pushdownExprPlan, leftState.pushdownExpr, rightState.pushdownExpr); if(state.pushdownExpr == null) { // Whatever we did so far on the right tree is all wasted :( // Undo all the mutation (AND OR distributions) until now removeFromFilteredPlan(leftState.filterExpr); removeFromFilteredPlan(rightState.filterExpr); state.filterExpr = addToFilterPlan(binExpr); } else { LogicalExpression f1 = orLogicalExpressions(filteredPlan, leftState.pushdownExpr, rightState.filterExpr); LogicalExpression f2 = orLogicalExpressions(filteredPlan, leftState.filterExpr, rightState.pushdownExpr); LogicalExpression f3 = orLogicalExpressions(filteredPlan, leftState.filterExpr, rightState.filterExpr); state.filterExpr = andLogicalExpressions(filteredPlan, f1, andLogicalExpressions(filteredPlan, f2, f3)); } } else { // leftState OP rightState if (leftState.filterExpr == null && rightState.filterExpr == null) { state.pushdownExpr = binExpr; state.filterExpr = null; } else { state.pushdownExpr = null; removeFromFilteredPlan(leftState.filterExpr); removeFromFilteredPlan(rightState.filterExpr); state.filterExpr = addToFilterPlan(binExpr); } } return state; } private KeyState checkPushDown(ProjectExpression project) throws FrontendException { String fieldName = project.getFieldSchema().alias; KeyState state = new KeyState(); if(partitionCols.contains(fieldName)) { state.filterExpr = null; state.pushdownExpr = project; } else { state.filterExpr = addToFilterPlan(project); state.pushdownExpr = null; } return state; } /** * Assume that the given operator is already disconnected from its predecessors. * @param op * @throws FrontendException */ private void removeFromFilteredPlan(Operator op) throws FrontendException { List<Operator> succs = filteredPlan.getSuccessors( op ); if( succs == null ) { filteredPlan.remove( op ); return; } Operator[] children = new Operator[succs.size()]; for( int i = 0; i < succs.size(); i++ ) { children[i] = succs.get(i); } for( Operator succ : children ) { filteredPlan.disconnect( op, succ ); removeFromFilteredPlan( succ ); } filteredPlan.remove( op ); } public static Expression getExpression(LogicalExpression op) throws FrontendException { if(op == null) { return null; } if(op instanceof ConstantExpression) { ConstantExpression constExpr =(ConstantExpression)op ; return new Expression.Const( constExpr.getValue() ); } else if (op instanceof ProjectExpression) { ProjectExpression projExpr = (ProjectExpression)op; String fieldName = projExpr.getFieldSchema().alias; return new Expression.Column(fieldName); } else { if( !( op instanceof BinaryExpression ) ) { LOG.error("Unsupported conversion of LogicalExpression to Expression: " + op.getName()); throw new FrontendException("Unsupported conversion of LogicalExpression to Expression: " + op.getName()); } BinaryExpression binOp = (BinaryExpression)op; if(binOp instanceof AddExpression) { return getExpression( binOp, OpType.OP_PLUS ); } else if(binOp instanceof SubtractExpression) { return getExpression(binOp, OpType.OP_MINUS); } else if(binOp instanceof MultiplyExpression) { return getExpression(binOp, OpType.OP_TIMES); } else if(binOp instanceof DivideExpression) { return getExpression(binOp, OpType.OP_DIV); } else if(binOp instanceof ModExpression) { return getExpression(binOp, OpType.OP_MOD); } else if(binOp instanceof AndExpression) { return getExpression(binOp, OpType.OP_AND); } else if(binOp instanceof OrExpression) { return getExpression(binOp, OpType.OP_OR); } else if(binOp instanceof EqualExpression) { return getExpression(binOp, OpType.OP_EQ); } else if(binOp instanceof NotEqualExpression) { return getExpression(binOp, OpType.OP_NE); } else if(binOp instanceof GreaterThanExpression) { return getExpression(binOp, OpType.OP_GT); } else if(binOp instanceof GreaterThanEqualExpression) { return getExpression(binOp, OpType.OP_GE); } else if(binOp instanceof LessThanExpression) { return getExpression(binOp, OpType.OP_LT); } else if(binOp instanceof LessThanEqualExpression) { return getExpression(binOp, OpType.OP_LE); } else if(binOp instanceof RegexExpression) { return getExpression(binOp, OpType.OP_MATCH); } else { LOG.error("Unsupported conversion of LogicalExpression to Expression: " + op.getName()); throw new FrontendException("Unsupported conversion of LogicalExpression to Expression: " + op.getName()); } } } private static Expression getExpression(BinaryExpression binOp, OpType opType) throws FrontendException { return new Expression.BinaryExpression(getExpression(binOp.getLhs()) , getExpression(binOp.getRhs()), opType); } }