/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.logicalLayer.optimizer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.CastFinder;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.LOCast;
import org.apache.pig.impl.logicalLayer.LOCogroup;
import org.apache.pig.impl.logicalLayer.LOCross;
import org.apache.pig.impl.logicalLayer.LOIsNull;
import org.apache.pig.impl.logicalLayer.LOJoin;
import org.apache.pig.impl.logicalLayer.LOFilter;
import org.apache.pig.impl.logicalLayer.LOForEach;
import org.apache.pig.impl.logicalLayer.LOLimit;
import org.apache.pig.impl.logicalLayer.LOLoad;
import org.apache.pig.impl.logicalLayer.LONative;
import org.apache.pig.impl.logicalLayer.LOProject;
import org.apache.pig.impl.logicalLayer.LOSplit;
import org.apache.pig.impl.logicalLayer.LOStore;
import org.apache.pig.impl.logicalLayer.LOStream;
import org.apache.pig.impl.logicalLayer.LOSplitOutput;
import org.apache.pig.impl.logicalLayer.LOUnion;
import org.apache.pig.impl.logicalLayer.LogicalOperator;
import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.logicalLayer.TopLevelProjectFinder;
import org.apache.pig.impl.logicalLayer.UDFFinder;
import org.apache.pig.impl.plan.DepthFirstWalker;
import org.apache.pig.impl.plan.ProjectionMap;
import org.apache.pig.impl.plan.RequiredFields;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.plan.optimizer.OptimizerException;
import org.apache.pig.PigException;
import org.apache.pig.impl.util.MultiMap;
import org.apache.pig.impl.util.Pair;
/**
 * An optimizer rule that discovers whether a filter can be pushed as high up
 * the tree as possible and, if so, moves it.
 * <p>
 * {@link #check(List)} inspects the filter and its single predecessor and
 * records the decision in this rule's state: either the filter is swapped with
 * its predecessor ({@link #getSwap()}), or it is pushed before one particular
 * input of a multi-input predecessor ({@link #getPushBefore()} /
 * {@link #getPushBeforeInput()}). {@link #transform(List)} applies the recorded
 * decision to the plan and {@link #reset()} clears the recorded state.
 */
public class PushUpFilter extends LogicalTransformer {

    // boolean to remember if the filter has to be swapped
    private boolean mSwap = false;

    // boolean to remember if the filter has to be pushed into one of the
    // filter's predecessor's inputs
    private boolean mPushBefore = false;

    // the input of the predecessor where the filter has to be pushed;
    // -1 means "not set"
    private int mPushBeforeInput = -1;

    /**
     * Creates the rule for the given plan.
     *
     * @param plan
     *            the logical plan this rule inspects and transforms
     */
    public PushUpFilter(LogicalPlan plan) {
        super(plan);
    }

    /**
     *
     * @return true if the filter has to be swapped; false otherwise
     */
    public boolean getSwap() {
        return mSwap;
    }

    /**
     *
     * @return true if the filter has to be pushed before its predecessor; false
     *         otherwise
     */
    public boolean getPushBefore() {
        return mPushBefore;
    }

    /**
     *
     * @return the input of the predecessor where the filter has to be pushed,
     *         or -1 if no such input has been recorded
     */
    public int getPushBeforeInput() {
        return mPushBeforeInput;
    }

    /**
     * Decides whether the filter at the head of {@code nodes} can be moved
     * above its predecessor and records how in this rule's state.
     * <p>
     * On a {@code true} result exactly one of the following has been recorded:
     * a swap with the predecessor ({@code mSwap}), or a push before one input
     * of a multi-input predecessor ({@code mPushBefore} together with
     * {@code mPushBeforeInput}).
     *
     * @param nodes
     *            the matched operators; the first element must be an
     *            {@link LOFilter}
     * @return true if the filter can be moved; false otherwise
     * @throws OptimizerException
     *             if {@code nodes} is null/empty or does not start with a
     *             filter, or if any unexpected error occurs during the check
     */
    @Override
    public boolean check(List<LogicalOperator> nodes) throws OptimizerException {
        try {
            LOFilter filter = (LOFilter) getOperator(nodes);
            List<LogicalOperator> predecessors = mPlan.getPredecessors(filter);

            // the filter must have exactly one predecessor
            if (predecessors == null || predecessors.size() != 1) {
                return false;
            }
            LogicalOperator predecessor = predecessors.get(0);

            // if the predecessor is one of LOLoad/LOStore/LOStream/LOLimit/LONative
            // return false
            if (predecessor instanceof LOLoad || predecessor instanceof LOStore
                    || predecessor instanceof LOStream
                    || predecessor instanceof LOLimit
                    || predecessor instanceof LONative) {
                return false;
            }

            // TODO
            // for now filters cannot be combined
            // remove this check when filters can be combined
            if (predecessor instanceof LOFilter) {
                return false;
            }

            // TODO
            // same rule as filters
            if (predecessor instanceof LOSplitOutput) {
                return false;
            }
            if (predecessor instanceof LOSplit) {
                return false;
            }

            // if the filter's inner plan contains any UDF then return false
            UDFFinder udfFinder = new UDFFinder(filter.getComparisonPlan());
            udfFinder.visit();
            if (udfFinder.foundAnyUDF()) {
                return false;
            }

            // if the filter's inner plan contains any casts then return false
            CastFinder castFinder = new CastFinder(filter.getComparisonPlan());
            castFinder.visit();
            if (castFinder.foundAnyCast()) {
                return false;
            }

            List<RequiredFields> filterRequiredFields = filter
                    .getRequiredFields();
            if (filterRequiredFields == null) {
                return false;
            }
            RequiredFields requiredField = filterRequiredFields.get(0);

            // the filter's conditions contain constant expression
            // return false
            if (requiredField.needNoFields()) {
                return false;
            }

            // if the predecessor is a multi-input operator then detailed
            // checks are required
            if (predecessor instanceof LOCross
                    || predecessor instanceof LOUnion
                    || predecessor instanceof LOCogroup
                    || predecessor instanceof LOJoin) {
                // check the filter's required fields in conjunction with the
                // predecessor's projection map. If the filter needs more than
                // one input then the filter's expressions have to be split
                List<LogicalOperator> grandParents = mPlan
                        .getPredecessors(predecessor);

                // if the predecessor does not have predecessors return false
                if (grandParents == null || grandParents.size() == 0) {
                    return false;
                }

                // check if the predecessor is a group by
                if (grandParents.size() == 1) {
                    if (predecessor instanceof LOCogroup) {
                        mSwap = true;
                        return true;
                    } else {
                        // only a group by can have a single input
                        return false;
                    }
                }

                if (requiredField.needAllFields()) {
                    return false;
                }

                Pair<Boolean, Set<Integer>> mappingResult = isRequiredFieldMapped(
                        requiredField, predecessor.getProjectionMap());
                boolean mapped = mappingResult.first;
                Set<Integer> grandParentIndexes = mappingResult.second;
                if (!mapped) {
                    return false;
                }

                // TODO
                // the filter's conditions requires more than one input of its
                // predecessor
                // when the filter's conditions are splittable return true
                if ((grandParentIndexes == null)
                        || (grandParentIndexes.size() == 0)
                        || (grandParentIndexes.size() > 1)) {
                    return false;
                }

                if (predecessor instanceof LOCogroup) {
                    // check for outer
                    if (isAnyOuter((LOCogroup) predecessor)) {
                        return false;
                    }
                }

                mPushBeforeInput = grandParentIndexes.iterator().next();

                if (predecessor instanceof LOJoin) {
                    boolean[] innerFlags = ((LOJoin) predecessor).getInnerFlags();
                    boolean otherBranchContainOuter = false;
                    boolean sawInner = false;
                    // Walk every join input. The bound is the number of inner
                    // flags (one per input), not the number of successors of
                    // the join: the two are unrelated, and using the successor
                    // count either skips inputs or indexes past the array.
                    for (int i = 0; i < innerFlags.length; i++) {
                        // We do not push filter if any other branch is outer
                        // See PIG-1289
                        // Also in LOJoin, innerFlag==true indicate that branch is the outer join side
                        // which has the exact opposite semantics
                        // If all innerFlag is true, that implies a regular join
                        // If all innerFlag is false, means a outer join, in this case, we can not push up filter for any path (See PIG-1507)
                        if (i != mPushBeforeInput && innerFlags[i]) {
                            otherBranchContainOuter = true;
                        }
                        if (!innerFlags[i]) {
                            sawInner = true;
                        }
                    }
                    // all innerFlag is false, implies an outer join
                    if (!otherBranchContainOuter && !innerFlags[mPushBeforeInput]) {
                        mPushBeforeInput = -1;
                        return false;
                    }
                    // If it is not a regular join and the path we push is on inner side
                    if (otherBranchContainOuter && sawInner) {
                        mPushBeforeInput = -1;
                        return false;
                    }
                }

                mPushBefore = true;
                return true;
            } else if (predecessor instanceof LOForEach) {
                LOForEach loForEach = (LOForEach) predecessor;
                List<Boolean> mFlatten = loForEach.getFlatten();
                boolean hasFlatten = false;
                for (Boolean b : mFlatten) {
                    if (b.equals(true)) {
                        hasFlatten = true;
                    }
                }

                // TODO
                // A better check is to examine each column in the filter's
                // required fields. If the column is the result of a flatten
                // then
                // return false else return true
                // for now if the foreach has a flatten then return false
                if (hasFlatten) {
                    return false;
                }

                Pair<Boolean, Set<Integer>> mappingResult = isRequiredFieldMapped(
                        requiredField, predecessor.getProjectionMap());
                boolean mapped = mappingResult.first;

                // Check if it is a direct mapping, that is, project optionally followed by cast, so if project->project, it is not
                // considered as a mapping
                for (Pair<Integer, Integer> pair : requiredField.getFields()) {
                    if (!isFieldSimple(loForEach.getForEachPlans().get(pair.second))) {
                        mapped = false;
                        break;
                    }
                }
                if (!mapped) {
                    return false;
                }
            }

            // every remaining case: the filter simply trades places with its
            // predecessor
            mSwap = true;
            return true;
        } catch (OptimizerException oe) {
            throw oe;
        } catch (Exception e) {
            int errCode = 2149;
            String msg = "Internal error while trying to check if filters can be pushed up.";
            throw new OptimizerException(msg, errCode, PigException.BUG, e);
        }
    }

    /**
     * Extracts the filter this rule operates on from the matched node list.
     *
     * @param nodes
     *            the matched operators
     * @return the first element of {@code nodes}, which is an
     *         {@link LOFilter}
     * @throws FrontendException
     *             (as an {@link OptimizerException}) if {@code nodes} is null
     *             or empty, or if its first element is not an
     *             {@link LOFilter}
     */
    private LogicalOperator getOperator(List<LogicalOperator> nodes)
            throws FrontendException {
        if ((nodes == null) || (nodes.size() <= 0)) {
            int errCode = 2052;
            String msg = "Internal error. Cannot retrieve operator from null or empty list.";
            throw new OptimizerException(msg, errCode, PigException.BUG);
        }

        LogicalOperator lo = nodes.get(0);
        // instanceof is false for null, so this also rejects a null element
        if (!(lo instanceof LOFilter)) {
            // we should never be called with any other operator class name
            int errCode = 2005;
            String msg = "Expected " + LOFilter.class.getSimpleName()
                    + ", got "
                    + (lo == null ? lo : lo.getClass().getSimpleName());
            throw new OptimizerException(msg, errCode, PigException.INPUT);
        }
        return lo;
    }

    /**
     * Applies the move recorded by {@link #check(List)}: swaps the filter with
     * its predecessor, or pushes it before the recorded input of the
     * predecessor. Does nothing to the plan if neither move was recorded.
     *
     * @param nodes
     *            the matched operators; the first element must be an
     *            {@link LOFilter}
     * @throws OptimizerException
     *             if a push-before was recorded without an input index, if
     *             {@code nodes} does not start with a filter, or if the plan
     *             manipulation fails
     */
    @Override
    public void transform(List<LogicalOperator> nodes)
            throws OptimizerException {
        try {
            LOFilter filter = (LOFilter) getOperator(nodes);
            LogicalOperator predecessor = mPlan.getPredecessors(filter).get(0);
            if (mSwap) {
                mPlan.swap(predecessor, filter);
            } else if (mPushBefore) {
                if (mPushBeforeInput == -1) {
                    // something is wrong!
                    int errCode = 2150;
                    String msg = "Internal error. The push before input is not set.";
                    throw new OptimizerException(msg, errCode, PigException.BUG);
                }
                mPlan.pushBefore(predecessor, filter, mPushBeforeInput);
            }
        } catch (OptimizerException oe) {
            throw oe;
        } catch (Exception e) {
            int errCode = 2151;
            String msg = "Internal error while pushing filters up.";
            throw new OptimizerException(msg, errCode, PigException.BUG, e);
        }
    }

    /**
     * Clears the decision recorded by {@link #check(List)} so that no swap and
     * no push-before is pending.
     */
    @Override
    public void reset() {
        mPushBefore = false;
        mPushBeforeInput = -1;
        mSwap = false;
    }

    /**
     *
     * A method to check if any grouping column has the outer clause in a
     * grouping operator
     *
     * @param cogroup
     *            the cogroup operator to be examined for presence of outer
     *            clause
     * @return true if the cogroup contains any input that has an outer clause
     *         (i.e. any entry of {@code cogroup.getInner()} is false); false
     *         otherwise
     */
    private boolean isAnyOuter(LOCogroup cogroup) {
        boolean[] innerList = cogroup.getInner();
        for (boolean inner : innerList) {
            if (!inner) {
                return true;
            }
        }
        return false;
    }

    /**
     * A method to check if the required field contains elements that are mapped
     * in the predecessor's inputs without a cast
     *
     * @param requiredField
     *            the required field of the operator
     * @param predProjectionMap
     *            the projection map of the predecessor
     * @return a pair of boolean and a set of integers; the first element of the
     *         pair is true if the field is mapped without a cast; false
     *         otherwise; the second element of the pair is the set of
     *         predecessor's inputs that are required for the mapping. The set
     *         is null whenever the first element is false, and also when the
     *         predecessor's projection map reports no changes.
     */
    private Pair<Boolean, Set<Integer>> isRequiredFieldMapped(RequiredFields requiredField,
            ProjectionMap predProjectionMap) {
        if (requiredField == null) {
            return new Pair<Boolean, Set<Integer>>(false, null);
        }

        // if predecessor projection map is null then return false
        if (predProjectionMap == null) {
            return new Pair<Boolean, Set<Integer>>(false, null);
        }

        // if the predecessor does not change its output return true
        if (!predProjectionMap.changes()) {
            return new Pair<Boolean, Set<Integer>>(true, null);
        }

        MultiMap<Integer, ProjectionMap.Column> mappedFields = predProjectionMap
                .getMappedFields();

        // if there is no mapping in the predecessor then return false
        if (mappedFields == null) {
            return new Pair<Boolean, Set<Integer>>(false, null);
        }

        // collect the distinct predecessor output columns the filter reads
        Set<Integer> predInputs = new HashSet<Integer>();
        for (Pair<Integer, Integer> pair : requiredField.getFields()) {
            predInputs.add(pair.second);
        }

        boolean mapped = false;
        Set<Integer> grandParentIndexes = new HashSet<Integer>();

        for (Integer input : predInputs) {
            List<ProjectionMap.Column> inputList = (List<ProjectionMap.Column>) mappedFields
                    .get(input);
            // inputList is null -> the required field is added
            if (inputList == null) {
                return new Pair<Boolean, Set<Integer>>(false, null);
            }
            for (ProjectionMap.Column column : inputList) {
                // TODO
                // Check if the column has a cast
                // if a cast is not used then consider it as mapped
                // in the future this should go away and the cast
                // type should be used to move around the projections
                if (!column.cast()) {
                    mapped = true;
                }
                // remember which input of the predecessor this column comes from
                Pair<Integer, Integer> pair = column.getInputColumn();
                grandParentIndexes.add(pair.first);
            }
        }

        if (!mapped) {
            return new Pair<Boolean, Set<Integer>>(false, null);
        }
        return new Pair<Boolean, Set<Integer>>(true, grandParentIndexes);
    }

    /**
     * Check if the inner plan is simple
     * <p>
     * When the plan has exactly one top-level project, the plan is simple only
     * if that project has no predecessors and every operator reached by
     * following its successors is a single {@link LOCast}. When the project
     * finder reports no project set, or a set whose size is not one, the plan
     * is reported as simple.
     *
     * @param lp
     *            logical plan to check
     * @return Whether the logical plan is a simple project optionally followed by cast
     * @throws OptimizerException
     *             if visiting the plan to find its top-level projects fails;
     *             the underlying {@link VisitorException} is kept as the cause
     */
    boolean isFieldSimple(LogicalPlan lp) throws OptimizerException {
        TopLevelProjectFinder projectFinder = new TopLevelProjectFinder(lp);
        try {
            projectFinder.visit();
        } catch (VisitorException ve) {
            // keep the cause and a message instead of throwing a blank exception
            int errCode = 2149;
            String msg = "Internal error while trying to check if filters can be pushed up.";
            throw new OptimizerException(msg, errCode, PigException.BUG, ve);
        }

        if (projectFinder.getProjectSet() != null && projectFinder.getProjectSet().size() == 1) {
            LOProject project = projectFinder.getProjectSet().iterator().next();
            if (lp.getPredecessors(project) == null) {
                // follow the successor chain; every link must be a lone cast
                LogicalOperator pred = project;
                while (lp.getSuccessors(pred) != null) {
                    if (lp.getSuccessors(pred).size() != 1) {
                        return false;
                    }
                    if (!(lp.getSuccessors(pred).get(0) instanceof LOCast)) {
                        return false;
                    }
                    pred = lp.getSuccessors(pred).get(0);
                }
                return true;
            }
            return false;
        } else {
            return true;
        }
    }
}