/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.logicalLayer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.pig.PigException;
import org.apache.pig.impl.plan.DependencyOrderWalker;
import org.apache.pig.impl.plan.RequiredFields;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.plan.optimizer.OptimizerException;
import org.apache.pig.impl.util.Pair;
import org.apache.pig.impl.logicalLayer.RelationalOperator;
/**
 * Logical plan visitor that pushes column-pruning decisions from operators to
 * their successors.
 *
 * <p>The visitor keeps a map from operator to the list of that operator's
 * output columns that have been pruned. Entries are seeded from outside via
 * {@link #addPruneMap(LogicalOperator, List)}; while walking the plan,
 * {@link #prune(RelationalOperator)} reads the entries of an operator's
 * predecessors, asks the operator to drop the matching input columns, and
 * records which of the operator's own output columns disappear as a result.
 *
 * <p>Columns are described as {@code Pair<Integer, Integer>}. For the lists
 * handed to {@code RelationalOperator.pruneColumns}, {@code first} is the index
 * of the predecessor in the operator's predecessor list and {@code second} is
 * the column index. For the lists stored per operator by this class,
 * {@code first} is always 0 and {@code second} is the output column index.
 */
public class ColumnPruner extends LOVisitor {
    /** Pruned output columns, keyed by the operator that produces them. */
    private final Map<LogicalOperator, List<Pair<Integer,Integer>>> prunedColumnsMap;

    /** The plan being walked; used to look up predecessors of an operator. */
    LogicalPlan plan;

    /**
     * Creates a pruner that walks {@code plan} with a {@link DependencyOrderWalker}.
     *
     * @param plan the logical plan whose operators will be pruned
     */
    public ColumnPruner(LogicalPlan plan) {
        super(plan, new DependencyOrderWalker<LogicalOperator, LogicalPlan>(plan));
        prunedColumnsMap = new HashMap<LogicalOperator, List<Pair<Integer,Integer>>>();
        this.plan = plan;
    }

    /**
     * Registers the pruned output columns of {@code op}, replacing any previous
     * entry for the same operator.
     *
     * @param op            operator whose output columns were pruned
     * @param prunedColumns the pruned columns; only {@code second} (the column
     *                      index) of each pair is read by this class, and the
     *                      list and its pairs are not modified
     */
    public void addPruneMap(LogicalOperator op, List<Pair<Integer,Integer>> prunedColumns) {
        prunedColumnsMap.put(op, prunedColumns);
    }

    /**
     * @return {@code true} if no operator has any pruning information recorded
     */
    public boolean isEmpty() {
        return prunedColumnsMap.isEmpty();
    }

    /**
     * Propagates the pruning recorded for the predecessors of {@code lOp} into
     * {@code lOp} itself.
     *
     * <p>If at least one input column was pruned, {@code lOp.pruneColumns} is
     * called with those columns; when it returns {@code true} the output columns
     * of {@code lOp} that were found to be pruned are recorded for its
     * successors. For an {@link LOCogroup} a plain foreach projecting the
     * surviving output columns is inserted after it first, and the pruning
     * information is recorded against that foreach instead.
     *
     * @param lOp the operator to prune
     * @throws VisitorException with error code 2187 if the plan returns no
     *         predecessor list for {@code lOp}, or with error code 2188 if a
     *         {@link FrontendException} is raised while pruning
     */
    protected void prune(RelationalOperator lOp) throws VisitorException {
        List<LogicalOperator> predecessors = plan.getPredecessors(lOp);
        if (predecessors == null) {
            int errCode = 2187;
            throw new VisitorException("Cannot get predessors", errCode, PigException.BUG);
        }

        List<Pair<Integer, Integer>> columnsPruned = collectPrunedInputColumns(predecessors);
        List<Pair<Integer, Integer>> columnsToPrune = new ArrayList<Pair<Integer, Integer>>();

        try {
            // NOTE(review): this check sits inside the try block, so if
            // VisitorException is a FrontendException it is re-wrapped as 2188
            // by the catch below -- confirm that is intended.
            if (lOp.getSchema() == null) {
                int errCode = 2189;
                throw new VisitorException("Expect schema", errCode, PigException.BUG);
            }

            boolean isCogroup = lOp instanceof LOCogroup;
            int outputColumnCount = lOp.getSchema().size();

            // For every output column, check whether the inputs it is built from were pruned
            for (int i = 0; i < outputColumnCount; i++) {
                List<RequiredFields> relevantFieldsList = lOp.getRelevantInputs(0, i);

                // A column that needs no inputs is a constant field; constants are never pruned
                if (needsNoInputs(relevantFieldsList)) {
                    continue;
                }

                boolean columnPruned = isCogroup
                        ? isCogroupOutputPruned(lOp, i, columnsPruned)
                        : isAnyRelevantInputPruned(relevantFieldsList, columnsPruned);
                if (columnPruned) {
                    columnsToPrune.add(new Pair<Integer, Integer>(0, i));
                }
            }

            if (!columnsPruned.isEmpty()) {
                LogicalOperator currentOp = lOp;

                // If it is LOCogroup, insert foreach to mimic pruning, because we have
                // no way to prune LOCogroup output only by pruning the inputs
                if (isCogroup) {
                    List<Integer> columnsToProject = new ArrayList<Integer>();
                    // Output column i > 0 corresponds to input i-1 (see isCogroupOutputPruned),
                    // so there is one more output column than there are inputs: hence "<=".
                    for (int i = 0; i <= predecessors.size(); i++) {
                        if (!columnsToPrune.contains(new Pair<Integer, Integer>(0, i))) {
                            columnsToProject.add(i);
                        }
                    }
                    currentOp = lOp.insertPlainForEachAfter(columnsToProject);
                }

                if (lOp.pruneColumns(columnsPruned)) {
                    prunedColumnsMap.put(currentOp, columnsToPrune);
                }
            }
        } catch (FrontendException e) {
            int errCode = 2188;
            throw new VisitorException("Cannot prune columns for "+lOp, errCode, PigException.BUG, e);
        }
    }

    /**
     * Gathers the pruned output columns of every predecessor, expressed as
     * (predecessor index, column index) pairs.
     *
     * <p>Fresh pairs are created instead of rewriting {@code first} on the
     * stored ones: the stored lists are shared with whoever registered them and
     * are read again by every other successor of the same predecessor, so they
     * must not be modified here.
     */
    private List<Pair<Integer, Integer>> collectPrunedInputColumns(List<LogicalOperator> predecessors) {
        List<Pair<Integer, Integer>> columnsPruned = new ArrayList<Pair<Integer, Integer>>();
        for (int i = 0; i < predecessors.size(); i++) {
            RelationalOperator predecessor = (RelationalOperator) predecessors.get(i);
            List<Pair<Integer, Integer>> predColumnsToPrune = prunedColumnsMap.get(predecessor);
            if (predColumnsToPrune == null) {
                continue;
            }
            for (Pair<Integer, Integer> column : predColumnsToPrune) {
                columnsPruned.add(new Pair<Integer, Integer>(i, column.second));
            }
        }
        return columnsPruned;
    }

    /**
     * Tells whether an output column depends on no input at all, i.e. the list
     * is {@code null} or every entry is {@code null} or reports
     * {@code needNoFields()}.
     */
    private static boolean needsNoInputs(List<RequiredFields> relevantFieldsList) {
        if (relevantFieldsList == null) {
            return true;
        }
        for (RequiredFields relevantFields : relevantFieldsList) {
            if (relevantFields != null && !relevantFields.needNoFields()) {
                return false;
            }
        }
        return true;
    }

    /**
     * Cogroup rule: output column {@code outputColumn} is pruned when some pruned
     * column of input {@code outputColumn - 1} is listed in the cogroup's
     * required fields for that input.
     */
    private static boolean isCogroupOutputPruned(RelationalOperator cogroup, int outputColumn,
            List<Pair<Integer, Integer>> columnsPruned) throws FrontendException {
        List<RequiredFields> requiredFieldsList = cogroup.getRequiredFields();
        int inputIndex = outputColumn - 1;
        for (Pair<Integer, Integer> column : columnsPruned) {
            if (column.first.intValue() != inputIndex) {
                continue;
            }
            // Saw at least one pruned column from the matching input
            RequiredFields required = requiredFieldsList.get(inputIndex);
            // Guard: presumably the entry or its field list can be absent when no
            // specific fields are listed -- treated as "nothing required" here.
            if (required == null || required.getFields() == null) {
                return false;
            }
            if (required.getFields().contains(column)) {
                return true;
            }
        }
        return false;
    }

    /**
     * General rule: an output column is pruned as soon as any of its relevant
     * input columns is pruned. Scanning stops, without a match, at the first
     * entry that needs all fields.
     */
    private static boolean isAnyRelevantInputPruned(List<RequiredFields> relevantFieldsList,
            List<Pair<Integer, Integer>> columnsPruned) {
        for (RequiredFields relevantFields : relevantFieldsList) {
            if (relevantFields == null) {
                continue;
            }
            if (relevantFields.getNeedAllFields()) {
                break;
            }
            // Guard: presumably getFields() can be null when no specific fields
            // are listed; such an entry cannot match a pruned column.
            if (relevantFields.getFields() == null) {
                continue;
            }
            for (Pair<Integer, Integer> relevantField : relevantFields.getFields()) {
                // If any of the input column is pruned, prune this output column
                if (columnsPruned.contains(relevantField)) {
                    return true;
                }
            }
        }
        return false;
    }

    /** Prunes a cogroup; see {@link #prune(RelationalOperator)}. */
    protected void visit(LOCogroup cogroup) throws VisitorException {
        prune(cogroup);
    }

    /** Prunes a cross; see {@link #prune(RelationalOperator)}. */
    protected void visit(LOCross cross) throws VisitorException {
        prune(cross);
    }

    /** Prunes a distinct; see {@link #prune(RelationalOperator)}. */
    protected void visit(LODistinct distinct) throws VisitorException {
        prune(distinct);
    }

    /** Prunes a filter; see {@link #prune(RelationalOperator)}. */
    protected void visit(LOFilter filter) throws VisitorException {
        prune(filter);
    }

    /**
     * Prunes a foreach unless pruning information is already recorded for it.
     */
    protected void visit(LOForEach foreach) throws VisitorException {
        // The only case we should skip foreach is when this is the foreach
        // inserted after LOLoad to mimic pruning, then we put the prunedColumns entry
        // for that foreach, and we do not need to further visit this foreach here
        if (!prunedColumnsMap.containsKey(foreach)) {
            prune(foreach);
        }
    }

    /** Prunes a join; see {@link #prune(RelationalOperator)}. */
    protected void visit(LOJoin join) throws VisitorException {
        prune(join);
    }

    /** Prunes a limit; see {@link #prune(RelationalOperator)}. */
    protected void visit(LOLimit limit) throws VisitorException {
        prune(limit);
    }

    /** Prunes a sort; see {@link #prune(RelationalOperator)}. */
    protected void visit(LOSort sort) throws VisitorException {
        prune(sort);
    }

    /** Prunes a split; see {@link #prune(RelationalOperator)}. */
    protected void visit(LOSplit split) throws VisitorException {
        prune(split);
    }

    /** Prunes a split output; see {@link #prune(RelationalOperator)}. */
    protected void visit(LOSplitOutput splitoutput) throws VisitorException {
        prune(splitoutput);
    }

    /** Store operators are left untouched by this visitor. */
    protected void visit(LOStore store) throws VisitorException {
        // nothing to prune
    }

    /** Stream operators are left untouched by this visitor. */
    protected void visit(LOStream stream) throws VisitorException {
        // nothing to prune
    }

    /** Prunes a union; see {@link #prune(RelationalOperator)}. */
    protected void visit(LOUnion union) throws VisitorException {
        prune(union);
    }

    /** Load operators are left untouched by this visitor. */
    protected void visit(LOLoad lOp) throws VisitorException {
        // nothing to prune
    }
}