/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.logicalLayer;
import java.util.ArrayList;
import java.util.List;
import org.apache.pig.PigException;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.ProjectionMap;
import org.apache.pig.impl.plan.RequiredFields;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.plan.ProjectionMap.Column;
import org.apache.pig.impl.util.MultiMap;
import org.apache.pig.impl.util.Pair;
/**
 * Abstract base class for relational operators in the logical plan.
 *
 * It supplies default answers for the projection-map and required-field
 * queries (both default to {@code null}) and a few helpers used when columns
 * are pruned from the plan.
 */
public abstract class RelationalOperator extends LogicalOperator {
    private static final long serialVersionUID = 2L;

    /**
     * @param plan
     *            Logical plan this operator is a part of.
     * @param k
     *            Operator key to assign to this node.
     * @param rp
     *            degree of requested parallelism with which to execute this
     *            node.
     */
    public RelationalOperator(LogicalPlan plan, OperatorKey k, int rp) {
        super(plan, k, rp);
    }

    /**
     * @param plan
     *            Logical plan this operator is a part of.
     * @param k
     *            Operator key to assign to this node.
     */
    public RelationalOperator(LogicalPlan plan, OperatorKey k) {
        super(plan, k);
    }

    /**
     * Produce a map describing how this operator modifies its projection.
     * This base implementation always returns null.
     *
     * @return ProjectionMap; null indicates the operator does not know how
     *         the projection changes, for example a join of two inputs where
     *         one input does not have a schema.
     */
    @Override
    public ProjectionMap getProjectionMap() {
        return null;
    }

    /**
     * Unset the projection map as if it had not been calculated. This is used
     * by anyone who reorganizes the tree and needs to have projection maps
     * recalculated.
     */
    @Override
    public void unsetProjectionMap() {
        mIsProjectionMapComputed = false;
        mProjectionMap = null;
    }

    /**
     * Regenerate the projection map: the schema is regenerated first, then the
     * projection map is unset and fetched again.
     *
     * @return the projection map returned by {@link #getProjectionMap()} after
     *         the reset
     */
    @Override
    public ProjectionMap regenerateProjectionMap() {
        try {
            regenerateSchema();
        } catch (Exception ignored) {
            // Best effort: a failure to regenerate the schema is deliberately
            // not propagated (this method declares no checked exceptions); the
            // projection map is still reset and recomputed below.
            // NOTE(review): consider logging this once a logger is available.
        }
        unsetProjectionMap();
        return getProjectionMap();
    }

    /**
     * Get a list of fields that this operator requires. This is not
     * necessarily equivalent to the list of fields the operator projects. For
     * example, a filter will project anything passed to it, but requires only
     * the fields explicitly referenced in its filter expression.
     * This base implementation always returns null.
     *
     * @return list of RequiredFields; null indicates that the operator does
     *         not need any fields from its input.
     */
    public List<RequiredFields> getRequiredFields() {
        return null;
    }

    /**
     * Get the relevant input columns of a particular output column. The
     * resulting input columns are only those that are direct components of the
     * output column. Input columns that are needed by the RelationalOperator
     * as a whole, and thus only indirectly contribute to the output columns,
     * are not counted; those are required columns.
     *
     * eg1:
     * A = load 'a' AS (a0, a1, a2);
     * B = filter A by a0=='1';
     *
     * The relevant input column for B.$1 is A.a1 because A.a1 directly
     * generates B.$1. A.a0 is needed by the filter operator and is considered
     * a required field of the relational operator.
     *
     * eg2:
     * A = load 'a' AS (a0, a1);
     * B = load 'b' AS (b0, b1);
     * C = join A by a0, B by b0;
     *
     * The relevant input column for C.$0 is A.a0. The relevant input column
     * for C.$1 is A.a1.
     *
     * eg3:
     * A = load 'a' AS (a0, a1);
     * B = load 'b' AS (b0, b1);
     * C = cogroup A by a0, B by b0;
     *
     * The relevant input columns for C.$0 are A.a0 and B.b0. The relevant
     * input columns for C.$1 are A.*. The relevant input columns for C.$2 are
     * B.*.
     *
     * eg4:
     * A = load 'a' AS (a0, a1, a2);
     * B = foreach A generate a1, a0+a2;
     *
     * The relevant input column for B.$0 is A.a1. The relevant input columns
     * for B.$1 are A.a0 and A.a2.
     *
     * eg5:
     * A = load 'a' AS (a0, a1, a2);
     * B = foreach A generate a1, *;
     *
     * The relevant input column for B.$0 is A.a1, for B.$1 it is A.a0,
     * for B.$2 it is A.a1, and for B.$3 it is A.a2.
     *
     * @param output output index. Only LOSplit has outputs other than 0 currently
     * @param column output column
     * @return List of relevant input columns; null if Pig cannot determine the
     *         relevant inputs or any error occurs
     */
    abstract public List<RequiredFields> getRelevantInputs(int output, int column) throws FrontendException;

    /**
     * Default column-pruning hook. This base implementation does not look at
     * {@code columns}; it only discards the cached schema, recomputes it, and
     * forces the projection map to be recomputed.
     *
     * @param columns
     *            columns requested for pruning (unused here; presumably
     *            consumed by overriding subclasses)
     * @return always {@code true} in this implementation
     * @throws FrontendException
     *             if recomputing the schema fails
     */
    public boolean pruneColumns(List<Pair<Integer, Integer>> columns)
            throws FrontendException {
        unsetSchema();
        getSchema();
        // Only the "computed" flag is cleared (not mProjectionMap itself)
        // before asking for the projection map again.
        mIsProjectionMapComputed = false;
        getProjectionMap();
        return true;
    }

    /**
     * Adjust the top-level projections of an inner plan after an input column
     * has been dropped: every projection of a column positioned after
     * {@code column} is shifted down by one. Projections of other columns are
     * left untouched.
     *
     * @param plan
     *            inner plan whose top-level LOProject operators are adjusted
     * @param column
     *            index of the column being dropped
     * @throws FrontendException
     *             with error code 2196 if traversing the plan fails, or with
     *             error code 2197 if a top-level projection is a star
     *             projection
     */
    public void pruneColumnInPlan(LogicalPlan plan, int column)
            throws FrontendException {
        TopLevelProjectFinder projectFinder = new TopLevelProjectFinder(plan);
        try {
            projectFinder.visit();
        } catch (VisitorException ve) {
            int errCode = 2196;
            throw new FrontendException("Exception when traversing inner plan",
                    errCode, PigException.BUG, ve);
        }

        for (LOProject loProject : projectFinder.getProjectSet()) {
            if (loProject.isStar()) {
                int errCode = 2197;
                throw new FrontendException(
                        "Cannot drop column which require *", errCode,
                        PigException.BUG);
            }
            int col = loProject.getCol();
            if (col > column) {
                // Columns after the dropped one move one position to the left.
                loProject.getProjection().set(0, col - 1);
            }
        }
    }

    /**
     * Insert a forEach after this operator. The forEach projects the columns
     * listed in {@code columnsToProject} directly (no flattening) and removes
     * the rest.
     *
     * The new forEach is placed between this operator and its first
     * successor, and that successor is rewired to read from the forEach.
     *
     * @param columnsToProject
     *            indexes of this operator's output columns to keep, in the
     *            order they should be generated
     * @return the newly inserted LOForEach
     * @throws FrontendException
     *             if the schema cannot be obtained or the plan cannot be
     *             modified
     */
    public LogicalOperator insertPlainForEachAfter(List<Integer> columnsToProject) throws FrontendException {
        ArrayList<Boolean> flattenList = new ArrayList<Boolean>();
        ArrayList<LogicalPlan> generatePlans = new ArrayList<LogicalPlan>();
        String scope = getOperatorKey().scope;

        // One single-node inner plan per kept column: a plain projection of
        // that column of this operator.
        for (int pos : columnsToProject) {
            LogicalPlan projectPlan = new LogicalPlan();
            ExpressionOperator column = new LOProject(projectPlan,
                    new OperatorKey(scope, NodeIdGenerator.getGenerator().getNextNodeId(scope)),
                    this, pos);
            flattenList.add(false);
            projectPlan.add(column);
            generatePlans.add(projectPlan);
        }

        LOForEach forEach = new LOForEach(mPlan,
                new OperatorKey(scope, NodeIdGenerator.getGenerator().getNextNodeId(scope)),
                generatePlans, flattenList);
        LogicalOperator succ = mPlan.getSuccessors(this).get(0);

        // Identity mapping (column i -> input 0, column i) for every column of
        // this operator's schema. Valid indexes are 0 .. size-1, so the bound
        // is exclusive; the previous inclusive bound also registered a mapping
        // for a non-existent column at index size.
        MultiMap<Integer, Column> mappedFields = new MultiMap<Integer, Column>();
        final int columnCount = getSchema().size();
        for (int i = 0; i < columnCount; i++) {
            List<Column> columns = new ArrayList<Column>();
            columns.add(new Column(new Pair<Integer, Integer>(0, i)));
            mappedFields.put(i, columns);
        }

        mPlan.add(forEach);
        mPlan.doInsertBetween(this, forEach, succ, false);
        // The mapping is installed on the forEach before the successor is
        // rewired to it.
        forEach.getProjectionMap().setMappedFields(mappedFields);
        succ.rewire(this, 0, forEach, false);
        return forEach;
    }
}