/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.impl.logicalLayer;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.pig.PigException;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.plan.Operator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.PlanException;
import org.apache.pig.impl.plan.ProjectionMap;
import org.apache.pig.impl.plan.RequiredFields;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.optimizer.SchemaRemover;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.impl.util.MultiMap;
import org.apache.pig.impl.util.Pair;

/**
 * Logical representation of a (CO)GROUP. Holds, per input relation, the list
 * of inner plans that compute the group-by key columns, plus an "inner" flag
 * per input (inner vs. outer cogroup semantics).
 */
public class LOCogroup extends RelationalOperator {
    private static final long serialVersionUID = 2L;

    /**
     * Enum for the type of group
     */
    public static enum GROUPTYPE {
        REGULAR,   // Regular (co)group
        COLLECTED, // Collected group
        MERGE      // Map-side CoGroup on sorted data
    };

    /**
     * Cogroup contains a list of logical operators corresponding to the
     * relational operators and a list of generates for each relational
     * operator. Each generate operator in turn contains a list of expressions
     * for the columns that are projected
     */
    // mIsInner[i] == true means input i participates as an inner relation.
    // May be null: the constructor accepts a null isInner array.
    private boolean[] mIsInner;
    private static Log log = LogFactory.getLog(LOCogroup.class);
    // Per-input-relation group-by key plans; insertion order of keys matters
    // because positional arithmetic below assumes key order matches inputs.
    private MultiMap<LogicalOperator, LogicalPlan> mGroupByPlans;
    private GROUPTYPE mGroupType;

    /**
     * static constant to refer to the option of selecting a group type
     */
    public final static Integer OPTION_GROUPTYPE = 1;

    /**
     *
     * @param plan
     *            LogicalPlan this operator is a part of.
     * @param k
     *            OperatorKey for this operator
     * @param groupByPlans
     *            the group by columns
     * @param isInner
     *            indicates whether the cogroup is inner for each relation
     */
    public LOCogroup(
            LogicalPlan plan,
            OperatorKey k,
            MultiMap<LogicalOperator, LogicalPlan> groupByPlans,
            boolean[] isInner) {
        this(plan, k, groupByPlans, GROUPTYPE.REGULAR, isInner);
    }

    /**
     *
     * @param plan
     *            LogicalPlan this operator is a part of.
     * @param k
     *            OperatorKey for this operator
     * @param groupByPlans
     *            the group by columns
     * @param type
     *            the type of this group
     * @param isInner
     *            indicates whether the cogroup is inner for each relation
     */
    public LOCogroup(
            LogicalPlan plan,
            OperatorKey k,
            MultiMap<LogicalOperator, LogicalPlan> groupByPlans,
            GROUPTYPE type,
            boolean[] isInner) {
        super(plan, k);
        mGroupByPlans = groupByPlans;
        // Defensive copy so later mutation of the caller's array cannot
        // change this operator's semantics. isInner may legitimately be null.
        if (isInner != null) {
            mIsInner = Arrays.copyOf(isInner, isInner.length);
        }
        mGroupType = type;
    }

    /**
     * @return the input (predecessor) relations of this cogroup, as recorded
     *         in the enclosing plan.
     */
    public List<LogicalOperator> getInputs() {
        return mPlan.getPredecessors(this);
    }

    public MultiMap<LogicalOperator, LogicalPlan> getGroupByPlans() {
        return mGroupByPlans;
    }

    public void setGroupByPlans(MultiMap<LogicalOperator, LogicalPlan> groupByPlans) {
        mGroupByPlans = groupByPlans;
    }

    /**
     * @return the internal inner-flag array (not a copy; callers may mutate
     *         it and some do — keep exposing the live array for compatibility).
     */
    public boolean[] getInner() {
        return mIsInner;
    }

    public void setInner(boolean[] inner) {
        mIsInner = inner;
    }

    public GROUPTYPE getGroupType() {
        return mGroupType;
    }

    @Override
    public String name() {
        return getAliasString() + "CoGroup " + mKey.scope + "-" + mKey.id;
    }

    @Override
    public boolean supportsMultipleInputs() {
        return true;
    }

    /**
     * Computes (and caches) the output schema of the cogroup.
     *
     * The first field of the resulting tuple has the alias 'group'; its
     * schema is a union of the group-by columns of each input. Each
     * subsequent field is a bag, aliased by the corresponding input, holding
     * the tuples from that input that match the grouping criterion.
     */
    @Override
    public Schema getSchema() throws FrontendException {
        List<LogicalOperator> inputs = mPlan.getPredecessors(this);
        /*
         * Dumping my understanding of how the schema of a Group/CoGroup will
         * look. The first field of the resulting tuple will have the alias
         * 'group'. The schema for this field is a union of the group by columns
         * for each input. The subsequent fields in the output tuple will have
         * the alias of the input as the alias for a bag that contains the
         * tuples from the input that match the grouping criterion
         */
        if (!mIsSchemaComputed) {
            List<Schema.FieldSchema> fss = new ArrayList<Schema.FieldSchema>(
                    inputs.size() + 1); // one more to account for the "group"
            // the alias of the first field is group and hence the
            // string "group"
            /*
             * Here goes an attempt to describe how the schema for the first
             * column - 'group' should look like. If the number of group by
             * columns = 1 then the schema for 'group' is the
             * schema(fieldschema(col)) If the number of group by columns > 1
             * then find the set union of the group by columns and form the
             * schema as schema(list<fieldschema of the cols>)
             * The parser will ensure that the number of group by columns are
             * the same across all inputs. The computation of the schema for group
             * is as follows:
             * For each input of cogroup, for each operator (projection ,udf, constant), etc.
             * compute the multimaps <group_column_number, alias> and <group_column_number, operator>
             * and <alias, expression_operator>
             * Also set the lookup table for each alias to false
             */
            Schema groupBySchema = null;
            List<Schema.FieldSchema> groupByFss = new ArrayList<Schema.FieldSchema>();
            Map<String, Boolean> aliasLookup = new HashMap<String, Boolean>();
            MultiMap<String, ExpressionOperator> aliasExop = new MultiMap<String, ExpressionOperator>();
            MultiMap<Integer, String> positionAlias = new MultiMap<Integer, String>();
            MultiMap<Integer, ExpressionOperator> positionOperators = new MultiMap<Integer, ExpressionOperator>();

            for (LogicalOperator op : inputs) {
                int position = 0;
                for (LogicalPlan plan : mGroupByPlans.get(op)) {
                    for (LogicalOperator eOp : plan.getLeaves()) {
                        Schema.FieldSchema fs = ((ExpressionOperator) eOp).getFieldSchema();
                        if (null != fs) {
                            String alias = fs.alias;
                            if (null != alias) {
                                aliasLookup.put(alias, false);
                                aliasExop.put(alias, (ExpressionOperator) eOp);
                                positionAlias.put(position, alias);
                            }
                            //store the operators for each position in the group
                        } else {
                            log.warn("Field Schema of an expression operator cannot be null");
                        }
                        positionOperators.put(position, (ExpressionOperator) eOp);
                    }
                    ++position;
                }
            }

            /*
             * Now that the multi maps and the look up table are computed, do the following:
             * for each column in the group, in order check if the alias is alaready used or not
             * If the alias is already used, check for the next unused alias.
             * IF none of the aliases can be used then the alias of that column is null
             * If an alias is found usable, then use that alias and the schema of the expression operator
             * corresponding to that position. Note that the first operator for that position is
             * picked. The type checker will ensure that the correct schema is merged
             */
            int arity = mGroupByPlans.get(inputs.get(0)).size();
            for (int i = 0; i < arity; ++i) {
                Schema.FieldSchema groupByFs;
                Collection<String> cAliases = positionAlias.get(i);
                if (null != cAliases) {
                    Object[] aliases = cAliases.toArray();
                    for (int j = 0; j < aliases.length; ++j) {
                        String alias = (String) aliases[j];
                        if (null != alias) {
                            //Collection<ExpressionOperator> cEops = aliasExop.get(alias);
                            Collection<ExpressionOperator> cEops = positionOperators.get(i);
                            if (null != cEops) {
                                // By convention the first operator at this position supplies
                                // the schema; the type checker reconciles the rest.
                                ExpressionOperator eOp = (ExpressionOperator) (cEops.toArray())[0];
                                if (null != eOp) {
                                    if (!aliasLookup.get(alias)) {
                                        Schema.FieldSchema fs = eOp.getFieldSchema();
                                        if (null != fs) {
                                            groupByFs = new Schema.FieldSchema(alias, fs.schema, fs.type);
                                            groupByFss.add(groupByFs);
                                            aliasLookup.put(alias, true);
                                        } else {
                                            groupByFs = new Schema.FieldSchema(alias, null, DataType.BYTEARRAY);
                                            groupByFss.add(groupByFs);
                                        }
                                        setFieldSchemaParent(groupByFs, positionOperators, i);
                                        break;
                                    } else {
                                        if ((j + 1) < aliases.length) {
                                            // try the next candidate alias for this position
                                            continue;
                                        } else {
                                            //we have seen this alias before
                                            //just add the schema of the expression operator with the null alias
                                            Schema.FieldSchema fs = eOp.getFieldSchema();
                                            if (null != fs) {
                                                groupByFs = new Schema.FieldSchema(null, fs.schema, fs.type);
                                                groupByFss.add(groupByFs);
                                                for (ExpressionOperator op : cEops) {
                                                    Schema.FieldSchema opFs = op.getFieldSchema();
                                                    if (null != opFs) {
                                                        groupByFs.setParent(opFs.canonicalName, eOp);
                                                    } else {
                                                        groupByFs.setParent(null, eOp);
                                                    }
                                                }
                                            } else {
                                                groupByFs = new Schema.FieldSchema(null, null, DataType.BYTEARRAY);
                                                groupByFss.add(groupByFs);
                                            }
                                            setFieldSchemaParent(groupByFs, positionOperators, i);
                                            break;
                                        }
                                    }
                                } else {
                                    //should not be here
                                    log.debug("Cannot be here: we cannot have a collection of null expression operators");
                                }
                            } else {
                                //should not be here
                                log.debug("Cannot be here: we should have an expression operator at each position");
                            }
                        } else {
                            //should not be here
                            log.debug("Cannot be here: we cannot have a collection of null aliases ");
                        }
                    }
                } else {
                    //We do not have any alias for this position in the group by columns
                    //We have positions $1, $2, etc.
                    Collection<ExpressionOperator> cEops = positionOperators.get(i);
                    if (null != cEops) {
                        ExpressionOperator eOp = (ExpressionOperator) (cEops.toArray())[0];
                        if (null != eOp) {
                            Schema.FieldSchema fs = eOp.getFieldSchema();
                            if (null != fs) {
                                groupByFs = new Schema.FieldSchema(null, fs.schema, fs.type);
                                groupByFss.add(groupByFs);
                            } else {
                                groupByFs = new Schema.FieldSchema(null, null, DataType.BYTEARRAY);
                                groupByFss.add(groupByFs);
                            }
                        } else {
                            groupByFs = new Schema.FieldSchema(null, DataType.BYTEARRAY);
                            groupByFss.add(groupByFs);
                        }
                    } else {
                        groupByFs = new Schema.FieldSchema(null, DataType.BYTEARRAY);
                        groupByFss.add(groupByFs);
                    }
                    setFieldSchemaParent(groupByFs, positionOperators, i);
                }
            }

            groupBySchema = new Schema(groupByFss);

            if (1 == arity) {
                // Single group-by column: 'group' takes that column's type directly.
                byte groupByType = getAtomicGroupByType();
                Schema groupSchema = groupByFss.get(0).schema;
                Schema.FieldSchema groupByFs = new Schema.FieldSchema("group", groupSchema, groupByType);
                setFieldSchemaParent(groupByFs, positionOperators, 0);
                fss.add(groupByFs);
            } else {
                // Multiple group-by columns: 'group' is a tuple merged across inputs.
                Schema mergedGroupSchema = getTupleGroupBySchema();
                if (mergedGroupSchema.size() != groupBySchema.size()) {
                    mSchema = null;
                    mIsSchemaComputed = false;
                    int errCode = 2000;
                    String msg = "Internal error. Mismatch in group by arities. Expected: " + mergedGroupSchema + ". Found: " + groupBySchema;
                    throw new FrontendException(msg, errCode, PigException.BUG, false, null);
                } else {
                    // carry the computed aliases over onto the merged (typed) schema
                    for (int i = 0; i < mergedGroupSchema.size(); ++i) {
                        Schema.FieldSchema mergedFs = mergedGroupSchema.getField(i);
                        Schema.FieldSchema groupFs = groupBySchema.getField(i);
                        mergedFs.alias = groupFs.alias;
                        mergedGroupSchema.addAlias(mergedFs.alias, mergedFs);
                    }
                }
                Schema.FieldSchema groupByFs = new Schema.FieldSchema("group", mergedGroupSchema);
                fss.add(groupByFs);
                for (int i = 0; i < arity; ++i) {
                    setFieldSchemaParent(groupByFs, positionOperators, i);
                }
            }

            // One bag field per input, aliased by the input relation's alias.
            for (LogicalOperator op : inputs) {
                try {
                    Schema.FieldSchema bagFs = new Schema.FieldSchema(op.getAlias(), op.getSchema(), DataType.BAG);
                    fss.add(bagFs);
                    setFieldSchemaParent(bagFs, op);
                } catch (FrontendException ioe) {
                    mIsSchemaComputed = false;
                    mSchema = null;
                    throw ioe;
                }
            }
            mIsSchemaComputed = true;
            mSchema = new Schema(fss);
            mType = DataType.BAG; //mType is from the super class
        }
        return mSchema;
    }

    /**
     * @return true when there is more than one group-by plan for the first
     *         input, i.e. the group column is a tuple. Requires at least one
     *         input to be connected.
     */
    public boolean isTupleGroupCol() {
        List<LogicalOperator> inputs = mPlan.getPredecessors(this);
        if (inputs == null || inputs.size() == 0) {
            throw new AssertionError("COGroup.isTupleGroupCol() can be called "
                    + "after it has an input only");
        }
        return mGroupByPlans.get(inputs.get(0)).size() > 1;
    }

    @Override
    public void visit(LOVisitor v) throws VisitorException {
        v.visit(this);
    }

    /***
     *
     * This does switch the mapping
     *
     * oldOp -> List of inner plans
     * to
     * newOp -> List of inner plans
     *
     * which is useful when there is a structural change in LogicalPlan
     *
     * @param oldOp the old operator
     * @param newOp the new operator
     */
    public void switchGroupByPlanOp(LogicalOperator oldOp, LogicalOperator newOp) {
        Collection<LogicalPlan> innerPlans = mGroupByPlans.removeKey(oldOp);
        mGroupByPlans.put(newOp, innerPlans);
    }

    /**
     * Clears cached schema state on this operator and on every inner
     * group-by plan, forcing recomputation on the next getSchema().
     */
    public void unsetSchema() throws VisitorException {
        for (LogicalOperator input : getInputs()) {
            for (LogicalPlan plan : mGroupByPlans.get(input)) {
                SchemaRemover sr = new SchemaRemover(plan);
                sr.visit();
            }
        }
        super.unsetSchema();
    }

    /**
     * This can be used to get the merged type of output group col
     * only when the group col is of atomic type
     * TODO: This doesn't work with group by complex type
     * @return The type of the group by
     */
    public byte getAtomicGroupByType() throws FrontendException {
        if (isTupleGroupCol()) {
            int errCode = 1010;
            String msg = "getAtomicGroupByType is used only when"
                    + " dealing with atomic group col";
            throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
        }
        byte groupType = DataType.BYTEARRAY;
        // merge all the inner plan outputs so we know what type
        // our group column should be
        for (int i = 0; i < getInputs().size(); i++) {
            LogicalOperator input = getInputs().get(i);
            List<LogicalPlan> innerPlans = new ArrayList<LogicalPlan>(getGroupByPlans().get(input));
            if (innerPlans.size() != 1) {
                int errCode = 1012;
                String msg = "Each COGroup input has to have "
                        + "the same number of inner plans";
                throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
            }
            byte innerType = innerPlans.get(0).getSingleLeafPlanOutputType();
            groupType = DataType.mergeType(groupType, innerType);
            if (groupType == -1) {
                int errCode = 1107;
                String msg = "Cannot merge cogroup keys, incompatible types";
                throw new FrontendException(msg, errCode, PigException.INPUT);
            }
        }
        return groupType;
    }

    /*
        This implementation is based on the assumption that all the
        inputs have the same group col tuple arity.
        TODO: This doesn't work with group by complex type
     */
    public Schema getTupleGroupBySchema() throws FrontendException {
        if (!isTupleGroupCol()) {
            int errCode = 1011;
            String msg = "getTupleGroupBySchema is used only when"
                    + " dealing with tuple group col";
            throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
        }

        // this fsList represents all the columns in group tuple
        List<Schema.FieldSchema> fsList = new ArrayList<Schema.FieldSchema>();

        int outputSchemaSize = getGroupByPlans().get(getInputs().get(0)).size();

        // by default, they are all bytearray
        // for type checking, we don't care about aliases
        for (int i = 0; i < outputSchemaSize; i++) {
            fsList.add(new Schema.FieldSchema(null, DataType.BYTEARRAY));
        }

        // merge all the inner plan outputs so we know what type
        // our group column should be
        for (int i = 0; i < getInputs().size(); i++) {
            LogicalOperator input = getInputs().get(i);
            List<LogicalPlan> innerPlans = new ArrayList<LogicalPlan>(getGroupByPlans().get(input));

            boolean seenProjectStar = false;
            for (int j = 0; j < innerPlans.size(); j++) {
                byte innerType = innerPlans.get(j).getSingleLeafPlanOutputType();
                ExpressionOperator eOp = (ExpressionOperator) innerPlans.get(j).getSingleLeafPlanOutputOp();

                if (eOp instanceof LOProject) {
                    if (((LOProject) eOp).isStar()) {
                        seenProjectStar = true;
                    }
                }

                Schema.FieldSchema groupFs = fsList.get(j);
                byte dt = groupFs.type; // remember pre-merge type for the error message
                groupFs.type = DataType.mergeType(groupFs.type, innerType);
                if (!DataType.isUsableType(groupFs.type)) {
                    int errCode = 1110;
                    String msg = "Cogroup column " + j + " has incompatible types: "
                            + DataType.findTypeName(dt) + " versus " + DataType.findTypeName(innerType);
                    throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
                }

                Schema.FieldSchema fs = eOp.getFieldSchema();
                if (null != fs) {
                    groupFs.setParent(eOp.getFieldSchema().canonicalName, eOp);
                } else {
                    groupFs.setParent(null, eOp);
                }
            }

            if (seenProjectStar) {
                int errCode = 1013;
                String msg = "Grouping attributes can either be star (*) or a list of expressions, but not both.";
                throw new FrontendException(msg, errCode, PigException.INPUT, false, null);
            }
        }

        return new Schema(fsList);
    }

    // Records every operator at the given group position as a parent of fs,
    // so schema lineage survives plan rewrites.
    private void setFieldSchemaParent(Schema.FieldSchema fs, MultiMap<Integer, ExpressionOperator> positionOperators, int position) throws FrontendException {
        for (ExpressionOperator op : positionOperators.get(position)) {
            Schema.FieldSchema opFs = op.getFieldSchema();
            if (null != opFs) {
                fs.setParent(opFs.canonicalName, op);
            } else {
                fs.setParent(null, op);
            }
        }
    }

    // Records every field of the input operator's schema as a parent of fs
    // (used for the per-input bag columns).
    private void setFieldSchemaParent(Schema.FieldSchema fs, LogicalOperator op) throws FrontendException {
        Schema s = op.getSchema();
        if (null != s) {
            for (Schema.FieldSchema inputFs : s.getFields()) {
                if (null != inputFs) {
                    fs.setParent(inputFs.canonicalName, op);
                } else {
                    fs.setParent(null, op);
                }
            }
        } else {
            fs.setParent(null, op);
        }
    }

    /**
     * @see org.apache.pig.impl.logicalLayer.LogicalOperator#clone()
     * Do not use the clone method directly. Operators are cloned when logical plans
     * are cloned using {@link LogicalPlanCloner}
     */
    @Override
    protected Object clone() throws CloneNotSupportedException {
        // first start with LogicalOperator clone
        LOCogroup cogroupClone = (LOCogroup) super.clone();

        // create deep copy of other cogroup specific members.
        // mIsInner may be null (the constructor permits a null isInner array);
        // guard against the NPE the unconditional copy used to throw.
        if (mIsInner != null) {
            cogroupClone.mIsInner = Arrays.copyOf(mIsInner, mIsInner.length);
        }

        cogroupClone.mGroupByPlans = new MultiMap<LogicalOperator, LogicalPlan>();
        for (Iterator<LogicalOperator> it = mGroupByPlans.keySet().iterator(); it.hasNext();) {
            LogicalOperator relOp = it.next();
            Collection<LogicalPlan> values = mGroupByPlans.get(relOp);
            for (Iterator<LogicalPlan> planIterator = values.iterator(); planIterator.hasNext();) {
                LogicalPlanCloneHelper lpCloneHelper = new LogicalPlanCloneHelper(planIterator.next());
                cogroupClone.mGroupByPlans.put(relOp, lpCloneHelper.getClonedPlan());
            }
        }

        return cogroupClone;
    }

    @Override
    public ProjectionMap getProjectionMap() {
        if (mIsProjectionMapComputed) return mProjectionMap;
        mIsProjectionMapComputed = true;

        Schema outputSchema;
        try {
            outputSchema = getSchema();
        } catch (FrontendException fee) {
            mProjectionMap = null;
            return mProjectionMap;
        }

        if (outputSchema == null) {
            mProjectionMap = null;
            return mProjectionMap;
        }

        List<LogicalOperator> predecessors = (ArrayList<LogicalOperator>) mPlan.getPredecessors(this);
        if (predecessors == null) {
            mProjectionMap = null;
            return mProjectionMap;
        }

        //the column with the alias 'group' can be mapped in several ways
        //1. group A by $0;
        //Here the mapping is 0 -> (0, 0)
        //2. group A by ($0, $1);
        //Here there is no direct mapping and 'group' is an added column
        //3. cogroup A by $0, B by $0;
        //Here the mapping is 0 -> ((0, 0), (1, 0))
        //4. cogroup A by ($0, $1), B by ($0, $1);
        //Here there is no direct mapping and 'group' is an added column
        //For anything other than a simple project 'group' is an added column
        MultiMap<LogicalOperator, LogicalPlan> groupByPlans = getGroupByPlans();

        boolean groupByAdded = false;
        MultiMap<Integer, ProjectionMap.Column> mapFields = new MultiMap<Integer, ProjectionMap.Column>();
        List<Pair<Integer, Integer>> removedFields = new ArrayList<Pair<Integer, Integer>>();

        for (int inputNum = 0; (inputNum < predecessors.size()) && (!groupByAdded); ++inputNum) {
            LogicalOperator predecessor = predecessors.get(inputNum);

            List<LogicalPlan> predecessorPlans = (ArrayList<LogicalPlan>) groupByPlans.get(predecessor);

            int inputColumn = -1;
            for (LogicalPlan predecessorPlan : predecessorPlans) {
                List<LogicalOperator> leaves = predecessorPlan.getLeaves();
                if (leaves == null || leaves.size() > 1) {
                    groupByAdded = true;
                    break;
                }

                if (leaves.get(0) instanceof LOProject) {
                    //find out if this project is a chain of projects
                    Pair<LOProject, LOCast> pair = LogicalPlan.chainOfProjects(predecessorPlan);
                    if (pair != null) {
                        LOProject topProject = pair.first;
                        if (topProject != null) {
                            inputColumn = topProject.getCol();
                            LOCast cast = pair.second;
                            if (cast != null) {
                                mapFields.put(0, new ProjectionMap.Column(
                                        new Pair<Integer, Integer>(inputNum, inputColumn), true, cast.getType()));
                            } else {
                                mapFields.put(0, new ProjectionMap.Column(new Pair<Integer, Integer>(inputNum, inputColumn)));
                            }
                        }
                    }
                } else {
                    groupByAdded = true;
                }
            }

            Schema inputSchema;
            try {
                inputSchema = predecessor.getSchema();
            } catch (FrontendException fee) {
                mProjectionMap = null;
                return mProjectionMap;
            }

            if (inputSchema != null) {
                for (int column = 0; column < inputSchema.size(); ++column) {
                    if (!groupByAdded && inputColumn != column) {
                        removedFields.add(new Pair<Integer, Integer>(inputNum, column));
                    }
                }
            }
        }

        List<Integer> addedFields = new ArrayList<Integer>();
        if (groupByAdded) {
            addedFields.add(0); //for the column 'group'
            mapFields = null; //since 'group' is an added column there is no mapping
        }

        //the columns 1 through n - 1 are generated by cogroup
        for (int i = 0; i < groupByPlans.keySet().size(); ++i) {
            addedFields.add(i + 1);
        }

        if (removedFields.size() == 0) {
            removedFields = null;
        }

        mProjectionMap = new ProjectionMap(mapFields, removedFields, addedFields);
        return mProjectionMap;
    }

    @Override
    public List<RequiredFields> getRequiredFields() {
        List<LogicalOperator> predecessors = mPlan.getPredecessors(this);

        if (predecessors == null) {
            return null;
        }

        List<RequiredFields> requiredFields = new ArrayList<RequiredFields>();

        for (int inputNum = 0; inputNum < predecessors.size(); ++inputNum) {
            Set<Pair<Integer, Integer>> fields = new HashSet<Pair<Integer, Integer>>();
            Set<LOProject> projectSet = new HashSet<LOProject>();
            boolean groupByStar = false;

            for (LogicalPlan plan : getGroupByPlans().get(predecessors.get(inputNum))) {
                TopLevelProjectFinder projectFinder = new TopLevelProjectFinder(plan);
                try {
                    projectFinder.visit();
                } catch (VisitorException ve) {
                    // cannot analyze this plan; signal "unknown" per contract
                    requiredFields.clear();
                    requiredFields.add(null);
                    return requiredFields;
                }
                projectSet.addAll(projectFinder.getProjectSet());
                if (projectFinder.getProjectStarSet() != null) {
                    groupByStar = true;
                }
            }

            if (groupByStar) {
                requiredFields.add(new RequiredFields(true));
            } else {
                for (LOProject project : projectSet) {
                    for (int inputColumn : project.getProjection()) {
                        fields.add(new Pair<Integer, Integer>(inputNum, inputColumn));
                    }
                }
                if (fields.size() == 0) {
                    requiredFields.add(new RequiredFields(false, true));
                } else {
                    requiredFields.add(new RequiredFields(new ArrayList<Pair<Integer, Integer>>(fields)));
                }
            }
        }

        return (requiredFields.size() == 0 ? null : requiredFields);
    }

    /* (non-Javadoc)
     * @see org.apache.pig.impl.plan.Operator#rewire(org.apache.pig.impl.plan.Operator, org.apache.pig.impl.plan.Operator)
     */
    @Override
    public void rewire(Operator<LOVisitor> oldPred, int oldPredIndex,
            Operator<LOVisitor> newPred, boolean useOldPred) throws PlanException {
        super.rewire(oldPred, oldPredIndex, newPred, useOldPred);
        if (newPred == null) {
            int errCode = 1097;
            String msg = "Replacement node cannot be null.";
            throw new PlanException(msg, errCode, PigException.INPUT);
        }
        LogicalOperator previous = (LogicalOperator) oldPred;
        LogicalOperator current = (LogicalOperator) newPred;
        // iterate over a snapshot of the key set: we mutate mGroupByPlans below
        Set<LogicalOperator> cogroupInputs = new HashSet<LogicalOperator>(mGroupByPlans.keySet());
        for (LogicalOperator input : cogroupInputs) {
            if (input.equals(previous)) {
                //replace the references to the key(i.e., previous) in the values with current
                for (LogicalPlan plan : mGroupByPlans.get(input)) {
                    try {
                        ProjectFixerUpper projectFixer = new ProjectFixerUpper(
                                plan, previous, oldPredIndex, current, useOldPred, this);
                        projectFixer.visit();
                    } catch (VisitorException ve) {
                        int errCode = 2144;
                        String msg = "Problem while fixing project inputs during rewiring.";
                        throw new PlanException(msg, errCode, PigException.BUG, ve);
                    }
                }
                //remove the key and the values
                List<LogicalPlan> plans = (List<LogicalPlan>) mGroupByPlans.get(previous);
                mGroupByPlans.removeKey(previous);

                //reinsert new key and values
                mGroupByPlans.put(current, plans);
            }
        }
    }

    /**
     * Column 0 is the 'group' column (requires the group-by key fields);
     * column i (1-based) requires all fields of input i-1.
     */
    @Override
    public List<RequiredFields> getRelevantInputs(int output, int column) throws FrontendException {
        if (!mIsSchemaComputed)
            getSchema();

        if (output != 0)
            return null;

        List<LogicalOperator> predecessors = (ArrayList<LogicalOperator>) mPlan.getPredecessors(this);
        if (predecessors == null) {
            return null;
        }

        // output columns run 0..predecessors.size(): 'group' plus one bag per input
        if (column > predecessors.size())
            return null;

        if (column == 0) {
            return getRequiredFields();
        }

        List<RequiredFields> result = new ArrayList<RequiredFields>();
        for (int inputNum = 0; inputNum < predecessors.size(); ++inputNum) {
            if (inputNum == column - 1) {
                result.add(new RequiredFields(true));
            } else {
                result.add(null);
            }
        }
        return result;
    }

    @Override
    public boolean pruneColumns(List<Pair<Integer, Integer>> columns) throws FrontendException {
        if (!mIsSchemaComputed)
            getSchema();
        if (mSchema == null) {
            log.warn("Cannot prune columns in cogroup, no schema information found");
            return false;
        }

        List<LogicalOperator> predecessors = mPlan.getPredecessors(this);
        if (predecessors == null) {
            int errCode = 2190;
            throw new FrontendException("Cannot find predecessors for cogroup",
                    errCode, PigException.BUG);
        }

        // iterate backwards so pruning inside a plan does not shift the
        // columns still to be processed
        for (int i = columns.size() - 1; i >= 0; i--) {
            Pair<Integer, Integer> column = columns.get(i);
            // valid input indexes are 0..predecessors.size()-1; the old check
            // used '>' and let column.first == size() fall through to an
            // IndexOutOfBoundsException below instead of this FrontendException
            if (column.first < 0 || column.first >= predecessors.size()) {
                int errCode = 2191;
                throw new FrontendException("No input " + column.first
                        + " to prune in cogroup", errCode, PigException.BUG);
            }
            if (column.second < 0) {
                int errCode = 2192;
                throw new FrontendException("column to prune does not exist", errCode, PigException.BUG);
            }
            for (LogicalPlan plan : getGroupByPlans().get(predecessors.get(column.first))) {
                pruneColumnInPlan(plan, column.second);
            }
        }
        super.pruneColumns(columns);
        return true;
    }
}