/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators;
import java.util.Arrays;
import java.util.Map;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.InternalCachedBag;
import org.apache.pig.data.NonSpillableDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.io.NullableTuple;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.util.Pair;
/**
* The package operator that packages the globally rearranged tuples into
* output format after the combiner stage. It differs from POPackage in that
* it does not use the index in the NullableTuple to find the bag to put a
* tuple in. Instead, the inputs are put in a bag corresponding to their
* offset in the tuple.
*/
public class POCombinerPackage extends POPackage {
/**
*
*/
private static final long serialVersionUID = 1L;
private static BagFactory mBagFactory = BagFactory.getInstance();
private static TupleFactory mTupleFactory = TupleFactory.getInstance();
private boolean[] mBags; // For each field, indicates whether or not it
// needs to be put in a bag.
private Map<Integer, Integer> keyLookup;
private int numBags;
/**
* A new POPostCombinePackage will be constructed as a near clone of the
* provided POPackage.
* @param pkg POPackage to clone.
* @param bags for each field, indicates whether it should be a bag (true)
* or a simple field (false).
*/
public POCombinerPackage(POPackage pkg, boolean[] bags) {
super(new OperatorKey(pkg.getOperatorKey().scope,
NodeIdGenerator.getGenerator().getNextNodeId(pkg.getOperatorKey().scope)),
pkg.getRequestedParallelism(), pkg.getInputs());
resultType = pkg.getResultType();
keyType = pkg.keyType;
numInputs = 1;
inner = new boolean[1];
for (int i = 0; i < pkg.inner.length; i++) {
inner[i] = true;
}
if (bags != null) {
mBags = Arrays.copyOf(bags, bags.length);
}
numBags = 0;
for (int i = 0; i < mBags.length; i++) {
if (mBags[i]) numBags++;
}
}
@Override
public String name() {
return "POCombinerPackage" + "[" + DataType.findTypeName(resultType) + "]" + "{" + DataType.findTypeName(keyType) + "}" +" - " + mKey.toString();
}
@Override
public void visit(PhyPlanVisitor v) throws VisitorException {
v.visitCombinerPackage(this);
}
/**
* @param keyInfo the keyInfo to set
*/
public void setKeyInfo(Map<Integer, Pair<Boolean, Map<Integer, Integer>>> keyInfo) {
this.keyInfo = keyInfo;
// TODO: IMPORTANT ASSUMPTION: Currently we only combine in the
// group case and not in cogroups. So there should only
// be one LocalRearrange from which we get the keyInfo for
// which field in the value is in the key. This LocalRearrange
// has an index of 0. When we do support combiner in Cogroups
// THIS WILL NEED TO BE REVISITED.
Pair<Boolean, Map<Integer, Integer>> lrKeyInfo =
keyInfo.get(0); // assumption: only group are "combinable", hence index 0
keyLookup = lrKeyInfo.second;
}
private DataBag createDataBag(int numBags) {
String bagType = null;
if (PigMapReduce.sJobConfInternal.get() != null) {
bagType = PigMapReduce.sJobConfInternal.get().get("pig.cachedbag.type");
}
if (bagType != null && bagType.equalsIgnoreCase("default")) {
return new NonSpillableDataBag();
}
return new InternalCachedBag(numBags);
}
@Override
public Result getNextTuple() throws ExecException {
int keyField = -1;
//Create numInputs bags
Object[] fields = new Object[mBags.length];
for (int i = 0; i < mBags.length; i++) {
if (mBags[i]) fields[i] = createDataBag(numBags);
}
// For each indexed tup in the inp, split them up and place their
// fields into the proper bags. If the given field isn't a bag, just
// set the value as is.
while (tupIter.hasNext()) {
NullableTuple ntup = tupIter.next();
Tuple tup = (Tuple)ntup.getValueAsPigType();
int tupIndex = 0; // an index for accessing elements from
// the value (tup) that we have currently
for(int i = 0; i < mBags.length; i++) {
Integer keyIndex = keyLookup.get(i);
if(keyIndex == null && mBags[i]) {
// the field for this index is not the
// key - so just take it from the "value"
// we were handed - Currently THIS HAS TO BE A BAG
// In future if this changes, THIS WILL NEED TO BE
// REVISITED.
((DataBag)fields[i]).add((Tuple)tup.get(tupIndex));
tupIndex++;
} else {
// the field for this index is in the key
fields[i] = key;
}
}
}
// The successor of the POCombinerPackage as of
// now SHOULD be a POForeach which has been adjusted
// to look for its inputs by projecting from the corresponding
// positions in the POCombinerPackage output.
// So we will NOT be adding the key in the result here but merely
// putting all bags into a result tuple and returning it.
Tuple res;
res = mTupleFactory.newTuple(mBags.length);
for (int i = 0; i < mBags.length; i++) res.set(i, fields[i]);
Result r = new Result();
r.result = res;
r.returnStatus = POStatus.STATUS_OK;
return r;
}
}