/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.pig.PigException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.NullableTuple;
import org.apache.pig.impl.io.NullableUnknownWritable;
import org.apache.pig.impl.io.PigNullableWritable;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.backend.hadoop.HDataType;
/**
* The package operator that packages the globally rearranged tuples
* into output format as required by multi-query de-multiplexer.
* <p>
* This operator is used when merging multiple Map-Reduce splittees
* into a Map-only splitter during multi-query optimization.
* The package operators of the reduce plans of the splittees form an
* indexed package list inside this operator. When this operator
* receives an input, it extracts the index from the key and calls the
* corresponding package to get the output data.
* <p>
* Due to the recursive nature of multi-query optimization, this operator
* may be contained in another multi-query packager.
* <p>
* The successor of this operator must be a PODemux operator which
* knows how to consume the output of this operator.
*/
public class POMultiQueryPackage extends POPackage {

    private static final long serialVersionUID = 1L;

    /**
     * Mask applied to the index byte of the incoming key to obtain the
     * position of the target package in {@link #packages}. Only the low
     * seven bits are kept; the unmasked byte is still written back onto
     * the output key.
     */
    private static final int IDX_PART = 0x7F;

    /** The inner packages, addressed by the masked index of the incoming key. */
    private final List<POPackage> packages = new ArrayList<POPackage>();

    /**
     * If the POLocalRearranges corresponding to the reduce plans in
     * myPlans (the list of inner plans of the demux) have different key types
     * then the MultiQueryOptimizer converts all the keys to be of type tuple
     * by wrapping any non-tuple keys into Tuples (keys which are already tuples
     * are left alone).
     * The list below is a list of booleans indicating whether extra tuple wrapping
     * was done for the key in the corresponding POLocalRearranges and if we need
     * to "unwrap" the tuple to get to the key.
     */
    private final ArrayList<Boolean> isKeyWrapped = new ArrayList<Boolean>();

    /*
     * Indicating if all the inner plans have the same
     * map key type. If not, the keys passed in are
     * wrapped inside tuples and need to be extracted
     * out during the reduce phase
     */
    private boolean sameMapKeyType = true;

    /*
     * Indicating if this operator is in a combiner.
     * If not, this operator is in a reducer and the key
     * values must first be extracted from the tuple-wrap
     * before writing out to the disk
     */
    private boolean inCombiner = false;

    /** The key most recently attached via {@link #attachInput}; cleared on detach. */
    private transient PigNullableWritable myKey;

    /**
     * Constructs an operator with the specified key.
     *
     * @param k the operator key
     */
    public POMultiQueryPackage(OperatorKey k) {
        this(k, -1, null);
    }

    /**
     * Constructs an operator with the specified key
     * and degree of parallelism.
     *
     * @param k the operator key
     * @param rp the degree of parallelism requested
     */
    public POMultiQueryPackage(OperatorKey k, int rp) {
        this(k, rp, null);
    }

    /**
     * Constructs an operator with the specified key and inputs.
     *
     * @param k the operator key
     * @param inp the inputs that this operator will read data from
     */
    public POMultiQueryPackage(OperatorKey k, List<PhysicalOperator> inp) {
        this(k, -1, inp);
    }

    /**
     * Constructs an operator with the specified key,
     * degree of parallelism and inputs.
     *
     * @param k the operator key
     * @param rp the degree of parallelism requested
     * @param inp the inputs that this operator will read data from
     */
    public POMultiQueryPackage(OperatorKey k, int rp, List<PhysicalOperator> inp) {
        super(k, rp, inp);
    }

    /**
     * Returns the display name of this operator, which includes the
     * key-wrapping flags and the operator key.
     */
    @Override
    public String name() {
        return "MultiQuery Package [" + isKeyWrapped + "] - " + getOperatorKey().toString();
    }

    @Override
    public boolean supportsMultipleInputs() {
        return false;
    }

    @Override
    public void visit(PhyPlanVisitor v) throws VisitorException {
        v.visitMultiQueryPackage(this);
    }

    @Override
    public boolean supportsMultipleOutputs() {
        return false;
    }

    /**
     * Remembers the key and the tuple iterator to be handed to the
     * selected inner package on the next call to {@link #getNextTuple()}.
     *
     * @param k the (indexed) key of the current group
     * @param inp the iterator over the tuples that belong to the key
     */
    @Override
    public void attachInput(PigNullableWritable k, Iterator<NullableTuple> inp) {
        tupIter = inp;
        myKey = k;
    }

    /**
     * Drops the references to the currently attached key and iterator.
     */
    @Override
    public void detachInput() {
        tupIter = null;
        myKey = null;
    }

    /**
     * Appends the specified package object to the end of
     * the package list. No key-wrapping flag is recorded by this overload.
     *
     * @param pack package to be appended to the list
     */
    public void addPackage(POPackage pack) {
        packages.add(pack);
    }

    /**
     * Appends the specified package object to the end of
     * the package list and records whether its key gets wrapped.
     *
     * @param pack package to be appended to the list
     * @param mapKeyType the map key type associated with the package
     */
    public void addPackage(POPackage pack, byte mapKeyType) {
        packages.add(pack);
        // if mapKeyType is already a tuple, we will NOT
        // be wrapping it in an extra tuple. If it is not
        // a tuple, we will wrap it in a tuple
        isKeyWrapped.add(mapKeyType != DataType.TUPLE);
    }

    /**
     * Returns the list of packages.
     *
     * @return the list of the packages
     */
    public List<POPackage> getPackages() {
        return packages;
    }

    /**
     * Constructs the output tuple from the inputs.
     * <p>
     * The output is consumed by the demultiplexer operator
     * (PODemux) in the format (key, {bag of tuples}) where key
     * is an indexed WritableComparable, not the wrapped value as a pig type.
     *
     * @return the result produced by the selected inner package, with the
     *         first field of its tuple replaced by (or stamped as) an
     *         indexed {@link PigNullableWritable}; if the inner package
     *         produced no tuple, its result is returned unchanged
     * @throws ExecException if the index carried by the key does not
     *         address a package in the package list
     */
    @Override
    public Result getNextTuple() throws ExecException {
        byte origIndex = myKey.getIndex();

        // Only the masked bits address the package list; origIndex is kept
        // intact so it can be stamped back onto the output key below.
        // The mask guarantees a non-negative value, so only the upper
        // bound needs checking.
        int index = origIndex & IDX_PART;
        if (index >= packages.size()) {
            int errCode = 2140;
            String msg = "Invalid package index " + index
                    + " should be in the range between 0 and " + packages.size();
            throw new ExecException(msg, errCode, PigException.BUG);
        }

        POPackage pack = packages.get(index);

        // check to see if we need to unwrap the key. The keys may be
        // wrapped inside a tuple by LocalRearrange operator when jobs
        // with different map key types are merged
        PigNullableWritable curKey = myKey;
        if (!sameMapKeyType && !inCombiner && isKeyWrapped.get(index)) {
            Tuple tup = (Tuple) myKey.getValueAsPigType();
            curKey = HDataType.getWritableComparableTypes(tup.get(0), pack.getKeyType());
            curKey.setIndex(origIndex);
        }

        pack.attachInput(curKey, tupIter);
        Result res = pack.getNextTuple();
        pack.detachInput();

        Tuple tuple = (Tuple) res.result;
        if (tuple == null) {
            // The inner package produced no tuple, so there is no first
            // field to re-index. Hand the result (and whatever status it
            // carries) back to the caller instead of failing with a
            // NullPointerException.
            return res;
        }

        // the object present in the first field
        // of the tuple above is the real data without
        // index information - this is because the
        // package above, extracts the real data out of
        // the PigNullableWritable object - we are going to
        // give this result tuple to a PODemux operator
        // which needs a PigNullableWritable first field so
        // it can figure out the index. Therefore we need
        // to add index to the first field of the tuple.
        Object obj = tuple.get(0);
        if (obj instanceof PigNullableWritable) {
            ((PigNullableWritable) obj).setIndex(origIndex);
        } else {
            PigNullableWritable myObj;
            if (obj == null) {
                // a null key has no concrete type to wrap; use the
                // "unknown" writable flagged as null
                myObj = new NullableUnknownWritable();
                myObj.setNull(true);
            } else {
                myObj = HDataType.getWritableComparableTypes(
                        obj, HDataType.findTypeFromNullableWritable(curKey));
            }
            myObj.setIndex(origIndex);
            tuple.set(0, myObj);
        }

        // illustrator markup has been handled by "pack"
        return res;
    }

    /**
     * Returns the list of booleans that indicates if the
     * key needs to be unwrapped for the corresponding plan.
     *
     * @return a read-only view of the isKeyWrapped boolean values
     */
    public List<Boolean> getIsKeyWrappedList() {
        return Collections.unmodifiableList(isKeyWrapped);
    }

    /**
     * Appends a list of isKeyWrapped boolean values, in order, to the
     * flags already recorded.
     *
     * @param lst the list of boolean values to add
     */
    public void addIsKeyWrappedList(List<Boolean> lst) {
        isKeyWrapped.addAll(lst);
    }

    /**
     * Sets whether this operator is in a combiner.
     *
     * @param inCombiner true if this operator is in a combiner
     */
    public void setInCombiner(boolean inCombiner) {
        this.inCombiner = inCombiner;
    }

    /**
     * Returns whether this operator is marked as being in a combiner.
     *
     * @return the current value of the inCombiner flag
     */
    public boolean isInCombiner() {
        return inCombiner;
    }

    /**
     * Sets whether all the inner plans have the same map key type.
     *
     * @param sameMapKeyType true if all inner plans share one map key type
     */
    public void setSameMapKeyType(boolean sameMapKeyType) {
        this.sameMapKeyType = sameMapKeyType;
    }

    /**
     * Returns whether all the inner plans are marked as having the same
     * map key type.
     *
     * @return the current value of the sameMapKeyType flag
     */
    public boolean isSameMapKeyType() {
        return sameMapKeyType;
    }
}