/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.pen.physicalOperators;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.SortedDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.pen.util.ExampleTuple;
/** This is a local implementation of Cogroup.
* The inputs need to be connected to LocalRearranges possibly by the
* logical to physical translator.
*
* This is a blocking operator. The outputs of LRs are put into
* SortedDataBags. They are sorted on the keys. We then start pulling
* tuple out of these bags and start constructing output.
*
*
*/
//We intentionally skip type checking in backend for performance reasons
@SuppressWarnings("unchecked")
public class POCogroup extends PhysicalOperator {
private static final long serialVersionUID = 1L;
Tuple[] data = null;
transient Iterator<Tuple>[] its = null;
boolean[] inner;
public POCogroup(OperatorKey k) {
super(k);
}
public POCogroup(OperatorKey k, int rp) {
super(k, rp);
}
public POCogroup(OperatorKey k, List<PhysicalOperator> inp) {
super(k, inp);
}
public POCogroup(OperatorKey k, int rp, List<PhysicalOperator> inp) {
super(k, rp, inp);
}
public void setInner(boolean[] inner) {
this.inner = inner;
}
@Override
public void visit(PhyPlanVisitor v) throws VisitorException {
v.visitPenCogroup(this);
}
@Override
public String name() {
return getAliasString() + "POCogroup"+ ": POCogroup" + "["
+ DataType.findTypeName(resultType) + "]" + " - "
+ mKey.toString();
}
@Override
public Result getNext(Tuple t) throws ExecException{
if(its == null) {
accumulateData();
}
boolean done = true;
Result res = new Result();
for(int i = 0; i < data.length; i++) {
done &= (data[i] == null);
}
if(done) {
res.returnStatus = POStatus.STATUS_EOP;
its = null;
return res;
}
Tuple smallestTuple = getSmallest(data);
Comparator<Tuple> comp = new groupComparator();
int size = data.length;
Tuple output = TupleFactory.getInstance().newTuple(size + 1);
output.set(0, smallestTuple.get(1));
for(int i = 1; i < size + 1; i++) {
output.set(i, BagFactory.getInstance().newDefaultBag());
}
ExampleTuple tOut = null;
if(lineageTracer != null) {
tOut = new ExampleTuple(output);
lineageTracer.insert(tOut);
}
boolean loop = true;
while(loop) {
loop = false;
for(int i = 0; i < size; i++) {
if(data[i] != null && comp.compare(data[i], smallestTuple) == 0) {
loop = true;
DataBag bag = (DataBag) output.get(i + 1);
//update lineage if it exists
//Tuple temp = ((IndexedTuple) data[i].get(1)).toTuple();
Tuple temp = (Tuple) data[i].get(2);
if(lineageTracer != null) {
if(((ExampleTuple)temp).synthetic) tOut.synthetic = true;
lineageTracer.union(temp, tOut);
}
//bag.add(((IndexedTuple) data[i].get(1)).toTuple());
bag.add(temp);
if(its[i].hasNext())
data[i] = its[i].next();
else
data[i] = null;
}
}
}
if(lineageTracer != null)
res.result = tOut;
else
res.result = output;
res.returnStatus = POStatus.STATUS_OK;
// System.out.println(output);
for(int i = 0; i < size; i++) {
if(inner != null && inner[i] && ((DataBag)output.get(i+1)).size() == 0) {
res.returnStatus = POStatus.STATUS_NULL;
break;
}
}
return res;
}
private void accumulateData() throws ExecException {
int size = inputs.size();
its = new Iterator[size];
data = new Tuple[size];
for(int i = 0; i < size; i++) {
DataBag bag = new SortedDataBag(new groupComparator());
for(Result input = inputs.get(i).getNext(dummyTuple); input.returnStatus != POStatus.STATUS_EOP; input = inputs.get(i).getNext(dummyTuple)) {
if(input.returnStatus == POStatus.STATUS_ERR) {
throw new ExecException("Error accumulating output at local Cogroup operator");
}
if(input.returnStatus == POStatus.STATUS_NULL)
continue;
bag.add((Tuple) input.result);
}
its[i] = bag.iterator();
data[i] = its[i].next();
}
}
// private Tuple getSmallest(Tuple[] data) {
// Tuple t = (Tuple) data[0];
// Comparator<Tuple> comp = new groupComparator();
// for(int i = 1; i < data.length; i++) {
// if(comp.compare(t, (Tuple) data[i]) < 0)
// t = data[i];
// }
// return t;
// }
private Tuple getSmallest(Tuple[] data) {
Tuple t = null;
Comparator<Tuple> comp = new groupComparator();
for(int i = 0; i < data.length; i++) {
if(data[i] == null) continue;
if(t == null) {
t = data[i];
continue; //since the previous data was probably null so we dont really need a comparison
}
if(comp.compare(t, data[i]) > 0)
t = data[i];
}
return t;
}
@Override
public boolean supportsMultipleInputs() {
// TODO Auto-generated method stub
return true;
}
@Override
public boolean supportsMultipleOutputs() {
// TODO Auto-generated method stub
return false;
}
private static class groupComparator implements Comparator<Tuple> {
public int compare(Tuple o1, Tuple o2) {
//We want to make it as efficient as possible by only comparing the keys
Object t1 = null;
Object t2 = null;
try {
// get the keys
t1 = o1.get(1);
t2 = o2.get(1);
if(t1 == t2 && t1 == null) {
// null keys from different inputs
// are not treated as equals
int firstInputIndex = (Byte)(o1.get(0));
int secondInputIndex = (Byte)(o2.get(0));
return firstInputIndex - secondInputIndex;
}
} catch (ExecException e) {
// TODO Auto-generated catch block
throw new RuntimeException("Error comparing tuples");
}
int result = DataType.compare(t1, t2);
// Further check if any field is null
// See PIG-927
if (result == 0 && t1 instanceof Tuple && t2 instanceof Tuple)
{
try {
int firstInputIndex = (Byte)(o1.get(0));
int secondInputIndex = (Byte)(o2.get(0));
for (int i=0;i<((Tuple)t1).size();i++)
if (((Tuple)t1).get(i)==null)
return firstInputIndex - secondInputIndex;
} catch (ExecException e) {
throw new RuntimeException("Error comparing tuple fields", e);
}
}
return result;
}
}
}