package org.apache.pig.backend.hadoop.executionengine.spark_streaming.converter;
import java.io.Serializable;
import java.util.List;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFilter;
import org.apache.pig.backend.hadoop.executionengine.spark_streaming.SparkUtil;
import org.apache.pig.data.Tuple;
import scala.runtime.AbstractFunction1;
import org.apache.spark.rdd.RDD;
import org.apache.spark.streaming.api.java.JavaDStream;
/**
 * Converter that transforms a {@code JavaDStream<Tuple>} into a filtered
 * {@code JavaDStream<Tuple>} by evaluating a POFilter against each tuple.
 * @author billg
 */
@SuppressWarnings({ "serial"})
public class FilterConverter implements POConverter<Tuple, Tuple, POFilter> {

    /**
     * Filters the single predecessor DStream through the given POFilter.
     *
     * @param predecessors upstream DStreams; exactly one is required
     *                     (enforced by {@code assertPredecessorSize})
     * @param physicalOperator the POFilter evaluated once per tuple
     * @return a DStream containing only the tuples the filter accepts
     */
    @Override
    public JavaDStream<Tuple> convert(List<JavaDStream<Tuple>> predecessors, POFilter physicalOperator) {
        SparkUtil.assertPredecessorSize(predecessors, physicalOperator, 1);
        // Renamed from "rdd": this converter operates on a DStream, not an RDD.
        JavaDStream<Tuple> dstream = predecessors.get(0);
        FilterFunction filterFunction = new FilterFunction(physicalOperator);
        // filter() works on the underlying Scala DStream, so re-wrap the
        // result in a JavaDStream with an explicit Tuple manifest.
        return new JavaDStream<Tuple>(dstream.dstream().filter(filterFunction),
                SparkUtil.getManifest(Tuple.class));
    }

    /**
     * Scala predicate ({@code Tuple => Boolean}) that runs the wrapped
     * POFilter on one tuple at a time. Serializable so Spark can ship it
     * to executors along with the captured POFilter.
     */
    private static class FilterFunction extends AbstractFunction1<Tuple, Object>
            implements Serializable {

        // final: assigned exactly once in the constructor, and the instance
        // is serialized/shared across executors — it must not be reassigned.
        private final POFilter poFilter;

        private FilterFunction(POFilter poFilter) {
            this.poFilter = poFilter;
        }

        /**
         * Attaches the tuple as the filter's input and pulls one result.
         *
         * @param v1 the tuple to test
         * @return {@code true} when the filter emits the tuple (STATUS_OK);
         *         {@code false} when it drops it (null result or STATUS_EOP)
         * @throws RuntimeException if evaluation fails or the filter returns
         *         an unexpected status code
         */
        @Override
        public Boolean apply(Tuple v1) {
            Result result;
            try {
                // Detach any lingering upstream inputs, then feed this tuple
                // directly to the filter operator.
                poFilter.setInputs(null);
                poFilter.attachInput(v1);
                result = poFilter.getNextTuple();
            } catch (ExecException e) {
                throw new RuntimeException("Couldn't filter tuple", e);
            }

            if (result == null) {
                return false;
            }

            switch (result.returnStatus) {
            case POStatus.STATUS_OK:
                return true;
            case POStatus.STATUS_EOP: // TODO: probably also ok for EOS, END_OF_BATCH
                return false;
            default:
                throw new RuntimeException("Unexpected response code from filter: " + result);
            }
        }
    }
}