package org.apache.pig.backend.hadoop.executionengine.spark_streaming.converter; import java.io.IOException; import java.io.Serializable; import java.util.List; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach; import org.apache.pig.backend.hadoop.executionengine.spark_streaming.SparkUtil; import org.apache.pig.data.Tuple; import scala.collection.Iterator; import scala.collection.JavaConversions; import org.apache.spark.api.java.function.Function; import org.apache.spark.rdd.RDD; import org.apache.spark.streaming.api.java.JavaDStream; /** * Convert that is able to convert an RRD to another RRD using a POForEach * @author billg */ @SuppressWarnings({ "serial"}) public class ForEachConverter implements POConverter<Tuple, Tuple, POForEach> { @Override public JavaDStream<Tuple> convert(List<JavaDStream<Tuple>> predecessors, POForEach physicalOperator) throws IOException { SparkUtil.assertPredecessorSize(predecessors, physicalOperator, 1); JavaDStream<Tuple> rdd = predecessors.get(0); ForEachFunction forEachFunction = new ForEachFunction(physicalOperator); return new JavaDStream<Tuple>(rdd.dstream().mapPartitions(forEachFunction, true, SparkUtil.getManifest(Tuple.class)), SparkUtil.getManifest(Tuple.class)); } private static class ForEachFunction extends Function<Iterator<Tuple>, Iterator<Tuple>> implements Serializable { private POForEach poForEach; private ForEachFunction(POForEach poForEach) { this.poForEach = poForEach; } public Iterator<Tuple> call(Iterator<Tuple> i) { final java.util.Iterator<Tuple> input = JavaConversions.asJavaIterator(i); Iterator<Tuple> output = JavaConversions.asScalaIterator(new POOutputConsumerIterator(input) { protected void attach(Tuple tuple) { poForEach.setInputs(null); poForEach.attachInput(tuple); } protected Result getNextResult() throws ExecException { return poForEach.getNextTuple(); } }); return output; } } }