/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.pen.physicalOperators; import java.util.Properties; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStream; import org.apache.pig.data.Tuple; import org.apache.pig.impl.plan.OperatorKey; import org.apache.pig.impl.streaming.ExecutableManager; import org.apache.pig.impl.streaming.StreamingCommand; public class POStreamLocal extends POStream { /** * */ private static final long serialVersionUID = 2L; public POStreamLocal(OperatorKey k, ExecutableManager executableManager, StreamingCommand command, Properties properties) { super(k, executableManager, command, properties); // TODO Auto-generated constructor stub } /** * This is different from the Map-Reduce implementation of the POStream since there is no * push model here. POStatus_EOP signals the end of input and can be used to decide when * to stop the stdin to the process */ @Override public Result getNext(Tuple t) throws ExecException { // The POStream Operator works with ExecutableManager to // send input to the streaming binary and to get output // from it. To achieve a tuple oriented behavior, two queues // are used - one for output from the binary and one for // input to the binary. In each getNext() call: // 1) If there is no more output expected from the binary, an EOP is // sent to successor // 2) If there is any output from the binary in the queue, it is passed // down to the successor // 3) if neither of these two are true and if it is possible to // send input to the binary, then the next tuple from the // predecessor is got and passed to the binary try { // if we are being called AFTER all output from the streaming // binary has already been sent to us then just return EOP // The "allOutputFromBinaryProcessed" flag is set when we see // an EOS (End of Stream output) from streaming binary if(allOutputFromBinaryProcessed) { return new Result(POStatus.STATUS_EOP, null); } // if we are here AFTER all map() calls have been completed // AND AFTER we process all possible input to be sent to the // streaming binary, then all we want to do is read output from // the streaming binary if(allInputFromPredecessorConsumed) { Result r = binaryOutputQueue.take(); if(r.returnStatus == POStatus.STATUS_EOS) { // If we received EOS, it means all output // from the streaming binary has been sent to us // So we can send an EOP to the successor in // the pipeline. Also since we are being called // after all input from predecessor has been processed // it means we got here from a call from close() in // map or reduce. So once we send this EOP down, // getNext() in POStream should never be called. So // we don't need to set any flag noting we saw all output // from binary r.returnStatus = POStatus.STATUS_EOP; } return(r); } // if we are here, we haven't consumed all input to be sent // to the streaming binary - check if we are being called // from close() on the map or reduce //if(this.parentPlan.endOfAllInput) { Result r = getNextHelper(t); if(r.returnStatus == POStatus.STATUS_EOP) { // we have now seen *ALL* possible input // check if we ever had any real input // in the course of the map/reduce - if we did // then "initialized" will be true. If not, just // send EOP down. if(initialized) { // signal End of ALL input to the Executable Manager's // Input handler thread binaryInputQueue.put(r); // note this state for future calls allInputFromPredecessorConsumed = true; // look for output from binary r = binaryOutputQueue.take(); if(r.returnStatus == POStatus.STATUS_EOS) { // If we received EOS, it means all output // from the streaming binary has been sent to us // So we can send an EOP to the successor in // the pipeline. Also since we are being called // after all input from predecessor has been processed // it means we got here from a call from close() in // map or reduce. So once we send this EOP down, // getNext() in POStream should never be called. So // we don't need to set any flag noting we saw all output // from binary r.returnStatus = POStatus.STATUS_EOP; } } } else if(r.returnStatus == POStatus.STATUS_EOS) { // If we received EOS, it means all output // from the streaming binary has been sent to us // So we can send an EOP to the successor in // the pipeline. Also we are being called // from close() in map or reduce (this is so because // only then this.parentPlan.endOfAllInput is true). // So once we send this EOP down, getNext() in POStream // should never be called. So we don't need to set any // flag noting we saw all output from binary r.returnStatus = POStatus.STATUS_EOP; } return r; // } else { // // we are not being called from close() - so // // we must be called from either map() or reduce() // // get the next Result from helper // Result r = getNextHelper(t); // if(r.returnStatus == POStatus.STATUS_EOS) { // // If we received EOS, it means all output // // from the streaming binary has been sent to us // // So we can send an EOP to the successor in // // the pipeline and also note this condition // // for future calls // r.returnStatus = POStatus.STATUS_EOP; // allOutputFromBinaryProcessed = true; // } // return r; // } } catch(Exception e) { throw new ExecException("Error while trying to get next result in POStream", e); } } }