/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators;
import java.util.ArrayList;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.NonSpillableDataBag;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.VisitorException;
/**
* Implements a specialized form of POProject which is
* used *ONLY* in the following case:
* This project is a Project(*) introduced after a relational operator
* to supply a bag as output (as an expression). This project is either
* providing the bag as input to a successor expression operator or is
* itself the leaf in an inner plan.
* If the predecessor relational operator sends an EOP,
* then send an empty bag first to signal "empty" output
* and then send an EOP.
* NOTE: A Project(*) of return type BAG whose predecessor is
* from an outside plan (i.e. not in the same inner plan as the project)
* will NOT lead us here. So a query like:
* a = load 'baginp.txt' as (b:bag{t:tuple()}); b = foreach a generate $0; dump b;
* will go through a regular project (without the sendEmptyBagOnEOP flag below).
*/
public class PORelationToExprProject extends POProject {

    private static final long serialVersionUID = 1L;

    /**
     * When true, an EOP received from the predecessor is converted into an
     * empty bag with STATUS_OK instead of being passed on. Armed by
     * {@link #reset()} and cleared by {@link #getNext(DataBag)} once any
     * result has been returned.
     */
    boolean sendEmptyBagOnEOP = false;

    /**
     * Creates the operator with a requested parallelism of -1 and column 0.
     *
     * @param k the operator key
     */
    public PORelationToExprProject(OperatorKey k) {
        this(k, -1);
    }

    /**
     * Creates the operator projecting column 0.
     *
     * @param k  the operator key
     * @param rp the requested parallelism, forwarded to the superclass
     */
    public PORelationToExprProject(OperatorKey k, int rp) {
        this(k, rp, 0);
    }

    /**
     * Creates the operator projecting a single column.
     *
     * @param k   the operator key
     * @param rp  the requested parallelism, forwarded to the superclass
     * @param col the column to project, forwarded to the superclass
     */
    public PORelationToExprProject(OperatorKey k, int rp, int col) {
        super(k, rp, col);
    }

    /**
     * Creates the operator projecting a list of columns.
     *
     * @param k    the operator key
     * @param rp   the requested parallelism, forwarded to the superclass
     * @param cols the columns to project, forwarded to the superclass
     */
    public PORelationToExprProject(OperatorKey k, int rp, ArrayList<Integer> cols) {
        super(k, rp, cols);
    }

    /**
     * Builds the display name: a fixed prefix, the result type name in
     * brackets, either "[*]" (star projection) or the column list, and
     * finally the operator key.
     *
     * @return the display name of this operator
     */
    @Override
    public String name() {
        final String typeTag = "[" + DataType.findTypeName(resultType) + "]";
        final Object projection = isStar() ? "[*]" : columns;
        return "RelationToExpressionProject" + typeTag + projection + " - " + mKey.toString();
    }

    /**
     * Dispatches to {@code visitProject}: the specialization in this class
     * does not currently change how visitors treat it.
     *
     * @param v the visitor to dispatch to
     * @throws VisitorException if the visitor fails
     */
    @Override
    public void visit(PhyPlanVisitor v) throws VisitorException {
        v.visitProject(this);
    }

    /**
     * Arms the empty-bag-on-EOP behaviour.
     *
     * The enclosing foreach is starting on a new set of inputs. Should the
     * very first thing we see from the predecessor be an EOP (for example a
     * Filter predecessor that filtered away all of its input), an empty bag
     * has to be sent; the flag set here is what getNext() checks when it
     * encounters that EOP.
     */
    @Override
    public void reset() {
        sendEmptyBagOnEOP = true;
    }

    /**
     * Returns the next bag. If the predecessor reports EOP while the
     * empty-bag flag is armed, an empty bag with STATUS_OK is produced
     * instead; any other non-OK result is returned unchanged.
     *
     * @param db type marker selecting the bag overload; not read here
     * @return the consumed bag result, or the predecessor's non-OK result
     * @throws ExecException if reading or consuming the input fails
     */
    @Override
    public Result getNext(DataBag db) throws ExecException {
        Result input = processInputBag();

        // During accumulation an empty bag is acceptable: STATUS_OK must be
        // sent so that the UDF can be called, hence re-arm on every call.
        if (isAccumulative()) {
            reset();
        }

        if (input.returnStatus != POStatus.STATUS_OK) {
            final boolean substituteEmptyBag =
                    input.returnStatus == POStatus.STATUS_EOP && sendEmptyBagOnEOP;
            if (!substituteEmptyBag) {
                // Pass the predecessor's status through untouched and disarm
                // the flag until reset() announces new inputs.
                sendEmptyBagOnEOP = false;
                return input;
            }
            // EOP arrived from the predecessor while the flag was armed: the
            // successor in the pipeline expects a bag, so hand it an empty
            // one. The real EOP goes out on the next call, unless the foreach
            // calls reset() for new inputs first.
            input.result = new NonSpillableDataBag();
            input.returnStatus = POStatus.STATUS_OK;
        }

        Result consumed = consumeInputBag(input);
        // A result (empty bag or otherwise) is going downstream, so no more
        // empty bags on EOP until new inputs are processed (see reset()).
        sendEmptyBagOnEOP = false;
        return consumed;
    }

    /**
     * Clones this operator by delegating to the superclass clone.
     *
     * @return the clone, cast to this class
     * @throws CloneNotSupportedException if the superclass clone fails
     */
    @Override
    public PORelationToExprProject clone() throws CloneNotSupportedException {
        final Object copy = super.clone();
        return (PORelationToExprProject) copy;
    }
}