/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.backend.hadoop.executionengine.physicalLayer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import org.apache.pig.ComparisonFunc;
import org.apache.pig.EvalFunc;
import org.apache.pig.FuncSpec;
import org.apache.pig.PigException;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.PigContext;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.*;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage.PackageType;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.*;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.ExpressionOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.BinaryExpressionOperator;
import org.apache.pig.impl.builtin.GFCross;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.io.FileSpec;
import org.apache.pig.impl.logicalLayer.*;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.plan.DependencyOrderWalker;
import org.apache.pig.impl.plan.DependencyOrderWalkerWOSeenChk;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.PlanException;
import org.apache.pig.impl.plan.PlanWalker;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.util.CompilerUtils;
import org.apache.pig.impl.util.LinkedMultiMap;
import org.apache.pig.impl.util.MultiMap;
import org.apache.pig.impl.util.Utils;
public class LogToPhyTranslationVisitor extends LOVisitor {
/** Maps each logical operator that has been translated to the physical operator registered for it. */
protected Map<LogicalOperator, PhysicalOperator> logToPhyMap;
/** Holds enclosing plans while a nested plan is built: pushed before a nested walk, popped after it. */
protected Stack<PhysicalPlan> currentPlans;
/** The physical plan that the visit methods currently add operators to. */
protected PhysicalPlan currentPlan;
/** Source of node ids used when building the OperatorKey of each new physical operator. */
protected NodeIdGenerator nodeGen = NodeIdGenerator.getGenerator();
/** Pig context supplied through setPigContext. */
protected PigContext pc;
/**
 * Creates a translator for the given logical plan.
 * The plan is walked with a {@link DependencyOrderWalker}; translation starts from an empty
 * physical plan, an empty stack of saved plans and an empty logical-to-physical map.
 *
 * @param plan the logical plan to translate
 */
public LogToPhyTranslationVisitor(LogicalPlan plan) {
    super(plan, new DependencyOrderWalker<LogicalOperator, LogicalPlan>(plan));
    this.logToPhyMap = new HashMap<LogicalOperator, PhysicalOperator>();
    this.currentPlan = new PhysicalPlan();
    this.currentPlans = new Stack<PhysicalPlan>();
}
/**
 * Stores the Pig context on this visitor.
 *
 * @param pc the context to keep in the {@code pc} field
 */
public void setPigContext(PigContext pc) {
    this.pc = pc;
}
/**
 * Returns the physical plan currently referenced by {@code currentPlan}.
 *
 * @return the plan the visit methods have been adding operators to
 */
public PhysicalPlan getPhysicalPlan() {
    return this.currentPlan;
}
/**
 * Translates a logical greater-than comparison into a {@link GreaterThanExpr}.
 * The new operator takes its operand type from the left-hand logical operand, receives the
 * physical operators that {@code logToPhyMap} holds for both operands, is added to the current
 * plan, registered in {@code logToPhyMap} and connected from the physical counterpart of every
 * logical predecessor.
 *
 * @param op the logical greater-than operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LOGreaterThan op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    BinaryComparisonOperator comparison = new GreaterThanExpr(key, op.getRequestedParallelism());
    comparison.setAlias(op.getAlias());
    comparison.setOperandType(op.getLhsOperand().getType());
    comparison.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    comparison.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(comparison);
    logToPhyMap.put(op, comparison);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources != null) {
        try {
            for (LogicalOperator source : sources) {
                currentPlan.connect(logToPhyMap.get(source), comparison);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical less-than comparison into a {@link LessThanExpr}.
 * The operand type comes from the left-hand logical operand; both physical operands are looked
 * up in {@code logToPhyMap}. The operator is added to the current plan, registered for
 * {@code op} and connected from the physical counterpart of each logical predecessor.
 *
 * @param op the logical less-than operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LOLesserThan op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    BinaryComparisonOperator lessThan = new LessThanExpr(key, op.getRequestedParallelism());
    lessThan.setAlias(op.getAlias());
    lessThan.setOperandType(op.getLhsOperand().getType());
    lessThan.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    lessThan.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(lessThan);
    logToPhyMap.put(op, lessThan);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources != null) {
        try {
            for (LogicalOperator source : sources) {
                currentPlan.connect(logToPhyMap.get(source), lessThan);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical greater-than-or-equal comparison into a {@link GTOrEqualToExpr}.
 * The operand type comes from the left-hand logical operand; both physical operands are looked
 * up in {@code logToPhyMap}. The operator is added to the current plan, registered for
 * {@code op} and connected from the physical counterpart of each logical predecessor.
 *
 * @param op the logical greater-than-or-equal operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LOGreaterThanEqual op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    BinaryComparisonOperator atLeast = new GTOrEqualToExpr(key, op.getRequestedParallelism());
    atLeast.setAlias(op.getAlias());
    atLeast.setOperandType(op.getLhsOperand().getType());
    atLeast.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    atLeast.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(atLeast);
    logToPhyMap.put(op, atLeast);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources != null) {
        try {
            for (LogicalOperator source : sources) {
                currentPlan.connect(logToPhyMap.get(source), atLeast);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical less-than-or-equal comparison into a {@link LTOrEqualToExpr}.
 * The operand type comes from the left-hand logical operand; both physical operands are looked
 * up in {@code logToPhyMap}. The operator is added to the current plan, registered for
 * {@code op} and connected from the physical counterpart of each logical predecessor.
 *
 * @param op the logical less-than-or-equal operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LOLesserThanEqual op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    BinaryComparisonOperator atMost = new LTOrEqualToExpr(key, op.getRequestedParallelism());
    atMost.setAlias(op.getAlias());
    atMost.setOperandType(op.getLhsOperand().getType());
    atMost.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    atMost.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(atMost);
    logToPhyMap.put(op, atMost);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources != null) {
        try {
            for (LogicalOperator source : sources) {
                currentPlan.connect(logToPhyMap.get(source), atMost);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical equality comparison into an {@link EqualToExpr}.
 * The operand type comes from the left-hand logical operand; both physical operands are looked
 * up in {@code logToPhyMap}. The operator is added to the current plan, registered for
 * {@code op} and connected from the physical counterpart of each logical predecessor.
 *
 * @param op the logical equality operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LOEqual op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    BinaryComparisonOperator equality = new EqualToExpr(key, op.getRequestedParallelism());
    equality.setAlias(op.getAlias());
    equality.setOperandType(op.getLhsOperand().getType());
    equality.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    equality.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(equality);
    logToPhyMap.put(op, equality);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources != null) {
        try {
            for (LogicalOperator source : sources) {
                currentPlan.connect(logToPhyMap.get(source), equality);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical inequality comparison into a {@link NotEqualToExpr}.
 * The operand type comes from the left-hand logical operand; both physical operands are looked
 * up in {@code logToPhyMap}. The operator is added to the current plan, registered for
 * {@code op} and connected from the physical counterpart of each logical predecessor.
 *
 * @param op the logical not-equal operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LONotEqual op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    BinaryComparisonOperator inequality = new NotEqualToExpr(key, op.getRequestedParallelism());
    inequality.setAlias(op.getAlias());
    inequality.setOperandType(op.getLhsOperand().getType());
    inequality.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    inequality.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(inequality);
    logToPhyMap.put(op, inequality);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources != null) {
        try {
            for (LogicalOperator source : sources) {
                currentPlan.connect(logToPhyMap.get(source), inequality);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical regular-expression match into a {@link PORegexp}.
 * Both physical operands are looked up in {@code logToPhyMap}; the operator is added to the
 * current plan, registered for {@code op} and connected from the physical counterpart of each
 * logical predecessor. When the second predecessor (index 1) is exactly a
 * {@link ConstantExpression}, the PORegexp is told so via {@code setConstExpr(true)}.
 *
 * @param op the logical regexp operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LORegexp op) throws VisitorException {
    String scope = op.getOperatorKey().scope;
    // Declared with its concrete type so that setConstExpr needs no downcast.
    PORegexp exprOp =
        new PORegexp(new OperatorKey(scope, nodeGen.getNextNodeId(scope)),
            op.getRequestedParallelism());
    exprOp.setAlias(op.getAlias());
    exprOp.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    exprOp.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));
    LogicalPlan lp = op.getPlan();
    currentPlan.add(exprOp);
    logToPhyMap.put(op, exprOp);
    List<LogicalOperator> predecessors = lp.getPredecessors(op);
    if (predecessors == null) {
        return;
    }
    int counter = 0;
    for (LogicalOperator lo : predecessors) {
        PhysicalOperator from = logToPhyMap.get(lo);
        // If the source is a ConstantExpression we notify PORegexp about it.
        // It helps to optimize regex operation.
        // Class identity replaces the former comparison of canonical names: getCanonicalName()
        // returns null for anonymous/local classes, which made that comparison throw.
        if (from.getClass() == ConstantExpression.class && counter == 1) {
            exprOp.setConstExpr(true);
        }
        counter++;
        try {
            currentPlan.connect(from, exprOp);
        } catch (PlanException e) {
            int errCode = 2015;
            String msg = "Invalid physical operators in the physical plan" ;
            throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical addition into an {@link Add} expression operator.
 * The result type is copied from the logical operator; both physical operands are looked up in
 * {@code logToPhyMap}. The operator is added to the current plan, registered for {@code op}
 * and connected from the physical counterpart of each logical predecessor.
 *
 * @param op the logical add operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LOAdd op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    BinaryExpressionOperator sum = new Add(key, op.getRequestedParallelism());
    sum.setAlias(op.getAlias());
    sum.setResultType(op.getType());
    sum.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    sum.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(sum);
    logToPhyMap.put(op, sum);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources != null) {
        try {
            for (LogicalOperator source : sources) {
                currentPlan.connect(logToPhyMap.get(source), sum);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical subtraction into a {@link Subtract} expression operator.
 * The result type is copied from the logical operator; both physical operands are looked up in
 * {@code logToPhyMap}. The operator is added to the current plan, registered for {@code op}
 * and connected from the physical counterpart of each logical predecessor.
 *
 * @param op the logical subtract operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LOSubtract op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    BinaryExpressionOperator difference = new Subtract(key, op.getRequestedParallelism());
    difference.setAlias(op.getAlias());
    difference.setResultType(op.getType());
    difference.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    difference.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(difference);
    logToPhyMap.put(op, difference);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources != null) {
        try {
            for (LogicalOperator source : sources) {
                currentPlan.connect(logToPhyMap.get(source), difference);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical multiplication into a {@link Multiply} expression operator.
 * The result type is copied from the logical operator; both physical operands are looked up in
 * {@code logToPhyMap}. The operator is added to the current plan, registered for {@code op}
 * and connected from the physical counterpart of each logical predecessor.
 *
 * @param op the logical multiply operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LOMultiply op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    BinaryExpressionOperator product = new Multiply(key, op.getRequestedParallelism());
    product.setAlias(op.getAlias());
    product.setResultType(op.getType());
    product.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    product.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(product);
    logToPhyMap.put(op, product);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources != null) {
        try {
            for (LogicalOperator source : sources) {
                currentPlan.connect(logToPhyMap.get(source), product);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical division into a {@link Divide} expression operator.
 * The result type is copied from the logical operator; both physical operands are looked up in
 * {@code logToPhyMap}. The operator is added to the current plan, registered for {@code op}
 * and connected from the physical counterpart of each logical predecessor.
 *
 * @param op the logical divide operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LODivide op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    BinaryExpressionOperator quotient = new Divide(key, op.getRequestedParallelism());
    quotient.setAlias(op.getAlias());
    quotient.setResultType(op.getType());
    quotient.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    quotient.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(quotient);
    logToPhyMap.put(op, quotient);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources != null) {
        try {
            for (LogicalOperator source : sources) {
                currentPlan.connect(logToPhyMap.get(source), quotient);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical modulo into a {@link Mod} expression operator.
 * The result type is copied from the logical operator; both physical operands are looked up in
 * {@code logToPhyMap}. The operator is added to the current plan, registered for {@code op}
 * and connected from the physical counterpart of each logical predecessor.
 *
 * @param op the logical mod operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LOMod op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    BinaryExpressionOperator remainder = new Mod(key, op.getRequestedParallelism());
    remainder.setAlias(op.getAlias());
    remainder.setResultType(op.getType());
    remainder.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    remainder.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(remainder);
    logToPhyMap.put(op, remainder);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources != null) {
        try {
            for (LogicalOperator source : sources) {
                currentPlan.connect(logToPhyMap.get(source), remainder);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical AND into a {@link POAnd}.
 * Both physical operands are looked up in {@code logToPhyMap}; the operator is added to the
 * current plan, registered for {@code op} and connected from the physical counterpart of each
 * logical predecessor.
 *
 * @param op the logical and operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LOAnd op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    BinaryComparisonOperator conjunction = new POAnd(key, op.getRequestedParallelism());
    conjunction.setAlias(op.getAlias());
    conjunction.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    conjunction.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(conjunction);
    logToPhyMap.put(op, conjunction);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources != null) {
        try {
            for (LogicalOperator source : sources) {
                currentPlan.connect(logToPhyMap.get(source), conjunction);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical OR into a {@link POOr}.
 * Both physical operands are looked up in {@code logToPhyMap}; the operator is added to the
 * current plan, registered for {@code op} and connected from the physical counterpart of each
 * logical predecessor.
 *
 * @param op the logical or operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a connection fails with a PlanException
 */
@Override
protected void visit(LOOr op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    BinaryComparisonOperator disjunction = new POOr(key, op.getRequestedParallelism());
    disjunction.setAlias(op.getAlias());
    disjunction.setLhs((ExpressionOperator) logToPhyMap.get(op.getLhsOperand()));
    disjunction.setRhs((ExpressionOperator) logToPhyMap.get(op.getRhsOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(disjunction);
    logToPhyMap.put(op, disjunction);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources != null) {
        try {
            for (LogicalOperator source : sources) {
                currentPlan.connect(logToPhyMap.get(source), disjunction);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }
}
/**
 * Translates a logical NOT into a {@link PONot}.
 * The physical operand is looked up in {@code logToPhyMap}; the operator is added to the
 * current plan and registered for {@code op}. Only the first logical predecessor is connected.
 *
 * @param op the logical not operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when the connection fails with a PlanException
 */
@Override
protected void visit(LONot op) throws VisitorException {
    final String opScope = op.getOperatorKey().scope;
    final OperatorKey key = new OperatorKey(opScope, nodeGen.getNextNodeId(opScope));
    UnaryComparisonOperator negation = new PONot(key, op.getRequestedParallelism());
    negation.setAlias(op.getAlias());
    negation.setExpr((ExpressionOperator) logToPhyMap.get(op.getOperand()));

    LogicalPlan owningPlan = op.getPlan();
    currentPlan.add(negation);
    logToPhyMap.put(op, negation);

    List<LogicalOperator> sources = owningPlan.getPredecessors(op);
    if (sources == null) {
        return;
    }
    PhysicalOperator operand = logToPhyMap.get(sources.get(0));
    try {
        currentPlan.connect(operand, negation);
    } catch (PlanException e) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
    }
}
/**
 * Translates a logical CROSS into a chain of physical operators.
 * <p>
 * For each input the method builds a POForEach with two inner plans, both with their flatten
 * flag set: one calls the {@link GFCross} UDF with two integer constants (the number of inputs
 * and this input's index), the other projects the whole input tuple. Its output goes to a
 * POLocalRearrange marked as a cross, keyed by projections of columns
 * {@code 0 .. inputs.size()-1}. All local rearranges feed a single POGlobalRearrange, which
 * feeds a POPackage whose inputs are all marked inner. A final POForEach projects columns
 * {@code 1 .. count} of the package output as bags with flatten set, and is registered in
 * {@code logToPhyMap} as the translation of {@code cs}.
 *
 * @param cs the logical cross operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2015 is
 *         thrown when a plan connection fails with a PlanException; a VisitorException with
 *         error code 2058 is thrown when an ExecException is raised inside the per-input loop
 */
@Override
protected void visit(LOCross cs) throws VisitorException {
    String scope = cs.getOperatorKey().scope;
    List<LogicalOperator> inputs = cs.getInputs();

    POGlobalRearrange poGlobal = new POGlobalRearrange(new OperatorKey(
            scope, nodeGen.getNextNodeId(scope)), cs.getRequestedParallelism());
    poGlobal.setCustomPartitioner(cs.getCustomPartitioner());
    poGlobal.setAlias(cs.getAlias());
    POPackage poPackage = new POPackage(new OperatorKey(scope,
            nodeGen.getNextNodeId(scope)), cs.getRequestedParallelism());
    // The alias belongs on the package here; previously poGlobal's alias was set a second time
    // and the package was left without one.
    poPackage.setAlias(cs.getAlias());

    currentPlan.add(poGlobal);
    currentPlan.add(poPackage);

    int count = 0;
    try {
        currentPlan.connect(poGlobal, poPackage);
        List<Boolean> flattenLst = Arrays.asList(true, true);

        for (LogicalOperator op : inputs) {
            // Inner plan 1: GFCross(number of inputs, index of this input).
            PhysicalPlan fep1 = new PhysicalPlan();
            ConstantExpression ce1 = new ConstantExpression(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cs.getRequestedParallelism());
            ce1.setValue(inputs.size());
            ce1.setResultType(DataType.INTEGER);
            fep1.add(ce1);

            ConstantExpression ce2 = new ConstantExpression(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cs.getRequestedParallelism());
            ce2.setValue(count);
            ce2.setResultType(DataType.INTEGER);
            fep1.add(ce2);

            POUserFunc gfc = new POUserFunc(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cs.getRequestedParallelism(), Arrays.asList((PhysicalOperator) ce1, (PhysicalOperator) ce2), new FuncSpec(GFCross.class.getName()));
            gfc.setAlias(cs.getAlias());
            gfc.setResultType(DataType.BAG);
            fep1.addAsLeaf(gfc);
            gfc.setInputs(Arrays.asList((PhysicalOperator) ce1, (PhysicalOperator) ce2));

            // Inner plan 2: project the entire input tuple.
            PhysicalPlan fep2 = new PhysicalPlan();
            POProject feproj = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cs.getRequestedParallelism());
            feproj.setAlias(cs.getAlias());
            feproj.setResultType(DataType.TUPLE);
            feproj.setStar(true);
            feproj.setOverloaded(false);
            fep2.add(feproj);

            List<PhysicalPlan> fePlans = Arrays.asList(fep1, fep2);
            POForEach fe = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cs.getRequestedParallelism(), fePlans, flattenLst);
            fe.setAlias(cs.getAlias());
            currentPlan.add(fe);
            currentPlan.connect(logToPhyMap.get(op), fe);

            // Local rearrange keyed on one integer projection per input.
            POLocalRearrange physOp = new POLocalRearrange(new OperatorKey(
                    scope, nodeGen.getNextNodeId(scope)), cs.getRequestedParallelism());
            physOp.setAlias(cs.getAlias());
            List<PhysicalPlan> lrPlans = new ArrayList<PhysicalPlan>();
            for (int i = 0; i < inputs.size(); i++) {
                PhysicalPlan lrp1 = new PhysicalPlan();
                POProject lrproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cs.getRequestedParallelism(), i);
                lrproj1.setAlias(cs.getAlias());
                lrproj1.setOverloaded(false);
                lrproj1.setResultType(DataType.INTEGER);
                lrp1.add(lrproj1);
                lrPlans.add(lrp1);
            }

            physOp.setCross(true);
            physOp.setIndex(count++);
            physOp.setKeyType(DataType.TUPLE);
            physOp.setPlans(lrPlans);
            physOp.setResultType(DataType.TUPLE);

            currentPlan.add(physOp);
            currentPlan.connect(fe, physOp);
            currentPlan.connect(physOp, poGlobal);
        }
    } catch (PlanException e1) {
        int errCode = 2015;
        String msg = "Invalid physical operators in the physical plan" ;
        throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e1);
    } catch (ExecException e) {
        int errCode = 2058;
        String msg = "Unable to set index on newly create POLocalRearrange.";
        throw new VisitorException(msg, errCode, PigException.BUG, e);
    }

    poPackage.setKeyType(DataType.TUPLE);
    poPackage.setResultType(DataType.TUPLE);
    poPackage.setNumInps(count);
    // Every input of the package is treated as inner.
    boolean[] inner = new boolean[count];
    Arrays.fill(inner, true);
    poPackage.setInner(inner);

    // Final foreach: project the bag of each input (columns 1..count) with flatten set.
    List<PhysicalPlan> fePlans = new ArrayList<PhysicalPlan>();
    List<Boolean> flattenLst = new ArrayList<Boolean>();
    for (int i = 1; i <= count; i++) {
        PhysicalPlan fep1 = new PhysicalPlan();
        POProject feproj1 = new POProject(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cs.getRequestedParallelism(), i);
        feproj1.setAlias(cs.getAlias());
        feproj1.setResultType(DataType.BAG);
        feproj1.setOverloaded(false);
        fep1.add(feproj1);
        fePlans.add(fep1);
        flattenLst.add(true);
    }

    POForEach fe = new POForEach(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), cs.getRequestedParallelism(), fePlans, flattenLst);
    fe.setAlias(cs.getAlias());
    currentPlan.add(fe);
    try {
        currentPlan.connect(poPackage, fe);
    } catch (PlanException e1) {
        int errCode = 2015;
        String msg = "Invalid physical operators in the physical plan" ;
        throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e1);
    }
    logToPhyMap.put(cs, fe);
}
/**
 * Translates a logical COGROUP according to its group type.
 * <ul>
 * <li>COLLECTED: delegates to {@code translateCollectedCogroup}.</li>
 * <li>REGULAR: builds the local-rearrange / global-rearrange / package trio, marks the package
 *     as a GROUP package and registers it for {@code cg}.</li>
 * <li>MERGE: rejects any inner flag, validates the inputs with {@code validateMapSideMerge},
 *     builds a POMergeCogroup, connects every input's physical operator to it and registers it
 *     for {@code cg}.</li>
 * </ul>
 * After the type-specific work {@code translateSoftLinks(cg)} is invoked.
 *
 * @param cg the logical cogroup operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException is thrown for an unknown
 *         group type, for an inner flag on a merge cogroup, or (error code 2015) when a
 *         connection fails with a PlanException
 */
@Override
protected void visit(LOCogroup cg) throws VisitorException {
    switch (cg.getGroupType()) {
    case COLLECTED: {
        translateCollectedCogroup(cg);
        break;
    }
    case REGULAR: {
        POPackage pkg = compileToLR_GR_PackTrio(cg.getOperatorKey().scope,
                cg.getInputs(), cg.getRequestedParallelism(), cg.getCustomPartitioner(),
                cg.getAlias(), cg.getInner(), cg.getGroupByPlans());
        pkg.setPackageType(PackageType.GROUP);
        logToPhyMap.put(cg, pkg);
        break;
    }
    case MERGE: {
        boolean noInnerRequested = validateMergeCogrp(cg.getInner());
        if (!noInnerRequested) {
            throw new LogicalToPhysicalTranslatorException("Inner is not " +
                    "supported for any relation on Merge Cogroup.");
        }
        String cogroupAlias = cg.getAlias();
        List<LogicalOperator> relations = cg.getInputs();
        validateMapSideMerge(relations, cg.getPlan());
        POMergeCogroup mergeCogroup = compileToMergeCogrp(cg.getOperatorKey().scope,
                relations, cg.getGroupByPlans(), cogroupAlias, cg.getRequestedParallelism());
        mergeCogroup.setResultType(DataType.TUPLE);
        mergeCogroup.setAlias(cg.getAlias());
        currentPlan.add(mergeCogroup);
        try {
            for (LogicalOperator relation : relations) {
                currentPlan.connect(logToPhyMap.get(relation), mergeCogroup);
            }
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
        logToPhyMap.put(cg, mergeCogroup);
        break;
    }
    default:
        throw new LogicalToPhysicalTranslatorException("Unknown CoGroup Modifier",PigException.BUG);
    }
    translateSoftLinks(cg);
}
/**
 * Checks that no relation of a merge cogroup is flagged as inner.
 *
 * @param innerFlags one flag per relation
 * @return {@code true} when every flag is {@code false} (including an empty array),
 *         {@code false} as soon as one flag is {@code true}
 */
private boolean validateMergeCogrp(boolean[] innerFlags){
    int position = 0;
    while (position < innerFlags.length && !innerFlags[position]) {
        position++;
    }
    // Reaching the end means no inner flag was encountered.
    return position == innerFlags.length;
}
/**
 * Builds the POMergeCogroup for a merge cogroup.
 * For every input a POLocalRearrange is created; its plans are obtained by walking the
 * input's logical group-by plans with this visitor, each into a fresh physical plan. The
 * rearrange's index is the input's position, and its key type is TUPLE when there are several
 * key plans, otherwise the result type of the first leaf of the single key plan.
 * The created rearranges are handed to the POMergeCogroup and are not added to the current
 * plan by this method.
 *
 * @param scope scope used for the new operator keys
 * @param inputs logical inputs of the cogroup
 * @param innerPlans logical group-by plans per input
 * @param alias alias set on each local rearrange
 * @param parallel parallelism passed to the POMergeCogroup
 * @return the new POMergeCogroup
 * @throws VisitorException a LogicalToPhysicalTranslatorException with error code 2071 is
 *         thrown when setting a rearrange's plans fails; a VisitorException with error code
 *         2058 is thrown when setting its index fails
 */
private POMergeCogroup compileToMergeCogrp(String scope, List<LogicalOperator> inputs,
        MultiMap<LogicalOperator, LogicalPlan> innerPlans, String alias, int parallel) throws VisitorException{
    POLocalRearrange[] rearranges = new POLocalRearrange[inputs.size()];
    List<PhysicalOperator> physicalInputs = new ArrayList<PhysicalOperator>(inputs.size());
    int index = 0;

    for (LogicalOperator input : inputs) {
        physicalInputs.add(logToPhyMap.get(input));
        Collection<LogicalPlan> keyPlans = innerPlans.get(input);

        // The rearrange extracts the keys from the input's tuples.
        POLocalRearrange rearrange =
                new POLocalRearrange(new OperatorKey(scope, nodeGen.getNextNodeId(scope)));
        rearrange.setAlias(alias);

        // One physical plan per key expression; the enclosing plan is parked on the stack
        // while the nested plans are built.
        List<PhysicalPlan> keyExprPlans = new ArrayList<PhysicalPlan>();
        currentPlans.push(currentPlan);
        for (LogicalPlan keyPlan : keyPlans) {
            currentPlan = new PhysicalPlan();
            pushWalker(mCurrentWalker.spawnChildWalker(keyPlan));
            mCurrentWalker.walk(this);
            keyExprPlans.add(currentPlan);
            popWalker();
        }
        currentPlan = currentPlans.pop();

        try {
            rearrange.setPlans(keyExprPlans);
        } catch (PlanException pe) {
            throw new LogicalToPhysicalTranslatorException(
                    "Problem with setting up local rearrange's plans.", 2071, PigException.BUG, pe);
        }

        rearranges[index] = rearrange;
        try {
            rearrange.setIndex(index);
        } catch (ExecException e1) {
            throw new VisitorException(
                    "Unable to set index on newly create POLocalRearrange.", 2058, PigException.BUG, e1);
        }
        index++;

        if (keyPlans.size() > 1) {
            rearrange.setKeyType(DataType.TUPLE);
        } else {
            rearrange.setKeyType(keyExprPlans.get(0).getLeaves().get(0).getResultType());
        }
        rearrange.setResultType(DataType.TUPLE);
    }

    return new POMergeCogroup(new OperatorKey(scope, nodeGen.getNextNodeId(scope)),
            physicalInputs, rearranges, parallel);
}
/**
 * Builds the local-rearrange / global-rearrange / package trio in the current plan.
 * A POGlobalRearrange and a POPackage are added and connected. For each input a
 * POLocalRearrange is created whose plans come from walking the input's logical plans with
 * this visitor; it is indexed by the input's position, connected from the input's physical
 * operator and into the global rearrange. The package's key type is the key type computed for
 * the last input processed.
 *
 * @param scope scope used for the new operator keys
 * @param inputs logical inputs
 * @param parallel requested parallelism for the new operators
 * @param customPartitioner partitioner set on the global rearrange
 * @param alias alias set on the new operators
 * @param innerFlags inner flags set on the package
 * @param innerPlans logical key plans per input
 * @return the POPackage at the end of the trio
 * @throws VisitorException a LogicalToPhysicalTranslatorException is thrown with error code
 *         2015 when a connection fails or 2071 when setting a rearrange's plans fails; a
 *         VisitorException with error code 2058 is thrown when setting its index fails
 */
private POPackage compileToLR_GR_PackTrio(String scope,List<LogicalOperator> inputs,
        int parallel, String customPartitioner, String alias, boolean[] innerFlags, MultiMap<LogicalOperator,
        LogicalPlan> innerPlans) throws VisitorException {
    POGlobalRearrange globalRearrange = new POGlobalRearrange(
            new OperatorKey(scope, nodeGen.getNextNodeId(scope)), parallel);
    globalRearrange.setAlias(alias);
    globalRearrange.setCustomPartitioner(customPartitioner);

    POPackage pkg = new POPackage(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), parallel);
    pkg.setAlias(alias);

    currentPlan.add(globalRearrange);
    currentPlan.add(pkg);
    try {
        currentPlan.connect(globalRearrange, pkg);
    } catch (PlanException e1) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, e1);
    }

    int index = 0;
    Byte keyType = null;
    for (LogicalOperator input : inputs) {
        Collection<LogicalPlan> keyPlans = innerPlans.get(input);
        POLocalRearrange rearrange = new POLocalRearrange(
                new OperatorKey(scope, nodeGen.getNextNodeId(scope)), parallel);
        rearrange.setAlias(alias);

        // Build one physical plan per key plan; the enclosing plan waits on the stack meanwhile.
        List<PhysicalPlan> keyExprPlans = new ArrayList<PhysicalPlan>();
        currentPlans.push(currentPlan);
        for (LogicalPlan keyPlan : keyPlans) {
            currentPlan = new PhysicalPlan();
            pushWalker(mCurrentWalker.spawnChildWalker(keyPlan));
            mCurrentWalker.walk(this);
            keyExprPlans.add(currentPlan);
            popWalker();
        }
        currentPlan = currentPlans.pop();

        try {
            rearrange.setPlans(keyExprPlans);
        } catch (PlanException pe) {
            throw new LogicalToPhysicalTranslatorException(
                    "Problem with setting up local rearrange's plans.", 2071, PigException.BUG, pe);
        }
        try {
            rearrange.setIndex(index);
        } catch (ExecException e1) {
            throw new VisitorException(
                    "Unable to set index on newly create POLocalRearrange.", 2058, PigException.BUG, e1);
        }
        index++;

        // Several key plans form a tuple key; a single one keeps its own result type.
        if (keyPlans.size() > 1) {
            keyType = DataType.TUPLE;
        } else {
            keyType = keyExprPlans.get(0).getLeaves().get(0).getResultType();
        }
        rearrange.setKeyType(keyType);
        rearrange.setResultType(DataType.TUPLE);

        currentPlan.add(rearrange);
        try {
            currentPlan.connect(logToPhyMap.get(input), rearrange);
            currentPlan.connect(rearrange, globalRearrange);
        } catch (PlanException e) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
        }
    }

    pkg.setKeyType(keyType);
    pkg.setResultType(DataType.TUPLE);
    pkg.setNumInps(index);
    pkg.setInner(innerFlags);
    return pkg;
}
/**
 * Translates a COLLECTED cogroup into a POCollectedGroup.
 * Only the first input is used. Its logical group-by plans are walked with this visitor, each
 * into a fresh physical plan, and set on the new operator. The key type is TUPLE when there
 * are several plans, otherwise the result type of the first leaf of the single plan. The
 * operator is added to the current plan, connected from the input's physical operator and
 * registered for {@code cg}.
 *
 * @param cg the logical cogroup operator
 * @throws VisitorException a LogicalToPhysicalTranslatorException is thrown with error code
 *         2071 when setting the plans fails or 2015 when the connection fails
 */
private void translateCollectedCogroup(LOCogroup cg) throws VisitorException {
    final String opScope = cg.getOperatorKey().scope;
    List<LogicalOperator> relations = cg.getInputs();
    // can have only one input
    LogicalOperator relation = relations.get(0);
    List<LogicalPlan> keyPlans = (List<LogicalPlan>) cg.getGroupByPlans().get(relation);

    POCollectedGroup collectedGroup = new POCollectedGroup(
            new OperatorKey(opScope, nodeGen.getNextNodeId(opScope)));
    collectedGroup.setAlias(cg.getAlias());

    // Build one physical plan per key plan; the enclosing plan waits on the stack meanwhile.
    List<PhysicalPlan> keyExprPlans = new ArrayList<PhysicalPlan>();
    currentPlans.push(currentPlan);
    for (LogicalPlan keyPlan : keyPlans) {
        currentPlan = new PhysicalPlan();
        pushWalker(mCurrentWalker.spawnChildWalker(keyPlan));
        mCurrentWalker.walk(this);
        keyExprPlans.add(currentPlan);
        popWalker();
    }
    currentPlan = currentPlans.pop();

    try {
        collectedGroup.setPlans(keyExprPlans);
    } catch (PlanException pe) {
        throw new LogicalToPhysicalTranslatorException(
                "Problem with setting up map group's plans.", 2071, PigException.BUG, pe);
    }

    Byte keyType;
    if (keyPlans.size() > 1) {
        keyType = DataType.TUPLE;
    } else {
        keyType = keyExprPlans.get(0).getLeaves().get(0).getResultType();
    }
    collectedGroup.setKeyType(keyType);
    collectedGroup.setResultType(DataType.TUPLE);

    currentPlan.add(collectedGroup);
    try {
        currentPlan.connect(logToPhyMap.get(relation), collectedGroup);
    } catch (PlanException e) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, e);
    }
    logToPhyMap.put(cg, collectedGroup);
}
/**
 * Translates a logical join into physical operators and records the result
 * in logToPhyMap.
 * <p>
 * For every join input, each logical join-key plan is first converted into a
 * PhysicalPlan, and the result type of the first leaf of each converted plan
 * is collected. What is built next depends on loj.getJoinType():
 * <ul>
 * <li>SKEWED: a POSkewedJoin connected to all inputs; a schema is registered
 * for every outer input and null for every inner input.</li>
 * <li>REPLICATED: a POFRJoin connected to all inputs; when innerFlags[1] is
 * false a null-filled tuple sized by the schema of input 1 is passed in.</li>
 * <li>MERGE (after validateMapSideMerge): a POMergeJoin for a two-input
 * inner join, otherwise the operator returned by compileToMergeCogrp
 * followed by a flattening POForEach. This branch returns early.</li>
 * <li>HASH: the package returned by compileToLR_GR_PackTrio followed by a
 * flattening POForEach.</li>
 * </ul>
 * Every branch except MERGE finishes by calling translateSoftLinks(loj).
 *
 * @param loj the logical join to translate
 * @throws VisitorException if a physical operator cannot be created or
 * connected, or a required input schema is missing
 */
@Override
protected void visit(LOJoin loj) throws VisitorException {
String scope = loj.getOperatorKey().scope;
// List of join predicates
List<LogicalOperator> inputs = loj.getInputs();
// mapping of inner join physical plans corresponding to inner physical operators.
MultiMap<PhysicalOperator, PhysicalPlan> joinPlans = new LinkedMultiMap<PhysicalOperator, PhysicalPlan>();
// Outer list corresponds to join predicates. Inner list is inner physical plan of each predicate.
List<List<PhysicalPlan>> ppLists = new ArrayList<List<PhysicalPlan>>();
// List of physical operator corresponding to join predicates.
List<PhysicalOperator> inp = new ArrayList<PhysicalOperator>();
// Outer list corresponds to join predicates and inner list corresponds to type of keys for each predicate.
List<List<Byte>> keyTypes = new ArrayList<List<Byte>>();
boolean[] innerFlags = loj.getInnerFlags();
String alias = loj.getAlias();
int parallel = loj.getRequestedParallelism();
// Convert the join-key plans of every input into physical plans.
for (LogicalOperator op : inputs) {
PhysicalOperator physOp = logToPhyMap.get(op);
inp.add(physOp);
List<LogicalPlan> plans = (List<LogicalPlan>) loj.getJoinPlans().get(op);
List<PhysicalPlan> exprPlans = new ArrayList<PhysicalPlan>();
// Save the enclosing plan; each key plan is built into a fresh currentPlan.
currentPlans.push(currentPlan);
for (LogicalPlan lp : plans) {
currentPlan = new PhysicalPlan();
PlanWalker<LogicalOperator, LogicalPlan> childWalker = mCurrentWalker.spawnChildWalker(lp);
pushWalker(childWalker);
mCurrentWalker.walk(this);
exprPlans.add(currentPlan);
popWalker();
}
// Restore the enclosing plan.
currentPlan = currentPlans.pop();
ppLists.add(exprPlans);
joinPlans.put(physOp, exprPlans);
// Key could potentially be a tuple. So, we visit all exprPlans to get types of members of tuples.
List<Byte> tupleKeyMemberTypes = new ArrayList<Byte>();
for(PhysicalPlan exprPlan : exprPlans)
tupleKeyMemberTypes.add(exprPlan.getLeaves().get(0).getResultType());
keyTypes.add(tupleKeyMemberTypes);
}
if (loj.getJoinType() == LOJoin.JOINTYPE.SKEWED) {
POSkewedJoin skj;
try {
skj = new POSkewedJoin(new OperatorKey(scope,nodeGen.getNextNodeId(scope)),
parallel,inp, innerFlags);
skj.setAlias(alias);
skj.setJoinPlans(joinPlans);
}
catch (Exception e) {
int errCode = 2015;
String msg = "Skewed Join creation failed";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
skj.setResultType(DataType.TUPLE);
// Register one schema entry per input: the real schema for outer
// inputs, null for inner inputs.
for (int i=0; i < inputs.size(); i++) {
LogicalOperator op = inputs.get(i);
if (!innerFlags[i]) {
try {
Schema s = op.getSchema();
// if the schema cannot be determined
if (s == null) {
// Thrown only to reach the catch below, which reports the failure.
throw new FrontendException();
}
skj.addSchema(s);
} catch (FrontendException e) {
int errCode = 2015;
String msg = "Couldn't set the schema for outer join" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
} else {
// This will never be retrieved. It just guarantees that the index will be valid when
// MRCompiler is trying to read the schema
skj.addSchema(null);
}
}
currentPlan.add(skj);
for (LogicalOperator op : inputs) {
try {
currentPlan.connect(logToPhyMap.get(op), skj);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
logToPhyMap.put(loj, skj);
}
else if(loj.getJoinType() == LOJoin.JOINTYPE.REPLICATED) {
// The fragment index passed to POFRJoin is always 0 here.
int fragment = 0;
POFRJoin pfrj;
try {
boolean isLeftOuter = false;
// We don't check for bounds issues as we assume that a join
// involves at least two inputs
isLeftOuter = !innerFlags[1];
Tuple nullTuple = null;
if( isLeftOuter ) {
try {
// We know that in a Left outer join its only a two way
// join, so we assume index of 1 for the right input
Schema inputSchema = inputs.get(1).getSchema();
// We check if we have a schema before the join
if(inputSchema == null) {
int errCode = 1109;
String msg = "Input (" + inputs.get(1).getAlias() + ") " +
"on which outer join is desired should have a valid schema";
// NOTE(review): if LogicalToPhysicalTranslatorException is a
// FrontendException, this 1109 error is caught by the catch
// below and rewrapped as 2104 -- confirm that is intended.
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.INPUT);
}
// Using the schema we decide the number of columns/fields
// in the nullTuple
nullTuple = TupleFactory.getInstance().newTuple(inputSchema.size());
for(int j = 0; j < inputSchema.size(); j++) {
nullTuple.set(j, null);
}
} catch( FrontendException e ) {
int errCode = 2104;
String msg = "Error while determining the schema of input";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
pfrj = new POFRJoin(new OperatorKey(scope,nodeGen.getNextNodeId(scope)),parallel,
inp, ppLists, keyTypes, null, fragment, isLeftOuter, nullTuple);
pfrj.setAlias(alias);
} catch (ExecException e1) {
int errCode = 2058;
String msg = "Unable to set index on newly create POLocalRearrange.";
throw new VisitorException(msg, errCode, PigException.BUG, e1);
}
pfrj.setResultType(DataType.TUPLE);
currentPlan.add(pfrj);
for (LogicalOperator op : inputs) {
try {
currentPlan.connect(logToPhyMap.get(op), pfrj);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
logToPhyMap.put(loj, pfrj);
}
// validateMapSideMerge either returns true or throws, so an unsupported
// predecessor aborts the translation instead of falling through to HASH.
else if (loj.getJoinType() == LOJoin.JOINTYPE.MERGE && validateMapSideMerge(inputs,loj.getPlan())) {
PhysicalOperator smj;
boolean usePOMergeJoin = inputs.size() == 2 && innerFlags[0] && innerFlags[1] ;
if(usePOMergeJoin){
// inner join on two sorted inputs. We have less restrictive
// implementation here in a form of POMergeJoin which doesn't
// require loaders to implement collectable interface.
try {
smj = new POMergeJoin(new OperatorKey(scope,nodeGen.getNextNodeId(scope)),
parallel,inp,joinPlans,keyTypes);
}
catch (PlanException e) {
int errCode = 2042;
String msg = "Merge Join creation failed";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
logToPhyMap.put(loj, smj);
}
else{
// in all other cases we fall back to POMergeCogroup + Flattening FEs
smj = compileToMergeCogrp(scope, inputs, loj.getJoinPlans(), alias, parallel);
}
smj.setResultType(DataType.TUPLE);
currentPlan.add(smj);
smj.setAlias(alias);
for (LogicalOperator op : inputs) {
try {
currentPlan.connect(logToPhyMap.get(op), smj);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
if(!usePOMergeJoin){
// Now create and configure foreach which will flatten the output
// of cogroup.
POForEach fe = compileFE4Flattening(innerFlags, scope, parallel, alias, inputs);
currentPlan.add(fe);
try {
currentPlan.connect(smj, fe);
} catch (PlanException e) {
throw new LogicalToPhysicalTranslatorException(e.getMessage(),e.getErrorCode(),e.getErrorSource(),e);
}
// In the cogroup fallback the join maps to the flattening foreach.
logToPhyMap.put(loj, fe);
}
// Returning here means translateSoftLinks(loj) below is not invoked for
// merge joins. NOTE(review): confirm this is intentional.
return;
}
else if (loj.getJoinType() == LOJoin.JOINTYPE.HASH){
POPackage poPackage = compileToLR_GR_PackTrio(scope, inputs, parallel, loj.getCustomPartitioner(), alias, innerFlags, loj.getJoinPlans());
POForEach fe = compileFE4Flattening(innerFlags, scope, parallel, alias, inputs);
currentPlan.add(fe);
try {
currentPlan.connect(poPackage, fe);
} catch (PlanException e) {
throw new LogicalToPhysicalTranslatorException(e.getDetailedMessage(),
e.getErrorCode(),e.getErrorSource(),e);
}
logToPhyMap.put(loj, fe);
poPackage.setPackageType(PackageType.JOIN);
}
translateSoftLinks(loj);
}
/**
 * Builds the POForEach that flattens the per-input bags of a join.
 * <p>
 * One inner plan is created per join input. Each plan projects column
 * {@code i + 1} as a bag (column 0 is the "group" field) and is marked for
 * flattening. For inputs whose inner flag is {@code false} the plan is
 * additionally passed through {@link #updateWithEmptyBagCheck}.
 *
 * @param innerFlags per-input flags; {@code false} marks an outer side
 * @param scope scope used when generating operator keys
 * @param parallel requested parallelism handed to the new operators
 * @param alias alias set on the new operators
 * @param inputs logical inputs of the join
 * @return the configured POForEach; it is not added to any plan here
 * @throws LogicalToPhysicalTranslatorException with error code 2015 when a
 *         PlanException is raised while building the plans
 */
private POForEach compileFE4Flattening(boolean[] innerFlags,String scope,
        int parallel, String alias, List<LogicalOperator> inputs)
        throws LogicalToPhysicalTranslatorException{
    List<PhysicalPlan> projectionPlans = new ArrayList<PhysicalPlan>();
    List<Boolean> flattenFlags = new ArrayList<Boolean>();
    POForEach flattener;
    try {
        int position = 0;
        for (LogicalOperator joinInput : inputs) {
            PhysicalPlan projectionPlan = new PhysicalPlan();
            // position + 1 since the first column is the "group" field
            OperatorKey projectKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
            POProject bagProjection = new POProject(projectKey, parallel, position + 1);
            bagProjection.setAlias(alias);
            bagProjection.setResultType(DataType.BAG);
            bagProjection.setOverloaded(false);
            projectionPlan.add(bagProjection);
            projectionPlans.add(projectionPlan);

            // The parser marks the side on which empty bags must be kept for
            // non-matches as outer, i.e. its inner flag is false.
            boolean outerSide = !innerFlags[position];
            if (outerSide) {
                // For an outer join add a bincond that projects nulls when
                // the bag is empty.
                updateWithEmptyBagCheck(projectionPlan, joinInput);
            }
            flattenFlags.add(true);
            position++;
        }
        OperatorKey forEachKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
        flattener = new POForEach(forEachKey, parallel, projectionPlans, flattenFlags);
        flattener.setAlias(alias);
    } catch (PlanException planFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, planFailure);
    }
    return flattener;
}
/**
 * Augments a foreach inner plan with an empty-bag check for an outer join
 * input, by delegating to {@code CompilerUtils.addEmptyBagOuterJoin} with the
 * schema of that input.
 *
 * @param fePlan the plan to update
 * @param joinInput the relation whose corresponding bag is being checked
 * @throws PlanException declared for failures while updating the plan
 * @throws LogicalToPhysicalTranslatorException if the input has no schema
 *         (error 1109) or the schema cannot be determined (error 2104)
 */
public static void updateWithEmptyBagCheck(PhysicalPlan fePlan, LogicalOperator joinInput) throws PlanException, LogicalToPhysicalTranslatorException {
    Schema joinInputSchema;
    try {
        joinInputSchema = joinInput.getSchema();
        // NOTE(review): this throw sits inside the try on purpose to keep the
        // existing behaviour; if LogicalToPhysicalTranslatorException is a
        // FrontendException it is rewrapped as 2104 by the catch below.
        if (null == joinInputSchema) {
            String missingSchemaMsg = "Input (" + joinInput.getAlias() + ") " +
                    "on which outer join is desired should have a valid schema";
            throw new LogicalToPhysicalTranslatorException(missingSchemaMsg, 1109, PigException.INPUT);
        }
    } catch (FrontendException schemaFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Error while determining the schema of input", 2104, PigException.BUG, schemaFailure);
    }
    CompilerUtils.addEmptyBagOuterJoin(fePlan, joinInputSchema);
}
/**
 * Recursively checks that every operator upstream of a merge join/cogroup is
 * one of the supported kinds (LOFilter, LOForEach, LOLoad or LONative).
 * <p>
 * The method never returns {@code false}: it either returns {@code true} or
 * throws. The error message now names the offending operator class after
 * "Found : ", which previously was left dangling.
 *
 * @param preds operators to check; {@code null} or empty ends the recursion
 * @param lp logical plan used to look up the predecessors of each operator
 * @return {@code true} when all visited operators are supported
 * @throws LogicalToPhysicalTranslatorException with error code 1103 when an
 *         unsupported operator is found
 */
private boolean validateMapSideMerge(List<LogicalOperator> preds,LogicalPlan lp)
        throws LogicalToPhysicalTranslatorException{
    if (preds == null || preds.isEmpty()) {
        // Nothing (left) to check on this path.
        return true;
    }
    for (LogicalOperator lo : preds) {
        boolean supported = lo instanceof LOFilter || lo instanceof LOForEach
                || lo instanceof LOLoad || lo instanceof LONative;
        if (!supported) {
            int errCode = 1103;
            // A null entry fails the instanceof checks too; guard getClass().
            String found = (lo == null) ? "null" : lo.getClass().getSimpleName();
            String errMsg = "Merge join/Cogroup only supports Filter, Foreach, " +
                    "filter and Load as its predecessor. Found : " + found;
            throw new LogicalToPhysicalTranslatorException(errMsg, errCode);
        }
        // All is good at this level. Visit predecessors now; a violation
        // further up surfaces as an exception, so the result is not needed.
        validateMapSideMerge(lp.getPredecessors(lo), lp);
    }
    // We visited everything and all is good.
    return true;
}
/**
 * Translates a logical filter into a POFilter, converts its comparison plan
 * into the filter's inner physical plan, connects the filter to the physical
 * operator of its first predecessor and translates soft links.
 *
 * @param filter the logical filter to translate
 * @throws VisitorException if no predecessor exists (error 2051) or the
 *         operators cannot be connected (error 2015)
 */
@Override
protected void visit(LOFilter filter) throws VisitorException {
    final String scope = filter.getOperatorKey().scope;
    final OperatorKey filterKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final POFilter physFilter = new POFilter(filterKey, filter.getRequestedParallelism());
    physFilter.setAlias(filter.getAlias());
    physFilter.setResultType(filter.getType());
    currentPlan.add(physFilter);
    logToPhyMap.put(filter, physFilter);

    // Build the comparison plan into a fresh plan, then restore the outer one.
    currentPlans.push(currentPlan);
    currentPlan = new PhysicalPlan();
    PlanWalker<LogicalOperator, LogicalPlan> conditionWalker =
            mCurrentWalker.spawnChildWalker(filter.getComparisonPlan());
    pushWalker(conditionWalker);
    mCurrentWalker.walk(this);
    popWalker();
    physFilter.setPlan(currentPlan);
    currentPlan = currentPlans.pop();

    List<LogicalOperator> preds = filter.getPlan().getPredecessors(filter);
    if (preds == null) {
        throw new LogicalToPhysicalTranslatorException(
                "Did not find a predecessor for Filter.", 2051, PigException.BUG);
    }
    PhysicalOperator source = logToPhyMap.get(preds.get(0));
    try {
        currentPlan.connect(source, physFilter);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }
    translateSoftLinks(filter);
}
/**
 * Translates a logical stream operator into a POStream and connects it to
 * the physical operator of its first predecessor.
 *
 * @param stream the logical stream operator to translate
 * @throws VisitorException if no predecessor exists (error 2051) or the
 *         operators cannot be connected (error 2015)
 */
@Override
protected void visit(LOStream stream) throws VisitorException {
    final String scope = stream.getOperatorKey().scope;
    final OperatorKey streamKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final POStream physStream = new POStream(streamKey, stream.getExecutableManager(),
            stream.getStreamingCommand(), this.pc.getProperties());
    physStream.setAlias(stream.getAlias());
    currentPlan.add(physStream);
    logToPhyMap.put(stream, physStream);

    List<LogicalOperator> preds = stream.getPlan().getPredecessors(stream);
    if (preds == null) {
        throw new LogicalToPhysicalTranslatorException(
                "Did not find a predecessor for Stream.", 2051, PigException.BUG);
    }
    PhysicalOperator source = logToPhyMap.get(preds.get(0));
    try {
        currentPlan.connect(source, physStream);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }
}
/**
 * Translates a logical projection into a POProject, or into a
 * PORelationToExprProject when the logical operator asks for an empty bag on
 * end of processing, and connects it to the physical operators of all of its
 * predecessors (if any).
 *
 * @param op the logical projection to translate
 * @throws VisitorException if the operators cannot be connected (error 2015)
 */
@SuppressWarnings("unchecked")
@Override
protected void visit(LOProject op) throws VisitorException {
    final String scope = op.getOperatorKey().scope;
    final boolean emptyBagOnEop = op.isSendEmptyBagOnEOP();
    final OperatorKey projectKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final POProject projection = emptyBagOnEop
            ? new PORelationToExprProject(projectKey, op.getRequestedParallelism())
            : new POProject(projectKey, op.getRequestedParallelism());

    projection.setAlias(op.getAlias());
    projection.setResultType(op.getType());
    projection.setColumns((ArrayList)op.getProjection());
    projection.setStar(op.isStar());
    projection.setOverloaded(op.getOverloaded());

    LogicalPlan owningPlan = op.getPlan();
    logToPhyMap.put(op, projection);
    currentPlan.add(projection);

    List<LogicalOperator> preds = owningPlan.getPredecessors(op);
    if (preds != null) {
        // A project may legitimately have no predecessors; otherwise wire
        // every one of them to the new projection.
        for (LogicalOperator pred : preds) {
            PhysicalOperator source = logToPhyMap.get(pred);
            try {
                currentPlan.connect(source, projection);
            } catch (PlanException connectFailure) {
                throw new LogicalToPhysicalTranslatorException(
                        "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
            }
        }
    }
}
/**
 * Translates a logical foreach into a POForEach. Each logical inner plan is
 * converted into its own PhysicalPlan using a
 * DependencyOrderWalkerWOSeenChk; the resulting operator is connected to the
 * physical operator of its first predecessor when one exists.
 * <p>
 * When there is no predecessor the method returns before translating soft
 * links.
 *
 * @param g the logical foreach to translate
 * @throws VisitorException if the operators cannot be connected (error 2015)
 */
@Override
protected void visit(LOForEach g) throws VisitorException {
    final String scope = g.getOperatorKey().scope;
    final List<PhysicalPlan> physInnerPlans = new ArrayList<PhysicalPlan>();
    final List<LogicalPlan> logicalInnerPlans = g.getForEachPlans();

    // Each inner plan is built into a fresh currentPlan; the enclosing plan
    // is parked on the stack meanwhile.
    currentPlans.push(currentPlan);
    for (LogicalPlan logicalInner : logicalInnerPlans) {
        currentPlan = new PhysicalPlan();
        PlanWalker<LogicalOperator, LogicalPlan> innerWalker =
                new DependencyOrderWalkerWOSeenChk<LogicalOperator, LogicalPlan>(logicalInner);
        pushWalker(innerWalker);
        innerWalker.walk(this);
        physInnerPlans.add(currentPlan);
        popWalker();
    }
    currentPlan = currentPlans.pop();

    OperatorKey forEachKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    POForEach physForEach = new POForEach(forEachKey, g.getRequestedParallelism(),
            physInnerPlans, g.getFlatten());
    physForEach.setAlias(g.getAlias());
    physForEach.setResultType(g.getType());
    logToPhyMap.put(g, physForEach);
    currentPlan.add(physForEach);

    // generate cannot have multiple inputs, and may have none at all
    List<LogicalOperator> preds = g.getPlan().getPredecessors(g);
    if (preds == null) {
        return;
    }
    PhysicalOperator source = logToPhyMap.get(preds.get(0));
    try {
        currentPlan.connect(source, physForEach);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }
    translateSoftLinks(g);
}
/**
 * Translates a logical sort into a POSort. The sort-column plans are
 * converted into physical plans; when the logical sort carries a user
 * function, a POUserComparisonFunc is created and handed to the POSort as
 * its comparator. The sort is connected to the physical operator of its
 * first predecessor and receives the limit, result type and sort info of the
 * logical operator.
 *
 * @param s the logical sort to translate
 * @throws VisitorException if no predecessor exists (error 2051), the
 *         operators cannot be connected (error 2015) or the sort info cannot
 *         be obtained
 */
@Override
protected void visit(LOSort s) throws VisitorException {
    final String scope = s.getOperatorKey().scope;
    final List<LogicalPlan> logicalSortPlans = s.getSortColPlans();
    final List<PhysicalPlan> physSortPlans = new ArrayList<PhysicalPlan>(logicalSortPlans.size());

    // Convert all the logical expression plans to physical expression plans.
    currentPlans.push(currentPlan);
    for (LogicalPlan logicalSortPlan : logicalSortPlans) {
        currentPlan = new PhysicalPlan();
        PlanWalker<LogicalOperator, LogicalPlan> sortPlanWalker =
                mCurrentWalker.spawnChildWalker(logicalSortPlan);
        pushWalker(sortPlanWalker);
        sortPlanWalker.walk(this);
        physSortPlans.add(currentPlan);
        popWalker();
    }
    currentPlan = currentPlans.pop();

    // Create the physical sort, with a user comparator only when requested.
    final POSort physSort;
    if (s.getUserFunc() != null) {
        OperatorKey comparatorKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
        POUserComparisonFunc userComparator = new POUserComparisonFunc(comparatorKey,
                s.getRequestedParallelism(), null, s.getUserFunc());
        OperatorKey sortKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
        physSort = new POSort(sortKey, s.getRequestedParallelism(), null,
                physSortPlans, s.getAscendingCols(), userComparator);
    } else {
        OperatorKey sortKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
        physSort = new POSort(sortKey, s.getRequestedParallelism(), null,
                physSortPlans, s.getAscendingCols(), null);
    }
    physSort.setAlias(s.getAlias());
    physSort.setLimit(s.getLimit());
    logToPhyMap.put(s, physSort);
    currentPlan.add(physSort);

    List<LogicalOperator> preds = s.getPlan().getPredecessors(s);
    if (preds == null) {
        throw new LogicalToPhysicalTranslatorException(
                "Did not find a predecessor for Sort.", 2051, PigException.BUG);
    }
    PhysicalOperator source = logToPhyMap.get(preds.get(0));
    try {
        currentPlan.connect(source, physSort);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }

    physSort.setResultType(s.getType());
    try {
        physSort.setSortInfo(s.getSortInfo());
    } catch (FrontendException sortInfoFailure) {
        throw new LogicalToPhysicalTranslatorException(sortInfoFailure);
    }
}
/**
 * Translates a logical distinct into a PODistinct. There are no inner plans:
 * the operator is created, added to the current plan and connected to the
 * physical operator of its first predecessor.
 *
 * @param op the logical distinct to translate
 * @throws VisitorException if no predecessor exists (error 2051) or the
 *         operators cannot be connected (error 2015)
 */
@Override
protected void visit(LODistinct op) throws VisitorException {
    final String scope = op.getOperatorKey().scope;
    final OperatorKey distinctKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final PhysicalOperator physDistinct = new PODistinct(distinctKey, op.getRequestedParallelism());
    physDistinct.setAlias(op.getAlias());
    physDistinct.setResultType(op.getType());
    logToPhyMap.put(op, physDistinct);
    currentPlan.add(physDistinct);

    // Distinct will only have a single input.
    List<LogicalOperator> preds = op.getPlan().getPredecessors(op);
    if (preds == null) {
        throw new LogicalToPhysicalTranslatorException(
                "Did not find a predecessor for Distinct.", 2051, PigException.BUG);
    }
    PhysicalOperator source = logToPhyMap.get(preds.get(0));
    try {
        currentPlan.connect(source, physDistinct);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }
}
/**
 * Translates a logical split into a POSplit whose split store is a FileSpec
 * on a temporary path, and connects it to the physical operator of its first
 * predecessor.
 *
 * @param split the logical split to translate
 * @throws VisitorException if a temporary path cannot be obtained, no
 *         predecessor exists (error 2051) or the operators cannot be
 *         connected (error 2015)
 */
@Override
protected void visit(LOSplit split) throws VisitorException {
    final String scope = split.getOperatorKey().scope;
    final OperatorKey splitKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final POSplit physSplit = new POSplit(splitKey, split.getRequestedParallelism());
    physSplit.setAlias(split.getAlias());

    FileSpec splitStoreSpec;
    try {
        splitStoreSpec = new FileSpec(FileLocalizer.getTemporaryPath(pc).toString(),
                new FuncSpec(Utils.getTmpFileCompressorName(pc)));
    } catch (IOException tmpPathFailure) {
        // The error code depends on the error source reported by the context;
        // unrecognised sources keep code 0.
        byte errSrc = pc.getErrorSource();
        int errCode;
        if (errSrc == PigException.BUG) {
            errCode = 2016;
        } else if (errSrc == PigException.REMOTE_ENVIRONMENT) {
            errCode = 6002;
        } else if (errSrc == PigException.USER_ENVIRONMENT) {
            errCode = 4003;
        } else {
            errCode = 0;
        }
        throw new LogicalToPhysicalTranslatorException(
                "Unable to obtain a temporary path.", errCode, errSrc, tmpPathFailure);
    }
    physSplit.setSplitStore(splitStoreSpec);
    logToPhyMap.put(split, physSplit);
    currentPlan.add(physSplit);

    List<LogicalOperator> preds = split.getPlan().getPredecessors(split);
    if (preds == null) {
        throw new LogicalToPhysicalTranslatorException(
                "Did not find a predecessor for Split.", 2051, PigException.BUG);
    }
    PhysicalOperator source = logToPhyMap.get(preds.get(0));
    try {
        currentPlan.connect(source, physSplit);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }
}
/**
 * Translates a split output into a POFilter whose inner plan is the physical
 * translation of the split's condition plan, connects it to the physical
 * operator of its first predecessor and translates soft links.
 *
 * @param split the logical split output to translate
 * @throws VisitorException if no predecessor exists (error 2051) or the
 *         operators cannot be connected (error 2015)
 */
@Override
protected void visit(LOSplitOutput split) throws VisitorException {
    final String scope = split.getOperatorKey().scope;
    final OperatorKey filterKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final POFilter conditionFilter = new POFilter(filterKey, split.getRequestedParallelism());
    conditionFilter.setAlias(split.getAlias());
    logToPhyMap.put(split, conditionFilter);
    currentPlan.add(conditionFilter);

    // Build the condition plan into a fresh plan, then restore the outer one.
    currentPlans.push(currentPlan);
    currentPlan = new PhysicalPlan();
    PlanWalker<LogicalOperator, LogicalPlan> conditionWalker =
            mCurrentWalker.spawnChildWalker(split.getConditionPlan());
    pushWalker(conditionWalker);
    mCurrentWalker.walk(this);
    popWalker();
    conditionFilter.setPlan(currentPlan);
    currentPlan = currentPlans.pop();

    // NOTE(review): the filter was already added to this plan above; this
    // second add looks redundant -- confirm add is idempotent before removing.
    currentPlan.add(conditionFilter);

    List<LogicalOperator> preds = split.getPlan().getPredecessors(split);
    if (preds == null) {
        throw new LogicalToPhysicalTranslatorException(
                "Did not find a predecessor for Split Output.", 2051, PigException.BUG);
    }
    PhysicalOperator source = logToPhyMap.get(preds.get(0));
    try {
        currentPlan.connect(source, conditionFilter);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }
    translateSoftLinks(split);
}
/**
 * Translates a logical user function. The function is instantiated from its
 * FuncSpec: an EvalFunc yields a POUserFunc, anything else is cast to
 * ComparisonFunc and yields a POUserComparisonFunc. The new operator is
 * connected to the physical operators of all function arguments. When the
 * logical function has an implicit referenced operator, its physical
 * counterpart is registered on the operator (cast to POUserFunc).
 *
 * @param func the logical user function to translate
 * @throws VisitorException if the operators cannot be connected (error 2015)
 */
@Override
protected void visit(LOUserFunc func) throws VisitorException {
    final String scope = func.getOperatorKey().scope;
    final Object funcInstance = PigContext.instantiateFuncFromSpec(func.getFuncSpec());

    final PhysicalOperator physFunc;
    if (!(funcInstance instanceof EvalFunc)) {
        OperatorKey comparisonKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
        physFunc = new POUserComparisonFunc(comparisonKey, func.getRequestedParallelism(),
                null, func.getFuncSpec(), (ComparisonFunc) funcInstance);
    } else {
        OperatorKey evalKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
        physFunc = new POUserFunc(evalKey, func.getRequestedParallelism(),
                null, func.getFuncSpec(), (EvalFunc) funcInstance);
    }
    physFunc.setAlias(func.getAlias());
    physFunc.setResultType(func.getType());
    currentPlan.add(physFunc);

    List<org.apache.pig.impl.logicalLayer.ExpressionOperator> arguments = func.getArguments();
    if (arguments != null) {
        for (LogicalOperator argument : arguments) {
            PhysicalOperator source = logToPhyMap.get(argument);
            try {
                currentPlan.connect(source, physFunc);
            } catch (PlanException connectFailure) {
                throw new LogicalToPhysicalTranslatorException(
                        "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
            }
        }
    }
    logToPhyMap.put(func, physFunc);

    // We need to track all the scalars.
    if (func.getImplicitReferencedOperator() != null) {
        PhysicalOperator referenced = logToPhyMap.get(func.getImplicitReferencedOperator());
        ((POUserFunc) physFunc).setReferencedOperator(referenced);
    }
}
/**
 * Translates a logical load into a POLoad configured with the load function,
 * alias, input file, PigContext and result type of the logical operator; the
 * alias is also used as the signature. If the load has a predecessor, the
 * physical operator of the first one is connected to it.
 *
 * @param loLoad the logical load to translate
 * @throws VisitorException if the operators cannot be connected (error 2015)
 */
@Override
protected void visit(LOLoad loLoad) throws VisitorException {
    final String scope = loLoad.getOperatorKey().scope;
    final OperatorKey loadKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final POLoad physLoad = new POLoad(loadKey, loLoad.getLoadFunc());
    physLoad.setAlias(loLoad.getAlias());
    physLoad.setLFile(loLoad.getInputFile());
    physLoad.setPc(pc);
    physLoad.setResultType(loLoad.getType());
    physLoad.setSignature(loLoad.getAlias());
    currentPlan.add(physLoad);
    logToPhyMap.put(loLoad, physLoad);

    // Load is typically a root operator, but in the multiquery
    // case it might have a store as a predecessor.
    List<LogicalOperator> preds = loLoad.getPlan().getPredecessors(loLoad);
    if (preds == null) {
        return;
    }
    PhysicalOperator source = logToPhyMap.get(preds.get(0));
    try {
        currentPlan.connect(source, physLoad);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }
}
/**
 * Translates a logical native operator into a PONative carrying the alias,
 * native MR jar and parameters of the logical operator, with result type
 * BAG, and connects it to the physical operator of its first predecessor.
 *
 * @param loNative the logical native operator to translate
 * @throws VisitorException if no predecessor exists (error 2051) or the
 *         operators cannot be connected (error 2015)
 */
@Override
protected void visit(LONative loNative) throws VisitorException {
    final String scope = loNative.getOperatorKey().scope;
    final OperatorKey nativeKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final PONative physNative = new PONative(nativeKey);
    physNative.setAlias(loNative.getAlias());
    physNative.setNativeMRjar(loNative.getNativeMRJar());
    physNative.setParams(loNative.getParams());
    physNative.setResultType(DataType.BAG);
    logToPhyMap.put(loNative, physNative);
    currentPlan.add(physNative);

    List<LogicalOperator> preds = loNative.getPlan().getPredecessors(loNative);
    if (preds == null) {
        throw new LogicalToPhysicalTranslatorException(
                "Did not find a predecessor for Native.", 2051, PigException.BUG);
    }
    PhysicalOperator source = logToPhyMap.get(preds.get(0));
    try {
        currentPlan.connect(source, physNative);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }
}
/**
 * Translates a logical store into a POStore, copying the output file, input
 * spec, signature, sort info, tmp-store flag and a copy of the schema from
 * the logical operator, and connects it to the physical operator of its
 * first predecessor. The store takes its alias from that predecessor.
 * <p>
 * A missing predecessor is reported as error 2051, like the other visit
 * methods of this class do, instead of surfacing as a NullPointerException.
 *
 * @param loStore the logical store to translate
 * @throws VisitorException if no predecessor exists (error 2051), the store
 *         schema cannot be resolved (error 1060) or the operators cannot be
 *         connected (error 2015)
 */
@Override
protected void visit(LOStore loStore) throws VisitorException {
    String scope = loStore.getOperatorKey().scope;
    POStore store = new POStore(new OperatorKey(scope, nodeGen
            .getNextNodeId(scope)));

    // Look the predecessor up once and fail with a translator error when it
    // is absent; it is needed both for the alias and for the connection.
    List<LogicalOperator> preds = loStore.getPlan().getPredecessors(loStore);
    if (preds == null || preds.isEmpty()) {
        int errCode = 2051;
        String msg = "Did not find a predecessor for Store." ;
        throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG);
    }
    LogicalOperator input = preds.get(0);

    store.setAlias(input.getAlias());
    store.setSFile(loStore.getOutputFile());
    store.setInputSpec(loStore.getInputSpec());
    store.setSignature(loStore.getSignature());
    store.setSortInfo(loStore.getSortInfo());
    store.setIsTmpStore(loStore.isTmpStore());
    try {
        // create a new schema for ourselves so that when
        // we serialize we are not serializing objects that
        // contain the schema - apparently Java tries to
        // serialize the object containing the schema if
        // we are trying to serialize the schema reference in
        // the containing object. The schema here will be serialized
        // in JobControlCompiler
        store.setSchema(new Schema(loStore.getSchema()));
    } catch (FrontendException e1) {
        int errorCode = 1060;
        String message = "Cannot resolve Store output schema";
        throw new VisitorException(message, errorCode, PigException.BUG, e1);
    }
    currentPlan.add(store);
    PhysicalOperator from = logToPhyMap.get(input);
    try {
        currentPlan.connect(from, store);
    } catch (PlanException e) {
        int errCode = 2015;
        String msg = "Invalid physical operators in the physical plan" ;
        throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
    }
    logToPhyMap.put(loStore, store);
}
/**
 * Translates a logical constant into a ConstantExpression carrying the
 * alias, value and type of the logical operator. No connections are made.
 *
 * @param op the logical constant to translate
 * @throws VisitorException declared by the visitor contract
 */
@Override
protected void visit(LOConst op) throws VisitorException {
    final String scope = op.getOperatorKey().scope;
    final OperatorKey constantKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final ConstantExpression constant = new ConstantExpression(constantKey);
    constant.setAlias(op.getAlias());
    constant.setValue(op.getValue());
    constant.setResultType(op.getType());
    // A constant has no predecessors, so there is nothing to connect.
    currentPlan.add(constant);
    logToPhyMap.put(op, constant);
}
/**
 * Translates a logical bincond into a POBinCond whose condition, left-hand
 * side and right-hand side are the physical counterparts of the logical
 * operands, and connects the physical operators of all predecessors to it.
 *
 * @param op the logical bincond to translate
 * @throws VisitorException if the operators cannot be connected (error 2015)
 */
@Override
public void visit(LOBinCond op) throws VisitorException {
    final String scope = op.getOperatorKey().scope;
    final OperatorKey binCondKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final POBinCond binCond = new POBinCond(binCondKey, op.getRequestedParallelism());
    binCond.setAlias(op.getAlias());
    logToPhyMap.put(op, binCond);

    ExpressionOperator condition = (ExpressionOperator) logToPhyMap.get(op.getCond());
    binCond.setCond(condition);
    ExpressionOperator leftBranch = (ExpressionOperator) logToPhyMap.get(op.getLhsOp());
    binCond.setLhs(leftBranch);
    ExpressionOperator rightBranch = (ExpressionOperator) logToPhyMap.get(op.getRhsOp());
    binCond.setRhs(rightBranch);
    binCond.setResultType(op.getType());
    currentPlan.add(binCond);

    List<LogicalOperator> preds = op.getPlan().getPredecessors(op);
    for (LogicalOperator pred : preds) {
        ExpressionOperator source = (ExpressionOperator) logToPhyMap.get(pred);
        try {
            currentPlan.connect(source, binCond);
        } catch (PlanException connectFailure) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
        }
    }
}
/**
 * Translates a logical negation into a PONegative whose operand is the
 * physical counterpart of the first predecessor, and connects the two.
 *
 * @param op the logical negation to translate
 * @throws VisitorException if no predecessor exists (error 2051) or the
 *         operators cannot be connected (error 2015)
 */
@Override
protected void visit(LONegative op) throws VisitorException {
    final String scope = op.getOperatorKey().scope;
    final OperatorKey negativeKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final PONegative negation = new PONegative(negativeKey, op.getRequestedParallelism(), null);
    negation.setAlias(op.getAlias());
    currentPlan.add(negation);
    logToPhyMap.put(op, negation);

    List<LogicalOperator> preds = op.getPlan().getPredecessors(op);
    if (preds == null) {
        throw new LogicalToPhysicalTranslatorException(
                "Did not find a predecessor for Negative.", 2051, PigException.BUG);
    }
    ExpressionOperator operand = (ExpressionOperator) logToPhyMap.get(preds.get(0));
    negation.setExpr(operand);
    negation.setResultType(op.getType());
    try {
        currentPlan.connect(operand, negation);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }
}
/**
 * Translates a logical is-null test into a POIsNull whose operand is the
 * physical counterpart of the first predecessor, and connects the two. The
 * operand type is taken from the logical operand.
 * <p>
 * The predecessor check happens before the operator is added to the plan.
 *
 * @param op the logical is-null test to translate
 * @throws VisitorException if no predecessor exists (error 2051) or the
 *         operators cannot be connected (error 2015)
 */
@Override
protected void visit(LOIsNull op) throws VisitorException {
    final String scope = op.getOperatorKey().scope;
    final OperatorKey isNullKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final POIsNull nullTest = new POIsNull(isNullKey, op.getRequestedParallelism(), null);
    nullTest.setAlias(op.getAlias());

    List<LogicalOperator> preds = op.getPlan().getPredecessors(op);
    if (preds == null) {
        throw new LogicalToPhysicalTranslatorException(
                "Did not find a predecessor for Null.", 2051, PigException.BUG);
    }
    ExpressionOperator operand = (ExpressionOperator) logToPhyMap.get(preds.get(0));

    nullTest.setOperandType(op.getOperand().getType());
    currentPlan.add(nullTest);
    logToPhyMap.put(op, nullTest);
    nullTest.setExpr(operand);
    try {
        currentPlan.connect(operand, nullTest);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }
}
/**
 * Translates a logical map lookup into a POMapLookUp for the same lookup key
 * and connects the physical counterpart of the map expression to it.
 *
 * @param op the logical map lookup to translate
 * @throws VisitorException if the operators cannot be connected (error 2015)
 */
@Override
protected void visit(LOMapLookup op) throws VisitorException {
    final String scope = (op.getOperatorKey()).scope;
    final OperatorKey lookupKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final ExpressionOperator lookup =
            new POMapLookUp(lookupKey, op.getRequestedParallelism(), op.getLookUpKey());
    lookup.setResultType(op.getType());
    lookup.setAlias(op.getAlias());
    currentPlan.add(lookup);
    logToPhyMap.put(op, lookup);

    ExpressionOperator mapSource = (ExpressionOperator) logToPhyMap.get(op.getMap());
    try {
        currentPlan.connect(mapSource, lookup);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }
}
/**
 * Translates a logical cast into a POCast. For BAG and TUPLE target types
 * the field schema of the logical cast is attached; when the logical cast
 * names a load function spec it is set on the physical cast. The physical
 * counterpart of the cast expression is connected to the new operator.
 *
 * @param op the logical cast to translate
 * @throws VisitorException if the field schema cannot be obtained (error
 *         2216), the load function cannot be resolved (error 1053) or the
 *         operators cannot be connected (error 2015)
 */
@Override
protected void visit(LOCast op) throws VisitorException {
    final String scope = op.getOperatorKey().scope;
    final OperatorKey castKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final POCast physCast = new POCast(castKey, op.getRequestedParallelism());
    physCast.setAlias(op.getAlias());
    currentPlan.add(physCast);
    logToPhyMap.put(op, physCast);

    ExpressionOperator castInput = (ExpressionOperator) logToPhyMap.get(op.getExpression());
    physCast.setResultType(op.getType());

    try {
        boolean complexTarget = op.getType() == DataType.BAG || op.getType() == DataType.TUPLE;
        if (complexTarget) {
            physCast.setFieldSchema(new ResourceFieldSchema(op.getFieldSchema()));
        }
    } catch (FrontendException schemaFailure) {
        String msg = "Cannot get field schema for "+op;
        throw new LogicalToPhysicalTranslatorException(msg, 2216, PigException.BUG, schemaFailure);
    }

    FuncSpec loadFuncSpec = op.getLoadFuncSpec();
    if (loadFuncSpec != null) {
        try {
            physCast.setFuncSpec(loadFuncSpec);
        } catch (IOException funcSpecFailure) {
            String fromType = DataType.findTypeName(op.getExpression().getType());
            String toType = DataType.findTypeName(op.getType());
            String msg = "Cannot resolve load function to use for casting" +
                    " from " + fromType + " to " + toType;
            throw new LogicalToPhysicalTranslatorException(msg, 1053,
                    PigException.ERROR, funcSpecFailure);
        }
    }

    try {
        currentPlan.connect(castInput, physCast);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }
}
/**
 * Translates a logical limit into a POLimit with the same limit, alias and
 * result type, and connects it to the physical operator of its first
 * predecessor.
 *
 * @param limit the logical limit to translate
 * @throws VisitorException if no predecessor exists (error 2051) or the
 *         operators cannot be connected (error 2015)
 */
@Override
protected void visit(LOLimit limit) throws VisitorException {
    final String scope = limit.getOperatorKey().scope;
    final OperatorKey limitKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final POLimit physLimit = new POLimit(limitKey, limit.getRequestedParallelism());
    physLimit.setResultType(limit.getType());
    physLimit.setLimit(limit.getLimit());
    physLimit.setAlias(limit.getAlias());
    currentPlan.add(physLimit);
    logToPhyMap.put(limit, physLimit);

    List<LogicalOperator> preds = limit.getPlan().getPredecessors(limit);
    if (preds == null) {
        throw new LogicalToPhysicalTranslatorException(
                "Did not find a predecessor for Limit.", 2051, PigException.BUG);
    }
    PhysicalOperator source = logToPhyMap.get(preds.get(0));
    try {
        currentPlan.connect(source, physLimit);
    } catch (PlanException connectFailure) {
        throw new LogicalToPhysicalTranslatorException(
                "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
    }
}
/**
 * Translates a logical union into a POUnion and connects the physical
 * operators of all union inputs to it.
 *
 * @param op the logical union to translate
 * @throws VisitorException if the operators cannot be connected (error 2015)
 */
@Override
protected void visit(LOUnion op) throws VisitorException {
    final String scope = op.getOperatorKey().scope;
    final OperatorKey unionKey = new OperatorKey(scope, nodeGen.getNextNodeId(scope));
    final POUnion physUnion = new POUnion(unionKey, op.getRequestedParallelism());
    physUnion.setAlias(op.getAlias());
    currentPlan.add(physUnion);
    physUnion.setResultType(op.getType());
    logToPhyMap.put(op, physUnion);

    List<LogicalOperator> unionInputs = op.getInputs();
    for (LogicalOperator unionInput : unionInputs) {
        PhysicalOperator source = logToPhyMap.get(unionInput);
        try {
            currentPlan.connect(source, physUnion);
        } catch (PlanException connectFailure) {
            throw new LogicalToPhysicalTranslatorException(
                    "Invalid physical operators in the physical plan", 2015, PigException.BUG, connectFailure);
        }
    }
}
/**
 * Mirrors the soft-link predecessors of a logical operator in the current
 * physical plan: for each soft-link predecessor, a soft link is created from
 * its physical counterpart to the physical counterpart of {@code op}.
 * Does nothing when the operator has no soft-link predecessors.
 *
 * @param op the logical operator whose soft links are translated
 * @throws VisitorException with error code 2015 if a soft link cannot be
 *         created
 */
private void translateSoftLinks(LogicalOperator op) throws VisitorException {
    List<LogicalOperator> softPreds = op.getPlan().getSoftLinkPredecessors(op);
    if (softPreds != null) {
        try {
            for (LogicalOperator softPred : softPreds) {
                PhysicalOperator source = logToPhyMap.get(softPred);
                PhysicalOperator target = logToPhyMap.get(op);
                currentPlan.createSoftLink(source, target);
            }
        } catch (PlanException linkFailure) {
            throw new VisitorException("Cannot translate soft link", 2015, PigException.BUG, linkFailure);
        }
    }
}
}