/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.newplan.logical.relational;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.PigException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.LogicalToPhysicalTranslatorException;
import org.apache.pig.backend.stratosphere.executionengine.pactLayer.PactOperator;
import org.apache.pig.backend.stratosphere.executionengine.pactLayer.plans.PactPlan;
import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOCoGroup;
import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOCross;
import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOFilter;
import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOLoad;
import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOMatch;
import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOReduce;
import org.apache.pig.backend.stratosphere.executionengine.pactLayer.relationalOperators.SOStore;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.PlanException;
import org.apache.pig.impl.util.CompilerUtils;
import org.apache.pig.newplan.DependencyOrderWalker;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.OperatorPlan;
import org.apache.pig.newplan.PlanWalker;
import org.apache.pig.newplan.ReverseDependencyOrderWalkerWOSeenChk;
import org.apache.pig.newplan.logical.Util;
import org.apache.pig.newplan.logical.expression.ExpToPactTranslationVisitor;
import org.apache.pig.newplan.logical.expression.LogicalExpression;
import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
import org.apache.pig.newplan.logical.expression.ProjectExpression;
import org.apache.pig.newplan.logical.relational.LOCogroup.GROUPTYPE;
public class LogToPactTranslationVisitor extends LogicalRelationalNodesVisitor {
public LogToPactTranslationVisitor(OperatorPlan plan) throws FrontendException {
super(plan, new DependencyOrderWalker(plan));
currentPlan = new PactPlan();
logToPhyMap = new HashMap<Operator, PactOperator>();
currentPlans = new Stack<PactPlan>();
}
protected final Log log = LogFactory.getLog(getClass());
protected Map<Operator, PactOperator> logToPhyMap;
protected Stack<PactPlan> currentPlans;
protected PactPlan currentPlan;
protected NodeIdGenerator nodeGen = NodeIdGenerator.getGenerator();
protected PigContext pc;
public void setPigContext(PigContext pc) {
this.pc = pc;
}
public Map<Operator, PactOperator> getLogToPhyMap() {
return logToPhyMap;
}
public PactPlan getPhysicalPlan() {
return currentPlan;
}
@Override
public void visit(LOLoad loLoad) throws FrontendException {
String scope = DEFAULT_SCOPE;
SOLoad load = new SOLoad(new OperatorKey(scope, nodeGen
.getNextNodeId(scope)), loLoad.getLoadFunc(), loLoad.getFileSpec().getFileName());
load.setAlias(loLoad.getAlias());
load.setLFile(loLoad.getFileSpec());
load.setPc(pc);
load.setResultType(DataType.BAG);
load.setSignature(loLoad.getSignature());
load.setLimit(loLoad.getLimit());
currentPlan.add(load);
logToPhyMap.put(loLoad, load);
// Load is typically a root operator, but in the multiquery
// case it might have a store as a predecessor.
List<Operator> op = loLoad.getPlan().getPredecessors(loLoad);
PactOperator from;
if(op != null) {
from = logToPhyMap.get(op.get(0));
try {
currentPlan.connect(from, load);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid pact operators in the pact plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
}
@Override
public void visit(LOFilter filter) throws FrontendException {
String scope = DEFAULT_SCOPE;
SOFilter soFilter = new SOFilter(new OperatorKey(scope, nodeGen
.getNextNodeId(scope)), filter.getRequestedParallelisam());
soFilter.setAlias(filter.getAlias());
soFilter.setResultType(DataType.BAG);
currentPlan.add(soFilter);
logToPhyMap.put(filter, soFilter);
currentPlans.push(currentPlan);
currentPlan = new PactPlan();
PlanWalker childWalker = new ReverseDependencyOrderWalkerWOSeenChk(filter.getFilterPlan());
pushWalker(childWalker);
currentWalker.walk(
new ExpToPactTranslationVisitor( currentWalker.getPlan(),
childWalker, filter, currentPlan, logToPhyMap ) );
popWalker();
soFilter.setPlan(currentPlan);
currentPlan = currentPlans.pop();
List<Operator> op = filter.getPlan().getPredecessors(filter);
PactOperator from;
if(op != null) {
from = logToPhyMap.get(op.get(0));
} else {
int errCode = 2051;
String msg = "Did not find a predecessor for Filter." ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG);
}
try {
currentPlan.connect(from, soFilter);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
translateSoftLinks(filter);
}
@Override
public void visit(LOCross cross) throws FrontendException {
String scope = DEFAULT_SCOPE;
// List of cross predicates
List<Operator> inputs = cross.getPlan().getPredecessors(cross);
// currently support 2 inputs
if (inputs.size() != 2) {
throw new LogicalToPhysicalTranslatorException("CROSS currently supports 2 inputs only");
}
// nested cross not allowed for the moment
if (cross.isNested()) {
throw new LogicalToPhysicalTranslatorException("nested CROSS is not currently supported");
}
SOCross soCross = new SOCross(new OperatorKey(scope,nodeGen.getNextNodeId(scope)), cross.getRequestedParallelisam());
soCross.setAlias(soCross.getAlias());
soCross.setResultType(DataType.BAG);
currentPlan.add(soCross);
logToPhyMap.put(cross, soCross);
for (Operator op : cross.getPlan().getPredecessors(cross)) {
PactOperator from = logToPhyMap.get(op);
try {
currentPlan.connect(from, soCross);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid pact operators in the pact plan when trying to connect SOCross" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
translateSoftLinks(cross);
}
@Override
public void visit(LOForEach foreach) throws FrontendException {
}
/**
* This function takes in a List of LogicalExpressionPlan and converts them to
* a list of PactPlans (to be used in LOForEach translation)
*
* @param plans
* @return
* @throws FrontendException
*/
private List<PactPlan> translateExpressionPlans(LogicalRelationalOperator loj,
List<LogicalExpressionPlan> plans ) throws FrontendException {
List<PactPlan> exprPlans = new ArrayList<PactPlan>();
if( plans == null || plans.size() == 0 ) {
return exprPlans;
}
// Save the current plan onto stack
currentPlans.push(currentPlan);
for( LogicalExpressionPlan lp : plans ) {
currentPlan = new PactPlan();
// We spawn a new Dependency Walker and use it
// PlanWalker childWalker = currentWalker.spawnChildWalker(lp);
PlanWalker childWalker = new ReverseDependencyOrderWalkerWOSeenChk(lp);
// Save the old walker and use childWalker as current Walker
pushWalker(childWalker);
// We create a new ExpToPhyTranslationVisitor to walk the ExpressionPlan
currentWalker.walk(
new ExpToPactTranslationVisitor(
currentWalker.getPlan(),
childWalker, loj, currentPlan, logToPhyMap) );
exprPlans.add(currentPlan);
popWalker();
}
// Pop the current plan back out
currentPlan = currentPlans.pop();
return exprPlans;
}
@Override
public void visit(LOStore loStore) throws FrontendException {
String scope = DEFAULT_SCOPE;
SOStore store = new SOStore(new OperatorKey(scope, nodeGen.getNextNodeId(scope)));
store.setAlias(loStore.getAlias());
store.setSFile(loStore.getOutputSpec());
store.setInputSpec(loStore.getInputSpec());
store.setSignature(loStore.getSignature());
store.setSortInfo(loStore.getSortInfo());
store.setIsTmpStore(loStore.isTmpStore());
store.setSchema(Util.translateSchema( loStore.getSchema() ));
currentPlan.add(store);
List<Operator> op = loStore.getPlan().getPredecessors(loStore);
PactOperator from;
if(op != null) {
from = logToPhyMap.get(op.get(0));
}
else {
int errCode = 2051;
String msg = "Did not find a predecessor for Store." ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG);
}
try {
currentPlan.connect(from, store);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid physical operators in the physical plan" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
logToPhyMap.put(loStore, store);
}
@Override
public void visit(LOCogroup cg) throws FrontendException {
if (cg.getGroupType() != GROUPTYPE.REGULAR) {
throw new LogicalToPhysicalTranslatorException("Only REGULAR (Co)GROUP type is currently supported");
}
String scope = DEFAULT_SCOPE;
// if it's a GROUP translate into a Reduce
if (cg.getInputs((LogicalPlan)plan).size() == 1) {
SOReduce soReduce = new SOReduce(new OperatorKey(scope,nodeGen.getNextNodeId(scope)), cg.getRequestedParallelisam());
soReduce.setAlias(cg.getAlias());
soReduce.setResultType(DataType.BAG);
// for now, only allow Project expressions for GROUP and only grouping by a field
LogicalExpressionPlan lp = cg.getExpressionPlans().get(0).get(0);
if (lp.size() > 1) {
throw new LogicalToPhysicalTranslatorException("Grouping by expressions or tuples is currently not supported");
}
else {
LogicalExpression expr = (LogicalExpression) lp.getOperators().next();
if (!(expr instanceof ProjectExpression)) {
throw new LogicalToPhysicalTranslatorException("Only Project expressions are currently supported in GROUP");
}
else {
soReduce.setFirstKeyPosition(((ProjectExpression)(expr)).getColNum());
}
}
currentPlan.add(soReduce);
logToPhyMap.put(cg, soReduce);
List <Operator> op = cg.getPlan().getPredecessors(cg);
PactOperator from = logToPhyMap.get(op.get(0));
try {
currentPlan.connect(from, soReduce);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid pact operators in the pact plan when trying to connect SOReduce" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
// if it's a CoGROUP translate into a CoGroup
else if (cg.getInputs((LogicalPlan)plan).size() == 2) {
SOCoGroup soCoGroup = new SOCoGroup(new OperatorKey(scope,nodeGen.getNextNodeId(scope)), cg.getRequestedParallelisam());
soCoGroup.setAlias(cg.getAlias());
soCoGroup.setResultType(DataType.BAG);
currentPlan.add(soCoGroup);
logToPhyMap.put(cg, soCoGroup);
List<Operator> inputs = cg.getPlan().getPredecessors(cg);
for (Operator op : inputs) {
PactOperator from = logToPhyMap.get(op);
try {
currentPlan.connect(from, soCoGroup);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid pact operators in the pact plan when trying to connect SOcoGroup" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
}
// more than 2 inputs are not currently supported
else {
throw new LogicalToPhysicalTranslatorException("Grouping on more then 2 relations in not currently supported");
}
translateSoftLinks(cg);
}
@Override
public void visit(LOJoin loj) throws FrontendException {
String scope = DEFAULT_SCOPE;
// List of join predicates
List<Operator> inputs = loj.getPlan().getPredecessors(loj);
String alias = loj.getAlias();
// currently support 2 inputs
if (inputs.size() != 2) {
throw new LogicalToPhysicalTranslatorException("Joins currently support 2 inputs only");
}
int[] keyPositions = new int[inputs.size()];
//for each join predicate, find the pactOperators
for (int i=0;i<inputs.size();i++) {
List<LogicalExpressionPlan> plans = (List<LogicalExpressionPlan>)loj.getJoinPlan(i);
LogicalExpression lExpr = (LogicalExpression) plans.get(0).getOperators().next();
// retrieve key positions
keyPositions[i] = ((ProjectExpression)(lExpr)).getColNum();
}
//This is the ONLY case to cover
SOMatch matchOperator = new SOMatch(new OperatorKey(scope, nodeGen.getNextNodeId(scope)), loj.getRequestedParallelisam());
matchOperator.setAlias(alias);
matchOperator.setResultType(DataType.BAG);
matchOperator.setFirstKeyPosition(keyPositions[0]);
matchOperator.setSecondKeyPosition(keyPositions[1]);
currentPlan.add(matchOperator);
logToPhyMap.put(loj, matchOperator);
for (Operator op : inputs) {
PactOperator from = logToPhyMap.get(op);
try {
currentPlan.connect(from, matchOperator);
} catch (PlanException e) {
int errCode = 2015;
String msg = "Invalid pact operators in the pact plan when trying to connect SOMatch" ;
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
}
translateSoftLinks(loj);
}
/**
* updates plan with check for empty bag and if bag is empty to flatten a bag
* with as many null's as dictated by the schema
* @param fePlan the plan to update
* @param joinInput the relation for which the corresponding bag is being checked
* @throws FrontendException
*/
public static void updateWithEmptyBagCheck(PactPlan fePlan, Operator joinInput) throws FrontendException {
LogicalSchema inputSchema = null;
try {
inputSchema = ((LogicalRelationalOperator) joinInput).getSchema();
if(inputSchema == null) {
int errCode = 1109;
String msg = "Input (" + ((LogicalRelationalOperator) joinInput).getAlias() + ") " +
"on which outer join is desired should have a valid schema";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.INPUT);
}
} catch (FrontendException e) {
int errCode = 2104;
String msg = "Error while determining the schema of input";
throw new LogicalToPhysicalTranslatorException(msg, errCode, PigException.BUG, e);
}
CompilerUtils.addEmptyBagOuterJoin(fePlan, Util.translateSchema(inputSchema));
}
private void translateSoftLinks(Operator op) throws FrontendException {
List<Operator> preds = op.getPlan().getSoftLinkPredecessors(op);
if (preds == null)
return;
for (Operator pred : preds) {
PactOperator from = logToPhyMap.get(pred);
currentPlan.createSoftLink(from, logToPhyMap.get(op));
}
}
}