/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.logicalLayer;
import java.util.List;
import java.util.Iterator;
import java.util.Set;
import java.util.Map;
import java.util.ArrayList;
import org.apache.pig.impl.plan.PlanVisitor;
import org.apache.pig.impl.plan.PlanWalker;
import org.apache.pig.impl.plan.DepthFirstWalker;
import org.apache.pig.impl.plan.DependencyOrderWalker;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.plan.PlanException;
import org.apache.pig.impl.logicalLayer.parser.ParseException;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.util.MultiMap;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.data.DataType;
/**
* A visitor to walk operators that contain a nested plan and translate project( * )
* operators to a list of projection operators, i.e.,
* project( * ) -> project(0), project(1), ... project(n-2), project(n-1)
*/
public class ProjectStarTranslator extends
LOVisitor {
public ProjectStarTranslator(LogicalPlan plan) {
super(plan, new DependencyOrderWalker<LogicalOperator, LogicalPlan>(plan));
}
/**
*
* @param cg
* the logical cogroup operator that has to be visited
* @throws VisitorException
*/
protected void visit(LOCogroup cg) throws VisitorException {
//get the attributes of cogroup that are modified during the trnalsation
MultiMap<LogicalOperator, LogicalPlan> mapGByPlans = cg.getGroupByPlans();
for(LogicalOperator op: cg.getInputs()) {
ArrayList<LogicalPlan> newGByPlans = new ArrayList<LogicalPlan>();
for(LogicalPlan lp: mapGByPlans.get(op)) {
if (checkPlanForProjectStar(lp)) {
ArrayList<LogicalPlan> translatedPlans = translateProjectStarInPlan(lp);
for(int j = 0; j < translatedPlans.size(); ++j) {
newGByPlans.add(translatedPlans.get(j));
}
} else {
newGByPlans.add(lp);
}
}
mapGByPlans.removeKey(op);
mapGByPlans.put(op, newGByPlans);
}
// check if after translation none of group by plans in a cogroup
// have a project(*) - if they still do it's because the input
// for the project(*) did not have a schema - in this case, we should
// error out since we could have different number/types of
// cogroup keys
if(cg.getInputs().size() > 1) { // only for cogroups
for(LogicalOperator op: cg.getInputs()) {
for(LogicalPlan lp: mapGByPlans.get(op)) {
if(checkPlanForProjectStar(lp)) {
// not following Error handling guidelines to give error code
// and error source since this will get swallowed by the parser
// which will just return a ParseException
throw new VisitorException("Cogroup/Group by * is only allowed if " +
"the input has a schema");
}
}
}
// check if after translation all group by plans have same arity
int arity = mapGByPlans.get(cg.getInputs().get(0)).size();
for(LogicalOperator op: cg.getInputs()) {
if(arity != mapGByPlans.get(op).size()) {
// not following Error handling guidelines to give error code
// and error source since this will get swallowed by the parser
// which will just return a ParseException
throw new VisitorException("The arity of cogroup/group by columns " +
"do not match");
}
}
}
}
/* (non-Javadoc)
* @see org.apache.pig.impl.logicalLayer.LOVisitor#visit(org.apache.pig.impl.logicalLayer.LOJoin)
*/
@Override
protected void visit(LOJoin join) throws VisitorException {
//get the attributes of LOJoin that are modified during the translation
MultiMap<LogicalOperator, LogicalPlan> joinColPlans = join.getJoinPlans();
for(LogicalOperator op: join.getInputs()) {
ArrayList<LogicalPlan> newPlansAfterTranslation = new ArrayList<LogicalPlan>();
for(LogicalPlan lp: joinColPlans.get(op)) {
if (checkPlanForProjectStar(lp)) {
ArrayList<LogicalPlan> translatedPlans = translateProjectStarInPlan(lp);
for(int j = 0; j < translatedPlans.size(); ++j) {
newPlansAfterTranslation.add(translatedPlans.get(j));
}
} else {
newPlansAfterTranslation.add(lp);
}
}
joinColPlans.removeKey(op);
joinColPlans.put(op, newPlansAfterTranslation);
}
}
/**
*
* @param forEach
* the logical foreach operator that has to be visited
* @throws VisitorException
*/
protected void visit(LOForEach forEach) throws VisitorException {
//get the attributes of foreach that are modified during the trnalsation
super.visit(forEach);
//List of inner plans
ArrayList<LogicalPlan> foreachPlans = forEach.getForEachPlans();
ArrayList<LogicalPlan> newForeachPlans = new ArrayList<LogicalPlan>();
//the flatten list
List<Boolean> flattenList = forEach.getFlatten();
ArrayList<Boolean> newFlattenList = new ArrayList<Boolean>();
//user specified schemas in the as clause
List<Schema> userDefinedSchemaList = forEach.getUserDefinedSchema();
ArrayList<Schema> newUserDefinedSchemaList = new ArrayList<Schema>();
for(int i = 0; i < foreachPlans.size(); ++i) {
LogicalPlan lp = foreachPlans.get(i);
if(checkPlanForProjectStar(lp)) {
ArrayList<LogicalPlan> translatedPlans = translateProjectStarInPlan(lp);
Schema s = userDefinedSchemaList.get(i);
for(int j = 0; j < translatedPlans.size(); ++j) {
LogicalPlan translatedPlan = translatedPlans.get(j);
newForeachPlans.add(translatedPlan);
newFlattenList.add(flattenList.get(i));
if(null != s) {
try {
if(j < s.size()) {
newUserDefinedSchemaList.add(new Schema(s.getField(j)));
} else {
newUserDefinedSchemaList.add(null);
}
} catch (FrontendException fee) {
throw new VisitorException(fee.getMessage(), fee);
}
} else {
newUserDefinedSchemaList.add(null);
}
}
} else {
newForeachPlans.add(lp);
newFlattenList.add(flattenList.get(i));
if(null != userDefinedSchemaList) {
newUserDefinedSchemaList.add(userDefinedSchemaList.get(i));
} else {
newUserDefinedSchemaList.add(null);
}
}
}
forEach.setForEachPlans(newForeachPlans);
forEach.setFlatten(newFlattenList);
forEach.setUserDefinedSchema(newUserDefinedSchemaList);
}
/**
*
* @param s
* the logical sort operator that has to be visited
* @throws VisitorException
*/
protected void visit(LOSort s) throws VisitorException {
//get the attributes of sort that are modified during the trnalsation
//List of inner plans
List<LogicalPlan> sortPlans = s.getSortColPlans();
ArrayList<LogicalPlan> newSortPlans = new ArrayList<LogicalPlan>();
//sort order
List<Boolean> sortOrder = s.getAscendingCols();
ArrayList<Boolean> newSortOrder = new ArrayList<Boolean>();
for(int i = 0; i < sortPlans.size(); ++i) {
LogicalPlan lp = sortPlans.get(i);
if(checkPlanForProjectStar(lp)) {
ArrayList<LogicalPlan> translatedPlans = translateProjectStarInPlan(lp);
for(int j = 0; j < translatedPlans.size(); ++j) {
newSortPlans.add(translatedPlans.get(j));
newSortOrder.add(sortOrder.get(i));
}
} else {
newSortPlans.add(lp);
newSortOrder.add(sortOrder.get(i));
}
}
s.setSortColPlans(newSortPlans);
s.setAscendingCols(newSortOrder);
}
private boolean checkPlanForProjectStar(LogicalPlan lp) {
List<LogicalOperator> leaves = lp.getLeaves();
for(LogicalOperator op: leaves) {
if(op instanceof LOProject) {
if(((LOProject) op).isStar() && ((LOProject)op).getType() != DataType.BAG) {
return true;
}
}
}
return false;
}
private LOProject getProjectStarFromPlan(LogicalPlan lp) {
List<LogicalOperator> leaves = lp.getLeaves();
for(LogicalOperator op: leaves) {
if(op instanceof LOProject) {
if(((LOProject) op).isStar()) {
return (LOProject)op;
}
}
}
return null;
}
private ArrayList<LogicalPlan> translateProjectStarInPlan(LogicalPlan lp) throws VisitorException {
//translate the project( * ) into a list of projections
LOProject projectStar = getProjectStarFromPlan(lp);
LogicalOperator projectInput = projectStar.getExpression();
ArrayList<LogicalPlan> translatedPlans = new ArrayList<LogicalPlan>();
Schema s = null;
try {
if(!(projectInput instanceof ExpressionOperator)) {
s = projectInput.getSchema();
} else {
Schema.FieldSchema fs = ((ExpressionOperator)projectInput).getFieldSchema();
if(null != fs) {
s = fs.schema;
}
}
} catch (FrontendException fee) {
throw new VisitorException(fee.getMessage(), fee);
}
if (null != s) {
for(int i = 0; i < s.size(); ++i) {
LogicalPlan replicatedPlan = replicatePlan(lp);
replaceProjectStar(replicatedPlan, projectStar, i);
translatedPlans.add(replicatedPlan);
}
} else {
translatedPlans.add(replicatePlan(lp));
}
return translatedPlans;
}
private LogicalPlan replicatePlan(LogicalPlan lp) throws VisitorException {
LogicalPlan replicatedPlan = new LogicalPlan();
for(LogicalOperator root: lp.getRoots()) {
replicatedPlan.add(root);
addSuccessors(lp, replicatedPlan, root);
}
return replicatedPlan;
}
private void addSuccessors(LogicalPlan lp, LogicalPlan replicatedPlan, LogicalOperator root) throws VisitorException {
List<LogicalOperator> successors = lp.getSuccessors(root);
if(null == successors) return;
for(LogicalOperator succ: successors) {
replicatedPlan.add(succ);
try {
replicatedPlan.connect(root, succ);
} catch (PlanException pe) {
throw new VisitorException(pe.getMessage(), pe);
}
addSuccessors(lp, replicatedPlan, succ);
}
}
private void replaceProjectStar(LogicalPlan lp, LOProject projectStar, int column) throws VisitorException {
String scope = projectStar.getOperatorKey().getScope();
LogicalOperator projectInput = projectStar.getExpression();
LogicalPlan projectPlan = projectStar.getPlan();
LOProject replacementProject = new LOProject(projectPlan, OperatorKey.genOpKey(scope), projectInput, column);
try {
lp.replace(projectStar, replacementProject);
} catch (PlanException pe) {
throw new VisitorException(pe.getMessage(), pe);
}
}
}