/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.impl.logicalLayer.optimizer;

import java.util.ArrayList;
import java.util.List;

import org.apache.pig.PigException;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.LOConst;
import org.apache.pig.impl.logicalLayer.LOSplitOutput;
import org.apache.pig.impl.logicalLayer.LogicalOperator;
import org.apache.pig.impl.logicalLayer.LogicalPlan;
import org.apache.pig.impl.logicalLayer.LOSplit;
import org.apache.pig.impl.logicalLayer.LOStore;
import org.apache.pig.impl.plan.DepthFirstWalker;
import org.apache.pig.impl.plan.NodeIdGenerator;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.optimizer.OptimizerException;

/**
 * Logical-plan transformer that makes an implicit fan-out explicit.
 *
 * <p>When an operator that is neither an {@link LOSplit} nor an
 * {@link LOStore} has two or more successors in the plan, {@link #transform}
 * places a single {@link LOSplit} directly after it and gives every former
 * successor its own {@link LOSplitOutput} whose condition is the boolean
 * constant {@code true}.
 */
public class ImplicitSplitInserter extends LogicalTransformer {

    /** Error code raised when the matched node list is null or empty. */
    private static final int ERR_NO_NODES = 2052;

    /** Error code raised when {@link #check} fails unexpectedly. */
    private static final int ERR_CHECK_FAILED = 2048;

    /** Error code raised when {@link #transform} fails unexpectedly. */
    private static final int ERR_TRANSFORM_FAILED = 2047;

    /**
     * Creates the transformer for the given plan.
     *
     * @param plan the logical plan handed to the {@code LogicalTransformer}
     *             super constructor
     */
    public ImplicitSplitInserter(LogicalPlan plan) {
        super(plan);
    }

    /**
     * Decides whether the first node of {@code nodes} needs an explicit split.
     *
     * @param nodes matched operators; only element 0 is inspected
     * @return {@code true} when element 0 has at least two successors in the
     *         plan and is neither an {@link LOSplit} nor an {@link LOStore};
     *         {@code false} otherwise
     * @throws OptimizerException with code 2052 when {@code nodes} is null or
     *         empty, or with code 2048 wrapping any exception raised while
     *         inspecting the plan
     */
    @Override
    public boolean check(List<LogicalOperator> nodes) throws OptimizerException {
        requireNonEmpty(nodes);

        try {
            LogicalOperator candidate = nodes.get(0);
            List<LogicalOperator> children = mPlan.getSuccessors(candidate);

            boolean fansOut = (children != null) && (children.size() >= 2);
            return fansOut
                    && !(candidate instanceof LOSplit)
                    && !(candidate instanceof LOStore);
        } catch (Exception e) {
            throw new OptimizerException(
                    "Error while performing checks to introduce split operators.",
                    ERR_CHECK_FAILED, PigException.BUG, e);
        }
    }

    /**
     * Inserts one {@link LOSplit} after the first node of {@code nodes} and one
     * {@link LOSplitOutput} in front of each of that node's former successors.
     *
     * <p>For a node with two successors the plan changes from
     * <pre>
     *   Succ1      Succ2
     *       \      /
     *      nodes.get(0)
     * </pre>
     * to
     * <pre>
     *    Succ1          Succ2
     *      |              |
     *  SplitOutput   SplitOutput
     *        \         /
     *           Split
     *             |
     *        nodes.get(0)
     * </pre>
     *
     * @param nodes matched operators; only element 0 is used
     * @throws OptimizerException with code 2052 when {@code nodes} is null or
     *         empty, or with code 2047 wrapping any exception raised while the
     *         plan is being rewired
     */
    @Override
    public void transform(List<LogicalOperator> nodes) throws OptimizerException {
        requireNonEmpty(nodes);

        try {
            LogicalOperator source = nodes.get(0);
            String scope = source.getOperatorKey().scope;
            NodeIdGenerator ids = NodeIdGenerator.getGenerator();

            // The new split carries the alias of the operator it follows.
            OperatorKey splitKey = new OperatorKey(scope, ids.getNextNodeId(scope));
            LOSplit split = new LOSplit(mPlan, splitKey, new ArrayList<LogicalOperator>());
            split.setAlias(source.getAlias());
            mPlan.add(split);

            // Work on a private copy of the successor list: the calls below
            // rewire the plan, and (per the original author's note) that
            // changes the list the plan hands out.
            List<LogicalOperator> consumers =
                    new ArrayList<LogicalOperator>(mPlan.getSuccessors(source));

            // Step 1: put the SAME split between the source and each consumer.
            // insertBetween is used because, per the original author's note,
            // it keeps the ordering of operators in the graph. Result:
            //
            //   Succ1   Succ2
            //     |       |
            //   Split   Split      (one split node, reached by several edges)
            //      \     /
            //    nodes.get(0)
            for (int i = 0; i < consumers.size(); i++) {
                mPlan.insertBetween(source, split, consumers.get(i));
            }

            // Step 2: keep only the first source -> split connection and drop
            // the others, one disconnect call per surplus edge. Result:
            //
            //   Succ1   Succ2
            //      \     /
            //       Split
            //         |
            //    nodes.get(0)
            for (int surplus = consumers.size() - 1; surplus > 0; surplus--) {
                mPlan.disconnect(source, split);
            }

            // Step 3: give every consumer its own split output, numbered
            // 0, 1, 2, ... in successor order, with a constant-true condition.
            for (int position = 0; position < consumers.size(); position++) {
                LogicalOperator consumer = consumers.get(position);

                LogicalPlan alwaysTrue = new LogicalPlan();
                OperatorKey constKey = new OperatorKey(scope, ids.getNextNodeId(scope));
                LOConst trueConst = new LOConst(mPlan, constKey, Boolean.TRUE);
                trueConst.setType(DataType.BOOLEAN);
                alwaysTrue.add(trueConst);

                OperatorKey outputKey = new OperatorKey(scope, ids.getNextNodeId(scope));
                LOSplitOutput output = new LOSplitOutput(mPlan, outputKey, position, alwaysTrue);
                split.addOutput(output);
                mPlan.add(output);
                mPlan.insertBetween(split, output, consumer);
                output.setAlias(split.getAlias());
            }
        } catch (Exception e) {
            throw new OptimizerException(
                    "Internal error. Unable to introduce split operators.",
                    ERR_TRANSFORM_FAILED, PigException.BUG, e);
        }
    }

    /**
     * Rejects a null or empty node list with the optimizer's internal-error
     * exception (code 2052).
     *
     * @param nodes the list to validate
     * @throws OptimizerException when {@code nodes} is null or has no elements
     */
    private static void requireNonEmpty(List<LogicalOperator> nodes) throws OptimizerException {
        if (nodes != null && nodes.size() > 0) {
            return;
        }
        throw new OptimizerException(
                "Internal error. Cannot retrieve operator from null or empty list.",
                ERR_NO_NODES, PigException.BUG);
    }
}