/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.impl.logicalLayer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.ProjectionMap;
import org.apache.pig.impl.plan.RequiredFields;
import org.apache.pig.impl.plan.VisitorException;
import org.apache.pig.impl.streaming.ExecutableManager;
import org.apache.pig.impl.streaming.StreamingCommand;
import org.apache.pig.impl.streaming.StreamingCommand.Handle;
import org.apache.pig.impl.streaming.StreamingCommand.HandleSpec;
import org.apache.pig.impl.util.MultiMap;
import org.apache.pig.impl.util.Pair;

/**
 * {@link LOStream} represents the specification of an external
 * command to be executed in a Pig Query.
 *
 * <code>LOStream</code> encapsulates all relevant details of the
 * command specified by the user either directly via the <code>STREAM</code>
 * operator or indirectly via a <code>DEFINE</code> operator. It includes
 * details such as input/output/error specifications and also files to be
 * shipped to the cluster and files to be cached.
 */
public class LOStream extends RelationalOperator {

    private static final long serialVersionUID = 2L;

    // the StreamingCommand object for the
    // Stream Operator this operator represents
    private StreamingCommand command;

    // Declared transient, so it is excluded from default Java serialization
    // of this operator.
    private transient ExecutableManager executableManager;

    // Set once getSchema() has called setParent() on mSchema, so that the
    // call is made at most once per operator instance.
    private boolean isParentSet = false;

    /**
     * Create a new <code>LOStream</code> with the given command.
     *
     * @param plan the logical plan this operator is a part of
     * @param k the operator key for this operator
     * @param input operator that is input to this command; accepted for
     *              API compatibility but not stored by this constructor
     * @param exeManager ExecutableManager used by this streaming command.
     * @param cmd StreamingCommand for this stream to run.
     */
    public LOStream(LogicalPlan plan, OperatorKey k, LogicalOperator input,
            ExecutableManager exeManager, StreamingCommand cmd) {
        super(plan, k);
        this.command = cmd;
        this.executableManager = exeManager;
    }

    /**
     * Get the StreamingCommand object associated
     * with this operator
     *
     * @return the StreamingCommand object
     */
    public StreamingCommand getStreamingCommand() {
        return command;
    }

    /**
     * Returns the schema currently held in <code>mSchema</code>.
     *
     * The first time a non-null schema is returned, <code>setParent</code>
     * is invoked on it; later calls return the schema without repeating
     * that step.
     *
     * @return the schema of this operator, or <code>null</code> if none is set
     * @throws FrontendException declared by the overridden method
     * @see org.apache.pig.impl.logicalLayer.LogicalOperator#getSchema()
     */
    @Override
    public Schema getSchema() throws FrontendException {
        if (mSchema == null) {
            return null;
        }

        if (!isParentSet) {
            setParent(mSchema);
            isParentSet = true;
        }
        return mSchema;
    }

    /**
     * Set the optimized {@link HandleSpec} for the given {@link Handle} of
     * this operator's {@link StreamingCommand}.
     *
     * The command is cloned first and the clone is modified, after which the
     * clone replaces this operator's command. For a handle other than
     * {@link Handle#INPUT} or {@link Handle#OUTPUT} no spec is changed, but
     * the command is still replaced by its clone.
     *
     * @param handle <code>Handle</code> to optimize
     * @param spec optimized specification for the handle
     */
    public void setOptimizedSpec(Handle handle, String spec) {
        // The reason we need to clone and optimize the clone is the following:
        // consider a script like this:
        // define CMD1 `perl -ne 'print $_;print STDERR "stderr $_";'`;
        // define CMD2 `cat`;
        // A = load 'bla' split by 'file';
        // B = stream A through CMD1;
        // C = stream B through CMD1;
        // D = stream C through CMD2;
        // store D into 'bla';
        // In this case CMD1 is represented by a single StreamingCommand Object
        // which will be present as the "command" member in both the
        // LOStream operators corresponding to B and C. However we want to
        // optimize only B's input spec since it is immediately following a load
        // and is conducive to optimization. At this point we clone and make
        // sure only B's "command" gets optimized while C's "command" remains
        // untouched.
        StreamingCommand optimizedCommand = (StreamingCommand) command.clone();

        if (handle == Handle.INPUT) {
            HandleSpec streamInputSpec = optimizedCommand.getInputSpec();
            streamInputSpec.setSpec(spec);
        } else if (handle == Handle.OUTPUT) {
            HandleSpec streamOutputSpec = optimizedCommand.getOutputSpec();
            streamOutputSpec.setSpec(spec);
        }

        command = optimizedCommand;
    }

    /**
     * Dispatches to the visitor's handler for this operator type.
     *
     * @param v the visitor to accept
     * @throws VisitorException if the visitor fails
     * @see org.apache.pig.impl.logicalLayer.LogicalOperator#visit(org.apache.pig.impl.logicalLayer.LOVisitor)
     */
    @Override
    public void visit(LOVisitor v) throws VisitorException {
        v.visit(this);
    }

    /**
     * Builds the display name from the alias string, the command's string
     * form and the scope and id of the operator key.
     *
     * @return the display name of this operator
     * @see org.apache.pig.impl.plan.Operator#name()
     */
    @Override
    public String name() {
        // Plain concatenation yields the same text as command.toString() for
        // a non-null command and renders "null" instead of throwing otherwise.
        return getAliasString() + "Stream (" + command + ") "
                + mKey.scope + "-" + mKey.id;
    }

    /**
     * @return <code>false</code>; this operator does not support multiple inputs
     * @see org.apache.pig.impl.plan.Operator#supportsMultipleInputs()
     */
    @Override
    public boolean supportsMultipleInputs() {
        return false;
    }

    /**
     * @return the ExecutableManager
     */
    public ExecutableManager getExecutableManager() {
        return executableManager;
    }

    /**
     * Returns the projection map of this operator, computing it on the first
     * call and returning the stored result (which may be <code>null</code>)
     * on every later call.
     *
     * @return the projection map, or <code>null</code> when it cannot be
     *         determined (see {@link #buildStreamProjectionMap()})
     */
    @Override
    public ProjectionMap getProjectionMap() {
        if (mIsProjectionMapComputed) {
            return mProjectionMap;
        }

        // Flag is raised before computing, so a null result is remembered too.
        mIsProjectionMapComputed = true;
        mProjectionMap = buildStreamProjectionMap();
        return mProjectionMap;
    }

    /**
     * Computes the projection map for this operator: every column of the
     * output schema is listed as an added field and every column of the
     * first predecessor's schema is listed as a removed field of input 0.
     *
     * @return the new projection map, or <code>null</code> if either schema
     *         lookup fails, the output schema is unknown, or this operator
     *         has no predecessor in the plan
     */
    private ProjectionMap buildStreamProjectionMap() {
        Schema outputSchema;
        try {
            outputSchema = getSchema();
        } catch (FrontendException fee) {
            // getProjectionMap() cannot throw; an unavailable schema is
            // reported as "no projection map".
            return null;
        }
        if (outputSchema == null) {
            return null;
        }

        // Keep the interface type: the plan is not required to hand back an
        // ArrayList, and an empty list must not reach get(0).
        List<LogicalOperator> predecessors = mPlan.getPredecessors(this);
        if (predecessors == null || predecessors.isEmpty()) {
            return null;
        }

        Schema inputSchema;
        try {
            inputSchema = predecessors.get(0).getSchema();
        } catch (FrontendException fee) {
            return null;
        }

        // add all the elements of the output schema to the added fields
        List<Integer> addedFields = new ArrayList<Integer>();
        for (int i = 0; i < outputSchema.size(); ++i) {
            addedFields.add(i);
        }

        // add all the elements of the input schema to the removed fields;
        // the list stays null when there is nothing to remove
        List<Pair<Integer, Integer>> removedFields = null;
        if (inputSchema != null && inputSchema.size() > 0) {
            removedFields = new ArrayList<Pair<Integer, Integer>>();
            for (int i = 0; i < inputSchema.size(); ++i) {
                removedFields.add(new Pair<Integer, Integer>(0, i));
            }
        }

        // NOTE(review): the first constructor argument is presumably the
        // mapped-fields collection (null = nothing maps through) - confirm
        // against ProjectionMap.
        return new ProjectionMap(null, removedFields, addedFields);
    }

    /**
     * Returns a single-element list holding
     * <code>new RequiredFields(true, false)</code>.
     *
     * NOTE(review): the flags presumably mean "needs all fields" / "needs no
     * fields" - confirm against RequiredFields.
     *
     * @return the required fields of this operator's input
     */
    @Override
    public List<RequiredFields> getRequiredFields() {
        List<RequiredFields> requiredFields = new ArrayList<RequiredFields>();
        requiredFields.add(new RequiredFields(true, false));
        return requiredFields;
    }

    /**
     * Returns the inputs relevant to the given output column.
     *
     * @param output index of the output; only <code>0</code> is accepted
     * @param column index of the output column; must be non-negative and,
     *               when the schema is known, smaller than its size
     * @return a single-element list holding <code>new RequiredFields(true)</code>,
     *         or <code>null</code> if <code>output</code> or
     *         <code>column</code> is out of range
     * @throws FrontendException if the schema lookup fails
     */
    @Override
    public List<RequiredFields> getRelevantInputs(int output, int column) throws FrontendException {
        if (!mIsSchemaComputed) {
            getSchema();
        }

        if (output != 0) {
            return null;
        }

        if (column < 0) {
            return null;
        }

        // if we have schema information, check if output column is valid
        if (mSchema != null && column >= mSchema.size()) {
            return null;
        }

        List<RequiredFields> result = new ArrayList<RequiredFields>();
        result.add(new RequiredFields(true));
        return result;
    }
}