/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.PigException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhyPlanVisitor;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.NullableTuple;
import org.apache.pig.impl.io.PigNullableWritable;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.pig.impl.plan.VisitorException;

/**
 * The package operator that packages the globally rearranged tuples
 * into output format as required by multi-query de-multiplexer.
 * <p>
 * This operator is used when merging multiple Map-Reduce splittees
 * into a Map-only splitter during multi-query optimization.
 * The package operators of the reduce plans of the splittees form an
 * indexed package list inside this operator. When this operator
 * receives an input, it extracts the index from the key and calls the
 * corresponding package to get the output data.
 * <p>
 * Due to the recursive nature of multi-query optimization, this operator
 * may be contained in another multi-query packager.
 * <p>
 * The successor of this operator must be a PODemux operator which
 * knows how to consume the output of this operator.
 */
public class POMultiQueryPackage extends POPackage {

    private static final long serialVersionUID = 1L;

    /**
     * Mask applied to the key's index before it is used to select a
     * package; only the low seven bits of the index are kept.
     */
    private static final int IDX_PART = 0x7F;

    // Shared by all instances so that the logger is not part of the
    // per-instance state of this operator.
    private static final Log log = LogFactory.getLog(POMultiQueryPackage.class);

    /** The nested packages, selected by (masked key index - baseIndex). */
    private List<POPackage> packages = new ArrayList<POPackage>();

    /** The key most recently attached through {@link #attachInput}. */
    private PigNullableWritable myKey;

    /** Offset subtracted from the masked key index to get a list position. */
    private int baseIndex = 0;

    /**
     * Constructs an operator with the specified key.
     *
     * @param k the operator key
     */
    public POMultiQueryPackage(OperatorKey k) {
        this(k, -1, null);
    }

    /**
     * Constructs an operator with the specified key
     * and degree of parallelism.
     *
     * @param k the operator key
     * @param rp the degree of parallelism requested
     */
    public POMultiQueryPackage(OperatorKey k, int rp) {
        this(k, rp, null);
    }

    /**
     * Constructs an operator with the specified key and inputs.
     *
     * @param k the operator key
     * @param inp the inputs that this operator will read data from
     */
    public POMultiQueryPackage(OperatorKey k, List<PhysicalOperator> inp) {
        this(k, -1, inp);
    }

    /**
     * Constructs an operator with the specified key,
     * degree of parallelism and inputs.
     *
     * @param k the operator key
     * @param rp the degree of parallelism requested
     * @param inp the inputs that this operator will read data from
     */
    public POMultiQueryPackage(OperatorKey k, int rp, List<PhysicalOperator> inp) {
        super(k, rp, inp);
    }

    /**
     * Returns a display name made of the base index and the operator key.
     *
     * @return the display name of this operator
     */
    @Override
    public String name() {
        return "MultiQuery Package[" + baseIndex + "] - " + getOperatorKey().toString();
    }

    @Override
    public boolean supportsMultipleInputs() {
        return false;
    }

    @Override
    public void visit(PhyPlanVisitor v) throws VisitorException {
        v.visitMultiQueryPackage(this);
    }

    @Override
    public boolean supportsMultipleOutputs() {
        return false;
    }

    /**
     * Remembers the key and the tuple iterator that the next call to
     * {@link #getNext(Tuple)} will hand to the selected package.
     *
     * @param k the indexed key of the incoming group
     * @param inp the iterator over the tuples of the incoming group
     */
    @Override
    public void attachInput(PigNullableWritable k, Iterator<NullableTuple> inp) {
        tupIter = inp;
        myKey = k;
    }

    /**
     * Drops the references to the currently attached key and iterator.
     */
    @Override
    public void detachInput() {
        tupIter = null;
        myKey = null;
    }

    /**
     * Appends the specified package object to the end of
     * the package list.
     *
     * @param pack package to be appended to the list
     */
    public void addPackage(POPackage pack) {
        packages.add(pack);
    }

    /**
     * Returns the list of packages.
     * <p>
     * The returned list is the internal list of this operator, not a copy.
     *
     * @return the list of the packages
     */
    public List<POPackage> getPackages() {
        return packages;
    }

    /**
     * Constructs the output tuple from the inputs.
     * <p>
     * The output is consumed by the demultiplexer operator
     * (PODemux) in the format (key, {bag of tuples}) where key
     * is an indexed WritableComparable, not the wrapped value as a pig type.
     *
     * @param t the tuple passed through to the selected package
     * @return the result of the selected package, with the first field of
     *         its tuple replaced by the attached indexed key
     * @throws ExecException if the index derived from the attached key does
     *         not select a package in the package list (error code 2140)
     */
    @Override
    public Result getNext(Tuple t) throws ExecException {

        // Position in the package list: the masked key index, shifted by
        // the base index of this operator.
        final int index = (myKey.getIndex() & IDX_PART) - baseIndex;

        if (index < 0 || index >= packages.size()) {
            int errCode = 2140;
            String msg = "Invalid package index " + index
                    + " should be in the range between 0 (inclusive) and "
                    + packages.size() + " (exclusive)";
            throw new ExecException(msg, errCode, PigException.BUG);
        }

        POPackage pack = packages.get(index);

        pack.attachInput(myKey, tupIter);

        Result res = pack.getNext(t);

        Tuple tuple = (Tuple) res.result;

        // the key present in the first field
        // of the tuple above is the real key without
        // index information - this is because the
        // package above, extracts the real key out of
        // the PigNullableWritable key - we are going to
        // give this result tuple to a PODemux operator
        // which needs a PigNullableWritable key so
        // it can figure out the index - we already have
        // the PigNullableWritable key cached in "myKey"
        // let's send this in the result tuple
        tuple.set(0, myKey);

        return res;
    }

    /**
     * Sets the base index of this operator
     *
     * @param baseIndex the base index of this operator
     */
    public void setBaseIndex(int baseIndex) {
        this.baseIndex = baseIndex;
    }

    /**
     * Returns the base index of this operator
     *
     * @return the base index of this operator
     */
    public int getBaseIndex() {
        return baseIndex;
    }
}