/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.backend.hadoop.executionengine.tez.plan.operator;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableComparator;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.AccumulativeTupleBuffer;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.CombinerPackager;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.LitePackager;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POPackage;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.Packager;
import org.apache.pig.backend.hadoop.executionengine.tez.runtime.TezInput;
import org.apache.pig.backend.hadoop.executionengine.util.AccumulatorOptimizerUtil;
import org.apache.pig.data.AccumulativeBag;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.InternalCachedBag;
import org.apache.pig.data.ReadOnceBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.NullableTuple;
import org.apache.pig.impl.io.PigNullableWritable;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.library.api.KeyValuesReader;
import org.apache.tez.runtime.library.common.ConfigUtils;

public class POShuffleTezLoad extends POPackage implements TezInput {

    private static final long serialVersionUID = 1L;
    private static final Log LOG = LogFactory.getLog(POShuffleTezLoad.class);

    protected List<String> inputKeys = new ArrayList<String>();
    private boolean isSkewedJoin = false;

    private transient List<LogicalInput> inputs;
    private transient List<KeyValuesReader> readers;
    private transient int numTezInputs;
    private transient boolean[] finished;
    private transient boolean[] readOnce;
    private transient WritableComparator comparator = null;
    private transient WritableComparator groupingComparator = null;
    private transient Configuration conf;
    private transient int accumulativeBatchSize;
    private transient boolean readOnceOneBag;

    public POShuffleTezLoad(POPackage pack) {
        super(pack);
    }

    @Override
    public String[] getTezInputs() {
        return inputKeys.toArray(new String[inputKeys.size()]);
    }

    @Override
    public void replaceInput(String oldInputKey, String newInputKey) {
        while (inputKeys.remove(oldInputKey)) {
            inputKeys.add(newInputKey);
        }
    }

    @Override
    public void addInputsToSkip(Set<String> inputsToSkip) {
    }

    @Override
    public void attachInputs(Map<String, LogicalInput> inputs, Configuration conf)
            throws ExecException {
        this.conf = conf;
        this.inputs = new ArrayList<LogicalInput>();
        this.readers = new ArrayList<KeyValuesReader>();
        this.comparator = (WritableComparator) ConfigUtils.getIntermediateInputKeyComparator(conf);
        this.groupingComparator = (WritableComparator) ConfigUtils.getInputKeySecondaryGroupingComparator(conf);
        this.accumulativeBatchSize = AccumulatorOptimizerUtil.getAccumulativeBatchSize();
        try {
            for (String inputKey : inputKeys) {
                LogicalInput input = inputs.get(inputKey);
                // 1) Case of self join/cogroup/cross with Split - numTezInputs < numInputs/inputKeys
                //      - Same TezInput will contain multiple indexes in case of join
                // 2) data unioned within Split - inputKeys > numInputs/numTezInputs
                //      - Input key will be repeated, but index would be same within a TezInput
                if (!this.inputs.contains(input)) {
                    this.inputs.add(input);
                    KeyValuesReader reader = (KeyValuesReader) input.getReader();
                    this.readers.add(reader);
                    LOG.info("Attached input from vertex " + inputKey
                            + " : input=" + input + ", reader=" + reader);
                }
            }

            this.numInputs = this.pkgr.getKeyInfo().size();
            this.numTezInputs = this.inputs.size();

            readOnce = new boolean[numInputs];
            for (int i = 0; i < numInputs; i++) {
                readOnce[i] = false;
            }

            finished = new boolean[numTezInputs];
            for (int i = 0; i < numTezInputs; i++) {
                finished[i] = !readers.get(i).next();
            }

            this.readOnceOneBag = (numInputs == 1)
                    && (pkgr instanceof CombinerPackager
                            || pkgr instanceof LitePackager
                            || pkgr instanceof BloomPackager);
            if (readOnceOneBag) {
                readOnce[0] = true;
            }
        } catch (Exception e) {
            throw new ExecException(e);
        }
    }

    @Override
    public Result getNextTuple() throws ExecException {
        Result res = pkgr.getNext();
        TezAccumulativeTupleBuffer buffer = null;

        if (isAccumulative()) {
            buffer = new TezAccumulativeTupleBuffer(accumulativeBatchSize);
        }

        while (res.returnStatus == POStatus.STATUS_EOP) {
            boolean hasData = false;
            Object cur = null;
            PigNullableWritable min = null;

            try {
                if (numTezInputs == 1) {
                    if (!finished[0]) {
                        hasData = true;
                        cur = readers.get(0).getCurrentKey();
                        // Just move to the next key without comparison
                        min = ((PigNullableWritable) cur).clone();
                    }
                } else {
                    for (int i = 0; i < numTezInputs; i++) {
                        if (!finished[i]) {
                            hasData = true;
                            cur = readers.get(i).getCurrentKey();
                            // TODO: PIG-4652 should compare key bytes instead
                            // of deserialized objects when using BytesComparator
                            // for faster comparison
                            if (min == null || comparator.compare(min, cur) > 0) {
                                // Not a deep clone. Writable is referenced.
                                min = ((PigNullableWritable) cur).clone();
                            }
                        }
                    }
                }
            } catch (Exception e) {
                throw new ExecException(e);
            }

            if (!hasData) {
                // For certain operators (such as STREAM), we could still have some work
                // to do even after seeing the last input. These operators set a flag that
                // says all input has been sent and to run the pipeline one more time.
                if (Boolean.valueOf(conf.get(JobControlCompiler.END_OF_INP_IN_MAP, "false"))) {
                    this.parentPlan.endOfAllInput = true;
                }
                return RESULT_EOP;
            }

            key = pkgr.getKey(min);
            keyWritable = min;

            try {
                DataBag[] bags = new DataBag[numInputs];
                if (isAccumulative()) {
                    buffer.setCurrentKey(min);
                    for (int i = 0; i < numInputs; i++) {
                        bags[i] = new AccumulativeBag(buffer, i);
                    }
                } else {
                    if (readOnceOneBag) {
                        bags[0] = new TezReadOnceBag(pkgr, min);
                    } else {
                        for (int i = 0; i < numInputs; i++) {
                            bags[i] = new InternalCachedBag(numInputs);
                        }
                        if (numTezInputs == 1) {
                            do {
                                Iterable<Object> vals = readers.get(0).getCurrentValues();
                                for (Object val : vals) {
                                    NullableTuple nTup = (NullableTuple) val;
                                    int index = nTup.getIndex();
                                    Tuple tup = pkgr.getValueTuple(keyWritable, nTup, index);
                                    bags[index].add(tup);
                                }
                                finished[0] = !readers.get(0).next();
                                if (finished[0]) {
                                    break;
                                }
                                cur = readers.get(0).getCurrentKey();
                                // We need to loop in case of Grouping Comparators
                            } while (groupingComparator.compare(min, cur) == 0);
                        } else {
                            for (int i = 0; i < numTezInputs; i++) {
                                if (!finished[i]) {
                                    cur = readers.get(i).getCurrentKey();
                                    // We need to loop in case of Grouping Comparators
                                    while (groupingComparator.compare(min, cur) == 0) {
                                        Iterable<Object> vals = readers.get(i).getCurrentValues();
                                        for (Object val : vals) {
                                            NullableTuple nTup = (NullableTuple) val;
                                            int index = nTup.getIndex();
                                            Tuple tup = pkgr.getValueTuple(keyWritable, nTup, index);
                                            bags[index].add(tup);
                                        }
                                        finished[i] = !readers.get(i).next();
                                        if (finished[i]) {
                                            break;
                                        }
                                        cur = readers.get(i).getCurrentKey();
                                    }
                                }
                            }
                        }
                    }
                }

                pkgr.attachInput(key, bags, readOnce);
                res = pkgr.getNext();
            } catch (IOException e) {
                throw new ExecException(e);
            }
        }
        return res;
    }

    public void setInputKeys(List<String> inputKeys) {
        this.inputKeys = inputKeys;
    }

    public void addInputKey(String inputKey) {
        inputKeys.add(inputKey);
    }

    public void setSkewedJoins(boolean isSkewedJoin) {
        this.isSkewedJoin = isSkewedJoin;
    }

    public boolean isSkewedJoin() {
        return isSkewedJoin;
    }

    @Override
    public boolean supportsMultipleInputs() {
        return true;
    }

    private class TezAccumulativeTupleBuffer implements AccumulativeTupleBuffer {

        private int batchSize;
        private List<Tuple>[] bags;
        private PigNullableWritable min;
        private boolean clearedCurrent = true;

        @SuppressWarnings("unchecked")
        public TezAccumulativeTupleBuffer(int batchSize) {
            this.batchSize = batchSize;
            this.bags = new List[numInputs];
            for (int i = 0; i < numInputs; i++) {
                this.bags[i] = new ArrayList<Tuple>(batchSize);
            }
        }

        public void setCurrentKey(PigNullableWritable curKey) {
            if (!clearedCurrent) {
                // If buffer.clear() is not called from POForEach ensure it is called here.
                clear();
            }
            this.min = curKey;
            clearedCurrent = false;
        }

        @Override
        public boolean hasNextBatch() {
            Object cur = null;
            try {
                for (int i = 0; i < numTezInputs; i++) {
                    if (!finished[i]) {
                        cur = readers.get(i).getCurrentKey();
                        if (groupingComparator.compare(min, cur) == 0) {
                            return true;
                        }
                    }
                }
            } catch (IOException e) {
                throw new RuntimeException(
                        "Error while checking for next Accumulator batch", e);
            }
            return false;
        }

        @Override
        public void nextBatch() throws IOException {
            Object cur = null;
            for (int i = 0; i < bags.length; i++) {
                bags[i].clear();
            }
            try {
                for (int i = 0; i < numTezInputs; i++) {
                    if (!finished[i]) {
                        cur = readers.get(i).getCurrentKey();
                        int batchCount = 0;
                        while (groupingComparator.compare(min, cur) == 0) {
                            Iterator<Object> iter = readers.get(i).getCurrentValues().iterator();
                            while (iter.hasNext() && batchCount < batchSize) {
                                NullableTuple nTup = (NullableTuple) iter.next();
                                int index = nTup.getIndex();
                                bags[index].add(pkgr.getValueTuple(keyWritable, nTup, index));
                                batchCount++;
                            }
                            if (batchCount == batchSize) {
                                if (!iter.hasNext()) {
                                    // Move to next key and update finished
                                    finished[i] = !readers.get(i).next();
                                }
                                break;
                            }
                            finished[i] = !readers.get(i).next();
                            if (finished[i]) {
                                break;
                            }
                            cur = readers.get(i).getCurrentKey();
                        }
                    }
                }
            } catch (IOException e) {
                throw new RuntimeException(
                        "Error while reading next Accumulator batch", e);
            }
        }

        @Override
        public void clear() {
            for (int i = 0; i < bags.length; i++) {
                bags[i].clear();
            }
            // Skip through current keys and its values not processed because of
            // early termination of accumulator
            Object cur = null;
            try {
                for (int i = 0; i < numTezInputs; i++) {
                    if (!finished[i]) {
                        cur = readers.get(i).getCurrentKey();
                        while (groupingComparator.compare(min, cur) == 0) {
                            finished[i] = !readers.get(i).next();
                            if (finished[i]) {
                                break;
                            }
                            cur = readers.get(i).getCurrentKey();
                        }
                    }
                }
            } catch (IOException e) {
                throw new RuntimeException(
                        "Error while cleaning up for next Accumulator batch", e);
            }
            clearedCurrent = true;
        }

        @Override
        public Iterator<Tuple> getTuples(int index) {
            return bags[index].iterator();
        }

        //TODO: illustratorMarkup
    }

    private class TezReadOnceBag extends ReadOnceBag {

        private static final long serialVersionUID = 1L;
        private Iterator<Object> iter;

        public TezReadOnceBag(Packager pkgr,
                PigNullableWritable currentKey) throws IOException {
            this.pkgr = pkgr;
            this.keyWritable = currentKey;
            this.iter = readers.get(0).getCurrentValues().iterator();
        }

        @Override
        public Iterator<Tuple> iterator() {
            return new TezReadOnceBagIterator();
        }

        private class TezReadOnceBagIterator implements Iterator<Tuple> {

            @Override
            public boolean hasNext() {
                if (iter.hasNext()) {
                    return true;
                } else {
                    try {
                        finished[0] = !readers.get(0).next();
                        if (finished[0]) {
                            return false;
                        }
                        // Currently combiner is not being applied when secondary key(grouping comparator) is used
                        // But might change in future. So check if the next key is same and return its values
                        Object cur = readers.get(0).getCurrentKey();
                        if (groupingComparator.compare(keyWritable, cur) == 0) {
                            iter = readers.get(0).getCurrentValues().iterator();
                            // Key should at least have one value. But doing a check just for safety
                            if (iter.hasNext()) {
                                return true;
                            } else {
                                throw new RuntimeException("Unexpected. Key " + keyWritable
                                        + " does not have any values");
                            }
                        }
                        return false;
                    } catch (IOException e) {
                        throw new RuntimeException("ReadOnceBag failed to get value tuple : ", e);
                    }
                }
            }

            @Override
            public Tuple next() {
                NullableTuple ntup = (NullableTuple) iter.next();
                int index = ntup.getIndex();
                Tuple ret = null;
                try {
                    ret = pkgr.getValueTuple(keyWritable, ntup, index);
                } catch (ExecException e) {
                    throw new RuntimeException("ReadOnceBag failed to get value tuple : ", e);
                }
                return ret;
            }

            @Override
            public void remove() {
                throw new RuntimeException("ReadOnceBag.iterator().remove() is not allowed");
            }
        }
    }
}