/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.backend.hadoop.executionengine.tez.plan.operator;

import java.io.IOException;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLocalRearrange;
import org.apache.pig.backend.hadoop.executionengine.tez.runtime.TezInput;
import org.apache.pig.backend.hadoop.executionengine.tez.runtime.TezOutput;
import org.apache.pig.impl.io.NullablePartitionWritable;
import org.apache.pig.impl.io.PigNullableWritable;
import org.apache.pig.impl.plan.OperatorKey;
import org.apache.tez.runtime.api.LogicalInput;
import org.apache.tez.runtime.api.LogicalOutput;
import org.apache.tez.runtime.api.Reader;
import org.apache.tez.runtime.library.api.KeyValueReader;
import org.apache.tez.runtime.library.api.KeyValueWriter;
import org.apache.tez.runtime.library.api.KeyValuesReader;

/**
 * Pass-through operator: every key/value pair read from the single attached
 * input is written unchanged to the attached output, so tuples produced by the
 * previous vertex's POLocalRearrangeTez reach the next vertex as is.
 *
 * <p>For example, in an Order By the partition vertex, which only applies the
 * WeightedRangePartitioner to the previous vertex's data, uses this operator.
 *
 * <p>The one exception to "unchanged" is the skewed-join case, where the key is
 * wrapped in a {@link NullablePartitionWritable} before being written.
 */
@InterfaceAudience.Private
public class POIdentityInOutTez extends POLocalRearrangeTez implements TezInput, TezOutput {

    private static final long serialVersionUID = 1L;
    private static final Log LOG = LogFactory.getLog(POIdentityInOutTez.class);

    /** Key under which this operator looks up its input in {@link #attachInputs}. */
    private String inputKey;

    /** Set when the attached reader is a {@link KeyValueReader}. */
    private transient KeyValueReader reader;

    /** Set when the attached reader is not a {@link KeyValueReader}. */
    private transient KeyValuesReader shuffleReader;

    /** True once {@link #shuffleReader} has been selected as the source. */
    private transient boolean shuffleInput;

    /**
     * Creates the operator from an existing local rearrange and assigns it the
     * given operator key.
     *
     * @param k operator key stored as this operator's {@code mKey}
     * @param inputRearrange rearrange operator handed to the superclass constructor
     */
    public POIdentityInOutTez(OperatorKey k, POLocalRearrange inputRearrange) {
        super(inputRearrange);
        this.mKey = k;
    }

    /**
     * Sets the key used to look up the input in {@link #attachInputs}.
     *
     * @param inputKey key of the input to read from
     */
    public void setInputKey(String inputKey) {
        this.inputKey = inputKey;
    }

    /**
     * Returns the single input key of this operator.
     *
     * @return a new one-element array holding the current input key
     */
    @Override
    public String[] getTezInputs() {
        String[] keys = { inputKey };
        return keys;
    }

    /**
     * Switches the input key to {@code newInputKey} when the current key equals
     * {@code oldInputKey}; otherwise does nothing.
     *
     * @param oldInputKey key expected to be the current one
     * @param newInputKey key to use from now on
     */
    @Override
    public void replaceInput(String oldInputKey, String newInputKey) {
        if (!oldInputKey.equals(inputKey)) {
            return;
        }
        inputKey = newInputKey;
    }

    /**
     * Intentionally a no-op: this operator adds nothing to the skip set.
     *
     * @param inputsToSkip set left untouched
     */
    @Override
    public void addInputsToSkip(Set<String> inputsToSkip) {
    }

    /**
     * Looks up the input registered under the input key and keeps its reader,
     * either as a {@link KeyValueReader} or, failing that, as a
     * {@link KeyValuesReader}.
     *
     * @param inputs available inputs, keyed by name
     * @param conf configuration (not used here)
     * @throws ExecException if no input is registered under the input key, or
     *         wrapping any exception raised while obtaining or casting the reader
     */
    @Override
    public void attachInputs(Map<String, LogicalInput> inputs, Configuration conf)
            throws ExecException {
        final LogicalInput source = inputs.get(inputKey);
        if (source == null) {
            String problem = "Input from vertex " + inputKey + " is missing";
            throw new ExecException(problem);
        }

        try {
            final Reader attached = source.getReader();
            if (!(attached instanceof KeyValueReader)) {
                // Anything that is not a plain key/value reader is treated as a
                // grouped (key -> values) reader.
                shuffleInput = true;
                shuffleReader = (KeyValuesReader) attached;
            } else {
                reader = (KeyValueReader) attached;
            }
            String message = "Attached input from vertex " + inputKey
                    + " : input=" + source + ", reader=" + attached;
            LOG.info(message);
        } catch (Exception cause) {
            throw new ExecException(cause);
        }
    }

    /**
     * Looks up the output registered under the output key and keeps its writer.
     *
     * @param outputs available outputs, keyed by name
     * @param conf configuration (not used here)
     * @throws ExecException if no output is registered under the output key, or
     *         wrapping any exception raised while obtaining or casting the writer
     */
    @Override
    public void attachOutputs(Map<String, LogicalOutput> outputs, Configuration conf)
            throws ExecException {
        final LogicalOutput sink = outputs.get(outputKey);
        if (sink == null) {
            String problem = "Output to vertex " + outputKey + " is missing";
            throw new ExecException(problem);
        }

        try {
            writer = (KeyValueWriter) sink.getWriter();
            String message = "Attached output to vertex " + outputKey
                    + " : output=" + sink + ", writer=" + writer;
            LOG.info(message);
        } catch (Exception cause) {
            throw new ExecException(cause);
        }
    }

    /**
     * Drains the attached reader completely, writing every key/value pair to the
     * attached writer, and then signals end of processing.
     *
     * @return always {@code RESULT_EOP}
     * @throws ExecException wrapping any {@link IOException} raised while
     *         reading or writing
     */
    @Override
    public Result getNextTuple() throws ExecException {
        try {
            if (shuffleInput) {
                drainShuffleInput();
            } else {
                drainKeyValueInput();
            }
        } catch (IOException cause) {
            throw new ExecException(cause);
        }
        return RESULT_EOP;
    }

    /** Copies every value of every key group from the grouped reader to the writer. */
    private void drainShuffleInput() throws IOException {
        while (shuffleReader.next()) {
            Object groupKey = shuffleReader.getCurrentKey();
            Iterable<Object> groupValues = shuffleReader.getCurrentValues();
            if (isSkewedJoin) {
                groupKey = toSkewedJoinKey(groupKey);
            }
            for (Object value : groupValues) {
                writer.write(groupKey, value);
            }
        }
    }

    /** Copies every key/value pair from the plain key/value reader to the writer. */
    private void drainKeyValueInput() throws IOException {
        while (reader.next()) {
            Object key = reader.getCurrentKey();
            if (isSkewedJoin) {
                key = toSkewedJoinKey(key);
            }
            writer.write(key, reader.getCurrentValue());
        }
    }

    /**
     * Wraps a key for skewed join and sets its partition to -1.
     *
     * <p>Skewed join wraps the key with NullablePartitionWritable. The
     * partitionIndex in NullablePartitionWritable is not serialized, so it is
     * set here rather than in the previous vertex's POLocalRearrangeTez;
     * serializing it would add overhead for MR as well.
     */
    private NullablePartitionWritable toSkewedJoinKey(Object key) {
        NullablePartitionWritable wrapped =
                new NullablePartitionWritable((PigNullableWritable) key);
        wrapped.setPartition(-1);
        return wrapped;
    }

    /**
     * Builds the display name of this operator from its operator key, input key
     * and output key.
     *
     * @return the display name
     */
    @Override
    public String name() {
        StringBuilder label = new StringBuilder("POIdentityInOutTez - ");
        label.append(mKey.toString());
        label.append("\t<-\t ").append(inputKey);
        label.append("\t->\t ").append(outputKey);
        return label.toString();
    }
}