/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.builtin; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.Partitioner; /** * This partitioner should be used with extreme caution and only in cases * where the order of output records is guaranteed to be same. If the order of * output records can vary on retries which is mostly the case, map reruns * due to shuffle fetch failures can lead to data being partitioned differently * and result in incorrect output due to loss or duplication of data. * Refer PIG-5041 for more details. * * This will be removed in the next release as it is risky to use in most cases. */ @Deprecated public class RoundRobinPartitioner extends Partitioner<Writable, Writable> implements Configurable { /** * Batch size for round robin partitioning. Batch size number of records * will be distributed to each partition in a round robin fashion. Default * value is 0 which distributes each record in a circular fashion. Higher * number for batch size can be used to increase probability of keeping * similar records in the same partition if output is already sorted and get * better compression. */ public static String PIG_ROUND_ROBIN_PARTITIONER_BATCH_SIZE = "pig.round.robin.partitioner.batch.size"; private int num = -1; private int batchSize = 0; private int currentBatchCount = 0; private Configuration conf; @Override public int getPartition(Writable key, Writable value, int numPartitions) { if (batchSize > 0) { if (currentBatchCount == 0) { num = ++num % numPartitions; } if (++currentBatchCount == batchSize) { currentBatchCount = 0; } } else { num = ++num % numPartitions; } return num; } @Override public void setConf(Configuration conf) { this.conf = conf; batchSize = conf.getInt(PIG_ROUND_ROBIN_PARTITIONER_BATCH_SIZE, 0); } @Override public Configuration getConf() { return conf; } }