package ml.shifu.shifu.core.shuffle; import ml.shifu.shifu.util.Constants; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Partitioner; import org.apache.hadoop.mapreduce.Reducer; import java.io.IOException; import java.util.Random; /** * Created by zhanhu on 12/31/16. */ public class DataShuffle { public static class ShuffleMapper extends Mapper<LongWritable, Text, IntWritable, Text> { private int shuffleSize; private Random rd; @Override public void setup(Context context) { this.shuffleSize = context.getConfiguration().getInt(Constants.SHIFU_NORM_SHUFFLE_SIZE, 100); this.rd = new Random(System.currentTimeMillis()); } @Override public void map(LongWritable key, Text line, Context context) throws IOException, InterruptedException { IntWritable shuffleIndex = new IntWritable(this.rd.nextInt(this.shuffleSize)); context.write(shuffleIndex, line); } } public static class ShuffleReducer extends Reducer<IntWritable, Text, NullWritable, Text> { @Override public void reduce(IntWritable key, Iterable<Text> iterable, Context context) throws IOException, InterruptedException { for ( Text record : iterable ) { context.write(NullWritable.get(), record); } } } public static class KvalPartitioner extends Partitioner<IntWritable, Text> { @Override public int getPartition(IntWritable key, Text text, int numReduceTasks) { return key.get() % numReduceTasks; } } }