package com.alexholmes.avro.sort.basic;

import com.alexholmes.avro.Weather;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Wires up a MapReduce job over Avro {@link Weather} records.
 *
 * <p>The mapper emits every input record as both the map output key and the
 * map output value; the reducer stamps each value with a running counter and
 * writes it out as an Avro output key.
 *
 * <p>NOTE(review): the class name suggests the {@code Weather} schema marks
 * some fields to be ignored when ordering keys; the schema is not visible
 * here, so confirm against its definition.
 */
public class AvroSortWithIgnores {

  /**
   * Forwards each incoming record unchanged as the map output key and, wrapped
   * in an {@link AvroValue}, as the map output value.
   */
  private static class SortMapper
      extends Mapper<AvroKey<Weather>, NullWritable, AvroKey<Weather>, AvroValue<Weather>> {

    @Override
    protected void map(AvroKey<Weather> key, NullWritable value, Context context)
        throws IOException, InterruptedException {
      AvroValue<Weather> payload = new AvroValue<Weather>(key.datum());
      context.write(key, payload);
    }
  }

  /**
   * Numbers the values handed to each {@code reduce} call, starting at 1, and
   * writes every numbered record as an output key paired with a
   * {@link NullWritable} value.
   */
  private static class SortReducer
      extends Reducer<AvroKey<Weather>, AvroValue<Weather>, AvroKey<Weather>, NullWritable> {

    @Override
    protected void reduce(AvroKey<Weather> key, Iterable<AvroValue<Weather>> values,
                          Context context)
        throws IOException, InterruptedException {
      // Number of records written so far in this reduce call; the first
      // record therefore receives counter value 1.
      int emitted = 0;
      for (AvroValue<Weather> wrapped : values) {
        // The counter is set on the datum in place, and that same object is
        // what gets written out.
        Weather datum = wrapped.datum();
        datum.setCounter(++emitted);
        context.write(new AvroKey<Weather>(datum), NullWritable.get());
      }
    }
  }

  /**
   * Configures the supplied job to read from {@code inputPath}, run
   * {@link SortMapper} and {@link SortReducer}, write Avro output to
   * {@code outputPath}, and then blocks until the job finishes.
   *
   * @param job        the job to configure and run
   * @param inputPath  location of the Avro input
   * @param outputPath location the job output is written to
   * @return the result of {@code job.waitForCompletion(true)}
   * @throws Exception if configuring or running the job fails
   */
  public boolean runMapReduce(final Job job, Path inputPath, Path outputPath)
      throws Exception {
    // Input side: Avro-keyed records using the Weather schema.
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, Weather.SCHEMA$);

    // Map side: both the intermediate key and value are Weather records.
    job.setMapperClass(SortMapper.class);
    AvroJob.setMapOutputKeySchema(job, Weather.SCHEMA$);
    AvroJob.setMapOutputValueSchema(job, Weather.SCHEMA$);

    // Reduce side: final output keys are Weather records.
    job.setReducerClass(SortReducer.class);
    AvroJob.setOutputKeySchema(job, Weather.SCHEMA$);

    // Output side.
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    // "true" asks Hadoop to report progress while waiting.
    boolean completed = job.waitForCompletion(true);
    return completed;
  }
}