/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.alexholmes.avro.sort.avrokey; import com.alexholmes.avro.WeatherNoIgnore; import org.apache.avro.mapred.AvroKey; import org.apache.avro.mapred.AvroValue; import org.apache.avro.mapreduce.AvroJob; import org.apache.avro.mapreduce.AvroKeyInputFormat; import org.apache.avro.mapreduce.AvroKeyOutputFormat; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class AvroSortCustom { private static class SortMapper extends Mapper<AvroKey<WeatherNoIgnore>, NullWritable, AvroKey<WeatherNoIgnore>, AvroValue<WeatherNoIgnore>> { @Override protected void map(AvroKey<WeatherNoIgnore> key, NullWritable value, Context context) throws IOException, InterruptedException { context.write(key, new AvroValue<WeatherNoIgnore>(key.datum())); } } private static class SortReducer extends Reducer<AvroKey<WeatherNoIgnore>, AvroValue<WeatherNoIgnore>, AvroKey<WeatherNoIgnore>, NullWritable> { @Override protected void reduce(AvroKey<WeatherNoIgnore> key, Iterable<AvroValue<WeatherNoIgnore>> ignore, Context context) throws IOException, InterruptedException { int i = 1; for (AvroValue<WeatherNoIgnore> WeatherNoIgnore : ignore) { WeatherNoIgnore.datum().setCounter(i++); context.write(new AvroKey<WeatherNoIgnore>(WeatherNoIgnore.datum()), NullWritable.get()); } } } public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception { FileInputFormat.setInputPaths(job, inputPath); job.setInputFormatClass(AvroKeyInputFormat.class); AvroJob.setInputKeySchema(job, WeatherNoIgnore.SCHEMA$); job.setMapperClass(SortMapper.class); AvroJob.setMapOutputKeySchema(job, WeatherNoIgnore.SCHEMA$); AvroJob.setMapOutputValueSchema(job, WeatherNoIgnore.SCHEMA$); job.setReducerClass(SortReducer.class); AvroJob.setOutputKeySchema(job, WeatherNoIgnore.SCHEMA$); job.setOutputFormatClass(AvroKeyOutputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); AvroSort.builder() .setJob(job) .addPartitionField(WeatherNoIgnore.SCHEMA$, "station", true) .addSortField(WeatherNoIgnore.SCHEMA$, "station", true) .addSortField(WeatherNoIgnore.SCHEMA$, "time", true) .addSortField(WeatherNoIgnore.SCHEMA$, "temp", true) .addGroupField(WeatherNoIgnore.SCHEMA$, "station", true) .addGroupField(WeatherNoIgnore.SCHEMA$, "time", true) .configure(); return job.waitForCompletion(true); } }