/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.alexholmes.avro.sort.writablekey;
import com.alexholmes.avro.Weather;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class AvroWritableKeySort {
private static class SortMapper
extends Mapper<AvroKey<Weather>, NullWritable, WeatherSubset, AvroValue<Weather>> {
@Override
protected void map(AvroKey<Weather> key, NullWritable value, Context context)
throws IOException, InterruptedException {
WeatherSubset subset = new WeatherSubset().setStation(key.datum().getStation().toString()).setTime(key.datum().getTime());
context.write(subset, new AvroValue<Weather>(key.datum()));
}
}
private static class SortReducer
extends Reducer<WeatherSubset, AvroValue<Weather>, AvroKey<Weather>, NullWritable> {
@Override
protected void reduce(WeatherSubset key, Iterable<AvroValue<Weather>> ignore, Context context)
throws IOException, InterruptedException {
int i = 1;
for (AvroValue<Weather> weather : ignore) {
weather.datum().setCounter(i++);
context.write(new AvroKey<Weather>(weather.datum()), NullWritable.get());
}
}
}
public static class WeatherSubsetSortComparator extends WritableComparator {
public WeatherSubsetSortComparator() {
super(WeatherSubset.class, true);
}
@Override
public int compare(WritableComparable w1, WritableComparable w2) {
WeatherSubset p1 = (WeatherSubset) w1;
WeatherSubset p2 = (WeatherSubset) w2;
int cmp = p1.getStation().compareTo(p2.getStation());
if (cmp != 0) {
return cmp;
}
return new Long(p1.getTime()).compareTo(p2.getTime());
}
}
public static class WeatherSubsetGroupingComparator extends WritableComparator {
public WeatherSubsetGroupingComparator() {
super(WeatherSubset.class, true);
}
@Override
public int compare(WritableComparable w1, WritableComparable w2) {
WeatherSubset p1 = (WeatherSubset) w1;
WeatherSubset p2 = (WeatherSubset) w2;
return p1.getStation().compareTo(p2.getStation());
}
}
public static class WeatherSubset implements WritableComparable<WeatherSubset> {
private String station;
private long time;
@Override
public void readFields(DataInput in) throws IOException {
this.station = in.readUTF();
this.time = in.readLong();
}
@Override
public void write(DataOutput out) throws IOException {
out.writeUTF(station);
out.writeLong(time);
}
@Override
public int compareTo(WeatherSubset other) {
int compare = this.station.compareTo(other.station);
if (compare != 0) {
return compare;
}
return new Long(time).compareTo(other.time);
}
public WeatherSubset setStation(String station) {
this.station = station;
return this;
}
public WeatherSubset setTime(long time) {
this.time = time;
return this;
}
public String getStation() {
return station;
}
public long getTime() {
return time;
}
}
public static class WeatherPartitioner extends
Partitioner<WeatherSubset, AvroValue<Weather>> {
@Override
public int getPartition(WeatherSubset key, AvroValue<Weather> value, int numPartitions) {
return Math.abs(key.getStation().hashCode() * 127) % numPartitions;
}
}
public boolean runMapReduce(final Job job, Path inputPath, Path outputPath) throws Exception {
FileInputFormat.setInputPaths(job, inputPath);
job.setInputFormatClass(AvroKeyInputFormat.class);
AvroJob.setInputKeySchema(job, Weather.SCHEMA$);
job.setMapperClass(SortMapper.class);
AvroJob.setMapOutputValueSchema(job, Weather.SCHEMA$);
job.setMapOutputKeyClass(WeatherSubset.class);
job.setReducerClass(SortReducer.class);
AvroJob.setOutputKeySchema(job, Weather.SCHEMA$);
job.setOutputFormatClass(AvroKeyOutputFormat.class);
FileOutputFormat.setOutputPath(job, outputPath);
job.setPartitionerClass(WeatherPartitioner.class);
job.setGroupingComparatorClass(WeatherSubsetGroupingComparator.class);
job.setSortComparatorClass(WeatherSubsetSortComparator.class);
return job.waitForCompletion(true);
}
}