package tap.sample;
import tap.CommandOptions;
import tap.Pipe;
import tap.Tap;
import tap.TapMapper;
import tap.TapReducer;
import tap.core.*;
//groupBy indicates the grouping for presentation to reducer. sortby indicates how records should be sorted, i.e.,
//which fields are part of the key. if groupBy is not set to anything then by default the grouping at the 'key' level.
//more logically we should require the groupBy be specified and change sort to subsort.
public class SortTestAvroInput {
public static void main(String[] args) throws Exception {
String a[] = { "-o", "/tmp/sorttest", "-i", "share/test_data.avro", "-f" };
CommandOptions o = new CommandOptions(a);
/* Set up a basic pipeline of map reduce */
Tap wordcount = new Tap(o).named("wordcount");
/* Parse options - just use the standard options - input and output location, time window, etc. */
if (o.input == null) {
System.err.println("Must specify input directory");
return;
}
if (o.output == null) {
System.err.println("Must specify output directory");
return;
}
wordcount.createPhase().reads(o.input).writes(o.output).map(Mapper.class).groupBy("group").sortBy("extra, subsort").reduce(Reducer.class);
wordcount.make();
}
public static class key {
public String group;
public String extra;
public String subsort;
}
public static class Mapper extends TapMapper<key,key> {
private key outrec = new key();
@Override
public void map(key in, Pipe<key> out) {
out.put(in);
}
}
public static class Reducer extends TapReducer<key,key> {
private key outrec = new key();
@Override
public void reduce(Pipe<key> in, Pipe<key> out) {
System.out.println("**************");
for (key rec : in) {
outrec.group = rec.group;
outrec.extra = rec.extra;
outrec.subsort = rec.subsort;
System.out.println("REDUCER::" + outrec.group + " " + outrec.extra + " " + outrec.subsort);
out.put(outrec);
}
}
}
}