package com.cloudera.sa.hcu.env2.arvo.io.examples;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroCollector;
import org.apache.avro.mapred.AvroJob;
import org.apache.avro.mapred.AvroMapper;
import org.apache.avro.mapred.AvroMultipleOutputs;
import org.apache.avro.mapred.AvroOutputFormat;
import org.apache.avro.mapred.AvroReducer;
import org.apache.avro.mapred.Pair;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

/**
 * Avro word-count example for the old (mapred) MapReduce API that also
 * demonstrates {@link AvroMultipleOutputs}: besides the job's main output,
 * the mapper writes each token to the named output "myavro2", and the reducer
 * writes each (word, count) pair to the named outputs "myavro" (as an Avro
 * Pair) and "myavro1" (as a string).
 */
public class AvroMapReduceExample {

  /** Tokenizes each input string and emits (token, 1) pairs. */
  public static class MapImpl extends AvroMapper<Utf8, Pair<Utf8, Long>> {
    private AvroMultipleOutputs amos;

    public void configure(JobConf job) {
      this.amos = new AvroMultipleOutputs(job);
    }

    @Override
    public void map(Utf8 text, AvroCollector<Pair<Utf8, Long>> collector,
                    Reporter reporter) throws IOException {
      StringTokenizer tokens = new StringTokenizer(text.toString());
      while (tokens.hasMoreTokens()) {
        String tok = tokens.nextToken();
        // Regular map output, consumed by the reducer.
        collector.collect(new Pair<Utf8, Long>(new Utf8(tok), 1L));
        // Side output: the pair's string form, written to the "myavro2"
        // named output (registered with a STRING schema in main()).
        amos.getCollector("myavro2", reporter)
            .collect(new Pair<Utf8, Long>(new Utf8(tok), 1L).toString());
      }
    }

    public void close() throws IOException {
      amos.close();
    }
  }

  /** Sums the counts for each word. */
  public static class ReduceImpl extends AvroReducer<Utf8, Long, Pair<Utf8, Long>> {
    private AvroMultipleOutputs amos;

    public void configure(JobConf job) {
      this.amos = new AvroMultipleOutputs(job);
    }

    @Override
    public void reduce(Utf8 word, Iterable<Long> counts,
                       AvroCollector<Pair<Utf8, Long>> collector,
                       Reporter reporter) throws IOException {
      long sum = 0;
      for (long count : counts) {
        sum += count;
      }
      Pair<Utf8, Long> outputValue = new Pair<Utf8, Long>(word, sum);
      // Side outputs: the pair itself to "myavro", its string form to "myavro1".
      amos.getCollector("myavro", reporter).collect(outputValue);
      amos.getCollector("myavro1", reporter).collect(outputValue.toString());
      // Regular reduce output.
      collector.collect(outputValue);
    }

    public void close() throws IOException {
      amos.close();
    }
  }

  @SuppressWarnings("deprecation")
  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("Usage: AvroMapReduceExample <input path> <output path>");
      System.exit(2);
    }

    JobConf job = new JobConf();
    job.setJobName("AvroMultipleOutputs");

    // Remove any previous output so the job can be re-run.
    Path outputPath = new Path(args[1]);
    outputPath.getFileSystem(job).delete(outputPath, true);

    // Input records are Avro strings; output records are (string, long) pairs.
    AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputSchema(job,
        new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());

    AvroJob.setMapperClass(job, MapImpl.class);
    AvroJob.setReducerClass(job, ReduceImpl.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, false);

    // Register the named outputs used by the mapper and reducer above.
    AvroMultipleOutputs.addNamedOutput(job, "myavro", AvroOutputFormat.class,
        new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
    AvroMultipleOutputs.addNamedOutput(job, "myavro1", AvroOutputFormat.class,
        Schema.create(Schema.Type.STRING));
    AvroMultipleOutputs.addNamedOutput(job, "myavro2", AvroOutputFormat.class,
        Schema.create(Schema.Type.STRING));

    JobClient.runJob(job);
  }
}
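
/*
 * Example invocation -- a sketch, not taken from the original source; the jar
 * name and HDFS paths below are placeholders/assumptions:
 *
 *   hadoop jar avro-mapreduce-examples.jar \
 *       com.cloudera.sa.hcu.env2.arvo.io.examples.AvroMapReduceExample \
 *       /user/examples/lines.avro /user/examples/wordcount-out
 *
 * The input is expected to be an Avro data file of strings (schema "string"),
 * since AvroJob.setInputSchema configures Avro input handling for the job.
 * Besides the regular part files, the named outputs registered above are
 * written into the same output directory, with file names derived from the
 * named-output names (e.g. files beginning with "myavro"); exact naming
 * depends on the Avro and Hadoop versions in use.
 */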