package com.taobao.loganalyzer.common; import java.io.IOException; import java.util.Iterator; import java.util.List; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.RecordWriter; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.TextOutputFormat; import org.apache.hadoop.mapred.lib.MultipleOutputFormat; import org.apache.hadoop.util.Progressable; public abstract class CommonPVProcessor extends AbstractProcessor { public abstract static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, LongWritable> { private static final String SEPARATOR = "\u0001"; public String makeKey(List<String> tokens) { StringBuffer sb = new StringBuffer(); boolean isFirst = true; for (String token : tokens) { if (isFirst) isFirst = false; else sb.append(SEPARATOR); sb.append(token); } return sb.toString(); } public abstract void map(LongWritable key, Text value, OutputCollector<Text, LongWritable> output, Reporter reporter) throws IOException; } public static class Reduce extends MapReduceBase implements Reducer<Text, LongWritable, Text, Text> { LongWritable longVal = new LongWritable(); public void reduce(Text key, Iterator<LongWritable> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { long sum = 0; while (values.hasNext()) { sum += values.next().get(); } longVal.set(sum); String newkey = key.toString()+"\u0001"+String.valueOf(sum); try { output.collect(new Text(newkey), null); }catch(Exception e) { return; } } } public static class Combiner extends MapReduceBase implements Reducer<Text, LongWritable, Text, LongWritable> { LongWritable longVal = new LongWritable(); public void reduce(Text key, Iterator<LongWritable> values, OutputCollector<Text, LongWritable> output, Reporter reporter) throws IOException { long sum = 0; while (values.hasNext()) { sum += values.next().get(); } longVal.set(sum); // String newkey = key.toString() + "\u0001" + String.valueOf(sum); output.collect(key, longVal); } } @SuppressWarnings("unchecked") public static class ReportOutFormat<K extends WritableComparable, V extends Writable> extends MultipleOutputFormat<K, V> { private TextOutputFormat<K, V> theTextOutputFormat = null; @Override protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs, JobConf job, String name, Progressable arg3) throws IOException { if (theTextOutputFormat == null) { theTextOutputFormat = new TextOutputFormat<K, V>(); } return theTextOutputFormat.getRecordWriter(fs, job, name, arg3); } @Override protected String generateFileNameForKeyValue(K key, V value, String name) { return name + "_" + key.toString().split("\u0001")[0]; } } public Class<Reduce> getReducer() { return Reduce.class; } public Class<Combiner> getCombiner() { return Combiner.class; } }