package hip.ch8;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import java.io.IOException;
import java.util.Iterator;
public class SkewLogsJob {
private static final Log log = LogFactory.getLog(SkewLogsJob.class);
public static class Map implements
Mapper<LongWritable, Text, LongWritable, Text> {
@Override
public void configure(JobConf job) {
}
@Override
public void map(LongWritable key, Text value,
OutputCollector<LongWritable, Text> output,
Reporter reporter) throws IOException {
String[] parts = value.toString().split("\\.");
Text outputValue = new Text(parts[0]);
output.collect(key, outputValue);
}
@Override
public void close() throws IOException {
}
}
public static class Reduce
implements Reducer<Text, Text, Text, Text> {
public static final String MAX_VALUES = "skew.maxvalues";
private int maxValueThreshold;
@Override
public void configure(JobConf job) {
maxValueThreshold = job.getInt(MAX_VALUES, 100);
}
@Override
public void reduce(Text key, Iterator<Text> values,
OutputCollector<Text,
Text> output,
Reporter reporter) throws IOException {
int i = 0;
while (values.hasNext()) {
values.next();
i++;
}
if (++i > maxValueThreshold) {
log.info("Received " + i + " values for key " + key);
}
}
@Override
public void close() throws IOException {
}
}
public static void main(String... args) throws Exception {
JobConf job = new JobConf();
job.setJarByClass(SkewLogsJob.class);
Path input = new Path(args[0]);
Path output = new Path(args[1]);
output.getFileSystem(job).delete(output, true);
job.setMapperClass(Map.class);
FileInputFormat.setInputPaths(job, input);
FileOutputFormat.setOutputPath(job, output);
job.setProfileEnabled(true);
job.setProfileParams(
"-agentlib:hprof=depth=8,cpu=samples,heap=sites,force=n," +
"thread=y,verbose=n,file=%s");
job.setProfileTaskRange(true, "0,1,5-10");
job.setProfileTaskRange(false, "");
JobClient.runJob(job);
System.out.println("Done");
Thread.sleep(20000);
}
}