package hip.ch8;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import java.io.IOException;
public class FilterProjectJob {
public static class Map implements
Mapper<LongWritable, Text, Text, Text> {
Text outputKey = new Text();
Text outputValue = new Text();
@Override
public void configure(JobConf job) {
}
@Override
public void map(LongWritable key, Text value,
OutputCollector<Text, Text> output,
Reporter reporter) throws IOException {
String v = value.toString();
if (!v.startsWith("10.")) {
String[] parts = StringUtils.split(v, ".", 3);
outputKey.set(parts[0]);
outputValue.set(parts[1]);
output.collect(outputKey, outputValue);
}
}
@Override
public void close() throws IOException {
}
}
public static void main(String... args) throws Exception {
JobConf job = new JobConf();
job.setJarByClass(FilterProjectJob.class);
Path input = new Path(args[0]);
Path output = new Path(args[1]);
output.getFileSystem(job).delete(output, true);
job.setMapperClass(Map.class);
job.setMapOutputKeyClass(Text.class);
FileInputFormat.setInputPaths(job, input);
FileOutputFormat.setOutputPath(job, output);
JobClient.runJob(job);
}
}