/**
* Copyright (c) 2009-2011 VMware, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.springsource.insight.plugin.hadoop;
import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import com.springsource.insight.util.FileUtil;
/**
 * Minimal Hadoop map/reduce word-count job packaged as a {@link Tool}.
 * <p>
 * The job reads text from {@link #INPUT}, splits each line on whitespace and
 * writes one {@code token<TAB-separated by the output format>count} record per
 * distinct token under {@link #OUTPUT}. Any existing output directory is
 * deleted before the job is submitted.
 */
public class WordCount extends Configured implements Tool {
    /** Directory (relative to the working directory) whose files are counted. */
    private static final String INPUT = "src/test/resources";
    /** Directory (relative to the working directory) that receives the job output. */
    private static final String OUTPUT = "target/out";

    public WordCount() {
        super();
    }

    /**
     * Emits {@code (token, 1)} for every whitespace-separated token of an input line.
     */
    static public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        final private static LongWritable ONE = new LongWritable(1);
        // Reused for every emitted key to avoid allocating one Text per token.
        private final Text tokenValue = new Text();

        /**
         * @param offset  key supplied by the input format (not used here)
         * @param text    the line to tokenize
         * @param context sink for the emitted {@code (token, 1)} pairs
         */
        @Override
        protected void map(LongWritable offset, Text text, Context context) throws IOException, InterruptedException {
            for (String token : text.toString().split("\\s+")) {
                // String.split yields an empty first element for blank lines and
                // for lines that start with whitespace; that is not a word.
                if (token.length() == 0) {
                    continue;
                }
                tokenValue.set(token);
                context.write(tokenValue, ONE);
            }
        }
    }

    /**
     * Sums the counts received for a token and emits {@code (token, sum)}.
     * <p>
     * {@link WordCount#run(String[])} registers this class both as the combiner
     * and as the reducer, so the console line printed by {@code reduce} may also
     * appear for partial (combiner-stage) totals.
     */
    static public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        // Reused for every emitted value.
        private final LongWritable total = new LongWritable();

        /**
         * @param token   the word being aggregated
         * @param counts  the individual counts to add up
         * @param context sink for the emitted {@code (token, sum)} pair
         */
        @Override
        protected void reduce(Text token, Iterable<LongWritable> counts, Context context)
                throws IOException, InterruptedException {
            long n = 0;
            for (LongWritable count : counts) {
                n += count.get();
            }
            total.set(n);
            context.write(token, total);
            System.out.println(token + "," + total);
        }
    }

    /**
     * Configures and runs the word-count job, blocking until it completes.
     * <p>
     * The job configuration starts from {@link #getConf()} when one has been set
     * (for example by {@link ToolRunner}), so that externally supplied settings
     * are honoured; otherwise a fresh {@link Configuration} is used.
     * {@code hadoop.tmp.dir} is always pointed at a {@code temp} sub-folder of
     * the folder returned by {@code FileUtil.detectTargetFolder}.
     *
     * @param args ignored - input and output locations are fixed
     * @return {@code 0} if the job succeeded, {@code -1} otherwise
     * @throws IllegalStateException if the target folder cannot be detected
     * @throws Exception             if deleting old output or running the job fails
     */
    public int run(String[] args) throws Exception {
        // Work on a copy so the tool's own configuration object is not mutated.
        Configuration base = getConf();
        Configuration conf = (base == null) ? new Configuration() : new Configuration(base);

        File targetFolder = FileUtil.detectTargetFolder(getClass());
        if (targetFolder == null) {
            throw new IllegalStateException("Cannot detect target folder");
        }
        File tempFolder = new File(targetFolder, "temp");
        conf.set("hadoop.tmp.dir", tempFolder.getAbsolutePath());

        Job job = new Job(conf, "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCountMapper.class);
        job.setCombinerClass(WordCountReducer.class);
        job.setReducerClass(WordCountReducer.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileUtils.deleteDirectory(new File(OUTPUT)); // delete old output data
        FileInputFormat.addInputPath(job, new Path(INPUT));
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT));

        return job.waitForCompletion(true) ? 0 : -1;
    }

    /**
     * Command-line entry point: runs the job through {@link ToolRunner} and
     * exits the JVM with the value returned by {@link #run(String[])}.
     */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new WordCount(), args));
    }
}