/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.examples;
import java.io.IOException;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import java.util.Iterator;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Example MapReduce job that sums the integers 1..N and checks the result
 * against the closed form (N*(N+1))/2.
 *
 * <p>The driver writes N records of the form (i % numReduceTasks, i) to a
 * sequence file, runs an identity mapper followed by a summing
 * combiner/reducer, then adds up every value found in the job's output
 * files and compares the total with the expected sum.
 */
public class SumJob extends Configured implements Tool {
  /** The path of the temporary directory for the job */
  static final Path TMP_DIR = new Path(SumJob.class.getSimpleName() + "_TMP");

  /**
   * Mapper class for the SumJob. It is an identity mapper: every input
   * record is forwarded unchanged.
   */
  public static class MyMapper
      extends MapReduceBase
      implements Mapper<LongWritable, LongWritable, LongWritable, LongWritable> {
    /**
     * Map task for the job. Emits the input pair as-is.
     *
     * @param key the input key.
     * @param value the input value.
     * @param output collects mapped keys and values.
     * @param reporter facility to report progress.
     * @throws IOException if the collector fails to accept the record.
     */
    public void map(LongWritable key, LongWritable value,
                    OutputCollector<LongWritable, LongWritable> output,
                    Reporter reporter) throws IOException {
      output.collect(key, value);
    }
  }

  /**
   * Reducer class for SumJob. It is also registered as the job's combiner,
   * so it must produce a correct per-key sum no matter how many keys a
   * single instance is asked to process.
   */
  public static class MyReducer
      extends MapReduceBase
      implements Reducer<LongWritable, LongWritable, LongWritable, LongWritable> {
    /** Job configuration captured in {@link #configure(JobConf)}. */
    private JobConf conf;

    /** Store job configuration. */
    @Override
    public void configure(JobConf job) {
      conf = job;
    }

    /**
     * Reduce task for the job. It sums up all the values supplied for
     * {@code key} and writes out a single (key, sum) record.
     *
     * @param key the key.
     * @param values the list of values to reduce.
     * @param output to collect keys and combined values.
     * @param reporter facility to report progress.
     * @throws IOException if the collector fails to accept the record.
     */
    public void reduce(LongWritable key, Iterator<LongWritable> values,
                       OutputCollector<LongWritable, LongWritable> output,
                       Reporter reporter) throws IOException {
      // The accumulator is local on purpose: keeping it in an instance field
      // would carry one key's total into the next key handled by the same
      // instance and inflate the emitted sums.
      long sum = 0;
      while (values.hasNext()) {
        sum += values.next().get();
      }
      output.collect(key, new LongWritable(sum));
    }
  }

  /**
   * Runs the job to find the sum of integers between 1..N. Also checks if the
   * sum is the same as (N*(N+1))/2
   *
   * @param args command specific arguments: {@code <numRecords>
   *             <numReduceTasks>}, optionally preceded by generic options.
   * @return 0 if the computed sum matches the expected sum, 1 if it does
   *         not, and -1 if the arguments are missing or out of range.
   * @throws NumberFormatException if either argument is not an integer.
   * @throws IOException if the temporary directory cannot be created or the
   *         output directory already exists.
   * @throws Exception if the job itself fails.
   */
  public int run(String[] args) throws Exception {
    // Parse into the tool's own configuration so that any generic options
    // still present in args actually reach the JobConf built below.
    Configuration conf = getConf();
    String[] otherArgs =
        new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      return printUsage();
    }

    final int numRecords = Integer.parseInt(otherArgs[0]);
    final int numReduceTasks = Integer.parseInt(otherArgs[1]);
    System.out.println("numRecords: " + numRecords +
        " numReduceTasks: " + numReduceTasks);

    // numReduceTasks is used as a modulus when generating the input keys, so
    // zero would raise ArithmeticException; negative counts are meaningless.
    if (numRecords < 0 || numReduceTasks < 1) {
      System.err.println("numRecords must be >= 0 and numReduceTasks must be >= 1");
      return printUsage();
    }

    JobConf jobConf = new JobConf(conf, SumJob.class);
    final FileSystem fs = FileSystem.get(jobConf);
    if (!fs.mkdirs(TMP_DIR)) {
      throw new IOException("Cannot create input directory " + TMP_DIR);
    }

    final Path inFile = new Path(TMP_DIR, "inp");
    // If the input file already exists, the Job would assume that it holds
    // the right input records, and would not create them again.
    if (!fs.exists(inFile)) {
      writeInput(fs, jobConf, inFile, numRecords, numReduceTasks);
    }

    final Path outDir = new Path(TMP_DIR, "out");
    if (fs.exists(outDir)) {
      throw new IOException("Tmp directory " + fs.makeQualified(outDir)
          + " already exists. Please remove it first.");
    }

    SequenceFileInputFormat.setInputPaths(jobConf, inFile);
    FileOutputFormat.setOutputPath(jobConf, outDir);
    jobConf.setJobName("sum");
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setMapperClass(MyMapper.class);
    jobConf.setCombinerClass(MyReducer.class);
    jobConf.setReducerClass(MyReducer.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setNumReduceTasks(numReduceTasks);

    final long startTime = System.currentTimeMillis();
    JobClient.runJob(jobConf);
    final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Job Finished in " + duration + " seconds");

    // Read outputs
    final long finalSum = readOutputSum(fs, jobConf, outDir);
    System.err.println("Sum: " + finalSum);

    // Widen before multiplying so that neither n + 1 nor the product
    // overflows int for large record counts.
    final long n = numRecords;
    final long expectedSum = (n * (n + 1)) / 2;
    if (finalSum == expectedSum) {
      System.err.println("Job Succeeded!");
      return 0;
    }
    System.err.println("Job Failed! Expected Sum: " + expectedSum);
    return 1;
  }

  /** Prints the command usage to stderr and returns the usage-error code. */
  private static int printUsage() {
    System.err.println("Usage: sum <numRecords> " +
        "<numReduceTasks>");
    ToolRunner.printGenericCommandUsage(System.err);
    return -1;
  }

  /** Writes the records (i % numReduceTasks, i) for i in 1..numRecords. */
  private static void writeInput(FileSystem fs, JobConf jobConf, Path inFile,
                                 int numRecords, int numReduceTasks)
      throws IOException {
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, inFile,
        LongWritable.class, LongWritable.class);
    try {
      // A long counter keeps the loop finite even when numRecords is
      // Integer.MAX_VALUE (an int counter would wrap around and never exit).
      for (long i = 1; i <= numRecords; i++) {
        writer.append(
            new LongWritable(i % numReduceTasks),
            new LongWritable(i));
      }
    } finally {
      // Release the file handle even if an append fails part-way through.
      writer.close();
    }
  }

  /** Adds up every value stored in the "part*" files under outDir. */
  private static long readOutputSum(FileSystem fs, JobConf jobConf,
                                    Path outDir) throws IOException {
    long total = 0;
    // Get the list of all the files which are in the output directory
    FileStatus[] fileStatuses = fs.listStatus(outDir);
    if (fileStatuses == null) {
      // Defensive: some FileSystem versions report a missing directory as
      // null rather than throwing.
      return total;
    }
    for (FileStatus fileStatus : fileStatuses) {
      Path outFile = fileStatus.getPath();
      // Only files whose name starts with "part" hold reducer output.
      if (!outFile.getName().startsWith("part")) {
        continue;
      }
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, outFile,
          jobConf);
      try {
        LongWritable key = new LongWritable();
        LongWritable sum = new LongWritable();
        // Consume every record: a part file holds one record per key that
        // was routed to that reducer, and may also be empty.
        while (reader.next(key, sum)) {
          total += sum.get();
        }
      } finally {
        reader.close();
      }
    }
    return total;
  }

  /**
   * Command-line entry point. Delegates to {@link ToolRunner} and exits with
   * the code returned by {@link #run(String[])}.
   *
   * @param args generic options followed by {@code <numRecords>
   *             <numReduceTasks>}.
   * @throws Exception if the job fails.
   */
  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new SumJob(), args);
    System.exit(res);
  }
}