/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.File;
import java.io.FileWriter;
import java.io.Writer;
import java.io.BufferedWriter;
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
/**
 * This is a wordcount application that tests the count of records
* got spilled to disk. It generates simple text input files. Then
* runs the wordcount map/reduce application on (1) 3 i/p files(with 3 maps
* and 1 reduce) and verifies the counters and (2) 4 i/p files(with 4 maps
* and 1 reduce) and verifies counters. Wordcount application reads the
* text input files, breaks each line into words and counts them. The output
* is a locally sorted list of words and the count of how often they occurred.
*
*/
public class TestSpilledRecordsCounter extends TestCase {

  /**
   * Asserts that the job's SPILLED_RECORDS counter equals the expected value.
   *
   * @param counter the counters of the completed job
   * @param spillRecCnt the expected number of spilled records
   */
  private void validateCounters(Counters counter, long spillRecCnt) {
    // Check if the number of Spilled Records is same as expected.
    // JUnit's assertEquals takes (expected, actual); keeping that order
    // gives a correct "expected:<x> but was:<y>" message on failure.
    assertEquals(spillRecCnt,
        counter.findCounter(Task.Counter.SPILLED_RECORDS).getCounter());
  }

  /**
   * Writes a words file used as wordcount input: 500 lines of 4 unique
   * words each (2000 unique words), with the whole set repeated 5 times,
   * i.e. 5 * 2K words in total.
   *
   * @param inpFile the local file to (over)write
   * @throws Exception if the file cannot be written
   */
  private void createWordsFile(File inpFile) throws Exception {
    Writer out = new BufferedWriter(new FileWriter(inpFile));
    try {
      // 500*4 unique words --- repeated 5 times => 5*2K words
      final int REPLICAS = 5, NUMLINES = 500, NUMWORDSPERLINE = 4;
      for (int i = 0; i < REPLICAS; i++) {
        for (int j = 1; j <= NUMLINES * NUMWORDSPERLINE; j += NUMWORDSPERLINE) {
          out.write("word" + j + " word" + (j+1) + " word" + (j+2)
                    + " word" + (j+3) + '\n');
        }
      }
    } finally {
      // Always close the writer, even if generation fails midway.
      out.close();
    }
  }

  /**
   * The main driver for word count map/reduce program.
   * Invoke this method to submit the map/reduce job.
   * Runs wordcount twice (3 maps, then 4 maps; 1 reduce each) with tiny
   * sort buffers so spills are forced, and verifies SPILLED_RECORDS.
   * @throws IOException When there is communication problems with the
   *                     job tracker.
   */
  public void testSpillCounter() throws Exception {
    JobConf conf = new JobConf(TestSpilledRecordsCounter.class);
    conf.setJobName("wordcountSpilledRecordsCounter");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(WordCount.MapClass.class);
    conf.setCombinerClass(WordCount.Reduce.class);
    conf.setReducerClass(WordCount.Reduce.class);

    conf.setNumMapTasks(3);
    conf.setNumReduceTasks(1);
    // Deliberately tiny sort buffer / aggressive spill thresholds so that
    // every map is forced through multiple spill-and-merge levels.
    conf.setInt("io.sort.mb", 1);
    conf.setInt("io.sort.factor", 2);
    conf.set("io.sort.record.percent", "0.05");
    conf.set("io.sort.spill.percent", "0.80");

    String TEST_ROOT_DIR = new Path(System.getProperty("test.build.data",
                                      File.separator + "tmp"))
                               .toString().replace(' ', '+');
    conf.set("test.build.data", TEST_ROOT_DIR);
    String IN_DIR = TEST_ROOT_DIR + File.separator +
                    "spilledRecords.countertest" + File.separator +
                    "genins" + File.separator;
    String OUT_DIR = TEST_ROOT_DIR + File.separator +
                     "spilledRecords.countertest" + File.separator;
    FileSystem fs = FileSystem.get(conf);
    Path testdir = new Path(TEST_ROOT_DIR, "spilledRecords.countertest");
    try {
      // Start from a clean test directory.
      if (fs.exists(testdir)) {
        fs.delete(testdir, true);
      }
      if (!fs.mkdirs(testdir)) {
        throw new IOException("Mkdirs failed to create " + testdir.toString());
      }
      Path wordsIns = new Path(testdir, "genins");
      if (!fs.mkdirs(wordsIns)) {
        throw new IOException("Mkdirs failed to create " + wordsIns.toString());
      }

      // create 3 input files each with 5*2k words
      for (int i = 1; i <= 3; i++) {
        createWordsFile(new File(IN_DIR + "input5_2k_" + i));
      }

      FileInputFormat.setInputPaths(conf, IN_DIR);
      Path outputPath1 = new Path(OUT_DIR, "output5_2k_3");
      FileOutputFormat.setOutputPath(conf, outputPath1);

      RunningJob myJob = JobClient.runJob(conf);
      Counters c1 = myJob.getCounters();
      // 3 maps & in each map, 4 first level spills --- So total 12.
      // spilled records count:
      // Each Map: 1st level:2k+2k+2k+2k=8k;2ndlevel=4k+4k=8k;
      //           3rd level=2k(4k from 1st level & 4k from 2nd level &
      //           combineAndSpill)
      //           So total 8k+8k+2k=18k
      // For 3 Maps, total = 3*18=54k
      // Reduce: each of the 3 map o/p's(2k each) will be spilled in
      //         shuffleToDisk()
      //         So 3*2k=6k in 1st level; 2nd level:4k(2k+2k);
      //         3rd level directly given to reduce(4k+2k --- combineAndSpill
      //         => 2k. So 0 records spilled to disk in 3rd level)
      //         So total of 6k+4k=10k
      // Total job counter will be 54k+10k = 64k
      validateCounters(c1, 64000);

      // create 4th input file each with 5*2k words and test with 4 maps
      createWordsFile(new File(IN_DIR + "input5_2k_4"));
      conf.setNumMapTasks(4);
      Path outputPath2 = new Path(OUT_DIR, "output5_2k_4");
      FileOutputFormat.setOutputPath(conf, outputPath2);

      myJob = JobClient.runJob(conf);
      c1 = myJob.getCounters();
      // 4 maps & in each map 4 first level spills --- So total 16.
      // spilled records count:
      // Each Map: 1st level:2k+2k+2k+2k=8k;2ndlevel=4k+4k=8k;
      //           3rd level=2k(4k from 1st level & 4k from 2nd level &
      //           combineAndSpill)
      //           So total 8k+8k+2k=18k
      // For 4 Maps, total = 4*18=72k
      // Reduce: each of the 4 map o/p's(2k each) will be spilled in
      //         shuffleToDisk()
      //         So 4*2k=8k in 1st level; 2nd level:4k+4k=8k;
      //         3rd level directly given to reduce(4k+4k --- combineAndSpill
      //         => 2k. So 0 records spilled to disk in 3rd level)
      //         So total of 8k+8k=16k
      // Total job counter will be 72k+16k = 88k
      validateCounters(c1, 88000);
    } finally {
      // clean up the input and output files
      if (fs.exists(testdir)) {
        fs.delete(testdir, true);
      }
    }
  }
}