package mil.nga.giat.geowave.test.mapreduce;
import java.io.IOException;
import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import mil.nga.giat.geowave.examples.ingest.bulk.GeonamesDataFileInputFormat;
import mil.nga.giat.geowave.examples.ingest.bulk.SimpleFeatureToAccumuloKeyValueMapper;
/**
 * Integration test that runs a MapReduce job over the Geonames test data using
 * {@link GeonamesDataFileInputFormat} and
 * {@link SimpleFeatureToAccumuloKeyValueMapper}, writes the result with
 * {@link AccumuloFileOutputFormat}, and then verifies the job's exit code, its
 * map input/output record counters and the files left in the output directory.
 */
public class BulkIngestInputGenerationIT
{
    private static final Logger LOGGER = LoggerFactory.getLogger(BulkIngestInputGenerationIT.class);

    /** Path handed to the job as its input location. */
    private static final String TEST_DATA_LOCATION = "src/test/resources/mil/nga/giat/geowave/test/geonames/barbados";

    /** Number of records the job is expected to read and to emit (see BB.txt). */
    private static final long NUM_GEONAMES_RECORDS = 834;

    /** Job output directory; it is deleted before each run if it already exists. */
    private static final String OUTPUT_PATH = "target/tmp_bulkIngestTest";

    /** Exact name of the marker file expected in the output directory. */
    private static final String SUCCESS_MARKER_FILE_NAME = "_SUCCESS";

    /** Name prefix of the reducer output file(s) expected in the output directory. */
    private static final String REDUCER_OUTPUT_PREFIX = "part-r-";

    /** MAP_INPUT_RECORDS counter value; written by the job runner after the job completes. */
    private static long mapInputRecords;

    /** MAP_OUTPUT_RECORDS counter value; written by the job runner after the job completes. */
    private static long mapOutputRecords;

    /** Wall-clock time (epoch millis) captured before the tests of this class run. */
    private static long startMillis;

    /**
     * Records the start time and logs a banner announcing the test class.
     */
    @BeforeClass
    public static void startTimer() {
        startMillis = System.currentTimeMillis();
        LOGGER.warn("-----------------------------------------");
        LOGGER.warn("* *");
        LOGGER.warn("* RUNNING BulkIngestInputGenerationIT *");
        LOGGER.warn("* *");
        LOGGER.warn("-----------------------------------------");
    }

    /**
     * Logs a closing banner including the number of whole seconds elapsed since
     * {@link #startTimer()} ran.
     */
    @AfterClass
    public static void reportTest() {
        final long elapsedSeconds = (System.currentTimeMillis() - startMillis) / 1000;
        LOGGER.warn("-----------------------------------------");
        LOGGER.warn("* *");
        LOGGER.warn("* FINISHED BulkIngestInputGenerationIT *");
        LOGGER.warn(
                "* {}s elapsed. *",
                elapsedSeconds);
        LOGGER.warn("* *");
        LOGGER.warn("-----------------------------------------");
    }

    /**
     * Runs the job through {@link ToolRunner} and verifies that it exits with
     * code 0, that the map counters match the expected record count, and that
     * the expected output files exist.
     *
     * @throws Exception
     *             if the job runner or the output inspection fails
     */
    @Test
    public void testMapReduceJobSuccess()
            throws Exception {
        LOGGER.info("Running Bulk Ingest Input Generation MapReduce job...");
        // The job takes no command-line arguments; pass an explicit empty
        // array instead of relying on null being tolerated downstream.
        final int exitCode = ToolRunner.run(
                new BulkIngestInputGenerationJobRunner(),
                new String[0]);
        LOGGER.info(
                "Job completed with exit code: {}",
                exitCode);

        Assert.assertEquals(
                "MapReduce job should exit with code 0",
                0,
                exitCode);
        verifyNumInputRecords();
        verifyNumAccumuloKeyValuePairs();
        verifyJobOutput();
    }

    /**
     * Asserts that the mapper consumed exactly {@link #NUM_GEONAMES_RECORDS}
     * input records.
     */
    private void verifyNumInputRecords() {
        Assert.assertEquals(
                "Unexpected number of map input records",
                NUM_GEONAMES_RECORDS,
                mapInputRecords);
    }

    /**
     * Asserts that the mapper emitted exactly {@link #NUM_GEONAMES_RECORDS}
     * key/value pairs.
     */
    private void verifyNumAccumuloKeyValuePairs() {
        Assert.assertEquals(
                "Unexpected number of map output records",
                NUM_GEONAMES_RECORDS,
                mapOutputRecords);
    }

    /**
     * Lists the files directly under {@link #OUTPUT_PATH} (non-recursively) on
     * the local file system and asserts that both the success marker and at
     * least one reducer output file are present.
     *
     * @throws IOException
     *             if the output directory cannot be listed
     */
    private void verifyJobOutput()
            throws IOException {
        final FileSystem fs = FileSystem.getLocal(new Configuration());
        final RemoteIterator<LocatedFileStatus> outputFiles = fs.listFiles(
                new Path(
                        OUTPUT_PATH),
                false);

        boolean successMarkerFound = false;
        boolean reducerOutputFound = false;
        while (outputFiles.hasNext()) {
            final String fileName = outputFiles.next().getPath().getName();
            // Match the marker exactly and reducer output by prefix so that
            // unrelated files that merely contain these strings (for example
            // "._SUCCESS.crc") cannot satisfy the checks.
            if (SUCCESS_MARKER_FILE_NAME.equals(fileName)) {
                successMarkerFound = true;
            }
            else if (fileName.startsWith(REDUCER_OUTPUT_PREFIX)) {
                reducerOutputFound = true;
            }
        }

        Assert.assertTrue(
                "Expected a '" + SUCCESS_MARKER_FILE_NAME + "' file in " + OUTPUT_PATH,
                successMarkerFound);
        Assert.assertTrue(
                "Expected a '" + REDUCER_OUTPUT_PREFIX + "*' file in " + OUTPUT_PATH,
                reducerOutputFound);
    }

    /**
     * Hadoop {@link Tool} that configures and runs the job under test, then
     * publishes the map input/output record counters to the enclosing test
     * class's static fields.
     */
    private static class BulkIngestInputGenerationJobRunner extends
            Configured implements
            Tool
    {
        private static final String JOB_NAME = "BulkIngestInputGenerationITJob";
        private static final String TASK_COUNTER_GROUP_NAME = "org.apache.hadoop.mapreduce.TaskCounter";
        private static final String MAP_INPUT_RECORDS = "MAP_INPUT_RECORDS";
        private static final String MAP_OUTPUT_RECORDS = "MAP_OUTPUT_RECORDS";

        /**
         * Configures the job, waits for it to complete and records its map
         * counters.
         *
         * @param args
         *            unused
         * @return 0 if the job reports successful completion, 1 otherwise
         * @throws Exception
         *             if the job cannot be set up or run
         */
        @Override
        public int run(
                final String[] args )
                throws Exception {
            final Configuration conf = getConf();
            // Use the local file system for both the input and output paths.
            conf.set(
                    "fs.defaultFS",
                    "file:///");

            final Job job = Job.getInstance(
                    conf,
                    JOB_NAME);
            job.setJarByClass(getClass());

            FileInputFormat.setInputPaths(
                    job,
                    new Path(
                            TEST_DATA_LOCATION));
            FileOutputFormat.setOutputPath(
                    job,
                    cleanPathForReuse(
                            conf,
                            OUTPUT_PATH));

            job.setMapperClass(SimpleFeatureToAccumuloKeyValueMapper.class);
            job.setReducerClass(Reducer.class); // (Identity Reducer)

            job.setInputFormatClass(GeonamesDataFileInputFormat.class);
            job.setOutputFormatClass(AccumuloFileOutputFormat.class);

            job.setMapOutputKeyClass(Key.class);
            job.setMapOutputValueClass(Value.class);
            job.setOutputKeyClass(Key.class);
            job.setOutputValueClass(Value.class);

            job.setNumReduceTasks(1);
            job.setSpeculativeExecution(false);

            final boolean succeeded = job.waitForCompletion(true);

            // Counters are captured regardless of the outcome so the test can
            // assert on them after checking the exit code.
            mapInputRecords = job.getCounters().findCounter(
                    TASK_COUNTER_GROUP_NAME,
                    MAP_INPUT_RECORDS).getValue();
            mapOutputRecords = job.getCounters().findCounter(
                    TASK_COUNTER_GROUP_NAME,
                    MAP_OUTPUT_RECORDS).getValue();

            return succeeded ? 0 : 1;
        }

        /**
         * Recursively deletes the given path if it exists so that it can be
         * used as a fresh job output directory.
         *
         * @param conf
         *            configuration used to obtain the file system
         * @param pathString
         *            the path to clean
         * @return the {@link Path} for {@code pathString}
         * @throws IOException
         *             if the file system cannot be accessed, or if the path
         *             still exists after the delete attempt
         */
        private Path cleanPathForReuse(
                final Configuration conf,
                final String pathString )
                throws IOException {
            final FileSystem fs = FileSystem.get(conf);
            final Path path = new Path(
                    pathString);
            if (fs.exists(path)) {
                LOGGER.info(
                        "Deleting '{}' for reuse.",
                        pathString);
                // Fail fast with a clear message rather than letting the job
                // trip over a stale output directory later on.
                if (!fs.delete(
                        path,
                        true) && fs.exists(path)) {
                    throw new IOException(
                            "Unable to delete existing output path '" + pathString + "'");
                }
            }
            return path;
        }
    }
}