package mil.nga.giat.geowave.analytic.mapreduce.kde;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.geotools.filter.text.ecql.ECQL;
import org.opengis.coverage.grid.GridCoverage;
import org.opengis.filter.Filter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.vividsolutions.jts.geom.Geometry;
import mil.nga.giat.geowave.adapter.raster.RasterUtils;
import mil.nga.giat.geowave.adapter.raster.operations.ResizeCommand;
import mil.nga.giat.geowave.adapter.vector.plugin.ExtractGeometryFilterVisitor;
import mil.nga.giat.geowave.analytic.mapreduce.operations.KdeCommand;
import mil.nga.giat.geowave.core.cli.operations.config.options.ConfigOptions;
import mil.nga.giat.geowave.core.cli.parser.CommandLineOperationParams;
import mil.nga.giat.geowave.core.cli.parser.ManualOperationParams;
import mil.nga.giat.geowave.core.cli.parser.OperationParser;
import mil.nga.giat.geowave.core.geotime.GeometryUtils;
import mil.nga.giat.geowave.core.geotime.ingest.SpatialDimensionalityTypeProvider.SpatialIndexBuilder;
import mil.nga.giat.geowave.core.geotime.store.query.SpatialQuery;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.store.IndexWriter;
import mil.nga.giat.geowave.core.store.StoreFactoryOptions;
import mil.nga.giat.geowave.core.store.adapter.AdapterStore;
import mil.nga.giat.geowave.core.store.adapter.WritableDataAdapter;
import mil.nga.giat.geowave.core.store.adapter.exceptions.MismatchedIndexToAdapterMapping;
import mil.nga.giat.geowave.core.store.config.ConfigUtils;
import mil.nga.giat.geowave.core.store.index.Index;
import mil.nga.giat.geowave.core.store.index.IndexStore;
import mil.nga.giat.geowave.core.store.index.PrimaryIndex;
import mil.nga.giat.geowave.core.store.operations.remote.ClearCommand;
import mil.nga.giat.geowave.core.store.operations.remote.options.DataStorePluginOptions;
import mil.nga.giat.geowave.core.store.query.QueryOptions;
import mil.nga.giat.geowave.mapreduce.GeoWaveConfiguratorBase;
import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputFormat;
import mil.nga.giat.geowave.mapreduce.output.GeoWaveOutputFormat;
import mil.nga.giat.geowave.mapreduce.output.GeoWaveOutputKey;
public class KDEJobRunner extends
Configured implements
Tool
{
private static final Logger LOGGER = LoggerFactory.getLogger(KDEJobRunner.class);
// Configuration key whose comma-separated values are added to each job's
// classpath by addJobClasspathDependencies.
public static final String GEOWAVE_CLASSPATH_JARS = "geowave.classpath.jars";
// Appended to the requested coverage name for the intermediate coverage that
// runJob writes when the requested tile size is greater than 1.
private static final String TMP_COVERAGE_SUFFIX = "_tMp_CoVeRaGe";
// Tile size handed to the raster adapter created in setupJob2Output; larger
// requested tile sizes are produced by the resize step in runJob.
protected static int TILE_SIZE = 1;
// Configuration keys that runJob populates from the command line options
// before the jobs are created.
public static final String MAX_LEVEL_KEY = "MAX_LEVEL";
public static final String MIN_LEVEL_KEY = "MIN_LEVEL";
public static final String COVERAGE_NAME_KEY = "COVERAGE_NAME";
// KDE options (levels, coverage name, tile size, CQL filter, splits, ...).
protected KDECommandLineOptions kdeCommandLineOptions;
// Store the input features are read from.
protected DataStorePluginOptions inputDataStoreOptions;
// Store the KDE output is written to. NOTE: runJob reassigns this field to a
// "_tmp" namespace copy when the requested tile size is greater than 1.
protected DataStorePluginOptions outputDataStoreOptions;
/**
 * Creates a runner from already-parsed options; the arguments are stored as-is
 * without validation or copying.
 *
 * @param kdeCommandLineOptions
 *            the KDE options read by the job setup (levels, coverage name,
 *            tile size, ...)
 * @param inputDataStoreOptions
 *            the store the input features are read from
 * @param outputDataStoreOptions
 *            the store the resulting coverage is written to
 */
public KDEJobRunner(
final KDECommandLineOptions kdeCommandLineOptions,
final DataStorePluginOptions inputDataStoreOptions,
final DataStorePluginOptions outputDataStoreOptions ) {
this.kdeCommandLineOptions = kdeCommandLineOptions;
this.inputDataStoreOptions = inputDataStoreOptions;
this.outputDataStoreOptions = outputDataStoreOptions;
}
/**
 * Main method to execute the MapReduce analytic.
 * <p>
 * Two jobs are chained. Job 1 reads the configured feature type through
 * {@link GeoWaveInputFormat} and writes a sequence file to
 * {@code /tmp/<input namespace>_stats_<min>_<max>_<coverage>/basic}. Job 2
 * reads that sequence file and writes through the output format returned by
 * {@link #getJob2OutputFormatClass()}. The temporary HDFS directory is deleted
 * before job 1 starts and again once processing has finished.
 * <p>
 * When the requested tile size is greater than 1 the KDE output is written
 * with a tile size of 1 to a temporary coverage in a {@code "_tmp"} namespace,
 * and a resize command then produces the requested coverage in the original
 * output store. The temporary namespace is cleared only after a successful
 * resize. Note that in this case the {@code outputDataStoreOptions} field is
 * reassigned to the temporary namespace options and is not restored.
 *
 * @return 0 if both jobs, the post-job-2 actions and (when a resize was
 *         requested) the resize all succeeded, 1 otherwise
 * @throws Exception
 *             if job configuration, job execution, the resize/clear commands
 *             or HDFS access fails
 */
@SuppressWarnings("deprecation")
public int runJob()
        throws Exception {
    Configuration conf = super.getConf();
    if (conf == null) {
        conf = new Configuration();
        setConf(conf);
    }
    DataStorePluginOptions rasterResizeOutputDataStoreOptions;
    String kdeCoverageName;
    // so we don't need a no data merge strategy, use 1 for the tile size of
    // the KDE output and then run a resize operation
    if (kdeCommandLineOptions.getTileSize() > 1) {
        // this is the ending data store options after resize, the KDE will
        // need to output to a temporary namespace, a resize operation
        // will use the outputDataStoreOptions
        rasterResizeOutputDataStoreOptions = outputDataStoreOptions;
        // first clone the outputDataStoreOptions, then set it to a tmp
        // namespace
        final Map<String, String> configOptions = outputDataStoreOptions.getOptionsAsMap();
        final StoreFactoryOptions options = ConfigUtils.populateOptionsFromList(
                outputDataStoreOptions.getFactoryFamily().getDataStoreFactory().createOptionsInstance(),
                configOptions);
        options.setGeowaveNamespace(outputDataStoreOptions.getGeowaveNamespace() + "_tmp");
        outputDataStoreOptions = new DataStorePluginOptions(
                options);
        kdeCoverageName = kdeCommandLineOptions.getCoverageName() + TMP_COVERAGE_SUFFIX;
    }
    else {
        rasterResizeOutputDataStoreOptions = null;
        kdeCoverageName = kdeCommandLineOptions.getCoverageName();
    }
    GeoWaveConfiguratorBase.setRemoteInvocationParams(
            kdeCommandLineOptions.getHdfsHostPort(),
            kdeCommandLineOptions.getJobTrackerOrResourceManHostPort(),
            conf);
    conf.setInt(
            MAX_LEVEL_KEY,
            kdeCommandLineOptions.getMaxLevel());
    conf.setInt(
            MIN_LEVEL_KEY,
            kdeCommandLineOptions.getMinLevel());
    conf.set(
            COVERAGE_NAME_KEY,
            kdeCoverageName);
    if (kdeCommandLineOptions.getCqlFilter() != null) {
        conf.set(
                GaussianCellMapper.CQL_FILTER_KEY,
                kdeCommandLineOptions.getCqlFilter());
    }
    preJob1Setup(conf);
    final Job job = new Job(
            conf);
    job.setJarByClass(this.getClass());
    addJobClasspathDependencies(
            job,
            conf);
    job.setJobName(getJob1Name());
    job.setMapperClass(getJob1Mapper());
    job.setCombinerClass(CellSummationCombiner.class);
    job.setReducerClass(getJob1Reducer());
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(DoubleWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(GeoWaveInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(8);
    job.setSpeculativeExecution(false);
    final AdapterStore adapterStore = inputDataStoreOptions.createAdapterStore();
    final IndexStore indexStore = inputDataStoreOptions.createIndexStore();
    final QueryOptions queryOptions = new QueryOptions(
            adapterStore.getAdapter(new ByteArrayId(
                    kdeCommandLineOptions.getFeatureType())));
    if (kdeCommandLineOptions.getIndexId() != null) {
        final Index index = indexStore.getIndex(new ByteArrayId(
                kdeCommandLineOptions.getIndexId()));
        if ((index != null) && (index instanceof PrimaryIndex)) {
            queryOptions.setIndex((PrimaryIndex) index);
        }
    }
    GeoWaveInputFormat.setQueryOptions(
            job.getConfiguration(),
            queryOptions);
    GeoWaveInputFormat.setMinimumSplitCount(
            job.getConfiguration(),
            kdeCommandLineOptions.getMinSplits());
    GeoWaveInputFormat.setMaximumSplitCount(
            job.getConfiguration(),
            kdeCommandLineOptions.getMaxSplits());
    GeoWaveInputFormat.setStoreOptions(
            job.getConfiguration(),
            inputDataStoreOptions);
    if (kdeCommandLineOptions.getCqlFilter() != null) {
        // constrain the input query to the geometry extracted from the CQL
        // filter, unless the filter imposes no spatial bound
        final Filter filter = ECQL.toFilter(kdeCommandLineOptions.getCqlFilter());
        final Geometry bbox = (Geometry) filter.accept(
                ExtractGeometryFilterVisitor.GEOMETRY_VISITOR,
                null);
        if ((bbox != null) && !bbox.equals(GeometryUtils.infinity())) {
            GeoWaveInputFormat.setQuery(
                    job.getConfiguration(),
                    new SpatialQuery(
                            bbox));
        }
    }
    // temporary HDFS location used to hand job 1's output to job 2; built
    // once so every use is guaranteed to refer to the same directory
    final String statsDir = "/tmp/" + inputDataStoreOptions.getGeowaveNamespace() + "_stats_"
            + kdeCommandLineOptions.getMinLevel() + "_" + kdeCommandLineOptions.getMaxLevel() + "_"
            + kdeCommandLineOptions.getCoverageName();
    final Path statsDirPath = new Path(
            statsDir);
    final Path basicStatsPath = new Path(
            statsDir + "/basic");
    FileSystem fs = null;
    try {
        // FileSystem.get() hands out a JVM-wide cached instance; closing that
        // one would break every other user of it, so take a private instance
        // that is safe to close in the finally block
        fs = FileSystem.newInstance(conf);
        fs.delete(
                statsDirPath,
                true);
        FileOutputFormat.setOutputPath(
                job,
                basicStatsPath);
        final boolean job1Success = job.waitForCompletion(true);
        boolean job2Success = false;
        boolean postJob2Success = false;
        // Linear MapReduce job chaining
        if (job1Success) {
            setupEntriesPerLevel(
                    job,
                    conf);
            // Stats Reducer Job configuration parameters
            final Job statsReducer = new Job(
                    conf);
            statsReducer.setJarByClass(this.getClass());
            addJobClasspathDependencies(
                    statsReducer,
                    conf);
            statsReducer.setJobName(getJob2Name());
            statsReducer.setMapperClass(IdentityMapper.class);
            statsReducer.setPartitionerClass(getJob2Partitioner());
            statsReducer.setReducerClass(getJob2Reducer());
            statsReducer
                    .setNumReduceTasks(getJob2NumReducers((kdeCommandLineOptions.getMaxLevel() - kdeCommandLineOptions
                            .getMinLevel()) + 1));
            statsReducer.setMapOutputKeyClass(DoubleWritable.class);
            statsReducer.setMapOutputValueClass(LongWritable.class);
            statsReducer.setOutputKeyClass(getJob2OutputKeyClass());
            statsReducer.setOutputValueClass(getJob2OutputValueClass());
            statsReducer.setInputFormatClass(SequenceFileInputFormat.class);
            statsReducer.setOutputFormatClass(getJob2OutputFormatClass());
            FileInputFormat.setInputPaths(
                    statsReducer,
                    basicStatsPath);
            setupJob2Output(
                    conf,
                    statsReducer,
                    outputDataStoreOptions.getGeowaveNamespace(),
                    kdeCoverageName);
            job2Success = statsReducer.waitForCompletion(true);
            if (job2Success) {
                postJob2Success = postJob2Actions(
                        conf,
                        outputDataStoreOptions.getGeowaveNamespace(),
                        kdeCoverageName);
            }
        }
        final boolean kdeSuccess = job1Success && job2Success && postJob2Success;
        boolean resizeSuccess = true;
        if (rasterResizeOutputDataStoreOptions != null) {
            if (kdeSuccess) {
                resizeSuccess = resizeAndClearTemporaryCoverage(
                        kdeCoverageName,
                        rasterResizeOutputDataStoreOptions);
            }
            else {
                // resizing an incomplete temporary coverage would publish
                // partial results under the requested coverage name
                resizeSuccess = false;
                LOGGER.warn("KDE jobs did not complete successfully. Skipping resize and retaining temporary namespace '"
                        + outputDataStoreOptions.getGeowaveNamespace() + "'.");
            }
        }
        fs.delete(
                statsDirPath,
                true);
        return (kdeSuccess && resizeSuccess) ? 0 : 1;
    }
    finally {
        if (fs != null) {
            fs.close();
        }
    }
}

/**
 * Delegates to the resize command to rewrite the temporary tile-size-1
 * coverage into the requested coverage and tile size, then clears the
 * temporary namespace if the resize succeeded.
 *
 * @param kdeCoverageName
 *            name of the temporary coverage written by the KDE jobs
 * @param resizeOutputStoreOptions
 *            the store the resized coverage is written to
 * @return true if the resize command exited with status 0
 * @throws Exception
 *             if the resize or clear command fails with an exception
 */
private boolean resizeAndClearTemporaryCoverage(
        final String kdeCoverageName,
        final DataStorePluginOptions resizeOutputStoreOptions )
        throws Exception {
    final ResizeCommand resizeCommand = new ResizeCommand();
    // We're going to override these anyway.
    resizeCommand.setParameters(
            null,
            null);
    resizeCommand.setInputStoreOptions(outputDataStoreOptions);
    resizeCommand.setOutputStoreOptions(resizeOutputStoreOptions);
    resizeCommand.getOptions().setInputCoverageName(
            kdeCoverageName);
    resizeCommand.getOptions().setMinSplits(
            kdeCommandLineOptions.getMinSplits());
    resizeCommand.getOptions().setMaxSplits(
            kdeCommandLineOptions.getMaxSplits());
    resizeCommand.getOptions().setHdfsHostPort(
            kdeCommandLineOptions.getHdfsHostPort());
    resizeCommand.getOptions().setJobTrackerOrResourceManHostPort(
            kdeCommandLineOptions.getJobTrackerOrResourceManHostPort());
    resizeCommand.getOptions().setOutputCoverageName(
            kdeCommandLineOptions.getCoverageName());
    resizeCommand.getOptions().setOutputTileSize(
            kdeCommandLineOptions.getTileSize());
    final int resizeStatus = ToolRunner.run(
            resizeCommand.createRunner(new ManualOperationParams()),
            new String[] {});
    if (resizeStatus != 0) {
        LOGGER.warn("Resize command error code '" + resizeStatus + "'. Retaining temporary namespace '"
                + outputDataStoreOptions.getGeowaveNamespace() + "' with tile size of 1.");
        return false;
    }
    // delegate to clear command to clean up with tmp namespace
    // after successful resize
    final ClearCommand clearCommand = new ClearCommand();
    clearCommand.setParameters(null);
    clearCommand.setInputStoreOptions(outputDataStoreOptions);
    clearCommand.execute(new ManualOperationParams());
    return true;
}
/**
 * Copies job 1's per-level entry counters into the configuration so that
 * they are available to the job created from it afterwards.
 * <p>
 * For every level between the configured minimum and maximum (inclusive) the
 * value of counter {@code "level <l>"} in group {@code "Entries per level"}
 * is stored under the key {@code "Entries per level.level<l>"}.
 *
 * @param job1
 *            the completed first job whose counters are read
 * @param conf
 *            the configuration that receives the counter values
 * @throws IOException
 *             if the counters cannot be retrieved or are no longer available
 */
protected void setupEntriesPerLevel(
        final Job job1,
        final Configuration conf )
        throws IOException {
    final int minLevel = kdeCommandLineOptions.getMinLevel();
    final int maxLevel = kdeCommandLineOptions.getMaxLevel();
    if (minLevel > maxLevel) {
        // nothing to copy; avoid fetching the counters at all
        return;
    }
    // Fetch the counters once instead of once per level; each call to
    // getCounters() may go back to the cluster.
    final Counters counters = job1.getCounters();
    if (counters == null) {
        throw new IOException(
                "Counters for job '" + job1.getJobName() + "' are not available");
    }
    final CounterGroup entriesPerLevel = counters.getGroup("Entries per level");
    for (int level = minLevel; level <= maxLevel; level++) {
        conf.setLong(
                "Entries per level.level" + level,
                entriesPerLevel.findCounter(
                        "level " + level).getValue());
    }
}
/**
 * Extension hook called by runJob after the level, coverage name and CQL
 * filter values have been written to the configuration and before the first
 * job is created. The default implementation does nothing.
 *
 * @param conf
 *            the configuration the first job will be created from
 */
protected void preJob1Setup(
final Configuration conf ) {
}
/**
 * Extension hook called by runJob after the second job has completed
 * successfully. The default implementation does nothing and reports success.
 *
 * @param conf
 *            the configuration used for the jobs
 * @param statsNamespace
 *            the namespace of the output data store
 * @param coverageName
 *            the name of the coverage written by the second job
 * @return true if the post-processing succeeded; a false result makes runJob
 *         report failure
 * @throws Exception
 *             if an overriding implementation fails
 */
protected boolean postJob2Actions(
final Configuration conf,
final String statsNamespace,
final String coverageName )
throws Exception {
return true;
}
/**
 * @return the output format class set on the second job;
 *         {@link GeoWaveOutputFormat} by default
 */
protected Class<? extends OutputFormat<?, ?>> getJob2OutputFormatClass() {
return GeoWaveOutputFormat.class;
}
/**
 * @return the output key class set on the second job;
 *         {@link GeoWaveOutputKey} by default
 */
protected Class<?> getJob2OutputKeyClass() {
return GeoWaveOutputKey.class;
}
/**
 * @return the output value class set on the second job; {@link GridCoverage}
 *         by default
 */
protected Class<?> getJob2OutputValueClass() {
return GridCoverage.class;
}
/**
 * @return the reducer class set on the second job; AccumuloKDEReducer by
 *         default
 */
protected Class<? extends Reducer<?, ?, ?, ?>> getJob2Reducer() {
return AccumuloKDEReducer.class;
}
/**
 * @return the partitioner class set on the second job; DoubleLevelPartitioner
 *         by default
 */
protected Class<? extends Partitioner<?, ?>> getJob2Partitioner() {
return DoubleLevelPartitioner.class;
}
/**
 * Determines the number of reduce tasks for the second job.
 *
 * @param numLevels
 *            the number of levels being processed; runJob passes
 *            (max level - min level) + 1
 * @return the number of reduce tasks; by default one per level
 */
protected int getJob2NumReducers(
final int numLevels ) {
return numLevels;
}
/**
 * @return the mapper class set on the first job; GaussianCellMapper by
 *         default
 */
protected Class<? extends Mapper<?, ?, ?, ?>> getJob1Mapper() {
return GaussianCellMapper.class;
}
/**
 * @return the reducer class set on the first job; CellSummationReducer by
 *         default
 */
protected Class<? extends Reducer<?, ?, ?, ?>> getJob1Reducer() {
return CellSummationReducer.class;
}
/**
 * Builds the display name of the second job in the form
 * {@code <input namespace>(<coverage>) levels <min>-<max> Ingest}.
 *
 * @return the name set on the second job
 */
protected String getJob2Name() {
    final StringBuilder jobName = new StringBuilder();
    jobName.append(inputDataStoreOptions.getGeowaveNamespace());
    jobName.append("(").append(kdeCommandLineOptions.getCoverageName()).append(")");
    jobName.append(" levels ").append(kdeCommandLineOptions.getMinLevel());
    jobName.append("-").append(kdeCommandLineOptions.getMaxLevel());
    jobName.append(" Ingest");
    return jobName.toString();
}
/**
 * Builds the display name of the first job in the form
 * {@code <input namespace>(<coverage>) levels <min>-<max> Calculation}.
 *
 * @return the name set on the first job
 */
protected String getJob1Name() {
    final StringBuilder jobName = new StringBuilder();
    jobName.append(inputDataStoreOptions.getGeowaveNamespace());
    jobName.append("(").append(kdeCommandLineOptions.getCoverageName()).append(")");
    jobName.append(" levels ").append(kdeCommandLineOptions.getMinLevel());
    jobName.append("-").append(kdeCommandLineOptions.getMaxLevel());
    jobName.append(" Calculation");
    return jobName.toString();
}
/**
 * Configures the output side of the second job: creates a spatial index and
 * a double-typed raster adapter for the coverage (using {@code TILE_SIZE} and
 * the band settings of AccumuloKDEReducer) and passes both to
 * {@link #setup(Job, String, WritableDataAdapter, PrimaryIndex)}.
 *
 * @param conf
 *            the job chain configuration (not used by this implementation)
 * @param statsReducer
 *            the second job
 * @param statsNamespace
 *            the namespace of the output data store
 * @param coverageName
 *            the name of the coverage the adapter is created for
 * @throws Exception
 *             if the output setup fails
 */
protected void setupJob2Output(
        final Configuration conf,
        final Job statsReducer,
        final String statsNamespace,
        final String coverageName )
        throws Exception {
    final PrimaryIndex spatialIndex = new SpatialIndexBuilder().createIndex();
    final WritableDataAdapter<?> coverageAdapter = RasterUtils.createDataAdapterTypeDouble(
            coverageName,
            AccumuloKDEReducer.NUM_BANDS,
            TILE_SIZE,
            AccumuloKDEReducer.MINS_PER_BAND,
            AccumuloKDEReducer.MAXES_PER_BAND,
            AccumuloKDEReducer.NAME_PER_BAND,
            null);
    setup(statsReducer, statsNamespace, coverageAdapter, spatialIndex);
}
/**
 * Registers the output store options, adapter and index with
 * {@link GeoWaveOutputFormat} on the job's configuration, then opens and
 * immediately closes a writer for the adapter/index pair on the output data
 * store.
 *
 * @param job
 *            the job whose configuration is updated
 * @param namespace
 *            the output namespace (not used by this implementation)
 * @param adapter
 *            the adapter the job will write with
 * @param index
 *            the index the job will write to
 * @throws IOException
 *             if the writer cannot be created or closed
 * @throws MismatchedIndexToAdapterMapping
 *             if the data store rejects the adapter/index combination
 */
protected void setup(
        final Job job,
        final String namespace,
        final WritableDataAdapter<?> adapter,
        final PrimaryIndex index )
        throws IOException,
        MismatchedIndexToAdapterMapping {
    final Configuration jobConf = job.getConfiguration();
    GeoWaveOutputFormat.setStoreOptions(jobConf, outputDataStoreOptions);
    GeoWaveOutputFormat.addDataAdapter(jobConf, adapter);
    GeoWaveOutputFormat.addIndex(jobConf, index);
    // NOTE(review): presumably creating the writer up front persists the
    // adapter/index in the store before the job's tasks start - confirm
    outputDataStoreOptions.createDataStore().createWriter(adapter, index).close();
}
/**
 * Command line entry point: parses the arguments into a {@link KdeCommand},
 * prepares the config options, runs the command's runner through
 * {@link ToolRunner} and exits the JVM with the resulting status.
 *
 * @param args
 *            the command line arguments
 * @throws Exception
 *             if parsing or running the job fails
 */
public static void main(
        final String[] args )
        throws Exception {
    final ConfigOptions configOptions = new ConfigOptions();
    final OperationParser operationParser = new OperationParser();
    operationParser.addAdditionalObject(configOptions);

    final KdeCommand kdeCommand = new KdeCommand();
    final CommandLineOperationParams parsedParams = operationParser.parse(kdeCommand, args);
    configOptions.prepare(parsedParams);

    final Configuration hadoopConf = new Configuration();
    final Tool kdeRunner = kdeCommand.createRunner(parsedParams);
    System.exit(ToolRunner.run(hadoopConf, kdeRunner, args));
}
/**
 * {@link Tool} entry point; the arguments are ignored and the call is
 * delegated to {@link #runJob()}.
 *
 * @param args
 *            unused
 * @return the status returned by {@link #runJob()}
 * @throws Exception
 *             if the job fails with an exception
 */
@Override
public int run(
final String[] args )
throws Exception {
return runJob();
}
/**
 * Adds every entry listed under {@link #GEOWAVE_CLASSPATH_JARS} in the
 * configuration to the job's classpath as an archive.
 *
 * @param job
 *            the job whose classpath is extended
 * @param conf
 *            the configuration the jar list is read from
 * @throws IOException
 *             if an entry cannot be added to the job
 * @throws URISyntaxException
 *             if an entry is not a valid URI
 */
protected void addJobClasspathDependencies(
        final Job job,
        final Configuration conf )
        throws IOException,
        URISyntaxException {
    final String[] jarPaths = conf.getTrimmedStrings(GEOWAVE_CLASSPATH_JARS);
    if (jarPaths == null) {
        return;
    }
    for (int i = 0; i < jarPaths.length; i++) {
        final URI jarUri = new URI(jarPaths[i]);
        job.addArchiveToClassPath(new Path(jarUri));
    }
}
}