package mil.nga.giat.geowave.analytic.mapreduce.kde.compare;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.opengis.feature.simple.SimpleFeature;

import mil.nga.giat.geowave.adapter.raster.RasterUtils;
import mil.nga.giat.geowave.analytic.mapreduce.kde.KDECommandLineOptions;
import mil.nga.giat.geowave.analytic.mapreduce.kde.KDEJobRunner;
import mil.nga.giat.geowave.analytic.mapreduce.operations.KdeCommand;
import mil.nga.giat.geowave.core.cli.operations.config.options.ConfigOptions;
import mil.nga.giat.geowave.core.cli.parser.CommandLineOperationParams;
import mil.nga.giat.geowave.core.cli.parser.OperationParser;
import mil.nga.giat.geowave.core.geotime.ingest.SpatialDimensionalityTypeProvider;
import mil.nga.giat.geowave.core.store.operations.remote.options.DataStorePluginOptions;
import mil.nga.giat.geowave.mapreduce.output.GeoWaveOutputFormat;
import mil.nga.giat.geowave.mapreduce.output.GeoWaveOutputKey;

/**
 * KDE job runner variant that customises the parent {@link KDEJobRunner}
 * pipeline for a seasonal ("winter" / "summer") comparison.
 *
 * <p>
 * On top of the two jobs driven by the parent class, this runner chains two
 * further jobs in {@link #postJob2Actions}: a "combining seasons" job that
 * reads the per-season percentile sequence files, and an "Ingest" job that
 * writes the combined result through {@link GeoWaveOutputFormat}.
 *
 * <p>
 * All intermediate sequence files live under a single scratch directory, see
 * {@link #comparisonStatsPath(String)}.
 */
public class ComparisonStatsJobRunner extends KDEJobRunner {

    /** Name of the time attribute handed to the job-1 mapper via the configuration. */
    private final String timeAttribute;

    /**
     * Creates a runner for the comparison pipeline.
     *
     * @param inputOptions
     *            comparison specific options; only the time attribute is read
     *            here
     * @param kdeCommandLineOptions
     *            KDE options forwarded to the parent runner
     * @param inputDataStoreOptions
     *            input data store options forwarded to the parent runner
     * @param outputDataStoreOptions
     *            output data store options forwarded to the parent runner
     */
    public ComparisonStatsJobRunner(
            final ComparisonCommandLineOptions inputOptions,
            final KDECommandLineOptions kdeCommandLineOptions,
            final DataStorePluginOptions inputDataStoreOptions,
            final DataStorePluginOptions outputDataStoreOptions) {
        super(kdeCommandLineOptions, inputDataStoreOptions, outputDataStoreOptions);
        timeAttribute = inputOptions.getTimeAttribute();
    }

    /**
     * Command line entry point: parses the arguments, builds the runner and
     * exits the JVM with the code returned by {@link ToolRunner#run}.
     *
     * @param args
     *            raw command line arguments
     * @throws Exception
     *             if parsing or running the tool fails
     */
    public static void main(final String[] args) throws Exception {
        final ConfigOptions opts = new ConfigOptions();
        final ComparisonCommandLineOptions comparisonOptions = new ComparisonCommandLineOptions();

        final OperationParser parser = new OperationParser();
        parser.addAdditionalObject(opts);
        parser.addAdditionalObject(comparisonOptions);

        final KdeCommand kdeCommand = new KdeCommand();
        final CommandLineOperationParams params = parser.parse(kdeCommand, args);

        // Load the params for config file.
        opts.prepare(params);

        // Don't care about output, but this will set the datastore options.
        kdeCommand.createRunner(params);

        final ComparisonStatsJobRunner runner = new ComparisonStatsJobRunner(
                comparisonOptions,
                kdeCommand.getKdeOptions(),
                kdeCommand.getInputStoreOptions(),
                kdeCommand.getOutputStoreOptions());

        final int res = ToolRunner.run(new Configuration(), runner, args);
        System.exit(res);
    }

    /**
     * Runs the parent's job-1 setup and additionally publishes the time
     * attribute under {@link ComparisonGaussianCellMapper#TIME_ATTRIBUTE_KEY}.
     */
    @Override
    protected void preJob1Setup(final Configuration conf) {
        super.preJob1Setup(conf);
        conf.set(ComparisonGaussianCellMapper.TIME_ATTRIBUTE_KEY, timeAttribute);
    }

    /**
     * Runs the "combining seasons" job followed by the "Ingest" job.
     *
     * <p>
     * Steps, in order: delete the {@code basic} scratch directory, run the
     * combiner from {@code percentiles} into {@code combined_pct}, delete
     * {@code percentiles}, copy the combiner's per-level counters into
     * {@code conf}, then run the ingest job on {@code combined_pct}.
     *
     * @param conf
     *            configuration both jobs are created from
     * @param statsNamespace
     *            namespace passed through to the parent's {@code setup}
     * @param coverageName
     *            coverage name used for the raster data adapter
     * @return {@code true} only if both jobs complete successfully;
     *         {@code false} if the combiner fails (the ingest job is then not
     *         started)
     * @throws Exception
     *             if creating, configuring or running either job fails
     */
    @Override
    protected boolean postJob2Actions(
            final Configuration conf,
            final String statsNamespace,
            final String coverageName)
            throws Exception {
        // NOTE(review): FileSystem.get(conf) may hand back a shared, cached
        // instance; closing it here could affect other users of that instance.
        // Behaviour is kept as in the original - confirm against KDEJobRunner.
        try (final FileSystem fs = FileSystem.get(conf)) {
            fs.delete(comparisonStatsPath("basic"), true);

            final Job combiner = buildSeasonCombinerJob(conf);
            if (!combiner.waitForCompletion(true)) {
                return false;
            }

            // The per-season percentiles are no longer needed once combined.
            fs.delete(comparisonStatsPath("percentiles"), true);

            // Must happen before the ingest job is created so that the job's
            // copy of the configuration carries the counts.
            publishCombinedEntriesPerLevel(combiner, conf);

            final Job ingester = buildComparisonIngestJob(conf, statsNamespace, coverageName);
            return ingester.waitForCompletion(true);
        }
    }

    /** Builds the scratch path {@code /tmp/<namespace>_stats_<min>_<max>_<coverage>/<leaf>}. */
    private Path comparisonStatsPath(final String leaf) {
        return new Path(
                "/tmp/" + inputDataStoreOptions.getGeowaveNamespace()
                        + "_stats_" + kdeCommandLineOptions.getMinLevel()
                        + "_" + kdeCommandLineOptions.getMaxLevel()
                        + "_" + kdeCommandLineOptions.getCoverageName()
                        + "/" + leaf);
    }

    /** Builds the shared job name prefix {@code <namespace>(<coverage>) levels <min>-<max>}. */
    private String comparisonJobNamePrefix() {
        return inputDataStoreOptions.getGeowaveNamespace()
                + "(" + kdeCommandLineOptions.getCoverageName() + ")"
                + " levels " + kdeCommandLineOptions.getMinLevel()
                + "-" + kdeCommandLineOptions.getMaxLevel();
    }

    /** Creates and configures the job that combines the per-season percentile files. */
    private Job buildSeasonCombinerJob(final Configuration conf) throws IOException {
        final Job combiner = new Job(conf);
        combiner.setJarByClass(this.getClass());
        combiner.setJobName(comparisonJobNamePrefix() + " combining seasons");

        combiner.setMapperClass(ComparisonCombiningStatsMapper.class);
        combiner.setReducerClass(ComparisonCombiningStatsReducer.class);

        combiner.setMapOutputKeyClass(LongWritable.class);
        combiner.setMapOutputValueClass(DoubleWritable.class);
        combiner.setOutputKeyClass(ComparisonCellData.class);
        combiner.setOutputValueClass(LongWritable.class);

        combiner.setInputFormatClass(SequenceFileInputFormat.class);
        combiner.setOutputFormatClass(SequenceFileOutputFormat.class);

        FileOutputFormat.setOutputPath(combiner, comparisonStatsPath("combined_pct"));
        FileInputFormat.setInputPaths(combiner, comparisonStatsPath("percentiles"));
        return combiner;
    }

    /** Copies the combiner's "Entries per level" counters into {@code conf}, one key per level. */
    private void publishCombinedEntriesPerLevel(final Job combiner, final Configuration conf)
            throws IOException {
        final int maxLevel = kdeCommandLineOptions.getMaxLevel();
        for (int level = kdeCommandLineOptions.getMinLevel(); level <= maxLevel; level++) {
            final long entries = combiner
                    .getCounters()
                    .getGroup("Entries per level")
                    .findCounter("level " + level)
                    .getValue();
            conf.setLong("Entries per level.level" + level, entries);
        }
    }

    /** Creates and configures the job that ingests the combined percentiles into GeoWave. */
    private Job buildComparisonIngestJob(
            final Configuration conf,
            final String statsNamespace,
            final String coverageName)
            throws Exception {
        // Stats Reducer Job configuration parameters
        final Job ingester = new Job(conf);
        ingester.setJarByClass(this.getClass());
        // Previously the options object itself was concatenated here instead
        // of its max level, which put the object's toString() in the job name.
        ingester.setJobName(comparisonJobNamePrefix() + " Ingest");

        ingester.setMapperClass(ComparisonIdentityMapper.class);
        ingester.setPartitionerClass(ComparisonCellLevelPartitioner.class);
        ingester.setReducerClass(ComparisonAccumuloStatsReducer.class);
        // One reduce task per level in [minLevel, maxLevel].
        ingester.setNumReduceTasks(
                (kdeCommandLineOptions.getMaxLevel() - kdeCommandLineOptions.getMinLevel()) + 1);

        ingester.setMapOutputKeyClass(ComparisonCellData.class);
        ingester.setMapOutputValueClass(LongWritable.class);
        ingester.setOutputKeyClass(GeoWaveOutputKey.class);
        ingester.setOutputValueClass(SimpleFeature.class);

        ingester.setInputFormatClass(SequenceFileInputFormat.class);
        ingester.setOutputFormatClass(GeoWaveOutputFormat.class);

        FileInputFormat.setInputPaths(ingester, comparisonStatsPath("combined_pct"));

        // NOTE(review): this writes to `conf` after the ingest job was created
        // from it; whether the job sees the store options presumably depends
        // on what setup(...) does - confirm against KDEJobRunner.
        GeoWaveOutputFormat.setStoreOptions(conf, outputDataStoreOptions);

        setup(
                ingester,
                statsNamespace,
                RasterUtils.createDataAdapterTypeDouble(
                        coverageName,
                        ComparisonAccumuloStatsReducer.NUM_BANDS,
                        1,
                        ComparisonAccumuloStatsReducer.MINS_PER_BAND,
                        ComparisonAccumuloStatsReducer.MAXES_PER_BAND,
                        ComparisonAccumuloStatsReducer.NAME_PER_BAND,
                        null),
                new SpatialDimensionalityTypeProvider().createPrimaryIndex());
        return ingester;
    }

    /** Job 2 writes sequence files rather than going straight to the data store. */
    @Override
    protected Class getJob2OutputFormatClass() {
        return SequenceFileOutputFormat.class;
    }

    /** Job 2 output keys are {@link LongWritable}. */
    @Override
    protected Class getJob2OutputKeyClass() {
        return LongWritable.class;
    }

    /** Job 2 output values are {@link DoubleWritable}. */
    @Override
    protected Class getJob2OutputValueClass() {
        return DoubleWritable.class;
    }

    /** Job 2 reduces with {@link ComparisonCellDataReducer}. */
    @Override
    protected Class getJob2Reducer() {
        return ComparisonCellDataReducer.class;
    }

    /**
     * Uses twice the parent's reducer count (presumably one set per season -
     * TODO confirm against {@link ComparisonDoubleLevelPartitioner}).
     */
    @Override
    protected int getJob2NumReducers(final int numLevels) {
        return super.getJob2NumReducers(numLevels) * 2;
    }

    /** Job 1 maps with {@link ComparisonGaussianCellMapper}. */
    @Override
    protected Class getJob1Mapper() {
        return ComparisonGaussianCellMapper.class;
    }

    /** Job 1 reduces with {@link ComparisonCellSummationReducer}. */
    @Override
    protected Class getJob1Reducer() {
        return ComparisonCellSummationReducer.class;
    }

    /** Job 2 partitions with {@link ComparisonDoubleLevelPartitioner}. */
    @Override
    protected Class getJob2Partitioner() {
        return ComparisonDoubleLevelPartitioner.class;
    }

    /** Job 2 name: the shared prefix followed by " Percentile Calculation by season". */
    @Override
    protected String getJob2Name() {
        return comparisonJobNamePrefix() + " Percentile Calculation by season";
    }

    /** Job 1 name: the parent's name followed by " initial calculation by season". */
    @Override
    protected String getJob1Name() {
        return super.getJob1Name() + " initial calculation by season";
    }

    /**
     * Copies job 1's per-level entry counters for both seasons into
     * {@code conf}, under the keys {@code "Entries per level (winter, <l>)"}
     * and {@code "Entries per level (summer, <l>)"}.
     *
     * @throws IOException
     *             if the job's counters cannot be retrieved
     */
    @Override
    protected void setupEntriesPerLevel(final Job job1, final Configuration conf)
            throws IOException {
        final int maxLevel = kdeCommandLineOptions.getMaxLevel();
        for (int level = kdeCommandLineOptions.getMinLevel(); level <= maxLevel; level++) {
            final String counterName = "level " + level;
            conf.setLong(
                    "Entries per level (winter, " + level + ")",
                    job1.getCounters()
                            .getGroup("Entries per level (winter)")
                            .findCounter(counterName)
                            .getValue());
            conf.setLong(
                    "Entries per level (summer, " + level + ")",
                    job1.getCounters()
                            .getGroup("Entries per level (summer)")
                            .findCounter(counterName)
                            .getValue());
        }
    }

    /** Points job 2's file output at the {@code percentiles} scratch directory. */
    @Override
    protected void setupJob2Output(
            final Configuration conf,
            final Job statsReducer,
            final String statsNamespace,
            final String coverageName)
            throws Exception {
        FileOutputFormat.setOutputPath(statsReducer, comparisonStatsPath("percentiles"));
    }
}