package mil.nga.giat.geowave.analytic.mapreduce.clustering.runner; import java.util.Arrays; import java.util.Collection; import mil.nga.giat.geowave.analytic.IndependentJobRunner; import mil.nga.giat.geowave.analytic.PropertyManagement; import mil.nga.giat.geowave.analytic.mapreduce.GeoWaveAnalyticJobRunner; import mil.nga.giat.geowave.analytic.mapreduce.GeoWaveInputFormatConfiguration; import mil.nga.giat.geowave.analytic.mapreduce.MapReduceJobRunner; import mil.nga.giat.geowave.analytic.mapreduce.SequenceFileOutputFormatConfiguration; import mil.nga.giat.geowave.analytic.mapreduce.clustering.InputToOutputKeyReducer; import mil.nga.giat.geowave.analytic.param.CentroidParameters; import mil.nga.giat.geowave.analytic.param.MapReduceParameters; import mil.nga.giat.geowave.analytic.param.OutputParameters; import mil.nga.giat.geowave.analytic.param.ParameterEnum; import mil.nga.giat.geowave.core.geotime.ingest.SpatialDimensionalityTypeProvider; import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputKey; import mil.nga.giat.geowave.mapreduce.output.GeoWaveOutputKey; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.ObjectWritable; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; /** * * Run a map reduce job to extract a population of data from GeoWave (Accumulo), * remove duplicates, and output a SimpleFeature with the ID and the extracted * geometry from each of the GeoWave data item. * */ public class GeoWaveInputLoadJobRunner extends GeoWaveAnalyticJobRunner implements MapReduceJobRunner, IndependentJobRunner { public GeoWaveInputLoadJobRunner() { // defaults super.setInputFormatConfiguration(new GeoWaveInputFormatConfiguration()); super.setOutputFormatConfiguration(new SequenceFileOutputFormatConfiguration()); } @Override public void configure( final Job job ) throws Exception { job.setMapperClass(Mapper.class); job.setReducerClass(InputToOutputKeyReducer.class); job.setMapOutputKeyClass(GeoWaveInputKey.class); job.setMapOutputValueClass(ObjectWritable.class); job.setOutputKeyClass(GeoWaveOutputKey.class); job.setOutputValueClass(Object.class); job.setSpeculativeExecution(false); job.setJobName("GeoWave Input to Output"); job.setReduceSpeculativeExecution(false); } @Override public Class<?> getScope() { return InputToOutputKeyReducer.class; } @Override public int run( final Configuration config, final PropertyManagement runTimeProperties ) throws Exception { final String indexId = checkIndex( runTimeProperties, OutputParameters.Output.INDEX_ID, runTimeProperties.getPropertyAsString( CentroidParameters.Centroid.INDEX_ID, new SpatialDimensionalityTypeProvider().createPrimaryIndex().getId().getString())); OutputParameters.Output.INDEX_ID.getHelper().setValue( config, getScope(), indexId); addDataAdapter( config, getAdapter( runTimeProperties, OutputParameters.Output.DATA_TYPE_ID, OutputParameters.Output.DATA_NAMESPACE_URI)); runTimeProperties.setConfig( new ParameterEnum[] { OutputParameters.Output.DATA_TYPE_ID, OutputParameters.Output.DATA_NAMESPACE_URI, OutputParameters.Output.INDEX_ID }, config, getScope()); return super.run( config, runTimeProperties); } @Override public Collection<ParameterEnum<?>> getParameters() { final Collection<ParameterEnum<?>> params = super.getParameters(); params.addAll(Arrays.asList(new OutputParameters.Output[] { OutputParameters.Output.INDEX_ID, OutputParameters.Output.DATA_TYPE_ID, OutputParameters.Output.DATA_NAMESPACE_URI })); params.addAll(MapReduceParameters.getParameters()); return params; } @Override protected String getJobName() { return "Input Load"; } }