package mil.nga.giat.geowave.analytic.mapreduce.kmeans.runner; import mil.nga.giat.geowave.analytic.PropertyManagement; import mil.nga.giat.geowave.analytic.clustering.CentroidManagerGeoWave; import mil.nga.giat.geowave.analytic.distance.DistanceFn; import mil.nga.giat.geowave.analytic.mapreduce.MapReduceJobRunner; import mil.nga.giat.geowave.analytic.param.SampleParameters; import mil.nga.giat.geowave.analytic.sample.BahmanEtAlSampleProbabilityFn; import mil.nga.giat.geowave.analytic.sample.function.CentroidDistanceBasedSamplingRankFunction; import org.apache.hadoop.conf.Configuration; /** * Sample K points given a sample function. The sampled K points are are stored * as centroids within GeoWave. The sampling weight may be determined by the * relation of a point to a current set of centroids, thus a {@link DistanceFn} * instance is required. * */ public class RankSamplerJobRunner extends KSamplerJobRunner implements MapReduceJobRunner { public RankSamplerJobRunner() { setSamplingRankFunctionClass(CentroidDistanceBasedSamplingRankFunction.class); } @Override public int run( final Configuration config, final PropertyManagement runTimeProperties ) throws Exception { CentroidManagerGeoWave.setParameters( config, getScope(), runTimeProperties); runTimeProperties.storeIfEmpty( SampleParameters.Sample.PROBABILITY_FUNCTION, BahmanEtAlSampleProbabilityFn.class); CentroidDistanceBasedSamplingRankFunction.setParameters( config, getScope(), runTimeProperties); return super.run( config, runTimeProperties); } }