package mil.nga.giat.geowave.analytic.mapreduce.kmeans.runner;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import mil.nga.giat.geowave.analytic.AnalyticItemWrapper;
import mil.nga.giat.geowave.analytic.PropertyManagement;
import mil.nga.giat.geowave.analytic.clustering.CentroidManager;
import mil.nga.giat.geowave.analytic.clustering.CentroidManagerGeoWave;
import mil.nga.giat.geowave.analytic.clustering.CentroidManager.CentroidProcessingFn;
import mil.nga.giat.geowave.analytic.mapreduce.MapReduceJobRunner;
import org.apache.hadoop.conf.Configuration;
/**
* Determine the number of iterations in the KMeans Parallel initialization
* step. Each iteration samples a set of K points from the full population. The
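* number of iterations is log(psi), the natural logarithm of psi, where psi is
* the initial cost of the system with a single centroid. The result is rounded
* to the nearest integer and is never lower than the configured iterations
* count. That floor defaults to 1; to obtain a reasonable sample, configure a
* minimum of 2 through setIterationsCount.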
*
* This class has been adapted to determine the maximum number of iterations
* required across multiple groups. Each group is its own set of clusters.
*
*/
public class IterationCountCalculateRunner<T> implements MapReduceJobRunner
{
    /**
     * Serves two purposes: before {@link #run} it is the lower bound for the
     * computed value; after {@link #run} it holds the computed value.
     */
    private int iterationsCount = 1;

    public IterationCountCalculateRunner() {}

    /**
     * @return the current iterations count; the lower bound before
     *         {@link #run} has been invoked, the computed count afterwards
     */
    public int getIterationsCount() {
        return iterationsCount;
    }

    /**
     * @param iterationsCount
     *            the value to use as the lower bound of the next calculation
     */
    public void setIterationsCount(
            final int iterationsCount ) {
        this.iterationsCount = iterationsCount;
    }

    /**
     * Recomputes the iterations count and stores it in this runner; read it
     * back with {@link #getIterationsCount()}.
     *
     * @param config
     *            not used by this runner
     * @param runTimeProperties
     *            properties used to construct the centroid manager
     * @return always 0
     * @throws Exception
     *             if the centroids cannot be processed
     */
    @Override
    public int run(
            final Configuration config,
            final PropertyManagement runTimeProperties )
            throws Exception {
        iterationsCount = getIterations(runTimeProperties);
        return 0;
    }

    /**
     * Finds the largest per-group iteration estimate, where a group's estimate
     * is the rounded natural logarithm of the highest centroid cost in that
     * group (0 for a group without centroids).
     *
     * @param propertyManagement
     *            properties used to construct the centroid manager
     * @return the larger of the current iterations count and the largest
     *         per-group estimate
     * @throws IOException
     *             propagated from the centroid manager
     */
    private int getIterations(
            final PropertyManagement propertyManagement )
            throws IOException {
        final CentroidManager<T> manager = new CentroidManagerGeoWave<T>(
                propertyManagement);
        // Mutable holder so the anonymous callback can record its result.
        final AtomicInteger worstCase = new AtomicInteger(
                0);

        // Must iterate through the worst case: keep the largest estimate seen.
        final CentroidProcessingFn<T> worstCaseTracker = new CentroidProcessingFn<T>() {
            @Override
            public int processGroup(
                    final String groupID,
                    final List<AnalyticItemWrapper<T>> centroids ) {
                final int soFar = worstCase.get();
                int groupEstimate = 0;
                if (!centroids.isEmpty()) {
                    groupEstimate = (int) Math.round(Math.log(maxCost(centroids)));
                }
                worstCase.set(Math.max(
                        soFar,
                        groupEstimate));
                return 0;
            }
        };
        manager.processForAllGroups(worstCaseTracker);

        final int floor = iterationsCount;
        final int computed = worstCase.get();
        return (computed > floor) ? computed : floor;
    }

    /**
     * @param centroids
     *            the centroids of a single group
     * @return the highest cost reported by any of the centroids, or 0.0 when
     *         none exceeds zero
     */
    private double maxCost(
            final List<AnalyticItemWrapper<T>> centroids ) {
        double highest = 0.0;
        for (final AnalyticItemWrapper<T> item : centroids) {
            // Math.max (rather than a comparison) so NaN costs propagate.
            final double cost = item.getCost();
            highest = Math.max(
                    highest,
                    cost);
        }
        return highest;
    }
}