package mil.nga.giat.geowave.analytic.clustering; import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Set; import mil.nga.giat.geowave.analytic.AnalyticItemWrapper; import mil.nga.giat.geowave.analytic.PropertyManagement; import mil.nga.giat.geowave.analytic.ScopedJobConfiguration; import mil.nga.giat.geowave.analytic.distance.DistanceFn; import mil.nga.giat.geowave.analytic.distance.FeatureCentroidDistanceFn; import mil.nga.giat.geowave.analytic.kmeans.AssociationNotification; import mil.nga.giat.geowave.analytic.kmeans.CentroidAssociationFn; import mil.nga.giat.geowave.analytic.param.CentroidParameters; import mil.nga.giat.geowave.analytic.param.CommonParameters; import mil.nga.giat.geowave.analytic.param.GlobalParameters; import mil.nga.giat.geowave.analytic.param.ParameterEnum; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.JobContext; import org.slf4j.Logger; /** * * A helper class that finds the closest centroid to a point at a specific zoom * level. * * If the starting level does match the specified level, then the centroid tree * is 'walked' down. Walking up to higher levels is not supported. * * Levels are number 1 to n where 1 is the top tier. The current tier being * computed may have a different batch ID (temporary) than all upper level * tiers. In this case, a parent batch id is provided to resolve groups for * those tiers. This approach is often used in speculative computation at each * tier. * * Parameters include: * * @formatter:off * * "NestedGroupCentroidAssignment.Global.ParentBatchId" -> Parent * Tier Batch IDs. If not present then assume value * NestedGroupCentroidAssignment.Global.BatchId * * "NestedGroupCentroidAssignment.Global.BatchId" -> batch id for * current tier. * * "NestedGroupCentroidAssignment.Global.ZoomLevel" -> current * tier (level) * * "NestedGroupCentroidAssignment.Common.DistanceFunctionClass" * -> distance function used for association of data points to * centroid. * * @see mil.nga.giat.geowave.analytic.clustering.CentroidManagerGeoWave * @formatter:on * * @param <T> */ public class NestedGroupCentroidAssignment<T> { private final CentroidAssociationFn<T> associationdFunction = new CentroidAssociationFn<T>(); private final CentroidManager<T> centroidManager; private final int endZoomLevel; private final String parentBatchID; public NestedGroupCentroidAssignment( final CentroidManager<T> centroidManager, final int endZoomLevel, final String parentBatchID, final DistanceFn<T> distanceFunction ) { super(); this.centroidManager = centroidManager; this.endZoomLevel = endZoomLevel; this.parentBatchID = parentBatchID; this.associationdFunction.setDistanceFunction(distanceFunction); } public NestedGroupCentroidAssignment( final JobContext context, final Class<?> scope, final Logger logger ) throws InstantiationException, IllegalAccessException, IOException { final ScopedJobConfiguration config = new ScopedJobConfiguration( context.getConfiguration(), scope, logger); endZoomLevel = config.getInt( CentroidParameters.Centroid.ZOOM_LEVEL, 1); parentBatchID = config.getString( GlobalParameters.Global.PARENT_BATCH_ID, config.getString( GlobalParameters.Global.BATCH_ID, null)); @SuppressWarnings("unchecked") final DistanceFn<T> distanceFunction = config.getInstance( CommonParameters.Common.DISTANCE_FUNCTION_CLASS, DistanceFn.class, FeatureCentroidDistanceFn.class); this.associationdFunction.setDistanceFunction(distanceFunction); centroidManager = new CentroidManagerGeoWave<T>( context, scope); } /** * Override zoomLevel from parameters * * @param config * @param runTimeProperties * @param zoomLevel */ public static void setZoomLevel( final Configuration config, final Class<?> scope, final int zoomLevel ) { CentroidParameters.Centroid.ZOOM_LEVEL.getHelper().setValue( config, scope, zoomLevel); } /** * Override zoomLevel from parameters * * @param config * @param runTimeProperties * @param zoomLevel */ public static void setParentBatchID( final Configuration config, final Class<?> scope, final String parentID ) { GlobalParameters.Global.PARENT_BATCH_ID.getHelper().setValue( config, scope, parentID); } public static Collection<ParameterEnum<?>> getParameters() { final Set<ParameterEnum<?>> params = new HashSet<ParameterEnum<?>>(); params.addAll(CentroidManagerGeoWave.getParameters()); params.addAll(Arrays.asList(new ParameterEnum<?>[] { CentroidParameters.Centroid.ZOOM_LEVEL, GlobalParameters.Global.PARENT_BATCH_ID, CommonParameters.Common.DISTANCE_FUNCTION_CLASS })); return params; } public List<AnalyticItemWrapper<T>> getCentroidsForGroup( final String groupID ) throws IOException { return centroidManager.getCentroidsForGroup(groupID); } /** * Get the associated group id from the current zoom level * */ public String getGroupForLevel( final AnalyticItemWrapper<T> item ) throws IOException { final GroupHolder group = new GroupHolder(); group.setGroupID(item.getGroupID()); int currentLevel = item.getZoomLevel(); while (endZoomLevel != currentLevel) { final List<AnalyticItemWrapper<T>> centroids = centroidManager.getCentroidsForGroup( parentBatchID, group.getGroupID()); if (centroids.size() == 0) { throw new IOException( "Cannot find group " + group.getGroupID()); } associationdFunction.compute( item, centroids, new AssociationNotification<T>() { @Override public void notify( final CentroidPairing<T> pairing ) { group.setGroupID(pairing.getCentroid().getID()); } }); currentLevel = centroids.get( 0).getZoomLevel() + 1; } return group.getGroupID(); } public double findCentroidForLevel( final AnalyticItemWrapper<T> item, final AssociationNotification<T> associationNotification ) throws IOException { final GroupHolder group = new GroupHolder(); group.setGroupID(item.getGroupID()); double currentDistance = Double.NaN; int currentLevel = item.getZoomLevel(); boolean atEndLevel = false; // force one time through while (!atEndLevel) { // save status as 'final' to use in the following closure. final boolean reachedEndLevel = currentLevel == endZoomLevel; atEndLevel = reachedEndLevel; // only use the parent batch ID for upper levels, otherwise use the // current batch ID. final List<AnalyticItemWrapper<T>> centroids = (currentLevel == endZoomLevel) ? centroidManager .getCentroidsForGroup(group.getGroupID()) : centroidManager.getCentroidsForGroup( parentBatchID, group.getGroupID()); if (centroids.size() == 0) { throw new IOException( "Cannot find group " + group.getGroupID()); } currentDistance = associationdFunction.compute( item, centroids, new AssociationNotification<T>() { @Override public void notify( final CentroidPairing<T> pairing ) { group.setGroupID(pairing.getCentroid().getID()); if (reachedEndLevel) { associationNotification.notify(pairing); } } }); // update for next loop currentLevel = centroids.get( 0).getZoomLevel() + 1; } return currentDistance; } public static void setParameters( final Configuration config, final Class<?> scope, final PropertyManagement runTimeProperties ) { CentroidManagerGeoWave.setParameters( config, scope, runTimeProperties); runTimeProperties.setConfig( new ParameterEnum[] { CommonParameters.Common.DISTANCE_FUNCTION_CLASS, CentroidParameters.Centroid.ZOOM_LEVEL, GlobalParameters.Global.BATCH_ID, GlobalParameters.Global.PARENT_BATCH_ID }, config, scope); } private class GroupHolder { private String groupID; public String getGroupID() { return groupID; } public void setGroupID( final String groupID ) { this.groupID = groupID; } } }