package mil.nga.giat.geowave.analytic.mapreduce.clustering;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicInteger;

import mil.nga.giat.geowave.analytic.AnalyticItemWrapperFactory;
import mil.nga.giat.geowave.analytic.ScopedJobConfiguration;
import mil.nga.giat.geowave.analytic.SimpleFeatureItemWrapperFactory;
import mil.nga.giat.geowave.analytic.clustering.CentroidManagerGeoWave;
import mil.nga.giat.geowave.analytic.clustering.CentroidPairing;
import mil.nga.giat.geowave.analytic.clustering.NestedGroupCentroidAssignment;
import mil.nga.giat.geowave.analytic.extract.CentroidExtractor;
import mil.nga.giat.geowave.analytic.extract.SimpleFeatureCentroidExtractor;
import mil.nga.giat.geowave.analytic.kmeans.AssociationNotification;
import mil.nga.giat.geowave.analytic.mapreduce.GroupIDText;
import mil.nga.giat.geowave.analytic.param.CentroidParameters;
import mil.nga.giat.geowave.mapreduce.GeoWaveWritableInputMapper;
import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputKey;

import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Adjust input items so that the assigned centroid becomes the group ID. If
 * the item has an assigned group ID, the resulting item's group ID is replaced
 * in the output.
 *
 * From a multi-level clustering algorithm, an item has a different grouping in
 * each level. Items are clustered within their respective groups.
 *
 * @formatter:off
 *
 * Context configuration parameters include:
 *
 * "GroupAssignmentMapReduce.Common.DistanceFunctionClass" ->
 * Used to determine distance to centroid
 *
 * "GroupAssignmentMapReduce.Centroid.ExtractorClass" ->
 * {@link mil.nga.giat.geowave.analytic.extract.CentroidExtractor}
 *
 * "GroupAssignmentMapReduce.Centroid.WrapperFactoryClass" ->
 * {@link AnalyticItemWrapperFactory} to extract wrap spatial
 * objects with Centroid management functions
 *
 * "GroupAssignmentMapReduce.Centroid.ZoomLevel" -> The current
 * zoom level
 *
 * @see CentroidManagerGeoWave
 * @formatter:on
 *
 */
public class GroupAssignmentMapReduce
{
	protected static final Logger LOGGER = LoggerFactory.getLogger(GroupAssignmentMapReduce.class);

	/**
	 * Map-only task: assigns each input item to its nearest centroid at the
	 * configured zoom level and emits the item with the centroid's ID as its
	 * group ID (and zoom level advanced by one).
	 */
	public static class GroupAssignmentMapper extends
			GeoWaveWritableInputMapper<GeoWaveInputKey, ObjectWritable>
	{
		private NestedGroupCentroidAssignment<Object> nestedGroupCentroidAssigner;
		protected GroupIDText outputKeyWritable = new GroupIDText();
		protected ObjectWritable outputValWritable = new ObjectWritable();
		// Configured in setup() for subclass use; not referenced by this
		// mapper's own map/cleanup logic.
		protected CentroidExtractor<Object> centroidExtractor;
		protected AnalyticItemWrapperFactory<Object> itemWrapperFactory;
		// Per-centroid assignment counts, logged in cleanup(). AtomicInteger
		// is used as a mutable int holder; the mapper itself is
		// single-threaded.
		private final Map<String, AtomicInteger> logCounts = new HashMap<String, AtomicInteger>();

		/**
		 * Assigns {@code value} to the best centroid for the configured level
		 * and writes the (possibly re-grouped) item back out under the same
		 * key.
		 *
		 * NOTE(review): outputValWritable is only refreshed inside the
		 * notification callback; this assumes findCentroidForLevel always
		 * invokes the callback — confirm, otherwise a stale value could be
		 * written.
		 */
		@Override
		protected void mapNativeValue(
				final GeoWaveInputKey key,
				final Object value,
				final org.apache.hadoop.mapreduce.Mapper<GeoWaveInputKey, ObjectWritable, GeoWaveInputKey, ObjectWritable>.Context context )
				throws IOException,
				InterruptedException {
			final AssociationNotification<Object> centroidAssociationFn = new AssociationNotification<Object>() {
				@Override
				public void notify(
						final CentroidPairing<Object> pairing ) {
					// The paired item inherits the centroid's ID as its group
					// and moves one zoom level deeper.
					pairing.getPairedItem().setGroupID(
							pairing.getCentroid().getID());
					pairing.getPairedItem().setZoomLevel(
							pairing.getCentroid().getZoomLevel() + 1);
					// Just get the contents of the returned ObjectWritable to
					// avoid having to assign outputValWritable rather than
					// update its contents. The 'toWritableValue' method is
					// efficient, not creating an extra instance of
					// ObjectWritable each time, so this is just a simple
					// exchange of a reference.
					outputValWritable.set(toWritableValue(
							key,
							pairing.getPairedItem().getWrappedItem()).get());

					// Tally how many items landed on this centroid (logged in
					// cleanup()).
					AtomicInteger ii = logCounts.get(pairing.getCentroid().getID());
					if (ii == null) {
						ii = new AtomicInteger(
								0);
						logCounts.put(
								pairing.getCentroid().getID(),
								ii);
					}
					ii.incrementAndGet();
				}
			};
			nestedGroupCentroidAssigner.findCentroidForLevel(
					itemWrapperFactory.create(value),
					centroidAssociationFn);
			context.write(
					key,
					outputValWritable);
		}

		/**
		 * Logs the per-centroid assignment counts accumulated during the map
		 * phase, then delegates to the superclass cleanup.
		 */
		@Override
		protected void cleanup(
				final org.apache.hadoop.mapreduce.Mapper<GeoWaveInputKey, ObjectWritable, GeoWaveInputKey, ObjectWritable>.Context context )
				throws IOException,
				InterruptedException {
			for (final Entry<String, AtomicInteger> e : logCounts.entrySet()) {
				// Parameterized logging avoids eager string concatenation.
				GroupAssignmentMapReduce.LOGGER.info(
						"{} = {}",
						e.getKey(),
						e.getValue());
			}
			super.cleanup(context);
		}

		/**
		 * Resolves the centroid assigner, centroid extractor and item wrapper
		 * factory from the job configuration (scoped to
		 * {@link GroupAssignmentMapReduce}). Any failure to instantiate a
		 * configured class is rethrown as an {@link IOException} so Hadoop
		 * fails the task.
		 */
		@Override
		protected void setup(
				final Mapper<GeoWaveInputKey, ObjectWritable, GeoWaveInputKey, ObjectWritable>.Context context )
				throws IOException,
				InterruptedException {
			super.setup(context);
			final ScopedJobConfiguration config = new ScopedJobConfiguration(
					context.getConfiguration(),
					GroupAssignmentMapReduce.class,
					GroupAssignmentMapReduce.LOGGER);

			try {
				nestedGroupCentroidAssigner = new NestedGroupCentroidAssignment<Object>(
						context,
						GroupAssignmentMapReduce.class,
						GroupAssignmentMapReduce.LOGGER);
			}
			catch (final Exception e1) {
				// Preserve the cause so the task failure is diagnosable.
				throw new IOException(
						e1);
			}

			try {
				centroidExtractor = config.getInstance(
						CentroidParameters.Centroid.EXTRACTOR_CLASS,
						CentroidExtractor.class,
						SimpleFeatureCentroidExtractor.class);
			}
			catch (final Exception e1) {
				throw new IOException(
						e1);
			}

			try {
				itemWrapperFactory = config.getInstance(
						CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS,
						AnalyticItemWrapperFactory.class,
						SimpleFeatureItemWrapperFactory.class);
				itemWrapperFactory.initialize(
						context,
						GroupAssignmentMapReduce.class,
						GroupAssignmentMapReduce.LOGGER);
			}
			catch (final Exception e1) {
				throw new IOException(
						e1);
			}
		}
	}
}