package mil.nga.giat.geowave.analytic.mapreduce.kmeans; import java.io.IOException; import java.util.ArrayList; import java.util.List; import mil.nga.giat.geowave.analytic.AnalyticItemWrapper; import mil.nga.giat.geowave.analytic.AnalyticItemWrapperFactory; import mil.nga.giat.geowave.analytic.ScopedJobConfiguration; import mil.nga.giat.geowave.analytic.SimpleFeatureItemWrapperFactory; import mil.nga.giat.geowave.analytic.clustering.CentroidManager; import mil.nga.giat.geowave.analytic.clustering.CentroidManagerGeoWave; import mil.nga.giat.geowave.analytic.clustering.CentroidPairing; import mil.nga.giat.geowave.analytic.clustering.NestedGroupCentroidAssignment; import mil.nga.giat.geowave.analytic.clustering.exception.MatchingCentroidNotFoundException; import mil.nga.giat.geowave.analytic.kmeans.AssociationNotification; import mil.nga.giat.geowave.analytic.mapreduce.CountofDoubleWritable; import mil.nga.giat.geowave.analytic.mapreduce.GroupIDText; import mil.nga.giat.geowave.analytic.param.CentroidParameters; import mil.nga.giat.geowave.core.index.ByteArrayId; import mil.nga.giat.geowave.mapreduce.GeoWaveWritableInputMapper; import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputKey; import mil.nga.giat.geowave.mapreduce.output.GeoWaveOutputKey; import org.apache.hadoop.io.ObjectWritable; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Update the SINGLE cost of the clustering as a measure of distance from all * points to their closest center. * * As an FYI: During the clustering algorithm, the cost should be monotonic * decreasing. * * @formatter:off * * Context configuration parameters include: * * "UpdateCentroidCostMapReduce.Common.DistanceFunctionClass" -> * Used to determine distance to centroid * * "UpdateCentroidCostMapReduce.Centroid.WrapperFactoryClass" -> * {@link AnalyticItemWrapperFactory} to extract wrap spatial * objects with Centroid management functions * * @see CentroidManagerGeoWave * * @formatter:on * */ public class UpdateCentroidCostMapReduce { protected static final Logger LOGGER = LoggerFactory.getLogger(UpdateCentroidCostMapReduce.class); public static class UpdateCentroidCostMap extends GeoWaveWritableInputMapper<GroupIDText, CountofDoubleWritable> { private NestedGroupCentroidAssignment<Object> nestedGroupCentroidAssigner; private final CountofDoubleWritable dw = new CountofDoubleWritable(); protected final GroupIDText outputWritable = new GroupIDText(); protected AnalyticItemWrapperFactory<Object> itemWrapperFactory; private final AssociationNotification<Object> centroidAssociationFn = new AssociationNotification<Object>() { @Override public void notify( final CentroidPairing<Object> pairing ) { outputWritable.set( pairing.getCentroid().getGroupID(), pairing.getCentroid().getID()); } }; @Override protected void mapNativeValue( final GeoWaveInputKey key, final Object value, final Mapper<GeoWaveInputKey, ObjectWritable, GroupIDText, CountofDoubleWritable>.Context context ) throws IOException, InterruptedException { final AnalyticItemWrapper<Object> wrappedItem = itemWrapperFactory.create(value); dw.set( nestedGroupCentroidAssigner.findCentroidForLevel( wrappedItem, centroidAssociationFn), 1.0); context.write( outputWritable, dw); } @Override protected void setup( final Mapper<GeoWaveInputKey, ObjectWritable, GroupIDText, CountofDoubleWritable>.Context context ) throws IOException, InterruptedException { super.setup(context); final ScopedJobConfiguration config = new ScopedJobConfiguration( context.getConfiguration(), UpdateCentroidCostMapReduce.class, UpdateCentroidCostMapReduce.LOGGER); try { nestedGroupCentroidAssigner = new NestedGroupCentroidAssignment<Object>( context, UpdateCentroidCostMapReduce.class, UpdateCentroidCostMapReduce.LOGGER); } catch (final Exception e1) { throw new IOException( e1); } try { itemWrapperFactory = config.getInstance( CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS, AnalyticItemWrapperFactory.class, SimpleFeatureItemWrapperFactory.class); itemWrapperFactory.initialize( context, UpdateCentroidCostMapReduce.class, UpdateCentroidCostMapReduce.LOGGER); } catch (final Exception e1) { throw new IOException( e1); } } } public static class UpdateCentroidCostCombiner extends Reducer<GroupIDText, CountofDoubleWritable, GroupIDText, CountofDoubleWritable> { final CountofDoubleWritable outputValue = new CountofDoubleWritable(); @Override public void reduce( final GroupIDText key, final Iterable<CountofDoubleWritable> values, final Reducer<GroupIDText, CountofDoubleWritable, GroupIDText, CountofDoubleWritable>.Context context ) throws IOException, InterruptedException { double expectation = 0; double ptCount = 0; for (final CountofDoubleWritable value : values) { expectation += value.getValue(); ptCount += value.getCount(); } outputValue.set( expectation, ptCount); context.write( key, outputValue); } } public static class UpdateCentroidCostReducer extends Reducer<GroupIDText, CountofDoubleWritable, GeoWaveOutputKey, Object> { private CentroidManager<Object> centroidManager; private List<ByteArrayId> indexIds; @Override protected void reduce( final GroupIDText key, final Iterable<CountofDoubleWritable> values, final Reducer<GroupIDText, CountofDoubleWritable, GeoWaveOutputKey, Object>.Context context ) throws IOException, InterruptedException { final String id = key.getID(); final String groupID = key.getGroupID(); double sum = 0.0; double count = 0; for (final CountofDoubleWritable next : values) { sum += next.getValue(); count += next.getCount(); } AnalyticItemWrapper<Object> centroid; try { centroid = getFeatureForCentroid( id, groupID); } catch (final MatchingCentroidNotFoundException e) { LOGGER.error( "Unable to get centroid " + id + " for group " + groupID, e); return; } centroid.setCost(sum); centroid.resetAssociatonCount(); centroid.incrementAssociationCount((long) count); UpdateCentroidCostMapReduce.LOGGER.info("Update centroid " + centroid.toString()); context.write( new GeoWaveOutputKey( centroidManager.getDataTypeId(), indexIds), centroid.getWrappedItem()); } private AnalyticItemWrapper<Object> getFeatureForCentroid( final String id, final String groupID ) throws IOException, MatchingCentroidNotFoundException { return centroidManager.getCentroidById( id, groupID); } @Override protected void setup( final Reducer<GroupIDText, CountofDoubleWritable, GeoWaveOutputKey, Object>.Context context ) throws IOException, InterruptedException { super.setup(context); try { centroidManager = new CentroidManagerGeoWave<Object>( context, UpdateCentroidCostMapReduce.class, UpdateCentroidCostMapReduce.LOGGER); indexIds = new ArrayList<ByteArrayId>(); indexIds.add(centroidManager.getIndexId()); } catch (final Exception e) { UpdateCentroidCostMapReduce.LOGGER.warn( "Unable to initialize centroid manager", e); throw new IOException( "Unable to initialize centroid manager"); } } } }