package mil.nga.giat.geowave.analytic.mapreduce.clustering;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicInteger;
import mil.nga.giat.geowave.analytic.AnalyticItemWrapperFactory;
import mil.nga.giat.geowave.analytic.ScopedJobConfiguration;
import mil.nga.giat.geowave.analytic.SimpleFeatureItemWrapperFactory;
import mil.nga.giat.geowave.analytic.clustering.CentroidManagerGeoWave;
import mil.nga.giat.geowave.analytic.clustering.CentroidPairing;
import mil.nga.giat.geowave.analytic.clustering.NestedGroupCentroidAssignment;
import mil.nga.giat.geowave.analytic.extract.CentroidExtractor;
import mil.nga.giat.geowave.analytic.extract.SimpleFeatureCentroidExtractor;
import mil.nga.giat.geowave.analytic.kmeans.AssociationNotification;
import mil.nga.giat.geowave.analytic.mapreduce.GroupIDText;
import mil.nga.giat.geowave.analytic.param.CentroidParameters;
import mil.nga.giat.geowave.mapreduce.GeoWaveWritableInputMapper;
import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputKey;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Adjust input items so that the assigned centroid becomes the group ID. If
* the item already has an assigned group ID, that group ID is replaced in the
* output.
*
* From a multi-level clustering algorithm, an item has a different grouping in
* each level. Items are clustered within their respective groups.
*
* @formatter:off
*
* Context configuration parameters include:
*
* "GroupAssignmentMapReduce.Common.DistanceFunctionClass" ->
* Used to determine distance to centroid
*
* "GroupAssignmentMapReduce.Centroid.ExtractorClass" ->
* {@link mil.nga.giat.geowave.analytic.extract.CentroidExtractor}
*
* "GroupAssignmentMapReduce.Centroid.WrapperFactoryClass" ->
* {@link AnalyticItemWrapperFactory} to wrap spatial objects with
* centroid management functions
*
* "GroupAssignmentMapReduce.Centroid.ZoomLevel" -> The current
* zoom level
*
* @see CentroidManagerGeoWave
* @formatter:on
*
*/
public class GroupAssignmentMapReduce
{
    protected static final Logger LOGGER = LoggerFactory.getLogger(GroupAssignmentMapReduce.class);

    /**
     * Mapper that re-emits each input item under its original key after
     * stamping it with the ID of the centroid it was paired with.
     *
     * For every item, the {@link NestedGroupCentroidAssignment} is asked to
     * find a centroid; the association callback sets the item's group ID to the
     * centroid's ID and its zoom level to the centroid's zoom level plus one.
     * A per-centroid tally of associations is kept and logged in
     * {@link #cleanup}.
     */
    public static class GroupAssignmentMapper extends
            GeoWaveWritableInputMapper<GeoWaveInputKey, ObjectWritable>
    {
        /** Resolves the centroid for an item; created in {@link #setup}. */
        private NestedGroupCentroidAssignment<Object> nestedGroupCentroidAssigner;

        /** Not referenced within this class; retained for subclasses. */
        protected GroupIDText outputKeyWritable = new GroupIDText();

        /** Reused output value; its contents are replaced for every record. */
        protected ObjectWritable outputValWritable = new ObjectWritable();

        /**
         * Instantiated in {@link #setup}; not otherwise referenced within this
         * class.
         */
        protected CentroidExtractor<Object> centroidExtractor;

        /** Wraps raw input values before centroid lookup. */
        protected AnalyticItemWrapperFactory<Object> itemWrapperFactory;

        /** Number of associations seen per centroid ID, reported at cleanup. */
        private final Map<String, AtomicInteger> logCounts = new HashMap<String, AtomicInteger>();

        /**
         * Finds the centroid for the given value, updates the value's group ID
         * and zoom level from that centroid, and writes the value out under
         * the unchanged input key.
         *
         * @param key
         *            the input key, also used as the output key
         * @param value
         *            the native (already decoded) input value
         * @param context
         *            the mapper context the result is written to
         * @throws IOException
         *             if centroid lookup or writing the output fails
         * @throws InterruptedException
         *             if writing the output is interrupted
         */
        @Override
        protected void mapNativeValue(
                final GeoWaveInputKey key,
                final Object value,
                final org.apache.hadoop.mapreduce.Mapper<GeoWaveInputKey, ObjectWritable, GeoWaveInputKey, ObjectWritable>.Context context )
                throws IOException,
                InterruptedException {
            // The callback is created per record because it captures 'key'.
            final AssociationNotification<Object> centroidAssociationFn = new AssociationNotification<Object>() {
                @Override
                public void notify(
                        final CentroidPairing<Object> pairing ) {
                    // Read the centroid ID once; it is used for the group ID
                    // and for the per-centroid tally.
                    final String centroidId = pairing.getCentroid().getID();

                    pairing.getPairedItem().setGroupID(centroidId);
                    pairing.getPairedItem().setZoomLevel(pairing.getCentroid().getZoomLevel() + 1);

                    // Copy the payload out of the ObjectWritable returned by
                    // 'toWritableValue' into the reused outputValWritable, so
                    // the field's contents are updated rather than the field
                    // being reassigned.
                    outputValWritable.set(toWritableValue(
                            key,
                            pairing.getPairedItem().getWrappedItem()).get());

                    AtomicInteger count = logCounts.get(centroidId);
                    if (count == null) {
                        count = new AtomicInteger(0);
                        logCounts.put(
                                centroidId,
                                count);
                    }
                    count.incrementAndGet();
                }
            };

            nestedGroupCentroidAssigner.findCentroidForLevel(
                    itemWrapperFactory.create(value),
                    centroidAssociationFn);

            // NOTE(review): if findCentroidForLevel returns without invoking
            // the callback, outputValWritable still holds the previous
            // record's contents and is written again here - confirm the
            // callback always fires.
            context.write(
                    key,
                    outputValWritable);
        }

        /**
         * Logs the number of associations recorded for each centroid ID, then
         * delegates to the superclass cleanup.
         *
         * @param context
         *            the mapper context
         * @throws IOException
         *             if the superclass cleanup fails
         * @throws InterruptedException
         *             if the superclass cleanup is interrupted
         */
        @Override
        protected void cleanup(
                final Mapper<GeoWaveInputKey, ObjectWritable, GeoWaveInputKey, ObjectWritable>.Context context )
                throws IOException,
                InterruptedException {
            for (final Entry<String, AtomicInteger> e : logCounts.entrySet()) {
                GroupAssignmentMapReduce.LOGGER.info(
                        "{} = {}",
                        e.getKey(),
                        e.getValue());
            }
            super.cleanup(context);
        }

        /**
         * Builds the centroid assigner, the centroid extractor and the item
         * wrapper factory from the job configuration scoped to
         * {@link GroupAssignmentMapReduce}.
         *
         * @param context
         *            the mapper context supplying the configuration
         * @throws IOException
         *             if any of the components cannot be created or
         *             initialized; the original failure is attached as the
         *             cause
         * @throws InterruptedException
         *             if the superclass setup is interrupted
         */
        @Override
        protected void setup(
                final Mapper<GeoWaveInputKey, ObjectWritable, GeoWaveInputKey, ObjectWritable>.Context context )
                throws IOException,
                InterruptedException {
            super.setup(context);

            final ScopedJobConfiguration config = new ScopedJobConfiguration(
                    context.getConfiguration(),
                    GroupAssignmentMapReduce.class,
                    GroupAssignmentMapReduce.LOGGER);

            try {
                nestedGroupCentroidAssigner = new NestedGroupCentroidAssignment<Object>(
                        context,
                        GroupAssignmentMapReduce.class,
                        GroupAssignmentMapReduce.LOGGER);
            }
            catch (final Exception e1) {
                throw new IOException(
                        "Unable to create the nested group centroid assignment",
                        e1);
            }

            try {
                // Falls back to SimpleFeatureCentroidExtractor when no
                // extractor class is configured.
                centroidExtractor = config.getInstance(
                        CentroidParameters.Centroid.EXTRACTOR_CLASS,
                        CentroidExtractor.class,
                        SimpleFeatureCentroidExtractor.class);
            }
            catch (final Exception e1) {
                throw new IOException(
                        "Unable to instantiate the centroid extractor",
                        e1);
            }

            try {
                // Falls back to SimpleFeatureItemWrapperFactory when no
                // wrapper factory class is configured.
                itemWrapperFactory = config.getInstance(
                        CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS,
                        AnalyticItemWrapperFactory.class,
                        SimpleFeatureItemWrapperFactory.class);
                itemWrapperFactory.initialize(
                        context,
                        GroupAssignmentMapReduce.class,
                        GroupAssignmentMapReduce.LOGGER);
            }
            catch (final Exception e1) {
                throw new IOException(
                        "Unable to instantiate or initialize the item wrapper factory",
                        e1);
            }
        }
    }
}