package mil.nga.giat.geowave.analytic.mapreduce.kmeans;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import mil.nga.giat.geowave.analytic.AnalyticItemWrapper;
import mil.nga.giat.geowave.analytic.AnalyticItemWrapperFactory;
import mil.nga.giat.geowave.analytic.GeoObjectDimensionValues;
import mil.nga.giat.geowave.analytic.ScopedJobConfiguration;
import mil.nga.giat.geowave.analytic.SimpleFeatureItemWrapperFactory;
import mil.nga.giat.geowave.analytic.clustering.CentroidManager;
import mil.nga.giat.geowave.analytic.clustering.CentroidManagerGeoWave;
import mil.nga.giat.geowave.analytic.clustering.CentroidPairing;
import mil.nga.giat.geowave.analytic.clustering.NestedGroupCentroidAssignment;
import mil.nga.giat.geowave.analytic.clustering.exception.MatchingCentroidNotFoundException;
import mil.nga.giat.geowave.analytic.extract.CentroidExtractor;
import mil.nga.giat.geowave.analytic.extract.SimpleFeatureCentroidExtractor;
import mil.nga.giat.geowave.analytic.kmeans.AssociationNotification;
import mil.nga.giat.geowave.analytic.mapreduce.GroupIDText;
import mil.nga.giat.geowave.analytic.param.CentroidParameters;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.mapreduce.GeoWaveWritableInputMapper;
import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputKey;
import mil.nga.giat.geowave.mapreduce.output.GeoWaveOutputKey;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.vividsolutions.jts.geom.Coordinate;
import com.vividsolutions.jts.geom.Point;
/**
* K-Means mapper and reducer. Mapper determines the closest centroid for an
* item in the item's assigned group. A group contains one or more centroids.
* The dimensions for the item are sent to the reducer along with the closest
* centroid ID.
* <p/>
* The reducer outputs a new copy of a centroid with the geometry and other
* dimensions updated towards their respective means for the assigned items.
* <p/>
* Properties:
*
* @formatter:off "KMeansMapReduce.Common.DistanceFunctionClass" - Used to
* determine distance to centroid
* <p/>
* "KMeansMapReduce.Centroid.ExtractorClass" - Used to extract a
* centroid point from an item geometry
* <p/>
* "KMeansMapReduce.Centroid.WrapperFactoryClass" -
* {@link AnalyticItemWrapperFactory} used to wrap spatial
* objects with centroid management functions
* <p/>
* "KMeansMapReduce.Centroid.ZoomLevel" - The current zoom level
* @see CentroidManagerGeoWave
* @formatter:on
*/
public class KMeansMapReduce
{
protected static final Logger LOGGER = LoggerFactory.getLogger(KMeansMapReduce.class);
/**
 * Mapper half of the K-Means job. Each incoming native value is wrapped as
 * an {@link AnalyticItemWrapper}, handed to the
 * {@link NestedGroupCentroidAssignment} to be paired with a centroid, and
 * the resulting pairing is emitted as (group ID + centroid ID) ->
 * serialized {@link GeoObjectDimensionValues}.
 */
public static class KMeansMapper extends
        GeoWaveWritableInputMapper<GroupIDText, BytesWritable>
{
    private NestedGroupCentroidAssignment<Object> nestedGroupCentroidAssigner;
    // Output holders are allocated once and overwritten for every record.
    private final GroupIDText outputKeyWritable = new GroupIDText();
    private final BytesWritable outputValWritable = new BytesWritable();
    private final GeoObjectDimensionValues association = new GeoObjectDimensionValues();
    protected CentroidExtractor<Object> centroidExtractor;
    protected AnalyticItemWrapperFactory<Object> itemWrapperFactory;

    /**
     * Callback handed to the centroid assigner. It records the pairing in
     * the reusable output key and in {@link #association}; the actual
     * write happens afterwards in {@link #mapNativeValue}.
     */
    AssociationNotification<Object> centroidAssociationFn = new AssociationNotification<Object>() {
        @Override
        public void notify(
                final CentroidPairing<Object> pairing ) {
            // Key: the group and ID of the centroid the item was paired with.
            outputKeyWritable.set(
                    pairing.getCentroid().getGroupID(),
                    pairing.getCentroid().getID());

            final double[] dimensionValues = pairing.getPairedItem().getDimensionValues();
            final Point itemPoint = centroidExtractor.getCentroid(pairing.getPairedItem().getWrappedItem());
            final Coordinate location = itemPoint.getCoordinate();

            // Value: the item's location, its extra dimension values and
            // the distance reported by the pairing.
            association.set(
                    location.x,
                    location.y,
                    location.z,
                    dimensionValues,
                    pairing.getDistance());
        }
    };

    /**
     * Pairs one input item with a centroid and writes the association.
     *
     * @param key
     *            the input key of the item (not used here)
     * @param value
     *            the native item to wrap and assign
     * @param context
     *            the mapper context that receives the key/value pair
     * @throws IOException
     *             propagated from the assigner or from the context write
     * @throws InterruptedException
     *             propagated from the context write
     */
    @Override
    protected void mapNativeValue(
            final GeoWaveInputKey key,
            final Object value,
            final Mapper<GeoWaveInputKey, ObjectWritable, GroupIDText, BytesWritable>.Context context )
            throws IOException,
            InterruptedException {
        final AnalyticItemWrapper<Object> wrapped = itemWrapperFactory.create(value);

        // NOTE(review): the key and association written below are filled in
        // by centroidAssociationFn. If findCentroidForLevel can return
        // without invoking the callback, the previous record's state would
        // be written again - confirm against NestedGroupCentroidAssignment.
        nestedGroupCentroidAssigner.findCentroidForLevel(
                wrapped,
                centroidAssociationFn);

        final byte[] serialized = association.toBinary();
        outputValWritable.set(
                serialized,
                0,
                serialized.length);
        context.write(
                outputKeyWritable,
                outputValWritable);
    }

    /**
     * Builds the centroid assigner, the centroid extractor and the item
     * wrapper factory from the job configuration.
     *
     * @param context
     *            the mapper context providing the job configuration
     * @throws IOException
     *             wraps any exception raised while creating or initializing
     *             one of the collaborators
     * @throws InterruptedException
     *             propagated from the parent setup
     */
    @Override
    protected void setup(
            final Mapper<GeoWaveInputKey, ObjectWritable, GroupIDText, BytesWritable>.Context context )
            throws IOException,
            InterruptedException {
        super.setup(context);

        final ScopedJobConfiguration scopedConfig = new ScopedJobConfiguration(
                context.getConfiguration(),
                KMeansMapReduce.class,
                KMeansMapReduce.LOGGER);

        // Every failure below is reported the same way, so a single guarded
        // section covers all three collaborators in creation order.
        try {
            nestedGroupCentroidAssigner = new NestedGroupCentroidAssignment<Object>(
                    context,
                    KMeansMapReduce.class,
                    KMeansMapReduce.LOGGER);

            centroidExtractor = scopedConfig.getInstance(
                    CentroidParameters.Centroid.EXTRACTOR_CLASS,
                    CentroidExtractor.class,
                    SimpleFeatureCentroidExtractor.class);

            itemWrapperFactory = scopedConfig.getInstance(
                    CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS,
                    AnalyticItemWrapperFactory.class,
                    SimpleFeatureItemWrapperFactory.class);
            itemWrapperFactory.initialize(
                    context,
                    KMeansMapReduce.class,
                    KMeansMapReduce.LOGGER);
        }
        catch (final Exception failure) {
            throw new IOException(
                    failure);
        }
    }
}
/**
 * Combiner used as an optimization: the {@link GeoObjectDimensionValues}
 * records received for one (group ID, centroid ID) key are accumulated into
 * a single record via {@link GeoObjectDimensionValues#add}, so fewer values
 * have to be passed on under that key.
 */
public static class KMeansCombiner extends
        Reducer<GroupIDText, BytesWritable, GroupIDText, BytesWritable>
{
    // Scratch object reused to decode each incoming value.
    private final GeoObjectDimensionValues geoObject = new GeoObjectDimensionValues();
    // Reused holder for the serialized, accumulated record.
    private final BytesWritable outputValWritable = new BytesWritable();

    /**
     * Accumulates all values received for a key and writes one combined
     * record under the same key.
     *
     * @param key
     *            the group ID / centroid ID the values belong to
     * @param values
     *            serialized {@link GeoObjectDimensionValues} records
     * @param context
     *            the context that receives the combined record
     * @throws IOException
     *             propagated from the context write
     * @throws InterruptedException
     *             propagated from the context write
     */
    @Override
    public void reduce(
            final GroupIDText key,
            final Iterable<BytesWritable> values,
            final Reducer<GroupIDText, BytesWritable, GroupIDText, BytesWritable>.Context context )
            throws IOException,
            InterruptedException {
        final GeoObjectDimensionValues totals = new GeoObjectDimensionValues();
        for (final BytesWritable value : values) {
            // getBytes() exposes the backing buffer, which may be longer
            // than the record it currently holds; copyBytes() yields an
            // array of exactly getLength() bytes, so the decoder never
            // sees trailing bytes that are not part of the record.
            geoObject.fromBinary(value.copyBytes());
            totals.add(geoObject);
        }

        final byte[] outData = totals.toBinary();
        outputValWritable.set(
                outData,
                0,
                outData.length);
        context.write(
                key,
                outputValWritable);
    }
}
/**
 * Reducer half of the K-Means job. For each (group ID, centroid ID) key it
 * accumulates the associated {@link GeoObjectDimensionValues}, divides the
 * accumulated coordinates and extra dimension values by the accumulated
 * count, and writes a new centroid created from those means through the
 * {@link CentroidManager}.
 */
public static class KMeansReduce extends
        Reducer<GroupIDText, BytesWritable, GeoWaveOutputKey, Object>
{
    protected CentroidManager<Object> centroidManager;
    // Scratch object reused to decode each incoming value.
    private final GeoObjectDimensionValues geoObject = new GeoObjectDimensionValues();
    // Index IDs used for every output key; populated in setup().
    private List<ByteArrayId> indexIds;

    /**
     * Computes the mean of all values assigned to one centroid and emits
     * the next version of that centroid. If the centroid cannot be looked
     * up, the error is logged and nothing is written for this key.
     *
     * @param key
     *            the group ID / centroid ID the values belong to
     * @param values
     *            serialized {@link GeoObjectDimensionValues} records
     * @param context
     *            the context that receives the new centroid
     * @throws IOException
     *             propagated from the centroid lookup or the context write
     * @throws InterruptedException
     *             propagated from the context write
     */
    @Override
    public void reduce(
            final GroupIDText key,
            final Iterable<BytesWritable> values,
            final Reducer<GroupIDText, BytesWritable, GeoWaveOutputKey, Object>.Context context )
            throws IOException,
            InterruptedException {
        final String centroidID = key.getID();
        final String groupID = key.getGroupID();

        final GeoObjectDimensionValues totals = new GeoObjectDimensionValues();
        for (final BytesWritable value : values) {
            // getBytes() exposes the backing buffer, which may be longer
            // than the record it currently holds; copyBytes() yields an
            // array of exactly getLength() bytes, so the decoder never
            // sees trailing bytes that are not part of the record.
            geoObject.fromBinary(value.copyBytes());
            totals.add(geoObject);
        }

        AnalyticItemWrapper<Object> centroid;
        try {
            centroid = getFeatureForCentroid(
                    centroidID,
                    groupID);
        }
        catch (final MatchingCentroidNotFoundException e) {
            LOGGER.error(
                    "Unable to get centroid " + centroidID + " for group " + groupID,
                    e);
            return;
        }

        // do not update the cost, because this cost is associated with the
        // centroid PRIOR to this update.
        centroid.resetAssociatonCount();
        centroid.incrementAssociationCount(totals.getCount());

        // Held as a double so the divisions below are floating point.
        final double ptCount = totals.getCount();

        // mean
        totals.x = totals.x / ptCount;
        totals.y = totals.y / ptCount;
        totals.z = totals.z / ptCount;

        // Only as many extra values as the centroid declares dimensions
        // are averaged.
        final int s = centroid.getExtraDimensions().length;
        for (int i = 0; i < s; i++) {
            totals.values[i] = totals.values[i] / ptCount;
        }

        // Parameterized form: the message is only assembled when trace
        // logging is enabled.
        KMeansMapReduce.LOGGER.trace(
                "{} contains {}",
                groupID,
                centroidID);

        final AnalyticItemWrapper<Object> nextCentroid = centroidManager.createNextCentroid(
                centroid.getWrappedItem(),
                groupID,
                new Coordinate(
                        totals.x,
                        totals.y,
                        totals.z),
                centroid.getExtraDimensions(),
                totals.values);

        // new center
        context.write(
                new GeoWaveOutputKey(
                        centroidManager.getDataTypeId(),
                        indexIds),
                nextCentroid.getWrappedItem());
    }

    /**
     * Looks up the current centroid through the centroid manager.
     *
     * @param id
     *            the centroid ID
     * @param groupID
     *            the group the centroid belongs to
     * @return the wrapped centroid
     * @throws IOException
     *             propagated from the centroid manager
     * @throws MatchingCentroidNotFoundException
     *             propagated from the centroid manager
     */
    private AnalyticItemWrapper<Object> getFeatureForCentroid(
            final String id,
            final String groupID )
            throws IOException,
            MatchingCentroidNotFoundException {
        return centroidManager.getCentroidById(
                id,
                groupID);
    }

    /**
     * Creates the centroid manager from the job context and records the
     * index ID used for all output keys.
     *
     * @param context
     *            the reducer context providing the job configuration
     * @throws IOException
     *             wraps any exception raised while creating the centroid
     *             manager
     * @throws InterruptedException
     *             propagated from the parent setup
     */
    @Override
    protected void setup(
            final Reducer<GroupIDText, BytesWritable, GeoWaveOutputKey, Object>.Context context )
            throws IOException,
            InterruptedException {
        super.setup(context);
        try {
            centroidManager = new CentroidManagerGeoWave<Object>(
                    context,
                    KMeansMapReduce.class,
                    KMeansMapReduce.LOGGER);
            indexIds = new ArrayList<ByteArrayId>();
            indexIds.add(centroidManager.getIndexId());
        }
        catch (final Exception e) {
            throw new IOException(
                    e);
        }
    }
}
}