package mil.nga.giat.geowave.analytic.mapreduce.clustering;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import mil.nga.giat.geowave.adapter.vector.FeatureDataAdapter;
import mil.nga.giat.geowave.analytic.AnalyticFeature;
import mil.nga.giat.geowave.analytic.AnalyticItemWrapper;
import mil.nga.giat.geowave.analytic.AnalyticItemWrapperFactory;
import mil.nga.giat.geowave.analytic.Projection;
import mil.nga.giat.geowave.analytic.ScopedJobConfiguration;
import mil.nga.giat.geowave.analytic.SimpleFeatureItemWrapperFactory;
import mil.nga.giat.geowave.analytic.SimpleFeatureProjection;
import mil.nga.giat.geowave.analytic.clustering.CentroidManager;
import mil.nga.giat.geowave.analytic.clustering.CentroidManagerGeoWave;
import mil.nga.giat.geowave.analytic.clustering.ClusteringUtils;
import mil.nga.giat.geowave.analytic.clustering.NestedGroupCentroidAssignment;
import mil.nga.giat.geowave.analytic.param.HullParameters;
import mil.nga.giat.geowave.core.geotime.ingest.SpatialDimensionalityTypeProvider;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.index.StringUtils;
import mil.nga.giat.geowave.mapreduce.GeoWaveWritableInputMapper;
import mil.nga.giat.geowave.mapreduce.GeoWaveWritableInputReducer;
import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputKey;
import mil.nga.giat.geowave.mapreduce.output.GeoWaveOutputKey;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.geotools.feature.type.BasicFeatureTypes;
import org.opengis.feature.simple.SimpleFeature;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.vividsolutions.jts.algorithm.ConvexHull;
import com.vividsolutions.jts.geom.Coordinate;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.GeometryFactory;
/**
* Compute the convex hull over all points associated with each centroid. Each
* hull is written to the output as a simple feature.
*
* Properties (a configuration sketch follows the list):
*
* @formatter:off
*
* "ConvexHullMapReduce.Hull.DataTypeId" - ID of the data type used to
* store the hull polygons as simple features - defaults to
* "convex_hull"
*
* "ConvexHullMapReduce.Hull.ProjectionClass" - instance of
* {@link mil.nga.giat.geowave.analytic.Projection} used to extract a
* geometry from each input entry
*
* "ConvexHullMapReduce.Hull.IndexId" - the index ID used for the
* output simple features
*
* "ConvexHullMapReduce.Hull.WrapperFactoryClass" - an
* {@link AnalyticItemWrapperFactory} used to determine the group and
* level associated with each entry
*
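* A minimal sketch of setting these properties, assuming a Hadoop
* {@code Configuration} named {@code conf}; the values shown are the
* defaults this class falls back to when the properties are unset:
*
* <pre>
* conf.set(
*     "ConvexHullMapReduce.Hull.DataTypeId",
*     "convex_hull");
* conf.set(
*     "ConvexHullMapReduce.Hull.ProjectionClass",
*     SimpleFeatureProjection.class.getName());
* </pre>
*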
* @see mil.nga.giat.geowave.analytic.clustering.NestedGroupCentroidAssignment
*
* @formatter:on
*/
public class ConvexHullMapReduce
{
protected static final Logger LOGGER = LoggerFactory.getLogger(ConvexHullMapReduce.class);
public static class ConvexHullMap<T> extends
GeoWaveWritableInputMapper<GeoWaveInputKey, ObjectWritable>
{
protected GeoWaveInputKey outputKey = new GeoWaveInputKey();
private ObjectWritable currentValue;
private AnalyticItemWrapperFactory<T> itemWrapperFactory;
private NestedGroupCentroidAssignment<T> nestedGroupCentroidAssigner;
// Override the parent so the raw writable can be cached; there is no
// need to re-serialize the decoded value on output.
@Override
protected void mapWritableValue(
final GeoWaveInputKey key,
final ObjectWritable value,
final Mapper<GeoWaveInputKey, ObjectWritable, GeoWaveInputKey, ObjectWritable>.Context context )
throws IOException,
InterruptedException {
// Cache the writable since the output value is the input object;
// the de-serialized input object is only used to determine the
// centroid group for the output key.
currentValue = value;
super.mapWritableValue(
key,
value,
context);
}
@Override
protected void mapNativeValue(
final GeoWaveInputKey key,
final Object value,
final org.apache.hadoop.mapreduce.Mapper<GeoWaveInputKey, ObjectWritable, GeoWaveInputKey, ObjectWritable>.Context context )
throws IOException,
InterruptedException {
@SuppressWarnings("unchecked")
final AnalyticItemWrapper<T> wrapper = itemWrapperFactory.create((T) value);
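// re-key the entry by its centroid group so the reducer receives all
// members of a cluster together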
outputKey.setAdapterId(key.getAdapterId());
outputKey.setDataId(new ByteArrayId(
StringUtils.stringToBinary(nestedGroupCentroidAssigner.getGroupForLevel(wrapper))));
outputKey.setInsertionId(key.getInsertionId());
context.write(
outputKey,
currentValue);
}
@SuppressWarnings("unchecked")
@Override
protected void setup(
final Mapper<GeoWaveInputKey, ObjectWritable, GeoWaveInputKey, ObjectWritable>.Context context )
throws IOException,
InterruptedException {
super.setup(context);
final ScopedJobConfiguration config = new ScopedJobConfiguration(
context.getConfiguration(),
ConvexHullMapReduce.class,
ConvexHullMapReduce.LOGGER);
try {
itemWrapperFactory = config.getInstance(
HullParameters.Hull.WRAPPER_FACTORY_CLASS,
AnalyticItemWrapperFactory.class,
SimpleFeatureItemWrapperFactory.class);
itemWrapperFactory.initialize(
context,
ConvexHullMapReduce.class,
ConvexHullMapReduce.LOGGER);
}
catch (final Exception e1) {
throw new IOException(
e1);
}
try {
nestedGroupCentroidAssigner = new NestedGroupCentroidAssignment<T>(
context,
ConvexHullMapReduce.class,
ConvexHullMapReduce.LOGGER);
}
catch (final Exception e1) {
throw new IOException(
e1);
}
}
}
public static class ConvexHullReducer<T> extends
GeoWaveWritableInputReducer<GeoWaveOutputKey, SimpleFeature>
{
private CentroidManager<T> centroidManager;
private List<ByteArrayId> indexIds;
private FeatureDataAdapter outputAdapter;
private Projection<T> projectionFunction;
/*
* Logic inspired by SpatialHadoop convexHullStream method
*/
// absolute point cloud limit
private final int pointCloudThreshold = 50000000;
private final List<Coordinate> batchCoords = new ArrayList<Coordinate>(
10000);
@Override
protected void reduceNativeValues(
final GeoWaveInputKey key,
final Iterable<Object> values,
final Reducer<GeoWaveInputKey, ObjectWritable, GeoWaveOutputKey, SimpleFeature>.Context context )
throws IOException,
InterruptedException {
// limit on new points per convex hull run (batch)
int batchThreshold = 10000;
batchCoords.clear();
Geometry currentHull = null;
final String groupID = StringUtils.stringFromBinary(key.getDataId().getBytes());
final AnalyticItemWrapper<T> centroid = centroidManager.getCentroid(groupID);
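// stream over the cluster's members, periodically collapsing the
// accumulated coordinates to the vertices of their convex hull so the
// in-memory batch stays bounded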
for (final Object value : values) {
currentHull = null;
@SuppressWarnings("unchecked")
final Geometry geo = projectionFunction.getProjection((T) value);
final Coordinate[] coords = geo.getCoordinates();
if ((coords.length + batchCoords.size()) > pointCloudThreshold) {
break;
}
for (final Coordinate coordinate : coords) {
batchCoords.add(coordinate);
}
if (coords.length > batchThreshold) {
batchThreshold = coords.length;
}
if (batchCoords.size() > batchThreshold) {
currentHull = compress(
key,
batchCoords);
}
}
currentHull = (currentHull == null) ? compress(
key,
batchCoords) : currentHull;
if (ConvexHullMapReduce.LOGGER.isTraceEnabled()) {
ConvexHullMapReduce.LOGGER.trace(centroid.getGroupID() + " contains " + groupID);
}
final SimpleFeature newPolygonFeature = AnalyticFeature.createGeometryFeature(
outputAdapter.getFeatureType(),
centroid.getBatchID(),
UUID.randomUUID().toString(),
centroid.getName(),
centroid.getGroupID(),
centroid.getCost(),
currentHull,
new String[0],
new double[0],
centroid.getZoomLevel(),
centroid.getIterationID(),
centroid.getAssociationCount());
// write the convex hull feature for this cluster
context.write(
new GeoWaveOutputKey(
outputAdapter.getAdapterId(),
indexIds),
newPolygonFeature);
}
private static <T> Geometry compress(
final GeoWaveInputKey key,
final List<Coordinate> batchCoords ) {
final Coordinate[] actualCoords = batchCoords.toArray(new Coordinate[batchCoords.size()]);
// generate convex hull for current batch of points
final ConvexHull convexHull = new ConvexHull(
actualCoords,
new GeometryFactory());
final Geometry hullGeometry = convexHull.getConvexHull();
final Coordinate[] hullCoords = hullGeometry.getCoordinates();
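// replace the accumulated batch with just the hull vertices so later
// batches build on the compressed point set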
batchCoords.clear();
for (final Coordinate hullCoord : hullCoords) {
batchCoords.add(hullCoord);
}
return hullGeometry;
}
@SuppressWarnings("unchecked")
@Override
protected void setup(
final Reducer<GeoWaveInputKey, ObjectWritable, GeoWaveOutputKey, SimpleFeature>.Context context )
throws IOException,
InterruptedException {
final ScopedJobConfiguration config = new ScopedJobConfiguration(
context.getConfiguration(),
ConvexHullMapReduce.class,
ConvexHullMapReduce.LOGGER);
super.setup(context);
try {
centroidManager = new CentroidManagerGeoWave<T>(
context,
ConvexHullMapReduce.class,
ConvexHullMapReduce.LOGGER);
}
catch (final Exception e) {
ConvexHullMapReduce.LOGGER.warn(
"Unable to initialize centroid manager",
e);
throw new IOException(
"Unable to initialize centroid manager");
}
try {
projectionFunction = config.getInstance(
HullParameters.Hull.PROJECTION_CLASS,
Projection.class,
SimpleFeatureProjection.class);
projectionFunction.initialize(
context,
ConvexHullMapReduce.class);
}
catch (final Exception e1) {
throw new IOException(
e1);
}
final String polygonDataTypeId = config.getString(
HullParameters.Hull.DATA_TYPE_ID,
"convex_hull");
outputAdapter = AnalyticFeature.createGeometryFeatureAdapter(
polygonDataTypeId,
new String[0],
config.getString(
HullParameters.Hull.DATA_NAMESPACE_URI,
BasicFeatureTypes.DEFAULT_NAMESPACE),
ClusteringUtils.CLUSTERING_CRS);
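// default to the ID of the standard spatial index when no index ID is
// configured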
indexIds = new ArrayList<ByteArrayId>();
indexIds.add(new ByteArrayId(
StringUtils.stringToBinary(config.getString(
HullParameters.Hull.INDEX_ID,
new SpatialDimensionalityTypeProvider.SpatialIndexBuilder()
.createIndex()
.getId()
.getString()))));
}
}
}