package mil.nga.giat.geowave.analytic.mapreduce.dbscan;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import mil.nga.giat.geowave.adapter.vector.FeatureDataAdapter;
import mil.nga.giat.geowave.adapter.vector.FeatureWritable;
import mil.nga.giat.geowave.analytic.AdapterWithObjectWritable;
import mil.nga.giat.geowave.analytic.AnalyticFeature;
import mil.nga.giat.geowave.analytic.Projection;
import mil.nga.giat.geowave.analytic.SimpleFeatureProjection;
import mil.nga.giat.geowave.analytic.clustering.ClusteringUtils;
import mil.nga.giat.geowave.analytic.mapreduce.kmeans.SimpleFeatureImplSerialization;
import mil.nga.giat.geowave.analytic.mapreduce.nn.NNMapReduce;
import mil.nga.giat.geowave.analytic.mapreduce.nn.NNMapReduce.PartitionDataWritable;
import mil.nga.giat.geowave.analytic.param.ClusteringParameters;
import mil.nga.giat.geowave.analytic.param.HullParameters;
import mil.nga.giat.geowave.analytic.param.PartitionParameters;
import mil.nga.giat.geowave.analytic.partitioner.OrthodromicDistancePartitioner;
import mil.nga.giat.geowave.analytic.partitioner.Partitioner.PartitionData;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.mapreduce.GeoWaveConfiguratorBase;
import mil.nga.giat.geowave.mapreduce.JobContextAdapterStore;
import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputKey;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.apache.hadoop.mrunit.types.Pair;
import org.geotools.feature.type.BasicFeatureTypes;
import org.junit.Before;
import org.junit.Test;
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;
import com.vividsolutions.jts.geom.Coordinate;
import com.vividsolutions.jts.geom.GeometryFactory;
import com.vividsolutions.jts.geom.PrecisionModel;
/**
 * MRUnit-driven tests that push synthetic point features through
 * {@link NNMapReduce.NNMapper} and then through
 * {@link DBScanMapReduce.DBScanMapHullReducer}, checking the partition
 * assignments produced by the mapper and the number of records emitted by the
 * reducer.
 */
public class DBScanMapReduceTest
{
    /** Seed shared by the randomized tests so that their input is reproducible. */
    private static final long RANDOM_SEED = 3434L;

    MapDriver<GeoWaveInputKey, Object, PartitionDataWritable, AdapterWithObjectWritable> mapDriver;
    ReduceDriver<PartitionDataWritable, AdapterWithObjectWritable, GeoWaveInputKey, ObjectWritable> reduceDriver;
    SimpleFeatureType ftype;

    // NOTE(review): JTS interprets the PrecisionModel argument as a scale
    // factor - confirm that 0.000001 (rather than 1000000) is what is intended.
    final GeometryFactory factory = new GeometryFactory(
            new PrecisionModel(
                    0.000001),
            4326);

    final NNMapReduce.NNMapper<ClusterItem> nnMapper = new NNMapReduce.NNMapper<ClusterItem>();
    final NNMapReduce.NNReducer<ClusterItem, GeoWaveInputKey, ObjectWritable, Map<ByteArrayId, Cluster>> nnReducer = new DBScanMapReduce.DBScanMapHullReducer();

    /**
     * Creates fresh map and reduce drivers and configures them with the
     * distance thresholds, projection class, feature adapter and feature
     * serialization used by every test.
     *
     * @throws IOException
     *             declared for the driver/configuration calls
     */
    @Before
    public void setUp()
            throws IOException {
        mapDriver = MapDriver.newMapDriver(nnMapper);
        reduceDriver = ReduceDriver.newReduceDriver(nnReducer);

        mapDriver.getConfiguration().set(
                GeoWaveConfiguratorBase.enumToConfKey(
                        NNMapReduce.class,
                        PartitionParameters.Partition.DISTANCE_THRESHOLDS),
                "10,10");

        reduceDriver.getConfiguration().setDouble(
                GeoWaveConfiguratorBase.enumToConfKey(
                        NNMapReduce.class,
                        PartitionParameters.Partition.MAX_DISTANCE),
                10);

        ftype = AnalyticFeature.createGeometryFeatureAdapter(
                "centroid",
                new String[] {
                    "extra1"
                },
                BasicFeatureTypes.DEFAULT_NAMESPACE,
                ClusteringUtils.CLUSTERING_CRS).getFeatureType();

        reduceDriver.getConfiguration().setClass(
                GeoWaveConfiguratorBase.enumToConfKey(
                        DBScanMapReduce.class,
                        HullParameters.Hull.PROJECTION_CLASS),
                SimpleFeatureProjection.class,
                Projection.class);

        // Both drivers need the adapter registered under the feature type.
        JobContextAdapterStore.addDataAdapter(
                mapDriver.getConfiguration(),
                new FeatureDataAdapter(
                        ftype));
        JobContextAdapterStore.addDataAdapter(
                reduceDriver.getConfiguration(),
                new FeatureDataAdapter(
                        ftype));

        serializations();
    }

    /**
     * Builds a point feature of the test feature type.
     *
     * @param name
     *            value passed for both name arguments of
     *            {@link AnalyticFeature#createGeometryFeature}
     * @param coord
     *            location of the point geometry
     * @return the newly created feature
     */
    private SimpleFeature createTestFeature(
            final String name,
            final Coordinate coord ) {
        return AnalyticFeature.createGeometryFeature(
                ftype,
                "b1",
                name,
                name,
                "NA",
                20.30203,
                factory.createPoint(coord),
                new String[] {
                    "extra1"
                },
                new double[] {
                    0.022
                },
                1,
                1,
                0);
    }

    /**
     * Appends {@link SimpleFeatureImplSerialization} to the
     * {@code io.serializations} list read from the reduce driver and stores the
     * extended list in both drivers' configurations.
     */
    private void serializations() {
        final String[] strings = reduceDriver.getConfiguration().getStrings(
                "io.serializations");
        final String[] newStrings = new String[strings.length + 1];
        System.arraycopy(
                strings,
                0,
                newStrings,
                0,
                strings.length);
        newStrings[newStrings.length - 1] = SimpleFeatureImplSerialization.class.getName();
        reduceDriver.getConfiguration().setStrings(
                "io.serializations",
                newStrings);
        mapDriver.getConfiguration().setStrings(
                "io.serializations",
                newStrings);
    }

    /**
     * Queues a feature as mapper input, keyed by the adapter id and the
     * feature's own id.
     *
     * @param adapterId
     *            adapter id to place in the input key
     * @param feature
     *            feature to submit to the mapper
     * @throws IOException
     *             declared for symmetry with the other driver helpers
     */
    private void addMapperInput(
            final ByteArrayId adapterId,
            final SimpleFeature feature )
            throws IOException {
        mapDriver.addInput(
                new GeoWaveInputKey(
                        adapterId,
                        new ByteArrayId(
                                feature.getID())),
                feature);
    }

    /**
     * Queues {@code count} features named {@code f0..f(count-1)} whose
     * coordinates are drawn from a seeded Gaussian centered on (30, 30) and
     * rounded with {@link #round(double)}.
     *
     * @param count
     *            number of features to generate
     * @param spread
     *            multiplier applied to each Gaussian sample
     * @throws IOException
     *             declared for symmetry with the other driver helpers
     */
    private void addGaussianFeatures(
            final int count,
            final double spread )
            throws IOException {
        final ByteArrayId adapterId = new ByteArrayId(
                ftype.getTypeName());
        final Random r = new Random(
                RANDOM_SEED);
        for (int i = 0; i < count; i++) {
            // x is sampled before y; keep this order so the seeded sequence
            // yields the same coordinates on every run.
            final double x = round(30.0 + (r.nextGaussian() * spread));
            final double y = round(30.0 + (r.nextGaussian() * spread));
            addMapperInput(
                    adapterId,
                    createTestFeature(
                            "f" + i,
                            new Coordinate(
                                    x,
                                    y)));
        }
    }

    /**
     * Groups the mapper output by partition, feeds it to the reduce driver,
     * sets the minimum cluster size and runs the reducer.
     *
     * @param mapperResults
     *            raw output of the map driver
     * @param minimumSize
     *            value stored under
     *            {@link ClusteringParameters.Clustering#MINIMUM_SIZE}
     * @return the records emitted by the reducer
     * @throws IOException
     *             if the reduce driver fails
     */
    private List<Pair<GeoWaveInputKey, ObjectWritable>> runReducer(
            final List<Pair<PartitionDataWritable, AdapterWithObjectWritable>> mapperResults,
            final int minimumSize )
            throws IOException {
        final List<Pair<PartitionDataWritable, List<AdapterWithObjectWritable>>> partitions = getReducerDataFromMapperInput(mapperResults);
        reduceDriver.addAll(partitions);
        reduceDriver.getConfiguration().setInt(
                GeoWaveConfiguratorBase.enumToConfKey(
                        NNMapReduce.class,
                        ClusteringParameters.Clustering.MINIMUM_SIZE),
                minimumSize);
        return reduceDriver.run();
    }

    /**
     * Asserts that two features were assigned primary partitions with equal
     * ids.
     *
     * @param mapperResults
     *            raw output of the map driver
     * @param expected
     *            feature whose partition id is used as the expected value
     * @param actual
     *            feature whose partition id is compared against it
     */
    private void assertSamePrimaryPartition(
            final List<Pair<PartitionDataWritable, AdapterWithObjectWritable>> mapperResults,
            final SimpleFeature expected,
            final SimpleFeature actual ) {
        assertEquals(
                getPartitionDataFor(
                        mapperResults,
                        expected.getID(),
                        true).getId(),
                getPartitionDataFor(
                        mapperResults,
                        actual.getID(),
                        true).getId());
    }

    /**
     * Maps eight hand-placed features (three near (30, 30), four near
     * (50.001, 50.001) and one near (40, 40)), verifies that nearby features
     * share a primary partition, and expects the reducer to emit two records
     * with a minimum size of 2.
     *
     * @throws IOException
     *             if either driver fails
     */
    @Test
    public void testReducer()
            throws IOException {
        final ByteArrayId adapterId = new ByteArrayId(
                ftype.getTypeName());

        final SimpleFeature feature1 = createTestFeature(
                "f1",
                new Coordinate(
                        30.0,
                        30.00000001));
        final SimpleFeature feature2 = createTestFeature(
                "f2",
                new Coordinate(
                        50.001,
                        50.001));
        final SimpleFeature feature3 = createTestFeature(
                "f3",
                new Coordinate(
                        30.00000001,
                        30.00000001));
        final SimpleFeature feature4 = createTestFeature(
                "f4",
                new Coordinate(
                        50.0011,
                        50.00105));
        final SimpleFeature feature5 = createTestFeature(
                "f5",
                new Coordinate(
                        50.00112,
                        50.00111));
        final SimpleFeature feature6 = createTestFeature(
                "f6",
                new Coordinate(
                        30.00000001,
                        30.00000002));
        final SimpleFeature feature7 = createTestFeature(
                "f7",
                new Coordinate(
                        50.00113,
                        50.00114));
        final SimpleFeature feature8 = createTestFeature(
                "f8",
                new Coordinate(
                        40.00000001,
                        40.000000002));

        final SimpleFeature[] allFeatures = new SimpleFeature[] {
            feature1,
            feature2,
            feature3,
            feature4,
            feature5,
            feature6,
            feature7,
            feature8
        };
        for (final SimpleFeature feature : allFeatures) {
            addMapperInput(
                    adapterId,
                    feature);
        }

        final List<Pair<PartitionDataWritable, AdapterWithObjectWritable>> mapperResults = mapDriver.run();

        // Every feature whose partition is dereferenced below must have a
        // primary partition; check that up front so a missing one fails with a
        // clear message instead of a NullPointerException.
        final SimpleFeature[] partitionedFeatures = new SimpleFeature[] {
            feature1,
            feature2,
            feature3,
            feature4,
            feature5,
            feature6,
            feature7
        };
        for (final SimpleFeature feature : partitionedFeatures) {
            assertNotNull(
                    "no primary partition for " + feature.getID(),
                    getPartitionDataFor(
                            mapperResults,
                            feature.getID(),
                            true));
        }

        // Features near (30, 30) share a primary partition.
        assertSamePrimaryPartition(
                mapperResults,
                feature1,
                feature3);
        assertSamePrimaryPartition(
                mapperResults,
                feature6,
                feature3);

        // Features near (50.001, 50.001) share a primary partition.
        assertSamePrimaryPartition(
                mapperResults,
                feature5,
                feature7);
        assertSamePrimaryPartition(
                mapperResults,
                feature5,
                feature4);

        final List<Pair<GeoWaveInputKey, ObjectWritable>> reduceResults = runReducer(
                mapperResults,
                2);

        // NOTE: only the number of reducer outputs is verified here; which
        // features end up in which output is not asserted.
        assertEquals(
                2,
                reduceResults.size());
    }

    /**
     * Regroups flat mapper output into the (key, list of values) pairs that the
     * reduce driver consumes, preserving first-seen key order.
     *
     * @param mapperResults
     *            raw output of the map driver
     * @return one pair per distinct partition key
     */
    private List<Pair<PartitionDataWritable, List<AdapterWithObjectWritable>>> getReducerDataFromMapperInput(
            final List<Pair<PartitionDataWritable, AdapterWithObjectWritable>> mapperResults ) {
        final List<Pair<PartitionDataWritable, List<AdapterWithObjectWritable>>> reducerInputSet = new ArrayList<Pair<PartitionDataWritable, List<AdapterWithObjectWritable>>>();
        for (final Pair<PartitionDataWritable, AdapterWithObjectWritable> pair : mapperResults) {
            getListFor(
                    pair.getFirst(),
                    reducerInputSet).add(
                    pair.getSecond());
        }
        return reducerInputSet;
    }

    /**
     * Finds the value list registered for a partition key, creating and
     * registering an empty one if none compares equal (via {@code compareTo}).
     *
     * @param pd
     *            partition key to look up
     * @param reducerInputSet
     *            accumulated reducer input; may be extended by this call
     * @return the (possibly new) list of values for {@code pd}
     */
    private List<AdapterWithObjectWritable> getListFor(
            final PartitionDataWritable pd,
            final List<Pair<PartitionDataWritable, List<AdapterWithObjectWritable>>> reducerInputSet ) {
        for (final Pair<PartitionDataWritable, List<AdapterWithObjectWritable>> pair : reducerInputSet) {
            if (pair.getFirst().compareTo(
                    pd) == 0) {
                return pair.getSecond();
            }
        }
        final List<AdapterWithObjectWritable> newPairList = new ArrayList<AdapterWithObjectWritable>();
        // Explicit type arguments: a raw Pair here would be an unchecked
        // conversion into the parameterized list.
        reducerInputSet.add(new Pair<PartitionDataWritable, List<AdapterWithObjectWritable>>(
                pd,
                newPairList));
        return newPairList;
    }

    /**
     * Looks up the first partition in the mapper output that carries the
     * feature with the given id and has the requested primary flag.
     *
     * @param mapperResults
     *            raw output of the map driver
     * @param id
     *            feature id to search for
     * @param primary
     *            required value of {@code PartitionData.isPrimary()}
     * @return the matching partition data, or {@code null} if there is none
     */
    private PartitionData getPartitionDataFor(
            final List<Pair<PartitionDataWritable, AdapterWithObjectWritable>> mapperResults,
            final String id,
            final boolean primary ) {
        for (final Pair<PartitionDataWritable, AdapterWithObjectWritable> pair : mapperResults) {
            final PartitionData partitionData = pair.getFirst().getPartitionData();
            final String featureId = ((FeatureWritable) pair.getSecond().getObjectWritable().get()).getFeature().getID();
            if (featureId.equals(id) && (partitionData.isPrimary() == primary)) {
                return partitionData;
            }
        }
        return null;
    }

    /**
     * Rounds a value to six decimal places.
     *
     * @param value
     *            value to round
     * @return {@code value} rounded to the nearest millionth
     */
    private double round(
            final double value ) {
        return (double) Math.round(value * 1000000) / 1000000;
    }

    /**
     * Runs eight tightly grouped random features with a minimum size of 4 and
     * expects the reducer to emit exactly one record.
     *
     * @throws IOException
     *             if either driver fails
     */
    @Test
    public void test8With4()
            throws IOException {
        addGaussianFeatures(
                8,
                0.00001);
        final List<Pair<GeoWaveInputKey, ObjectWritable>> reduceResults = runReducer(
                mapDriver.run(),
                4);
        assertEquals(
                1,
                reduceResults.size());
    }

    /**
     * Runs 10,000 random features with a minimum size of 10 and expects the
     * reducer to emit at least one record.
     *
     * @throws IOException
     *             if either driver fails
     */
    @Test
    public void testScale()
            throws IOException {
        addGaussianFeatures(
                10000,
                0.0001);
        final List<Pair<GeoWaveInputKey, ObjectWritable>> reduceResults = runReducer(
                mapDriver.run(),
                10);
        assertTrue(reduceResults.size() > 0);
    }
}