package mil.nga.giat.geowave.analytic.mapreduce.kmeans; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.apache.hadoop.io.ObjectWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.mapreduce.ReduceDriver; import org.apache.hadoop.mrunit.types.Pair; import org.junit.Before; import org.junit.Test; import org.opengis.feature.simple.SimpleFeature; import org.opengis.feature.simple.SimpleFeatureType; import com.vividsolutions.jts.geom.Coordinate; import com.vividsolutions.jts.geom.GeometryFactory; import mil.nga.giat.geowave.adapter.vector.FeatureDataAdapter; import mil.nga.giat.geowave.adapter.vector.FeatureWritable; import mil.nga.giat.geowave.analytic.AnalyticFeature; import mil.nga.giat.geowave.analytic.PropertyManagement; import mil.nga.giat.geowave.analytic.SimpleFeatureItemWrapperFactory; import mil.nga.giat.geowave.analytic.clustering.CentroidManagerGeoWave; import mil.nga.giat.geowave.analytic.clustering.ClusteringUtils; import mil.nga.giat.geowave.analytic.clustering.DistortionGroupManagement.DistortionEntry; import mil.nga.giat.geowave.analytic.clustering.NestedGroupCentroidAssignment; import mil.nga.giat.geowave.analytic.distance.DistanceFn; import mil.nga.giat.geowave.analytic.distance.FeatureCentroidDistanceFn; import mil.nga.giat.geowave.analytic.extract.SimpleFeatureCentroidExtractor; import mil.nga.giat.geowave.analytic.mapreduce.CountofDoubleWritable; import mil.nga.giat.geowave.analytic.param.CentroidParameters; import mil.nga.giat.geowave.analytic.param.CommonParameters; import mil.nga.giat.geowave.analytic.param.GlobalParameters; import mil.nga.giat.geowave.analytic.param.StoreParameters.StoreParam; import mil.nga.giat.geowave.analytic.store.PersistableStore; import mil.nga.giat.geowave.core.geotime.ingest.SpatialDimensionalityTypeProvider; import mil.nga.giat.geowave.core.index.ByteArrayId; import mil.nga.giat.geowave.core.store.DataStore; import mil.nga.giat.geowave.core.store.GeoWaveStoreFinder; import mil.nga.giat.geowave.core.store.IndexWriter; import mil.nga.giat.geowave.core.store.index.PrimaryIndex; import mil.nga.giat.geowave.core.store.memory.MemoryRequiredOptions; import mil.nga.giat.geowave.core.store.memory.MemoryStoreFactoryFamily; import mil.nga.giat.geowave.core.store.operations.remote.options.DataStorePluginOptions; import mil.nga.giat.geowave.mapreduce.GeoWaveConfiguratorBase; import mil.nga.giat.geowave.mapreduce.JobContextAdapterStore; import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputKey; import mil.nga.giat.geowave.mapreduce.output.GeoWaveOutputKey; public class KMeansDistortionMapReduceTest { private static final String TEST_NAMESPACE = "test"; MapDriver<GeoWaveInputKey, ObjectWritable, Text, CountofDoubleWritable> mapDriver; ReduceDriver<Text, CountofDoubleWritable, GeoWaveOutputKey, DistortionEntry> reduceDriver; final String batchId = "b1"; final SimpleFeatureType ftype = AnalyticFeature.createGeometryFeatureAdapter( "centroid", new String[] { "extra1" }, "http://geowave.test.net", ClusteringUtils.CLUSTERING_CRS).getFeatureType(); final FeatureDataAdapter testObjectAdapter = new FeatureDataAdapter( ftype); private static final List<Object> capturedObjects = new ArrayList<Object>(); final PrimaryIndex index = new SpatialDimensionalityTypeProvider().createPrimaryIndex(); final GeometryFactory factory = new GeometryFactory(); final String grp1 = "g1"; @Before public void setUp() throws IOException { final KMeansDistortionMapReduce.KMeansDistortionMapper mapper = new KMeansDistortionMapReduce.KMeansDistortionMapper(); final KMeansDistortionMapReduce.KMeansDistortionReduce reducer = new KMeansDistortionMapReduce.KMeansDistortionReduce(); mapDriver = MapDriver.newMapDriver(mapper); reduceDriver = ReduceDriver.newReduceDriver(reducer); mapDriver.getConfiguration().setClass( GeoWaveConfiguratorBase.enumToConfKey( KMeansDistortionMapReduce.class, CommonParameters.Common.DISTANCE_FUNCTION_CLASS), FeatureCentroidDistanceFn.class, DistanceFn.class); JobContextAdapterStore.addDataAdapter( mapDriver.getConfiguration(), testObjectAdapter); JobContextAdapterStore.addDataAdapter( reduceDriver.getConfiguration(), testObjectAdapter); final PropertyManagement propManagement = new PropertyManagement(); propManagement.store( CentroidParameters.Centroid.INDEX_ID, new SpatialDimensionalityTypeProvider().createPrimaryIndex().getId().getString()); propManagement.store( CentroidParameters.Centroid.DATA_TYPE_ID, ftype.getTypeName()); propManagement.store( CentroidParameters.Centroid.DATA_NAMESPACE_URI, ftype.getName().getNamespaceURI()); propManagement.store( GlobalParameters.Global.BATCH_ID, batchId); propManagement.store( CentroidParameters.Centroid.EXTRACTOR_CLASS, SimpleFeatureCentroidExtractor.class); propManagement.store( CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS, SimpleFeatureItemWrapperFactory.class); DataStorePluginOptions pluginOptions = new DataStorePluginOptions(); GeoWaveStoreFinder.getRegisteredStoreFactoryFamilies().put( "memory", new MemoryStoreFactoryFamily()); pluginOptions.selectPlugin("memory"); MemoryRequiredOptions opts = (MemoryRequiredOptions) pluginOptions.getFactoryOptions(); opts.setGeowaveNamespace(TEST_NAMESPACE); PersistableStore store = new PersistableStore( pluginOptions); propManagement.store( StoreParam.INPUT_STORE, store); NestedGroupCentroidAssignment.setParameters( mapDriver.getConfiguration(), KMeansDistortionMapReduce.class, propManagement); serializations(); capturedObjects.clear(); final SimpleFeature feature = AnalyticFeature.createGeometryFeature( ftype, batchId, "123", "fred", grp1, 20.30203, factory.createPoint(new Coordinate( 02.33, 0.23)), new String[] { "extra1" }, new double[] { 0.022 }, 1, 1, 0); propManagement.store( CentroidParameters.Centroid.ZOOM_LEVEL, 1); ingest( pluginOptions.createDataStore(), testObjectAdapter, index, feature); CentroidManagerGeoWave.setParameters( reduceDriver.getConfiguration(), KMeansDistortionMapReduce.class, propManagement); } private void ingest( final DataStore dataStore, final FeatureDataAdapter adapter, final PrimaryIndex index, final SimpleFeature feature ) throws IOException { try (IndexWriter writer = dataStore.createWriter( adapter, index)) { writer.write(feature); writer.close(); } } private void serializations() { final String[] strings = reduceDriver.getConfiguration().getStrings( "io.serializations"); final String[] newStrings = new String[strings.length + 1]; System.arraycopy( strings, 0, newStrings, 0, strings.length); newStrings[newStrings.length - 1] = SimpleFeatureImplSerialization.class.getName(); reduceDriver.getConfiguration().setStrings( "io.serializations", newStrings); mapDriver.getConfiguration().setStrings( "io.serializations", newStrings); } @Test public void testMapper() throws IOException { final GeoWaveInputKey inputKey = new GeoWaveInputKey(); inputKey.setInsertionId(null); inputKey.setAdapterId(testObjectAdapter.getAdapterId()); inputKey.setDataId(new ByteArrayId( "abc".getBytes())); final ObjectWritable ow = new ObjectWritable(); ow.set(new FeatureWritable( ftype, AnalyticFeature.createGeometryFeature( ftype, batchId, "123", "fred", grp1, 20.30203, factory.createPoint(new Coordinate( 02.33, 0.23)), new String[] { "extra1" }, new double[] { 0.022 }, 1, 1, 0))); mapDriver.withInput( inputKey, ow); final List<Pair<Text, CountofDoubleWritable>> results = mapDriver.run(); // output key has the dataID adjusted to contain the rank assertEquals( results.get( 0).getFirst().toString(), grp1); // output value is the same as input value assertEquals( results.get( 0).getSecond().getValue(), 0.0, 0.0001); } @Test public void testReducer() throws IOException { reduceDriver.addInput( new Text( "g1"), Arrays.asList( new CountofDoubleWritable( 0.34, 1), new CountofDoubleWritable( 0.75, 1))); reduceDriver.addInput( new Text( "g2"), Arrays.asList( new CountofDoubleWritable( 0.34, 1), new CountofDoubleWritable( 0.25, 1))); final List<Pair<GeoWaveOutputKey, DistortionEntry>> results = reduceDriver.run(); assertEquals( 1, results.size()); assertTrue(results.get( 0).getSecond().getGroupId().equals( "g1")); assertTrue(results.get( 0).getSecond().getClusterCount().equals( 1)); // TODO: floating point error? assertTrue(results.get( 0).getSecond().getDistortionValue().equals( 3.6697247706422016)); } }