package mil.nga.giat.geowave.analytic.mapreduce.kmeans;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.apache.hadoop.mrunit.types.Pair;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;

import com.vividsolutions.jts.geom.Coordinate;

import mil.nga.giat.geowave.analytic.AnalyticFeature;
import mil.nga.giat.geowave.analytic.AnalyticItemWrapperFactory;
import mil.nga.giat.geowave.analytic.PropertyManagement;
import mil.nga.giat.geowave.analytic.clustering.CentroidManagerGeoWave;
import mil.nga.giat.geowave.analytic.clustering.ClusteringUtils;
import mil.nga.giat.geowave.analytic.extract.CentroidExtractor;
import mil.nga.giat.geowave.analytic.param.CentroidParameters;
import mil.nga.giat.geowave.analytic.param.GlobalParameters;
import mil.nga.giat.geowave.analytic.param.SampleParameters;
import mil.nga.giat.geowave.analytic.param.StoreParameters.StoreParam;
import mil.nga.giat.geowave.analytic.sample.function.SamplingRankFunction;
import mil.nga.giat.geowave.analytic.store.PersistableStore;
import mil.nga.giat.geowave.core.geotime.ingest.SpatialDimensionalityTypeProvider;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.store.GeoWaveStoreFinder;
import mil.nga.giat.geowave.core.store.adapter.DataAdapter;
import mil.nga.giat.geowave.core.store.memory.MemoryRequiredOptions;
import mil.nga.giat.geowave.core.store.memory.MemoryStoreFactoryFamily;
import mil.nga.giat.geowave.core.store.operations.remote.options.DataStorePluginOptions;
import mil.nga.giat.geowave.mapreduce.GeoWaveConfiguratorBase;
import mil.nga.giat.geowave.mapreduce.JobContextAdapterStore;
import mil.nga.giat.geowave.mapreduce.input.GeoWaveInputKey;
import mil.nga.giat.geowave.mapreduce.output.GeoWaveOutputKey;

/**
 * MRUnit tests for {@link KSamplerMapReduce.SampleMap} and
 * {@link KSamplerMapReduce.SampleReducer}.
 *
 * <p>
 * The mapper tests plug in a stub {@link SamplingRankFunction} that returns a
 * fixed rank and records every object it is asked to rank. The reducer test
 * feeds three ranked keys with a configured sample size of two and checks
 * which two items are emitted.
 */
public class KSamplerMapReduceTest {

    private static final String TEST_NAMESPACE = "test";

    /** Data type ID of the geometry feature adapter registered for reducer output. */
    private static final String ADAPTER_TYPE_ID = "altoids";

    /** Namespace URI used for the geometry feature adapter and centroid parameters. */
    private static final String DATA_NAMESPACE_URI = "http://geowave.test.net";

    /** Size of the zero-padded buffer used to build every data ID in these tests. */
    private static final int KEY_BUFFER_SIZE = 64;

    MapDriver<GeoWaveInputKey, ObjectWritable, GeoWaveInputKey, ObjectWritable> mapDriver;
    ReduceDriver<GeoWaveInputKey, ObjectWritable, GeoWaveOutputKey, TestObject> reduceDriver;

    // NOTE(review): "Aapter" is a typo for "Adapter"; the name is kept because the
    // field is package-visible and may be referenced from outside this file.
    final TestObjectDataAdapter testObjectAapter = new TestObjectDataAdapter();

    /**
     * Objects passed to the stub rank functions. Static because the rank
     * functions are configured by class on the driver rather than constructed
     * by the test; each mapper test clears the list before running.
     */
    private static final List<Object> capturedObjects = new ArrayList<Object>();

    public KSamplerMapReduceTest() {}

    /** Stub rank function: records the ranked value and always returns 0.5. */
    public static class TestSamplingMidRankFunction implements SamplingRankFunction {

        @Override
        public double rank(
                final int sampleSize,
                final Object value ) {
            capturedObjects.add(value);
            return 0.5;
        }

        @Override
        public void initialize(
                final JobContext context,
                final Class scope,
                final Logger logger )
                throws IOException {}
    }

    /** Stub rank function: records the ranked value and always returns 0.0. */
    public static class TestSamplingNoRankFunction implements SamplingRankFunction {

        @Override
        public void initialize(
                final JobContext context,
                final Class scope,
                final Logger logger )
                throws IOException {}

        @Override
        public double rank(
                final int sampleSize,
                final Object value ) {
            capturedObjects.add(value);
            return 0.0;
        }
    }

    /**
     * Creates fresh map and reduce drivers and configures both for
     * {@link KSamplerMapReduce}: an in-memory store, centroid parameters, a
     * sample size of 2, the test adapters, and the extra serializations the
     * reducer needs.
     *
     * @throws IOException
     *             declared by the original fixture; propagated from the
     *             configuration calls
     */
    @Before
    public void setUp()
            throws IOException {
        final KSamplerMapReduce.SampleMap<TestObject> mapper = new KSamplerMapReduce.SampleMap<TestObject>();
        final KSamplerMapReduce.SampleReducer<TestObject> reducer = new KSamplerMapReduce.SampleReducer<TestObject>();
        mapDriver = MapDriver.newMapDriver(mapper);
        reduceDriver = ReduceDriver.newReduceDriver(reducer);

        final DataAdapter adapter = AnalyticFeature.createGeometryFeatureAdapter(
                ADAPTER_TYPE_ID,
                new String[] {},
                DATA_NAMESPACE_URI,
                ClusteringUtils.CLUSTERING_CRS);

        final PropertyManagement propManagement = new PropertyManagement();

        // Register and select the in-memory store so no external data store is needed.
        final DataStorePluginOptions pluginOptions = new DataStorePluginOptions();
        GeoWaveStoreFinder.getRegisteredStoreFactoryFamilies().put(
                "memory",
                new MemoryStoreFactoryFamily());
        pluginOptions.selectPlugin("memory");
        final MemoryRequiredOptions opts = (MemoryRequiredOptions) pluginOptions.getFactoryOptions();
        opts.setGeowaveNamespace(TEST_NAMESPACE);
        final PersistableStore store = new PersistableStore(
                pluginOptions);

        propManagement.store(
                StoreParam.INPUT_STORE,
                store);
        propManagement.store(
                CentroidParameters.Centroid.INDEX_ID,
                new SpatialDimensionalityTypeProvider().createPrimaryIndex().getId().getString());
        propManagement.store(
                CentroidParameters.Centroid.DATA_TYPE_ID,
                ADAPTER_TYPE_ID);
        propManagement.store(
                CentroidParameters.Centroid.DATA_NAMESPACE_URI,
                DATA_NAMESPACE_URI);
        propManagement.store(
                GlobalParameters.Global.BATCH_ID,
                "b1");
        propManagement.store(
                CentroidParameters.Centroid.EXTRACTOR_CLASS,
                TestObjectExtractor.class);
        propManagement.store(
                CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS,
                TestObjectItemWrapperFactory.class);

        CentroidManagerGeoWave.setParameters(
                reduceDriver.getConfiguration(),
                KSamplerMapReduce.class,
                propManagement);
        CentroidManagerGeoWave.setParameters(
                mapDriver.getConfiguration(),
                KSamplerMapReduce.class,
                propManagement);

        // Default rank function for the mapper; individual tests override it.
        mapDriver.getConfiguration().setClass(
                GeoWaveConfiguratorBase.enumToConfKey(
                        KSamplerMapReduce.class,
                        SampleParameters.Sample.SAMPLE_RANK_FUNCTION),
                TestSamplingMidRankFunction.class,
                SamplingRankFunction.class);

        JobContextAdapterStore.addDataAdapter(
                mapDriver.getConfiguration(),
                testObjectAapter);

        final String sampleSizeKey = GeoWaveConfiguratorBase.enumToConfKey(
                KSamplerMapReduce.class,
                SampleParameters.Sample.SAMPLE_SIZE);
        mapDriver.getConfiguration().setInt(
                sampleSizeKey,
                2);
        reduceDriver.getConfiguration().setInt(
                sampleSizeKey,
                2);

        JobContextAdapterStore.addDataAdapter(
                reduceDriver.getConfiguration(),
                adapter);
        JobContextAdapterStore.addDataAdapter(
                reduceDriver.getConfiguration(),
                testObjectAapter);

        reduceDriver.getConfiguration().set(
                GeoWaveConfiguratorBase.enumToConfKey(
                        KSamplerMapReduce.class,
                        SampleParameters.Sample.DATA_TYPE_ID),
                ADAPTER_TYPE_ID);
        reduceDriver.getConfiguration().setClass(
                GeoWaveConfiguratorBase.enumToConfKey(
                        KSamplerMapReduce.class,
                        CentroidParameters.Centroid.EXTRACTOR_CLASS),
                TestObjectExtractor.class,
                CentroidExtractor.class);

        final String wrapperFactoryKey = GeoWaveConfiguratorBase.enumToConfKey(
                KSamplerMapReduce.class,
                CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS);
        mapDriver.getConfiguration().setClass(
                wrapperFactoryKey,
                TestObjectItemWrapperFactory.class,
                AnalyticItemWrapperFactory.class);
        reduceDriver.getConfiguration().setClass(
                wrapperFactoryKey,
                TestObjectItemWrapperFactory.class,
                AnalyticItemWrapperFactory.class);

        serializations();
    }

    /**
     * Appends {@link TestObjectSerialization} and
     * {@link SimpleFeatureImplSerialization} (in that order) to the reduce
     * driver's existing {@code io.serializations} list.
     */
    private void serializations() {
        final String[] strings = reduceDriver.getConfiguration().getStrings(
                "io.serializations");
        // Arrays.copyOf keeps the existing entries and leaves two trailing slots.
        final String[] newStrings = Arrays.copyOf(
                strings,
                strings.length + 2);
        newStrings[newStrings.length - 2] = TestObjectSerialization.class.getName();
        newStrings[newStrings.length - 1] = SimpleFeatureImplSerialization.class.getName();
        reduceDriver.getConfiguration().setStrings(
                "io.serializations",
                newStrings);
    }

    /**
     * Wraps a {@link TestObject} with the given id (at the fixed coordinate
     * 25.4, 25.6) in an {@link ObjectWritable}.
     */
    private static ObjectWritable newTestValue(
            final String id ) {
        final ObjectWritable writable = new ObjectWritable();
        writable.set(new TestObjectWritable(
                new TestObject(
                        new Coordinate(
                                25.4,
                                25.6),
                        id)));
        return writable;
    }

    /** Builds an input key for the test object adapter with the given raw data ID. */
    private GeoWaveInputKey newInputKey(
            final byte[] dataId ) {
        final GeoWaveInputKey key = new GeoWaveInputKey();
        key.setAdapterId(testObjectAapter.getAdapterId());
        key.setDataId(new ByteArrayId(
                dataId));
        return key;
    }

    /**
     * Builds a reducer input key whose data ID is the rank, the int 3 and the
     * bytes of "111", in a zero-padded {@value #KEY_BUFFER_SIZE}-byte buffer.
     */
    private GeoWaveInputKey newRankedReducerKey(
            final double rank ) {
        final ByteBuffer keyBuf = ByteBuffer.allocate(KEY_BUFFER_SIZE);
        keyBuf.putDouble(rank);
        keyBuf.putInt(3);
        keyBuf.put("111".getBytes(StandardCharsets.UTF_8));
        return newInputKey(keyBuf.array());
    }

    /**
     * With a rank function returning 0.5, the mapper is expected to emit the
     * input value unchanged under a key whose data ID embeds the rank.
     */
    @Test
    public void testMapperWithMidRankedKey()
            throws IOException {
        capturedObjects.clear();
        mapDriver.getConfiguration().setClass(
                GeoWaveConfiguratorBase.enumToConfKey(
                        KSamplerMapReduce.class,
                        SampleParameters.Sample.SAMPLE_RANK_FUNCTION),
                TestSamplingMidRankFunction.class,
                SamplingRankFunction.class);

        // Explicit charset so the bytes do not depend on the platform default.
        final GeoWaveInputKey inputKey = newInputKey("abc".getBytes(StandardCharsets.UTF_8));
        final ObjectWritable ow = newTestValue("abc");

        // Expected data ID layout: rank, int 1, bytes of "1", int 3, original data ID,
        // zero-padded to the buffer size.
        // NOTE(review): the (1, "1") pair is presumably a length-prefixed group or
        // sample ID - confirm against KSamplerMapReduce.
        final ByteBuffer keyBuf = ByteBuffer.allocate(KEY_BUFFER_SIZE);
        keyBuf.putDouble(0.5);
        keyBuf.putInt(1);
        keyBuf.put("1".getBytes(StandardCharsets.UTF_8));
        keyBuf.putInt(3);
        keyBuf.put(inputKey.getDataId().getBytes());
        final GeoWaveInputKey outputKey = newInputKey(keyBuf.array());

        mapDriver.withInput(
                inputKey,
                ow);

        final List<Pair<GeoWaveInputKey, ObjectWritable>> results = mapDriver.run();

        // output key has the dataID adjusted to contain the rank
        assertEquals(
                outputKey,
                results.get(
                        0).getFirst());

        // output value is the same as input value
        assertEquals(
                ow.get(),
                results.get(
                        0).getSecond().get());

        // results from sample rank function to make sure it was provided the
        // correct object
        assertEquals(
                1,
                capturedObjects.size());
        assertEquals(
                "abc",
                ((TestObject) capturedObjects.get(0)).id);
    }

    /**
     * With a rank function returning 0.0, the mapper is expected to emit
     * nothing, while the rank function must still have been given the input
     * object.
     */
    @Test
    public void testMapperWithZeroRank()
            throws IOException {
        capturedObjects.clear();
        mapDriver.getConfiguration().setClass(
                GeoWaveConfiguratorBase.enumToConfKey(
                        KSamplerMapReduce.class,
                        SampleParameters.Sample.SAMPLE_RANK_FUNCTION),
                TestSamplingNoRankFunction.class,
                SamplingRankFunction.class);

        final GeoWaveInputKey inputKey = newInputKey("abc".getBytes(StandardCharsets.UTF_8));
        final ObjectWritable ow = newTestValue("abc");

        mapDriver.withInput(
                inputKey,
                ow);

        final List<Pair<GeoWaveInputKey, ObjectWritable>> results = mapDriver.run();

        assertEquals(
                0,
                results.size());

        // results from sample rank function to make sure it was provided the
        // correct object
        assertEquals(
                1,
                capturedObjects.size());
        assertEquals(
                "abc",
                ((TestObject) capturedObjects.get(0)).id);
    }

    /**
     * Feeds three values keyed with ranks 0.5, 0.6 and 0.7 to the reducer
     * (sample size 2) and expects exactly two outputs, "abc" then "def", both
     * written under the {@value #ADAPTER_TYPE_ID} adapter ID.
     */
    @Test
    public void testReducer()
            throws IOException {
        final ObjectWritable ow1 = newTestValue("abc");
        final ObjectWritable ow2 = newTestValue("def");
        final ObjectWritable ow3 = newTestValue("ghi");

        final GeoWaveInputKey inputKey1 = newRankedReducerKey(0.5);
        final GeoWaveInputKey inputKey2 = newRankedReducerKey(0.6);
        final GeoWaveInputKey inputKey3 = newRankedReducerKey(0.7);

        reduceDriver.addInput(
                inputKey1,
                Arrays.asList(ow1));
        reduceDriver.addInput(
                inputKey2,
                Arrays.asList(ow2));
        reduceDriver.addInput(
                inputKey3,
                Arrays.asList(ow3));

        final List<Pair<GeoWaveOutputKey, TestObject>> results = reduceDriver.run();
        assertEquals(
                2,
                results.size());

        final byte[] expectedAdapterId = ADAPTER_TYPE_ID.getBytes(StandardCharsets.UTF_8);
        assertTrue(Arrays.equals(
                results.get(
                        0).getFirst().getAdapterId().getBytes(),
                expectedAdapterId));
        assertTrue(Arrays.equals(
                results.get(
                        1).getFirst().getAdapterId().getBytes(),
                expectedAdapterId));

        assertEquals(
                "abc",
                results.get(
                        0).getSecond().getName());
        assertEquals(
                "def",
                results.get(
                        1).getSecond().getName());
    }
}