package mil.nga.giat.geowave.test.mapreduce;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.geotools.feature.simple.SimpleFeatureBuilder;
import org.geotools.feature.simple.SimpleFeatureTypeBuilder;
import org.geotools.referencing.crs.DefaultGeographicCRS;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.opengis.feature.simple.SimpleFeature;
import com.vividsolutions.jts.geom.Geometry;
import mil.nga.giat.geowave.analytic.AnalyticItemWrapper;
import mil.nga.giat.geowave.analytic.GeometryDataSetGenerator;
import mil.nga.giat.geowave.analytic.PropertyManagement;
import mil.nga.giat.geowave.analytic.SimpleFeatureItemWrapperFactory;
import mil.nga.giat.geowave.analytic.clustering.CentroidManager;
import mil.nga.giat.geowave.analytic.clustering.CentroidManagerGeoWave;
import mil.nga.giat.geowave.analytic.distance.FeatureCentroidDistanceFn;
import mil.nga.giat.geowave.analytic.mapreduce.clustering.runner.MultiLevelJumpKMeansClusteringJobRunner;
import mil.nga.giat.geowave.analytic.mapreduce.clustering.runner.MultiLevelKMeansClusteringJobRunner;
import mil.nga.giat.geowave.analytic.param.ClusteringParameters;
import mil.nga.giat.geowave.analytic.param.ExtractParameters;
import mil.nga.giat.geowave.analytic.param.GlobalParameters;
import mil.nga.giat.geowave.analytic.param.JumpParameters;
import mil.nga.giat.geowave.analytic.param.MapReduceParameters;
import mil.nga.giat.geowave.analytic.param.ParameterEnum;
import mil.nga.giat.geowave.analytic.param.SampleParameters;
import mil.nga.giat.geowave.analytic.param.StoreParameters.StoreParam;
import mil.nga.giat.geowave.analytic.store.PersistableStore;
import mil.nga.giat.geowave.core.geotime.store.query.SpatialQuery;
import mil.nga.giat.geowave.core.index.sfc.data.NumericRange;
import mil.nga.giat.geowave.core.store.DataStore;
import mil.nga.giat.geowave.core.store.adapter.AdapterStore;
import mil.nga.giat.geowave.core.store.index.IndexStore;
import mil.nga.giat.geowave.core.store.operations.remote.options.DataStorePluginOptions;
import mil.nga.giat.geowave.core.store.query.DistributableQuery;
import mil.nga.giat.geowave.test.GeoWaveITRunner;
import mil.nga.giat.geowave.test.TestUtils;
import mil.nga.giat.geowave.test.annotation.Environments;
import mil.nga.giat.geowave.test.annotation.Environments.Environment;
import mil.nga.giat.geowave.test.annotation.GeoWaveTestStore;
import mil.nga.giat.geowave.test.annotation.GeoWaveTestStore.GeoWaveStoreType;
/**
 * Integration test for the multi-level KMeans clustering map-reduce job
 * runners (plain KMeans++ and KMeans with the jump method). Three well
 * separated point sets are generated and ingested into the configured data
 * store; the clustering jobs are then run over the bounding region and the
 * persisted output (centroids and their convex hulls) is verified per zoom
 * level.
 */
@RunWith(GeoWaveITRunner.class)
@Environments({
    Environment.MAP_REDUCE
})
public class GeoWaveKMeansIT
{
    // Store types this IT is parameterized over by GeoWaveITRunner.
    @GeoWaveTestStore({
        GeoWaveStoreType.ACCUMULO,
        GeoWaveStoreType.BIGTABLE,
        GeoWaveStoreType.HBASE
    })
    protected DataStorePluginOptions dataStorePluginOptions;

    private final static Logger LOGGER = LoggerFactory.getLogger(GeoWaveKMeansIT.class);
    private static long startMillis;

    /** Record the start time so {@link #reportTest()} can log elapsed seconds. */
    @BeforeClass
    public static void startTimer() {
        startMillis = System.currentTimeMillis();
        LOGGER.warn("-----------------------------------------");
        LOGGER.warn("*                                       *");
        LOGGER.warn("*         RUNNING GeoWaveKMeansIT       *");
        LOGGER.warn("*                                       *");
        LOGGER.warn("-----------------------------------------");
    }

    /** Log a completion banner with the total elapsed wall-clock time. */
    @AfterClass
    public static void reportTest() {
        LOGGER.warn("-----------------------------------------");
        LOGGER.warn("*                                       *");
        LOGGER.warn("*      FINISHED GeoWaveKMeansIT         *");
        LOGGER.warn("*         " + ((System.currentTimeMillis() - startMillis) / 1000)
            + "s elapsed.                 *");
        LOGGER.warn("*                                       *");
        LOGGER.warn("-----------------------------------------");
    }

    /**
     * Build a feature builder for the "test" type: a WGS84 geometry plus a
     * name and a count attribute. Used to construct the synthetic input
     * features.
     *
     * @return a builder over the constructed simple feature type
     */
    private SimpleFeatureBuilder getBuilder() {
        final SimpleFeatureTypeBuilder typeBuilder = new SimpleFeatureTypeBuilder();
        typeBuilder.setName("test");
        typeBuilder.setCRS(DefaultGeographicCRS.WGS84); // <- Coordinate
        // reference
        // add attributes in order
        typeBuilder.add(
            "geom",
            Geometry.class);
        typeBuilder.add(
            "name",
            String.class);
        typeBuilder.add(
            "count",
            Long.class);
        // build the type
        return new SimpleFeatureBuilder(
            typeBuilder.buildFeatureType());
    }

    // Shared generator for the synthetic point sets ingested by testIngest().
    final GeometryDataSetGenerator dataGenerator = new GeometryDataSetGenerator(
        new FeatureCentroidDistanceFn(),
        getBuilder());

    /**
     * Ingest three spatially disjoint point sets (3, 6 and 4 clusters in
     * regions near South America, Africa and central Asia respectively) so the
     * clustering jobs have clearly separable input.
     *
     * @param dataStore the store to write the generated features into
     * @throws IOException if the write fails
     */
    private void testIngest(
        final DataStore dataStore )
        throws IOException {
        dataGenerator.writeToGeoWave(
            dataStore,
            dataGenerator.generatePointSet(
                0.15,
                0.2,
                3,
                800,
                new double[] {
                    -100,
                    -45
                },
                new double[] {
                    -90,
                    -35
                }));
        dataGenerator.writeToGeoWave(
            dataStore,
            dataGenerator.generatePointSet(
                0.15,
                0.2,
                6,
                600,
                new double[] {
                    0,
                    0
                },
                new double[] {
                    10,
                    10
                }));
        dataGenerator.writeToGeoWave(
            dataStore,
            dataGenerator.generatePointSet(
                0.15,
                0.2,
                4,
                900,
                new double[] {
                    65,
                    35
                },
                new double[] {
                    75,
                    45
                }));
    }

    @Test
    public void testIngestAndQueryGeneralGpx()
        throws Exception {
        TestUtils.deleteAll(dataStorePluginOptions);
        testIngest(dataStorePluginOptions.createDataStore());
        runKPlusPlus(new SpatialQuery(
            dataGenerator.getBoundingRegion()));
    }

    /**
     * Run the two-level KMeans++ clustering job over the given query region
     * and verify that each level produced at least two clusters under batch id
     * "bx1".
     *
     * @param query spatial constraint limiting the clustered input
     * @throws Exception on job or verification failure
     */
    private void runKPlusPlus(
        final DistributableQuery query )
        throws Exception {
        final MultiLevelKMeansClusteringJobRunner jobRunner = new MultiLevelKMeansClusteringJobRunner();
        final int res = jobRunner.run(
            MapReduceTestUtils.getConfiguration(),
            new PropertyManagement(
                new ParameterEnum[] {
                    ExtractParameters.Extract.QUERY,
                    ExtractParameters.Extract.MIN_INPUT_SPLIT,
                    ExtractParameters.Extract.MAX_INPUT_SPLIT,
                    ClusteringParameters.Clustering.ZOOM_LEVELS,
                    ClusteringParameters.Clustering.MAX_ITERATIONS,
                    ClusteringParameters.Clustering.RETAIN_GROUP_ASSIGNMENTS,
                    ExtractParameters.Extract.OUTPUT_DATA_TYPE_ID,
                    StoreParam.INPUT_STORE,
                    GlobalParameters.Global.BATCH_ID,
                    MapReduceParameters.MRConfig.HDFS_BASE_DIR,
                    SampleParameters.Sample.MAX_SAMPLE_SIZE,
                    SampleParameters.Sample.MIN_SAMPLE_SIZE
                },
                new Object[] {
                    query,
                    MapReduceTestUtils.MIN_INPUT_SPLITS,
                    MapReduceTestUtils.MAX_INPUT_SPLITS,
                    2,
                    2,
                    false,
                    "centroid",
                    new PersistableStore(
                        dataStorePluginOptions),
                    "bx1",
                    TestUtils.TEMP_DIR + File.separator + MapReduceTestEnvironment.HDFS_BASE_DIRECTORY + "/t1",
                    3,
                    2
                }));
        Assert.assertEquals(
            0,
            res);
        final DataStore dataStore = dataStorePluginOptions.createDataStore();
        final IndexStore indexStore = dataStorePluginOptions.createIndexStore();
        final AdapterStore adapterStore = dataStorePluginOptions.createAdapterStore();
        final int resultCountLevel1 = countResults(
            dataStore,
            indexStore,
            adapterStore,
            "bx1",
            1, // level
            1);
        final int resultCountLevel2 = countResults(
            dataStore,
            indexStore,
            adapterStore,
            "bx1",
            2, // level
            resultCountLevel1);
        // Mirror runKJumpPlusPlus: with MIN_SAMPLE_SIZE of 2, each level is
        // expected to yield at least two clusters.
        Assert.assertTrue(resultCountLevel1 >= 2);
        Assert.assertTrue(resultCountLevel2 >= 2);
        // for travis-ci to run, we want to limit the memory consumption
        System.gc();
    }

    /**
     * Run the two-level jump-method KMeans clustering job over the given query
     * region and verify that each level produced at least two clusters under
     * batch id "bx2".
     *
     * NOTE(review): this method is not invoked by any @Test in this class —
     * confirm whether it is intentionally disabled or should be wired into
     * testIngestAndQueryGeneralGpx().
     *
     * @param query spatial constraint limiting the clustered input
     * @throws Exception on job or verification failure
     */
    private void runKJumpPlusPlus(
        final DistributableQuery query )
        throws Exception {
        final MultiLevelJumpKMeansClusteringJobRunner jobRunner2 = new MultiLevelJumpKMeansClusteringJobRunner();
        final int res2 = jobRunner2.run(
            MapReduceTestUtils.getConfiguration(),
            new PropertyManagement(
                new ParameterEnum[] {
                    ExtractParameters.Extract.QUERY,
                    ExtractParameters.Extract.MIN_INPUT_SPLIT,
                    ExtractParameters.Extract.MAX_INPUT_SPLIT,
                    ClusteringParameters.Clustering.ZOOM_LEVELS,
                    ExtractParameters.Extract.OUTPUT_DATA_TYPE_ID,
                    StoreParam.INPUT_STORE,
                    GlobalParameters.Global.BATCH_ID,
                    MapReduceParameters.MRConfig.HDFS_BASE_DIR,
                    JumpParameters.Jump.RANGE_OF_CENTROIDS,
                    JumpParameters.Jump.KPLUSPLUS_MIN,
                    ClusteringParameters.Clustering.MAX_ITERATIONS
                },
                new Object[] {
                    query,
                    MapReduceTestUtils.MIN_INPUT_SPLITS,
                    MapReduceTestUtils.MAX_INPUT_SPLITS,
                    2,
                    "centroid",
                    new PersistableStore(
                        dataStorePluginOptions),
                    "bx2",
                    TestUtils.TEMP_DIR + File.separator + MapReduceTestEnvironment.HDFS_BASE_DIRECTORY + "/t2",
                    new NumericRange(
                        4,
                        7),
                    5,
                    2
                }));
        Assert.assertEquals(
            0,
            res2);
        final DataStore dataStore = dataStorePluginOptions.createDataStore();
        final IndexStore indexStore = dataStorePluginOptions.createIndexStore();
        final AdapterStore adapterStore = dataStorePluginOptions.createAdapterStore();
        final int jumpResultCountLevel1 = countResults(
            dataStore,
            indexStore,
            adapterStore,
            "bx2",
            1,
            1);
        final int jumpResultCountLevel2 = countResults(
            dataStore,
            indexStore,
            adapterStore,
            "bx2",
            2,
            jumpResultCountLevel1);
        Assert.assertTrue(jumpResultCountLevel1 >= 2);
        Assert.assertTrue(jumpResultCountLevel2 >= 2);
        // for travis-ci to run, we want to limit the memory consumption
        System.gc();
    }

    /**
     * Verify the clustering output for one zoom level of one batch: every
     * active centroid (association count > 0) must have a geometry, a batch
     * id, and a matching convex hull with the same name. Also asserts that the
     * number of centroid groups equals {@code expectedParentCount} (each group
     * at level N corresponds to a cluster produced at level N-1).
     *
     * @param dataStore store holding the clustering output
     * @param indexStore index store for the output
     * @param adapterStore adapter store for the output
     * @param batchID batch identifier the job wrote under ("bx1"/"bx2")
     * @param level zoom level to inspect
     * @param expectedParentCount expected number of centroid groups
     * @return the number of active centroids found across all groups
     * @throws AccumuloException on store access failure
     * @throws AccumuloSecurityException on store authorization failure
     * @throws IOException on read failure
     */
    private int countResults(
        final DataStore dataStore,
        final IndexStore indexStore,
        final AdapterStore adapterStore,
        final String batchID,
        final int level,
        final int expectedParentCount )
        throws AccumuloException,
        AccumuloSecurityException,
        IOException {
        final CentroidManager<SimpleFeature> centroidManager = new CentroidManagerGeoWave<SimpleFeature>(
            dataStore,
            indexStore,
            adapterStore,
            new SimpleFeatureItemWrapperFactory(),
            "centroid",
            TestUtils.DEFAULT_SPATIAL_INDEX.getId().getString(),
            batchID,
            level);
        final CentroidManager<SimpleFeature> hullManager = new CentroidManagerGeoWave<SimpleFeature>(
            dataStore,
            indexStore,
            adapterStore,
            new SimpleFeatureItemWrapperFactory(),
            "convex_hull",
            TestUtils.DEFAULT_SPATIAL_INDEX.getId().getString(),
            batchID,
            level);
        int childCount = 0;
        int parentCount = 0;
        for (final String grp : centroidManager.getAllCentroidGroups()) {
            final List<AnalyticItemWrapper<SimpleFeature>> centroids = centroidManager.getCentroidsForGroup(grp);
            final List<AnalyticItemWrapper<SimpleFeature>> hulls = hullManager.getCentroidsForGroup(grp);
            for (final AnalyticItemWrapper<SimpleFeature> centroid : centroids) {
                // Centroids that attracted no points are ignored.
                if (centroid.getAssociationCount() == 0) {
                    continue;
                }
                Assert.assertNotNull(centroid.getGeometry());
                Assert.assertNotNull(centroid.getBatchID());
                boolean found = false;
                final List<SimpleFeature> features = new ArrayList<SimpleFeature>();
                for (final AnalyticItemWrapper<SimpleFeature> hull : hulls) {
                    found |= (hull.getName().equals(centroid.getName()));
                    Assert.assertNotNull(hull.getGeometry());
                    Assert.assertNotNull(hull.getBatchID());
                    features.add(hull.getWrappedItem());
                }
                // Use the class logger rather than raw stdout for diagnostics.
                LOGGER.info(
                    "Hull features for group {}: {}",
                    grp,
                    features);
                Assert.assertTrue(
                    grp,
                    found);
                childCount++;
            }
            parentCount++;
        }
        Assert.assertEquals(
            batchID,
            expectedParentCount,
            parentCount);
        return childCount;
    }
}