package mil.nga.giat.geowave.analytic.clustering;
import java.io.IOException;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.collections.map.LRUMap;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.geotools.data.DefaultTransaction;
import org.geotools.data.FeatureWriter;
import org.geotools.data.Transaction;
import org.geotools.data.shapefile.ShapefileDataStore;
import org.geotools.data.shapefile.ShapefileDataStoreFactory;
import org.geotools.feature.simple.SimpleFeatureBuilder;
import org.geotools.feature.simple.SimpleFeatureTypeBuilder;
import org.geotools.feature.type.BasicFeatureTypes;
import org.geotools.filter.FilterFactoryImpl;
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;
import org.opengis.feature.type.AttributeDescriptor;
import org.opengis.feature.type.GeometryType;
import org.opengis.filter.Filter;
import org.opengis.filter.expression.Expression;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.vividsolutions.jts.geom.Coordinate;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.Point;
import mil.nga.giat.geowave.adapter.vector.FeatureDataAdapter;
import mil.nga.giat.geowave.adapter.vector.GeotoolsFeatureDataAdapter;
import mil.nga.giat.geowave.adapter.vector.query.cql.CQLQuery;
import mil.nga.giat.geowave.analytic.AnalyticFeature;
import mil.nga.giat.geowave.analytic.AnalyticFeature.ClusterFeatureAttribute;
import mil.nga.giat.geowave.analytic.AnalyticItemWrapper;
import mil.nga.giat.geowave.analytic.AnalyticItemWrapperFactory;
import mil.nga.giat.geowave.analytic.PropertyManagement;
import mil.nga.giat.geowave.analytic.ScopedJobConfiguration;
import mil.nga.giat.geowave.analytic.clustering.exception.MatchingCentroidNotFoundException;
import mil.nga.giat.geowave.analytic.param.CentroidParameters;
import mil.nga.giat.geowave.analytic.param.GlobalParameters;
import mil.nga.giat.geowave.analytic.param.ParameterEnum;
import mil.nga.giat.geowave.analytic.param.StoreParameters;
import mil.nga.giat.geowave.analytic.store.PersistableStore;
import mil.nga.giat.geowave.core.geotime.ingest.SpatialDimensionalityTypeProvider;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import mil.nga.giat.geowave.core.index.StringUtils;
import mil.nga.giat.geowave.core.store.CloseableIterator;
import mil.nga.giat.geowave.core.store.DataStore;
import mil.nga.giat.geowave.core.store.IndexWriter;
import mil.nga.giat.geowave.core.store.adapter.AdapterStore;
import mil.nga.giat.geowave.core.store.adapter.DataAdapter;
import mil.nga.giat.geowave.core.store.index.IndexStore;
import mil.nga.giat.geowave.core.store.index.PrimaryIndex;
import mil.nga.giat.geowave.core.store.query.DataIdQuery;
import mil.nga.giat.geowave.core.store.query.QueryOptions;
import mil.nga.giat.geowave.mapreduce.GeoWaveConfiguratorBase;
/**
*
* Manages the population of centroids by group id and batch id.
*
* Properties:
*
* @formatter:off
*
* "CentroidManagerGeoWave.Centroid.WrapperFactoryClass" -
* {@link AnalyticItemWrapperFactory} used to wrap spatial
* objects with centroid management functions
*
* "CentroidManagerGeoWave.Centroid.DataTypeId" -> The data type
* ID of the centroid simple feature
*
* "CentroidManagerGeoWave.Centroid.IndexId" -> The GeoWave index
* ID of the centroid simple feature
*
* "CentroidManagerGeoWave.Global.BatchId" -> Batch ID for
* updates
*
* "CentroidManagerGeoWave.Global.Zookeeper" -> Zookeeper URL
*
* "CentroidManagerGeoWave.Global.AccumuloInstance" -> Accumulo
* Instance Name
*
* "CentroidManagerGeoWave.Global.AccumuloUser" -> Accumulo User
* name
*
* "CentroidManagerGeoWave.Global.AccumuloPassword" -> Accumulo
* Password
*
* "CentroidManagerGeoWave.Global.AccumuloNamespace" -> Accumulo
* Table Namespace
*
* "CentroidManagerGeoWave.Common.AccumuloConnectFactory" ->
* {@link BasicAccumuloOperationsFactory}
*
* @formatter:on
*
* @param <T>
* The item type used to represent a centroid.
*/
public class CentroidManagerGeoWave<T> implements
CentroidManager<T>
{
// Class-wide logger; also the logger passed to init(...) by the constructors
// that do not receive one explicitly.
final static Logger LOGGER = LoggerFactory.getLogger(CentroidManagerGeoWave.class);
// Parameter set returned by getParameters() and handed to
// PropertyManagement.setConfig(...) by setParameters(...).
private static final ParameterEnum<?>[] MY_PARAMS = new ParameterEnum[] {
StoreParameters.StoreParam.INPUT_STORE,
GlobalParameters.Global.BATCH_ID,
CentroidParameters.Centroid.DATA_TYPE_ID,
CentroidParameters.Centroid.DATA_NAMESPACE_URI,
CentroidParameters.Centroid.INDEX_ID,
CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS,
CentroidParameters.Centroid.ZOOM_LEVEL
};
// String form of the adapter ID under which centroids are stored and queried.
private String centroidDataTypeId;
// Batch used for default queries and stamped onto centroids by transferBatch(...).
private String batchId;
// Zoom level; getRawCentroids(...) filters on it only when it is > 0 and no
// group ID is supplied.
private int level = 0;
// Wraps raw stored items of type T as AnalyticItemWrapper instances.
private AnalyticItemWrapperFactory<T> centroidFactory;
// Adapter looked up from adapterStore by centroidDataTypeId; used for CQL
// queries and for writing.
@SuppressWarnings("rawtypes")
private GeotoolsFeatureDataAdapter adapter;
// Store that centroids are queried from, written to and deleted from.
private DataStore dataStore;
// Source of the primary index (see 'index').
private IndexStore indexStore;
// Source of the centroid adapter (see 'adapter' and getAdapter()).
private AdapterStore adapterStore;
// Primary index resolved from indexStore using the configured index ID.
private PrimaryIndex index;
public CentroidManagerGeoWave(
final DataStore dataStore,
final IndexStore indexStore,
final AdapterStore adapterStore,
final AnalyticItemWrapperFactory<T> centroidFactory,
final String centroidDataTypeId,
final String indexId,
final String batchId,
final int level ) {
this.centroidFactory = centroidFactory;
this.centroidDataTypeId = centroidDataTypeId;
this.level = level;
this.batchId = batchId;
this.dataStore = dataStore;
this.indexStore = indexStore;
index = (PrimaryIndex) indexStore.getIndex(new ByteArrayId(
StringUtils.stringToBinary(indexId)));
this.adapterStore = adapterStore;
adapter = (GeotoolsFeatureDataAdapter) adapterStore.getAdapter(new ByteArrayId(
StringUtils.stringToBinary(centroidDataTypeId)));
}
@SuppressWarnings("unchecked")
public CentroidManagerGeoWave(
final PropertyManagement properties )
throws IOException {
final Class<?> scope = CentroidManagerGeoWave.class;
final Configuration configuration = new Configuration();
properties.setJobConfiguration(
configuration,
scope);
init(
Job.getInstance(configuration),
scope,
LOGGER);
}
/**
 * Builds a manager from a job context, using this class's own logger.
 *
 * @param context
 *            job context carrying the configuration
 * @param scope
 *            class used to scope the configuration keys
 * @throws IOException
 *             if initialization fails
 */
public CentroidManagerGeoWave(
        final JobContext context,
        final Class<?> scope )
        throws IOException {
    this(context, scope, LOGGER);
}
/**
 * Builds a manager from a job context with a caller-supplied logger.
 *
 * @param context
 *            job context carrying the configuration
 * @param scope
 *            class used to scope the configuration keys
 * @param logger
 *            logger forwarded to the scoped configuration and to the
 *            centroid factory
 * @throws IOException
 *             if initialization fails
 */
public CentroidManagerGeoWave(
        final JobContext context,
        final Class<?> scope,
        final Logger logger )
        throws IOException {
    init(context, scope, logger);
}
/**
 * Populates all instance state from the scoped job configuration.
 *
 * Steps, in order: instantiate and initialize the centroid wrapper factory;
 * read zoom level (default 1), data type ID (default "centroid"), batch ID
 * (default: current time in milliseconds) and index ID (default: the ID of
 * the default spatial primary index); then create the data, index and
 * adapter stores from the configured input store and resolve the index and
 * adapter from them.
 *
 * @param context
 *            job context carrying the configuration
 * @param scope
 *            class used to scope the configuration keys
 * @param logger
 *            logger forwarded to the scoped configuration and the factory
 * @throws IOException
 *             if the wrapper factory cannot be instantiated or initialized
 */
private void init(
        final JobContext context,
        final Class<?> scope,
        final Logger logger )
        throws IOException {
    final ScopedJobConfiguration scopedConfig = new ScopedJobConfiguration(
            context.getConfiguration(),
            scope,
            logger);

    try {
        centroidFactory = (AnalyticItemWrapperFactory<T>) CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS
                .getHelper()
                .getValue(
                        context,
                        scope,
                        CentroidItemWrapperFactory.class);
        centroidFactory.initialize(
                context,
                scope,
                logger);
    }
    catch (final Exception factoryFailure) {
        // Note: reported on the class logger, not on the 'logger' argument.
        final String confKey = GeoWaveConfiguratorBase.enumToConfKey(
                this.getClass(),
                CentroidParameters.Centroid.WRAPPER_FACTORY_CLASS);
        LOGGER.error("Cannot instantiate " + confKey);
        throw new IOException(
                factoryFailure);
    }

    this.level = scopedConfig.getInt(
            CentroidParameters.Centroid.ZOOM_LEVEL,
            1);

    centroidDataTypeId = scopedConfig.getString(
            CentroidParameters.Centroid.DATA_TYPE_ID,
            "centroid");

    final String fallbackBatchId = Long.toString(Calendar.getInstance().getTime().getTime());
    batchId = scopedConfig.getString(
            GlobalParameters.Global.BATCH_ID,
            fallbackBatchId);

    final String fallbackIndexId = new SpatialDimensionalityTypeProvider().createPrimaryIndex().getId().getString();
    final String indexId = scopedConfig.getString(
            CentroidParameters.Centroid.INDEX_ID,
            fallbackIndexId);

    final PersistableStore inputStore = (PersistableStore) StoreParameters.StoreParam.INPUT_STORE
            .getHelper()
            .getValue(
                    context,
                    scope,
                    null);

    // Creation order is kept: data store, index store (+ index), adapter
    // store (+ adapter).
    dataStore = inputStore.getDataStoreOptions().createDataStore();

    indexStore = inputStore.getDataStoreOptions().createIndexStore();
    final ByteArrayId indexKey = new ByteArrayId(
            StringUtils.stringToBinary(indexId));
    index = (PrimaryIndex) indexStore.getIndex(indexKey);

    adapterStore = inputStore.getDataStoreOptions().createAdapterStore();
    final ByteArrayId adapterKey = new ByteArrayId(
            StringUtils.stringToBinary(centroidDataTypeId));
    adapter = (GeotoolsFeatureDataAdapter) adapterStore.getAdapter(adapterKey);
}
/**
 * Creates a new centroid based on an existing one, with new coordinates and
 * dimension values. The work is delegated to the configured
 * {@link AnalyticItemWrapperFactory}.
 *
 * @param feature
 *            the existing item the new centroid is based on
 * @param groupID
 *            group ID passed through to the factory
 * @param coordinate
 *            coordinate passed through to the factory
 * @param extraNames
 *            names of the extra dimensions
 * @param extraValues
 *            values of the extra dimensions
 * @return the wrapper produced by the factory
 */
@Override
public AnalyticItemWrapper<T> createNextCentroid(
        final T feature,
        final String groupID,
        final Coordinate coordinate,
        final String[] extraNames,
        final double[] extraValues ) {
    final AnalyticItemWrapper<T> nextCentroid = centroidFactory.createNextItem(
            feature,
            groupID,
            coordinate,
            extraNames,
            extraValues);
    return nextCentroid;
}
// Maximum number of entries held by the centroid cache below; also the number
// of leading groups that getAllCentroidGroups() pre-loads.
private final int capacity = 100;
// Cache of centroid lists. Keys are Pair(batchID, groupID-or-"##") as built in
// getCentroidsForGroup(batchID, groupID); values are
// List<AnalyticItemWrapper<T>>.
private final LRUMap groupToCentroid = new LRUMap(
capacity);
/**
 * Empties the in-memory centroid cache. Stored centroids are not touched.
 */
@Override
public void clear() {
    this.groupToCentroid.clear();
}
/**
 * Deletes the centroids with the given data IDs from the data store, one
 * delete call per ID. {@code null} entries in the array are skipped.
 *
 * @param dataIds
 *            data IDs of the centroids to remove
 * @throws IOException
 *             declared by the interface
 */
@Override
public void delete(
        final String[] dataIds )
        throws IOException {
    final ByteArrayId adapterId = new ByteArrayId(
            StringUtils.stringToBinary(centroidDataTypeId));
    for (int i = 0; i < dataIds.length; i++) {
        final String currentId = dataIds[i];
        if (currentId == null) {
            continue;
        }
        final QueryOptions options = new QueryOptions(
                adapterId,
                index.getId());
        final DataIdQuery byDataId = new DataIdQuery(
                adapterId,
                new ByteArrayId(
                        StringUtils.stringToBinary(currentId)));
        dataStore.delete(
                options,
                byDataId);
    }
}
/**
 * Returns the distinct group IDs found among the centroids of this manager's
 * batch, in the order they are first encountered. The underlying query is
 * the one built by {@code getRawCentroids(batchId, null)}.
 *
 * As a side effect, the centroid cache is warmed for groups whose position
 * in the result is below {@code capacity}.
 *
 * @return the distinct group IDs
 * @throws IOException
 *             if querying or closing the iterator fails
 */
@Override
public List<String> getAllCentroidGroups()
        throws IOException {
    final List<String> groups = new ArrayList<String>();
    // try-with-resources: the iterator is released even when wrapping an
    // item or warming the cache throws part-way through the scan.
    try (final CloseableIterator<T> it = getRawCentroids(
            this.batchId,
            null)) {
        while (it.hasNext()) {
            final AnalyticItemWrapper<T> item = centroidFactory.create(it.next());
            final String groupID = item.getGroupID();
            int pos = groups.indexOf(groupID);
            if (pos < 0) {
                pos = groups.size();
                groups.add(groupID);
            }
            // cache the first set
            if (pos < capacity) {
                getCentroidsForGroup(groupID);
            }
        }
    }
    return groups;
}
/**
 * Returns the centroids of the given group within this manager's own batch.
 *
 * @param groupID
 *            group to look up; may be {@code null}
 * @return the (possibly cached) centroid list
 * @throws IOException
 *             if loading the centroids fails
 */
@Override
public List<AnalyticItemWrapper<T>> getCentroidsForGroup(
        final String groupID )
        throws IOException {
    final String ownBatch = this.batchId;
    return getCentroidsForGroup(
            ownBatch,
            groupID);
}
/**
 * Returns the centroids for a batch/group pair, consulting the LRU cache
 * first and loading (then caching) them on a miss. A {@code null} group is
 * cached under the placeholder key "##" and loaded with a {@code null}
 * group filter.
 *
 * @param batchID
 *            batch to look in
 * @param groupID
 *            group to look up; may be {@code null}
 * @return the cached or freshly loaded centroid list
 * @throws IOException
 *             if loading the centroids fails
 */
@Override
public List<AnalyticItemWrapper<T>> getCentroidsForGroup(
        final String batchID,
        final String groupID )
        throws IOException {
    final String cacheGroup = (groupID != null) ? groupID : "##";
    final Pair<String, String> cacheKey = Pair.of(
            batchID,
            cacheGroup);

    @SuppressWarnings("unchecked")
    final List<AnalyticItemWrapper<T>> cached = (List<AnalyticItemWrapper<T>>) groupToCentroid.get(cacheKey);
    if (cached != null) {
        return cached;
    }

    // groupID is passed through unchanged: null stays null.
    final List<AnalyticItemWrapper<T>> loaded = loadCentroids(
            batchID,
            groupID);
    groupToCentroid.put(
            cacheKey,
            loaded);
    return loaded;
}
/**
 * Finds the centroid with the given ID among the centroids of a group in
 * this manager's batch.
 *
 * @param id
 *            ID of the centroid to find
 * @param groupID
 *            group whose centroids are searched
 * @return the first centroid whose ID equals {@code id}
 * @throws IOException
 *             if the group's centroids cannot be loaded
 * @throws MatchingCentroidNotFoundException
 *             if no centroid in the group has that ID
 */
@Override
public AnalyticItemWrapper<T> getCentroidById(
        final String id,
        final String groupID )
        throws IOException,
        MatchingCentroidNotFoundException {
    final List<AnalyticItemWrapper<T>> candidates = this.getCentroidsForGroup(groupID);
    for (final AnalyticItemWrapper<T> candidate : candidates) {
        if (!candidate.getID().equals(
                id)) {
            continue;
        }
        return candidate;
    }
    throw new MatchingCentroidNotFoundException(
            id);
}
/**
 * Queries the raw centroids for a batch/group pair and wraps each of them
 * with the centroid factory. This method never consults the cache.
 *
 * @param batchID
 *            batch to query
 * @param groupID
 *            group to query, or {@code null} for no group filter
 * @return the wrapped centroids, in iteration order
 * @throws IOException
 *             wrapping any {@link IOException} raised while querying or
 *             iterating; a failure to close the iterator is only logged
 */
private List<AnalyticItemWrapper<T>> loadCentroids(
        final String batchID,
        final String groupID )
        throws IOException {
    final List<AnalyticItemWrapper<T>> loaded = new ArrayList<AnalyticItemWrapper<T>>();
    CloseableIterator<T> rawItems = null;
    try {
        try {
            rawItems = this.getRawCentroids(
                    batchID,
                    groupID);
            while (rawItems.hasNext()) {
                final T rawItem = rawItems.next();
                loaded.add(centroidFactory.create(rawItem));
            }
        }
        finally {
            // Runs before the outer catch, so a close warning is logged ahead
            // of the load error.
            closeIteratorQuietly(rawItems);
        }
    }
    catch (final IOException e) {
        LOGGER.error("Cannot load centroids");
        throw new IOException(
                e);
    }
    return loaded;
}

/**
 * Closes the iterator if it is non-null, logging (not propagating) any
 * {@link IOException} raised by the close.
 */
private void closeIteratorQuietly(
        final CloseableIterator<T> iterator ) {
    if (iterator == null) {
        return;
    }
    try {
        iterator.close();
    }
    catch (final IOException e) {
        LOGGER.warn(
                "Unable to close the iterator",
                e);
    }
}
/**
 * Looks up a single centroid by its data ID, using a data-ID query against
 * the centroid adapter and index.
 *
 * @param dataId
 *            data ID of the centroid
 * @return the wrapped centroid, or {@code null} if the query returns no
 *         item or an {@link IOException} occurs (the exception is logged,
 *         not propagated)
 */
@Override
@SuppressWarnings("unchecked")
public AnalyticItemWrapper<T> getCentroid(
        final String dataId ) {
    final ByteArrayId adapterId = new ByteArrayId(
            StringUtils.stringToBinary(centroidDataTypeId));
    try (CloseableIterator<T> it = dataStore.query(
            new QueryOptions(
                    adapterId,
                    index.getId()),
            new DataIdQuery(
                    adapterId,
                    new ByteArrayId(
                            StringUtils.stringToBinary(dataId))))) {
        if (it.hasNext()) {
            return centroidFactory.create(it.next());
        }
    }
    catch (final IOException e) {
        // Message typo fixed ("fined" -> "find"); plain concatenation replaces
        // dataId.toString(), which could itself throw inside this handler.
        LOGGER.error(
                "Failed to find centroid " + dataId,
                e);
    }
    return null;
}
/**
 * Runs a CQL query for centroids and returns the raw iterator. The caller
 * is responsible for closing it.
 *
 * The filter always matches on batch ID. If {@code groupID} is non-null the
 * group ID must match as well and the zoom level is ignored; otherwise, if
 * the configured level is greater than 0, the zoom level must match.
 *
 * @param batchId
 *            batch ID to match
 * @param groupID
 *            group ID to match, or {@code null}
 * @return iterator over the matching raw centroid items
 * @throws IOException
 *             declared for the underlying query
 */
@SuppressWarnings("unchecked")
protected CloseableIterator<T> getRawCentroids(
        final String batchId,
        final String groupID )
        throws IOException {
    final FilterFactoryImpl filters = new FilterFactoryImpl();

    final Expression batchProperty = filters.property(ClusterFeatureAttribute.BATCH_ID.attrName());
    final Expression batchLiteral = filters.literal(batchId);
    final Filter matchesBatch = filters.equal(
            batchProperty,
            batchLiteral,
            false);

    // Optional second condition; at most one of group / zoom level applies.
    final Filter refinement;
    if (groupID != null) {
        // ignore levels for group IDS
        final Expression groupProperty = filters.property(ClusterFeatureAttribute.GROUP_ID.attrName());
        final Expression groupLiteral = filters.literal(groupID);
        refinement = filters.equal(
                groupProperty,
                groupLiteral,
                false);
    }
    else if (level > 0) {
        final Expression levelProperty = filters.property(ClusterFeatureAttribute.ZOOM_LEVEL.attrName());
        final Expression levelLiteral = filters.literal(level);
        refinement = filters.equal(
                levelProperty,
                levelLiteral,
                false);
    }
    else {
        refinement = null;
    }

    final Filter queryFilter = (refinement == null) ? matchesBatch : filters.and(
            refinement,
            matchesBatch);

    final QueryOptions options = new QueryOptions(
            adapter,
            index);
    final CQLQuery cql = new CQLQuery(
            null,
            queryFilter,
            adapter);
    return (CloseableIterator<T>) dataStore.query(
            options,
            cql);
}
/**
 * Copies centroids from another batch into this manager's batch: every
 * centroid returned for {@code fromBatchId}/{@code groupID} is re-stamped
 * with this manager's batch ID and written through an index writer. The
 * number of transferred centroids is logged at INFO level.
 *
 * @param fromBatchId
 *            batch to read the centroids from
 * @param groupID
 *            group to restrict the transfer to, or {@code null}
 * @throws IOException
 *             if querying, writing or closing a resource fails
 */
@SuppressWarnings("unchecked")
public void transferBatch(
        final String fromBatchId,
        final String groupID )
        throws IOException {
    int count = 0;
    // Both resources are managed by try-with-resources so the source
    // iterator is released even when wrapping or writing an item fails.
    // Acquisition order is unchanged (iterator, then writer); the writer is
    // closed first.
    try (final CloseableIterator<T> it = getRawCentroids(
            fromBatchId,
            groupID);
            final IndexWriter indexWriter = dataStore.createWriter(
                    adapter,
                    index)) {
        while (it.hasNext()) {
            final AnalyticItemWrapper<T> item = centroidFactory.create(it.next());
            item.setBatchID(this.batchId);
            count++;
            indexWriter.write(item.getWrappedItem());
        }
    }
    LOGGER.info("Transfer " + count + " centroids");
}
/**
 * Applies {@code fn} to each centroid group of this manager's batch, in the
 * order returned by {@link #getAllCentroidGroups()}, and stops at the first
 * group for which it returns a non-zero status.
 *
 * @param fn
 *            function invoked with each group ID and that group's centroids
 * @return the first non-zero status returned by {@code fn}, or 0 if every
 *         group returned 0 (or there are no groups)
 * @throws IOException
 *             if the groups or their centroids cannot be loaded; a failure
 *             while listing the groups is re-wrapped in a new
 *             {@link IOException}
 */
@Override
public int processForAllGroups(
        final CentroidProcessingFn<T> fn )
        throws IOException {
    final List<String> groupIds;
    try {
        groupIds = getAllCentroidGroups();
    }
    catch (final IOException listingFailure) {
        throw new IOException(
                listingFailure);
    }

    for (final String groupId : groupIds) {
        final int groupStatus = fn.processGroup(
                groupId,
                getCentroidsForGroup(groupId));
        if (groupStatus != 0) {
            return groupStatus;
        }
    }
    return 0;
}
/**
 * @return a fixed-size list view over this class's parameter array
 */
public static Collection<ParameterEnum<?>> getParameters() {
    final List<ParameterEnum<?>> parameterView = Arrays.asList(MY_PARAMS);
    return parameterView;
}
/**
 * Hands this class's parameter array, the target configuration and the
 * scope to {@code runTimeProperties.setConfig(...)}.
 *
 * @param config
 *            Hadoop configuration passed to {@code setConfig}
 * @param scope
 *            class used to scope the configuration keys
 * @param runTimeProperties
 *            run-time properties on which {@code setConfig} is invoked
 */
public static void setParameters(
        final Configuration config,
        final Class<?> scope,
        final PropertyManagement runTimeProperties ) {
    runTimeProperties.setConfig(MY_PARAMS, config, scope);
}
/**
 * @return a new {@link ByteArrayId} built from the centroid data type ID
 *         string
 */
@Override
public ByteArrayId getDataTypeId() {
    final byte[] encodedTypeId = StringUtils.stringToBinary(centroidDataTypeId);
    return new ByteArrayId(
            encodedTypeId);
}
/**
 * @return the ID of the primary index used for centroids
 */
@Override
public ByteArrayId getIndexId() {
    final ByteArrayId primaryIndexId = index.getId();
    return primaryIndexId;
}
/**
 * @return the batch ID this manager operates on by default
 */
public String getBatchId() {
    return batchId;
}
/**
 * Looks the centroid adapter up in the adapter store on every call, rather
 * than returning the {@code adapter} field.
 *
 * @return the adapter registered under {@link #getDataTypeId()}
 */
private DataAdapter<?> getAdapter() {
    final ByteArrayId typeId = getDataTypeId();
    return adapterStore.getAdapter(typeId);
}
/**
 * Chooses the converter used to turn centroid items into simple features.
 * If the stored adapter is a {@link FeatureDataAdapter}, a converter based
 * on its feature type is used; otherwise one is built from the extra
 * dimension names of the first item (or none if the list is empty).
 *
 * @param items
 *            centroids that will be converted
 * @param shapeClass
 *            geometry class the output features should carry
 * @return the converter
 */
private ToSimpleFeatureConverter<T> getFeatureConverter(
        final List<AnalyticItemWrapper<T>> items,
        final Class<? extends Geometry> shapeClass ) {
    final DataAdapter<?> storedAdapter = getAdapter();
    if (storedAdapter instanceof FeatureDataAdapter) {
        return new SimpleFeatureConverter(
                (FeatureDataAdapter) storedAdapter,
                shapeClass);
    }

    final String[] extraDimensionNames;
    if (items.isEmpty()) {
        extraDimensionNames = new String[0];
    }
    else {
        extraDimensionNames = items.get(
                0).getExtraDimensions();
    }
    return new NonSimpleFeatureConverter(
            extraDimensionNames,
            shapeClass);
}
/**
 * Strategy for turning a wrapped centroid item into a {@link SimpleFeature}
 * of a known feature type. Used by toShapeFile(...).
 */
private interface ToSimpleFeatureConverter<T>
{
// Feature type of the features produced by toSimpleFeature(...).
SimpleFeatureType getFeatureType();
// Converts one item. Both implementations in this class return null when the
// item's geometry cannot be converted to the requested shape class.
SimpleFeature toSimpleFeature(
AnalyticItemWrapper<T> item );
}
/**
 * Builds a {@link FeatureDataAdapter} for a copy of {@code featureType} in
 * which every geometry-typed attribute is bound to {@code shapeClass}. Name,
 * namespace URI, CRS and all non-geometry attribute bindings are carried
 * over.
 *
 * @param featureType
 *            feature type to copy
 * @param shapeClass
 *            binding to use for geometry attributes
 * @return the new adapter, or {@code null} if building it throws (the
 *         exception is logged as a warning)
 */
private static FeatureDataAdapter createFeatureAdapter(
        final SimpleFeatureType featureType,
        final Class<? extends Geometry> shapeClass ) {
    try {
        final SimpleFeatureTypeBuilder typeBuilder = new SimpleFeatureTypeBuilder();
        typeBuilder.setName(featureType.getName().getLocalPart());
        typeBuilder.setNamespaceURI(featureType.getName().getNamespaceURI());
        typeBuilder.setCRS(featureType.getCoordinateReferenceSystem());

        for (final AttributeDescriptor descriptor : featureType.getAttributeDescriptors()) {
            // Geometry attributes are re-bound to the requested shape class.
            final Class<?> binding = (descriptor.getType() instanceof GeometryType) ? shapeClass : descriptor
                    .getType()
                    .getBinding();
            typeBuilder.add(
                    descriptor.getLocalName(),
                    binding);
        }

        final SimpleFeatureType rebuiltType = typeBuilder.buildFeatureType();
        return new FeatureDataAdapter(
                rebuiltType);
    }
    catch (final Exception e) {
        LOGGER.warn(
                "Schema Creation Error. Hint: Check the SRID.",
                e);
        return null;
    }
}
/**
 * Coerces a geometry to the requested shape class.
 *
 * The geometry is returned unchanged if it is already an instance of
 * {@code shapeClass}. Otherwise, if {@code shapeClass} can hold a
 * {@link Point}, the centroid is returned. Otherwise the convex hull is
 * returned, provided it is an instance of {@code shapeClass}.
 *
 * @param value
 *            geometry to convert
 * @param shapeClass
 *            target geometry class
 * @return the converted geometry, or {@code null} if none of the above
 *         applies
 */
private static Geometry convert(
        final Geometry value,
        final Class<? extends Geometry> shapeClass ) {
    if (shapeClass.isInstance(value)) {
        return value;
    }

    final boolean pointAccepted = shapeClass.isAssignableFrom(Point.class);
    if (pointAccepted) {
        return value.getCentroid();
    }

    final Geometry convexHull = value.convexHull();
    return shapeClass.isInstance(convexHull) ? convexHull : null;
}
/**
 * Converter for items whose wrapped object is itself a
 * {@link SimpleFeature}. Output features use a copy of the stored feature
 * type whose geometry attributes are bound to the requested shape class.
 */
private class SimpleFeatureConverter implements
        ToSimpleFeatureConverter<T>
{
    // Adapter for the re-bound copy of the feature type (see constructor).
    final FeatureDataAdapter adapter;
    // Default value of each attribute, in descriptor order.
    final Object[] defaults;
    final Class<? extends Geometry> shapeClass;

    /**
     * @param adapter
     *            adapter holding the stored feature type
     * @param shapeClass
     *            geometry class for the output features
     */
    public SimpleFeatureConverter(
            final FeatureDataAdapter adapter,
            final Class<? extends Geometry> shapeClass ) {
        this.shapeClass = shapeClass;
        this.adapter = createFeatureAdapter(
                adapter.getFeatureType(),
                shapeClass);

        // Defaults are read from the adapter passed in, not from the rebuilt
        // one.
        final List<AttributeDescriptor> descriptors = adapter.getFeatureType().getAttributeDescriptors();
        final int attributeCount = descriptors.size();
        defaults = new Object[attributeCount];
        for (int idx = 0; idx < attributeCount; idx++) {
            defaults[idx] = descriptors.get(
                    idx).getDefaultValue();
        }
    }

    @Override
    public SimpleFeatureType getFeatureType() {
        return adapter.getFeatureType();
    }

    /**
     * Copies the wrapped feature's attributes, position by position, into a
     * new feature of the re-bound type, converting geometry values to the
     * requested shape class.
     *
     * @return the new feature, or {@code null} if a geometry value cannot be
     *         converted
     */
    @Override
    public SimpleFeature toSimpleFeature(
            final AnalyticItemWrapper<T> item ) {
        final SimpleFeature target = SimpleFeatureBuilder.build(
                adapter.getFeatureType(),
                defaults,
                item.getID());

        final List<Object> sourceValues = ((SimpleFeature) item.getWrappedItem()).getAttributes();
        int position = 0;
        for (final Object sourceValue : sourceValues) {
            Object valueToStore = sourceValue;
            if (sourceValue instanceof Geometry) {
                valueToStore = convert(
                        (Geometry) sourceValue,
                        shapeClass);
                if (valueToStore == null) {
                    return null;
                }
            }
            target.setAttribute(
                    position,
                    valueToStore);
            position++;
        }
        return target;
    }
}
/**
 * Converter for items that are not backed by a {@link FeatureDataAdapter}.
 * Output features are created through {@link AnalyticFeature} from the
 * values exposed by the item wrapper.
 */
private class NonSimpleFeatureConverter implements
        ToSimpleFeatureConverter<T>
{
    final SimpleFeatureType featureType;
    // Default value of each attribute, in descriptor order.
    final Object[] defaults;
    final Class<? extends Geometry> shapeClass;

    /**
     * @param extraDimensionNames
     *            names of the extra dimensions to include in the feature
     *            type
     * @param shapeClass
     *            geometry class for the output features
     */
    public NonSimpleFeatureConverter(
            final String[] extraDimensionNames,
            final Class<? extends Geometry> shapeClass ) {
        this.shapeClass = shapeClass;
        this.featureType = AnalyticFeature.createFeatureAdapter(
                centroidDataTypeId,
                extraDimensionNames,
                BasicFeatureTypes.DEFAULT_NAMESPACE,
                ClusteringUtils.CLUSTERING_CRS,
                ClusterFeatureAttribute.values(),
                shapeClass).getFeatureType();

        final List<AttributeDescriptor> descriptors = this.featureType.getAttributeDescriptors();
        final int attributeCount = descriptors.size();
        this.defaults = new Object[attributeCount];
        for (int idx = 0; idx < attributeCount; idx++) {
            this.defaults[idx] = descriptors.get(
                    idx).getDefaultValue();
        }
    }

    @Override
    public SimpleFeatureType getFeatureType() {
        return featureType;
    }

    /**
     * Converts the item's geometry to the requested shape class and builds a
     * geometry feature from the item's values.
     *
     * @return the new feature, or {@code null} if the geometry cannot be
     *         converted
     */
    @Override
    public SimpleFeature toSimpleFeature(
            final AnalyticItemWrapper<T> item ) {
        final Geometry converted = convert(
                item.getGeometry(),
                shapeClass);
        if (converted == null) {
            return null;
        }
        return AnalyticFeature.createGeometryFeature(
                featureType,
                item.getBatchID(),
                item.getID(),
                item.getName(),
                item.getGroupID(),
                item.getCost(),
                converted,
                item.getExtraDimensions(),
                item.getDimensionValues(),
                item.getZoomLevel(),
                item.getIterationID(),
                item.getAssociationCount());
    }
}
/**
 * Writes all centroids of this manager's batch to a shapefile named
 * {@code <parentDir>/<batchId>.shp}.
 *
 * Centroids whose geometry cannot be converted to {@code shapeClass} are
 * skipped with a warning. If writing fails, the problem is logged, the
 * transaction is rolled back and the method returns normally; the
 * transaction is committed only when every write succeeded.
 *
 * @param parentDir
 *            directory the shapefile is created in
 * @param shapeClass
 *            geometry class of the shapes to write
 * @throws IOException
 *             if the file URL is malformed, the centroids cannot be loaded,
 *             the shapefile store or schema cannot be created, or the
 *             commit, rollback or close of the transaction fails
 */
public void toShapeFile(
        final String parentDir,
        final Class<? extends Geometry> shapeClass )
        throws IOException {
    final ShapefileDataStoreFactory dataStoreFactory = new ShapefileDataStoreFactory();
    final Map<String, Serializable> params = new HashMap<String, Serializable>();
    // A malformed URL now propagates (MalformedURLException is an
    // IOException) instead of being logged and leaving "url" unset.
    params.put(
            "url",
            new URL(
                    "file://" + parentDir + "/" + this.batchId + ".shp"));
    params.put(
            "create spatial index",
            Boolean.TRUE);

    final List<AnalyticItemWrapper<T>> centroids = loadCentroids(
            batchId,
            null);
    final ToSimpleFeatureConverter<T> converter = getFeatureConverter(
            centroids,
            shapeClass);

    final ShapefileDataStore newDataStore = (ShapefileDataStore) dataStoreFactory.createNewDataStore(params);
    try {
        newDataStore.createSchema(converter.getFeatureType());
        final String typeName = newDataStore.getTypeNames()[0];

        final Transaction transaction = new DefaultTransaction(
                "create");
        try {
            try (final FeatureWriter<SimpleFeatureType, SimpleFeature> writer = newDataStore.getFeatureWriterAppend(
                    typeName,
                    transaction)) {
                for (final AnalyticItemWrapper<T> item : centroids) {
                    // Convert before asking the writer for the next record,
                    // so an unconvertible item neither appends a record nor
                    // triggers a NullPointerException.
                    final SimpleFeature newFeature = converter.toSimpleFeature(item);
                    if (newFeature == null) {
                        LOGGER.warn("Skipping centroid " + item.getID() + ": geometry cannot be converted to "
                                + shapeClass.getName());
                        continue;
                    }
                    final SimpleFeature copy = writer.next();
                    for (final AttributeDescriptor attrD : newFeature.getFeatureType().getAttributeDescriptors()) {
                        // the null case should only happen for geometry
                        if (copy.getFeatureType().getDescriptor(
                                attrD.getName()) != null) {
                            copy.setAttribute(
                                    attrD.getName(),
                                    newFeature.getAttribute(attrD.getName()));
                        }
                    }
                    // shape files force geometry name to be 'the_geom'. So
                    // isolate this change
                    copy.setDefaultGeometry(newFeature.getDefaultGeometry());
                    writer.write();
                }
            }
            catch (final IOException e) {
                LOGGER.warn(
                        "Problem with the FeatureWritter",
                        e);
                // Roll back and stop: committing after a rollback, as the
                // previous finally block did, is wrong.
                transaction.rollback();
                return;
            }
            transaction.commit();
        }
        finally {
            transaction.close();
        }
    }
    finally {
        // Release the shapefile store on every exit path.
        newDataStore.dispose();
    }
}
}