package mil.nga.giat.geowave.analytic.mapreduce.dbscan;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import mil.nga.giat.geowave.analytic.GeometryHullTool;
import mil.nga.giat.geowave.analytic.nn.DistanceProfile;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.Point;
import com.vividsolutions.jts.geom.TopologyException;
/**
*
* Represents a cluster. Maintains links to other clusters through shared
* components Maintains counts contributed by components of this cluster.
* Supports merging with other clusters, incrementing the count by only those
* components different from the other cluster.
*
* Intended to run in a single thread. Not Thread Safe.
*
*/
public abstract class DBScanClusterList implements
        Cluster
{
    protected static final Logger LOGGER = LoggerFactory.getLogger(DBScanClusterList.class);

    // internal state
    /** Hull geometry accumulated for this cluster; null until a geometry is contributed. */
    protected Geometry clusterGeo = null;
    /** Number of items attributed to this cluster (approximate after compressed merges). */
    protected int itemCount = 1;
    /** IDs of neighboring clusters linked to this one; lazily allocated (see getLinkedClusters). */
    private Set<ByteArrayId> linkedClusters = null;
    /** IDs of clusters that have been merged into this one; lazily allocated (see getIds). */
    private List<ByteArrayId> ids = null;
    /** The center item's ID; identity of this cluster (used by equals/hashCode). */
    private ByteArrayId id;

    // global configuration...to save memory...passing this stuff around.
    private static GeometryHullTool connectGeometryTool = new GeometryHullTool();
    private static int mergeSize = 0;

    // global state
    // ID to cluster.
    protected final Map<ByteArrayId, Cluster> index;

    public static GeometryHullTool getHullTool() {
        return connectGeometryTool;
    }

    public static void setMergeSize(
            final int size ) {
        mergeSize = size;
    }

    /**
     * @param clusterGeo initial geometry (may be null)
     * @param itemCount initial number of items represented by this cluster
     * @param centerId ID of the center item; becomes this cluster's identity
     * @param index shared (global) map from item/cluster ID to owning cluster
     */
    public DBScanClusterList(
            final Geometry clusterGeo,
            final int itemCount,
            final ByteArrayId centerId,
            final Map<ByteArrayId, Cluster> index ) {
        super();
        this.clusterGeo = clusterGeo;
        this.itemCount = itemCount;
        this.index = index;
        id = centerId;
    }

    /**
     * Subclass hook: incorporate the new item and return how many items it
     * contributes to this cluster's count.
     */
    protected abstract long addAndFetchCount(
            final ByteArrayId newId,
            final ClusterItem newInstance,
            final DistanceProfile<?> distanceProfile );

    /**
     * Link the given item to this cluster, incrementing the count by the
     * amount the subclass reports.
     *
     * @return false if the item was already linked or is this cluster itself
     */
    @Override
    public final boolean add(
            final DistanceProfile<?> distanceProfile,
            final ByteArrayId newId,
            final ClusterItem newInstance ) {
        LOGGER.trace(
                "link {} to {}",
                newId,
                id);
        // Set.add returning false means the link already existed.
        if (!getLinkedClusters(
                true).add(
                newId)) return false;
        final Cluster cluster = index.get(newId);
        if (cluster == this) return false;
        incrementItemCount(addAndFetchCount(
                newId,
                newInstance,
                distanceProfile));
        return true;
    }

    /**
     * Lazily supply the merged-ID list. When updates are not allowed, a shared
     * immutable empty list is returned to save memory.
     */
    protected List<ByteArrayId> getIds(
            final boolean allowUpdates ) {
        // identity comparison: upgrade the shared empty sentinel to a real
        // list when updates become necessary
        if (ids == null || ids == Collections.<ByteArrayId> emptyList())
            ids = allowUpdates ? new ArrayList<ByteArrayId>(
                    4) : Collections.<ByteArrayId> emptyList();
        return ids;
    }

    /**
     * Lazily supply the linked-cluster set. When updates are not allowed, a
     * shared immutable empty set is returned to save memory.
     */
    protected Set<ByteArrayId> getLinkedClusters(
            final boolean allowUpdates ) {
        if (linkedClusters == null || linkedClusters == Collections.<ByteArrayId> emptySet())
            linkedClusters = allowUpdates ? new HashSet<ByteArrayId>() : Collections.<ByteArrayId> emptySet();
        return linkedClusters;
    }

    protected void incrementItemCount(
            final long amount ) {
        final int c = itemCount;
        // NOTE(review): the compound assignment silently narrows long -> int;
        // counts large enough to overflow int appear not to be expected here
        // — TODO confirm upstream bounds.
        itemCount += amount;
        assert (c <= itemCount);
    }

    /**
     * Clear the contents. Invoked when the contents of a cluster are merged
     * with another cluster. This method is supportive for GC, not serving any
     * algorithm logic.
     */
    @Override
    public void clear() {
        linkedClusters = null;
        clusterGeo = null;
    }

    /**
     * Remove this cluster from the global index and sever all links to and
     * from it, releasing its internal state.
     */
    @Override
    public void invalidate() {
        // getLinkedClusters(false): read-only iteration; avoids allocating a
        // set when none exists (iterating the empty sentinel is equivalent)
        for (final ByteArrayId linkedId : getLinkedClusters(false)) {
            final Cluster linkedCluster = index.get(linkedId);
            if (linkedCluster != null && linkedCluster != this && linkedCluster instanceof DBScanClusterList) {
                ((DBScanClusterList) linkedCluster).getLinkedClusters(
                        false).remove(
                        id);
            }
        }
        // parameterized form (consistent with the rest of this class) defers
        // string construction until trace is actually enabled
        LOGGER.trace(
                "Invalidate {}",
                id);
        index.remove(id);
        linkedClusters = null;
        clusterGeo = null;
        itemCount = -1;
    }

    /**
     * Advise the neighbor-search whether this item needs processing. Items
     * already linked (or the cluster itself) are skipped.
     */
    @Override
    public InferType infer(
            final ByteArrayId id, // intentionally shadows the field: refers to the candidate item
            final ClusterItem value ) {
        final Cluster cluster = index.get(id);
        if (cluster == this || getLinkedClusters(
                false).contains(
                id)) return InferType.SKIP;
        return InferType.NONE;
    }

    /** This cluster does not retain its member items; always empty. */
    @Override
    public Iterator<Entry<ByteArrayId, ClusterItem>> iterator() {
        return Collections.<Entry<ByteArrayId, ClusterItem>> emptyList().iterator();
    }

    @Override
    public int currentLinkSetSize() {
        return getLinkedClusters(
                false).size();
    }

    /** Finalize the cluster by merging in all transitively linked clusters. */
    public void finish() {
        mergeLinks(true);
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = (prime * result) + ((id == null) ? 0 : id.hashCode());
        return result;
    }

    @Override
    public boolean equals(
            final Object obj ) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        final DBScanClusterList other = (DBScanClusterList) obj;
        if (id == null) {
            if (other.id != null) {
                return false;
            }
        }
        else if (!id.equals(other.id)) {
            return false;
        }
        return true;
    }

    @Override
    public int size() {
        return (int) (itemCount);
    }

    @Override
    public boolean isEmpty() {
        return size() <= 0;
    }

    @Override
    public Geometry getGeometry() {
        return compress();
    }

    @Override
    public abstract boolean isCompressed();

    /**
     * Merge the given cluster into this one: take over its IDs in the global
     * index, adopt its links, and increment the item count — interpolated by
     * geometric overlap when both clusters are compressed, by one otherwise
     * (unless the merged cluster was already linked/counted).
     */
    @Override
    public void merge(
            final Cluster cluster ) {
        final boolean removedLinked = getLinkedClusters(
                true).remove(
                cluster.getId());
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace(
                    "Merging {} into {}",
                    cluster.getId(),
                    this.id);
        }
        if (cluster != this) {
            getIds(
                    true).add(
                    cluster.getId());
            index.put(
                    cluster.getId(),
                    this);
            if (cluster instanceof DBScanClusterList) {
                final DBScanClusterList otherList = (DBScanClusterList) cluster;
                for (final ByteArrayId mergedId : otherList.getIds(false)) {
                    index.put(
                            mergedId,
                            this);
                    // ids was materialized by getIds(true) above
                    this.ids.add(mergedId);
                }
                getLinkedClusters(
                        true).addAll(
                        otherList.getLinkedClusters(false));
                // BUG FIX: the original cast to DBScanClusterList here was
                // unconditional (outside this instanceof guard), throwing
                // ClassCastException for any other Cluster implementation.
                if (isCompressed() && otherList.isCompressed()) {
                    incrementItemCount((long) (interpolateFactor(otherList.clusterGeo) * otherList.itemCount));
                    return;
                }
            }
            if (!removedLinked) {
                incrementItemCount(1);
            }
        }
    }

    /**
     * Fraction (0.0..1.0) of the merged area NOT already covered by this
     * cluster's geometry; used to scale the merged item count so overlapping
     * items are not double-counted.
     */
    protected double interpolateFactor(
            final Geometry areaBeingMerged ) {
        try {
            if (clusterGeo == null) return 1.0;
            final Geometry intersection = areaBeingMerged.intersection(clusterGeo);
            final double geo2Area = areaBeingMerged.getArea();
            if (intersection != null) {
                if (intersection instanceof Point && areaBeingMerged instanceof Point)
                    // identical points: fully overlapping, contribute nothing
                    return 0.0;
                else if (intersection.isEmpty())
                    return 1.0;
                else if (geo2Area > 0)
                    return 1.0 - (intersection.getArea() / geo2Area);
                else
                    return 0.0;
            }
            return 1.0;
        }
        catch (final Exception ex) {
            // broad catch is deliberate: JTS intersection can fail on
            // non-simple geometries; fall back to "fully overlapping"
            LOGGER.warn(
                    "Cannot calculate difference of geometries to interpolate size ",
                    ex);
        }
        return 0.0;
    }

    @Override
    public ByteArrayId getId() {
        return id;
    }

    /** Subclass hook: produce the (possibly compressed) cluster geometry. */
    protected abstract Geometry compress();

    @Override
    public Set<ByteArrayId> getLinkedClusters() {
        return getLinkedClusters(false);
    }

    /**
     * Fold the given geometry into this cluster's hull, falling back to a
     * hull built from raw coordinates if the connect operation fails on
     * non-simple geometries.
     */
    protected void union(
            final Geometry otherGeo ) {
        if (otherGeo == null) return;
        try {
            if (clusterGeo == null) {
                clusterGeo = otherGeo;
            }
            else if (clusterGeo instanceof Point) {
                // connect() expects the larger geometry first
                clusterGeo = connectGeometryTool.connect(
                        otherGeo,
                        clusterGeo);
            }
            else {
                clusterGeo = connectGeometryTool.connect(
                        clusterGeo,
                        otherGeo);
            }
        }
        catch (final TopologyException ex) {
            LOGGER.error(
                    "Union failed due to non-simple geometries",
                    ex);
            clusterGeo = connectGeometryTool.createHullFromGeometry(
                    clusterGeo,
                    Arrays.asList(otherGeo.getCoordinates()),
                    false);
        }
    }

    /**
     * Merge all transitively reachable linked clusters (meeting the global
     * merge-size threshold) into this one.
     */
    protected void mergeLinks(
            final boolean deleteNonLinks ) {
        if (getLinkedClusters(
                false).size() == 0) return;
        final Set<Cluster> readyClusters = new HashSet<Cluster>();
        // seed with self so traversal never re-enters this cluster
        readyClusters.add(this);
        buildClusterLists(
                readyClusters,
                this,
                deleteNonLinks);
        readyClusters.remove(this);
        final Iterator<Cluster> finishedIt = readyClusters.iterator();
        final Cluster top = this;
        while (finishedIt.hasNext()) {
            top.merge(finishedIt.next());
        }
    }

    /**
     * Depth-first collection of linked clusters into readyClusters, recursing
     * only through clusters large enough to satisfy the merge threshold.
     */
    private void buildClusterLists(
            final Set<Cluster> readyClusters,
            final DBScanClusterList cluster,
            final boolean deleteNonLinks ) {
        for (final ByteArrayId linkedClusterId : cluster.getLinkedClusters()) {
            final Cluster linkedCluster = index.get(linkedClusterId);
            // BUG FIX: a linked cluster may have been invalidated and removed
            // from the index; the original dereferenced the null lookup
            // result (NPE) and let null poison readyClusters.
            if (linkedCluster == null) {
                continue;
            }
            // readyClusters.add must run first: it both de-duplicates and
            // records the cluster for merging even when we do not recurse
            if (readyClusters.add(linkedCluster) && linkedCluster.size() >= mergeSize
                    && linkedCluster instanceof DBScanClusterList) {
                buildClusterLists(
                        readyClusters,
                        (DBScanClusterList) linkedCluster,
                        false);
            }
        }
    }

    @Override
    public String toString() {
        return "DBScanClusterList [clusterGeo=" + (clusterGeo == null ? "null" : clusterGeo.toString()) + ", id=" + id
                + "]";
    }
}