package mil.nga.giat.geowave.analytic.nn;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import mil.nga.giat.geowave.analytic.nn.NeighborList.InferType;
import mil.nga.giat.geowave.analytic.partitioner.Partitioner;
import mil.nga.giat.geowave.analytic.partitioner.Partitioner.PartitionData;
import mil.nga.giat.geowave.analytic.partitioner.Partitioner.PartitionDataCallback;
import mil.nga.giat.geowave.core.index.ByteArrayId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
*
* This class is designed to support secondary partitioning.
*
* (1) Partition added data using a partitioner.
*
* (2) Process data, perform the O(N^2) (e.g. ~ n^2/2) comparisons within those
* partitions.
*
* Custom plug-ins include (1) a factory for the neighbor list that tracks those
* pairings of data whose distance fell under the provided maximum, and (2) a
* completion notification callback invoked for each primary data item.
*
* The loop algorithm is: for each primary, compare it to all remaining
* primary and all secondary data items.
*
* A powerful performance enhancing tool is the inference mechanism associated
* with the neighborhood lists. A list can have intelligence to decide that a
* particular neighbor can be inferred and, therefore, can be removed from the
* set of primaries to be inspected. This has no effect on secondaries.
*
* The processor can be called multiple times, as the 'process' algorithm does
* not alter its internal state. The notification callback can be used to alter
* the internal state (e.g. calling 'add' or 'remove' methods). Caution should
* be used when altering internal state within the neighbor list.
*
*
*
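* @param <PARTITION_VALUE>
*            the type of the values handed to the partitioner and to the
*            type converter
* @param <STORE_VALUE>
*            the type of the converted values that are stored and compared
*
* @see Partitioner
* @see Partitioner.PartitionData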
*/
public class NNProcessor<PARTITION_VALUE, STORE_VALUE>
{
    protected static final Logger LOGGER = LoggerFactory.getLogger(NNProcessor.class);

    /** Default value of the per-partition bound on the number of tracked ids. */
    public static final int DEFAULT_UPPER_BOUND_PARTIION_SIZE = 75000;

    /** Maps every distinct partition to the single instance used as its canonical representative. */
    final Map<PartitionData, PartitionData> uniqueSetOfPartitions = new HashMap<PartitionData, PartitionData>();

    /** The ids assigned to each (canonical) partition. */
    final Map<PartitionData, Set<ByteArrayId>> partitionsToIds = new HashMap<PartitionData, Set<ByteArrayId>>();

    /** The (canonical) partitions each id has been assigned to. */
    final Map<ByteArrayId, Set<PartitionData>> idsToPartition = new HashMap<ByteArrayId, Set<PartitionData>>();

    /** Converted values of the items added as primary, keyed by id. */
    final Map<ByteArrayId, STORE_VALUE> primaries = new HashMap<ByteArrayId, STORE_VALUE>();

    /** Converted values of the items added as non-primary, keyed by id. */
    final Map<ByteArrayId, STORE_VALUE> others = new HashMap<ByteArrayId, STORE_VALUE>();

    protected final Partitioner<Object> partitioner;
    protected final TypeConverter<STORE_VALUE> typeConverter;
    protected final DistanceProfileGenerateFn<?, STORE_VALUE> distanceProfileFn;
    protected final double maxDistance;
    protected final PartitionData parentPartition;

    private int upperBoundPerPartition = DEFAULT_UPPER_BOUND_PARTIION_SIZE;

    /*
     * Run state
     */

    /** Id of the first primary handed to {@code add}; {@code process} starts with it. */
    protected ByteArrayId startingPoint;

    /** Neighbor index of the most recent {@code process} run; {@code null} before the first run. */
    protected NeighborIndex<STORE_VALUE> index;

    /**
     * @param partitioner
     *            assigns each added value to one or more partitions
     * @param typeConverter
     *            converts an added value to the stored representation
     * @param distanceProfileFn
     *            computes the distance profile between two stored values
     * @param maxDistance
     *            the largest distance (inclusive) at which two items are
     *            recorded as neighbors
     * @param parentPartition
     *            the partition this processor works on; only used for
     *            logging within this class
     */
    public NNProcessor(
            Partitioner<Object> partitioner,
            TypeConverter<STORE_VALUE> typeConverter,
            DistanceProfileGenerateFn<?, STORE_VALUE> distanceProfileFn,
            double maxDistance,
            PartitionData parentPartition ) {
        this.partitioner = partitioner;
        this.typeConverter = typeConverter;
        this.distanceProfileFn = distanceProfileFn;
        this.maxDistance = maxDistance;
        this.parentPartition = parentPartition;
    }

    /**
     * Registers an id with a partition, creating the bookkeeping entries for
     * the partition on first use.
     *
     * @param pd
     *            the partition reported by the partitioner
     * @param itemId
     *            the id of the item being added
     * @return the canonical instance of the partition, or {@code null} if the
     *         partition already holds more than the configured upper bound of
     *         ids and the item was therefore not registered
     */
    private PartitionData add(
            final PartitionData pd,
            final ByteArrayId itemId ) {
        PartitionData singleton = uniqueSetOfPartitions.get(pd);
        if (singleton == null) {
            uniqueSetOfPartitions.put(
                    pd,
                    pd);
            singleton = pd;
        }

        Set<ByteArrayId> idsSet = partitionsToIds.get(singleton);
        if (idsSet == null) {
            idsSet = new HashSet<ByteArrayId>();
            partitionsToIds.put(
                    singleton,
                    idsSet);
        }

        if (idsSet.size() > upperBoundPerPartition) {
            return null;
        }
        if (idsSet.size() == upperBoundPerPartition) {
            LOGGER.warn("At upper bound on partition. Increase the bounds or condense the data.");
        }
        idsSet.add(itemId);

        Set<PartitionData> partitionSet = idsToPartition.get(itemId);
        if (partitionSet == null) {
            partitionSet = new HashSet<PartitionData>();
            idsToPartition.put(
                    itemId,
                    partitionSet);
        }
        partitionSet.add(singleton);
        return singleton;
    }

    /**
     * Removes an item from every partition, from the primary and non-primary
     * stores and, if a run has created one, from the neighbor index.
     *
     * @param id
     *            the id of the item to remove
     */
    public void remove(
            final ByteArrayId id ) {
        final Set<PartitionData> partitionSet = idsToPartition.remove(id);
        if (partitionSet != null) {
            for (final PartitionData pd : partitionSet) {
                final Set<ByteArrayId> idSet = partitionsToIds.get(pd);
                if (idSet != null) {
                    idSet.remove(id);
                }
            }
        }
        primaries.remove(id);
        others.remove(id);
        if (index != null) {
            index.empty(id);
        }
    }

    /**
     * Converts the value and registers it with every partition the partitioner
     * reports for it. The value is stored only if at least one partition
     * accepted it.
     *
     * @param id
     *            the id of the item
     * @param isPrimary
     *            {@code true} to store the item as a primary, {@code false} to
     *            store it with the other (non-primary) items
     * @param partitionValue
     *            the value to partition and convert
     * @throws IOException
     *             if partitioning fails; an {@code IOException} is propagated
     *             unchanged, any other exception is wrapped as the cause
     */
    public void add(
            final ByteArrayId id,
            final boolean isPrimary,
            final PARTITION_VALUE partitionValue )
            throws IOException {
        final STORE_VALUE storeValue = this.typeConverter.convert(
                id,
                partitionValue);

        try {
            partitioner.partition(
                    partitionValue,
                    new PartitionDataCallback() {
                        @Override
                        public void partitionWith(
                                final PartitionData partitionData )
                                throws Exception {
                            final PartitionData singleton = add(
                                    partitionData,
                                    id);
                            if (singleton != null) {
                                // the canonical partition is primary as soon as
                                // any reported instance of it is primary
                                singleton.setPrimary(partitionData.isPrimary() || singleton.isPrimary());
                                if (isPrimary) {
                                    primaries.put(
                                            id,
                                            storeValue);
                                }
                                else {
                                    others.put(
                                            id,
                                            storeValue);
                                }
                            }
                        }
                    });
        }
        catch (final Exception e) {
            // do not bury an IOException inside another IOException
            if (e instanceof IOException) {
                throw (IOException) e;
            }
            throw new IOException(
                    e);
        }

        if (isPrimary && (startingPoint == null)) {
            startingPoint = id;
        }
    }

    /**
     * Callback invoked by {@code process} once all comparisons for a primary
     * item have been made.
     */
    public interface CompleteNotifier<STORE_VALUE>
    {
        /**
         * @param id
         *            the id of the primary item
         * @param value
         *            the stored value of the primary item
         * @param list
         *            the neighbor list built for the primary item
         */
        public void complete(
                ByteArrayId id,
                STORE_VALUE value,
                NeighborList<STORE_VALUE> list )
                throws IOException,
                InterruptedException;
    }

    /**
     * @return the number of stored primary items plus the number of stored
     *         non-primary items
     */
    public int size() {
        return primaries.size() + others.size();
    }

    /**
     * Drops every partition holding fewer than {@code size} ids. An item that
     * is left without any partition is also dropped from the primary and
     * non-primary stores and from the id-to-partition map.
     *
     * @param size
     *            the minimum size of a partition to be processed
     * @return true if all partitions are empty
     */
    public boolean trimSmallPartitions(
            int size ) {
        final Iterator<Map.Entry<PartitionData, Set<ByteArrayId>>> it = partitionsToIds.entrySet().iterator();
        while (it.hasNext()) {
            final Map.Entry<PartitionData, Set<ByteArrayId>> entry = it.next();
            if (entry.getValue().size() >= size) {
                continue;
            }
            for (final ByteArrayId id : entry.getValue()) {
                final Set<PartitionData> partitionsForId = idsToPartition.get(id);
                if (partitionsForId != null) {
                    partitionsForId.remove(entry.getKey());
                }
                if ((partitionsForId == null) || partitionsForId.isEmpty()) {
                    // mirror remove(id): leave no empty partition set behind
                    idsToPartition.remove(id);
                    this.primaries.remove(id);
                    this.others.remove(id);
                }
            }
            it.remove();
        }
        return partitionsToIds.isEmpty();
    }

    /**
     * Compares every primary item that is still pending inspection against the
     * pending primaries and the non-primary items sharing a partition with it,
     * records those within {@code maxDistance} in the neighbor index and then
     * notifies the callback for that primary.
     *
     * The run starts at the starting point (or any primary if there is none)
     * and continues with the farthest still-pending primary seen during the
     * previous step, falling back to any pending primary.
     *
     * @param listFactory
     *            creates the neighbor lists held by the neighbor index
     * @param notification
     *            invoked once per processed primary
     */
    public void process(
            NeighborListFactory<STORE_VALUE> listFactory,
            final CompleteNotifier<STORE_VALUE> notification )
            throws IOException,
            InterruptedException {
        LOGGER.info(
                "Processing {} with primary = {} and other = {}",
                parentPartition,
                primaries.size(),
                others.size());
        LOGGER.info(
                "Processing {} with sub-partitions = {}",
                parentPartition,
                uniqueSetOfPartitions.size());

        index = new NeighborIndex<STORE_VALUE>(
                listFactory);

        // primaries that have neither been processed nor inferred away yet
        final Set<ByteArrayId> inspectionSet = new HashSet<ByteArrayId>();
        inspectionSet.addAll(primaries.keySet());

        ByteArrayId nextStart = startingPoint;
        if (nextStart == null) {
            nextStart = anyPending(inspectionSet);
        }

        while (nextStart != null) {
            final ByteArrayId primaryId = nextStart;
            inspectionSet.remove(primaryId);
            LOGGER.trace(
                    "processing {}",
                    primaryId);

            final STORE_VALUE primary = primaries.get(primaryId);
            if (primary == null) {
                // the candidate is not (or no longer) a stored primary
                nextStart = anyPending(inspectionSet);
                continue;
            }

            final NeighborList<STORE_VALUE> primaryList = index.init(
                    primaryId,
                    primary);

            double farthestDistance = 0;
            ByteArrayId farthestNeighbor = null;

            final Set<PartitionData> partitions = idsToPartition.get(primaryId);
            if (partitions != null) {
                for (final PartitionData pd : partitions) {
                    final Set<ByteArrayId> neighborIds = partitionsToIds.get(pd);
                    if (neighborIds == null) {
                        continue;
                    }
                    for (final ByteArrayId neighborId : neighborIds) {
                        if (neighborId.equals(primaryId)) {
                            continue;
                        }
                        STORE_VALUE neighbor = primaries.get(neighborId);
                        final boolean isAPrimary = neighbor != null;
                        if (isAPrimary) {
                            // skip primaries already processed or inferred away
                            if (!inspectionSet.contains(neighborId)) {
                                continue;
                            }
                        }
                        else {
                            neighbor = others.get(neighborId);
                            if (neighbor == null) {
                                continue;
                            }
                        }

                        final InferType inferResult = primaryList.infer(
                                neighborId,
                                neighbor);
                        if (inferResult == InferType.NONE) {
                            final DistanceProfile<?> distanceProfile = distanceProfileFn.computeProfile(
                                    primary,
                                    neighbor);
                            final double distance = distanceProfile.getDistance();
                            if (distance <= maxDistance) {
                                index.add(
                                        distanceProfile,
                                        primaryId,
                                        primary,
                                        neighborId,
                                        neighbor,
                                        isAPrimary);
                                LOGGER.trace(
                                        "Neighbor {}",
                                        neighborId);
                            }
                            if ((distance > farthestDistance) && inspectionSet.contains(neighborId)) {
                                farthestDistance = distance;
                                farthestNeighbor = neighborId;
                            }
                        }
                        else if (inferResult == InferType.REMOVE) {
                            inspectionSet.remove(neighborId);
                        }
                    }
                }
            }

            notification.complete(
                    primaryId,
                    primary,
                    primaryList);
            index.empty(primaryId);

            // The farthest neighbor may have been inferred away after it was
            // recorded; only continue with it if it is still pending.
            if ((farthestNeighbor != null) && inspectionSet.contains(farthestNeighbor)) {
                nextStart = farthestNeighbor;
            }
            else {
                nextStart = anyPending(inspectionSet);
            }
        }
    }

    /**
     * @return some element of the given set, or {@code null} if it is empty
     */
    private static ByteArrayId anyPending(
            final Set<ByteArrayId> pending ) {
        return pending.isEmpty() ? null : pending.iterator().next();
    }

    /**
     * @return the bound checked against a partition's id count before another
     *         id is accepted
     */
    public int getUpperBoundPerPartition() {
        return upperBoundPerPartition;
    }

    /**
     * @param upperBoundPerPartition
     *            the bound checked against a partition's id count before
     *            another id is accepted; a partition whose count already
     *            exceeds it rejects further ids
     */
    public void setUpperBoundPerPartition(
            int upperBoundPerPartition ) {
        this.upperBoundPerPartition = upperBoundPerPartition;
    }
}