package arida.ufc.br.moap.optics;
import java.util.ArrayList;
import java.util.List;
import java.util.PriorityQueue;
import arida.ufc.br.moap.clustering.api.IClusteringAlgorithm;
import arida.ufc.br.moap.core.imp.Parameters;
import arida.ufc.br.moap.core.imp.Reporter;
import arida.ufc.br.moap.core.spi.IDataModel;
import arida.ufc.br.moap.datamodelapi.imp.ListModelImpl;
/**
* @author igobrilhante
*
* @param <T> is the object type to be clustered
*
* Optics is a density-based algorithm similar to DBSCAN. <p>It is based on
* ordering points to identify cluster structures.</p> <p>This implementation is
* based on the paper Ankerst, M., Breunig, M., & Kriegel, H. (1999). OPTICS:
* ordering points to identify the clustering structure. ACM SIGMOD Record.
* Retrieved from <a
* href='http://dl.acm.org/citation.cfm?id=304187'>http://dl.acm.org/citation.cfm?id=304187</a>
*/
public class Optics<T>
extends IClusteringAlgorithm<T> {
public static int UNDEFINED = Integer.MAX_VALUE;
public static int NOISE_ID = -1;
private List<OpticsObject<T>> setOfObjects;
// Parameters
private float epsilon;
private int minPts;
private Parameters params;
private PriorityQueue<OpticsObject<T>> orderSeed;
// Ordered List
private List<OpticsObject<T>> ClusterOrdered;
// Result
private List<OpticsCluster<T>> clusters;
/**
* @param dataset
* @param epsilon
* @param minPts
* @param distanceFunction
*/
public Optics() {
// Parameters for the algorithm
this.params = new Parameters();
params.addClass("minPts", Integer.class);
params.addClass("eps", Float.class);
report = new Reporter(this.getClass());
}
/**
* Name of the algorithm
* @return
*/
@Override
public String getName() {
// TODO Auto-generated method stub
return "Optics Algorithm";
}
/**
*
* @param object
*/
private void expandClusterOrder(OpticsObject<T> object) {
// logger.info("Expand Cluster Order");
// Object is set processed
object.setProcessed(true);
// Update its reachability-distance
// Compute its core-distance
setCoreDistance(object);
// Add the object to the result
ClusterOrdered.add(object);
// if its core-distance is not UNDEFINED
if (object.getCoreDistance() != UNDEFINED) {
// Update object
update(object);
// While the orderSeed is not empty
while (!orderSeed.isEmpty()) {
OpticsObject<T> current = orderSeed.poll();
current.setProcessed(true);
setCoreDistance(current);
ClusterOrdered.add(current);
if (current.getCoreDistance() != UNDEFINED) {
update(current);
}
}
}
}
/**
* The priority queue is updated with the epslon-neighborhood of p and q, respectively
* @param object
*/
private void update(OpticsObject<T> object) {
double coredist = object.getCoreDistance();
// Loop over the neighborhood
for (OpticsObject<T> p : object.getNeighbors()) {
if (!p.isProcessed()) {
double dist = distanceFunction.evaluate(object.getObject(), p.getObject());
double newReachDist = Math.max(coredist,dist);
// p is not in orderSeed
if (p.getReachabilityDistance() == UNDEFINED) {
p.setReachabilityDistance(newReachDist);
orderSeed.add(p);
}
// p is already in orderSeed
else {
if (newReachDist < p.getReachabilityDistance()) {
// Update it in the queue by assigning a new reachability distance
orderSeed.remove(p);
p.setReachabilityDistance(newReachDist);
orderSeed.add(p);
}
}
}
}
}
/**
* Set the clusters for each object
* Extract DBSCAN Clustering <p> It extracts the clusters based on the
* Optics execution that creates Ordered Clusters</p>
*/
private void extractDBSCANClustering() {
report.setReport("Extract DBScan Clustering");
// cluster noise id
int noise = NOISE_ID;
// initial cluster id
int clusterId = -1;
OpticsCluster<T> noiseCluster = new OpticsCluster<T>(noise);
OpticsCluster<T> cluster = null;
int size = ClusterOrdered.size();
/*
* Loop over the points
*/
for (int i = 0; i < size; i++) {
OpticsObject<T> object = ClusterOrdered.get(i);
// System.out.println(object.getObject()+" "+object.getReachabilityDistance());
if (object.getReachabilityDistance() > this.epsilon) {
if (object.getCoreDistance() <= epsilon) {
// Insert cluster to result clusters
// if (cluster != null) {
// this.clusters.add(cluster);
// }
// New Optics Cluster
// System.out.println("New optics cluster");
clusterId = clusterId + 1;
object.setClusterID(clusterId);
cluster = new OpticsCluster<T>(clusterId);
cluster.getObjects().add(object.getObject());
this.clusters.add(cluster);
} else {
// Noise
// Get the Noise Cluster and insert a new object
// System.out.println("Noise");
object.setClusterID(noise);
noiseCluster.getObjects().add(object.getObject());
}
} else {
// Get a Optics Cluster with clusterId and insert a new object
// System.out.println("Add to cluster");
object.setClusterID(clusterId);
cluster.getObjects().add(object.getObject());
}
}
this.clusters.add(noiseCluster); // Insert noise to result clusters
}
// Set neighbors of an object given a epsilon
/**
* @param object
*/
private void setNeighbors(OpticsObject<T> object) {
List<OpticsObject<T>> neighbors = new ArrayList<OpticsObject<T>>();
for (OpticsObject<T> p : setOfObjects) {
if (!p.equals(object)) {
if (distanceFunction.evaluate((T) p.getObject(), (T) object.getObject()) <= this.epsilon) {
neighbors.add(p);
}
}
}
object.setNeighbors(neighbors);
}
/**
* Set core-distance of an object. This is obtained as follows.
*
* core-distance(p) = UNDEFINED, if |N(p,eps)| < MinPts
* core-distance(p) = distance to the MinPts-th point, otherwise
*
* @param object
*/
private void setCoreDistance(OpticsObject<T> object) {
// If its neighbor size is smaller than minPts: |N(p,eps)| < MinPts
if (object.getNeighbors().size() < this.minPts) {
object.setCoreDistance(UNDEFINED);
} // Otherwise, its core-distance is minPtsDistance
else {
// distance to the MinPts-th point
double d = minPtsDistance(object);
object.setCoreDistance(d);
}
}
/**
* Help to find core-distance by getting the smallest distance between an object and its neighbors
* @param object
* @return
*/
private double minPtsDistance(OpticsObject<T> object) {
double minDistance = Double.MAX_VALUE;
for (OpticsObject<T> q : object.getNeighbors()) {
double distance = distanceFunction.evaluate(q.getObject(), object.getObject());
if (distance < minDistance) {
minDistance = distance;
}
}
return minDistance;
}
/**
* @return
*/
public List<OpticsObject<T>> getClusterOrdered() {
return ClusterOrdered;
}
/**
*
* @param data
* @param parameters
* @return
*/
@Override
public ListModelImpl<OpticsCluster<T>> execute(IDataModel<T> data, Parameters parameters) {
/**
* Setting parameters
*/
setParameters(parameters);
Optics.UNDEFINED = (int)this.epsilon+5;
this.setOfObjects = new ArrayList<OpticsObject<T>>();
this.orderSeed = new PriorityQueue<OpticsObject<T>>();
/**
* Creating OpticsObjects
*/
for (T t : data.getInstances()) {
OpticsObject<T> o = new OpticsObject<T>(t);
this.setOfObjects.add(o);
}
/**
* Set neighbors of each point p
*/
for (OpticsObject<T> p : setOfObjects) {
setNeighbors(p);
}
/**
* Execution
*/
report.setReport("Optics Execution");
int size = setOfObjects.size();
this.clusters = new ArrayList<OpticsCluster<T>>();
this.ClusterOrdered = new ArrayList<OpticsObject<T>>();
int i = 0;
while (i < size) {
OpticsObject<T> object = setOfObjects.get(i);
// if the object is not processed
if (!object.isProcessed()) {
expandClusterOrder(object);
}
i++;
}
/**
* Extract the cluster
*/
extractDBSCANClustering();
report.setReport("Optics Execution End");
/**
* Setting the result
*/
ListModelImpl<OpticsCluster<T>> res = new ListModelImpl<OpticsCluster<T>>(this.clusters);
this.result = res;
return res;
}
private void setParameters(Parameters parameters){
report.setReport("Setting parameters");
if(this.params.validate(parameters)){
this.epsilon = (Float)parameters.getParamValue("eps");
this.minPts = (Integer)parameters.getParamValue("minPts");
report.setReport("Parameters "+parameters);
report.setReport("Distance function: "+this.distanceFunction.getName());
}
}
}