package it.unisa.sesa.repominer.dbscan;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
/**
* DBSCAN (density-based spatial clustering of applications with noise)
* algorithm
* <p>
* The DBSCAN algorithm forms clusters based on the idea of density
* connectivity, i.e. a point p is density connected to another point q, if
* there exists a chain of points p<sub>i</sub>, with i=1...n and
* p<sub>1</sub>=p and p<sub>n</sub>=q, such that each p<sub>i+1</sub> is
* directly density-reachable from p<sub>i</sub>. A point q is directly
* density-reachable from point p if it is in the ε-neighborhood of this
* point.
* <p>
* Any point that is not density-reachable from a formed cluster is treated as
* noise, and will thus not be present in the results.
* <p>
* The algorithm requires two parameters:
* <ul>
* <li>eps: the distance that defines the ε-neighborhood of a point
* <li>minPoints: the minimum number of density-connected points required to
* form a cluster
* </ul>
*
* @author giograno
*
*/
public class DBSCAN {

    /** Maximum radius of the neighborhood to be considered. */
    private final double eps;

    /** Minimum number of points needed for a cluster. */
    private final int minPoints;

    /**
     * Creates a new instance of a DBSCAN
     *
     * @param eps
     *            maximum radius of the neighborhood to be considered
     * @param minPoints
     *            minimum number of points needed for a cluster
     */
    public DBSCAN(double eps, int minPoints) {
        this.eps = eps;
        this.minPoints = minPoints;
    }

    /**
     * Return the eps value (maximum radius of the neighborhood to be
     * considered)
     *
     * @return maximum radius of the neighborhood
     */
    public double getEps() {
        return eps;
    }

    /**
     * Returns the minimum number of points needed for a cluster
     *
     * @return minimum number of points needed for a cluster
     */
    public int getMinPoints() {
        return minPoints;
    }

    /**
     * Perform DBSCAN cluster analysis.
     * <p>
     * Every point that still reports {@code isNotVisited()} is examined in
     * input order. If fewer than {@code minPoints} points lie within
     * {@code eps} of it (the point itself is part of that count, as it is
     * compared against the whole data set), it is flagged with
     * {@code setNoise()}; otherwise a new cluster is started from it and
     * expanded. The state flags of the given points are modified by this
     * call.
     *
     * @param pPoints
     *            the points to cluster
     * @return the list of Cluster
     */
    public List<Cluster> cluster(List<ChangePoint> pPoints) {
        List<Cluster> clusters = new ArrayList<>();
        for (ChangePoint point : pPoints) {
            if (!point.isNotVisited()) {
                // already flagged as noise or assigned while expanding an
                // earlier cluster
                continue;
            }
            List<ChangePoint> neighbors = getNeighbors(point, pPoints);
            if (neighbors.size() < minPoints) {
                point.setNoise();
            } else {
                Cluster cluster = new Cluster(null);
                clusters.add(this.expandCluster(cluster, point, neighbors,
                        pPoints));
            }
        }
        return clusters;
    }

    /**
     * Expands the cluster to include density-reachable items.
     * <p>
     * The neighbors are processed as a work list that grows while it is
     * being walked: whenever a not-yet-visited point turns out to have at
     * least {@code minPoints} neighbors of its own, those neighbors are
     * appended to the work list so that they are examined as well. The list
     * is walked by index because an enhanced {@code for} loop would keep
     * iterating the list it started with and never reach the appended
     * points.
     *
     * @param pCluster
     *            Cluster to expand
     * @param pPoint
     *            point to add to cluster
     * @param neighbors
     *            list of neighbors; it is copied, not modified
     * @param points
     *            the data set
     * @return the expanded cluster
     */
    private Cluster expandCluster(Cluster pCluster, ChangePoint pPoint,
            List<ChangePoint> neighbors, List<ChangePoint> points) {
        pCluster.addPoint(pPoint);
        pPoint.setAlreadyInACluster();

        // Work list of points still to examine, plus the set of points that
        // were ever queued so that each one is appended at most once.
        List<ChangePoint> seeds = new ArrayList<>(neighbors);
        Set<ChangePoint> queued = new LinkedHashSet<>(seeds);

        // seeds.size() is re-evaluated on every pass on purpose: the list
        // grows inside the loop.
        for (int index = 0; index < seeds.size(); index++) {
            ChangePoint current = seeds.get(index);
            if (current.isNotVisited()) {
                List<ChangePoint> currentNeighbors = getNeighbors(current,
                        points);
                if (currentNeighbors.size() >= this.minPoints) {
                    // dense enough: its neighborhood belongs to this
                    // cluster too
                    enqueueUnseen(seeds, queued, currentNeighbors);
                }
            }
            // Any queued point that is not yet in a cluster joins this one,
            // whether or not its own neighborhood was dense enough.
            if (!current.isAlreadyInACluster()) {
                current.setAlreadyInACluster();
                pCluster.addPoint(current);
            }
        }
        return pCluster;
    }

    /**
     * Returns the points of the data set that lie within {@code eps} of a
     * point. The data set is scanned linearly; if the point itself is part
     * of the data set it is compared against itself as well.
     *
     * @param pPoint
     *            point to look for
     * @param points
     *            point possible neighbors
     * @return the List of neighbors
     */
    private List<ChangePoint> getNeighbors(ChangePoint pPoint,
            List<ChangePoint> points) {
        List<ChangePoint> neighbors = new ArrayList<>();
        for (ChangePoint changePoint : points) {
            if (changePoint.distanceFrom(pPoint) <= this.eps) {
                neighbors.add(changePoint);
            }
        }
        return neighbors;
    }

    /**
     * Appends to the work list every candidate that has not been queued
     * before, preserving the order of the candidates. Duplicates are
     * detected through the given set, i.e. by the {@code equals} /
     * {@code hashCode} of the points.
     *
     * @param seeds
     *            work list to append to
     * @param queued
     *            points that were already put on the work list; updated
     *            with the newly appended ones
     * @param candidates
     *            points to append when not queued yet
     */
    private void enqueueUnseen(List<ChangePoint> seeds,
            Set<ChangePoint> queued, List<ChangePoint> candidates) {
        for (ChangePoint candidate : candidates) {
            if (queued.add(candidate)) {
                seeds.add(candidate);
            }
        }
    }
}