package tr.gov.ulakbim.jDenetX.clusterers.denstream;
import tr.gov.ulakbim.jDenetX.cluster.Cluster;
import tr.gov.ulakbim.jDenetX.cluster.Clustering;
import tr.gov.ulakbim.jDenetX.clusterers.AbstractClusterer;
import tr.gov.ulakbim.jDenetX.core.Measurement;
import tr.gov.ulakbim.jDenetX.options.FloatOption;
import tr.gov.ulakbim.jDenetX.options.IntOption;
import weka.core.DenseInstance;
import weka.core.Instance;
import java.util.ArrayList;
/**
* @author jansen
*/
public class DenStream extends AbstractClusterer {
public FloatOption epsilonOption = new FloatOption("epsilon", 'e',
"Defines the epsilon neighbourhood", 0.01, 0, 1);
public IntOption minPointsOption = new IntOption("minPoints", 'p',
"Minimal number of points cluster has to contain.", 10);
public FloatOption lambdaOption = new FloatOption("lambda", 'l',
"", 0.006, 0, 1);
public FloatOption betaOption = new FloatOption("beta", 'b',
"", 0.001, 0, 1);
public FloatOption muOption = new FloatOption("mu", 'm',
"", 1, 0, Double.MAX_VALUE);
public IntOption initPointsOption = new IntOption("initPoints", 'i',
"Number of points to use for initialization.", 1000);
double lambda;
double epsilon;
int minPoints;
double mu;
double beta;
Clustering p_micro_cluster;
Clustering o_micro_cluster;
ArrayList<DenPoint> initBuffer;
boolean initialized;
private long timestamp = 0;
Timestamp currentTimestamp;
long tp;
private class DenPoint extends DenseInstance {
protected boolean covered;
public DenPoint(Instance nextInstance, Long timestamp) {
super(nextInstance);
this.setDataset(nextInstance.dataset());
}
}
@Override
public void resetLearningImpl() {
//init DenStream
currentTimestamp = new Timestamp();
lambda = lambdaOption.getValue();
// lambda = (Math.log(1.0/0.01)/Math.log(2)/initPointsOption.getValue());
// System.out.println(lambda);
epsilon = epsilonOption.getValue();
minPoints = minPointsOption.getValue();
mu = muOption.getValue();
beta = betaOption.getValue();
initialized = false;
p_micro_cluster = new Clustering();
o_micro_cluster = new Clustering();
initBuffer = new ArrayList<DenPoint>();
tp = Math.round(1 / lambda * Math.log((beta * mu) / (beta * mu - 1))) + 1;
}
public void initialDBScan() {
for (int p = 0; p < initBuffer.size(); p++) {
DenPoint point = initBuffer.get(p);
if (!point.covered) {
point.covered = true;
ArrayList<Integer> neighbourhood = getNeighbourhoodIDs(point, initBuffer, epsilon);
if (neighbourhood.size() > minPoints) {
MicroCluster mc = new MicroCluster(point, point.numAttributes(), timestamp, lambda, currentTimestamp);
expandCluster(mc, initBuffer, neighbourhood);
p_micro_cluster.add(mc);
} else {
point.covered = false;
}
}
}
}
@Override
public void trainOnInstanceImpl(Instance inst) {
timestamp++;
currentTimestamp.setTimestamp(timestamp);
DenPoint point = new DenPoint(inst, timestamp);
//////////////////
//Initialization//
//////////////////
if (!initialized) {
initBuffer.add(point);
if (initBuffer.size() >= initPointsOption.getValue()) {
initialDBScan();
initialized = true;
}
} else {
//////////////
//Merging(p)//
//////////////
boolean merged = false;
if (p_micro_cluster.getClustering().size() != 0) {
MicroCluster x = nearestCluster(point, p_micro_cluster);
MicroCluster xCopy = x.copy();
xCopy.insert(point, timestamp);
if (xCopy.getRadius(timestamp) <= epsilon) {
x.insert(point, timestamp);
merged = true;
}
}
if (!merged && (o_micro_cluster.getClustering().size() != 0)) {
MicroCluster x = nearestCluster(point, o_micro_cluster);
MicroCluster xCopy = x.copy();
xCopy.insert(point, timestamp);
if (xCopy.getRadius(timestamp) <= epsilon) {
x.insert(point, timestamp);
merged = true;
if (x.getWeight() > beta * mu) {
o_micro_cluster.getClustering().remove(x);
p_micro_cluster.getClustering().add(x);
}
}
}
if (!merged) {
o_micro_cluster.getClustering().add(new MicroCluster(point.toDoubleArray(), point.toDoubleArray().length, timestamp, lambda, currentTimestamp));
}
////////////////////////////
//Periodic cluster removal//
////////////////////////////
if (timestamp % tp == 0) {
ArrayList<MicroCluster> removalList = new ArrayList<MicroCluster>();
for (Cluster c : p_micro_cluster.getClustering()) {
if (((MicroCluster) c).getWeight() < beta * mu) {
removalList.add((MicroCluster) c);
}
}
for (Cluster c : removalList) {
p_micro_cluster.getClustering().remove(c);
}
for (Cluster c : o_micro_cluster.getClustering()) {
long t0 = ((MicroCluster) c).getCreationTime();
double xsi1 = Math.pow(2, (-lambda * (timestamp - t0 + tp))) - 1;
double xsi2 = Math.pow(2, -lambda * tp) - 1;
double xsi = xsi1 / xsi2;
if (((MicroCluster) c).getWeight() < xsi) {
removalList.add((MicroCluster) c);
}
}
for (Cluster c : removalList) {
o_micro_cluster.getClustering().remove(c);
}
}
}
}
private void expandCluster(MicroCluster mc, ArrayList<DenPoint> points, ArrayList<Integer> neighbourhood) {
for (int p : neighbourhood) {
DenPoint npoint = points.get(p);
if (!npoint.covered) {
npoint.covered = true;
mc.insert(npoint, timestamp);
ArrayList<Integer> neighbourhood2 = getNeighbourhoodIDs(npoint, initBuffer, epsilon);
if (neighbourhood.size() > minPoints) {
expandCluster(mc, points, neighbourhood2);
}
}
}
}
private ArrayList<Integer> getNeighbourhoodIDs(DenPoint point, ArrayList<DenPoint> points, double eps) {
ArrayList<Integer> neighbourIDs = new ArrayList<Integer>();
for (int p = 0; p < points.size(); p++) {
DenPoint npoint = points.get(p);
if (!npoint.covered) {
double dist = distance(point.toDoubleArray(), points.get(p).toDoubleArray());
if (dist < eps) {
neighbourIDs.add(p);
}
}
}
return neighbourIDs;
}
private MicroCluster nearestCluster(DenPoint p, Clustering cl) {
MicroCluster min = null;
double minDist = 0;
for (int c = 0; c < cl.size(); c++) {
MicroCluster x = (MicroCluster) cl.get(c);
if (min == null) {
min = x;
}
double dist = distance(p.toDoubleArray(), x.getCenter());
dist -= x.getRadius(timestamp);
if (dist < minDist) {
minDist = dist;
min = x;
}
}
return min;
}
private double distance(double[] pointA, double[] pointB) {
double distance = 0.0;
for (int i = 0; i < pointA.length; i++) {
double d = pointA[i] - pointB[i];
distance += d * d;
}
return Math.sqrt(distance);
}
public Clustering getClusteringResult() {
return null;
}
@Override
public boolean implementsMicroClusterer() {
return true;
}
@Override
public Clustering getMicroClusteringResult() {
return p_micro_cluster;
}
@Override
protected Measurement[] getModelMeasurementsImpl() {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public void getModelDescription(StringBuilder out, int indent) {
}
public boolean isRandomizable() {
return true;
}
public double[] getVotesForInstance(Instance inst) {
return null;
}
}