package tr.gov.ulakbim.jDenetX.clusterers.clustream;

import tr.gov.ulakbim.jDenetX.cluster.CFCluster;
import weka.core.DenseInstance;
import weka.core.Instance;

import java.util.ArrayList;
import java.util.Random;

/**
 * A CluStream micro-cluster: a clustering-feature (CF) vector extended with
 * the linear sum (LST) and squared sum (SST) of the timestamps of the
 * absorbed points, following Aggarwal et al., "A Framework for Clustering
 * Evolving Data Streams" (VLDB 2003).
 */
public class ClustreamKernel extends CFCluster {

    /**
     * Tolerance used both for the "point coincides with center" test and for
     * deciding when a negative variance is attributable to round-off.
     */
    private final static double EPSILON = 0.00005;

    /**
     * Value substituted for non-positive variances so that square roots taken
     * downstream (radius, deviation, sampling) stay finite.
     */
    public static final double MIN_VARIANCE = 1e-50;

    /** Linear sum of the timestamps of all inserted points. */
    protected double LST;

    /** Squared sum of the timestamps of all inserted points. */
    protected double SST;

    /**
     * Creates a kernel seeded with a single instance.
     *
     * @param instance   the first data point of this micro-cluster
     * @param dimensions number of attributes
     * @param timestamp  arrival time of the instance
     */
    public ClustreamKernel(Instance instance, int dimensions, long timestamp) {
        super(instance, dimensions);
        this.LST = timestamp;
        this.SST = timestamp * timestamp;
    }

    /**
     * Copy constructor.
     *
     * @param cluster the kernel to copy
     */
    public ClustreamKernel(ClustreamKernel cluster) {
        super(cluster);
        this.LST = cluster.LST;
        this.SST = cluster.SST;
    }

    /**
     * Absorbs a new point into this kernel, updating all CF statistics.
     *
     * @param instance  the point to absorb
     * @param timestamp its arrival time
     */
    public void insert(Instance instance, long timestamp) {
        N++;
        LST += timestamp;
        SST += timestamp * timestamp;
        // NOTE(review): iterates numValues() but indexes LS/SS positionally;
        // for sparse instances numValues() != numAttributes(), so this
        // assumes dense instances — TODO confirm against callers.
        for (int i = 0; i < instance.numValues(); i++) {
            LS[i] += instance.value(i);
            SS[i] += instance.value(i) * instance.value(i);
        }
    }

    /**
     * Merges another kernel into this one (CF vectors are additive).
     *
     * @param other the kernel to merge; must have the same dimensionality
     */
    public void add(ClustreamKernel other) {
        assert (other.LS.length == this.LS.length);
        this.N += other.N;
        this.LST += other.LST;
        this.SST += other.SST;
        for (int i = 0; i < LS.length; i++) {
            this.LS[i] += other.LS[i];
            this.SS[i] += other.SS[i];
        }
    }

    /**
     * Relevance (recency) stamp of this kernel: the mean arrival time, or —
     * once the kernel holds at least 2*m points — the approximate m/(2N)
     * quantile of the arrival-time distribution, per the CluStream paper.
     *
     * @return the timestamp used to decide which kernel to delete
     */
    public double getRelevanceStamp() {
        if (N < 2 * Clustream.m)
            return getMuTime();
        return getMuTime() + getSigmaTime() * getQuantile(((double) Clustream.m) / (2 * N));
    }

    /** @return mean of the absorbed timestamps */
    private double getMuTime() {
        return LST / N;
    }

    /** @return standard deviation of the absorbed timestamps */
    private double getSigmaTime() {
        return Math.sqrt(SST / N - (LST / N) * (LST / N));
    }

    /**
     * Standard-normal quantile for cumulative probability {@code z}.
     *
     * @param z probability in [0, 1]
     */
    private double getQuantile(double z) {
        assert (z >= 0 && z <= 1);
        return Math.sqrt(2) * inverseError(2 * z - 1);
    }

    /**
     * Radius of this kernel: a multiple of the mean per-dimension deviation
     * (the 1.6 factor is the heuristic from the CluStream paper).
     */
    @Override
    public double getRadius() {
        // A single-point cluster has no spread.
        if (N == 1)
            return 0;
        return getDeviation() * 1.6;
    }

    /** @return mean, over all dimensions, of the per-dimension std deviation */
    private double getDeviation() {
        double[] variance = getVarianceVector();
        double sumOfDeviation = 0.0;
        for (int i = 0; i < variance.length; i++) {
            sumOfDeviation += Math.sqrt(variance[i]);
        }
        return sumOfDeviation / variance.length;
    }

    /**
     * @return this kernel's center (the mean of all absorbed points)
     */
    @Override
    public double[] getCenter() {
        assert (!this.isEmpty());
        double[] res = new double[this.LS.length];
        for (int i = 0; i < res.length; i++) {
            res[i] = this.LS[i] / N;
        }
        return res;
    }

    /**
     * See interface <code>Cluster</code>.
     *
     * @param instance the point to test for inclusion
     * @return 1.0 if the point lies within this kernel's radius (or, for a
     *         single-point kernel, coincides with it up to EPSILON), else 0.0
     */
    @Override
    public double getInclusionProbability(Instance instance) {
        // Trivial cluster: only exact (within EPSILON) matches are included.
        if (N == 1) {
            double distance = 0.0;
            for (int i = 0; i < LS.length; i++) {
                double d = LS[i] - instance.value(i);
                distance += d * d;
            }
            distance = Math.sqrt(distance);
            if (distance < EPSILON)
                return 1.0;
            return 0.0;
        } else {
            double dist = calcNormalizedDistance(instance.toDoubleArray());
            if (dist <= getRadius()) {
                return 1;
            } else {
                return 0;
            }
//            double res = AuxiliaryFunctions.distanceProbabilty(dist, LS.length);
//            return res;
        }
    }

    /**
     * Per-dimension variance vector, computed as E[X^2] - E[X]^2.
     * <p/>
     * Non-positive entries — which can only arise from floating-point
     * cancellation — are clamped to MIN_VARIANCE. FIX: the original only
     * corrected values in (-EPSILON, 0], leaving larger negative errors in
     * place; those produced NaN via Math.sqrt in getRadius(),
     * getDeviation() and sample().
     */
    private double[] getVarianceVector() {
        double[] res = new double[this.LS.length];
        for (int i = 0; i < this.LS.length; i++) {
            double lsDivN = this.LS[i] / this.getWeight();
            double ssDivN = this.SS[i] / this.getWeight();
            res[i] = ssDivN - lsDivN * lsDivN;
            if (res[i] <= 0.0) {
                res[i] = MIN_VARIANCE;
            }
        }
        return res;
    }

    /**
     * Check if this cluster is empty or not.
     *
     * @return <code>true</code> if the cluster has no data points,
     *         <code>false</code> otherwise.
     */
    public boolean isEmpty() {
        return this.N == 0;
    }

    /**
     * Calculate the distance of a point to the cluster center.
     * <p/>
     * NOTE(review): despite the name, the per-dimension division by the
     * variance (Mahalanobis-style normalization) is commented out below, so
     * this is currently a plain Euclidean distance. Left as-is because
     * getInclusionProbability() compares the result against getRadius(),
     * which is also expressed in unnormalized units.
     *
     * @param point the point to which the distance is calculated
     * @return the distance to the cluster center
     */
    private double calcNormalizedDistance(double[] point) {
        double[] center = getCenter();
        double res = 0.0;
        for (int i = 0; i < center.length; i++) {
            double diff = center[i] - point[i];
            res += (diff * diff); // / variance[i];
        }
        return Math.sqrt(res);
    }

    /**
     * Approximates the inverse error function via its Maclaurin series.
     * CluStream needs this for the timestamp-quantile estimate.
     *
     * @param x argument in (-1, 1); the truncated series is accurate near 0
     * @return approximation of erf^-1(x)
     */
    public static double inverseError(double x) {
        double z = Math.sqrt(Math.PI) * x;
        double res = z / 2;
        double z2 = z * z;
        double zProd = z * z2; // z^3
        res += (1.0 / 24) * zProd;

        zProd *= z2; // z^5
        res += (7.0 / 960) * zProd;

        zProd *= z2; // z^7
        res += (127 * zProd) / 80640;

        zProd *= z2; // z^9
        res += (4369 * zProd) / 11612160;

        zProd *= z2; // z^11
        res += (34807 * zProd) / 364953600;

        zProd *= z2; // z^13
        res += (20036983 * zProd) / 797058662400d;

        /*
        zProd *= z2;  // z^15
        res += (2280356863 * zProd)/334764638208000;
        */
        // Higher-order terms (z^15 and beyond) omitted; see the series
        // coefficients in the original comment for the continuation.
        return res;
    }

    /**
     * Draws a random point from an axis-aligned Gaussian centered on this
     * kernel with the kernel's per-dimension standard deviations.
     *
     * @param random source of randomness
     * @return a sampled instance with weight 1.0
     */
    @Override
    public Instance sample(Random random) {
        double[] res = new double[LS.length];
        double[] variance = getVarianceVector();
        double[] center = getCenter();
        for (int i = 0; i < res.length; i++) {
            double radius = Math.sqrt(variance[i]);
            res[i] = center[i] + random.nextGaussian() * radius;
        }
        return new DenseInstance(1.0, res);
    }

    @Override
    protected void getClusterSpecificInfo(ArrayList<String> infoTitle, ArrayList<String> infoValue) {
        super.getClusterSpecificInfo(infoTitle, infoValue);
        infoTitle.add("Deviation");
        // FIX: reuse getDeviation() instead of duplicating its loop verbatim.
        infoValue.add(Double.toString(getDeviation()));
    }
}