package tr.gov.ulakbim.jDenetX.clusterers.clustree;

import tr.gov.ulakbim.jDenetX.cluster.CFCluster;
import tr.gov.ulakbim.jDenetX.cluster.Cluster;
import tr.gov.ulakbim.jDenetX.clusterers.clustree.util.AuxiliaryFunctions;
import weka.core.Instance;

import java.util.Arrays;

/**
 * Representation of an entry in the tree: a cluster-feature (CF) vector
 * extended with a time-decayed point count.
 *
 * @author Fernando Sanchez Villaamil
 */
public class ClusKernel extends CFCluster {

    /**
     * Numeric epsilon.
     */
    public static final double EPSILON = 0.00000001;

    /** Positive floor used when rounding produces a slightly negative variance. */
    public static final double MIN_VARIANCE = 1e-50; // 1e-100; // 0.0000001;

    /**
     * Count of points weighted by how much time passes between updates.
     * If this weighted N falls under a threshold, the cluster may be
     * considered irrelevant and can be deleted.
     */
    private double weightedN;

    /**
     * Creates a kernel that represents exactly the given point.
     *
     * @param point the point to be converted into a corresponding kernel
     * @param dim   dimensionality of the points in this experiment
     *              (<code>Tree</code>)
     */
    public ClusKernel(double[] point, int dim) {
        super(point, dim);
        this.weightedN = 1;
    }

    /**
     * Creates an empty kernel.
     *
     * @param numberDimensions dimensionality of the points that can be
     *                         added to this cluster
     */
    protected ClusKernel(int numberDimensions) {
        super(numberDimensions);
        this.weightedN = 0;
    }

    /**
     * Copy constructor: instantiates a duplicate of the given cluster.
     *
     * @param other the <code>Cluster</code> to copy
     */
    protected ClusKernel(ClusKernel other) {
        super(other);
        this.weightedN = other.getWeightedN();
    }

    /**
     * Merges the given cluster into this one without aging this cluster.
     *
     * @param other the cluster whose statistics are added to this one
     */
    public void add(ClusKernel other) {
        super.add(other);
        this.weightedN += other.weightedN;
    }

    /**
     * Ages this cluster by weighting it, then merges the given cluster into
     * it. To add something to the cluster without weighting it, use
     * {@link #add(ClusKernel)} instead.
     *
     * @param other          the other cluster to be added to this one
     * @param timeDifference the time elapsed between the last update of the
     *                       <code>Entry</code> this cluster belongs to and the
     *                       update that caused this call
     * @param negLambda      parameter needed to weight the cluster
     */
    protected void aggregate(ClusKernel other, long timeDifference, double negLambda) {
        makeOlder(timeDifference, negLambda);
        add(other);
    }

    /**
     * Ages this cluster: multiplies the weighted N, LS and SS by a decay
     * factor derived from the elapsed time and negLambda.
     *
     * @param timeDifference the time elapsed between this update and the last
     * @param negLambda      decay parameter
     */
    protected void makeOlder(long timeDifference, double negLambda) {
        if (timeDifference == 0) {
            return;
        }
        double decay = AuxiliaryFunctions.weight(negLambda, timeDifference);
        this.weightedN *= decay;
        for (int d = 0; d < LS.length; d++) {
            LS[d] *= decay;
            SS[d] *= decay;
        }
    }

    /**
     * Squared Euclidean distance between the centers of this cluster and the
     * other one. The other cluster is normally just a single data point
     * (i.e. N = 1).
     *
     * @param other the cluster to which the distance is calculated
     * @return the (squared) distance between this cluster and the other
     */
    protected double calcDistance(ClusKernel other) {
        // TODO: (Fernando, Felix) Adapt the distance function to the new algorithm.
        double ownCount = this.getWeightedN();
        double otherCount = other.getWeightedN();
        double sum = 0.0;
        for (int d = 0; d < this.LS.length; d++) {
            double diff = (this.LS[d] / ownCount) - (other.LS[d] / otherCount);
            sum += diff * diff;
        }
        return sum;
    }

    /**
     * @return the weighted number of points in the cluster
     */
    protected double getWeightedN() {
        return weightedN;
    }

    /**
     * Checks whether this cluster is empty.
     *
     * @return <code>true</code> if the cluster has no data points,
     *         <code>false</code> otherwise
     */
    protected boolean isEmpty() {
        return this.N == 0;
    }

    /**
     * Removes all points from this cluster.
     */
    protected void clear() {
        this.N = 0;
        this.weightedN = 0.0;
        Arrays.fill(this.LS, 0.0);
        Arrays.fill(this.SS, 0.0);
    }

    /**
     * Overwrites N, weightedN, LS and SS of this cluster with the values of
     * the given cluster. Useful when the weight of an entry becomes too small
     * and the information about the old points should be forgotten.
     *
     * @param other the cluster whose state overwrites this one's
     */
    protected void overwriteOldCluster(ClusKernel other) {
        this.N = other.N;
        this.weightedN = other.weightedN;
        AuxiliaryFunctions.overwriteDoubleArray(this.LS, other.LS);
        AuxiliaryFunctions.overwriteDoubleArray(this.SS, other.SS);
    }

    @Override
    public double getWeight() {
        return this.weightedN;
    }

    /**
     * @return this kernel's center (linear sum divided by the weighted count)
     */
    public double[] getCenter() {
        assert (!this.isEmpty());
        double count = this.getWeightedN();
        double[] center = new double[this.LS.length];
        for (int d = 0; d < center.length; d++) {
            center[d] = this.LS[d] / count;
        }
        return center;
    }

    @Override
    public double getInclusionProbability(Instance instance) {
        double dist = calcNormalizedDistance(instance.toDoubleArray());
        double prob = AuxiliaryFunctions.distanceProbabilty(dist, LS.length);
        assert (prob >= 0.0 && prob <= 1.0)
                : "Bad confidence " + prob + " for distance " + dist;
        return prob;
    }

    /**
     * See interface <code>Cluster</code>.
     *
     * @return the radius of the cluster
     * @see Cluster#getRadius()
     */
    public double getRadius() {
        double[] variances = this.getSquaredVarianceVector();
        // Weight applied to the square root of every variance component.
        // TODO: weight MUST depend on #dimensions! (follow cumulative gamma function!)
        // SEE: http://en.wikipedia.org/wiki/Incomplete_gamma_function
        // SEE: http://ieeexplore.ieee.org/iel5/8819/27916/01246282.pdf
        // Numerical calculation: http://algolist.manual.ru/maths/count_fast/gamma_function.php
        final double componentWeight = 1;
        // Use standard deviations to calculate an average radius.
        double deviationSum = 0.0;
        for (double variance : variances) {
            deviationSum += componentWeight * Math.sqrt(variance);
        }
        return 1.6 * (deviationSum / variances.length);
    }

    /**
     * Per-dimension variance of the points in this kernel, computed as
     * E[x^2] - E[x]^2 from the CF statistics.
     *
     * @return the squared variance for every dimension
     */
    public double[] getSquaredVarianceVector() {
        double[] variances = new double[this.LS.length];
        for (int d = 0; d < this.LS.length; d++) {
            double mean = this.LS[d] / this.weightedN;
            double meanOfSquares = this.SS[d] / this.weightedN;
            variances[d] = meanOfSquares - (mean * mean);
            // Due to numerical errors, small negative values can occur.
            // We correct this by setting them to almost zero.
            if (variances[d] <= 0.0) {
                if (variances[d] > -EPSILON) {
                    variances[d] = MIN_VARIANCE * MIN_VARIANCE;
                } else {
                    assert (false) : "Bad variance " + variances[d]
                            + ", weighted N is " + getWeightedN();
                }
            }
        }
        return variances;
    }

    /**
     * Calculates the normalized Euclidean distance (Mahalanobis distance for
     * a distribution without covariances) from this cluster's center to a
     * point.
     *
     * @param point the point to which the distance is calculated
     * @return the normalized distance to the cluster center
     *         <p/>
     *         TODO: check whether WEIGHTING is correctly applied to variances
     */
    private double calcNormalizedDistance(double[] point) {
        assert (this.LS.length == point.length);
        double count = this.getWeightedN();
        double[] variances = this.getSquaredVarianceVector();
        double sum = 0.0;
        for (int d = 0; d < this.LS.length; d++) {
            double diff = (this.LS[d] / count) - point[d];
            sum += (diff * diff) / variances[d];
        }
        return Math.sqrt(sum);
    }
}