package tr.gov.ulakbim.jDenetX.clusterers.clustree;

import tr.gov.ulakbim.jDenetX.cluster.CFCluster;
import tr.gov.ulakbim.jDenetX.cluster.Cluster;
import tr.gov.ulakbim.jDenetX.clusterers.clustree.util.AuxiliaryFunctions;
import weka.core.Instance;

import java.util.Arrays;

/**
 * Representation of an entry in the tree: a cluster-feature (CF) vector
 * extended with a time-decayed point count.
 *
 * @author Fernando Sanchez Villaamil
 */
public class ClusKernel extends CFCluster {

    /**
     * Numeric epsilon.
     */
    public static final double EPSILON = 0.00000001;

    /** Positive floor used when rounding produces a slightly negative variance. */
    public static final double MIN_VARIANCE = 1e-50; // 1e-100; // 0.0000001;

    /**
     * Count of points weighted by how much time passes between updates.
     * If this weighted N falls under a threshold, the cluster may be
     * considered irrelevant and can be deleted.
     */
    private double weightedN;

    /**
     * Creates a kernel that represents exactly the given point.
     *
     * @param point the point to be converted into a corresponding kernel
     * @param dim   dimensionality of the points in this experiment
     *              (<code>Tree</code>)
     */
    public ClusKernel(double[] point, int dim) {
        super(point, dim);
        this.weightedN = 1;
    }

    /**
     * Creates an empty kernel.
     *
     * @param numberDimensions dimensionality of the points that can be
     *                         added to this cluster
     */
    protected ClusKernel(int numberDimensions) {
        super(numberDimensions);
        this.weightedN = 0;
    }

    /**
     * Copy constructor: instantiates a duplicate of the given cluster.
     *
     * @param other the <code>Cluster</code> to copy
     */
    protected ClusKernel(ClusKernel other) {
        super(other);
        this.weightedN = other.getWeightedN();
    }

    /**
     * Merges the given cluster into this one without aging this cluster.
     *
     * @param other the cluster whose statistics are added to this one
     */
    public void add(ClusKernel other) {
        super.add(other);
        this.weightedN += other.weightedN;
    }

    /**
     * Ages this cluster by weighting it, then merges the given cluster into
     * it. To add something to the cluster without weighting it, use
     * {@link #add(ClusKernel)} instead.
     *
     * @param other          the other cluster to be added to this one
     * @param timeDifference the time elapsed between the last update of the
     *                       <code>Entry</code> this cluster belongs to and the
     *                       update that caused this call
     * @param negLambda      parameter needed to weight the cluster
     */
    protected void aggregate(ClusKernel other, long timeDifference, double negLambda) {
        makeOlder(timeDifference, negLambda);
        add(other);
    }

    /**
     * Ages this cluster: multiplies the weighted N, LS and SS by a decay
     * factor derived from the elapsed time and negLambda.
     *
     * @param timeDifference the time elapsed between this update and the last
     * @param negLambda      decay parameter
     */
    protected void makeOlder(long timeDifference, double negLambda) {
        if (timeDifference == 0) {
            return;
        }
        double decay = AuxiliaryFunctions.weight(negLambda, timeDifference);
        this.weightedN *= decay;
        for (int d = 0; d < LS.length; d++) {
            LS[d] *= decay;
            SS[d] *= decay;
        }
    }

    /**
     * Squared Euclidean distance between the centers of this cluster and the
     * other one. The other cluster is normally just a single data point
     * (i.e. N = 1).
     *
     * @param other the cluster to which the distance is calculated
     * @return the (squared) distance between this cluster and the other
     */
    protected double calcDistance(ClusKernel other) {
        // TODO: (Fernando, Felix) Adapt the distance function to the new algorithm.
        double ownCount = this.getWeightedN();
        double otherCount = other.getWeightedN();
        double sum = 0.0;
        for (int d = 0; d < this.LS.length; d++) {
            double diff = (this.LS[d] / ownCount) - (other.LS[d] / otherCount);
            sum += diff * diff;
        }
        return sum;
    }

    /**
     * @return the weighted number of points in the cluster
     */
    protected double getWeightedN() {
        return weightedN;
    }

    /**
     * Checks whether this cluster is empty.
     *
     * @return <code>true</code> if the cluster has no data points,
     *         <code>false</code> otherwise
     */
    protected boolean isEmpty() {
        return this.N == 0;
    }

    /**
     * Removes all points from this cluster.
     */
    protected void clear() {
        this.N = 0;
        this.weightedN = 0.0;
        Arrays.fill(this.LS, 0.0);
        Arrays.fill(this.SS, 0.0);
    }

    /**
     * Overwrites N, weightedN, LS and SS of this cluster with the values of
     * the given cluster. Useful when the weight of an entry becomes too small
     * and the information about the old points should be forgotten.
     *
     * @param other the cluster whose state overwrites this one's
     */
    protected void overwriteOldCluster(ClusKernel other) {
        this.N = other.N;
        this.weightedN = other.weightedN;
        AuxiliaryFunctions.overwriteDoubleArray(this.LS, other.LS);
        AuxiliaryFunctions.overwriteDoubleArray(this.SS, other.SS);
    }

    @Override
    public double getWeight() {
        return this.weightedN;
    }

    /**
     * @return this kernel's center (linear sum divided by the weighted count)
     */
    public double[] getCenter() {
        assert (!this.isEmpty());
        double count = this.getWeightedN();
        double[] center = new double[this.LS.length];
        for (int d = 0; d < center.length; d++) {
            center[d] = this.LS[d] / count;
        }
        return center;
    }

    @Override
    public double getInclusionProbability(Instance instance) {
        double dist = calcNormalizedDistance(instance.toDoubleArray());
        double prob = AuxiliaryFunctions.distanceProbabilty(dist, LS.length);
        assert (prob >= 0.0 && prob <= 1.0)
                : "Bad confidence " + prob + " for distance " + dist;
        return prob;
    }

    /**
     * See interface <code>Cluster</code>.
     *
     * @return the radius of the cluster
     * @see Cluster#getRadius()
     */
    public double getRadius() {
        double[] variances = this.getSquaredVarianceVector();
        // Weight applied to the square root of every variance component.
        // TODO: weight MUST depend on #dimensions! (follow cumulative gamma function!)
        // SEE: http://en.wikipedia.org/wiki/Incomplete_gamma_function
        // SEE: http://ieeexplore.ieee.org/iel5/8819/27916/01246282.pdf
        // Numerical calculation: http://algolist.manual.ru/maths/count_fast/gamma_function.php
        final double componentWeight = 1;
        // Use standard deviations to calculate an average radius.
        double deviationSum = 0.0;
        for (double variance : variances) {
            deviationSum += componentWeight * Math.sqrt(variance);
        }
        return 1.6 * (deviationSum / variances.length);
    }

    /**
     * Per-dimension variance of the points in this kernel, computed as
     * E[x^2] - E[x]^2 from the CF statistics.
     *
     * @return the squared variance for every dimension
     */
    public double[] getSquaredVarianceVector() {
        double[] variances = new double[this.LS.length];
        for (int d = 0; d < this.LS.length; d++) {
            double mean = this.LS[d] / this.weightedN;
            double meanOfSquares = this.SS[d] / this.weightedN;
            variances[d] = meanOfSquares - (mean * mean);
            // Due to numerical errors, small negative values can occur.
            // We correct this by setting them to almost zero.
            if (variances[d] <= 0.0) {
                if (variances[d] > -EPSILON) {
                    variances[d] = MIN_VARIANCE * MIN_VARIANCE;
                } else {
                    assert (false) : "Bad variance " + variances[d]
                            + ", weighted N is " + getWeightedN();
                }
            }
        }
        return variances;
    }

    /**
     * Calculates the normalized Euclidean distance (Mahalanobis distance for
     * a distribution without covariances) from this cluster's center to a
     * point.
     *
     * @param point the point to which the distance is calculated
     * @return the normalized distance to the cluster center
     *         <p/>
     *         TODO: check whether WEIGHTING is correctly applied to variances
     */
    private double calcNormalizedDistance(double[] point) {
        assert (this.LS.length == point.length);
        double count = this.getWeightedN();
        double[] variances = this.getSquaredVarianceVector();
        double sum = 0.0;
        for (int d = 0; d < this.LS.length; d++) {
            double diff = (this.LS[d] / count) - point[d];
            sum += (diff * diff) / variances[d];
        }
        return Math.sqrt(sum);
    }
}