// NeuralGas.java example

// Explorer
// smile-master
/*******************************************************************************
 * Copyright (c) 2010 Haifeng Li
 *   
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *  
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package smile.vq;

import java.util.Arrays;

import smile.clustering.ClusteringDistance;
import smile.clustering.PartitionClustering;
import smile.math.Math;

/**
 * Neural Gas soft competitive learning algorithm. The Neural Gas is inspired
 * by the Self-Organizing Map for finding optimal data representations based on
 * feature vectors. The algorithm was coined "Neural Gas" because of the
 * dynamics of the feature vectors during the adaptation process, which
 * distribute themselves like a gas within the data space. Although it is mainly
 * applied where data compression or vector quantization is an issue,
 * it is also used for cluster analysis as a robustly converging alternative to
 * the k-means clustering. A prominent extension is the Growing Neural Gas.
 * <p>
 * Compared to SOM, neural gas has no topology of a fixed dimensionality
 * (in fact, no topology at all). For each input signal during learning, the
 * neural gas algorithm sorts the neurons of the network according to the
 * distance of their reference vectors to the input signal. Based on this
 * "rank order", neurons are adapted with adaptation strengths that are
 * decreased according to a fixed schedule.
 * <p>
 * The adaptation step of the Neural Gas can be interpreted as gradient descent
 * on a cost function. By adapting not only the closest feature vector but all
 * of them with a step size decreasing with increasing distance order,
 * compared to k-means clustering, a much more robust convergence of the
 * algorithm can be achieved.
 *
 * <h2>References</h2>
 * <ol>
 * <li> Thomas Martinetz and Klaus Schulten. A "neural gas" network learns topologies. Artificial Neural Networks, 397-402, 1991.</li>
 * <li> T. Martinetz, S. Berkovich, and K. Schulten. "Neural-gas" Network for Vector Quantization and its Application to Time-Series Prediction. IEEE Trans. on Neural Networks, 4(4):558-569, 1993. </li>
 * <li> T. Martinetz and K. Schulten. Topology representing networks. Neural Networks, 7(3):507-522, 1994. </li>
 * </ol>
 * 
 * @see smile.clustering.KMeans
 * @see GrowingNeuralGas
 * @see NeuralMap
 * 
 * @author Haifeng Li
 */
public class NeuralGas extends PartitionClustering<double[]> {
    /**
     * The total distortion, i.e. the sum over all training samples of the
     * squared distance to the nearest centroid.
     */
    double distortion;
    /**
     * The centroids of each cluster. The rows are shared by reference with
     * the neurons during training, so adapting a neuron updates its centroid.
     */
    double[][] centroids;

    /**
     * A class representing a node for all neural gas algorithms.
     */
    class Neuron implements Comparable<Neuron> {
        /**
         * Reference vector.
         */
        double[] w;
        /**
         * The distance between the node and an input signal.
         */
        double dist = Double.MAX_VALUE;

        /**
         * Constructor.
         * @param w the reference vector. The array is stored as is, not copied.
         */
        Neuron(double[] w) {
            this.w = w;
        }

        /**
         * Orders neurons by ascending distance to the current input signal.
         */
        @Override
        public int compareTo(Neuron o) {
            // Double.compare is a consistent total order even for NaN or
            // infinite distances, which casting signum(dist - o.dist) is not.
            return Double.compare(dist, o.dist);
        }
    }

    /**
     * Constructor. Learn the Neural Gas with k neurons, using
     * lambda_i = min(10, max(1, k/2)), lambda_f = 0.01, eps_i = 0.5,
     * eps_f = 0.005 and 25 passes over the data.
     * @param data the training data, one sample per row.
     * @param k the number of units in the neural gas. It is also the number
     * of clusters.
     * @throws IllegalArgumentException if the data set is empty or k &lt; 2.
     */
    public NeuralGas(double[][] data, int k) {
        this(data, k, Math.min(10, Math.max(1, k/2)), 0.01, 0.5, 0.005, 25);
    }

    /**
     * Constructor. Learn the Neural Gas with k neurons.
     * @param data the training data, one sample per row.
     * @param k the number of units in the neural gas.
     * @param lambda_i the initial value of lambda. lambda_i and lambda_f are
     * used to set the soft learning radius/rate, i.e. determining the number
     * of neural units significantly changing their synaptic weights with
     * each adaptation step.
     * @param lambda_f The final value of lambda.
     * @param eps_i the initial value of epsilon. epsilon_i and epsilon_f
     * are the initial and final learning rate respectively.
     * @param eps_f the final value of epsilon.
     * @param steps the number of iterations. Note that for one iteration, we
     * mean that the learning process goes through the whole dataset.
     * @throws IllegalArgumentException if the data set is empty, if k &lt; 2,
     * if lambda_i or lambda_f is not positive, if lambda_f &gt;= lambda_i,
     * if eps_i or eps_f is outside (0, 1], or if eps_f &gt;= eps_i.
     */
    public NeuralGas(double[][] data, int k, double lambda_i, double lambda_f, double eps_i, double eps_f, int steps) {
        if (data.length == 0) {
            throw new IllegalArgumentException("Empty data set.");
        }

        if (k < 2) {
            throw new IllegalArgumentException("Invalid number of clusters: " + k);
        }

        if (lambda_i <= 0.0) {
            throw new IllegalArgumentException("Invalid initial value of lambda: " + lambda_i);
        }

        if (lambda_f <= 0.0) {
            throw new IllegalArgumentException("Invalid final value of lambda: " + lambda_f);
        }

        if (lambda_f >= lambda_i) {
            throw new IllegalArgumentException("lambda_f is NOT less than lambda_i.");
        }

        if (eps_i <= 0.0 || eps_i > 1.0) {
            throw new IllegalArgumentException("Invalid initial value of epsilon: " + eps_i);
        }

        if (eps_f <= 0.0 || eps_f > 1.0) {
            throw new IllegalArgumentException("Invalid final value of epsilon: " + eps_f);
        }

        if (eps_f >= eps_i) {
            throw new IllegalArgumentException("eps_f is NOT less than eps_i.");
        }

        int n = data.length;
        int d = data[0].length;
        this.k = k;

        // We use k-means++ seeding method to initialize neurons.
        y = seed(data, k, ClusteringDistance.EUCLIDEAN);

        size = new int[k];
        for (int i = 0; i < n; i++) {
            size[y[i]]++;
        }

        // Initial centroids are the means of the seed clusters.
        centroids = new double[k][d];
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < d; j++) {
                centroids[y[i]][j] += data[i][j];
            }
        }

        for (int i = 0; i < k; i++) {
            // An empty seed cluster would yield 0/0 = NaN coordinates and
            // poison every later distance; leave such a neuron at the origin
            // so that the adaptation steps can still move it.
            if (size[i] > 0) {
                for (int j = 0; j < d; j++) {
                    centroids[i][j] /= size[i];
                }
            }
        }

        // Each neuron wraps a row of centroids by reference, so the centroids
        // follow the neurons even though sorting reorders the nodes array.
        Neuron[] nodes = new Neuron[k];
        for (int i = 0; i < k; i++) {
            nodes[i] = new Neuron(centroids[i]);
        }

        for (int t = 0; t < steps; t++) {
            // Geometric interpolation from the initial towards the final
            // value as t / steps grows from 0 towards 1.
            double tf = (double) t / steps;
            double lambda = lambda_i * Math.pow(lambda_f / lambda_i, tf);
            double eps = eps_i * Math.pow(eps_f / eps_i, tf);

            for (double[] signal : data) {
                for (Neuron node : nodes) {
                    node.dist = Math.squaredDistance(node.w, signal);
                }

                // Rank the neurons by distance to the signal; rank 0 is nearest.
                Arrays.sort(nodes);

                for (int i = 0; i < k; i++) {
                    // The step size decays exponentially with the rank.
                    double delta = eps * Math.exp(-i / lambda);
                    // Skip neurons whose step size underflowed to zero.
                    if (delta > 0) {
                        for (int j = 0; j < d; j++) {
                            nodes[i].w[j] += delta * (signal[j] - nodes[i].w[j]);
                        }
                    }
                }
            }
        }

        // Final hard assignment of every sample to its nearest centroid.
        distortion = 0.0;
        for (int i = 0; i < n; i++) {
            double nearest = Double.MAX_VALUE;
            for (int j = 0; j < k; j++) {
                double dist = Math.squaredDistance(data[i], centroids[j]);
                if (nearest > dist) {
                    y[i] = j;
                    nearest = dist;
                }
            }
            distortion += nearest;
        }

        // Recount the cluster sizes from the final labels.
        Arrays.fill(size, 0);

        for (int i = 0; i < n; i++) {
            size[y[i]]++;
        }
    }

    /**
     * Returns the distortion.
     */
    public double distortion() {
        return distortion;
    }

    /**
     * Returns the centroids/neurons. The internal array is returned, not a copy.
     */
    public double[][] centroids() {
        return centroids;
    }

    /**
     * Returns the centroids/neurons. This is the same array as
     * {@link #centroids()}.
     */
    public double[][] neurons() {
        return centroids;
    }

    /**
     * Cluster a new instance.
     * @param x a new instance.
     * @return the cluster label, which is the index of nearest centroid.
     */
    @Override
    public int predict(double[] x) {
        double minDist = Double.MAX_VALUE;
        int bestCluster = 0;

        for (int i = 0; i < k; i++) {
            double dist = Math.squaredDistance(x, centroids[i]);
            if (dist < minDist) {
                minDist = dist;
                bestCluster = i;
            }
        }

        return bestCluster;
    }

    /**
     * Returns a summary with the distortion and, for each cluster, its size
     * and its share of the data in percent with one decimal place.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();

        sb.append(String.format("Neural Gas distortion: %.5f%n", distortion));
        sb.append(String.format("Clusters of %d data points of dimension %d:%n", y.length, centroids[0].length));
        for (int i = 0; i < k; i++) {
            // r is the share in tenths of a percent.
            int r = (int) Math.round(1000.0 * size[i] / y.length);
            sb.append(String.format("%3d\t%5d (%2d.%1d%%)%n", i, size[i], r / 10, r % 10));
        }

        return sb.toString();
    }
}