/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.math3.ml.neuralnet; import java.util.HashMap; import java.util.Collection; import org.apache.commons.math3.ml.distance.DistanceMeasure; import org.apache.commons.math3.ml.neuralnet.twod.NeuronSquareMesh2D; import org.apache.commons.math3.exception.NoDataException; import org.apache.commons.math3.util.Pair; /** * Utilities for network maps. * * @since 3.3 */ public class MapUtils { /** * Class contains only static methods. */ private MapUtils() {} /** * Finds the neuron that best matches the given features. * * @param features Data. * @param neurons List of neurons to scan. If the list is empty * {@code null} will be returned. * @param distance Distance function. The neuron's features are * passed as the first argument to {@link DistanceMeasure#compute(double[],double[])}. * @return the neuron whose features are closest to the given data. * @throws org.apache.commons.math3.exception.DimensionMismatchException * if the size of the input is not compatible with the neurons features * size. */ public static Neuron findBest(double[] features, Iterable<Neuron> neurons, DistanceMeasure distance) { Neuron best = null; double min = Double.POSITIVE_INFINITY; for (final Neuron n : neurons) { final double d = distance.compute(n.getFeatures(), features); if (d < min) { min = d; best = n; } } return best; } /** * Finds the two neurons that best match the given features. * * @param features Data. * @param neurons List of neurons to scan. If the list is empty * {@code null} will be returned. * @param distance Distance function. The neuron's features are * passed as the first argument to {@link DistanceMeasure#compute(double[],double[])}. * @return the two neurons whose features are closest to the given data. * @throws org.apache.commons.math3.exception.DimensionMismatchException * if the size of the input is not compatible with the neurons features * size. */ public static Pair<Neuron, Neuron> findBestAndSecondBest(double[] features, Iterable<Neuron> neurons, DistanceMeasure distance) { Neuron[] best = { null, null }; double[] min = { Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY }; for (final Neuron n : neurons) { final double d = distance.compute(n.getFeatures(), features); if (d < min[0]) { // Replace second best with old best. min[1] = min[0]; best[1] = best[0]; // Store current as new best. min[0] = d; best[0] = n; } else if (d < min[1]) { // Replace old second best with current. min[1] = d; best[1] = n; } } return new Pair<Neuron, Neuron>(best[0], best[1]); } /** * Computes the <a href="http://en.wikipedia.org/wiki/U-Matrix"> * U-matrix</a> of a two-dimensional map. * * @param map Network. * @param distance Function to use for computing the average * distance from a neuron to its neighbours. * @return the matrix of average distances. */ public static double[][] computeU(NeuronSquareMesh2D map, DistanceMeasure distance) { final int numRows = map.getNumberOfRows(); final int numCols = map.getNumberOfColumns(); final double[][] uMatrix = new double[numRows][numCols]; final Network net = map.getNetwork(); for (int i = 0; i < numRows; i++) { for (int j = 0; j < numCols; j++) { final Neuron neuron = map.getNeuron(i, j); final Collection<Neuron> neighbours = net.getNeighbours(neuron); final double[] features = neuron.getFeatures(); double d = 0; int count = 0; for (Neuron n : neighbours) { ++count; d += distance.compute(features, n.getFeatures()); } uMatrix[i][j] = d / count; } } return uMatrix; } /** * Computes the "hit" histogram of a two-dimensional map. * * @param data Feature vectors. * @param map Network. * @param distance Function to use for determining the best matching unit. * @return the number of hits for each neuron in the map. */ public static int[][] computeHitHistogram(Iterable<double[]> data, NeuronSquareMesh2D map, DistanceMeasure distance) { final HashMap<Neuron, Integer> hit = new HashMap<Neuron, Integer>(); final Network net = map.getNetwork(); for (double[] f : data) { final Neuron best = findBest(f, net, distance); final Integer count = hit.get(best); if (count == null) { hit.put(best, 1); } else { hit.put(best, count + 1); } } // Copy the histogram data into a 2D map. final int numRows = map.getNumberOfRows(); final int numCols = map.getNumberOfColumns(); final int[][] histo = new int[numRows][numCols]; for (int i = 0; i < numRows; i++) { for (int j = 0; j < numCols; j++) { final Neuron neuron = map.getNeuron(i, j); final Integer count = hit.get(neuron); if (count == null) { histo[i][j] = 0; } else { histo[i][j] = count; } } } return histo; } /** * Computes the quantization error. * The quantization error is the average distance between a feature vector * and its "best matching unit" (closest neuron). * * @param data Feature vectors. * @param neurons List of neurons to scan. * @param distance Distance function. * @return the error. * @throws NoDataException if {@code data} is empty. */ public static double computeQuantizationError(Iterable<double[]> data, Iterable<Neuron> neurons, DistanceMeasure distance) { double d = 0; int count = 0; for (double[] f : data) { ++count; d += distance.compute(f, findBest(f, neurons, distance).getFeatures()); } if (count == 0) { throw new NoDataException(); } return d / count; } /** * Computes the topographic error. * The topographic error is the proportion of data for which first and * second best matching units are not adjacent in the map. * * @param data Feature vectors. * @param net Network. * @param distance Distance function. * @return the error. * @throws NoDataException if {@code data} is empty. */ public static double computeTopographicError(Iterable<double[]> data, Network net, DistanceMeasure distance) { int notAdjacentCount = 0; int count = 0; for (double[] f : data) { ++count; final Pair<Neuron, Neuron> p = findBestAndSecondBest(f, net, distance); if (!net.getNeighbours(p.getFirst()).contains(p.getSecond())) { // Increment count if first and second best matching units // are not neighbours. ++notAdjacentCount; } } if (count == 0) { throw new NoDataException(); } return ((double) notAdjacentCount) / count; } }