/*
* Copyright 2004-2010 Information & Software Engineering Group (188/1)
* Institute of Software Technology and Interactive Systems
* Vienna University of Technology, Austria
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package at.tuwien.ifs.somtoolbox.visualization;
import java.awt.GraphicsEnvironment;
import java.awt.GridBagConstraints;
import java.awt.GridBagLayout;
import java.awt.Insets;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.awt.image.BufferedImage;
import java.util.HashMap;
import java.util.logging.Logger;
import javax.swing.JButton;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JSpinner;
import javax.swing.JTextField;
import javax.swing.SpinnerNumberModel;
import javax.swing.event.ChangeEvent;
import javax.swing.event.ChangeListener;
import org.apache.commons.collections.keyvalue.MultiKey;
import cern.colt.list.DoubleArrayList;
import cern.colt.matrix.DoubleFactory1D;
import cern.colt.matrix.DoubleFactory2D;
import cern.colt.matrix.DoubleMatrix1D;
import cern.colt.matrix.DoubleMatrix2D;
import cern.colt.matrix.impl.DenseDoubleMatrix1D;
import cern.colt.matrix.impl.DenseDoubleMatrix2D;
import cern.jet.math.Functions;
import cern.jet.stat.quantile.DoubleQuantileFinder;
import cern.jet.stat.quantile.QuantileFinderFactory;
import at.tuwien.ifs.somtoolbox.SOMToolboxException;
import at.tuwien.ifs.somtoolbox.data.InputData;
import at.tuwien.ifs.somtoolbox.data.SOMVisualisationData;
import at.tuwien.ifs.somtoolbox.data.distance.InputVectorDistanceMatrix;
import at.tuwien.ifs.somtoolbox.data.distance.LeightWeightMemoryInputVectorDistanceMatrix;
import at.tuwien.ifs.somtoolbox.layers.GrowingLayer;
import at.tuwien.ifs.somtoolbox.layers.LayerAccessException;
import at.tuwien.ifs.somtoolbox.layers.metrics.DistanceMetric;
import at.tuwien.ifs.somtoolbox.layers.metrics.MetricException;
import at.tuwien.ifs.somtoolbox.models.GrowingSOM;
import at.tuwien.ifs.somtoolbox.util.ArrayUtils;
import at.tuwien.ifs.somtoolbox.util.GridBagConstraintsIFS;
import at.tuwien.ifs.somtoolbox.util.StringUtils;
import at.tuwien.ifs.somtoolbox.util.VectorTools;
import at.tuwien.ifs.somtoolbox.util.inputVerifier.DoubleNumberInputVerifier;
/**
* This visualizer implements:
* <ul>
* <li>Ultsch, A. Maps for the Visualization of high-dimensional Data Spaces. In Proceedings Workshop on Self-Organizing
* Maps (WSOM 2003), Kyushu, Japan</li>
* <li>Ultsch, A. U*-Matrix: a Tool to visualize Clusters in high dimensional Data. Technical Report No. 36, Dept. of
* Mathematics and Computer Science, University of Marburg, Germany, 2003</li>
* </ul>
*
* @author Rudolf Mayer
* @version $Id: PMatrix.java 3874 2010-11-02 14:14:38Z mayer $
*/
public class PMatrix extends UMatrix {
// a cache for the computed p-matrix, which is expensive, and used also for the U*-Matrix
private static final HashMap<MultiKey, DoubleMatrix2D> pMatrixCache = new HashMap<MultiKey, DoubleMatrix2D>();
/** pareto-optimal size, as dervied in the paper "Maps for the Visualization of high-dimensional Data Space" */
public static final double PARETO_SIZE = 0.2013;
private InputVectorDistanceMatrix distanceMatrix;
/** The radius for the density calculation */
private double radius = Double.NaN;
DoubleMatrix1D percentiles = null;
public PMatrix() {
VISUALIZATION_NAMES = new String[] { "P-Matrix", "U*-Matrix" };
VISUALIZATION_SHORT_NAMES = new String[] { "PMatrix", "UStarMatrix" };
NUM_VISUALIZATIONS = VISUALIZATION_NAMES.length;
VISUALIZATION_DESCRIPTIONS = new String[] {
"Implementation of the P-Matrix, as described in Ultsch, A.\n"
+ "Maps for the Visualization of high-dimensional Data Spaces.\n"
+ "In Proceedings Workshop on Self-Organizing Maps (WSOM 2003), Kyushu, Japan",
"Implementation of the U*-Matrix (U- and P-Matrix combined), as described in Ultsch. A.\n"
+ "U*-Matrix: a Tool to visualize Clusters in high dimensional Data\n"
+ "Technical Report No. 36, Dept. of Mathematics and Computer Science, University of Marburg, Germany, 2003" };
neededInputObjects = new String[] { SOMVisualisationData.INPUT_VECTOR_DISTANCE_MATRIX,
SOMVisualisationData.INPUT_VECTOR };
// don't initialise the control panel if we have no graphics environment (e.g. in server applications)
if (!GraphicsEnvironment.isHeadless()) {
controlPanel = new PMatrixControlPanel();
}
}
@Override
public String[] needsAdditionalFiles() {
String[] neededDataFiles = super.needsAdditionalFiles();
// we only need the input vector file, the distance matrix is a bonus
if (org.apache.commons.lang.ArrayUtils.contains(neededDataFiles, SOMVisualisationData.INPUT_VECTOR)) {
return neededDataFiles;
} else {
return null;
}
}
@Override
protected String getCacheKey(GrowingSOM gsom, int index, int width, int height) {
return super.getCacheKey(gsom, index, width, height) + CACHE_KEY_SECTION_SEPARATOR + "radius:" + radius;
}
public class PMatrixControlPanel extends VisualizationControlPanel {
private static final long serialVersionUID = 1L;
/** The {@link JSpinner} to set the radius via the percentile */
private JSpinner spinnerPercentile = new JSpinner(new SpinnerNumberModel(20, 1, 50, 1));
/** The {@link JTextField} to directly set a radius */
private JTextField textFieldRadius = new JTextField();
private PMatrixControlPanel() {
super("P/U*Matrix Control");
spinnerPercentile.addChangeListener(new ChangeListener() {
@Override
public void stateChanged(ChangeEvent e) {
radius = percentiles.getQuick((Integer) spinnerPercentile.getValue());
updateRadiusTextField();
if (visualizationUpdateListener != null) {
visualizationUpdateListener.updateVisualization();
}
}
});
textFieldRadius.addActionListener(new ActionListener() {
@Override
public void actionPerformed(ActionEvent e) {
radius = Double.parseDouble(textFieldRadius.getText());
if (visualizationUpdateListener != null) {
visualizationUpdateListener.updateVisualization();
}
}
});
textFieldRadius.setInputVerifier(new DoubleNumberInputVerifier());
JButton buttonRecalc = new JButton("Compute optimal radius");
buttonRecalc.addActionListener(new ActionListener() {
@Override
public void actionPerformed(ActionEvent e) {
setOptimalRadius();
if (visualizationUpdateListener != null) {
visualizationUpdateListener.updateVisualization();
}
}
});
JPanel pmatrixPanel = new JPanel(new GridBagLayout());
GridBagConstraintsIFS gc = new GridBagConstraintsIFS(GridBagConstraints.NORTHWEST,
GridBagConstraints.HORIZONTAL);
gc.insets = new Insets(0, 2, 2, 1);
pmatrixPanel.add(new JLabel("Density percentile"), gc);
pmatrixPanel.add(spinnerPercentile, gc.nextCol().setWeightX(1.0));
pmatrixPanel.add(new JLabel("P-Radius"), gc.nextRow().setWeightX(0));
pmatrixPanel.add(textFieldRadius, gc.nextCol().setWeightX(1.0));
pmatrixPanel.add(buttonRecalc, gc.nextRow().setGridWidth(2).setAnchor(GridBagConstraints.CENTER));
add(pmatrixPanel, c);
}
}
private void setOptimalRadius() {
int percentile = calculateParetoRadiusPercentile(distanceMatrix, percentiles);
radius = percentiles.get(percentile);
((PMatrixControlPanel) controlPanel).spinnerPercentile.setValue(percentile);
updateRadiusTextField();
}
private void updateRadiusTextField() {
((PMatrixControlPanel) controlPanel).textFieldRadius.setText(StringUtils.format(radius, 5, true));
((PMatrixControlPanel) controlPanel).textFieldRadius.setToolTipText("Exact radius: " + radius);
}
@Override
public BufferedImage createVisualization(int index, GrowingSOM gsom, int width, int height)
throws SOMToolboxException {
checkVariantIndex(index, getClass());
if (distanceMatrix == null) {
if (gsom.getSharedInputObjects().getInputVectorDistanceMatrix() != null) {
distanceMatrix = gsom.getSharedInputObjects().getInputVectorDistanceMatrix();
} else {
distanceMatrix = gsom.getSharedInputObjects().getInputVectorDistanceMatrix();
distanceMatrix = new LeightWeightMemoryInputVectorDistanceMatrix(
gsom.getSharedInputObjects().getInputData(), gsom.getLayer().getMetric());
}
// calculate the percentiles in the input data
percentiles = createPercentiles(distanceMatrix);
// guess the paretoRadius from the percentiles and the input data
setOptimalRadius();
}
DoubleMatrix2D matrix;
if (index == 0) {
matrix = createPMatrix(gsom);
} else {
matrix = createUStarMatrix(gsom);
}
VectorTools.normalise(matrix);
return createImage(gsom, matrix, width, height, interpolate);
}
public DoubleMatrix2D createPMatrix(GrowingSOM gsom) throws MetricException, LayerAccessException {
// we store a cache of the PMatrix, as it is computationally expensive, and is be used for both P-Matrix and
// U*-Matrix
if (!pMatrixCache.containsKey(new MultiKey(gsom, radius))) {
GrowingLayer layer = gsom.getLayer();
InputData data = gsom.getSharedInputObjects().getInputData();
DistanceMetric metric = layer.getMetric();
int pmatW = layer.getXSize();
int pmatH = layer.getYSize();
DoubleMatrix2D pmatrix = new DenseDoubleMatrix2D(pmatH, pmatW);
// now check // for each unit in the SOM how many of the input data items lie within the Pareto radius
// around the unit's weight vector
int index = 0;
for (int x = 0; x < layer.getXSize(); x++) {
for (int y = 0; y < layer.getYSize(); y++) {
int withinRadius = 0;
for (int i = 0; i < data.numVectors(); i++) { // check whether the distance to each input is within
// the pareto radius
if (metric.distance(layer.getUnit(x, y).getWeightVector(), data.getInputVector(i)) < radius) {
withinRadius++;
}
pmatrix.set(y, x, withinRadius);
}
index++;
}
}
pMatrixCache.put(new MultiKey(gsom, radius), pmatrix);
}
return pMatrixCache.get(new MultiKey(gsom, radius));
}
public static int coordinates2index(int row, int col, int columns) {
return row * columns + col;
}
private DoubleMatrix1D createPercentiles(InputVectorDistanceMatrix distances) {
// FIXME: document this
final double[] distancesFlat = distances.getDistancesFlat();
DoubleMatrix1D reducedValues = new DenseDoubleMatrix1D(distancesFlat);
// calculate distance percentiles
DoubleQuantileFinder finder = QuantileFinderFactory.newDoubleQuantileFinder(true, reducedValues.size(), 0.0,
0.0, 100, null);
finder.addAllOf(new DoubleArrayList(distancesFlat));
double[] p = ArrayUtils.getLinearPercentageArray();
DoubleArrayList percentages = new DoubleArrayList(p);
return new DenseDoubleMatrix1D(finder.quantileElements(percentages).elements());
}
private DoubleMatrix1D getAllDensities(InputVectorDistanceMatrix distances, double radius) {
// FIXME: use DoubleMatrix1D right away, w/o having to go via a list
DoubleArrayList list = new DoubleArrayList();
for (int row = 0; row < distances.rows(); row++) {
int counter = 0;
// get only the right part of the matrix (its symmetric)
for (int col = row; col < distances.columns(); col++) {
double distance = distances.getDistance(row, col);
if (distance < radius) {
counter++;
}
}
list.add(counter);
}
return DoubleFactory1D.dense.make(list);
}
private int calculateParetoRadiusPercentile(InputVectorDistanceMatrix distances, DoubleMatrix1D percentiles) {
// the paper describes the 18th percentile as a good start value for gaussian distributions
int percentile = 18;
double radius;
// variables needed for the search
int last_percentile = percentile;
double diff = 0.0;
double last_diff = 1.0;
double median_size;
boolean stop = false;
double upper_size = 1.0;
double lower_size = 0.0;
// upper and lower search boundaries for the percentiles
double upper_percentile = 50;
double lower_percentile = 2;
Logger log = Logger.getLogger("at.tuwien.ifs.somtoolbox");
while (!stop) {
// get current radius from the percentile
radius = percentiles.getQuick(percentile);
// compute densities with this radius
DoubleMatrix1D densities = getAllDensities(distances, radius);
// median percentage of points in spheres
if (densities.size() != 0) {
double median = VectorTools.median(densities.toArray());
double mean = densities.zSum() / densities.size();
log.info("Mean: " + mean + " median: " + median);
median_size = Math.max(median, mean) / distances.columns();
} else {
median_size = 0;
}
log.fine("spheres for " + percentile + "%-tile contain on average " + Math.round(median_size * 100)
+ "% of the data");
// compute difference of median size to the defined optimum
diff = median_size - PARETO_SIZE;
// stop if last step was 1, or the defined upper/lower stopping criterion is reached
stop = Math.abs(percentile - last_percentile) == 1 || percentile == upper_percentile
|| percentile == lower_percentile;
if (!stop) { // iterate
last_percentile = percentile;
last_diff = diff;
// adjust percentile towards optimum with linear interpolation
if (diff > 0) {
upper_percentile = percentile;
upper_size = median_size;
} else {
lower_percentile = percentile;
lower_size = median_size;
}
// compute the estimated position of pareto size in the current search interval
double pest = (PARETO_SIZE - lower_size) / (upper_size - lower_size)
* (upper_percentile - lower_percentile) + lower_percentile;
// step towards the estimated position
double step = pest - percentile;
// always go at least 1 resp. -1
if (step > 0) {
step = Math.max(step, 1);
} else {
step = Math.min(step, -1);
}
percentile = percentile + (int) Math.round(step);
} else {
// if it is better, revert to the last percentile before we stopped
if (Math.abs(diff) > Math.abs(last_diff)) {
percentile = last_percentile;
}
}
}
log.info("P-Matrix: " + percentile + "%tile chosen.");
return percentile;
}
public DoubleMatrix2D createUStarMatrix(GrowingSOM gsom) throws MetricException, LayerAccessException {
DoubleMatrix2D umatrix = createUMatrix(gsom);
DoubleMatrix2D pmatrix = createPMatrix(gsom);
double meanP = pmatrix.zSum() / pmatrix.size();
double maxP = pmatrix.aggregate(Functions.max, Functions.identity);
double diff = meanP - maxP;
DoubleMatrix2D ustarmatrix = DoubleFactory2D.dense.make(gsom.getLayer().getYSize(), gsom.getLayer().getXSize(),
-1);
for (int x = 0; x < ustarmatrix.rows(); x++) {
for (int y = 0; y < ustarmatrix.columns(); y++) {
double uheight = umatrix.getQuick(x * 2, y * 2);
double pheight = pmatrix.getQuick(x, y);
double scaleFactor = (pheight - meanP) / diff + 1;
ustarmatrix.setQuick(x, y, uheight * scaleFactor);
}
}
return ustarmatrix;
}
}