/* * Copyright 2004-2010 Information & Software Engineering Group (188/1) * Institute of Software Technology and Interactive Systems * Vienna University of Technology, Austria * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package at.tuwien.ifs.somtoolbox.data.distance; import java.io.IOException; import java.util.ArrayList; import java.util.logging.Logger; import cern.colt.matrix.impl.DenseDoubleMatrix1D; import at.tuwien.ifs.somtoolbox.SOMToolboxException; import at.tuwien.ifs.somtoolbox.layers.metrics.DistanceMetric; import at.tuwien.ifs.somtoolbox.util.FileUtils; import at.tuwien.ifs.somtoolbox.util.StringUtils; /** * Base for classes providing a distance matrix of the input vectors, with generic methods and fields.<br/> * A distance matrix is of size <code>n*n</code>, where <code>n</code> is the number of input vectors. The matrix is * symmetric, i.e. the upper-right and lower-left halves contain the same values. The diagonal contains the distances of * one element to itself, and is thus always 0. * * @author Rudolf Mayer * @version $Id: InputVectorDistanceMatrix.java 3711 2010-07-23 09:37:24Z mayer $ */ public abstract class InputVectorDistanceMatrix { protected int numVectors; protected DistanceMetric metric; protected ArrayList<String> inputLabels; public DistanceMetric getMetric() { return metric; } public ArrayList<String> getInputLabels() { return inputLabels; } public InputVectorDistanceMatrix() { super(); } /** Return the distance between input vectors x and y. */ public abstract double getDistance(int x, int y); /** * Return the n nearest vectors of input x. Basic implementation of the method, sub-classes might provide an * optimised implementation. */ public int[] getNNearest(int x, int num) { double[] distancesToInput = getDistances(x); int[] indices = new int[num]; for (int i = 0; i < indices.length; i++) { indices[i] = -1; } double[] distances = new double[num]; for (int i = 0; i < numVectors; i++) { if (x == i) { // skip similarity to the vector itself.. continue; } int element = 0; double distance = distancesToInput[i]; boolean inserted = false; while (inserted == false && element < num) { if (indices[element] == -1 || distance < distances[element]) { // found place to insert unit for (int m = num - 2; m >= element; m--) { // move units with greater distance to right indices[m + 1] = indices[m]; distances[m + 1] = distances[m]; } indices[element] = i; distances[element] = distance; inserted = true; } element++; } } return indices; } /** * Return the distances to all vectors from input x. This is a basic using {@link #getDistance(int, int)}, * sub-classes might provide an optimised implementation. */ public double[] getDistances(int x) { double[] d = new double[numVectors]; for (int y = 0; y < d.length; y++) { d[y] = getDistance(x, y); } return d; } public int numVectors() { return numVectors; } /** * Gets all the distances in a single flat array avoiding duplicates from the pairwise distances, thus of the size * of <code>numVectors * (numVectors - * 1) / 2</code>.<br/> * This is a default implementation always constructing the array on the fly using the * {@link #getDistance(int, int)} method. Specific subclasses might provide better performing implementations, as * e.g. {@link LeightWeightMemoryInputVectorDistanceMatrix}. */ public double[] getDistancesFlat() { double[] distances = new double[flatArraySize()]; int index = 0; for (int x = 1; x < distances.length; x++) { for (int y = x + 1; y < distances.length; y++) { distances[index] = getDistance(x, y); } } return distances; } public DenseDoubleMatrix1D getDistancesFlatAsMatrix() { return new DenseDoubleMatrix1D(getDistancesFlat()); } @Override public String toString() { StringBuffer sb = new StringBuffer(); sb.append("Distance matrix ").append(numVectors()).append("x").append(numVectors()).append("\n"); if (numVectors() < 20) { for (int i = 0; i < numVectors(); i++) { sb.append(StringUtils.toStringWithPrecision(getDistances(i), 3)).append("\n"); } } return sb.toString(); } @Override public boolean equals(Object obj) { if (!(obj instanceof InputVectorDistanceMatrix)) { return false; } else { final InputVectorDistanceMatrix other = (InputVectorDistanceMatrix) obj; for (int i = 0; i < numVectors(); i++) { for (int j = 0; j < numVectors(); j++) { if (getDistance(i, j) != other.getDistance(i, j)) { System.out.println("not equal in " + i + "," + j + ": " + getDistance(i, j) + " <> " + other.getDistance(i, j)); return false; } } } return true; } } /** * Factory method that reads and creates an {@link InputVectorDistanceMatrix} from the given file. Depending on the * filename, returns either a {@link RandomAccessFileInputVectorDistanceMatrix} (if the filename ends with '.bin') * or a {@link LeightWeightMemoryInputVectorDistanceMatrix} (all other cases).<br> * TODO: maybe more intelligent checking for file type, possibly trying to read it as binary, and checking the first * bytes for a file type or so. */ public static InputVectorDistanceMatrix initFromFile(String fileName) throws IOException, SOMToolboxException { if (fileName.endsWith(".bin") || !FileUtils.fileStartsWith(fileName, "$")) { Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Opening binary random access distance matrix file"); return new RandomAccessFileInputVectorDistanceMatrix(fileName); } else { Logger.getLogger("at.tuwien.ifs.somtoolbox").info("Reading ASCII distance matrix into memory."); return new LeightWeightMemoryInputVectorDistanceMatrix(fileName); } } protected int flatArraySize() { return numVectors * (numVectors - 1) / 2; } public int rows() { return numVectors; } public int columns() { return numVectors; } }