/*
* Copyright 2004-2010 Information & Software Engineering Group (188/1)
* Institute of Software Technology and Interactive Systems
* Vienna University of Technology, Austria
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package at.tuwien.ifs.somtoolbox.data.distance;
import java.io.IOException;
import java.io.RandomAccessFile;
import at.tuwien.ifs.somtoolbox.SOMToolboxException;
import at.tuwien.ifs.somtoolbox.layers.metrics.AbstractMetric;
import at.tuwien.ifs.somtoolbox.layers.metrics.DistanceMetric;
/**
* A distance matrix based on a binary {@link RandomAccessFile}. This implementation does not read the matrix into
* memory, and is thus especially suited for big datasets.
* <p>
* The file is built as follows:
* <ul>
* <li>One integer value, giving the number of vectors</li>
* <li>A series of double values representing the upper-right half of the symmetric distance matrix, not containing the
* values in the diagonal itself (as they are all 0).<br>
* Thus, for n vectors there are n*(n-1)/2 double values; for n = 6 the matrix file contains the following (x, y) tuples:
*
* <pre>
* [(2,1) (3,1) (4,1) (5,1) (6,1)]
* [ (3,2) (4,2) (5,2) (6,2)]
* [ (4,3) (5,3) (6,3)]
* [ (5,4) (6,4)]
* [ (6,5)]
* [ ]
* </pre>
*
* </li>
* <li>The name of the metric used, as a String stored as a sequence of 2-byte chars (until the end of the file).</li>
* </ul>
* </p>
*
* @author Rudolf Mayer
* @version $Id: RandomAccessFileInputVectorDistanceMatrix.java 3706 2010-07-20 11:07:54Z mayer $
*/
public class RandomAccessFileInputVectorDistanceMatrix extends InputVectorDistanceMatrix {
    /** Size in bytes of the file header, i.e. of the single int holding the number of vectors. */
    public static final int BYTES_HEADER = Integer.SIZE / 8;

    /** Size in bytes of one char as consumed by {@link RandomAccessFile#readChar()}. */
    private static final int BYTES_CHAR = Character.SIZE / 8;

    /** Size in bytes of one stored distance value. */
    private static final int BYTES_DOUBLE = Double.SIZE / 8;

    /** Handle on the matrix file. It is opened in the constructor and is not closed anywhere in this class. */
    private final RandomAccessFile file;

    /**
     * Opens the given matrix file and reads the number of vectors from its header. The distances themselves are not
     * loaded; they are read from the file on demand.
     *
     * @param fileName path of the binary distance matrix file
     * @throws IOException if the file cannot be opened for reading, or the header cannot be read
     * @throws SOMToolboxException declared for callers; not thrown by the code in this constructor
     */
    public RandomAccessFileInputVectorDistanceMatrix(String fileName) throws IOException, SOMToolboxException {
        // The matrix is only ever read, so open it read-only: "rw" would fail on write-protected files and would
        // silently create an empty file if the path does not exist.
        file = new RandomAccessFile(fileName, "r");
        try {
            numVectors = file.readInt();
        } catch (IOException e) {
            // Do not leak the handle when the header is missing or unreadable.
            try {
                file.close();
            } catch (IOException ignored) {
                // the original failure is the more informative one, so it is the one we report
            }
            throw e;
        }
    }

    /**
     * Returns the metric the distances were computed with. On first use, the metric name is read from the end of the
     * file (everything after the last distance value), trimmed, and passed to
     * {@link AbstractMetric#instantiateNice(String)}; the result is kept for subsequent calls.
     *
     * @return the metric, or whatever value the <code>metric</code> field already held (possibly <code>null</code>)
     *         if reading the name or instantiating the metric failed; in that case the stack trace is printed
     */
    @Override
    public DistanceMetric getMetric() {
        if (metric != null) {
            return metric;
        }
        try {
            // getOffset(n-1, n-1, n) is the index of the last stored distance, hence +1 for the number of values.
            final long nameStart = (getOffset(numVectors - 1, numVectors - 1, numVectors) + 1) * BYTES_DOUBLE
                    + BYTES_HEADER;
            final long fileLength = file.length();
            file.seek(nameStart);
            final StringBuilder metricName = new StringBuilder();
            for (long pos = nameStart; pos < fileLength; pos += BYTES_CHAR) {
                metricName.append(file.readChar());
            }
            metric = AbstractMetric.instantiateNice(metricName.toString().trim());
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SOMToolboxException e) {
            e.printStackTrace();
        }
        return metric;
    }

    /**
     * Reads the distance between the vectors with the given indices from the file. The order of the two indices does
     * not matter. Seeking and reading are two separate calls on the shared file handle and are not synchronized here.
     *
     * @param x index of the first vector
     * @param y index of the second vector
     * @return <code>0</code> if both indices are equal, otherwise the stored distance; <code>-1</code> if reading
     *         from the file failed (the stack trace is printed in that case)
     */
    @Override
    public double getDistance(int x, int y) {
        if (x == y) {
            // the diagonal is not stored in the file
            return 0;
        }
        try {
            file.seek(getOffset(x, y, numVectors) * BYTES_DOUBLE + BYTES_HEADER);
            return file.readDouble();
        } catch (IOException e) {
            e.printStackTrace();
            return -1;
        }
    }

    /**
     * Find the offset of a specific value in the linear order. The result is counted in values (not bytes) from the
     * start of the distance data, and the two indices may be given in either order. For example, with
     * <code>numVectors = 6</code>: <code>getOffset(1, 0, 6) == 0</code>, <code>getOffset(2, 1, 6) == 5</code> and
     * <code>getOffset(5, 4, 6) == 14</code>.
     * <p>
     * If both indices are equal, the result is one less than the offset of the first value stored for that row; for
     * <code>x == y == numVectors - 1</code> this is the offset of the last stored value.
     * </p>
     *
     * @param x index of one vector
     * @param y index of the other vector
     * @param numVectors total number of vectors in the matrix
     * @return the offset of the value for the pair (x, y)
     */
    protected static long getOffset(long x, long y, long numVectors) {
        final long larger = Math.max(x, y);
        final long smaller = Math.min(x, y);
        // Everything is computed in long, as int arithmetic overflows for larger files. The product of two
        // consecutive numbers is always even, so the division is exact and no floating point detour is needed.
        final long skipped = smaller * (smaller + 1) / 2;
        return smaller * numVectors - skipped - 1 + larger - smaller;
    }
}