/*
 * Copyright 2004-2010 Information & Software Engineering Group (188/1)
 *                     Institute of Software Technology and Interactive Systems
 *                     Vienna University of Technology, Austria
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package at.tuwien.ifs.somtoolbox.data;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Random;
import java.util.logging.Logger;

import cern.colt.matrix.impl.DenseDoubleMatrix1D;
import cern.colt.matrix.impl.SparseDoubleMatrix1D;

import at.tuwien.ifs.somtoolbox.layers.metrics.DistanceMetric;
import at.tuwien.ifs.somtoolbox.layers.metrics.MetricException;
import at.tuwien.ifs.somtoolbox.util.StdErrProgressWriter;

/**
 * Reads SOMLib input from a random access file.
 * <p>
 * File layout, as produced by {@link #write(InputData, String)}:
 * <ol>
 * <li>two ints: the number of vectors and the dimensionality,</li>
 * <li><code>numVectors * dim</code> doubles, vector after vector,</li>
 * <li>the vector labels as two-byte chars, each label preceded by a newline character, with one more newline
 * character after the last label.</li>
 * </ol>
 * Vector values are not kept in memory; every access seeks to the value's position in the file. Only the mean vector
 * and the labels are read eagerly by the constructor. The underlying file is opened by the constructor and is not
 * closed by any method of this class.
 * 
 * @see RandomAccessFile
 * @author Rudolf Mayer
 * @version $Id: RandomAccessFileSOMLibInputData.java 3883 2010-11-02 17:13:23Z frank $
 */
public class RandomAccessFileSOMLibInputData extends AbstractSOMLibSparseInputData {

    private static final Logger LOGGER = Logger.getLogger("at.tuwien.ifs.somtoolbox");

    private static final int BYTES_CHAR = Character.SIZE / 8;

    private static final int BYTES_INT = Integer.SIZE / 8;

    private static final int BYTES_DOUBLE = Double.SIZE / 8;

    private RandomAccessFile inputFile;

    /** Size of the file header in bytes: the values for num vectors &amp; dim. */
    int headerOffset = 2 * BYTES_INT;

    /**
     * Opens the given file with the default normalisation setting and default random seed, without template vector
     * and class information.
     * 
     * @param fileName path of the random access file to read
     * @throws IOException if the file cannot be opened or read
     */
    public RandomAccessFileSOMLibInputData(String fileName) throws IOException {
        this(SOMLibSparseInputData.DEFAULT_NORMALISED, new Random(SOMLibSparseInputData.DEFAULT_RANDOM_SEED), null,
                null, fileName);
    }

    /**
     * Opens the given file, reads its header, computes the mean vector over all vectors and reads the vector labels.
     * 
     * @param norm passed on to the super class constructor
     * @param rand passed on to the super class constructor
     * @param tv the template vector to use, may be <code>null</code>
     * @param clsInfo the class information to use, may be <code>null</code>
     * @param fileName path of the random access file to read
     * @throws IOException if the file cannot be opened or read, or if its header contains negative sizes
     */
    public RandomAccessFileSOMLibInputData(boolean norm, Random rand, TemplateVector tv,
            SOMLibClassInformation clsInfo, String fileName) throws IOException {
        super(norm, rand);
        this.templateVector = tv;
        this.classInfo = clsInfo;

        inputFile = new RandomAccessFile(fileName, "r");
        boolean initialised = false;
        try {
            numVectors = inputFile.readInt();
            dim = inputFile.readInt();
            if (numVectors < 0 || dim < 0) {
                throw new IOException("Invalid header in '" + fileName + "': numVectors=" + numVectors + ", dim="
                        + dim);
            }
            System.out.println("num vecs: " + numVectors);
            System.out.println("dim: " + dim);

            this.meanVector = new DenseDoubleMatrix1D(computeMeanVector());
            dataNames = readLabels();
            initialised = true;
        } finally {
            // do not leak the file handle if the file turned out to be unreadable
            if (!initialised) {
                try {
                    inputFile.close();
                } catch (IOException ignored) {
                    // the failure that brought us here is the one worth reporting
                }
            }
        }
    }

    /** Reads all vectors once, sequentially, and returns the per-dimension mean. */
    private double[] computeMeanVector() throws IOException {
        double[] mean = new double[dim];
        inputFile.seek(getOffset(0));
        for (int i = 0; i < numVectors; i++) {
            for (int j = 0; j < dim; j++) {
                mean[j] += inputFile.readDouble();
            }
        }
        StdErrProgressWriter progress = new StdErrProgressWriter(mean.length, "Calculating mean vector ", 10);
        for (int i = 0; i < mean.length; i++) {
            mean[i] = mean[i] / numVectors;
            progress.progress();
        }
        return mean;
    }

    /**
     * Reads the newline-separated labels stored after the vector data. At most <code>numVectors</code> labels are
     * read; entries stay <code>null</code> if the file holds fewer labels.
     */
    private String[] readLabels() throws IOException {
        String[] labels = new String[numVectors];
        // the label section starts with a newline character, which is skipped
        final long labelStart = getOffset(numVectors) + BYTES_CHAR;
        final long fileLength = inputFile.length();
        inputFile.seek(labelStart);

        StringBuilder label = new StringBuilder();
        int index = 0;
        for (long position = labelStart; position < fileLength && index < labels.length; position += BYTES_CHAR) {
            final char readChar = inputFile.readChar();
            if (readChar != '\n') {
                label.append(readChar);
            } else {
                labels[index] = label.toString();
                index++;
                label.setLength(0);
            }
        }
        return labels;
    }

    /**
     * Reads the vector with the given index from the file.
     * 
     * @param d index of the vector
     * @return the input datum, or <code>null</code> if reading from the file failed (the failure is logged)
     */
    @Override
    public InputDatum getInputDatum(int d) {
        try {
            return new InputDatum(dataNames[d], readVectorFromFile(d));
        } catch (IOException e) {
            logFailure("Could not read input datum " + d + " from random access file", e);
            return null;
        }
    }

    /** Seeks to the start of vector <code>d</code> and reads its <code>dim</code> values. */
    private SparseDoubleMatrix1D readVectorFromFile(int d) throws IOException {
        inputFile.seek(getOffset(d));
        SparseDoubleMatrix1D vec = new SparseDoubleMatrix1D(dim);
        for (int i = 0; i < dim; i++) {
            vec.setQuick(i, inputFile.readDouble());
        }
        return vec;
    }

    /**
     * Reads the values of the vector with the given index from the file.
     * 
     * @param d index of the vector
     * @return the vector values, or <code>null</code> if reading from the file failed (the failure is logged)
     */
    @Override
    public double[] getInputVector(int d) {
        try {
            return readVectorFromFile(d).toArray();
        } catch (IOException e) {
            logFailure("Could not read input vector " + d + " from random access file", e);
            return null;
        }
    }

    /** Byte position of the first value of vector <code>i</code>; computed in long to support files over 2 GiB. */
    private long getOffset(int i) {
        return headerOffset + (long) i * dim * BYTES_DOUBLE;
    }

    /** Byte position of value <code>j</code> of vector <code>i</code>. */
    private long getOffset(int i, int j) {
        return getOffset(i) + (long) j * BYTES_DOUBLE;
    }

    /**
     * Returns the sum of the distances between the mean vector and every input vector. The value is computed on the
     * first call and cached afterwards.
     * 
     * @param metric the distance metric to use
     * @return the summed distance to the mean vector
     * @throws IllegalArgumentException if the metric fails to compute a distance
     */
    @Override
    public double mqe0(DistanceMetric metric) {
        if (mqe0 == -1) { // mqe0 for data was not yet calculated
            // accumulate locally, so a failure does not leave a partial sum cached in the field
            double sum = 0;
            try {
                for (int i = 0; i < numVectors; i++) {
                    sum += metric.distance(meanVector, getInputDatum(i));
                }
            } catch (MetricException e) {
                LOGGER.severe(e.getMessage());
                throw new IllegalArgumentException(e.getMessage(), e);
            }
            mqe0 = sum;
        }
        return mqe0;
    }

    /**
     * Not implemented.
     * 
     * @return always <code>null</code>
     */
    @Override
    public InputData subset(String[] names) {
        // TODO Auto-generated method stub
        return null;
    }

    /**
     * Reads a single value from the file.
     * 
     * @param x index of the vector
     * @param y index of the component within the vector
     * @return the stored value
     * @throws IllegalStateException if the value cannot be read from the file
     */
    @Override
    public double getValue(int x, int y) {
        try {
            inputFile.seek(getOffset(x, y));
            return inputFile.readDouble();
        } catch (IOException e) {
            final String message = "Could not read value (" + x + ", " + y + ") from random access file";
            logFailure(message, e);
            // report the failure to the caller instead of terminating the whole JVM
            throw new IllegalStateException(message, e);
        }
    }

    /** Logs an I/O failure, including its stack trace, on the toolbox logger. */
    private static void logFailure(String message, IOException cause) {
        LOGGER.log(java.util.logging.Level.SEVERE, message, cause);
    }

    /**
     * Writes the given data as a random access file: header, vector values, then labels. An existing file is
     * overwritten.
     * 
     * @param data the data to write
     * @param outputFile path of the file to write
     * @throws IOException if writing fails
     */
    public static void write(InputData data, String outputFile) throws IOException {
        RandomAccessFile randomAccessFile = new RandomAccessFile(outputFile, "rw");
        try {
            // opening in "rw" mode does not truncate; stale trailing bytes would later be read as labels
            randomAccessFile.setLength(0);
            writeHeader(randomAccessFile, data.numVectors(), data.dim());
            writeContent(randomAccessFile, data.getData());
            writeVectorLabels(randomAccessFile, data.getLabels());
        } finally {
            randomAccessFile.close();
        }
    }

    /**
     * Writes the number of vectors and the dimensionality at the current file position.
     * 
     * @return always <code>true</code>
     * @throws IOException if writing fails
     */
    public static boolean writeHeader(RandomAccessFile randomAccessFile, int numVectors, int dim)
            throws IOException {
        randomAccessFile.writeInt(numVectors);
        randomAccessFile.writeInt(dim);
        return true;
    }

    /** Writes all values, row after row, at the current file position. */
    private static void writeContent(RandomAccessFile randomAccessFile, double[][] data) throws IOException {
        for (double[] row : data) {
            for (double value : row) {
                randomAccessFile.writeDouble(value);
            }
        }
    }

    /**
     * Writes the labels at the current file position, each one preceded by a newline character, followed by one
     * final newline character, and prints the resulting file length to standard out.
     * 
     * @throws IOException if writing fails
     */
    public static void writeVectorLabels(RandomAccessFile randomAccessFile, String[] labels) throws IOException {
        for (String label : labels) {
            randomAccessFile.writeChars("\n" + label);
        }
        randomAccessFile.writeChars("\n");
        System.out.println("wrote file, length: " + randomAccessFile.length());
    }

    /**
     * Main method for testing purposes, either writes and reads a random access file, or compares a random access
     * file with an ascii version of the same input data.
     */
    public static void main(String[] args) throws IOException {
        if (args.length > 1) {
            compare(args[0], args[1]);
        } else {
            test("test.bin");
        }
    }

    /** Compare the ascii and binary version of some SOMLib input data. */
    private static void compare(String ascii, String binary) throws IOException {
        SOMLibSparseInputData memoryData = new SOMLibSparseInputData(ascii);
        RandomAccessFileSOMLibInputData fileData = new RandomAccessFileSOMLibInputData(true, new Random(), null,
                null, binary);
        System.out.println(fileData.equals(memoryData));
    }

    /** Create and read a random access file SOMLib input data. */
    private static void test(String fileName) throws FileNotFoundException, IOException {
        double[][] values = { { 1, 2.4, 5.2 }, { 1.5, 8.2, 9.0 } };

        File file = new File(fileName);
        if (file.exists()) {
            file.delete();
        }

        String[] names = new String[values.length];
        for (int i = 0; i < names.length; i++) {
            names[i] = "label" + i;
        }

        RandomAccessFile f = new RandomAccessFile(fileName, "rw");
        try {
            f.writeInt(values.length);
            System.out.println("wrote " + values.length);
            f.writeInt(values[0].length);
            System.out.println("wrote " + values[0].length);

            for (double[] row : values) {
                System.out.print("wrote: ");
                for (double value : row) {
                    f.writeDouble(value);
                    System.out.print(value + ", ");
                }
                System.out.println();
            }
            for (String name : names) {
                f.writeChars("\n" + name);
            }
            f.writeChars("\n");
            System.out.println("wrote file, length: " + f.length());
            System.out.println();
        } finally {
            f.close();
        }

        RandomAccessFileSOMLibInputData data = new RandomAccessFileSOMLibInputData(true, new Random(), null, null,
                fileName);
        System.out.println("num vec: " + data.numVectors());
        System.out.println("dim: " + data.dim());
        for (int i = 0; i < data.numVectors(); i++) {
            final InputDatum inputDatum = data.getInputDatum(i);
            System.out.println(inputDatum + " -> "
                    + inputDatum.equals(new InputDatum("label" + i, new DenseDoubleMatrix1D(values[i]))));
        }
    }

    public static String getFormatName() {
        return "randomAccess";
    }

    public static String getFileNameSuffix() {
        return ".bin";
    }

}