/**
 * GeDBIT.type.DoubleVectorTable 2006.07.24
 *
 * Change Log:
 * 2006.07.24: Added, by Willard
 */
package GeDBIT.type;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.logging.Logger;

import GeDBIT.dist.LMetric;
import GeDBIT.dist.Metric;
import GeDBIT.util.Debug;

/**
 * A {@link Table} whose rows are {@link DoubleVector}s parsed from a
 * whitespace-delimited text file.
 *
 * @author willard
 */
public class DoubleVectorTable extends Table {

    private static final long serialVersionUID = 7630078213101669086L;

    /** Metric used by the constructor that does not take one explicitly. */
    private static final Metric DEFAULT_METRIC = LMetric.EuclideanDistanceMetric;

    /**
     * Field separator inside a line: one or more spaces or tabs. Compiled
     * once so the expression is not re-compiled for every line of the file.
     */
    private static final java.util.regex.Pattern FIELD_SEPARATOR =
            java.util.regex.Pattern.compile("[ \t]+");

    /**
     * Loads a table from a file using the default metric,
     * {@code LMetric.EuclideanDistanceMetric}. See the five-argument
     * constructor for the expected file format.
     *
     * @param fileName
     *            the filename of the source file
     * @param indexPrefix
     *            forwarded unchanged to the {@link Table} constructor
     *            (presumably the prefix of the index files -- confirm
     *            against {@code Table})
     * @param size
     *            maximum number of data points to read
     * @param dimNum
     *            number of dimensions to read from each data point
     * @throws IOException
     *             if the file cannot be read or its content is malformed
     */
    public DoubleVectorTable(String fileName, String indexPrefix, int size,
            int dimNum) throws IOException {
        this(fileName, indexPrefix, size, dimNum, DEFAULT_METRIC);
    }

    /**
     * Loads a table from a file.
     *
     * The first line of the file should have two integers, separated by white
     * space. The first is the dimension of the DoubleVector, the second is the
     * total number of data points (read but currently not used; {@code size}
     * alone limits how many rows are loaded). Each following line is a
     * DoubleVector, with each dimension separated by white space.
     *
     * @param fileName
     *            the filename of the source file
     * @param indexPrefix
     *            forwarded unchanged to the {@link Table} constructor
     *            (presumably the prefix of the index files -- confirm
     *            against {@code Table})
     * @param size
     *            maximum number of data points to read
     * @param dimNum
     *            number of dimensions to read from each data point; lowered
     *            to the file's declared dimension when that is smaller
     * @param metric
     *            the metric forwarded to the {@link Table} constructor
     * @throws IOException
     *             if the file cannot be read or its content is malformed
     */
    public DoubleVectorTable(String fileName, String indexPrefix, int size,
            int dimNum, Metric metric) throws IOException {
        super(fileName, indexPrefix, size, metric);
        BufferedReader reader = new BufferedReader(new FileReader(fileName));
        try {
            loadData(reader, size, dimNum);
        } finally {
            // Release the file handle whether or not loading succeeded.
            reader.close();
        }
    }

    /**
     * Parses the header line and up to {@code maxSize} data rows from
     * {@code reader}, then stores the resulting vectors in {@code data} and
     * their row numbers (0, 1, 2, ...) in {@code originalRowIDs}. Blank lines
     * between or after data rows are skipped. The reader is not closed here;
     * that is the caller's responsibility.
     *
     * @param reader
     *            source of the text to parse, positioned at the header line
     * @param maxSize
     *            maximum number of data rows to load
     * @param dimNum
     *            number of leading values to keep from each row; lowered to
     *            the dimension declared in the header when that is smaller
     * @throws IOException
     *             if reading fails, the input is empty, the header does not
     *             consist of exactly two fields, or a row has fewer than the
     *             required number of values
     * @throws NumberFormatException
     *             if the declared dimension or a data value is not a valid
     *             number
     */
    void loadData(BufferedReader reader, int maxSize, int dimNum)
            throws IOException {
        // Header line: "<dimension> <total number of data points>".
        String header = reader.readLine();
        if (header == null) {
            throw new IOException(
                    "Error: Cannot parse the data file: it is empty.");
        }
        String[] metaData = FIELD_SEPARATOR.split(header.trim());
        if (metaData.length != 2) {
            throw new IOException("Error: Cannot parse the data file.");
        }

        final int dim = java.lang.Integer.parseInt(metaData[0]); // dimension
        if (dim < dimNum) {
            // Never read more values per row than the file declares.
            dimNum = dim;
        }

        if (Debug.debug) {
            Logger.getLogger("GeDBIT.index").info("dim: " + dim);
            Logger.getLogger("GeDBIT.index").info("size: " + maxSize);
        }

        ArrayList<DoubleVector> doubleVectors = new ArrayList<DoubleVector>();
        int numData = 0;
        int lineNumber = 1; // the header was line 1
        String line;
        while (numData < maxSize && (line = reader.readLine()) != null) {
            lineNumber++;
            line = line.trim();
            if (line.length() == 0) {
                // Tolerate blank lines, e.g. a trailing newline at end of file.
                continue;
            }

            String[] row = FIELD_SEPARATOR.split(line);
            if (Debug.debug) {
                for (int i = 0; i < row.length; i++) {
                    Logger.getLogger("GeDBIT.index").finer(
                            "row[" + i + "]: " + row[i]);
                }
            }
            if (row.length < dimNum) {
                throw new IOException(
                        "Error: Cannot parse the data file: line " + lineNumber
                                + " has " + row.length
                                + " value(s), expected at least " + dimNum
                                + ".");
            }

            // A fresh array per row, so that vectors can never alias one
            // another's storage even if DoubleVector keeps the array it is
            // handed instead of copying it.
            double[] values = new double[dimNum];
            for (int i = 0; i < dimNum; i++) {
                values[i] = java.lang.Double.parseDouble(row[i]);
            }
            doubleVectors.add(new DoubleVector(this, numData, values));
            numData++;
        }

        doubleVectors.trimToSize();
        this.data = doubleVectors;

        // Rows are numbered in the order they were loaded.
        originalRowIDs = new int[numData];
        for (int i = 0; i < numData; i++) {
            originalRowIDs[i] = i;
        }
    }
}