/** * GeDBIT.app.QueryEvaluator 2006.06.27 * * Copyright Information: * * Change Log: * 2006.06.27: Copied from jdb 1.0, by Rui Mao */ package GeDBIT.mapreduce.app; import java.io.BufferedWriter; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.logging.Level; import GeDBIT.dist.Metric; import GeDBIT.index.Index; import GeDBIT.index.RangeQuery; import GeDBIT.index.VPRangeCursor; import GeDBIT.mapreduce.app.IndexMapReduce.radiusData; import GeDBIT.type.DoubleIndexObjectPair; import GeDBIT.type.DoubleVector; import GeDBIT.type.IndexObject; import GeDBIT.type.Sequence; import GeDBIT.type.Table; import GeDBIT.type.TableMR; /** * This is a utility class to query a VPIndex. It, taking command line * parameters, runs a set of query on the given index and compute the average * performance. * * main function to evaluate the query performance of an {@link Index} The * eveluation is done by run a set of query on the Index, and compute the * average performance statistics, such as number of distance calculation, * number of index node visited (#I/O), and search time. The command line * interface to bulkload one {@link Index}, or a series of {@link Index}es for * scalability study. * * -d [name of index, should be a prefix of the actual file name containing serialized database] * -q [query file name] * -f [offset of first query to be used in the query file, start from 0, inclusive, default 0] * -l [offset of * last query to be used in the query file, exclusive, default 1] * -i [minimum search radius, default 0] * -a [maximum search radius, default 10] * -s [step size for search radii, default 1] * -t [data type, "vector", "protein", "dna", "image", "mass"] * -p [length of the path distance list] * -v [1 if search results are to be verified against a linear scan and 0 otherwise, default 0] * -g [debug level, default 0] * -frag [fragment length, only meaningful for {@link Sequence}s] * -dim [dimension of vector data to load, only meaningful for {@link DoubleVector}s] * -res [output results to the given filename] * * The {@link Metric} is hardcoded for each give data type. * * @author Rui Mao, Willard * @version 2006.06.27 */ public class QueryVPIndexMR { String forPrint; // data fields for arguments final Metric metric; final Index index; final int indexSize; // number of data objects in the // index TableMR query; int firstQuery; int lastQuery; int querySize; // number of queries final double minRadius; final double maxRadius; final int pathLength; final double step; final int numRun; // number of different radii to run final boolean verify; final List<IndexObject> linearIndex; final Level debug; final int frag; boolean outputToFile; // indicates whether to output // statistics to a file or // to the terminal String outputFile; // output filename // data fields to store statistics, each array's first dimension is the // dimension for different // search radius, start from the min one. // global statistics double[] distNum; // number of distance calculation, // calculations with // centers, int[] minDistNum, minDistNumID, maxDistNum, maxDistNumID; double[] centerDistNum; // with // bottom // data // objects. int[] minCenterDistNum, minCenterDistNumID, maxCenterDistNum, maxCenterDistNumID; double[] dataDistNum; int[] minDataDistNum, minDataDistNumID, maxDataDistNum, maxDataDistNumID; double[] nodeVisited; // number // of // index // nodes // visited int[] minNodeVisited, minNodeVisitedID, maxNodeVisited, maxNodeVisitedID; double[] internalVisited; double[] resultInternalNode; // number of internal nodes containing at least // one query result; double[] resultLeafNode; // number of leaf nodes containing at least one // query result; double[] resultNode; // number of nodes containing at least one query // result; // double[] pivotDistNum; double[] pivotAsResult; double[] resultWithoutDistance; double[] internalPruned; double[] leafPruned; double[] internalWithoutDist; double[] leafWithoutDist; int[] minInternalVisited, minInternalVisitedID, maxInternalVisited, maxInternalVisitedID; double[] leafVisited; int[] minLeafVisited, minLeafVisitedID, maxLeafVisited, maxLeafVisitedID; double[] time, minTime, maxTime; // search // time int[] minTimeID, maxTimeID; double[] result; // number // of // search // result int[] minResult, minResultID, maxResult, maxResultID; // layer statistics, computed in temp variables, and finally added to the // lists when values are // fixed. // distance between the query and the centers(center of VP) of each level, // -1 means no such value double[][][] queryCenterDistance; // each element is a 1-d array, is the average value of one layer, for each // center/vp (column) // computed during processing the layer, and value added to this list at the // end of the layer // from the perspective of the last layer, these are the total children node // number int[][] layerNode; // number of childre visited, and the ratio. thus, these values are computed int[][] layerNodeVisited; // during the processing of last layer, and are added to the list at the // begin // of the current // layer. // number of data objects of leaf nodes of each level, int[][] layerData; // number of data object directly compute distance on, ratio int[][] layerDataVisited; // computed during processing the layer, and value added to this list at the // end of the layer // name of file to save results to. private String resultsFileName; /** * @param args */ // Honglong Xu /* * public static void main(String[] args) { // arguments and default values * String indexName = ""; String queryFileName = ""; String forPrint = ""; * * int firstQuery = 0; int lastQuery = 1; * * double minRadius = 0.0; double maxRadius = 10.0; double step = 1.0; * * boolean verify = false; Level debug = Level.OFF; * * int frag = 6; int dim = 2; * * int pathLength = 0; * * String dataType = "sequence"; * * String resultsFileName = null; * * // parse arguments, and set values for (int i = 0; i < args.length; i = i * + 2) { if (args[i].equalsIgnoreCase("-d")) indexName = args[i + 1]; * * else if (args[i].equalsIgnoreCase("-q")) queryFileName = args[i + 1]; * * else if (args[i].equalsIgnoreCase("-t")) dataType = args[i + 1]; * * else if (args[i].equalsIgnoreCase("-forprint")) forPrint += args[i + 1] + * ", "; * * else if (args[i].equalsIgnoreCase("-f")) firstQuery = * Integer.parseInt(args[i + 1]); * * else if (args[i].equalsIgnoreCase("-l")) lastQuery = * Integer.parseInt(args[i + 1]); * * else if (args[i].equalsIgnoreCase("-p")) pathLength = * Integer.parseInt(args[i + 1]); * * else if (args[i].equalsIgnoreCase("-i")) minRadius = * Double.parseDouble(args[i + 1]); * * else if (args[i].equalsIgnoreCase("-a")) maxRadius = * Double.parseDouble(args[i + 1]); * * else if (args[i].equalsIgnoreCase("-s")) step = Double.parseDouble(args[i * + 1]); * * else if (args[i].equalsIgnoreCase("-g")) debug = Level.parse(args[i + * 1]); * * else if (args[i].equalsIgnoreCase("-frag")) frag = * Integer.parseInt(args[i + 1]); * * else if (args[i].equalsIgnoreCase("-dim")) dim = Integer.parseInt(args[i * + 1]); * * else if (args[i].equalsIgnoreCase("-v")) verify = * (Integer.parseInt(args[i + 1]) == 1) ? true : false; else if * (args[i].equalsIgnoreCase("-res")) resultsFileName = args[i + 1]; else * throw new IllegalArgumentException("Invalid option " + args[i]); } * * // check arguments if (indexName == "") throw new * IllegalArgumentException("Invalid Index file name!"); * * if (queryFileName == "") throw new * IllegalArgumentException("Invalid Query file name!"); * * if ((firstQuery < 0) || (lastQuery < 0) || (lastQuery < firstQuery)) * throw new * IllegalArgumentException("Invalid first query index or last query index!" * ); * * if ((minRadius < 0) || (maxRadius < 0) || (maxRadius < minRadius) || * (step <= 0)) throw new IllegalArgumentException( * "Invalid min radius, max radius, or radius increasement unit!"); * * // load index from file TableMR dataTable = * TableManager.getTableManager(indexName).getTable(indexName); Index index; * if (dataTable != null) index = dataTable.getIndex(); else throw new * Error("index: " + indexName + " does not exist"); * * // load queryData from file TableMR queryTable = null; double[][] data = * null; try { if (dataType.equalsIgnoreCase("protein")) queryTable = new * PeptideTable(queryFileName, "", lastQuery, frag); else if * (dataType.equalsIgnoreCase("vector")) queryTable = new * DoubleVectorTableMR(queryFileName, "", lastQuery, dim, data); else if * (dataType.equalsIgnoreCase("dna")) queryTable = new * DNATable(queryFileName, "", lastQuery, frag); else if * (dataType.equalsIgnoreCase("image")) queryTable = new * ImageTable(queryFileName, "", lastQuery); else if * (dataType.equalsIgnoreCase("msms")) queryTable = new * SpectraWithPrecursorMassTable(queryFileName, "", lastQuery); else * System.err.println("data type not supported! " + dataType); } catch * (IOException e1) { // TODO Auto-generated catch block * e1.printStackTrace(); } * * // evaluate QueryVPIndexMR evaluator = new QueryVPIndexMR(index, * queryTable, minRadius, maxRadius, step, verify , debug, pathLength, frag, * resultsFileName, firstQuery, lastQuery, forPrint); * * //evaluator.evaluate(); * * // TODO close index? } */ /** * @param index * the {@link Index} to be evaluated * @param query * the {@link Table} to query the index with. * @param minRadius * the minimum range query radius to run * @param maxRadius * the maximum range query radius to run * @param step * the increament unit of the range query radius * @param verify * if true, search results of each query will be verified by a * linear scan * @param debug * decide how much execution log will be output, 0 means nothing. * @param pathLength * @param frag * @param resultsFileName * the name of the file to save the results to. */ // public QueryVPIndex(Index index, Table query, double minRadius, double // maxRadius, double step, boolean verify, Level debug, int pathLength, // int frag, String resultsFileName) // { // this(index, query, minRadius, maxRadius) // } public QueryVPIndexMR(Index index, TableMR query, double minRadius, double maxRadius, double step, boolean verify, Level debug, int pathLength, int frag, String resultsFileName, int firstQuery, int lastQuery, String forPrint) { // check argument if (index == null) throw new IllegalArgumentException(" The Index is null!"); if (query == null) throw new IllegalArgumentException(" The query list is null!"); if ((minRadius < 0) || (maxRadius < 0) || (maxRadius < minRadius) || (step <= 0)) throw new IllegalArgumentException( "Invalid min radius, max radius, or radius increasement unit!"); this.metric = index.getMetric(); this.index = index; this.indexSize = index.size(); this.query = query; this.querySize = query.size(); this.minRadius = minRadius; this.maxRadius = maxRadius; this.step = step; this.verify = verify; this.numRun = (int) Math.round((maxRadius - minRadius) / step) + 1; this.frag = frag; this.resultsFileName = resultsFileName; this.outputToFile = false; this.outputFile = ""; this.firstQuery = firstQuery; this.lastQuery = lastQuery; this.forPrint = forPrint; if (verify) this.linearIndex = index.getAllPoints(); else this.linearIndex = null; this.debug = debug; this.pathLength = pathLength; // allocate space for statistics // number of distance calculation distNum = new double[numRun]; minDistNum = new int[numRun]; minDistNumID = new int[numRun]; maxDistNum = new int[numRun]; maxDistNumID = new int[numRun]; centerDistNum = new double[numRun]; minCenterDistNum = new int[numRun]; minCenterDistNumID = new int[numRun]; maxCenterDistNum = new int[numRun]; maxCenterDistNumID = new int[numRun]; dataDistNum = new double[numRun]; minDataDistNum = new int[numRun]; minDataDistNumID = new int[numRun]; maxDataDistNum = new int[numRun]; maxDataDistNumID = new int[numRun]; // number of index node visited nodeVisited = new double[numRun]; minNodeVisited = new int[numRun]; minNodeVisitedID = new int[numRun]; maxNodeVisited = new int[numRun]; maxNodeVisitedID = new int[numRun]; // number of nodes containing at least on query result resultNode = new double[numRun]; resultInternalNode = new double[numRun]; resultLeafNode = new double[numRun]; // pivotDistNum = new double[numRun]; pivotAsResult = new double[numRun]; resultWithoutDistance = new double[numRun]; internalWithoutDist = new double[numRun]; leafWithoutDist = new double[numRun]; internalPruned = new double[numRun]; leafPruned = new double[numRun]; internalVisited = new double[numRun]; minInternalVisited = new int[numRun]; minInternalVisitedID = new int[numRun]; maxInternalVisited = new int[numRun]; maxInternalVisitedID = new int[numRun]; leafVisited = new double[numRun]; minLeafVisited = new int[numRun]; minLeafVisitedID = new int[numRun]; maxLeafVisited = new int[numRun]; maxLeafVisitedID = new int[numRun]; // search time time = new double[numRun]; minTime = new double[numRun]; minTimeID = new int[numRun]; maxTime = new double[numRun]; maxTimeID = new int[numRun]; // result number result = new double[numRun]; minResult = new int[numRun]; minResultID = new int[numRun]; maxResult = new int[numRun]; maxResultID = new int[numRun]; queryCenterDistance = new double[numRun][][]; layerNode = new int[numRun][]; layerNodeVisited = new int[numRun][]; layerData = new int[numRun][]; layerDataVisited = new int[numRun][]; } /** * the primary method to do the evaluation. It first run the queries for * each query, during which the statistics are set, then output. */ public void evaluate(List<IndexObject> resultList, List<radiusData> queryRadiusData) { // final double delta = 1e-6; // run queries for each query, and so that set the statistics double radius = 0; for (int i = 0; i < this.numRun; i++) // for (double radius = minRadius; radius <= maxRadius; radius += step) { radius = this.minRadius + this.step * i; evaluateRadius(radius, resultList); queryRadiusData.add(new radiusData(radius, resultList.size())); } // output /* * System.out.println("index size, radius, #distance calculation(center, * data object), #node visited(internal, leaf), search time, #result"); * for (int i=0; i< numRun; i++) System.out.println(indexSize + ", " + * (minRadius + i*step) + ", " + distNum[i] + ", (," + centerDistNum[i] * + ", " + dataDistNum[i] + ",), " + nodeVisited[i] + ", (," + * internalVisited[i] + ", " + leafVisited[i] + ",), " + time[i] + ", " * + result[i] + ", "); System.out.println(); System.out.println(); */ DecimalFormat fmt = new DecimalFormat("0.#####"); // for 3 decimal // places // output detail statitstics if (outputToFile && !outputFile.equals("")) { // open file for output PrintWriter outputStream = null; try { outputStream = new PrintWriter(new FileOutputStream(outputFile)); outputStream .println("index size, radius, #distance calculation(center, data object), #node visited(internal, leaf), search time, #result, #layerNodeVisited, #layerDataVisited, queryCenterDistance"); for (int i = 0; i < numRun; i++) { outputStream.println(); outputStream.println(indexSize + ", " + fmt.format((minRadius + i * step)) + ", #dist:[, " + distNum[i] + " ,(, " + (minDistNum[i]) + " ,:, " + (minDistNumID[i]) + " ,) (, " + (maxDistNum[i]) + " ,:, " + (maxDistNumID[i]) + " ,)] " + "{ [, " + (centerDistNum[i]) + " ,(, " + (minCenterDistNum[i]) + " ,:, " + (minCenterDistNumID[i]) + " ,) (, " + (maxCenterDistNum[i]) + " ,:, " + (maxCenterDistNumID[i]) + " ,)] " + "[, " + (dataDistNum[i]) + " ,(, " + (minDataDistNum[i]) + " ,:, " + (minDataDistNumID[i]) + " ,) (, " + (maxDataDistNum[i]) + " ,:, " + (maxDataDistNumID[i]) + " ,)]}"); outputStream.println(indexSize + ", " + (minRadius + i * step) + ", #node: [, " + nodeVisited[i] + " ,(, " + minNodeVisited[i] + " ,:, " + minNodeVisitedID[i] + " ,) (, " + maxNodeVisited[i] + " ,:, " + maxNodeVisitedID[i] + " ,)] " + "{ [, " + internalVisited[i] + " ,(, " + minInternalVisited[i] + " ,:, " + minInternalVisitedID[i] + " ,) (, " + maxInternalVisited[i] + " ,:, " + maxInternalVisitedID[i] + " ,)] " + "[, " + leafVisited[i] + " ,(, " + minLeafVisited[i] + " ,:, " + minLeafVisitedID[i] + " ,) (, " + maxLeafVisited[i] + " ,:, " + maxLeafVisitedID[i] + " ,) ]}"); outputStream.println(indexSize + ", " + (minRadius + i * step) + ", time: [, " + time[i] + " ,(, " + minTime[i] + " ,:, " + minTimeID[i] + " ,) (, " + maxTime[i] + " ,:, " + maxTimeID[i] + " ,)]"); outputStream.println(indexSize + ", " + (minRadius + i * step) + ", #result: [, " + result[i] + " ,(, " + minResult[i] + " ,:, " + minResultID[i] + " ,)(, " + maxResult[i] + " ,:, " + maxResultID[i] + " ,)]"); outputStream.print(indexSize + ", " + (minRadius + i * step) + ", #layer node: [ "); for (int j = 0; j < layerNode[i].length; j++) outputStream.print("( " + j + ":, " + layerNodeVisited[i][j] + " / " + layerNode[i][j] + "), "); outputStream.println("]"); outputStream.print(indexSize + ", " + (minRadius + i * step) + ", #layer data object: [ "); for (int j = 0; j < layerData[i].length; j++) outputStream.print("(" + j + ":, " + layerDataVisited[i][j] + " / " + layerData[i][j] + "), "); outputStream.println("]"); outputStream.print(indexSize + ", " + (minRadius + i * step) + ", query-center dist: [ "); for (int j = 0; j < queryCenterDistance[i].length; j++) { outputStream.print(j + ": (,"); for (int k = 0; k < queryCenterDistance[i][j].length; k++) outputStream.print(fmt .format(queryCenterDistance[i][j][k]) + ", "); outputStream.print(") "); } outputStream.println(" ]"); } outputStream.close(); } catch (IOException e) { e.printStackTrace(); System.out.println("\nError: Problem creating output file \"" + outputFile + "\". Program aborted."); System.exit(0); } System.out.println("Finished writing statistics file \"" + outputFile + "\" to disk."); } else { System.out .println("index size, radius, #distance calculation, #node visited, search time, #result,,#result node, #result internal node, #result leaf node, internalPruned, leafPruned, pivotDistNum, dataDistNum, pivotAsResult, ResultWithoutDist, internalWithoutDist, leafWithoutDist"); for (int i = 0; i < numRun; i++) { System.out.println(forPrint + indexSize + ", " + fmt.format((minRadius + i * step)) + ", " + distNum[i] + ", " + nodeVisited[i] + ", " + time[i] + ", " + result[i] + ", " + resultNode[i] + ", " + resultInternalNode[i] + ", " + resultLeafNode[i] + ", " + internalPruned[i] + ", " + leafPruned[i] + ", " + centerDistNum[i] + ", " + dataDistNum[i] + ", " + pivotAsResult[i] + ", " + resultWithoutDistance[i] + ", " + internalWithoutDist[i] + ", " + leafWithoutDist[i]); } } // end of else } /** * evaluate the index with one fixed range query radius and all the queries. * each query is run with the given radius, search statistics are collect, * and compute average and min, max values. the average, min and max values * are store in the corresponding data fields * * @param radius * the search radius */ @SuppressWarnings("rawtypes") void evaluateRadius(double radius, List<IndexObject> resultList) { // System.out.println(radius); // if (radius ==2) // System.out.println(radius); final int offset = (int) Math.round((radius - minRadius) / step); // the // ordered // id // of // this run among all // the runs. // initialize statistics distNum[offset] = 0; // distance calculation number minDistNum[offset] = Integer.MAX_VALUE; maxDistNum[offset] = Integer.MIN_VALUE; centerDistNum[offset] = 0; minCenterDistNum[offset] = Integer.MAX_VALUE; maxCenterDistNum[offset] = Integer.MIN_VALUE; dataDistNum[offset] = 0; minDataDistNum[offset] = Integer.MAX_VALUE; maxDataDistNum[offset] = Integer.MIN_VALUE; resultNode[offset] = 0; resultInternalNode[offset] = 0; resultLeafNode[offset] = 0; // pivotDistNum[offset] = 0; // dataDistNum[offset] = 0; internalPruned[offset] = 0; leafPruned[offset] = 0; pivotAsResult[offset] = 0; resultWithoutDistance[offset] = 0; internalWithoutDist[offset] = 0; leafWithoutDist[offset] = 0; nodeVisited[offset] = 0; // index node visited number minNodeVisited[offset] = Integer.MAX_VALUE; maxNodeVisited[offset] = Integer.MIN_VALUE; internalVisited[offset] = 0; minInternalVisited[offset] = Integer.MAX_VALUE; maxInternalVisited[offset] = Integer.MIN_VALUE; leafVisited[offset] = 0; minLeafVisited[offset] = Integer.MAX_VALUE; maxLeafVisited[offset] = Integer.MIN_VALUE; time[offset] = 0; // search time minTime[offset] = Double.POSITIVE_INFINITY; maxTime[offset] = Double.NEGATIVE_INFINITY; result[offset] = 0; // search result number minResult[offset] = Integer.MAX_VALUE; maxResult[offset] = Integer.MIN_VALUE; queryCenterDistance[offset] = new double[1][1]; layerNode[offset] = new int[1]; layerNodeVisited[offset] = new int[1]; layerData[offset] = new int[1]; layerDataVisited[offset] = new int[1]; PrintWriter resultsFile = null; if (resultsFileName != null) { try { resultsFile = new PrintWriter(new BufferedWriter( new FileWriter(resultsFileName))); } catch (IOException e) { e.printStackTrace(); } } // start running each query List allQuery = query.getData(); List query2 = allQuery.subList((firstQuery < 0) ? 0 : firstQuery, (lastQuery > allQuery.size()) ? allQuery.size() : lastQuery); Iterator p = query2.iterator(); int queryCounter = -1; while (p.hasNext()) { queryCounter++; RangeQuery q = new RangeQuery((IndexObject) p.next(), radius, pathLength); if (resultsFile != null) { resultsFile.println(queryCounter + ": " + q.getQueryObject()); } // List<IndexObject> resultList = new ArrayList<IndexObject>(); final double startTime = System.currentTimeMillis(); VPRangeCursor cursor = (VPRangeCursor) index.search(q); @SuppressWarnings("unused") int numResults = 0; while (cursor.hasNext()) { IndexObject iObject = ((DoubleIndexObjectPair) cursor.next()) .getObject(); if (resultsFile != null) { resultsFile.println(iObject); } // print each result resultList.add(iObject); numResults++; } final double endTime = System.currentTimeMillis(); // set statistics final double t = (endTime - startTime) / 1000; // search time time[offset] += t; if (minTime[offset] > t) { minTime[offset] = t; minTimeID[offset] = queryCounter; } if (maxTime[offset] < t) { maxTime[offset] = t; maxTimeID[offset] = queryCounter; } final int r = resultList.size(); // result number result[offset] += r; if (minResult[offset] > r) { minResult[offset] = r; minResultID[offset] = queryCounter; } if (maxResult[offset] < r) { maxResult[offset] = r; maxResultID[offset] = queryCounter; } int[] temp1 = cursor.getDistanceCalculationNumber(); // distance // calculation // number distNum[offset] += temp1[0]; if (minDistNum[offset] > temp1[0]) { minDistNum[offset] = temp1[0]; minDistNumID[offset] = queryCounter; } if (maxDistNum[offset] < temp1[0]) { maxDistNum[offset] = temp1[0]; maxDistNumID[offset] = queryCounter; } centerDistNum[offset] += temp1[1]; if (minCenterDistNum[offset] > temp1[1]) { minCenterDistNum[offset] = temp1[1]; minCenterDistNumID[offset] = queryCounter; } if (maxCenterDistNum[offset] < temp1[1]) { maxCenterDistNum[offset] = temp1[1]; maxCenterDistNumID[offset] = queryCounter; } int temp12 = temp1[2]; dataDistNum[offset] += temp12; if (minDataDistNum[offset] > temp12) { minDataDistNum[offset] = temp12; minDataDistNumID[offset] = queryCounter; } if (maxDataDistNum[offset] < temp12) { maxDataDistNum[offset] = temp12; maxDataDistNumID[offset] = queryCounter; } temp1 = cursor.getNodeVisitedNumber(); // node visited number nodeVisited[offset] += temp1[0]; if (minNodeVisited[offset] > temp1[0]) { minNodeVisited[offset] = temp1[0]; minNodeVisitedID[offset] = queryCounter; } if (maxNodeVisited[offset] < temp1[0]) { maxNodeVisited[offset] = temp1[0]; maxNodeVisitedID[offset] = queryCounter; } internalVisited[offset] += temp1[1]; if (minInternalVisited[offset] > temp1[1]) { minInternalVisited[offset] = temp1[1]; minInternalVisitedID[offset] = queryCounter; } if (maxInternalVisited[offset] < temp1[1]) { maxInternalVisited[offset] = temp1[1]; maxInternalVisitedID[offset] = queryCounter; } temp12 = temp1[0] - temp1[1]; leafVisited[offset] += temp12; if (minLeafVisited[offset] > temp12) { minLeafVisited[offset] = temp12; minLeafVisitedID[offset] = queryCounter; } if (maxLeafVisited[offset] < temp12) { maxLeafVisited[offset] = temp12; maxLeafVisitedID[offset] = queryCounter; } resultNode[offset] += cursor.getResultNodeNumber(); resultInternalNode[offset] += cursor.getResultInternalNodeNumber(); resultLeafNode[offset] += cursor.getResultLeafNodeNumber(); pivotAsResult[offset] += cursor.getPivotAsResult(); resultWithoutDistance[offset] += cursor.getResultWithoutDist(); internalWithoutDist[offset] += cursor.getInternalWithoutDist(); leafWithoutDist[offset] += cursor.getLeafWithoutDist(); internalPruned[offset] += cursor.getInternalPruned(); leafPruned[offset] += cursor.getLeafPruned(); /* * double [][] temp2 = cursor.getQueryPivotDistance(); * //query-center distance if ( temp2.length > * queryCenterDistance[offset].length) //re-allocate memory if * necessary { double [][]temp = queryCenterDistance[offset]; * queryCenterDistance[offset] = new double [temp2.length][]; for * (int i=0; i< temp.length; i++) { if ( temp2[i].length > * temp[i].length) { queryCenterDistance[offset][i] = new double [ * temp2[i].length ]; System.arraycopy(temp[i], 0, * queryCenterDistance[offset][i], 0, temp[i].length); for (int j= * temp[i].length; j< temp2[i].length; j++) * queryCenterDistance[offset][i][j] = 0; } else * queryCenterDistance[offset][i] = temp[i]; } for (int i= * temp.length; i< temp2.length; i++) { * queryCenterDistance[offset][i] = new double [ temp2[i].length ]; * for (int j=0; j< temp2[i].length; j++) * queryCenterDistance[offset][i][j] = 0; } } for (int i=0; i< * temp2.length; i++) { if ( temp2[i].length > * queryCenterDistance[offset][i].length) //reallocate memory if * necessary { double [] temp = queryCenterDistance[offset][i]; * queryCenterDistance[offset][i] = new double [ temp2[i].length ]; * System.arraycopy(temp, 0, queryCenterDistance[offset][i], 0, * temp.length); for (int j= temp.length; j< temp2[i].length; j++) * queryCenterDistance[offset][i][j] = 0; } for (int j=0; j< * temp2[i].length; j++) { //System.out.println(queryCounter + ": " * + queryCenterDistance.length + ": " + offset + ", " + * queryCenterDistance[offset].length + ": " + temp2.length + ": " + * i + ", " + queryCenterDistance[offset][i].length + ": " + * temp2[i].length + ": " + j); queryCenterDistance[offset][i][j] += * temp2[i][j]; } } int[][] temp3 = cursor.getLevelNodeVisited(); // * layer node visited if ( layerNode[offset].length < temp3.length) * //re-allocate if necessary { int [] temp = layerNode[offset]; * layerNode[offset] = new int[ temp3.length ]; * System.arraycopy(temp, 0, layerNode[offset], 0, temp.length); for * (int i= temp.length; i< temp3.length; i++) layerNode[offset][i] = * 0; temp = layerNodeVisited[offset]; layerNodeVisited[offset] = * new int [temp3.length]; System.arraycopy(temp, 0, * layerNodeVisited[offset], 0, temp.length); for (int * i=temp.length; i<temp3.length; i++) layerNodeVisited[offset][i] = * 0; } for (int i=0;i<temp3.length; i++) { layerNode[offset][i] += * temp3[i][0]; layerNodeVisited[offset][i] += temp3[i][1]; } temp3 * = cursor.getLevelPointVisited(); // layer data visited if ( * layerData[offset].length < temp3.length) //re-allocate if * necessary { int [] temp = layerData[offset]; layerData[offset] = * new int[ temp3.length ]; System.arraycopy(temp, 0, * layerData[offset], 0, temp.length); for (int i= temp.length; i< * temp3.length; i++) layerData[offset][i] = 0; temp = * layerDataVisited[offset]; layerDataVisited[offset] = new int * [temp3.length]; System.arraycopy(temp, 0, * layerDataVisited[offset], 0, temp.length); for (int * i=temp.length; i<temp3.length; i++) layerDataVisited[offset][i] = * 0; } for (int i=0;i<temp3.length; i++) { layerData[offset][i] += * temp3[i][0]; layerDataVisited[offset][i] += temp3[i][1]; } */ // verify the search results if (verify) if (!verifyResult(resultList, q)) { System.out.println("Inconsistent search results! query: " + queryCounter + ", radius: " + radius + " !"); System.exit(-1); } } // end of while for all queries querySize = queryCounter + 1; // System.out.println("query number=" + querySize); // compute average values for all statistics distNum[offset] /= querySize; centerDistNum[offset] /= querySize; dataDistNum[offset] /= querySize; nodeVisited[offset] /= querySize; internalVisited[offset] /= querySize; leafVisited[offset] /= querySize; resultNode[offset] /= querySize; resultInternalNode[offset] /= querySize; resultLeafNode[offset] /= querySize; internalPruned[offset] /= querySize; leafPruned[offset] /= querySize; pivotAsResult[offset] /= querySize; resultWithoutDistance[offset] /= querySize; internalWithoutDist[offset] /= querySize; leafWithoutDist[offset] /= querySize; time[offset] /= querySize; result[offset] /= querySize; for (int j = 0; j < queryCenterDistance[offset].length; j++) for (int k = 0; k < queryCenterDistance[offset][j].length; k++) queryCenterDistance[offset][j][k] /= querySize; for (int j = 0; j < layerData[offset].length; j++) { layerData[offset][j] /= querySize; layerDataVisited[offset][j] /= querySize; } for (int j = 0; j < layerNode[offset].length; j++) { layerNode[offset][j] /= querySize; layerNodeVisited[offset][j] /= querySize; } if (resultsFileName != null) { resultsFile.flush(); resultsFile.close(); System.out.println("results saved to: " + resultsFileName); } } /** * verify the search results of a query by a linear scan * * @param resultList * the results of query * @param predicate * the query */ boolean verifyResult(List<IndexObject> resultList, RangeQuery q) { if (resultList == null) resultList = new ArrayList<IndexObject>(0); /* * System.out.println("result list number:" + resultList.size()); * for(IndexObject o: resultList) System.out.println(o); */ // System.out.println("linear index size=" + linearIndex.size()); Iterator<IndexObject> p = linearIndex.iterator(); IndexObject data; while (p.hasNext()) { data = p.next(); // System.out.println(data); // System.out.println(resultList.get(0).getClass()); // System.out.println(data.getClass()); if (metric.getDistance(data, q.getQueryObject()) <= q.getRadius()) if (!resultList.remove(data)) { System.out .println("Found: linearscan result not in index resultset: " + data.toString() + ", query=" + q.getQueryObject()); return false; } } if (resultList.size() != 0) { System.out .println("Found: index result not returned by linear scan. Query =" + q.getQueryObject()); for (IndexObject o : resultList) System.out.println(o); return false; } else return true; } /* * static public Pair loadSequenceQuery(String fileName, final int first, * final int last) { Metric metric = * Metrics.globalSequenceMetric(Metrics.mPAM250aExtendedAminoAcidsMetric); * DataLoader loader = new edu.utexas.GeDBIT.util.SegmentLoader( * edu.utexas.GeDBIT.type.Sequences.ExtendedAminoAcidsAlphabet ); //load * data from file List query = null; try { query = loader.loadData( new * java.io.BufferedReader( new java.io.FileReader(fileName)), last+1 ); } * catch (Exception e) { e.printStackTrace(); } query = query.subList(first, * last+1); //SegmentPairLoader return a list of Pairs List temp = query; * query = new ArrayList( temp.size() ); Iterator p = temp.iterator(); while * ( p.hasNext() ) query.add ( ( (Pair) p.next() ).first() ); return new * Pair( metric, query); } static public Pair loadVectorQuery(String * fileName, final int first, final int last) { Metric metric = * edu.utexas.GeDBIT.dist.Metrics.EuclideanDistanceMetric; DataLoader loader * = new edu.utexas.GeDBIT.util.DoubleVectorLoader() ; //load data from file * List query = null; try { query = loader.loadData( new * java.io.BufferedReader( new java.io.FileReader(fileName)), last+1 ); } * catch (Exception e) { e.printStackTrace(); } query = query.subList(first, * last+1); //SegmentPairLoader return a list of Pairs List temp = query; * query = new ArrayList( temp.size() ); Iterator p = temp.iterator(); while * ( p.hasNext() ) query.add ( ( (Pair) p.next() ).first() ); return new * Pair( metric, query); } static public Pair loadHammingQuery(String * fileName, final int first, final int last) { Metric metric = new * edu.utexas.GeDBIT.dist.SimpleHammingDistance(); DataLoader loader = new * edu.utexas.GeDBIT.util.LineStringDataLoader(); //load data from file List * query = null; try { query = loader.loadData( new java.io.BufferedReader( * new java.io.FileReader(fileName)), last+2 ); } catch (Exception e) { * e.printStackTrace(); } query = query.subList(first+1, last+2); * //SegmentPairLoader return a list of Pairs List temp = query; query = new * ArrayList( temp.size() ); Iterator p = temp.iterator(); while ( * p.hasNext() ) query.add ( new java.math.BigInteger( (String) p.next(),2 ) * ); return new Pair( metric, query); } */ }