package GeDBIT.parallel.app;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.rmi.Naming;
import java.rmi.NotBoundException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import GeDBIT.dist.Metric;
import GeDBIT.index.Index;
import GeDBIT.index.RangeQuery;
import GeDBIT.index.TableManager;
import GeDBIT.index.VPIndex;
import GeDBIT.index.VPRangeCursor;
import GeDBIT.parallel.WorkThreadUtil;
import GeDBIT.parallel.rmi.GlobalIndex;
import GeDBIT.type.DNATable;
import GeDBIT.type.DoubleVector;
import GeDBIT.type.DoubleVectorTable;
import GeDBIT.type.ImageTable;
import GeDBIT.type.IndexObject;
import GeDBIT.type.PeptideTable;
import GeDBIT.type.Sequence;
import GeDBIT.type.SpectraWithPrecursorMassTable;
import GeDBIT.type.Table;
/**
 * Utility class to query a {@link VPIndex} in parallel as an RMI client. Taking
 * command line parameters, it runs a set of queries on the given index and
 * computes the average performance.
 *
 * <p>The {@code main} method evaluates the query performance of an
 * {@link Index}: it runs a set of queries on the index and reports the total
 * time used.
 *
 * <p>Command line options:
 * <ul>
 * <li>-d [name of index, should be a prefix of the actual file name containing serialized database]
 * <li>-q [query file name]
 * <li>-f [offset of first query to be used in the query file, start from 0, inclusive, default 0]
 * <li>-l [offset of last query to be used in the query file, exclusive, default 1]
 * <li>-i [minimum search radius, default 0.0]
 * <li>-a [maximum search radius, default 10.0]
 * <li>-s [step size for search radius, default 1]
 * <li>-t [data type, "vector", "protein", "dna", "image", "msms"]
 * <li>-p [length of the path distance list]
 * <li>-v [1 if search results are to be verified against a linear scan and 0 otherwise, default 0]
 * <li>-g [debug level, default 0]
 * <li>-frag [fragment length, only meaningful for {@link Sequence}s]
 * <li>-dim [dimension of vector data to load, only meaningful for {@link DoubleVector}s]
 * <li>-res [output results to the given filename]
 * <li>-st [subtree number for parallel query]
 * <li>-sv [subtree servers ip configuration file name]
 * <li>-pr [subtree servers print result objects or not, default 0]
 * <li>-qw [wait each query finish or not, default 0]
 * </ul>
 * The {@link Metric} is hardcoded for each given data type.
 *
 * @author Rui Mao, Miaojie Feng
 * @version 2012.12.29
 */
public class QueryGlobalVPIndex {

    /** RMI addresses of the subtree servers, one per line in the -sv file. */
    public static String[] servers = null;

    /** Remote {@link GlobalIndex} stubs, keyed by 0-based subtree position. */
    public static HashMap<Integer, GlobalIndex> globalIndexs = null;

    // Table holding the query objects loaded from the -q file.
    Table query;
    // Free-form prefix echoed in the final result line (-forprint values).
    String forPrint;
    final Metric metric;
    final Index index;
    final int indexSize;
    int firstQuery;
    int lastQuery;
    final double minRadius;
    final int frag;
    final double maxRadius;
    final int pathLength;
    int querySize;
    // Number of radii evaluated: one per step from minRadius to maxRadius.
    final int numRun;
    final double step;
    final Level debug;
    final boolean verify;

    /**
     * Entry point: parses options, loads the index and the query table,
     * initializes the remote subtree servers, and runs the evaluation.
     *
     * @param args command line options, see the class Javadoc
     * @throws IllegalArgumentException on an unknown option or invalid values
     * @throws Error if the index cannot be loaded, has the wrong subtree
     *         count, or is not a {@link VPIndex}
     */
    public static void main(String[] args) {
        String indexName = "";
        String queryFileName = "";
        String forPrint = "";
        int firstQuery = 0;
        int lastQuery = 1;
        double minRadius = 0.0;
        double maxRadius = 10.0;
        double step = 1.0;
        int frag = 6;
        int dim = 2;
        boolean qw = false;
        int pathLength = 0;
        int subtree = 1;
        String serversFileName = "";
        // NOTE(review): the default "sequence" is not among the types handled
        // below, so -t must be given explicitly for a query table to load.
        String dataType = "sequence";
        // don't support yet
        String resultsFileName = null;
        Level debug = Level.OFF;
        boolean verify = false;

        // parse arguments, and set values; options come in (flag, value) pairs
        for (int i = 0; i < args.length; i = i + 2) {
            if (args[i].equalsIgnoreCase("-d"))
                indexName = args[i + 1];
            else if (args[i].equalsIgnoreCase("-q"))
                queryFileName = args[i + 1];
            else if (args[i].equalsIgnoreCase("-st"))
                subtree = Integer.parseInt(args[i + 1]);
            else if (args[i].equalsIgnoreCase("-qw"))
                qw = Integer.parseInt(args[i + 1]) == 1;
            else if (args[i].equalsIgnoreCase("-sv"))
                serversFileName = args[i + 1];
            else if (args[i].equalsIgnoreCase("-t"))
                dataType = args[i + 1];
            else if (args[i].equalsIgnoreCase("-forprint"))
                forPrint += args[i + 1] + ", ";
            else if (args[i].equalsIgnoreCase("-f"))
                firstQuery = Integer.parseInt(args[i + 1]);
            else if (args[i].equalsIgnoreCase("-l"))
                lastQuery = Integer.parseInt(args[i + 1]);
            else if (args[i].equalsIgnoreCase("-p"))
                pathLength = Integer.parseInt(args[i + 1]);
            else if (args[i].equalsIgnoreCase("-i"))
                minRadius = Double.parseDouble(args[i + 1]);
            else if (args[i].equalsIgnoreCase("-a"))
                maxRadius = Double.parseDouble(args[i + 1]);
            else if (args[i].equalsIgnoreCase("-s"))
                step = Double.parseDouble(args[i + 1]);
            else if (args[i].equalsIgnoreCase("-g"))
                debug = Level.parse(args[i + 1]);
            else if (args[i].equalsIgnoreCase("-frag"))
                frag = Integer.parseInt(args[i + 1]);
            else if (args[i].equalsIgnoreCase("-dim"))
                dim = Integer.parseInt(args[i + 1]);
            else if (args[i].equalsIgnoreCase("-v"))
                verify = Integer.parseInt(args[i + 1]) == 1;
            else if (args[i].equalsIgnoreCase("-res"))
                resultsFileName = args[i + 1];
            else if (args[i].equalsIgnoreCase("-pr"))
                // value is consumed but ignored here; -pr is forwarded to the
                // servers via the raw args array in initializeServers
                continue;
            else
                throw new IllegalArgumentException("Invalid option " + args[i]);
        }

        // check arguments (content equality, not reference comparison)
        if (indexName.isEmpty())
            throw new IllegalArgumentException("Invalid Index file name!");
        if (queryFileName.isEmpty())
            throw new IllegalArgumentException("Invalid Query file name!");
        if ((firstQuery < 0) || (lastQuery < 0) || (lastQuery < firstQuery))
            throw new IllegalArgumentException(
                    "Invalid first query index or last query index!");
        if ((minRadius < 0) || (maxRadius < 0) || (maxRadius < minRadius)
                || (step <= 0))
            throw new IllegalArgumentException(
                    "Invalid min radius, max radius, or radius increasement unit!");

        // load index from file
        Table dataTable = TableManager.getTableManager(indexName).getTable(
                indexName);
        Index index;
        if (dataTable != null)
            index = dataTable.getIndex();
        else
            throw new Error("index: " + indexName + " does not exist");

        // only VPIndex is supported, and its subtree count must match -st
        if (index instanceof VPIndex) {
            if (((VPIndex) index).getSubtree() != subtree) {
                throw new Error("index: " + indexName + " with wrong subtree");
            }
        } else
            throw new Error("index: " + indexName + " does not support");

        // load the query table for the given data type
        Table queryTable = null;
        try {
            if (dataType.equalsIgnoreCase("protein"))
                queryTable = new PeptideTable(queryFileName, "", lastQuery,
                        frag);
            else if (dataType.equalsIgnoreCase("vector"))
                queryTable = new DoubleVectorTable(queryFileName, "",
                        lastQuery, dim);
            else if (dataType.equalsIgnoreCase("dna"))
                queryTable = new DNATable(queryFileName, "", lastQuery, frag);
            else if (dataType.equalsIgnoreCase("image"))
                queryTable = new ImageTable(queryFileName, "", lastQuery);
            else if (dataType.equalsIgnoreCase("msms"))
                queryTable = new SpectraWithPrecursorMassTable(queryFileName,
                        "", lastQuery);
            else
                System.err.println("data type not supported! " + dataType);
        } catch (IOException e) {
            e.printStackTrace();
        }

        // initialize remote servers and thread pool
        initializeServers(serversFileName, subtree, qw, args);

        QueryGlobalVPIndex evaluator = new QueryGlobalVPIndex(index,
                queryTable, minRadius, maxRadius, step, verify, debug,
                pathLength, frag, resultsFileName, firstQuery, lastQuery,
                forPrint);
        evaluator.evaluate();
    }

    /**
     * Creates an evaluator over a loaded index and query table.
     *
     * @param index the index to query, must not be null
     * @param query the query table, must not be null
     * @param minRadius minimum search radius, non-negative
     * @param maxRadius maximum search radius, at least {@code minRadius}
     * @param step radius increment per run, positive
     * @param verify whether to verify results against a linear scan (unused here)
     * @param debug logging level
     * @param pathLength length of the path distance list passed to each query
     * @param frag fragment length (sequences only)
     * @param resultsFileName output file name (not yet supported)
     * @param firstQuery inclusive offset of first query to run
     * @param lastQuery exclusive offset of last query to run
     * @param forPrint prefix echoed in the result output line
     * @throws IllegalArgumentException if index/query is null or the radius
     *         parameters are inconsistent
     */
    public QueryGlobalVPIndex(Index index, Table query, double minRadius,
            double maxRadius, double step, boolean verify, Level debug,
            int pathLength, int frag, String resultsFileName, int firstQuery,
            int lastQuery, String forPrint) {
        // check argument
        if (index == null)
            throw new IllegalArgumentException(" The Index is null!");
        if (query == null)
            throw new IllegalArgumentException(" The query list is null!");
        if ((minRadius < 0) || (maxRadius < 0) || (maxRadius < minRadius)
                || (step <= 0))
            throw new IllegalArgumentException(
                    "Invalid min radius, max radius, or radius increasement unit!");

        this.metric = index.getMetric();
        this.index = index;
        this.indexSize = index.size();
        this.query = query;
        this.querySize = query.size();
        this.minRadius = minRadius;
        this.maxRadius = maxRadius;
        this.step = step;
        // one run per radius step, inclusive of both endpoints
        this.numRun = (int) Math.round((maxRadius - minRadius) / step) + 1;
        this.frag = frag;
        this.firstQuery = firstQuery;
        this.lastQuery = lastQuery;
        this.pathLength = pathLength;
        this.forPrint = forPrint;
        // don't support yet
        this.verify = verify;
        this.debug = debug;
    }

    /**
     * Reads server addresses from {@code file} (one per line, at most
     * {@code subtree} lines), looks up the remote {@link GlobalIndex} of each
     * server via RMI at {@code rmi://<address>/global}, forwards the raw
     * command line to it, and sets up the local worker-thread pool.
     *
     * <p>All I/O and RMI failures are reported to stderr; the thread pool is
     * created regardless, matching the original best-effort behavior.
     *
     * @param file name of the server configuration file (-sv)
     * @param subtree number of subtree servers expected (-st)
     * @param qw whether to wait for each query to finish (-qw)
     * @param args the full original command line, forwarded to each server
     */
    private static void initializeServers(String file, int subtree, boolean qw,
            String[] args) {
        try {
            servers = new String[subtree];
            globalIndexs = new HashMap<Integer, GlobalIndex>();
            // try-with-resources: the reader is closed even on RMI failure
            try (BufferedReader br = new BufferedReader(new InputStreamReader(
                    new FileInputStream(file)))) {
                String data = null;
                int counter = 0;
                while ((data = br.readLine()) != null && counter < subtree) {
                    servers[counter++] = data;
                }
                GlobalIndex index = null;
                for (int i = 0; i < subtree; i++) {
                    // servers are numbered 1..subtree on the remote side
                    index = (GlobalIndex) Naming.lookup("rmi://" + servers[i]
                            + "/global");
                    index.initialize(args, (i + 1));
                    globalIndexs.put(i, index);
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (NotBoundException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
        WorkThreadUtil.newInstance(subtree);
        WorkThreadUtil.setWaitEachQueryFinished(qw);
    }

    /**
     * Runs the selected queries once for every radius from {@code minRadius}
     * to {@code maxRadius} in increments of {@code step}, waits for all
     * remote work to finish, and prints the index size and elapsed time.
     */
    public void evaluate() {
        final double startTotal = System.currentTimeMillis();
        // run queries for each radius
        for (int i = 0; i < this.numRun; i++) {
            System.out.println("Evaluating Radius "
                    + (this.minRadius + this.step * i));
            evaluateRadius(this.minRadius + this.step * i);
        }
        WorkThreadUtil.setFinishedStatus();
        WorkThreadUtil.waitAllQueryFinished();
        final double endTotal = System.currentTimeMillis();

        // print out total time used
        // NOTE(review): the divisor is the full query-table size, not the
        // number of queries actually run ((lastQuery - firstQuery) * numRun);
        // confirm this is the intended average before relying on the figure.
        System.out.println("Index size, Search Time");
        System.out.println(forPrint + indexSize + ", "
                + ((endTotal - startTotal) / 1000) / this.querySize);
    }

    /**
     * Submits one {@link RangeQuery} per selected query object at the given
     * radius. The selection is the half-open range
     * [{@code firstQuery}, {@code lastQuery}) of the query table, clamped to
     * the table's bounds.
     *
     * @param radius the search radius for this pass
     */
    public void evaluateRadius(double radius) {
        List<? extends IndexObject> allQuery = query.getData();
        // clamp the requested window to the available data
        int from = Math.max(firstQuery, 0);
        int to = Math.min(lastQuery, allQuery.size());
        List<? extends IndexObject> selected = allQuery.subList(from, to);

        int queryCounter = -1;
        for (IndexObject queryObject : selected) {
            queryCounter++;
            RangeQuery q = new RangeQuery(queryCounter, queryObject, radius,
                    pathLength);
            VPRangeCursor cursor = (VPRangeCursor) index.search(q);
            cursor.searchResults();
        }
    }
}