/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on May 29, 2010
*/
package com.bigdata.rdf.sail.bench;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.openrdf.model.Graph;
import org.openrdf.model.impl.GraphImpl;
import org.openrdf.query.BindingSet;
import org.openrdf.query.TupleQueryResultHandlerBase;
import org.openrdf.query.resultio.TupleQueryResultParser;
import org.openrdf.query.resultio.sparqlxml.SPARQLResultsXMLParserFactory;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.RDFParserRegistry;
import org.openrdf.rio.helpers.StatementCollector;
import com.bigdata.counters.CAT;
import com.bigdata.journal.TemporaryStore;
import com.bigdata.jsr166.LinkedBlockingQueue;
import com.bigdata.rdf.axioms.NoAxioms;
import com.bigdata.rdf.sail.sparql.Bigdata2ASTSPARQLParser;
import com.bigdata.rdf.sparql.ast.ASTContainer;
import com.bigdata.rdf.sparql.ast.QueryType;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.TempTripleStore;
import com.bigdata.rdf.store.AbstractTripleStore.Options;
/**
* A flyweight utility for issuing queries to an http SPARQL endpoint.
*
* @author thompsonbry@users.sourceforge.net
*/
public class NanoSparqlClient {
/** The logger for this class. */
private static final Logger log = Logger.getLogger(NanoSparqlClient.class);
/**
 * The MIME type for a SPARQL result set in XML. This is sent as the
 * <code>Accept</code> header for ASK and SELECT queries.
 */
static final String MIME_SPARQL_RESULTS_XML = "application/sparql-results+xml";
/**
 * The MIME type for RDF/XML. This is sent as the <code>Accept</code> header
 * for DESCRIBE and CONSTRUCT queries.
 */
static final String MIME_RDF_XML = "application/rdf+xml";
/**
 * The default timeout (ms). A value of ZERO (0) means NO timeout.
 * <p>
 * Note: This is the initial value of {@link QueryOptions#timeout}, which
 * {@link QueryTask} applies to the connection as its <em>read</em> timeout.
 */
static private final int DEFAULT_TIMEOUT = 0;
// /**
// * Helper class to figure out the type of a query.
// */
// private static enum QueryType {
//
// ASK(0),
// DESCRIBE(1),
// CONSTRUCT(2),
// SELECT(3);
//
// private final int order;
//
// private QueryType(final int order) {
// this.order = order;
// }
//
// private static QueryType getQueryType(final int order) {
// switch (order) {
// case 0:
// return ASK;
// case 1:
// return DESCRIBE;
// case 2:
// return CONSTRUCT;
// case 3:
// return SELECT;
// default:
// throw new IllegalArgumentException("order=" + order);
// }
// }
//
// /**
// * Used to note the offset at which a keyword was found.
// */
// static private class P implements Comparable<P> {
//
// final int offset;
// final QueryType queryType;
//
// public P(final int offset, final QueryType queryType) {
// this.offset = offset;
// this.queryType = queryType;
// }
// /** Sort into descending offset. */
// public int compareTo(final P o) {
// return o.offset - offset;
// }
// }
//
// /**
// * Hack returns the query type based on the first occurrence of the
// * keyword for any known query type in the query.
// *
// * @param queryStr
// * The query.
// *
// * @return The query type.
// */
// static public QueryType fromQuery(final String queryStr) {
//
// // force all to lower case.
// final String s = queryStr.toUpperCase();
//
// final int ntypes = QueryType.values().length;
//
// final P[] p = new P[ntypes];
//
// int nmatch = 0;
// for (int i = 0; i < ntypes; i++) {
//
// final QueryType queryType = getQueryType(i);
//
// final int offset = s.indexOf(queryType.toString());
//
// if (offset == -1)
// continue;
//
// p[nmatch++] = new P(offset, queryType);
//
// }
//
// if (nmatch == 0) {
//
// throw new RuntimeException(
// "Could not determine the query type: " + queryStr);
//
// }
//
// Arrays.sort(p, 0/* fromIndex */, nmatch/* toIndex */);
//
// final P tmp = p[0];
//
//// System.out.println("QueryType: offset=" + tmp.offset + ", type="
//// + tmp.queryType);
//
// return tmp.queryType;
//
// }
//
// }
/**
 * Task which runs a single SPARQL query against an HTTP endpoint.
 * <p>
 * The query is parsed on the client in order to determine its
 * {@link QueryType}. The query type selects both the <code>Accept</code>
 * header for the request and the manner in which the response is consumed.
 * The outcome is reported by side effect on the {@link QueryOptions} (see
 * {@link QueryOptions#queryType}, {@link QueryOptions#nresults} and
 * {@link QueryOptions#elapsedNanos}).
 */
static public class QueryTask implements Callable<Void> {

    /** The query options (also receives the outputs of this task). */
    final QueryOptions opts;

    /**
     * @param opts
     *            The query options.
     *
     * @throws IllegalArgumentException
     *             if the argument is <code>null</code>.
     */
    public QueryTask(/* HttpClient client, */ final QueryOptions opts) {

        if (opts == null)
            throw new IllegalArgumentException();

        this.opts = opts;

    }

    /**
     * Parse the query, issue the HTTP request and consume the response.
     * <p>
     * Note: {@link QueryOptions#elapsedNanos} is assigned once the
     * connection attempt has begun (whether or not the request succeeds).
     * It is NOT assigned if the query can not be parsed or the request URL
     * can not be formed.
     *
     * @return <code>null</code> (always).
     *
     * @throws IOException
     *             if the HTTP status code is not in [200:300).
     * @throws Exception
     *             if the query can not be parsed or the response can not be
     *             read.
     */
    @Override
    public Void call() throws Exception {

        // used to measure the total execution time.
        final long begin = System.nanoTime();

        /*
         * Parse the query so we can figure out how it will need to be
         * executed.
         *
         * Note: This will fail a query on its syntax before anything is
         * sent to the server.
         */
        final ASTContainer astContainer = new Bigdata2ASTSPARQLParser()
                .parseQuery2(opts.queryStr, opts.baseURI);

        final QueryType queryType = opts.queryType = astContainer
                .getOriginalAST().getQueryType();

        if (opts.showQuery) {
            System.err.println("---- " + Thread.currentThread().getName()
                    + " : Query "
                    + (opts.source == null ? "" : " : " + opts.source)
                    + "----");
            System.err.println(opts.queryStr);
        }

        if (opts.showParseTree) {
            System.err.println("----- Parse Tree "
                    + (opts.source == null ? "" : " : " + opts.source)
                    + "-----");
            System.err.println(astContainer.getOriginalAST().toString());
        }

        // Fully formed and encoded URL @todo use */* for ASK.
        final String urlString = opts.serviceURL
                + "?query="
                + URLEncoder.encode(opts.queryStr, "UTF-8")//
                + (opts.explain?"&explain=":"")//
                + (opts.analytic!=null?"&analytic="+opts.analytic:"")//
                + (opts.defaultGraphUri == null ? ""
                        : ("&default-graph-uri=" + URLEncoder.encode(
                                opts.defaultGraphUri, "UTF-8")));

        final URL url = new URL(urlString);

        HttpURLConnection conn = null;

        try {

            /*
             * Setup connection properties.
             *
             * Note:In general GET caches but is more transparent while POST
             * does not cache.
             */
            conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod(opts.method);
            conn.setDoOutput(true);
            conn.setUseCaches(opts.useCaches);
            conn.setReadTimeout(opts.timeout);

            // Set an appropriate Accept header for the query.
            switch(queryType) {
            case DESCRIBE:
            case CONSTRUCT:
                conn.setRequestProperty("Accept", MIME_RDF_XML);
                break;
            case ASK:
            case SELECT:
                conn.setRequestProperty("Accept", MIME_SPARQL_RESULTS_XML);
                break;
            default:
                throw new UnsupportedOperationException("QueryType: "
                        + queryType);
            }

            if (log.isDebugEnabled()) {
                log.debug("*** Request ***");
                log.debug(opts.serviceURL);
                log.debug(opts.queryStr);
            }

            // connect.
            conn.connect();

            final int rc = conn.getResponseCode();

            if(rc < 200 || rc >= 300) {
                throw new IOException(rc + " : "
                        + conn.getResponseMessage()+" : "+url);
            }

            if (log.isDebugEnabled()) {
                /*
                 * @todo options to show the headers (in/out),
                 */
                log.debug("*** Response ***");
                log.debug("Status Line: " + conn.getResponseMessage());
            }

            if(opts.explain || opts.showResults) {

                // Write the response body onto stdout.
                showResults(conn);

                // Note: results not counted!
                opts.nresults = -1L;

            } else {

                // Count the solutions (or statements) in the response.
                final long nresults;
                switch (queryType) {
                case DESCRIBE:
                case CONSTRUCT:
                    nresults = buildGraph(conn).size();
                    break;
                case ASK: // I think that there are some alternative mime types for ask...
                case SELECT:
                    nresults = countResults(conn);
                    break;
                default:
                    throw new UnsupportedOperationException("QueryType: "
                            + queryType);
                }

                opts.nresults = nresults;

            }

            return null;

        } finally {

            opts.elapsedNanos = System.nanoTime() - begin;

            // clean up the connection resources
            if (conn != null)
                conn.disconnect();

        }

    } // call()

    /**
     * Write the response body on stdout.
     * <p>
     * The character encoding used to decode the body is resolved by
     * {@link #getResponseCharset(HttpURLConnection)}.
     *
     * @param conn
     *            The connection.
     *
     * @throws Exception
     */
    protected void showResults(final HttpURLConnection conn)
            throws Exception {

        final LineNumberReader r = new LineNumberReader(
                new InputStreamReader(conn.getInputStream(),
                        getResponseCharset(conn)));

        try {

            String s;
            while ((s = r.readLine()) != null) {
                System.out.println(s);
            }

        } finally {

            r.close();

        }

    }

    /**
     * Return the name of the character encoding to use when decoding the
     * response body as text.
     * <p>
     * The <code>charset</code> parameter of the <code>Content-Type</code>
     * header is used when it names a supported charset. Otherwise, for
     * backward compatibility with servers which (incorrectly) report the
     * character encoding in the <code>Content-Encoding</code> header, that
     * header is used IFF it names a supported charset. A real content
     * coding such as <code>gzip</code> is therefore ignored rather than
     * being handed to the reader as a charset name, which would fail.
     * Finally, <code>ISO-8859-1</code> is assumed.
     *
     * @param conn
     *            The connection.
     *
     * @return The name of a supported charset (never <code>null</code>).
     */
    private static String getResponseCharset(final HttpURLConnection conn) {

        final String contentType = conn.getContentType();

        if (contentType != null) {

            // media-type (; parameter)*
            final String[] parts = contentType.split(";");

            for (int i = 1; i < parts.length; i++) {

                final String param = parts[i].trim();

                if (!param.regionMatches(true/* ignoreCase */, 0,
                        "charset=", 0, 8))
                    continue;

                String name = param.substring(8).trim();

                // The parameter value may be a quoted string.
                if (name.length() >= 2 && name.startsWith("\"")
                        && name.endsWith("\"")) {
                    name = name.substring(1, name.length() - 1);
                }

                if (isSupportedCharset(name))
                    return name;

            }

        }

        // Legacy: a charset name (mis-)reported as the content encoding.
        final String contentEncoding = conn.getContentEncoding();

        if (isSupportedCharset(contentEncoding))
            return contentEncoding;

        return "ISO-8859-1";

    }

    /**
     * Return <code>true</code> iff the argument is the name of a charset
     * which is supported by this JVM.
     *
     * @param name
     *            The charset name (may be <code>null</code>).
     */
    private static boolean isSupportedCharset(final String name) {

        if (name == null || name.length() == 0)
            return false;

        try {

            return java.nio.charset.Charset.isSupported(name);

        } catch (IllegalArgumentException ex) {

            // Not a legal charset name.
            return false;

        }

    }

    /**
     * Counts the #of results in a SPARQL result set.
     * <p>
     * Note: The connection is NOT closed by this method.
     *
     * @param conn
     *            The connection from which to read the results.
     *
     * @return The #of results.
     *
     * @throws Exception
     *             If anything goes wrong.
     */
    protected long countResults(final HttpURLConnection conn) throws Exception {

        final AtomicLong nsolutions = new AtomicLong();

        final TupleQueryResultParser parser = new SPARQLResultsXMLParserFactory()
                .getParser();

        parser.setTupleQueryResultHandler(new TupleQueryResultHandlerBase() {

            // Indicates the end of a sequence of solutions.
            @Override
            public void endQueryResult() {
                // NOP (the caller is responsible for the connection).
            }

            // Handles a solution.
            @Override
            public void handleSolution(final BindingSet bset) {
                if (log.isDebugEnabled())
                    log.debug(bset.toString());
                nsolutions.incrementAndGet();
            }

            // Indicates the start of a sequence of Solutions.
            @Override
            public void startQueryResult(List<String> bindingNames) {
            }

        });

        parser.parse(conn.getInputStream());

        if (log.isInfoEnabled())
            log.info("nsolutions=" + nsolutions);

        // done.
        return nsolutions.longValue();

    } // countResults

    /**
     * Builds a graph from an RDF result set (statements, not binding sets).
     * The response is parsed as RDF/XML.
     * <p>
     * Note: The connection is NOT closed by this method.
     *
     * @param conn
     *            The connection from which to read the results.
     *
     * @return The graph
     *
     * @throws Exception
     *             If anything goes wrong.
     */
    protected Graph buildGraph(final HttpURLConnection conn) throws Exception {

        final Graph g = new GraphImpl();

        final String baseURI = "";

        final RDFParser rdfParser = RDFParserRegistry.getInstance()
                .get(RDFFormat.RDFXML)
                .getParser();

        rdfParser.setVerifyData(true);
        rdfParser.setStopAtFirstError(true);
        rdfParser.setDatatypeHandling(RDFParser.DatatypeHandling.IGNORE);
        rdfParser.setRDFHandler(new StatementCollector(g));

        rdfParser.parse(conn.getInputStream(), baseURI);

        return g;

    } // buildGraph

} // class QueryTask
/**
 * Return the contents of a file as a single string. The lines of the file
 * are joined by a newline. A trailing line terminator is not preserved.
 * <p>
 * Note: The file is decoded using the default platform character encoding.
 *
 * @param file
 *            The file (must not be a directory).
 *
 * @return The file's contents.
 *
 * @throws IllegalArgumentException
 *             if the file is a directory.
 * @throws IOException
 */
static private String readFromFile(final File file)
        throws IOException {

    if (file.isDirectory())
        throw new IllegalArgumentException();

    final LineNumberReader reader = new LineNumberReader(
            new FileReader(file));

    try {

        final StringBuilder contents = new StringBuilder();

        boolean firstLine = true;

        for (String line = reader.readLine(); line != null; line = reader
                .readLine()) {

            if (firstLine) {
                firstLine = false;
            } else {
                // separator between this line and the previous one.
                contents.append("\n");
            }

            contents.append(line);

        }

        return contents.toString();

    } finally {

        reader.close();

    }

}
/**
 * Return everything which can be read from stdin as a single string. The
 * lines are joined by a newline. A trailing line terminator is not
 * preserved. <code>System.in</code> is closed once it has been drained.
 * <p>
 * Note: The data are decoded using the default platform character encoding.
 *
 * @return The data read.
 *
 * @throws IOException
 */
static private String readFromStdin() throws IOException {

    final LineNumberReader stdin = new LineNumberReader(new InputStreamReader(System.in));

    try {

        final StringBuilder data = new StringBuilder();

        String line = stdin.readLine();

        if (line != null) {

            // The first line is not preceded by a separator.
            data.append(line);

            while ((line = stdin.readLine()) != null) {

                data.append("\n").append(line);

            }

        }

        return data.toString();

    } finally {

        stdin.close();

    }

}
/**
 * Populate the list with the plain files found by a recursive search of a
 * file or directory. Hidden files and hidden directories are skipped.
 * <p>
 * Note: {@link File#listFiles()} returns <code>null</code> when a directory
 * can not be listed (for example, an I/O error or insufficient
 * permissions). Such a directory is skipped with a warning on stderr rather
 * than failing the scan with a {@link NullPointerException}.
 *
 * @param fileOrDir
 *            The file or directory.
 * @param fileList
 *            The list to be populated.
 */
static private void getFiles(final File fileOrDir,
        final List<File> fileList) {

    if (fileOrDir.isHidden())
        return;

    if (!fileOrDir.isDirectory()) {

        // A plain file.
        fileList.add(fileOrDir);

        return;

    }

    final File[] files = fileOrDir.listFiles();

    if (files == null) {

        System.err.println("Could not list directory (skipped): "
                + fileOrDir);

        return;

    }

    for (File f : files) {

        // recursion.
        getFiles(f, fileList);

    }

}
/**
 * Read the query(s) found in each of the given files.
 *
 * @param fileList
 *            The files to be read.
 * @param delim
 *            When non-<code>null</code>, the delimiter between query
 *            strings within each file. For example, this can match a
 *            newline if there is one query per line in the file. When
 *            <code>null</code>, each file is taken to be a single query.
 *
 * @return A map from the source of each query to the query, in the order in
 *         which the queries were read. When there is no delimiter, the
 *         source is the file name. Otherwise the source is the file name
 *         followed by <code>#</code> and the sequential index (origin ONE
 *         (1)) of the query within the file. Blank queries are skipped and
 *         are not counted by that index.
 *
 * @throws IOException
 */
static private final Map<String/* src */, String/* query */> readQueries(
        final List<File> fileList, final Pattern delim) throws IOException {

    final Map<String/* src */, String/* query */> queriesBySource = new LinkedHashMap<String, String>();

    for (File file : fileList) {

        final String contents = readFromFile(file);

        if (delim == null) {

            // One query per file.
            queriesBySource.put(file.toString(), contents);

            continue;

        }

        // Multiple queries per file.
        int index = 1; // Note: Origin ONE (1).

        for (String queryStr : delim.split(contents)) {

            if (queryStr.trim().length() == 0) {
                // Skip blank lines.
                continue;
            }

            queriesBySource.put(file.toString() + "#" + index, queryStr);

            if (log.isDebugEnabled())
                log.debug("Read query: file=" + file + ", index=" + index
                        + ", query=" + queryStr);

            index++;

        }

    }

    return queriesBySource;

}
/**
 * Helper produces a random sequence of indices in the range [0:n-1]
 * suitable for visiting the elements of an array of n elements in a random
 * order. This is useful when you want to randomize the presentation of
 * elements from two or more arrays. For example, known keys and values can
 * be generated and their presentation order randomized by indexing with the
 * returned array.
 * <p>
 * Each index is paired with a random key drawn (in index order) from a
 * generator initialized with the given seed and the indices are then sorted
 * by their keys. The result is therefore a pure function of the seed and n.
 *
 * @param seed
 *            The seed for the random number generator.
 * @param n
 *            The #of indices.
 *
 * @return A permutation of the indices in [0:n-1].
 */
private static int[] getRandomOrder(final long seed, final int n) {

    final Random rnd = new Random(seed);

    final class Pair implements Comparable<Pair> {

        /** The random sort key. */
        final double r = rnd.nextDouble();

        /** The index. */
        final int val;

        Pair(final int val) {
            this.val = val;
        }

        /**
         * Order by the random key. Equal keys compare as equal, which is
         * required by the {@link Comparable} contract (the sort is free to
         * reject a comparison under which two elements are each less than
         * the other).
         */
        @Override
        public int compareTo(final Pair other) {
            if (this.r < other.r)
                return -1;
            if (this.r > other.r)
                return 1;
            return 0;
        }

    }

    final Pair[] pairs = new Pair[n];

    for (int i = 0; i < n; i++) {

        pairs[i] = new Pair(i);

    }

    java.util.Arrays.sort(pairs);

    final int order[] = new int[n];

    for (int i = 0; i < n; i++) {

        order[i] = pairs[i].val;

    }

    return order;

}
/**
 * The configuration for a query request together with the outputs which are
 * reported by {@link QueryTask} once the request has been made. Instances
 * are plain mutable value holders and may be duplicated using
 * {@link #clone()} (a shallow copy).
 */
public static class QueryOptions implements Cloneable {

    /*
     * Inputs.
     */

    /**
     * A temporary (and empty) KB which was historically handed to the
     * query parser. It is assigned by <code>main()</code>.
     */
    public AbstractTripleStore tmpKb;

    /** The URL of the SPARQL endpoint. */
    public String serviceURL;

    /** The user name (optional, authentication is not supported). */
    public String username;

    /** The password (optional, authentication is not supported). */
    public String password;

    /**
     * An identifier for where the query came from, such as the file from
     * which it was read (optional).
     */
    public String source;

    /** The SPARQL query. */
    public String queryStr;

    /** The baseURI used when parsing the query (optional). */
    public String baseURI;

    /** The default graph URI (optional). */
    public String defaultGraphUri;

    /** When <code>true</code>, request an explanation for the query. */
    public boolean explain;

    /**
     * When non-<code>null</code>, explicitly enable or disable the
     * analytic query package for the request.
     */
    public Boolean analytic;

    /** The timeout (ms) -or- ZERO (0) for no timeout. */
    public int timeout = DEFAULT_TIMEOUT;

    /**
     * The HTTP method, which is either GET or POST. In general, GET is
     * more transparent while POST is not cached.
     */
    public String method = "GET";

    /**
     * When <code>false</code>, the http connection will be directed to
     * ignore caches.
     */
    public boolean useCaches = true;

    /** When <code>true</code>, show the original query string. */
    public boolean showQuery;

    /**
     * When <code>true</code>, show the parsed operator tree (on the client
     * side).
     */
    public boolean showParseTree;

    /** When <code>true</code>, show the results of the query (on stdout). */
    public boolean showResults;

    /** When <code>true</code>, report additional information on stderr. */
    public boolean verbose;

    /** When <code>true</code>, suppress the routine progress messages. */
    public boolean quiet;

    /*
     * Outputs.
     */

    /** The type of the query (assigned when the query is parsed). */
    public QueryType queryType;

    /** The #of results -or- <code>-1</code> if they were not counted. */
    public long nresults;

    /** The elapsed time of the request (nanoseconds). */
    public long elapsedNanos;

    /**
     * Variant for which neither the service URL nor the query is given.
     * Both must be set explicitly by the caller.
     */
    public QueryOptions() {

        this(null/* serviceURL */, null/* queryStr */);

    }

    /**
     * @param serviceURL
     *            The SPARQL end point URL.
     * @param queryStr
     *            The SPARQL query.
     */
    public QueryOptions(final String serviceURL, final String queryStr) {

        this.serviceURL = serviceURL;

        this.queryStr = queryStr;

    }

    /**
     * Return a shallow copy of these options.
     */
    @Override
    public QueryOptions clone() {

        final Object copy;

        try {

            copy = super.clone();

        } catch (CloneNotSupportedException e) {

            throw new RuntimeException(e);

        }

        return (QueryOptions) copy;

    }

}
/**
 * The outcome of a single presentation of a SPARQL query. A trial either
 * succeeded, in which case it has an elapsed time and a result count, or
 * it failed, in which case it has a cause and both the elapsed time and the
 * result count are <code>-1</code>.
 *
 * @author thompsonbry
 */
private static class QueryTrial {

    /** The elapsed time (nanoseconds) -or- <code>-1</code> on failure. */
    private final long elapsedNanos;

    /** The #of results -or- <code>-1</code> on failure. */
    private final long resultCount;

    /** The cause of the failure -or- <code>null</code> on success. */
    private final Throwable cause;

    /**
     * A successful trial.
     *
     * @param elapsedTime
     *            The elapsed time (nanoseconds).
     * @param resultCount
     *            The #of results.
     */
    public QueryTrial(final long elapsedTime,final long resultCount) {

        this(elapsedTime, resultCount, null/* cause */);

    }

    /**
     * A failed trial.
     *
     * @param cause
     *            The cause of the failure.
     */
    public QueryTrial(final Throwable cause) {

        this(-1L/* elapsedNanos */, -1L/* resultCount */, cause);

    }

    /** Core constructor. */
    private QueryTrial(final long elapsedNanos, final long resultCount,
            final Throwable cause) {

        this.elapsedNanos = elapsedNanos;

        this.resultCount = resultCount;

        this.cause = cause;

    }

}
/**
 * A SPARQL query together with the {@link QueryTrial}s recorded for it and
 * a method to submit the query. The recorded trials and the accumulated
 * elapsed time are the inputs from which a {@link Score} is computed.
 *
 * TODO Now that we keep the elapsed time and result count for each query
 * trial, we can report on min/max/stdev and changes in the #of results
 * across trials (which indicates a query which is not stable in its result
 * set size).
 */
private static class Query {

    /** The source query identifier. */
    public final String source;

    /** The query. */
    public final String queryStr;

    /**
     * One entry per presentation of the query, including the presentations
     * which failed.
     */
    public final LinkedBlockingQueue<QueryTrial> trials = new LinkedBlockingQueue<QueryTrial>(/* unbounded */);

    /**
     * Total elapsed nanoseconds over the successful {@link QueryTrial}s
     * for this {@link Query} (nothing is added for a failed trial).
     */
    public final CAT elapsedNanos = new CAT();

    /**
     * @param source
     *            The source query identifier.
     * @param queryStr
     *            The query.
     */
    public Query(final String source, final String queryStr) {

        this.source = source;

        this.queryStr = queryStr;

    }

    /**
     * Run this query once and record the outcome as a {@link QueryTrial}.
     *
     * @param opts
     *            The configured query options. They are cloned, so the
     *            caller's instance is not modified.
     *
     * @return The trial (when the query succeeds).
     *
     * @throws Exception
     *             wrapping whatever was thrown when the query fails. The
     *             failed trial is recorded before this is thrown.
     */
    public QueryTrial runQuery(QueryOptions opts) throws Exception {

        // Private copy of the options for this presentation of the query.
        final QueryOptions trialOpts = opts.clone();

        trialOpts.queryStr = queryStr;

        trialOpts.source = source;

        try {

            // Run the query
            final QueryTask task = new QueryTask(/* client, */trialOpts);

            task.call();

            final QueryTrial trial = new QueryTrial(trialOpts.elapsedNanos,
                    trialOpts.nresults);

            trials.add(trial);

            elapsedNanos.add(trialOpts.elapsedNanos);

            return trial;

        } catch (Throwable t) {

            trials.add(new QueryTrial(t));

            throw new Exception(t);

        }

    }

}
/**
 * Class models an aggregated score for a specific query.
 */
private static class Score implements Comparable<Score> {

    /** The query. */
    public final Query query;

    /**
     * The average elapsed time (nanos) for the query, which is the total
     * elapsed time recorded for the query divided by the #of trials
     * recorded for the query -or- ZERO (0) if no trials were recorded.
     */
    public final long elapsedNanos;

    /**
     * @param query
     *            The query (after it has been evaluated).
     */
    public Score(final Query query) {

        this.query = query;

        final int ntrials = query.trials.size();

        /*
         * average elapsed nanos for this query across all trials.
         *
         * Note: A query for which no trial was recorded is scored as ZERO
         * rather than failing on a division by zero.
         */
        this.elapsedNanos = ntrials == 0 ? 0L : query.elapsedNanos.get()
                / ntrials;

    }

    /**
     * Order by increasing elapsed time (slowest queries are last).
     */
    @Override
    public int compareTo(final Score o) {

        if (elapsedNanos < o.elapsedNanos)
            return -1;

        if (elapsedNanos > o.elapsedNanos)
            return 1;

        return 0;

    }

}
/**
 * Return the order in which the queries will be presented. There is one
 * element in the returned array for each trial, where the #of trials is
 * the #of queries times the <i>repeat</i> count. Each element is an index
 * into the array of queries.
 * <p>
 * When the <i>seed</i> is ZERO (0L) the queries are presented in the given
 * order and the entire sequence of queries is then repeated. Otherwise the
 * presentation order of the trials is randomized using that seed.
 *
 * @param seed
 *            The random seed -or- ZERO (0L) if the queries will be
 *            evaluated in the given order.
 * @param repeat
 *            The repeat count.
 * @param nqueries
 *            The #of queries.
 *
 * @return The evaluation order. The indices in the array are in
 *         <code>[0:nqueries)</code>. Each index appears <i>repeat</i> times
 *         in the array.
 */
private static int[] getQueryOrder(final long seed, final int repeat,
        final int nqueries) {

    // Total #of trials to execute.
    final int ntrials = nqueries * repeat;

    final boolean randomize = seed != 0;

    // Either a random permutation of the trials or a buffer to be filled.
    final int[] order = randomize ? getRandomOrder(seed, ntrials)
            : new int[ntrials];

    for (int i = 0; i < ntrials; i++) {

        // The trial which is presented at this position.
        final int trial = randomize ? order[i] : i;

        // Normalize the trial into a query index in [0:nqueries).
        order[i] = trial % nqueries;

    }

    return order;

}
// /**
// * Runs the queries in the evaluation order.
// *
// * @param order
// * The evaluation order. This is an array of indices into the
// * <i>queries</i> array. Indices for the same query may appear
// * more than once.
// * @param queries
// * The queries.
// * @param opts
// * The configured query options.
// * @param nerrors
// * The #of errors is reported via this variable as a side effect.
// */
// private static void runQueriesSingleThreaded(final int[] order,
// final Query[] queries, final QueryOptions opts,
// final AtomicLong nerrors) {
//
// for (int i = 0; i < order.length; i++) {
//
// final int queryId = order[i];
//
// final Query query = queries[queryId];
//
// new RunQueryTask(query, opts, nerrors).run();
//
// } // next query in the evaluation order
//
// }
/**
 * Runs one presentation of a query and writes a one line summary of the
 * outcome on stdout. An error is counted, reported and logged, but it is
 * not propagated to the caller.
 */
static class RunQueryTask implements Runnable {

    private final Query query;

    private final QueryOptions opts;

    private final AtomicLong nerrors;

    /**
     * @param query
     *            The query.
     * @param opts
     *            The configured query options (will be cloned).
     * @param nerrors
     *            The #of errors is reported via this variable as a side
     *            effect.
     *
     * @throws IllegalArgumentException
     *             if any argument is <code>null</code>.
     */
    public RunQueryTask(final Query query, final QueryOptions opts,
            final AtomicLong nerrors) {

        if (query == null || opts == null || nerrors == null)
            throw new IllegalArgumentException();

        this.query = query;

        this.opts = opts;

        this.nerrors = nerrors;

    }

    @Override
    public void run() {

        // Used to report the elapsed time if the query fails.
        final long begin = System.nanoTime();

        try {

            final QueryTrial trial = query.runQuery(opts);

            if (opts.quiet)
                return;

            // Show the query run time, #of results, source, etc.
            final String resultCount = trial.resultCount == -1 ? "N/A"
                    : Long.toString(trial.resultCount);

            final long elapsedMillis = TimeUnit.NANOSECONDS
                    .toMillis(trial.elapsedNanos);

            System.out.println("resultCount=" + resultCount + ", elapsed="
                    + elapsedMillis + "ms, source=" + query.source);

        } catch (Throwable t) {

            nerrors.incrementAndGet();

            final long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(System
                    .nanoTime() - begin);

            // Note: This is the same format as the result line above!
            System.out.println("resultCount=ERR, elapsed=" + elapsedMillis
                    + "ms, source=" + query.source);

            log.error("nerrors=" + nerrors + ", source=" + query.source
                    + ", query=" + query.queryStr + ", cause=" + t);

        }

    }

} // RunQueryTask
/**
 * Compute a {@link Score} for each query.
 *
 * @param queries
 *            The queries (after they have been evaluated).
 *
 * @return The {@link Score}s, in the same order as the queries.
 */
private static Score[] getScores(final Query[] queries) {

    final Score[] scores = new Score[queries.length];

    int next = 0;

    for (Query query : queries) {

        scores[next++] = new Score(query);

    }

    return scores;

}
/**
 * Write a tab delimited table of the average running time of each query on
 * <code>stdout</code>, ordered from the fastest query to the slowest query.
 * <p>
 * Note: The given array is sorted in place as a side effect.
 *
 * @param a
 *            The query scores.
 * @param minMillisLatencyToReport
 *            The minimum latency (in milliseconds) to report. Queries whose
 *            average latency is less than this are not listed.
 */
private static void reportScores(final Score[] a,
        final long minMillisLatencyToReport) {

    // Place into order (ascending average query evaluation time).
    Arrays.sort(a);

    System.out.println("average(ms)\tsource\tquery");

    for (Score score : a) {

        final long averageMillis = TimeUnit.NANOSECONDS
                .toMillis(score.elapsedNanos);

        if (averageMillis < minMillisLatencyToReport) {
            // Too fast to be of interest.
            continue;
        }

        final Query query = score.query;

        System.out.println(averageMillis + "\t" + query.source + "\t"
                + query.queryStr);

    }

}
/**
 * Write a one line synopsis of the command line on <code>stderr</code>.
 */
private static void usage() {

    final String synopsis = "usage: (option)* [serviceURL] (query)";

    System.err.println(synopsis);

}
/**
* Issue a query against a SPARQL endpoint. By default, the client will read
* from stdin. It will write on stdout.
*
* @param args
* <code>(option)* [serviceURL] (query)</code>
* <p>
* where
* <dl>
* <dt>serviceURL</dt>
* <dd>The URL of the SPARQL endpoint.</dd>
* <dt>query</dt>
* <dd>The SPARQL query (required unless <code>-f</code> is used)
* </dd>
* <p>
* where <i>option</i> is any of
* <dl>
* <dt>-u</dt>
* <dd>username</dd>
* <dt>-p</dt>
* <dd>password</dd>
* <dt>-timeout</dt>
* <dd>The http connection timeout in milliseconds (default
* {@value #DEFAULT_TIMEOUT}) -or- ZERO (0) for an infinite
* timeout.</dd>
* <dt>-method (GET|POST)</dt>
* <dd>The HTTP method for the requests (default GET).</dd>
* <dt>-useCaches (true|false)</dt>
* <dd>Set to <code>false</code> to explicitly disable the use of
* HTTP connection caches along the route to the http endpoint
* (default <code>true</code>).</dd>
* <dt>-showQuery</dt>
* <dd>Show the query string.</dd>
* <dt>-showParseTree</dt>
* <dd>Show the SPARQL parse tree (on the client).</dd>
* <dt>-showResults</dt>
* <dd>Show the query results (on stdout).</dd>
* <dt>-verbose</dt>
* <dd>Be verbose.</dd>
* <dt>-quiet</dt>
* <dd>Be quiet.</dd>
* <dt>-f</dt>
* <dd>A file (or directory) containing the query(s) to be run.
* Each file may contain a single SPARQL query.</dd>
* <dt>-delim</dt>
* <dd>An optional regular expression which delimits query
* strings within each file. For example, this can match a
* newline if there is one query per line in the file. When not
* specified, it is assumed that there is one query per file.</dd>
* <dt>-query</dt>
* <dd>The query follows immediately on the command line (be sure
* to quote the query).</dd>
* <dt>-clients</dt>
* <dd>The #of client threads which will issue queries (default
* ONE (1)).</dd>
* <dt>-repeat #</dt>
* <dd>The #of times to present each query (default ONE (1); must
* be at least ONE (1)).</dd>
* <dt>-seed seed</dt>
* <dd>The seed for the random number generator which is used to
* randomize the presentation of the queries -or- ZERO (0L) to
* disable the randomized presentation so that the queries are
* evaluated in the given order. The default seed is based on the
* System clock.</dd>
* <dt>-defaultGraph</dt>
* <dd>The URI of the default graph to use for the query.</dd>
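* <dt>-analytic (true|false)</dt>
* <dd>Explicitly enable or disable the analytic query package for
* the submitted queries (the value is given as the next
* argument).</dd>
* <dt>-explain</dt>
* <dd>Request an explanation of each query rather than its
* results. The response is written on stdout.</dd>
* <dt>-reportScores</dt>
* <dd>Report the average time for each query.</dd>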
* <dt>-baseURI</dt>
* <dd>The baseURI of the query (used when parsing the query).</dd>
* <dt>-help</dt>
* <dd>Display help.</dd>
* <dt>--?</dt>
* <dd>Display help.</dd>
* </dl>
* @throws Exception
*
* @todo username/password not supported.
*/
public static void main(final String[] args) throws Exception {
if (args.length == 0) {
usage();
System.exit(1);
}
/*
* Parse the command line, overriding various properties.
*/
long seed = System.nanoTime(); // Note: 0L means not randomized.
int repeat = 1; // repeat count.
long minLatencyToReport = 100; // only queries with at least this much latency are reported.
File file = null; // When non-null, file or directory containing query(s).
Pattern delim = null; // When non-null, this delimits queries within a file.
String queryStr = null; // A query given directly on the command line.
boolean reportScores = false; // Report the average time for each query.
int nclients = 1; // The #of clients.
int threadsPerClient = 1; // TODO The #of threads per client IFF groupQueriesBySource is true.
boolean groupQueriesBySource = false; // TODO When true, each source represents a batch of queries.
long interGroupDelayMillis = 0L; // Latency by the client between query batches.
final QueryOptions opts = new QueryOptions();
String mode = null;
{
int i = 0;
for (; i < args.length && args[i].startsWith("-"); i++) {
final String arg = args[i];
if (arg.equals("-u")) {
opts.username = args[++i];
} else if (arg.equals("-p")) {
opts.password = args[++i];
} else if (arg.equals("-f")) {
file = new File(args[++i]);
// opts.queryStr = readFromFile(new File(file));
} else if (arg.equals("-delim")) {
delim = Pattern.compile(args[++i]);
} else if (arg.equals("-showQuery")) {
opts.showQuery = true;
} else if (arg.equals("-explain")) {
opts.explain = true;
} else if (arg.equals("-analytic")) {
opts.analytic = Boolean.valueOf(args[++i]);
} else if (arg.equals("-showParseTree")) {
opts.showParseTree = true;
} else if (arg.equals("-showResults")) {
opts.showResults = true;
} else if (arg.equals("-reportScores")) {
reportScores = true;
} else if (arg.equals("-verbose")) {
opts.verbose = true;
opts.quiet = false;
} else if (arg.equals("-quiet")) {
opts.verbose = false;
opts.quiet = true;
} else if (arg.equals("-query")) {
queryStr = args[++i];
} else if (arg.equals("-clients")) {
if ((nclients = Integer.valueOf(args[++i])) < 1) {
throw new IllegalArgumentException("Bad clients.");
}
} else if (arg.equals("-repeat")) {
if ((repeat = Integer.valueOf(args[++i])) < 1) {
throw new IllegalArgumentException("Bad repeat.");
}
} else if (arg.equals("-seed")) {
seed = Long.valueOf(args[++i]);
} else if (arg.equals("-method")) {
opts.method = args[++i].trim();
if (!"POST".equals(opts.method)
&& !"GET".equals(opts.method)) {
throw new IllegalArgumentException("Bad method: "
+ opts.method);
}
} else if (arg.equals("-useCaches")) {
opts.useCaches = Boolean.valueOf(args[i++]);
} else if (arg.equals("-timeout")) {
if ((opts.timeout = Integer.valueOf(args[++i])) < 0) {
throw new IllegalArgumentException("Bad timeout.");
}
if (opts.verbose)
System.err.println("timeout: "
+ (opts.timeout == 0 ? "infinite" : (""
+ opts.timeout + "ms")));
} else if (arg.equals("-defaultGraph")) {
opts.defaultGraphUri = args[++i];
if (opts.verbose)
System.err.println("defaultGraph: "
+ opts.defaultGraphUri);
} else if (arg.equals("-baseURI")) {
opts.baseURI = args[++i];
if (opts.verbose)
System.err.println("baseURI: " + opts.baseURI);
} else if (arg.equals("-help") || arg.equals("--?")) {
usage();
System.exit(1);
} else if (arg.equals("-dbMode")) {
mode = args[++i];
} else {
throw new UnsupportedOperationException("Unknown option: "
+ arg);
}
} // next arg.
// The next argument is the serviceURL, which is required.
if (i < args.length) {
opts.serviceURL = args[i++];
if (opts.verbose)
System.err.println("serviceURL: " + opts.serviceURL);
} else {
usage();
System.exit(1);
}
} // parse command line.
opts.tmpKb = createTempKb(mode);
// // create a singular HttpClient object
// final HttpClient client = new HttpClient();
//
// {
//
// final HttpConnectionManagerParams params = client
// .getHttpConnectionManager().getParams();
//
// // Set timeout until a connection is established (ms).
// params.setConnectionTimeout(5000/* timeout(ms) */);
//
// }
//
// // Set default credentials.
// if (username != null) {
//
// final Credentials creds = new UsernamePasswordCredentials(username,
// password);
//
// client.getState().setCredentials(AuthScope.ANY, creds);
//
// }
final Query[] queries;
if (file != null) {
/*
* Read the query(s) from the file system.
*/
if (opts.verbose)
System.err.println("Reading query(s) from file: " + file);
// Figure out which files will be read.
final List<File> fileList = new LinkedList<File>();
// Get the list of files to be read.
getFiles(file, fileList);
// Read the query(s) from the file or directory.
final Map<String/* src */, String/* query */> map = readQueries(fileList,
delim);
final int nqueries = map.size();
if (!opts.quiet)
System.err.println("Read " + nqueries + " queries from "
+ fileList.size() + " sources in " + file);
queries = new Query[nqueries];
int i = 0;
for (Map.Entry<String,String> e : map.entrySet()) {
queries[i++] = new Query(e.getKey(), e.getValue());
}
} else {
/*
* Run a single query. Either the query was given as a command line
* argument or we will read it from stdin now.
*/
final String source;
if (queryStr == null) {
if (opts.verbose)
System.err.println("Reading query from stdin...");
queryStr = readFromStdin();
source = "stdin";
} else {
source = "command line";
}
// An array with just the one query.
queries = new Query[] { new Query(queryStr, source) };
}
/*
* Run trials.
*/
// start time (ms) of the trials; used below to compute the total elapsed time.
final long beginTrials = System.currentTimeMillis();
// total #of errors across all query presentations.
final AtomicLong nerrors = new AtomicLong();
if (nclients == 1 && !groupQueriesBySource) {
/*
* Run the queries in a single thread.
*/
System.err.println("Running queries with with a single client");
final int[] order = getQueryOrder(seed, repeat, queries.length);
for (int i = 0; i < order.length; i++) {
final int queryId = order[i];
final Query query = queries[queryId];
new RunQueryTask(query, opts, nerrors).run();
}
} else if(!groupQueriesBySource) {
/*
* Run the queries using N clients.
*/
System.err
.println("Running queries with parallel clients: nclients="
+ nclients);
// The evaluation order is used to assign tasks to clients.
final int[] order = getQueryOrder(seed, repeat, queries.length);
// The tasks to be run.
final List<Callable<Void>> tasks = new LinkedList<Callable<Void>>();
for (int i = 0; i < order.length; i++) {
final RunQueryTask runnable = new RunQueryTask(
queries[order[i]], opts, nerrors);
tasks.add(new Callable<Void>() {
public Void call() throws Exception {
runnable.run();
return (Void) null;
}
});
}
final ExecutorService clientService = Executors
.newFixedThreadPool(nclients);
try {
// Run the tasks.
clientService.invokeAll(tasks);
} finally {
clientService.shutdownNow();
}
} else {
/*
* FIXME group queries by source and impose latency between batches.
* Randomization is dependent on whether or not the seed was set to
* ZERO (0L).
*/
throw new UnsupportedOperationException();
}
/*
* Report the average query latency for queries with at least a
* specified latency.
*/
if (reportScores) {
reportScores(getScores(queries), minLatencyToReport);
System.out.println("Reporting only queries with at least "
+ minLatencyToReport + "ms latency.");
}
System.out.println("Total elapsed time: "
+ (System.currentTimeMillis() - beginTrials) + "ms for "
+ queries.length + " queries with " + repeat
+ " trials each and " + nclients
+ " clients.");
// Normal exit.
System.exit(0);
}
/**
 * Create a temporary kb instance for use by the query parser. Since the
 * temporary store is backed by a buffer until that buffer overflows, there
 * will not be a backing disk file unless someone starts writing on this.
 *
 * @param mode
 *            the database mode, matched case-insensitively against
 *            <code>"triples"</code>, <code>"quads"</code> and
 *            <code>"provenance"</code>. Any other value, including
 *            <code>null</code>, sets no mode property at all.
 *
 * @return a new {@link TempTripleStore} created over a fresh
 *         {@link TemporaryStore} with the properties selected by
 *         <i>mode</i>.
 */
private static AbstractTripleStore createTempKb(final String mode) {
    final TemporaryStore tempStore = new TemporaryStore();
    // Properties selecting the database mode; left empty for the default.
    final Properties p = new Properties();
    /*
     * Note: The comparisons are written literal-first so that a null mode
     * (no mode was specified by the caller) falls through to the default
     * rather than throwing a NullPointerException.
     */
    if ("triples".equalsIgnoreCase(mode)) {
        p.setProperty(AbstractTripleStore.Options.TRIPLES_MODE, "true");
    } else if ("quads".equalsIgnoreCase(mode)) {
        // Quads mode is configured here without axioms.
        p.setProperty(Options.QUADS, "true");
        p.setProperty(Options.AXIOMS_CLASS, "com.bigdata.rdf.axioms.NoAxioms");
    } else if ("provenance".equalsIgnoreCase(mode)) {
        p.setProperty(
                AbstractTripleStore.Options.TRIPLES_MODE_WITH_PROVENANCE, "true");
    } // else: default
    return new TempTripleStore(tempStore, p, null/* db */);
}
// /**
// * A model of the query workload to be imposed on the SPARQL end point. The
// * model allows you to group queries from the same "source" into a batch, to
// * specify the latency between queries within a batch, and to specify the
// * latency between one batch and the next. You can also specify the number
// * of independent clients which will work their way through the available
// * queries and the size of the per-client thread pool.
// * <p>
// * This workload model is sufficient to model the workload of N concurrent
// * users operating against a shared SPARQL end point, including applications
// * where each user action results in a set of SPARQL queries, such as when
// * painting an HTML page.
// * <p>
// * There are some degenerate cases which are also useful. For example, it is
// * easy to specify a workload model in which N clients run queries in a
// * randomized order.
// *
// * @author thompsonbry
// */
// static class WorkloadModel implements Callable<Void> {
//
// private final int nclients;
// private final int threadsPerClient;
// private final AtomicLong nerrors = new AtomicLong();
//
// public WorkloadModel(final int nclients, final int threadsPerClient) {
//
// if (nclients < 1)
// throw new IllegalArgumentException();
//
// if (threadsPerClient < 1)
// throw new IllegalArgumentException();
//
// this.nclients = nclients;
//
// this.threadsPerClient = threadsPerClient;
//
//
// }
//
// public Void call() throws Exception {
//
// if (nclients == 1 && threadsPerClient == 1) {
//
// runQueriesSingleThreaded(order, queries, opts, nerrors);
//
// }
//
// }
//
// }
}