package com.bigdata.samples;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Properties;
import java.util.Map.Entry;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.log4j.Logger;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.rio.RDFFormat;
import com.bigdata.journal.ITx;
import com.bigdata.rdf.sail.BigdataSail;
import com.bigdata.rdf.sail.BigdataSailRepository;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.ScaleOutTripleStore;
import com.bigdata.service.jini.JiniClient;
import com.bigdata.service.jini.JiniFederation;
/**
* This class demonstrates concurrent reading and writing with the U10 data set
* on the scale-out architecture. One thread writes the U10 data files, doing
* a commit after every file (this is not the fastest way to perform load, as
* it simulates incremental updates vs bulk load). Another thread asks for
* the number of "FullProfessors" every three seconds. You can watch the
* number of results rise as more data is loaded.
*
* @author mikep
*
*/
public class ScaleOut {

    protected final static Logger log = Logger.getLogger(ScaleOut.class);

    /**
     * The name of the triple store instance inside the scale-out database.
     */
    private static final String namespace = "kb2";

    /**
     * A query asking for all instances of LUBM "FullProfessor".
     */
    private static final String query =
        "select ?x where { ?x <"+RDF.TYPE+"> <"+LUBM.FULL_PROFESSOR+"> . }";

    /** Utility entry-point class; never instantiated. */
    private ScaleOut() {
    }

    /**
     * Manage the control flow of the program. Open a proxy to the federation,
     * kick off the writer, kick off the reader, wait for the writer to
     * complete, kill the reader, wait for the reader to complete, shutdown the
     * federation.
     * <p>
     * The reader is stopped from the {@code finally} block so that it is
     * killed even when the writer fails, rather than looping forever while
     * the federation shuts down underneath it.
     *
     * @param args specify the location of the cluster's JINI config file
     */
    public static final void main(String[] args) {

        if (args.length == 0) {
            System.err.println("usage: filename");
            System.exit(1);
        }

        final String config = args[0];
        log.info("config: " + config);

        JiniFederation fed = null;
        BigdataReader reader = null;
        Future<?> readerFuture = null;
        try {

            fed = new JiniClient(args).connect();

            // force the triple store to be created if it doesn't already exist
            createTripleStore(fed);

            // create the writer and reader
            BigdataWriter writer = new BigdataWriter(fed);
            reader = new BigdataReader(fed);

            // launch the tasks and get their futures
            // bigdata has an executor service but any executor service will do
            Future<?> writerFuture = fed.getExecutorService().submit(writer);
            readerFuture = fed.getExecutorService().submit(reader);

            // wait for the writer to complete
            writerFuture.get();

        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            // stop the reader gracefully, even if the writer failed
            if (reader != null) {
                reader.kill();
                if (readerFuture != null) {
                    try {
                        readerFuture.get();
                    } catch (Exception ex) {
                        ex.printStackTrace();
                    }
                }
            }
            if (fed != null)
                fed.shutdown();
        }

    }

    /**
     * Create our triple store instance if it doesn't exist.
     *
     * @param fed the jini federation
     * @return the triple store instance
     * @throws Exception
     */
    private static AbstractTripleStore createTripleStore(JiniFederation fed)
            throws Exception {

        // locate the resource declaration (aka "open").
        AbstractTripleStore tripleStore =
            (AbstractTripleStore) fed.getResourceLocator().locate(
                namespace, ITx.UNISOLATED);

        if (tripleStore == null) {

            /*
             * Pick up properties configured for the client as defaults.
             */
            final Properties properties = fed.getClient().getProperties(
                ScaleOut.class.getName());

            tripleStore = new ScaleOutTripleStore(
                fed, namespace, ITx.UNISOLATED, properties);

            // create the triple store.
            tripleStore.create();

        }

        return tripleStore;

    }

    /**
     * Lookup the triple store instance using the specified timestamp. Pass in
     * ITx.UNISOLATED for the writable instance (not safe for concurrent
     * readers), otherwise use a transaction id or timestamp for a historical
     * view. This is demonstrated below.
     *
     * @param fed the jini federation
     * @param timestamp the timestamp or transaction id
     * @return the triple store instance
     * @throws Exception
     */
    private static AbstractTripleStore openTripleStore(
            JiniFederation fed, long timestamp) throws Exception {

        // locate the resource declaration (aka "open").
        AbstractTripleStore tripleStore =
            (AbstractTripleStore) fed.getResourceLocator().locate(
                namespace, timestamp);

        if (tripleStore == null) {
            throw new RuntimeException("triple store does not exist!");
        }

        return tripleStore;

    }

    /**
     * A writer task to load the U10 data set.
     */
    private static class BigdataWriter implements Runnable {

        /**
         * The jini federation.
         */
        private final JiniFederation fed;

        /**
         * Construct the writer task.
         *
         * @param fed the jini federation
         */
        public BigdataWriter(JiniFederation fed) {
            this.fed = fed;
        }

        /**
         * Opens the triple store and writes the LUBM ontology and U10 data
         * files. Does a commit after every file, which is not the most
         * efficient way to bulk load, but simulates incremental updates.
         */
        public void run() {

            try {

                // get the unisolated triple store for writing
                final AbstractTripleStore tripleStore =
                    openTripleStore(fed, ITx.UNISOLATED);

                // wrap the triple store in a Sesame SAIL
                final BigdataSail sail = new BigdataSail(tripleStore);
                final Repository repo = new BigdataSailRepository(sail);
                repo.initialize();

                try {
                    // load the data
                    loadU10(repo);
                } finally {
                    // shut it down, even if the load failed
                    repo.shutDown();
                }

            } catch (Exception ex) {
                // sample code: report the failure and let the task exit
                ex.printStackTrace();
            }

        }

        /**
         * Load the LUBM ontology and U10 data into a Sesame Repository.
         *
         * @param repo the sesame repository
         * @throws Exception
         */
        private void loadU10(Repository repo) throws Exception {

            // always, always autocommit = false
            RepositoryConnection cxn = repo.getConnection();
            cxn.setAutoCommit(false);

            try {

                // fast range count!
                // (the alternative, cxn.size(), is a full index scan)
                long stmtsBefore = ((BigdataSailRepository) repo)
                    .getDatabase().getStatementCount();
                log.info("statements before: " + stmtsBefore);

                long start = System.currentTimeMillis();

                // first add the LUBM ontology, then the sample data files
                loadOntology(cxn);
                loadDataFiles(cxn);

                // gather statistics
                long elapsed = System.currentTimeMillis() - start;
                long stmtsAfter = ((BigdataSailRepository) repo)
                    .getDatabase().getStatementCount();
                long stmtsAdded = stmtsAfter - stmtsBefore;
                int throughput =
                    (int) ((double) stmtsAdded / (double) elapsed * 1000d);
                log.info("statements after: " + stmtsAfter);
                log.info("loaded: " + stmtsAdded + " in " + elapsed
                    + " millis: " + throughput + " stmts/sec");

            } catch (Exception ex) {
                cxn.rollback();
                throw ex;
            } finally {
                // close the repository connection
                cxn.close();
            }

        }

        /**
         * Load the LUBM ontology from the classpath and commit.
         * <p>
         * The raw stream (not a platform-default-charset Reader) is handed to
         * the connection so the RDF/XML parser can honor the document's own
         * encoding declaration.
         *
         * @param cxn the repository connection (autocommit off)
         * @throws Exception
         */
        private void loadOntology(RepositoryConnection cxn) throws Exception {

            InputStream is =
                ScaleOut.class.getResourceAsStream("univ-bench.owl");
            try {
                cxn.add(new BufferedInputStream(is), LUBM.NS, RDFFormat.RDFXML);
                cxn.commit();
            } finally {
                is.close();
            }

        }

        /**
         * Load the U10 sample data files from the bundled zip, committing
         * after each file to simulate incremental updates.
         *
         * @param cxn the repository connection (autocommit off)
         * @throws Exception
         */
        private void loadDataFiles(RepositoryConnection cxn) throws Exception {

            InputStream is =
                ScaleOut.class.getResourceAsStream("U10.zip");
            ZipInputStream zis =
                new ZipInputStream(new BufferedInputStream(is));
            try {

                ZipEntry ze = null;
                while ((ze = zis.getNextEntry()) != null) {

                    if (ze.isDirectory()) {
                        continue;
                    }
                    log.info(ze.getName());

                    // copy the entry into memory: the zip stream itself must
                    // not be handed to the parser, which would close it
                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    byte[] buf = new byte[4096];
                    int count;
                    while ((count = zis.read(buf, 0, buf.length)) != -1) {
                        baos.write(buf, 0, count);
                    }

                    cxn.add(new ByteArrayInputStream(baos.toByteArray()),
                        LUBM.NS, RDFFormat.RDFXML);
                    // commit after every file (simulates incremental update)
                    cxn.commit();

                }

            } finally {
                // closes the underlying resource stream as well
                zis.close();
            }

        }

    }

    /**
     * A reader task to issue concurrent queries. Asks for the # of full
     * professors every three seconds.
     */
    private static class BigdataReader implements Runnable {

        /**
         * The jini federation.
         */
        private final JiniFederation fed;

        /**
         * Allows the reader to be stopped gracefully.
         */
        private volatile boolean kill = false;

        /**
         * Create the reader.
         *
         * @param fed the jini federation
         */
        public BigdataReader(JiniFederation fed) {
            this.fed = fed;
        }

        /**
         * Kills the reader gracefully.
         */
        public void kill() {
            this.kill = true;
        }

        /**
         * Opens a read-committed view of the triple store using the last
         * commit point and issues a query for a list of all LUBM full
         * professors. Does this every three seconds until killed.
         */
        public void run() {

            try {
                while (!kill) {
                    doQuery();
                    Thread.sleep(3000);
                }
            } catch (InterruptedException ex) {
                // restore the interrupt status and exit gracefully
                Thread.currentThread().interrupt();
            } catch (Exception ex) {
                ex.printStackTrace();
            }

        }

        /**
         * Issue the query against a read-only historical view.
         *
         * @throws Exception
         */
        private void doQuery() throws Exception {

            // this is how you get a read-only transaction. MUST be
            // committed or aborted later, see below.
            long transactionId =
                fed.getTransactionService().newTx(ITx.READ_COMMITTED);

            try {

                // open the read-only triple store
                final AbstractTripleStore tripleStore =
                    openTripleStore(fed, transactionId);

                // wrap it in a Sesame SAIL
                final BigdataSail sail = new BigdataSail(tripleStore);
                final Repository repo = new BigdataSailRepository(sail);
                repo.initialize();

                try {
                    RepositoryConnection cxn = repo.getConnection();
                    try {

                        final TupleQuery tupleQuery =
                            cxn.prepareTupleQuery(QueryLanguage.SPARQL, query);
                        tupleQuery.setIncludeInferred(true /* includeInferred */);
                        TupleQueryResult result = tupleQuery.evaluate();
                        try {
                            // count the results
                            int resultCount = 0;
                            while (result.hasNext()) {
                                BindingSet bindingSet = result.next();
                                // log.info(bindingSet);
                                resultCount++;
                            }
                            log.info(resultCount + " results");
                        } finally {
                            // release the result cursor
                            result.close();
                        }

                    } finally {
                        // close the repository connection
                        cxn.close();
                    }
                } finally {
                    // shut down the repository, even on error
                    repo.shutDown();
                }

            } finally {
                // MUST close the transaction, abort is sufficient
                fed.getTransactionService().abort(transactionId);
            }

        }

    }

}