package com.bigdata.samples;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Collection;
import java.util.LinkedList;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.log4j.Logger;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.rio.RDFFormat;
import com.bigdata.rdf.sail.BigdataSail;
import com.bigdata.rdf.sail.BigdataSailRepository;
/**
* This class demonstrates concurrent reading and writing with the U10 data
* set against a local bigdata journal. One writer thread loads the U10 data
* files, committing after every file (this is not the fastest way to load,
* as it simulates incremental updates rather than a bulk load). Several
* reader threads concurrently ask for the number of "FullProfessor"
* instances every few seconds. You can watch the number of results rise as
* more data is loaded.
*
* @author mikep
*
*/
public class Concurrency {
protected final static Logger log = Logger.getLogger(Concurrency.class);
/**
* The # of reader threads to create.
*/
private static final int numReaders = 3;
/**
* A query asking for all "FullProfessor" instances.
*/
private static final String query =
"select ?x where { ?x <"+RDF.TYPE+"> <"+LUBM.FULL_PROFESSOR+"> . }";
/**
* Manage the control flow of the program. Open a bigdata repository,
* kick off the writer, kick off the readers, wait for the writer to
* complete, kill the readers, wait for the readers to complete, shutdown
* the repository.
*
* @param args
*/
public static void main(String[] args) {
try {
Properties properties = new Properties();
// create a backing file
File journal = File.createTempFile("bigdata", ".jnl");
log.info(journal.getAbsolutePath());
journal.deleteOnExit();
properties.setProperty(
BigdataSail.Options.FILE,
journal.getAbsolutePath()
);
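// other BigdataSail.Options (for example buffer capacity or inference
// settings) could be configured here as well; this sample relies on the
// defaults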
// create the sail and the repository
BigdataSail sail = new BigdataSail(properties);
BigdataSailRepository repo = new BigdataSailRepository(sail);
repo.initialize();
// create the writer and readers
BigdataWriter writer = new BigdataWriter(repo);
Collection<BigdataReader> readers = new LinkedList<BigdataReader>();
for (int i = 0; i < numReaders; i++) {
readers.add(new BigdataReader(repo));
}
// launch the threads and get their futures
// bigdata has an executor service but any executor service will do
ExecutorService executor = Executors.newCachedThreadPool();
Future<?> writerFuture = executor.submit(writer);
Collection<Future<?>> readerFutures = new LinkedList<Future<?>>();
for (BigdataReader reader : readers) {
readerFutures.add(executor.submit(reader));
}
// wait for writer to complete
writerFuture.get();
// kill the readers
for (BigdataReader reader : readers) {
reader.kill();
}
// wait for readers to complete
for (Future<?> readerFuture : readerFutures) {
readerFuture.get();
}
// shut down the executor so its worker threads do not keep the JVM alive
executor.shutdown();
repo.shutDown();
} catch (Exception ex) {
ex.printStackTrace();
}
}
/*
* Scale-out variant, kept for reference; it requires a JiniFederation and
* an openTripleStore() helper that are not part of this sample.
private static void testQuery(JiniFederation fed) throws Exception {
long transactionId =
//ITx.UNISOLATED;
fed.getTransactionService().newTx(ITx.READ_COMMITTED);
log.info("transaction id = " +
(transactionId == ITx.UNISOLATED ? "UNISOLATED" : transactionId));
try {
// get the unisolated triple store for writing
final AbstractTripleStore tripleStore =
openTripleStore(fed, transactionId);
final BigdataSail sail = new BigdataSail(tripleStore);
final Repository repo = new BigdataSailRepository(sail);
repo.initialize();
RepositoryConnection cxn = repo.getConnection();
try {
final TupleQuery tupleQuery =
cxn.prepareTupleQuery(QueryLanguage.SPARQL, query);
tupleQuery.setIncludeInferred(true);
TupleQueryResult result = tupleQuery.evaluate();
// do something with the results
int resultCount = 0;
while (result.hasNext()) {
BindingSet bindingSet = result.next();
log.info(bindingSet);
resultCount++;
}
log.info(resultCount + " results");
} finally {
// close the repository connection
cxn.close();
}
repo.shutDown();
} finally {
if (transactionId != ITx.UNISOLATED) {
fed.getTransactionService().abort(transactionId);
}
}
}
*/
/**
* A writer task to load the U10 data set.
*/
private static class BigdataWriter implements Runnable {
/**
* The bigdata repository
*/
private final BigdataSailRepository repo;
/**
* Construct the writer task.
*
* @param repo the bigdata repository
*/
public BigdataWriter(BigdataSailRepository repo) {
this.repo = repo;
}
/**
* Opens the triple store and writes the LUBM ontology and U10 data
* files. Does a commit after every file, which is not the most
* efficient way to bulk load, but simulates incremental updates.
*/
public void run() {
try {
// load the data
loadU10(repo);
} catch (Exception ex) {
ex.printStackTrace();
}
}
/**
* Load the LUBM ontology and U10 data into a Sesame Repository.
*
* @param repo the sesame repository
* @throws Exception
*/
private void loadU10(Repository repo) throws Exception {
// always, always autocommit = false
RepositoryConnection cxn = repo.getConnection();
cxn.setAutoCommit(false);
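// with auto-commit enabled, each add() would effectively run in its own
// transaction, which defeats bigdata's write buffering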
try {
// fast range count!
long stmtsBefore = ((BigdataSailRepository)repo).getDatabase().getStatementCount();
// // full index scan!
// long stmtsBefore = cxn.size();
log.info("statements before: " + stmtsBefore);
long start = System.currentTimeMillis();
{ // first add the LUBM ontology
InputStream is =
Concurrency.class.getResourceAsStream("univ-bench.owl");
Reader reader =
new InputStreamReader(new BufferedInputStream(is));
cxn.add(reader, LUBM.NS, RDFFormat.RDFXML);
cxn.commit();
reader.close();
}
{ // then process the LUBM sample data files one at a time
InputStream is =
Concurrency.class.getResourceAsStream("U10.zip");
ZipInputStream zis =
new ZipInputStream(new BufferedInputStream(is));
ZipEntry ze = null;
while ((ze = zis.getNextEntry()) != null) {
if (ze.isDirectory()) {
continue;
}
String name = ze.getName();
log.info(name);
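// buffer the entry into memory before parsing: reading straight off the
// shared ZipInputStream risks the Reader's internal decoding buffer
// consuming bytes past the entry boundary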
ByteArrayOutputStream baos =
new ByteArrayOutputStream();
byte[] bytes = new byte[4096];
int count;
while ((count = zis.read(bytes, 0, 4096)) != -1) {
baos.write(bytes, 0, count);
}
baos.close();
Reader reader = new InputStreamReader(
new ByteArrayInputStream(baos.toByteArray())
);
cxn.add(reader, LUBM.NS, RDFFormat.RDFXML);
cxn.commit();
}
zis.close();
}
// gather statistics
long elapsed = System.currentTimeMillis() - start;
// fast range count!
long stmtsAfter = ((BigdataSailRepository)repo).getDatabase().getStatementCount();
// long stmtsAfter = cxn.size();
long stmtsAdded = stmtsAfter - stmtsBefore;
int throughput =
(int) ((double) stmtsAdded / (double) elapsed * 1000d);
log.info("statements after: " + stmtsAfter);
log.info("loaded: " + stmtsAdded + " in " + elapsed
+ " millis: " + throughput + " stmts/sec");
} catch (Exception ex) {
cxn.rollback();
throw ex;
} finally {
// close the repository connection
cxn.close();
}
}
}
/**
* A reader task to issue concurrent queries. Asks for the # of full
* professors every few seconds until killed.
*/
private static class BigdataReader implements Runnable {
/**
* The bigdata repository
*/
private final BigdataSailRepository repo;
/**
* Allows the reader to be stopped gracefully. Volatile, so the write in
* {@link #kill()} is visible to the thread running {@link #run()}.
*/
private volatile boolean kill = false;
/**
* Create the reader.
*
* @param repo the bigdata repository
*/
public BigdataReader(BigdataSailRepository repo) {
this.repo = repo;
}
/**
* Kills the reader gracefully.
*/
public void kill() {
this.kill = true;
}
/**
* Opens a read-committed view of the triple store using the last
* commit point and issues a query for a list of all LUBM full
* professors. Does this every few seconds until killed.
*/
public void run() {
try {
while (!kill) {
doQuery();
// sleep somewhere between 0 and 3 seconds
Thread.sleep((int) (Math.random() * 3000d));
}
} catch (Exception ex) {
ex.printStackTrace();
}
}
/**
* Issue the query.
*
* @throws Exception
*/
private void doQuery() throws Exception {
RepositoryConnection cxn = repo.getReadOnlyConnection();
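// a read-only connection reads from the last commit point, so it should
// not block (or be blocked by) the concurrent writer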
try {
final TupleQuery tupleQuery =
cxn.prepareTupleQuery(QueryLanguage.SPARQL, query);
tupleQuery.setIncludeInferred(true /* includeInferred */);
TupleQueryResult result = tupleQuery.evaluate();
try {
// count the results
int resultCount = 0;
while (result.hasNext()) {
BindingSet bindingSet = result.next();
// log.info(bindingSet);
resultCount++;
}
log.info(resultCount + " results");
} finally {
// release the query result's resources
result.close();
}
} finally {
// close the repository connection
cxn.close();
}
}
}
}