package com.bigdata.samples;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Collection;
import java.util.LinkedList;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.apache.log4j.Logger;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.rio.RDFFormat;

import com.bigdata.rdf.sail.BigdataSail;
import com.bigdata.rdf.sail.BigdataSailRepository;

/**
 * This class demonstrates concurrent reading and writing with the U10 data
 * set. One thread writes the U10 data files, committing after every file
 * (this is not the fastest way to load data, but it simulates incremental
 * updates rather than a bulk load). Several reader threads concurrently ask
 * for the number of "FullProfessors" every few seconds. You can watch the
 * number of results rise as more data is loaded.
 *
 * @author mikep
 */
public class Concurrency {

    protected final static Logger log = Logger.getLogger(Concurrency.class);

    /**
     * The # of reader threads to create.
     */
    private static final int numReaders = 3;

    /**
     * A query asking for all instances of full professors.
     */
    private static final String query =
        "select ?x where { ?x <"+RDF.TYPE+"> <"+LUBM.FULL_PROFESSOR+"> . }";
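    /*
     * For reference, the query constant above expands at runtime to plain
     * SPARQL along the following lines. The exact FullProfessor URI comes
     * from the LUBM helper class in this package; the univ-bench namespace
     * shown here is an assumption based on the standard LUBM vocabulary.
     *
     *   SELECT ?x
     *   WHERE {
     *     ?x <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
     *        <http://www.lehigh.edu/~zhp2/2004/0401/univ-bench.owl#FullProfessor> .
     *   }
     */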
    /**
     * Manage the control flow of the program. Open a bigdata repository,
     * kick off the writer, kick off the readers, wait for the writer to
     * complete, kill the readers, wait for the readers to complete, and
     * shut down the repository.
     *
     * @param args
     */
    public static final void main(String[] args) {
        try {
            Properties properties = new Properties();

            // create a backing file
            File journal = File.createTempFile("bigdata", ".jnl");
            log.info(journal.getAbsolutePath());
            journal.deleteOnExit();
            properties.setProperty(
                BigdataSail.Options.FILE,
                journal.getAbsolutePath()
                );

            // create the sail and the repository
            BigdataSail sail = new BigdataSail(properties);
            BigdataSailRepository repo = new BigdataSailRepository(sail);
            repo.initialize();

            // create the writer and readers
            BigdataWriter writer = new BigdataWriter(repo);
            Collection<BigdataReader> readers = new LinkedList<BigdataReader>();
            for (int i = 0; i < numReaders; i++) {
                readers.add(new BigdataReader(repo));
            }

            // launch the threads and get their futures
            // bigdata has an executor service, but any executor service will do
            ExecutorService executor = Executors.newCachedThreadPool();
            Future<?> writerFuture = executor.submit(writer);
            Collection<Future<?>> readerFutures = new LinkedList<Future<?>>();
            for (BigdataReader reader : readers) {
                readerFutures.add(executor.submit(reader));
            }

            // wait for the writer to complete
            writerFuture.get();

            // kill the readers
            for (BigdataReader reader : readers) {
                reader.kill();
            }

            // wait for the readers to complete
            for (Future<?> readerFuture : readerFutures) {
                readerFuture.get();
            }

            // release the executor's threads and close the journal
            executor.shutdown();
            repo.shutDown();
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /*
    private static void testQuery(JiniFederation fed) throws Exception {

        long transactionId = //ITx.UNISOLATED;
            fed.getTransactionService().newTx(ITx.READ_COMMITTED);

        log.info("transaction id = " + (transactionId == ITx.UNISOLATED ?
            "UNISOLATED" : transactionId));

        try {
            // get the unisolated triple store for writing
            final AbstractTripleStore tripleStore =
                openTripleStore(fed, transactionId);
            final BigdataSail sail = new BigdataSail(tripleStore);
            final Repository repo = new BigdataSailRepository(sail);
            repo.initialize();

            RepositoryConnection cxn = repo.getConnection();
            try {
                final TupleQuery tupleQuery =
                    cxn.prepareTupleQuery(QueryLanguage.SPARQL, query);
                tupleQuery.setIncludeInferred(true);
                TupleQueryResult result = tupleQuery.evaluate();
                // do something with the results
                int resultCount = 0;
                while (result.hasNext()) {
                    BindingSet bindingSet = result.next();
                    log.info(bindingSet);
                    resultCount++;
                }
                log.info(resultCount + " results");
            } finally {
                // close the repository connection
                cxn.close();
            }

            repo.shutDown();
        } finally {
            if (transactionId != ITx.UNISOLATED) {
                fed.getTransactionService().abort(transactionId);
            }
        }
    }
    */

    /**
     * A writer task to load the U10 data set.
     */
    private static class BigdataWriter implements Runnable {

        /**
         * The bigdata repository.
         */
        private final BigdataSailRepository repo;

        /**
         * Construct the writer task.
         *
         * @param repo the bigdata repository
         */
        public BigdataWriter(BigdataSailRepository repo) {
            this.repo = repo;
        }

        /**
         * Opens the triple store and writes the LUBM ontology and U10 data
         * files. Commits after every file, which is not the most efficient
         * way to bulk load, but simulates incremental updates.
         */
        public void run() {
            try {
                // load the data
                loadU10(repo);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
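        /*
         * For contrast with the per-file commits in loadU10 below: a bulk
         * load would buffer every file into a single commit, letting the
         * store amortize index writes across the whole batch. A minimal
         * sketch using the same Sesame connection API (the "fileReaders"
         * collection is hypothetical; it stands in for the per-file Readers
         * produced by the zip loop below):
         *
         *   cxn.setAutoCommit(false);
         *   for (Reader fileReader : fileReaders) {
         *       cxn.add(fileReader, LUBM.NS, RDFFormat.RDFXML);
         *   }
         *   cxn.commit(); // one commit for the entire data set
         */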
        /**
         * Load the LUBM ontology and U10 data into a Sesame repository.
         *
         * @param repo the sesame repository
         * @throws Exception
         */
        private void loadU10(Repository repo) throws Exception {

            // always, always autocommit = false
            RepositoryConnection cxn = repo.getConnection();
            cxn.setAutoCommit(false);

            try {
                // fast range count!
                long stmtsBefore = ((BigdataSailRepository) repo)
                    .getDatabase().getStatementCount();
                // // full index scan!
                // long stmtsBefore = cxn.size();
                log.info("statements before: " + stmtsBefore);
                long start = System.currentTimeMillis();

                { // first add the LUBM ontology
                    InputStream is =
                        Concurrency.class.getResourceAsStream("univ-bench.owl");
                    Reader reader =
                        new InputStreamReader(new BufferedInputStream(is));
                    cxn.add(reader, LUBM.NS, RDFFormat.RDFXML);
                    cxn.commit();
                }

                { // then process the LUBM sample data files one at a time
                    InputStream is =
                        Concurrency.class.getResourceAsStream("U10.zip");
                    ZipInputStream zis =
                        new ZipInputStream(new BufferedInputStream(is));
                    ZipEntry ze = null;
                    while ((ze = zis.getNextEntry()) != null) {
                        if (ze.isDirectory()) {
                            continue;
                        }
                        String name = ze.getName();
                        log.info(name);
                        ByteArrayOutputStream baos = new ByteArrayOutputStream();
                        byte[] bytes = new byte[4096];
                        int count;
                        while ((count = zis.read(bytes, 0, 4096)) != -1) {
                            baos.write(bytes, 0, count);
                        }
                        baos.close();
                        Reader reader = new InputStreamReader(
                            new ByteArrayInputStream(baos.toByteArray())
                            );
                        cxn.add(reader, LUBM.NS, RDFFormat.RDFXML);
                        // commit after each file to simulate incremental updates
                        cxn.commit();
                    }
                    zis.close();
                }

                // gather statistics
                long elapsed = System.currentTimeMillis() - start;
                // fast range count!
                long stmtsAfter = ((BigdataSailRepository) repo)
                    .getDatabase().getStatementCount();
                // long stmtsAfter = cxn.size();
                long stmtsAdded = stmtsAfter - stmtsBefore;
                int throughput =
                    (int) ((double) stmtsAdded / (double) elapsed * 1000d);
                log.info("statements after: " + stmtsAfter);
                log.info("loaded: " + stmtsAdded + " in " + elapsed
                    + " millis: " + throughput + " stmts/sec");
            } catch (Exception ex) {
                cxn.rollback();
                throw ex;
            } finally {
                // close the repository connection
                cxn.close();
            }
        }
    }

    /**
     * A reader task to issue concurrent queries. Asks for the # of full
     * professors every few seconds.
     */
    private static class BigdataReader implements Runnable {

        /**
         * The bigdata repository.
         */
        private final BigdataSailRepository repo;

        /**
         * Allows the reader to be stopped gracefully.
         */
        private volatile boolean kill = false;

        /**
         * Create the reader.
         *
         * @param repo the bigdata repository
         */
        public BigdataReader(BigdataSailRepository repo) {
            this.repo = repo;
        }

        /**
         * Kills the reader gracefully.
         */
        public void kill() {
            this.kill = true;
        }

        /**
         * Opens a read-committed view of the triple store using the last
         * commit point and issues a query for a list of all LUBM full
         * professors. Does this every few seconds until killed.
         */
        public void run() {
            try {
                while (!kill) {
                    doQuery();
                    // sleep somewhere between 0 and 3 seconds
                    Thread.sleep((int) (Math.random() * 3000d));
                }
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }

        /**
         * Issue the query.
         *
         * @throws Exception
         */
        private void doQuery() throws Exception {
            RepositoryConnection cxn = repo.getReadOnlyConnection();
            try {
                final TupleQuery tupleQuery =
                    cxn.prepareTupleQuery(QueryLanguage.SPARQL, query);
                tupleQuery.setIncludeInferred(true /* includeInferred */);
                TupleQueryResult result = tupleQuery.evaluate();
                // do something with the results
                int resultCount = 0;
                try {
                    while (result.hasNext()) {
                        BindingSet bindingSet = result.next();
                        // log.info(bindingSet);
                        resultCount++;
                    }
                } finally {
                    // release the query result's resources
                    result.close();
                }
                log.info(resultCount + " results");
            } finally {
                // close the repository connection
                cxn.close();
            }
        }
    }
}
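/*
 * To run this sample standalone, put the bigdata jars plus the
 * univ-bench.owl and U10.zip resources (loaded above via
 * Concurrency.class.getResourceAsStream) on the classpath and invoke:
 *
 *   java com.bigdata.samples.Concurrency
 *
 * The writer logs loading statistics while each reader logs a rising
 * "N results" count as successive commits make more statements visible
 * to its read-committed view.
 */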