package com.bigdata.samples;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Properties;
import java.util.Map.Entry;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.log4j.Logger;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.rio.RDFFormat;
import com.bigdata.journal.ITx;
import com.bigdata.rdf.sail.BigdataSail;
import com.bigdata.rdf.sail.BigdataSailRepository;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.ScaleOutTripleStore;
import com.bigdata.service.jini.JiniClient;
import com.bigdata.service.jini.JiniFederation;
/**
* This class demonstrates concurrent reading and writing with the U10 data set
* on the scale-out architecture. One thread writes the U10 data files, doing
* a commit after every file (this is not the fastest way to perform load, as
* it simulates incremental updates vs bulk load). Another thread asks for
* the number of "FullProfessors" every three seconds. You can watch the
* number of results rise as more data is loaded.
*
* @author mikep
*
*/
public class ScaleOut {

    protected final static Logger log = Logger.getLogger(ScaleOut.class);

    /**
     * The name of the triple store instance inside the scale-out database.
     */
    private static final String namespace = "kb2";

    /**
     * A query asking for all instances of LUBM "FullProfessor".
     */
    private static final String query =
        "select ?x where { ?x <"+RDF.TYPE+"> <"+LUBM.FULL_PROFESSOR+"> . }";

    /** Utility entry-point class; never instantiated. */
    private ScaleOut() {
    }

    /**
     * Manage the control flow of the program. Open a proxy to the federation,
     * kick off the writer, kick off the reader, wait for the writer to
     * complete, kill the reader, wait for the reader to complete, shutdown the
     * federation.
     * <p>
     * The reader is stopped from the {@code finally} block so that it is
     * killed even when the writer fails, rather than looping forever while
     * the federation shuts down underneath it.
     *
     * @param args specify the location of the cluster's JINI config file
     */
    public static final void main(String[] args) {

        if (args.length == 0) {
            System.err.println("usage: filename");
            System.exit(1);
        }

        final String config = args[0];
        log.info("config: " + config);

        JiniFederation fed = null;
        BigdataReader reader = null;
        Future<?> readerFuture = null;
        try {

            fed = new JiniClient(args).connect();

            // force the triple store to be created if it doesn't already exist
            createTripleStore(fed);

            // create the writer and reader
            BigdataWriter writer = new BigdataWriter(fed);
            reader = new BigdataReader(fed);

            // launch the tasks and get their futures
            // bigdata has an executor service but any executor service will do
            Future<?> writerFuture = fed.getExecutorService().submit(writer);
            readerFuture = fed.getExecutorService().submit(reader);

            // wait for the writer to complete
            writerFuture.get();

        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            // stop the reader gracefully, even if the writer failed
            if (reader != null) {
                reader.kill();
                if (readerFuture != null) {
                    try {
                        readerFuture.get();
                    } catch (Exception ex) {
                        ex.printStackTrace();
                    }
                }
            }
            if (fed != null)
                fed.shutdown();
        }

    }

    /**
     * Create our triple store instance if it doesn't exist.
     *
     * @param fed the jini federation
     * @return the triple store instance
     * @throws Exception
     */
    private static AbstractTripleStore createTripleStore(JiniFederation fed)
            throws Exception {

        // locate the resource declaration (aka "open").
        AbstractTripleStore tripleStore =
            (AbstractTripleStore) fed.getResourceLocator().locate(
                namespace, ITx.UNISOLATED);

        if (tripleStore == null) {

            /*
             * Pick up properties configured for the client as defaults.
             */
            final Properties properties = fed.getClient().getProperties(
                ScaleOut.class.getName());

            tripleStore = new ScaleOutTripleStore(
                fed, namespace, ITx.UNISOLATED, properties);

            // create the triple store.
            tripleStore.create();

        }

        return tripleStore;

    }

    /**
     * Lookup the triple store instance using the specified timestamp. Pass in
     * ITx.UNISOLATED for the writable instance (not safe for concurrent
     * readers), otherwise use a transaction id or timestamp for a historical
     * view. This is demonstrated below.
     *
     * @param fed the jini federation
     * @param timestamp the timestamp or transaction id
     * @return the triple store instance
     * @throws Exception
     */
    private static AbstractTripleStore openTripleStore(
            JiniFederation fed, long timestamp) throws Exception {

        // locate the resource declaration (aka "open").
        AbstractTripleStore tripleStore =
            (AbstractTripleStore) fed.getResourceLocator().locate(
                namespace, timestamp);

        if (tripleStore == null) {
            throw new RuntimeException("triple store does not exist!");
        }

        return tripleStore;

    }

    /**
     * A writer task to load the U10 data set.
     */
    private static class BigdataWriter implements Runnable {

        /**
         * The jini federation.
         */
        private final JiniFederation fed;

        /**
         * Construct the writer task.
         *
         * @param fed the jini federation
         */
        public BigdataWriter(JiniFederation fed) {
            this.fed = fed;
        }

        /**
         * Opens the triple store and writes the LUBM ontology and U10 data
         * files. Does a commit after every file, which is not the most
         * efficient way to bulk load, but simulates incremental updates.
         */
        public void run() {

            try {

                // get the unisolated triple store for writing
                final AbstractTripleStore tripleStore =
                    openTripleStore(fed, ITx.UNISOLATED);

                // wrap the triple store in a Sesame SAIL
                final BigdataSail sail = new BigdataSail(tripleStore);
                final Repository repo = new BigdataSailRepository(sail);
                repo.initialize();

                try {
                    // load the data
                    loadU10(repo);
                } finally {
                    // shut it down, even if the load failed
                    repo.shutDown();
                }

            } catch (Exception ex) {
                // sample code: report the failure and let the task exit
                ex.printStackTrace();
            }

        }

        /**
         * Load the LUBM ontology and U10 data into a Sesame Repository.
         *
         * @param repo the sesame repository
         * @throws Exception
         */
        private void loadU10(Repository repo) throws Exception {

            // always, always autocommit = false
            RepositoryConnection cxn = repo.getConnection();
            cxn.setAutoCommit(false);

            try {

                // fast range count!
                // (the alternative, cxn.size(), is a full index scan)
                long stmtsBefore = ((BigdataSailRepository) repo)
                    .getDatabase().getStatementCount();
                log.info("statements before: " + stmtsBefore);

                long start = System.currentTimeMillis();

                // first add the LUBM ontology, then the sample data files
                loadOntology(cxn);
                loadDataFiles(cxn);

                // gather statistics
                long elapsed = System.currentTimeMillis() - start;
                long stmtsAfter = ((BigdataSailRepository) repo)
                    .getDatabase().getStatementCount();
                long stmtsAdded = stmtsAfter - stmtsBefore;
                int throughput =
                    (int) ((double) stmtsAdded / (double) elapsed * 1000d);
                log.info("statements after: " + stmtsAfter);
                log.info("loaded: " + stmtsAdded + " in " + elapsed
                    + " millis: " + throughput + " stmts/sec");

            } catch (Exception ex) {
                cxn.rollback();
                throw ex;
            } finally {
                // close the repository connection
                cxn.close();
            }

        }

        /**
         * Load the LUBM ontology from the classpath and commit.
         * <p>
         * The raw stream (not a platform-default-charset Reader) is handed to
         * the connection so the RDF/XML parser can honor the document's own
         * encoding declaration.
         *
         * @param cxn the repository connection (autocommit off)
         * @throws Exception
         */
        private void loadOntology(RepositoryConnection cxn) throws Exception {

            InputStream is =
                ScaleOut.class.getResourceAsStream("univ-bench.owl");
            try {
                cxn.add(new BufferedInputStream(is), LUBM.NS, RDFFormat.RDFXML);
                cxn.commit();
            } finally {
                is.close();
            }

        }

        /**
         * Load the U10 sample data files from the bundled zip, committing
         * after each file to simulate incremental updates.
         *
         * @param cxn the repository connection (autocommit off)
         * @throws Exception
         */
        private void loadDataFiles(RepositoryConnection cxn) throws Exception {

            InputStream is =
                ScaleOut.class.getResourceAsStream("U10.zip");
            ZipInputStream zis =
                new ZipInputStream(new BufferedInputStream(is));
            try {

                ZipEntry ze = null;
                while ((ze = zis.getNextEntry()) != null) {

                    if (ze.isDirectory()) {
                        continue;
                    }
                    log.info(ze.getName());

                    // copy the entry into memory: the zip stream itself must
                    // not be handed to the parser, which would close it
                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    byte[] buf = new byte[4096];
                    int count;
                    while ((count = zis.read(buf, 0, buf.length)) != -1) {
                        baos.write(buf, 0, count);
                    }

                    cxn.add(new ByteArrayInputStream(baos.toByteArray()),
                        LUBM.NS, RDFFormat.RDFXML);
                    // commit after every file (simulates incremental update)
                    cxn.commit();

                }

            } finally {
                // closes the underlying resource stream as well
                zis.close();
            }

        }

    }

    /**
     * A reader task to issue concurrent queries. Asks for the # of full
     * professors every three seconds.
     */
    private static class BigdataReader implements Runnable {

        /**
         * The jini federation.
         */
        private final JiniFederation fed;

        /**
         * Allows the reader to be stopped gracefully.
         */
        private volatile boolean kill = false;

        /**
         * Create the reader.
         *
         * @param fed the jini federation
         */
        public BigdataReader(JiniFederation fed) {
            this.fed = fed;
        }

        /**
         * Kills the reader gracefully.
         */
        public void kill() {
            this.kill = true;
        }

        /**
         * Opens a read-committed view of the triple store using the last
         * commit point and issues a query for a list of all LUBM full
         * professors. Does this every three seconds until killed.
         */
        public void run() {

            try {
                while (!kill) {
                    doQuery();
                    Thread.sleep(3000);
                }
            } catch (InterruptedException ex) {
                // restore the interrupt status and exit gracefully
                Thread.currentThread().interrupt();
            } catch (Exception ex) {
                ex.printStackTrace();
            }

        }

        /**
         * Issue the query against a read-only historical view.
         *
         * @throws Exception
         */
        private void doQuery() throws Exception {

            // this is how you get a read-only transaction. MUST be
            // committed or aborted later, see below.
            long transactionId =
                fed.getTransactionService().newTx(ITx.READ_COMMITTED);

            try {

                // open the read-only triple store
                final AbstractTripleStore tripleStore =
                    openTripleStore(fed, transactionId);

                // wrap it in a Sesame SAIL
                final BigdataSail sail = new BigdataSail(tripleStore);
                final Repository repo = new BigdataSailRepository(sail);
                repo.initialize();

                try {
                    RepositoryConnection cxn = repo.getConnection();
                    try {

                        final TupleQuery tupleQuery =
                            cxn.prepareTupleQuery(QueryLanguage.SPARQL, query);
                        tupleQuery.setIncludeInferred(true /* includeInferred */);
                        TupleQueryResult result = tupleQuery.evaluate();
                        try {
                            // count the results
                            int resultCount = 0;
                            while (result.hasNext()) {
                                BindingSet bindingSet = result.next();
                                // log.info(bindingSet);
                                resultCount++;
                            }
                            log.info(resultCount + " results");
                        } finally {
                            // release the result cursor
                            result.close();
                        }

                    } finally {
                        // close the repository connection
                        cxn.close();
                    }
                } finally {
                    // shut down the repository, even on error
                    repo.shutDown();
                }

            } finally {
                // MUST close the transaction, abort is sufficient
                fed.getTransactionService().abort(transactionId);
            }

        }

    }

}