package com.bigdata.rdf.graph.impl.bd; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.lang.reflect.Constructor; import java.util.Properties; import java.util.concurrent.atomic.AtomicInteger; import org.apache.log4j.Logger; import org.openrdf.sail.SailConnection; import com.bigdata.Banner; import com.bigdata.journal.BufferMode; import com.bigdata.journal.ITx; import com.bigdata.journal.Journal; import com.bigdata.rdf.graph.IGASProgram; import com.bigdata.rdf.graph.IGraphAccessor; import com.bigdata.rdf.graph.impl.bd.BigdataGASEngine.BigdataGraphAccessor; import com.bigdata.rdf.graph.impl.bd.BigdataGraphFixture.BigdataSailGraphLoader; import com.bigdata.rdf.graph.impl.util.GASRunnerBase; import com.bigdata.rdf.graph.util.GraphLoader; import com.bigdata.rdf.sail.BigdataSail; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; import com.bigdata.rdf.store.AbstractTripleStore; /** * Base class for running performance tests against the bigdata backend. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> */ public class BigdataGASRunner<VS, ES, ST> extends GASRunnerBase<VS, ES, ST> { private static final Logger log = Logger.getLogger(BigdataGASRunner.class); /** * Configured options for the {@link GASRunner}. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> */ protected class BigdataOptionData extends GASRunnerBase<VS, ES, ST>.OptionData { /** * The {@link BufferMode} to use. */ private BufferMode bufferModeOverride = null; // override only. /** * The namespace of the bigdata KB instance. */ private String namespaceOverride = "kb"; /** * The as-configured {@link Properties} for the {@link Journal}. */ private Properties properties; /** * The effective KB name. This is set by consulting * {@link #namespaceOverride} and the as configured {@link #properties}. */ private String namespace; /** * The backend. * * TODO Could start NSS and use SPARQL UPDATE "LOAD" to load the data. * That exposes the SPARQL end point for other purposes during the test. * Is this useful? It could also let us run the GASEngine on a remote * service (submit a callable to an HA server or define a REST API for * submitting these GAS algorithms). */ private Journal jnl; /** * <code>true</code> iff the backend is temporary (created on a * temporary backing file). Temporary backends are destroyed in * {@link #shutdown()}. */ private boolean isTemporary; /** * Set to <code>true</code> iff we determine that the data needs to be * loaded (e.g., the KB was empty, so we have to load the data sets). * * TODO Rename for clearer semantics. Basically, do we have to load the * data files or can we assume that the data are already loaded. Lift * into base class? */ private boolean newKB = false; /** * The #of edges in the KB instance and <code>-1</code> until set by * {@link BigdataGASRunner#loadFiles()}. */ private long nedges = -1; protected BigdataOptionData() { super(); } private Properties getProperties(final String resource) throws IOException { if (log.isInfoEnabled()) log.info("Reading properties: " + resource); InputStream is = null; try { // try the classpath is = getClass().getResourceAsStream(resource); if (is != null) { } else { // try file system. final File file = new File(resource); if (file.exists()) { is = new FileInputStream(file); } else { throw new IOException("Could not locate resource: " + resource); } } /* * Obtain a buffered reader on the input stream. */ final Properties properties = new Properties(); final Reader reader = new BufferedReader(new InputStreamReader(is)); try { properties.load(reader); } finally { try { reader.close(); } catch (Throwable t) { log.error(t); } } /* * Allow override of select options from the command line. */ { final String[] overrides = new String[] { // Journal options. com.bigdata.journal.Options.FILE, // // RDFParserOptions. // RDFParserOptions.Options.DATATYPE_HANDLING, // RDFParserOptions.Options.PRESERVE_BNODE_IDS, // RDFParserOptions.Options.STOP_AT_FIRST_ERROR, // RDFParserOptions.Options.VERIFY_DATA, // // DataLoader options. // DataLoader.Options.BUFFER_CAPACITY, // DataLoader.Options.CLOSURE, // DataLoader.Options.COMMIT, // DataLoader.Options.FLUSH, }; for (String s : overrides) { if (System.getProperty(s) != null) { // Override/set from the environment. final String v = System.getProperty(s); if (log.isInfoEnabled()) log.info("OVERRIDE:: Using: " + s + "=" + v); properties.setProperty(s, v); } } } return properties; } finally { if (is != null) { try { is.close(); } catch (Throwable t) { log.error(t); } } } } /** * Initialization after all arguments have been set. */ @Override public void init() throws Exception { super.init(); properties = getProperties(propertyFile); /* * Note: Allows override through the command line argument. The default * is otherwise the default and the value in the properties file (if * any) will be used unless it is overridden. */ final BufferMode bufferMode = bufferModeOverride == null ? BufferMode .valueOf(properties.getProperty(Journal.Options.BUFFER_MODE, Journal.Options.DEFAULT_BUFFER_MODE)) : bufferModeOverride; properties.setProperty(Journal.Options.BUFFER_MODE, bufferMode.name()); final boolean isTransient = !bufferMode.isStable(); if (isTransient) { isTemporary = true; } else { final String fileStr = properties.getProperty(Journal.Options.FILE); if (fileStr == null) { /* * We will use a temporary file that we create here. The journal * will be destroyed below. */ isTemporary = true; final File tmpFile = File.createTempFile( BigdataGASRunner.class.getSimpleName(), Journal.Options.JNL); // Set this on the Properties so it will be used by the jnl. properties.setProperty(Journal.Options.FILE, tmpFile.getAbsolutePath()); } else { // real file is named. isTemporary = false; } } // The effective KB name. namespace = namespaceOverride == null ? properties .getProperty(BigdataSail.Options.NAMESPACE, BigdataSail.Options.DEFAULT_NAMESPACE) : namespaceOverride; properties.setProperty(BigdataSail.Options.NAMESPACE, namespace); // Open Journal. jnl = new Journal(properties); // Locate/create KB. { final BigdataSail sail; if (isTemporary) { new BigdataSail(namespace, jnl).create(properties); newKB = true; } else { sail = new BigdataSail(namespace, jnl); if (!sail.exists()) { // create. sail.create(properties); newKB = true; } else { // exists. final BigdataSailConnection con = sail.getReadOnlyConnection(); try { newKB = con.getTripleStore().getStatementCount() == 0L; } finally { con.close(); } } } } } @Override public void shutdown() { if (jnl != null) { if (isTemporary) { log.warn("Destroying temporary journal."); jnl.destroy(); } else { jnl.close(); } } super.shutdown(); } /** * Return <code>true</code>iff one or more arguments can be parsed * starting at the specified index. * * @param i * The index into the arguments. * @param args * The arguments. * @return <code>true</code> iff any arguments were recognized. */ @Override public boolean handleArg(final AtomicInteger i, final String[] args) { if (super.handleArg(i, args)) { return true; } final String arg = args[i.get()]; if (arg.equals("-bufferMode")) { final String s = args[i.incrementAndGet()]; bufferModeOverride = BufferMode.valueOf(s); } else if (arg.equals("-namespace")) { final String s = args[i.incrementAndGet()]; namespaceOverride = s; } else { return false; } return true; } /** * {@inheritDoc} * * TODO report #of vertices (DISTINCT UNION (?s, ?o) * * TODO What happened to the predicate summary/histogram/distribution * code? */ @Override public void report(final StringBuilder sb) { sb.append(", edges(kb)=" + nedges); sb.append(", namespace=" + namespace);// sb.append(", bufferMode=" + jnl.getBufferStrategy().getBufferMode()); } } /** * Factory for the {@link OptionData}. */ @Override protected OptionData newOptionData() { return new BigdataOptionData(); } @Override protected BigdataGASEngine newGASEngine() { final BigdataOptionData opt = getOptionData(); return new BigdataGASEngine(opt.jnl, opt.nthreads); } @Override protected IGraphAccessor newGraphAccessor() { final BigdataOptionData opt = getOptionData(); /* * Use a read-only view (sampling depends on access to the BTree rather * than the ReadCommittedIndex). */ final BigdataGraphAccessor graphAccessor = new BigdataGraphAccessor( opt.jnl, opt.namespace, opt.jnl.getLastCommitTime()); return graphAccessor; } @SuppressWarnings("unchecked") @Override protected BigdataOptionData getOptionData() { return (BigdataOptionData) super.getOptionData(); } /** * Run a GAS analytic against some data set. * * @param args * USAGE:<br/> * <code>(options) analyticClass propertyFile</code> * <p> * <i>Where:</i> * <dl> * <dt>propertyFile</dt> * <dd>A java properties file for a standalone {@link Journal}.</dd> * </dl> * and <i>options</i> are any of the options defined for the * {@link GASRunnerBase} PLUS any of: * <dl> * <dt>-bufferMode</dt> * <dd>Overrides the {@link BufferMode} (if any) specified in the * <code>propertyFile</code>.</dd> * <dt>-namespace</dt> * <dd>The namespace of the default SPARQL endpoint (the * namespace will be <code>kb</code> if none was specified when * the triple/quad store was created).</dd> * </p> * @throws ClassNotFoundException */ public BigdataGASRunner(final String[] args) throws ClassNotFoundException { super(args); Banner.banner(); } /** * Return an instance of the {@link IGASProgram} to be evaluated. */ protected IGASProgram<VS, ES, ST> newGASProgram() { final Class<IGASProgram<VS, ES, ST>> cls = getOptionData().analyticClass; try { final Constructor<IGASProgram<VS, ES, ST>> ctor = cls .getConstructor(new Class[] {}); final IGASProgram<VS, ES, ST> gasProgram = ctor .newInstance(new Object[] {}); return gasProgram; } catch (Exception e) { throw new RuntimeException(e); } } @Override public void loadFiles() throws IOException { final BigdataOptionData opt = getOptionData(); final Journal jnl = opt.jnl; final String namespace = opt.namespace; final String[] loadSet = opt.loadSet.toArray(new String[0]); // Load data using the unisolated view. final AbstractTripleStore kb = (AbstractTripleStore) jnl .getResourceLocator().locate(namespace, ITx.UNISOLATED); if (opt.newKB && loadSet.length > 0) { final BigdataSail sail = new BigdataSail(kb); try { try { sail.initialize(); loadFiles(sail, loadSet); } finally { if (sail.isOpen()) sail.shutDown(); } } catch (Exception ex) { throw new RuntimeException(ex); } } // total #of edges in that graph. opt.nedges = kb.getStatementCount(); } private void loadFiles(final BigdataSail sail, final String[] loadSet) throws Exception { boolean ok = false; final SailConnection cxn = sail.getUnisolatedConnection(); try { final GraphLoader loader = new BigdataSailGraphLoader(cxn); for (String f : loadSet) { loader.loadGraph(null/* fallback */, f/* resource */); } cxn.commit(); ok = true; } finally { if (!ok) cxn.rollback(); cxn.close(); } } /** * Performance testing harness. * * @see #GASRunner(String[]) */ @SuppressWarnings("rawtypes") public static void main(final String[] args) throws Exception { new BigdataGASRunner(args).call(); } }