/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Oct 9, 2007
 */

package com.bigdata.journal;

import java.nio.channels.ClosedByInterruptException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

import com.bigdata.btree.BTree;
import com.bigdata.btree.IIndex;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.testutil.ExperimentDriver;
import com.bigdata.testutil.ExperimentDriver.IComparisonTest;
import com.bigdata.testutil.ExperimentDriver.Result;
import com.bigdata.util.Bytes;
import com.bigdata.util.NV;

/**
 * Stress tests for concurrent processing of operations on named unisolated
 * indices.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public class StressTestConcurrentUnisolatedIndices extends ProxyTestCase<Journal>
        implements IComparisonTest {

    public StressTestConcurrentUnisolatedIndices() {
    }

    public StressTestConcurrentUnisolatedIndices(final String name) {
        super(name);
    }

    private Journal journal;

    @Override
    public void setUpComparisonTest(final Properties properties)
            throws Exception {

        journal = new Journal(properties);

    }

    @Override
    public void tearDownComparisonTest() throws Exception {

        if (journal != null) {

            if (journal.isOpen()) {

                journal.shutdownNow();

            }

            journal.deleteResources();

        }

        // release reference.
        journal = null;

    }

    /**
     * A stress test with a small pool of concurrent clients.
     */
    public void test_concurrentClients() throws InterruptedException {

        final Properties properties = getProperties();

        final Journal journal = new Journal(properties);

        try {

//            if (journal.getBufferStrategy() instanceof MappedBufferStrategy) {
//
//                /*
//                 * @todo the mapped buffer strategy has become cpu bound w/o
//                 * termination when used with concurrent clients - this needs
//                 * to be looked into further.
//                 */
//
//                fail("Mapped buffer strategy may have problem with tx concurrency");
//
//            }

            /*
             * Note: Using a timeout will cause any tasks still running when the
             * timeout expires to be interrupted. The code is clearly stable when
             * the timeout is Long.MAX_VALUE, even with the presence of a number
             * of spurious exceptions from the failureRate. However, there are
             * clearly problems which emerge when the timeout is less than the
             * time required to complete the scheduled tasks. A variety of
             * errors can emerge when the scheduled tasks are all cancelled.
             * It is difficult to say whether any of those problems could be
             * observed by an application outside of a shutdownNow() scenario.
             */
            doConcurrentClientTest(journal, //
                    30, // timeout
                    20, // 3,// nresources // 20
                    1, // minLocks
                    3, // 5 // maxLocks
                    100, // 5000, // ntrials // 1000
                    3, // keyLen
                    1000, // 1000, // nops
                    0.02d // failureRate
                    );

        } finally {

            journal.destroy();

        }

    }

    /**
     * A stress test of concurrent writers on one or more named indices.
     *
     * @param journal
     *            The database.
     *
     * @param timeout
     *            The #of seconds before the test will terminate.
     *
     * @param nresources
     *            The #of named indices that will be used by the tasks.
     *
     * @param minLocks
     *            The minimum #of resources on which a writer will obtain a
     *            lock in [<i>0</i>:<i>nresources</i>].
     *
     * @param maxLocks
     *            The maximum #of resources on which a writer will obtain a
     *            lock in [<i>minLocks</i>:<i>nresources</i>].
     *
     * @param ntrials
     *            The #of transactions to execute.
     *
     * @param keyLen
     *            The length of the random unsigned byte[] keys used in the
     *            operations. The longer the keys the less likely it is that
     *            there will be a write-write conflict (that concurrent txs
     *            will write on the same key).
     *
     * @param nops
     *            The #of operations to be performed in each transaction.
     *
     * @param failureRate
     *            The percentage of {@link WriteTask}s that will throw a
     *            {@link SpuriousException} rather than completing normally.
     *
     * @todo factor out the operation to be run as a test parameter?
     */
    static public Result doConcurrentClientTest(Journal journal, long timeout,
            int nresources, int minLocks, int maxLocks, int ntrials,
            int keyLen, int nops, double failureRate)
            throws InterruptedException {

        if (journal == null)
            throw new IllegalArgumentException();

        if (timeout <= 0)
            throw new IllegalArgumentException();

        if (nresources <= 0)
            throw new IllegalArgumentException();

        if (minLocks < 0)
            throw new IllegalArgumentException();

        if (maxLocks < minLocks || maxLocks > nresources)
            throw new IllegalArgumentException();

        if (ntrials < 1)
            throw new IllegalArgumentException();

        if (keyLen < 1)
            throw new IllegalArgumentException();

        if (nops < 0)
            throw new IllegalArgumentException();

        if (failureRate < 0.0 || failureRate > 1.0)
            throw new IllegalArgumentException();

        final Random r = new Random();

        /*
         * Setup the named resources/indices.
         */
        final String[] resources = new String[nresources];
        {

            for (int i = 0; i < nresources; i++) {

                resources[i] = "index#" + i;

                journal.registerIndex(resources[i], BTree.create(journal,
                        new IndexMetadata(resources[i], UUID.randomUUID())));

            }

            journal.commit();

        }

        if (log.isInfoEnabled())
            log.info("Created indices: " + Arrays.toString(resources));

        /*
         * Setup the tasks that we will submit.
         */
        final Collection<AbstractTask<Void>> tasks = new HashSet<AbstractTask<Void>>();

        // Maps each index name onto the thread currently holding its lock.
        // The WriteTask uses this map to detect failures in the lock manager.
        final ConcurrentHashMap<String, Thread> btrees = new ConcurrentHashMap<String, Thread>();

        for (int i = 0; i < ntrials; i++) {

            // choose nlocks and the indices to use.
            final int nlocks = r.nextInt(maxLocks - minLocks) + minLocks;

            assert nlocks >= minLocks && nlocks <= maxLocks;

            final Collection<String> tmp = new HashSet<String>(nlocks);

            while (tmp.size() < nlocks) {

                tmp.add(resources[r.nextInt(nresources)]);

            }

            final String[] resource = tmp.toArray(new String[nlocks]);

            tasks.add(new WriteTask(journal, resource, i, keyLen, nops,
                    failureRate, btrees));

        }

        /*
         * Run all tasks and wait for up to the timeout for them to complete.
         */
        if (log.isInfoEnabled())
            log.info("Submitting " + tasks.size() + " tasks");

        final long begin = System.currentTimeMillis();

        final List<Future<Void>> results = journal.invokeAll(tasks, timeout,
                TimeUnit.SECONDS);

        final long elapsed = System.currentTimeMillis() - begin;

        /*
         * Examine the futures to see how things went.
         */
        final Iterator<Future<Void>> itr = results.iterator();

        int nfailed = 0; // #of tasks that failed.
//        int nretry = 0; // #of tasks that threw RetryException
        int ninterrupt = 0; // #of interrupted tasks.
        int ncommitted = 0; // #of tasks that successfully committed.
        int nuncommitted = 0; // #of tasks that did not complete in time.

        while (itr.hasNext()) {

            final Future<?> future = itr.next();

            if (future.isCancelled()) {

                nuncommitted++;

                continue;

            }

            try {

                future.get();

                ncommitted++;

            } catch (ExecutionException ex) {

                if (isInnerCause(ex, InterruptedException.class)
                        || isInnerCause(ex, ClosedByInterruptException.class)) {

                    /*
                     * Note: Tasks will be interrupted if a timeout occurs when
                     * attempting to run the submitted tasks - this is normal.
                     */
                    log.warn("Interrupted: " + ex);

                    ninterrupt++;

                } else if (isInnerCause(ex, SpuriousException.class)) {

                    nfailed++;

//                } else if (isInnerCause(ex, RetryException.class)) {
//
//                    nretry++;

                } else {

                    // Other kinds of exceptions are errors.
                    fail("Not expecting: " + ex, ex);

                }

            }

        }

        final WriteExecutorService writeService = journal
                .getConcurrencyManager().getWriteService();

        journal.shutdownNow();

        /*
         * Compute bytes written per second.
         */
        final long seconds = TimeUnit.SECONDS.convert(elapsed,
                TimeUnit.MILLISECONDS);

        final long bytesWrittenPerSecond = journal.getRootBlockView()
                .getNextOffset() / (seconds == 0 ? 1 : seconds);

        final Result ret = new Result();

        // these are the results.
        ret.put("nfailed", "" + nfailed);
//        ret.put("nretry", "" + nretry);
        ret.put("ncommitted", "" + ncommitted);
        ret.put("ninterrupt", "" + ninterrupt);
        ret.put("nuncommitted", "" + nuncommitted);
        ret.put("elapsed(ms)", "" + elapsed);
        ret.put("bytesWrittenPerSec", "" + bytesWrittenPerSecond);
        ret.put("tasks/sec", "" + (ncommitted * 1000 / elapsed));
        ret.put("maxRunning", "" + writeService.getMaxRunning());
        ret.put("maxPoolSize", "" + writeService.getMaxPoolSize());
        ret.put("maxLatencyUntilCommit", "" + writeService.getMaxCommitWaitingTime());
        ret.put("maxCommitLatency", "" + writeService.getMaxCommitServiceTime());

        System.err.println(ret.toString(true/* newline */));

        journal.deleteResources();

        return ret;

    }

    static private final Random r = new Random();

    /**
     * A task that writes on named unisolated index(s).
     */
    public static class WriteTask extends AbstractTask<Void> {

        private final int trial;
        private final int keyLen;
        private final int nops;
        private final double failureRate;
        private final ConcurrentHashMap<String/* indexName */, Thread> btrees;

        public WriteTask(final IConcurrencyManager concurrencyManager,
                final String[] resource, final int trial, final int keyLen,
                final int nops, final double failureRate,
                final ConcurrentHashMap<String, Thread> btrees) {

            super(concurrencyManager, ITx.UNISOLATED, resource);

            this.trial = trial;

            this.keyLen = keyLen;

            this.nops = nops;

            this.failureRate = failureRate;

            this.btrees = btrees;

        }

        @Override
        protected String getTaskName() {

            return super.getTaskName() + "#" + trial;

        }

        /**
         * Executes random operations on the named unisolated index(s).
         *
         * @return null
         */
        @Override
        public Void doTask() throws Exception {

            // the index names on which the writer holds a lock.
            final String[] resource = getResource();

            final IIndex[] indices = new IIndex[resource.length];

            final Thread t = Thread.currentThread();

            try {

                /*
                 * First, mark each index in the [btrees] concurrent hash map
                 * with the thread in which this task instance is executing.
                 *
                 * Note: These marks will be cleared by a finally {} clause
                 * below. They exist to detect failures in the lock manager.
                 */
                for (int i = 0; i < resource.length; i++) {

                    final String name = resource[i];

                    final Thread other = btrees.putIfAbsent(name, t);

                    if (other != null) {

                        log.error("Unisolated index already in use: "
                                + resource[i]);

                        throw new AssertionError(
                                "Unisolated index already in use: "
                                        + resource[i] + ", currentThread=" + t
                                        + ", otherThread=" + other);

                    }

                    indices[i] = getJournal().getIndex(name);

                }

                /*
                 * Random write operations on the named index(s).
                 */
                for (int i = 0; i < nops; i++) {

                    final IIndex ndx = indices[i % resource.length];

                    final byte[] key = new byte[keyLen];

                    r.nextBytes(key);

                    if (r.nextInt(100) > 10) {

                        final byte[] val = new byte[5];

                        r.nextBytes(val);

                        ndx.insert(key, val);

                    } else {

                        ndx.remove(key);

                    }

                } // for( i : nops )

                if (r.nextDouble() < failureRate) {

                    throw new SpuriousException();

                }

                return null;

            } finally {

                /*
                 * Clear the marks from the concurrent hash map which associate
                 * the named resources (the indices) with the thread in which
                 * this test was executing.
                 */
                for (int i = 0; i < resource.length; i++) {

                    if (indices[i] != null) { // do NOT remove if never added!

                        final String name = resource[i];

                        final Thread tmp = btrees.remove(name);

                        if (tmp != t) {

                            throw new AssertionError(
                                    "Index associated with another thread? index="
                                            + name + ", currentThread=" + t
                                            + ", otherThread=" + tmp);

                        }

                    }

                }

            }

        }

    } // class WriteTask

    /**
     * Thrown by a {@link WriteTask} if it is selected for abort based on the
     * {@link TestOptions#FAILURE_RATE}.
     *
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    private static class SpuriousException extends RuntimeException {

        private static final long serialVersionUID = 5032559382234334218L;

    }

    /**
     * Runs a single instance of the test as configured in the code.
     *
     * @todo try running the test for more than 30 seconds. Note that a larger
     *       journal maximum extent is required since the journal will
     *       otherwise overflow.
     *
     * @todo can we get to the point of being IO bound with lots of small write
     *       operations on lots of indices using Disk and 2 CPUs? 4 CPUs? With
     *       larger write operations? With fewer indices?
     *
     * @todo Try to make this a correctness test since there are lots of little
     *       ways in which things can go wrong.
     *
     * @todo use the failureRate to vet a strategy where we checkpoint indices
     *       after each task and roll back to the prior checkpoint iff a task
     *       fails rather than discarding the entire commit group.
     *
     * @see ExperimentDriver
     * @see GenerateExperiment
     */
    public static void main(final String[] args) throws Exception {

        final Properties properties = new Properties();

        // avoids journal overflow when running out to 60 seconds.
        properties.put(Options.MAXIMUM_EXTENT, "" + Bytes.megabyte32 * 400);

//        properties.setProperty(Options.FORCE_ON_COMMIT, ForceEnum.No.toString());

//        properties.setProperty(Options.BUFFER_MODE, BufferMode.Transient.toString());
//        properties.setProperty(Options.BUFFER_MODE, BufferMode.Direct.toString());
//        properties.setProperty(Options.BUFFER_MODE, BufferMode.Mapped.toString());
        properties.setProperty(Options.BUFFER_MODE, BufferMode.Disk.toString());

        properties.setProperty(Options.CREATE_TEMP_FILE, "true");

        properties.setProperty(TestOptions.TIMEOUT, "60");

        properties.setProperty(TestOptions.NRESOURCES, "10");

        properties.setProperty(TestOptions.MIN_LOCKS, "1");

        properties.setProperty(TestOptions.MAX_LOCKS, "3");

        properties.setProperty(TestOptions.NTRIALS, "20000");

        properties.setProperty(TestOptions.KEYLEN, "4");

        properties.setProperty(TestOptions.NOPS, "4");

        properties.setProperty(TestOptions.FAILURE_RATE, "0.00");

        IComparisonTest test = new StressTestConcurrentUnisolatedIndices();

        test.setUpComparisonTest(properties);

        try {

            test.doComparisonTest(properties);

        } finally {

            try {

                test.tearDownComparisonTest();

            } catch (Throwable t) {

                log.warn("Tear down problem: " + t, t);

            }

        }

    }

    /**
     * Additional properties understood by this test.
     */
    public static interface TestOptions extends ConcurrencyManager.Options {

        /**
         * The timeout for the test (seconds).
         */
        public static final String TIMEOUT = "timeout";

        /**
         * The #of named resources from which {@link WriteTask}s may choose the
         * indices on which they will write.
         */
        public static final String NRESOURCES = "nresources";

        /**
         * The minimum #of locks that a writer will obtain (0 or more, but a
         * writer with zero locks will not write on anything).
         */
        public static final String MIN_LOCKS = "minLocks";

        /**
         * The maximum #of locks that a writer will obtain (LTE
         * {@link #NRESOURCES}). A writer will write on each resource that it
         * locks.
         */
        public static final String MAX_LOCKS = "maxLocks";

        /**
         * The #of trials (aka transactions) to run.
         */
        public static final String NTRIALS = "ntrials";

        /**
         * The length of the keys used in the test. This directly impacts the
         * likelihood of a write-write conflict. Shorter keys mean more
         * conflicts. However, note that conflicts are only possible when there
         * are at least two concurrent clients running.
         */
        public static final String KEYLEN = "keyLen";

        /**
         * The #of operations in each trial.
         */
        public static final String NOPS = "nops";

        /**
         * The failure rate [0.0:1.0]. A {@link WriteTask} aborts by throwing a
         * {@link SpuriousException}.
         */
        public static final String FAILURE_RATE = "failureRate";

    }

    /**
     * Setup and run a test.
     *
     * @param properties
     *            There are no "optional" properties - you must make sure that
     *            each property has a defined value.
     */
    @Override
    public Result doComparisonTest(final Properties properties)
            throws Exception {

        final long timeout = Long.parseLong(properties
                .getProperty(TestOptions.TIMEOUT));

        final int nresources = Integer.parseInt(properties
                .getProperty(TestOptions.NRESOURCES));

        final int minLocks = Integer.parseInt(properties
                .getProperty(TestOptions.MIN_LOCKS));

        final int maxLocks = Integer.parseInt(properties
                .getProperty(TestOptions.MAX_LOCKS));

        final int ntrials = Integer.parseInt(properties
                .getProperty(TestOptions.NTRIALS));

        final int keyLen = Integer.parseInt(properties
                .getProperty(TestOptions.KEYLEN));

        final int nops = Integer.parseInt(properties
                .getProperty(TestOptions.NOPS));

        final double failureRate = Double.parseDouble(properties
                .getProperty(TestOptions.FAILURE_RATE));

        Result result = doConcurrentClientTest(journal, timeout, nresources,
                minLocks, maxLocks, ntrials, keyLen, nops, failureRate);

        return result;

    }

    /**
     * Experiment generation utility class.
     *
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    public static class GenerateExperiment extends ExperimentDriver {

        /**
         * Generates an XML file that can be run by {@link ExperimentDriver}.
         *
         * @param args
         */
        public static void main(final String[] args) throws Exception {

            // this is the test to be run.
            final String className = StressTestConcurrentUnisolatedIndices.class
                    .getName();

            final Map<String, String> defaultProperties = new HashMap<String, String>();

            // force delete of the files on close of the journal under test.
            defaultProperties.put(Options.CREATE_TEMP_FILE, "true");

            // avoids journal overflow when running out to 60 seconds.
            defaultProperties.put(Options.MAXIMUM_EXTENT, ""
                    + Bytes.megabyte32 * 400);

            /*
             * Set defaults for each condition.
             */
            defaultProperties.put(TestOptions.TIMEOUT, "30");

            defaultProperties.put(TestOptions.NTRIALS, "10000");

            // @todo vary nresources, minLocks, and maxLocks.
            defaultProperties.put(TestOptions.NRESOURCES, "10");

            defaultProperties.put(TestOptions.MIN_LOCKS, "1");

            defaultProperties.put(TestOptions.MAX_LOCKS, "3");

            defaultProperties.put(TestOptions.KEYLEN, "4");

            defaultProperties.put(TestOptions.NOPS, "100");

            defaultProperties.put(TestOptions.FAILURE_RATE, "0.02");

            /*
             * Build up the conditions.
             */
            List<Condition> conditions = new ArrayList<Condition>();

            conditions.add(new Condition(defaultProperties));

            // @todo also vary the maximum pool size.
            // @todo report the maximum pool size as a result along with maxrunning.
            conditions = apply(conditions, new NV[][] {
                    new NV[] { new NV(TestOptions.WRITE_SERVICE_CORE_POOL_SIZE, "1"), },
                    new NV[] { new NV(TestOptions.WRITE_SERVICE_CORE_POOL_SIZE, "10"), },
                    new NV[] { new NV(TestOptions.WRITE_SERVICE_CORE_POOL_SIZE, "20"), },
                    new NV[] { new NV(TestOptions.WRITE_SERVICE_CORE_POOL_SIZE, "50"), },
                    new NV[] { new NV(TestOptions.WRITE_SERVICE_CORE_POOL_SIZE, "100"), },
                    });

//            conditions = apply(conditions, new NV[] {
//                    new NV(TestOptions.NOPS, "1"),
//                    new NV(TestOptions.NOPS, "10"),
//                    new NV(TestOptions.NOPS, "100"),
//                    new NV(TestOptions.NOPS, "1000"),
//                    });
//
//            conditions = apply(conditions, new NV[] {
//                    new NV(TestOptions.KEYLEN, "4"),
//                    new NV(TestOptions.KEYLEN, "8"),
////                    new NV(TestOptions.KEYLEN, "32"),
////                    new NV(TestOptions.KEYLEN, "64"),
////                    new NV(TestOptions.KEYLEN, "128"),
//                    });

            conditions = apply(conditions, new NV[][] { //
                    new NV[] { new NV(Options.BUFFER_MODE, BufferMode.Transient.toString()), }, //
                    new NV[] { new NV(Options.BUFFER_MODE, BufferMode.Direct.toString()), }, //
                    new NV[] { new NV(Options.BUFFER_MODE, BufferMode.Direct.toString()),
                            new NV(Options.FORCE_ON_COMMIT, ForceEnum.No.toString()), }, //
                    new NV[] { new NV(Options.BUFFER_MODE, BufferMode.Mapped.toString()), }, //
                    new NV[] { new NV(Options.BUFFER_MODE, BufferMode.Disk.toString()), }, //
                    new NV[] { new NV(Options.BUFFER_MODE, BufferMode.Disk.toString()),
                            new NV(Options.FORCE_ON_COMMIT, ForceEnum.No.toString()), }, //
                    });

            final Experiment exp = new Experiment(className, defaultProperties,
                    conditions);

            // copy the output into a file and then you can run it later.
            System.err.println(exp.toXML());

        }

    }

}