StressTestConcurrent.java example

Explorer
blazegraph-master
- database-master
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on May 23, 2007
 */

package com.bigdata.service;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.log4j.Level;

import com.bigdata.btree.BTree;
import com.bigdata.btree.IIndex;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.NOPTupleSerializer;
import com.bigdata.btree.keys.ASCIIKeyBuilderFactory;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.btree.proc.BatchInsert.BatchInsertConstructor;
import com.bigdata.btree.proc.BatchRemove.BatchRemoveConstructor;
import com.bigdata.counters.AbstractStatisticsCollector;
import com.bigdata.journal.BasicExperimentConditions;
import com.bigdata.journal.BufferMode;
import com.bigdata.journal.DiskOnlyStrategy;
import com.bigdata.journal.ITx;
import com.bigdata.journal.TemporaryRawStore;
import com.bigdata.journal.TemporaryStore;
import com.bigdata.journal.ValidationError;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.rawstore.WormAddressManager;
import com.bigdata.resources.OverflowCounters;
import com.bigdata.resources.ResourceManager;
import com.bigdata.service.DataService.Options;
import com.bigdata.service.ndx.ClientIndexView;
import com.bigdata.testutil.ExperimentDriver;
import com.bigdata.testutil.ExperimentDriver.IComparisonTest;
import com.bigdata.testutil.ExperimentDriver.Result;
import com.bigdata.util.Bytes;
import com.bigdata.util.DaemonThreadFactory;
import com.bigdata.util.NV;
import com.bigdata.util.concurrent.ThreadPoolExecutorStatisticsTask;

/**
 * Test suite for concurrent operations on a {@link DataService}. A federation
 * consisting of a {@link MetadataService} and a single {@link DataService} is
 * started. A client is created, connects to the federation, and registers an
 * index on the federation. A pool of threads is created for that client and
 * populated with a number of operations. The threads then write and read
 * concurrently using unisolated operations on the data services. This test can
 * be used to observe the throughput and queue depth arising from a variety
 * of data service and client configurations.
 * 
 * @todo The primary metrics reported by the test are elapsed time and
 *       operations per second. Compute the throughput in terms of bytes per
 *       second for writes. This is interesting since it allows us to compare
 *       the effect of batch size on writes. Add parameterization for read vs
 *       write vs remove so that we can test the effect of batch size for
 *       operation profiles based on each of those kinds of operations.
 * 
 * @todo get the comparison support working. Parameterize the
 *       {@link DataService} configuration from the test suite so that we can
 *       test Disk vs Direct, forceCommit=No vs default, and other properties
 *       that might have interesting effects. These things can be directly
 *       manipulated in the mean time by editing the DataServer0.properties
 *       file.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 */
public class StressTestConcurrent extends
        AbstractEmbeddedFederationTestCase implements IComparisonTest {

    /**
     * No-argument constructor; delegates to the no-argument constructor of
     * the superclass.
     */
    public StressTestConcurrent() {
        super();
    }

    /**
     * Constructor which hands its argument through to the superclass
     * constructor.
     * 
     * @param arg0
     *            Forwarded unchanged to the superclass (presumably the name
     *            of the test case -- confirm against the base class).
     */
    public StressTestConcurrent(String arg0) {
        super(arg0);
    }

    /**
     * Returns the configuration for this stress test. The properties of the
     * superclass are used as the defaults and are overridden here in order to:
     * use a disk-backed journal with a small initial and maximum extent, enable
     * overflow processing, run two data services, allow one move per target
     * with the CPU threshold for moves set to zero, turn off the initial round
     * robin policy of the load balancer, enable scatter splits, use a small
     * nominal shard size, and disable both split and overflow acceleration.
     * 
     * @return The properties for the test.
     * 
     * @todo try varying the releaseAge
     */
    @Override
    public Properties getProperties() {

        // The inherited properties serve as the defaults.
        final Properties p = new Properties(super.getProperties());

        // This test must use the disk so that it can trigger overflows.
        p.setProperty(Options.BUFFER_MODE, BufferMode.Disk.toString());

        /*
         * Note: Making the initial and maximum extent small triggers overflow
         * a lot, which introduces a lot of overhead. However, the ratios of
         * the number of indices, the maximum journal extent, and the nominal
         * shard size must be appropriate or most of the overflow operations
         * will be index segment builds with few or no splits (the shards will
         * not grow large enough to be split).
         */
        final String extent = String.valueOf(Bytes.megabyte);
        p.setProperty(Options.INITIAL_EXTENT, extent);
        p.setProperty(Options.MAXIMUM_EXTENT, extent);

        // Overflow processing must be enabled.
        p.setProperty(Options.OVERFLOW_ENABLED, "true");

        // Two data services. Note: restricting the test to a single data
        // service is another way to disable moves.
        p.setProperty(
                com.bigdata.service.EmbeddedClient.Options.NDATA_SERVICES, "2");

        // Moves are enabled (one per target) ...
        p.setProperty(ResourceManager.Options.MAXIMUM_MOVES_PER_TARGET, "1");

        // ... and the CPU threshold for moves is disabled.
        p.setProperty(
                ResourceManager.Options.MOVE_PERCENT_CPU_TIME_THRESHOLD, ".0");

        /*
         * Note: Disables the initial round robin policy for the load balancer
         * service so that it will use our fake scores.
         */
        p.setProperty(
                LoadBalancerService.Options.INITIAL_ROUND_ROBIN_UPDATE_COUNT,
                "0");

        // The load balancer update delay could also be set here, e.g.:
        // p.setProperty(LoadBalancerService.Options.UPDATE_DELAY, "10000");

        // Scatter splits must be enabled.
        p.setProperty(Options.SCATTER_SPLIT_ENABLED, "true");

        // Small shards.
        p.setProperty(Options.NOMINAL_SHARD_SIZE,
                String.valueOf(Bytes.kilobyte * 10));

        /*
         * Note: Overflow frequency is controlled by the small maximum extent
         * specified above, so overflow acceleration should be turned off here
         * or it will trigger every few writes!
         * 
         * Likewise, since we are overflowing so frequently, split acceleration
         * does not give us enough time to build up enough writes and an index
         * with very few writes gets split into too many index partitions.
         */

        // Split acceleration is disabled. A value of "50" gives lots of
        // acceleration (too much).
        p.setProperty(Options.ACCELERATE_SPLIT_THRESHOLD, "0");

        // Overflow acceleration is disabled. A value of (Bytes.gigabyte * 10)
        // gives lots of acceleration (too much).
        p.setProperty(Options.ACCELERATE_OVERFLOW_THRESHOLD, "0");

        return p;

    }
    
    /**
     * Sets up the fixture for a comparison run by delegating to the
     * <code>setUp()</code> method of the superclass.
     * 
     * @param properties
     *            Not consulted by this implementation.
     * 
     * @throws Exception
     *             if thrown by the superclass <code>setUp()</code>.
     */
    @Override
    public void setUpComparisonTest(Properties properties) throws Exception {
        super.setUp();
    }

    /**
     * Tears down the fixture after a comparison run by delegating to the
     * <code>tearDown()</code> method of the superclass.
     * 
     * @throws Exception
     *             if thrown by the superclass <code>tearDown()</code>.
     */
    @Override
    public void tearDownComparisonTest() throws Exception {
        super.tearDown();
    }

    /**
     * Test of N concurrent operations. The parameters of the run are fixed
     * here and handed to <code>doConcurrentClientTest</code> with ground truth
     * verification enabled.
     * 
     * @todo run a performance analysis generating a graph of response time by
     *       queue length. the queue length can be the #of parallel clients but
     *       be sure to set up the {@link ClientIndexView} so that it does not
     *       cap the concurrency or it will skew the results. also note that the
     *       maximum possible parallelism will be capped by the #of index
     *       partitions and (if indices are not being split) by the #of indices.
     * 
     * @todo declare a variety of tests (a) overflow disabled; (b) w/ ground
     *       truth; (c) overflow enabled; (d) with ground truth. these probably
     *       need to be each in their own subclass in order to get the setup
     *       correct since the properties need to be overridden. See
     *       {@link #doComparisonTest(Properties)}.
     * 
     * @throws Exception
     */
    public void test_stressTest2() throws Exception {

        // max concurrency limited by #of index partitions.
        final int nclients = 10;

        // 20 or 40 (Note: ignored for correctness testing!)
        final long timeout = 50;

        // 1000 or 10000
        final int ntrials = 1000;

        // @todo not used right now.
        final int keyLen = 4;

        // 100
        final int nops = 100;

        final double insertRate = .8d;

        // was 10
        final int nindices = 5;

        final boolean testCorrectness = true;

        doConcurrentClientTest(client, nclients, timeout, ntrials, keyLen,
                nops, insertRate, nindices, testCorrectness);

    }

    /**
     * A stress test with a pool of concurrent clients.
     * 
     * @param client
     *            The client.
     * 
     * @param timeout
     *            The #of seconds before the test will terminate (ignored if
     *            <i>testCorrectness := true</i> since tasks MUST run to
     *            completion in order for comparisons against ground truth to be
     *            valid).
     * 
     * @param nclients
     *            The #of concurrent clients.
     * 
     * @param ntrials
     *            The #of batch (remote) operations to execute.
     * 
     * @param keyLen
     *            The length of the random unsigned byte[] keys used in the
     *            operations. The longer the keys the less likely it is that
     *            there will be a write-write conflict (that concurrent txs will
     *            write on the same key).
     * 
     * @param nops
     *            The #of rows in each operation.
     * 
     * @param insertRate
     *            The rate of insert operations (inserting <i>nops</i> tuples)
     *            in [0.0:1.0]. The balance of the operations will remove
     *            <i>nops</i> tuples.
     * 
     * @param nindices
     *            The #of different indices to which the operation will be
     *            applied. The tasks will be generated modulo <i>nindices</i>.
     *            When nindices is greater than one, there is increased
     *            likelihood of tasks running concurrently before the first
     *            split. Regardless of the value of nindices, after a scale-out
     *            index has been split the likelihood of concurrent writers goes
     *            up significantly.
     * 
     * @param testCorrectness
     *            When <code>true</code>, ground truth will be maintained and
     *            verified against the post-condition of the index(s) under
     *            test. This option may be used to verify index partition
     *            split/join/move semantics and the correctness of
     *            {@link ClientIndexView} views. All operations on a ground
     *            truth index are serialized (all operations may be serialized
     *            if the ground truth indices are all backed by the same store)
     *            so this option can not be used when you are doing performance
     *            testing.
     * 
     * @todo Note: When <i>nindices</i> is high the setup time on this test is
     *       quite large since the indices are registered sequentially rather
     *       than using parallelism. Run the index registration tasks in a
     *       thread pool to cut down the test setup latency.
     * 
     * @todo factor out the operation to be run.
     * 
     * @todo factor out the setup for the federation so that we can test
     *       embedded or distributed (either one process, many processes, or
     *       many hosts). Setup of a distributed federation is more complex,
     *       whether on one host or many hosts, since it requires Jini
     *       configurations for each service. Finally, if the test index exists
     *       then it must be dropped.
     *       <p>
     *       In a distributed configuration, the clients can also be distributed
     *       which raises the complexity further. In all, we really need a means
     *       to setup a cluster as a bigdata federation based on a master
     *       configuration. E.g., something to generate the individual
     *       configuration files from a master description of the federation and
     *       something to deploy those files together with the necessary
     *       software onto the cluster. SCA probably addresses this issue.
     * 
     * @todo It would be especially nice to have this run against a cluster so
     *       that we could characterize throughput as a function of the #of
     *       machines, but that also requires a distributed client otherwise the
     *       client may become the bottleneck.
     * 
     * @todo parameterize for random deletes and writes and parameterize those
     *       operations so that they can be made likely to force a join or split
     *       of an index partition.
     */
    public Result doConcurrentClientTest(final IBigdataClient<?> client,
            final int nclients, final long timeout, final int ntrials,
            final int keyLen, final int nops, final double insertRate, final int nindices,
            boolean testCorrectness) throws InterruptedException, IOException {
        
        /*
         * Overview: registers [nindices] scale-out indices (plus a local
         * ground truth BTree for each when [testCorrectness] is true), runs
         * [ntrials] tasks on a pool of [nclients] threads, tallies the task
         * outcomes together with the overflow counters of the data services
         * into the returned Result, and finally (when [testCorrectness] is
         * true) verifies each scale-out index against its ground truth.
         */

        // The basename of the scale-out index(s) for the test.
        final String basename = "testIndex";

        // connect to the federation.
        final IBigdataFederation<?> federation = client.connect();
        
        /*
         * Register the scale-out index(s).
         */
        assert nindices > 0;
        
        final IIndex[] index = new IIndex[nindices];
        final BTree[] groundTruth = new BTree[nindices];
        final IRawStore[] groundTruthStore = new IRawStore[nindices];
        final ReentrantLock[] lock = new ReentrantLock[nindices];

        // Used to run the client tasks.
        final ThreadPoolExecutor executorService = (ThreadPoolExecutor) Executors
                .newFixedThreadPool(nclients, DaemonThreadFactory
                        .defaultThreadFactory());

        // Used to collect performance counters on some queues.
        final ScheduledExecutorService sampleService = Executors
                .newSingleThreadScheduledExecutor(DaemonThreadFactory
                        .defaultThreadFactory());

        // Used to periodically spam the LBS with fake data to prompt moves.
        final ScheduledExecutorService spamLBSService = Executors
                .newSingleThreadScheduledExecutor(DaemonThreadFactory
                        .defaultThreadFactory());

        try {

            for (int i = 0; i < nindices; i++) {

                final String name = basename + i;
                final UUID indexUUID = UUID.randomUUID();
                {

                    final IndexMetadata indexMetadata = new IndexMetadata(name,
                            indexUUID);

                    indexMetadata.setTupleSerializer(new NOPTupleSerializer(
                            new ASCIIKeyBuilderFactory(keyLen)));

                    // must support delete markers
                    indexMetadata.setDeleteMarkers(true);

                    // register the scale-out index, creating a single index
                    // partition.
                    federation.registerIndex(indexMetadata);

                    if (testCorrectness) {

                        /*
                         * Setup a distinct backing store for the ground truth
                         * for each index and a lock to serialize access to that
                         * index. This allows concurrency if you start with more
                         * than one index or after an index has been split.
                         */

                        groundTruthStore[i] = new TemporaryRawStore(
                                WormAddressManager.SCALE_UP_OFFSET_BITS);

                        final IndexMetadata md = indexMetadata.clone();

                        // turn off delete markers for the ground truth index.
                        md.setDeleteMarkers(false);

                        groundTruth[i] = BTree.create(groundTruthStore[i], md);

                        lock[i] = new ReentrantLock();

                    }

                }

                index[i] = federation.getIndex(name, ITx.UNISOLATED);

            }

            // will log the behavior of this queue.
            {

                final long initialDelay = 0; // initial delay in ms.
                final long delay = 1000; // delay in ms.
                final TimeUnit unit = TimeUnit.MILLISECONDS;

                final ThreadPoolExecutorStatisticsTask queueLengthTask = new ThreadPoolExecutorStatisticsTask(
                        "testExecutorService", executorService);

                sampleService.scheduleWithFixedDelay(queueLengthTask,
                        initialDelay, delay, unit);

            }

            // will periodically spam the LBS to prompt moves.
            if (fed.getDataServiceUUIDs(0/* maxCount */).length == 2) {

                final long initialDelay = 3000; // initial delay in ms.
                final long delay = initialDelay * 2; // delay in ms.
                final TimeUnit unit = TimeUnit.MILLISECONDS;

                final Runnable spamTask = new Runnable() {

                    final Random r = new Random();
                    
                    @Override
                    public void run() {
                        try {
                            if(r.nextBoolean()) {
                                if(r.nextBoolean()) {
                                    StressTestConcurrent.this.setupLBSForMove(dataService0);
                                } else {
                                    StressTestConcurrent.this.setupLBSForMove(dataService1);
                                }
                            } else {
                                // Tell the LBS that the services are equally loaded.
                                StressTestConcurrent.this.setupLBSForMove(null);
                            }
                        } catch (IOException e) {
                            log.error(e, e);
                        }
                    }

                };

                /*
                 * Note: The spam task runs on the executor dedicated to it so
                 * that it does not share (and can not delay) the single thread
                 * which samples the queue statistics.
                 */
                spamLBSService.scheduleWithFixedDelay(spamTask, initialDelay,
                        delay, unit);

            }

            final Collection<Callable<Void>> tasks = new HashSet<Callable<Void>>();

            for (int i = 0; i < ntrials; i++) {

                // The tasks are assigned to the indices round-robin.
                final int k = i % nindices;

                tasks.add(new Task(index[k], keyLen, nops, insertRate,
                        groundTruth[k], lock[k]));

            }

            /*
             * Run the M transactions on N clients.
             */

            final long begin = System.currentTimeMillis();

            log.warn("Starting tasks on client");

            /*
             * Note: When [testCorrectness := true] we MUST wait for all tasks
             * to complete since the ground truth data can otherwise differ from
             * the data successfully committed on the database (if a task is
             * canceled during the write on groundTruth then it WILL NOT agree
             * with the scale-out indices).
             */
            final List<Future<Void>> results = executorService.invokeAll(tasks,
                    testCorrectness ? Long.MAX_VALUE : timeout,
                    TimeUnit.SECONDS);

            final long elapsed = System.currentTimeMillis() - begin;

            if (log.isInfoEnabled())
                log.info("Examining task results: elapsed=" + elapsed);

            final Iterator<Future<Void>> itr = results.iterator();

            int nfailed = 0; // #of operations that failed
            int ncommitted = 0; // #of operations that committed.
            int nuncommitted = 0; // #of operations that did not complete in
                                  // time.
            int ntimeout = 0;
            int ninterrupted = 0;
            final LinkedList<Exception> failures = new LinkedList<Exception>();

            while (itr.hasNext()) {

                final Future<Void> future = itr.next();

                if (future.isCancelled()) {

                    nuncommitted++;

                    continue;

                }

                try {

                    // Don't wait
                    future.get(0L, TimeUnit.MILLISECONDS);

                    ncommitted++;

                } catch (ExecutionException ex) {

                    // Validation errors are allowed and counted as aborted txs.

                    if (ex.getCause() instanceof ValidationError) {

                        nfailed++;

                    } else {

                        // Other kinds of exceptions are errors. The throwable
                        // is logged as well so the stack trace is not lost.

                        log.error("Not expecting: " + ex.getMessage(), ex);

                        failures.add(ex);

                    }

                } catch (InterruptedException e) {

                    ninterrupted++;

                } catch (TimeoutException e) {

                    ntimeout++;

                }

            }

            /*
             * Note: This can cause exceptions to be thrown out of the write
             * executor service since the concurrency manager will have been
             * shutdown but asynchronous overflow processing is doubtless still
             * running some tasks.
             */
            executorService.shutdownNow();

            /*
             * Figure out how many of these different operations were executed
             * by the data service(s).
             */
            final OverflowCounters overflowCounters = new OverflowCounters();
            if (dataService0 != null) {
                overflowCounters.add(((DataService) dataService0).getResourceManager()
                        .getOverflowCounters());
            }
            if (dataService1 != null) {
                overflowCounters.add(((DataService) dataService1).getResourceManager()
                        .getOverflowCounters());
            }

            /*
             * Note: Guards against division by zero when the elapsed time is
             * less than the clock resolution and multiplies as a long so that
             * the product can not overflow an int.
             */
            final long operationsPerSecond = elapsed == 0L ? 0L
                    : (ncommitted * 1000L) / elapsed;

            final Result ret = new Result();

            ret.put("ncommitted", "" + ncommitted);
            ret.put("nfailed", "" + nfailed);
            ret.put("nuncommitted", "" + nuncommitted);
            ret.put("ntimeout", "" + ntimeout);
            ret.put("ninterrupted", "" + ninterrupted);
            ret.put("elapsed(ms)", "" + elapsed);
            ret.put("operations/sec", "" + operationsPerSecond);
            ret.put("failures", "" + (failures.size()));
            ret.put("nbuild", "" + overflowCounters.indexPartitionBuildCounter);
            // Note: reports the merge counter (not the move counter).
            ret.put("nmerge", "" + overflowCounters.indexPartitionMergeCounter);
            ret.put("nsplit", "" + overflowCounters.indexPartitionSplitCounter);
            ret.put("nmove", "" + overflowCounters.indexPartitionMoveCounter);

            if (log.isInfoEnabled())
                log.info(ret.toString(true/* newline */));

            if (log.isInfoEnabled())
                log.info(overflowCounters.getCounters().toString());

            if (!failures.isEmpty()) {

                log.error("failures:\n" + Arrays.toString(failures.toArray()));

                fail("There were " + failures.size()
                        + " failed tasks for unexpected causes");

            }

            if (testCorrectness) {

                /*
                 * @todo config parameter.
                 * 
                 * Note: there may be differences when we have forced overflow
                 * and when we have not since forcing overflow will trigger
                 * compacting merges. So you are more likely to find a problem
                 * if you DO NOT force overflow.
                 */
                final boolean forceOverflow = false;
                if (forceOverflow) {

                    log.warn("Forcing overflow: " + new Date());

                    ((AbstractScaleOutFederation<?>) federation)
                            .forceOverflow(true/* compactingMerge */, true/* truncateJournal */);

                    log.warn("Forced  overflow: " + new Date());

                }

                /*
                 * For each index, verify its state against the corresponding
                 * ground truth index.
                 */

                for (int i = 0; i < nindices; i++) {

                    final String name = basename + i;

                    final IIndex expected = groundTruth[i];

                    if (log.isInfoEnabled())
                        log.info("Validating: "
                            + name
                            + " #groundTruthEntries="
                            + groundTruth[i].rangeCount()
                            + ", #partitions="
                            + federation.getMetadataIndex(name,
                                    ITx.READ_COMMITTED).rangeCount());

                    /*
                     * Note: This uses an iterator based comparison so that we
                     * can compare a local index without delete markers and a
                     * key-range partitioned index with delete markers.
                     * 
                     * Note: This is using a read-only tx reading from the last
                     * commit point on the federation. That guarantees a
                     * consistent read.
                     * 
                     * Note: Tasks must run to completion!
                     * 
                     * If any tasks were cancelled while they were running then
                     * the groundTruth MIGHT NOT agree with the scale-out
                     * indices. This is true even though the task which writes
                     * on the scale-out indices does not update the ground truth
                     * until it has successfully written on the scale-out index.
                     * The reason is that the BTree code itself can notice the
                     * interrupt while we are writing on the groundTruth index
                     * and if the task is cancelled in the middle of a BTree
                     * mutation then the state of the groundTruth and scale-out
                     * indices WILL NOT agree.
                     * 
                     * FIXME I still see errors where the last byte in the key
                     * is off by one in this test from time to time. I am not
                     * sure if this is a test harness problem (assumptions that
                     * the test harness is making) or a system problem.
                     * 
                     * expected=com.bigdata.btree.Tuple@8291269{ nvisited=2368,
                     * flags=[KEYS,VALS], key=[-128, 0, 11, -45], val=[108,
                     * -114, -104, -47, -70], obj=[108, -114, -104, -47, -70],
                     * sourceIndex=0},
                     * 
                     * actual=com.bigdata.btree.
                     * AbstractChunkedTupleIterator$ResultSetTuple@33369876{
                     * nvisited=197, flags=[KEYS,VALS], key=[-128, 0, 11, -46],
                     * val=[111, 56, 17, 100, 56], obj=[111, 56, 17, 100, 56],
                     * sourceIndex=2}
                     */

                    // read-only tx from lastCommitTime.
                    final long tx = federation.getTransactionService().newTx(
                            ITx.READ_COMMITTED);

                    try {

                        assertSameEntryIterator(expected, federation.getIndex(
                                name, tx));

                    } finally {

                        federation.getTransactionService().abort(tx);

                    }

                    /*
                     * Verify against the unisolated views (this might be Ok if
                     * all tasks ran to completion, but if there is ongoing
                     * asynchronous overflow activity then that could mess this
                     * up since the UNISOLATED index views do not have
                     * read-consistent semantics).
                     */
                    assertSameEntryIterator(expected, federation.getIndex(name,
                            ITx.UNISOLATED));

                    /*
                     * Release the ground truth index and the backing store.
                     */

                    groundTruth[i].close();
                    groundTruth[i] = null;

                    groundTruthStore[i].destroy();

                }

                if (log.isInfoEnabled())
                    log.info("Validated " + nindices
                            + " indices against ground truth.");

            }

            return ret;

        } finally {

            /*
             * Make sure that we destroy the temporary store used for the ground
             * truth indices.
             */
            for (IRawStore tmp : groundTruthStore) {

                if (tmp != null && tmp.isOpen()) {

                    tmp.destroy();

                }

            }

            // make sure all services are down.
            executorService.shutdownNow();
            sampleService.shutdownNow();
            spamLBSService.shutdownNow();

        }

    }

    /**
     * Fake out the load balancer by replacing its service scores. When a
     * target service is given, that data service is reported with a raw score
     * of <code>1.0</code> and the other data service with a raw score of
     * <code>0.0</code>, so that one data service appears "highly utilized"
     * while the other appears "under utilized". When no target service is
     * given, both data services are reported with a raw score of
     * <code>0.5</code>.
     * <p>
     * As a side effect, the log level of the {@link LoadBalancerService} is
     * set to INFO.
     * 
     * @param targetService
     *            The target data service -or- <code>null</code> if you want to
     *            tell the LBS that the services are equally loaded.
     * 
     * @throws IOException
     */
    private void setupLBSForMove(final IDataService targetService)
            throws IOException {

        // explicitly set the log level for the load balancer.
        LoadBalancerService.log.setLevel(Level.INFO);

        final AbstractEmbeddedLoadBalancerService lbs = ((AbstractEmbeddedLoadBalancerService) ((EmbeddedFederation<?>) fed)
                .getLoadBalancerService());

        final double rawScore0;
        final double rawScore1;

        if (targetService == null) {

            log.warn("Spamming LBS: services have equal load.");

            rawScore0 = 0.5;
            rawScore1 = 0.5;

        } else {

            log.warn("Spamming LBS: one service will appear heavily loaded.");

            /*
             * Note: Each service MUST be compared against its own UUID.
             * Comparing both against dataService0 would always give the two
             * services the same score.
             */
            rawScore0 = targetService.getServiceUUID().equals(
                    dataService0.getServiceUUID()) ? 1.0 : 0.0;

            rawScore1 = targetService.getServiceUUID().equals(
                    dataService1.getServiceUUID()) ? 1.0 : 0.0;

        }

        final ServiceScore[] fakeServiceScores = new ServiceScore[] {

                new ServiceScore(
                        AbstractStatisticsCollector.fullyQualifiedHostName,
                        dataService0.getServiceUUID(), "dataService0",
                        rawScore0),

                new ServiceScore(
                        AbstractStatisticsCollector.fullyQualifiedHostName,
                        dataService1.getServiceUUID(), "dataService1",
                        rawScore1)

        };

        // set the fake scores on the load balancer.
        lbs.setServiceScores(fakeServiceScores);

    }
    
    /**
     * Run an unisolated operation.
     * <p>
     * Each invocation of {@link #call()} builds one batch of <i>nops</i>
     * strictly increasing keys and submits it to the index under test as
     * either a batch insert or a batch remove. When a ground truth index was
     * supplied, the same batch is then applied to it while holding the shared
     * lock so that both indices observe the writes in the same order.
     */
    public static class Task implements Callable<Void> {

        /** The index under test. */
        private final IIndex ndx;
//        private final int keyLen;
        /** The #of tuples in the batch submitted by {@link #call()}. */
        private final int nops;
        /** Probability in [0:1] that the batch is an insert (else a remove). */
        private final double insertRate;
        /** Optional ground truth index (may be <code>null</code>). */
        private final IIndex groundTruth;
        /** Optional lock; required whenever {@link #groundTruth} is non-null. */
        private final ReentrantLock lock;
        
        /*
         * @todo This has a very large impact on the throughput. It directly
         * controls the maximum distance between keys in a batch operations.
         * In turn, that translates into the "sparsity" of the operation. A
         * small value (~10) can show 4x higher throughput than a value of
         * 1000. This is because the btree cache is more or less being
         * defeated as the spacing between the keys touched in any operation
         * grows.
         * 
         * The other effect of this parameter is to change the #of possible
         * keys in the index. A larger value allows more distinct keys to be
         * generated, which in turn increases the #of entries that are
         * permitted into the index.
         * 
         * incRange => operations per second (Disk, no sync on commit, laptop, 5.23.07).
         * 
         * 10 => 463
         * 
         * 100 => 222
         * 
         * 1000 => 132
         * 
         * 10000 => 114 
         * 
         * 100000 => 116 
         * 
         * @todo Tease apart the sparsity effect from the #of entries
         * effect, or at least report the #of entries and height of the
         * index at the end of the overall run.
         */
        static final int incRange = 100;  
        
        /** The last key generated by {@link #nextKey()} (initially zero). */
        int lastKey = 0;

        final Random r = new Random();

        final KeyBuilder keyBuilder = new KeyBuilder(Bytes.SIZEOF_INT);
        
        /**
         * Generate the next key. The key is the previous key plus a random
         * increment in [1:{@link #incRange}].
         * 
         * @return The key encoded using the {@link #keyBuilder}.
         */
        final private byte[] nextKey() {

            // Note: MUST be + 1 so that the keys are strictly increasing!
            final int key = lastKey + r.nextInt(incRange) + 1;
            
            final byte[] data = keyBuilder.reset().append(key).getKey();
            
            lastKey = key;

            return data;

        }
        
        /**
         * @param ndx
         *            The index under test.
         * @param keyLen
         *            Ignored (see the todo below).
         * @param nops
         *            The #of tuples in the batch operation.
         * @param insertRate
         *            The probability in [0:1] that the batch will be an
         *            insert rather than a remove.
         * @param groundTruth
         *            Used for performing ground truth correctness tests when
         *            running against one or more data services with index
         *            partition split, move, and join enabled (optional). When
         *            specified this should be backed by a
         *            {@link TemporaryStore} or {@link TemporaryRawStore}. The
         *            caller is responsible for validating the index under test
         *            against the ground truth on completion of the test.
         * @param lock
         *            Used to coordinate operations on the groundTruth store.
         *            May be <code>null</code> if the groundTruth store is
         *            <code>null</code>.
         * 
         * @throws IllegalArgumentException
         *             if <i>insertRate</i> is not in [0:1].
         * @throws IllegalArgumentException
         *             if <i>groundTruth</i> is given without a <i>lock</i>.
         * 
         * @todo parameterize for operation type (insert, remove, read,
         *       contains). let the caller determine the profile of operations
         *       to be executed against the service.
         * 
         * @todo keyLen is ignored. It could be replaced by an increment value
         *       that would govern the distribution of the keys.
         */
        public Task(IIndex ndx, int keyLen, int nops, double insertRate,
                IIndex groundTruth, ReentrantLock lock) {

            this.ndx = ndx;
           
//            this.keyLen = keyLen;
            
            if (insertRate < 0d || insertRate > 1d)
                throw new IllegalArgumentException();
            
            this.insertRate = insertRate;
            
            this.nops = nops;
            
            this.groundTruth = groundTruth;
            
            this.lock = lock;
            
            if (groundTruth != null && lock == null) {
                
                throw new IllegalArgumentException();
                
            }
            
        }

        /**
         * Executes a random batch operation with keys presented in sorted
         * order.
         * <p>
         * Note: Batch operations with sorted keys have twice the performance of
         * the corresponding operation with unsorted keys due to improved
         * locality of the lookups performed on the index.
         * <p>
         * The lock is optional. It is only acquired when one was supplied to
         * the constructor, which is mandatory whenever a ground truth index
         * is in use.
         * 
         * @return Always <code>null</code>.
         */
        public Void call() throws Exception {

            final byte[][] keys = new byte[nops][];

            if (r.nextDouble() <= insertRate) {

                /*
                 * Insert
                 */

//                log.info("insert: nops=" + nops);

                final byte[][] vals = new byte[nops][];

                for (int i = 0; i < nops; i++) {

                    keys[i] = nextKey();

                    vals[i] = new byte[5];

                    r.nextBytes(vals[i]);

                }

                /*
                 * Note: Lock is forcing the same serialization order on the
                 * test and ground truth index writes. There is nothing to
                 * coordinate when no lock was given.
                 */
                if (lock != null) {

                    lock.lock();

                }

                try {

                    ndx.submit(0/* fromIndex */, nops/* toIndex */, keys, vals, //
                            BatchInsertConstructor.RETURN_NO_VALUES, //
                            null// handler
                            );

                    if (groundTruth != null) {

                        /*
                         * Note: Even though we write on the groundTruth after
                         * the scale-out index, it is possible that the mutation
                         * on the ground truth will be interrupted if the task
                         * is cancelled such that the groundTruth and the
                         * scale-out index do not agree.
                         */
                        
                        groundTruth.submit(0/* fromIndex */, nops/* toIndex */,
                                keys, vals, //
                                BatchInsertConstructor.RETURN_NO_VALUES, //
                                null// handler
                                );

                    }

                } finally {

                    if (lock != null) {

                        lock.unlock();

                    }

                }

            } else {

                /*
                 * Remove.
                 */

//                log.info("remove: nops=" + nops);

                for (int i = 0; i < nops; i++) {

                    keys[i] = nextKey();

                }

                /*
                 * Note: Lock is forcing the same serialization order on the
                 * test and ground truth index writes. There is nothing to
                 * coordinate when no lock was given.
                 */
                if (lock != null) {

                    lock.lock();

                }

                try {

                    ndx.submit(0/* fromIndex */, nops/* toIndex */, keys,
                            null/* vals */,//
                            BatchRemoveConstructor.RETURN_MUTATION_COUNT,//
                            null// handler
                            );

                    if (groundTruth != null) {

                        /*
                         * Note: Even though we write on the groundTruth after
                         * the scale-out index, it is possible that the mutation
                         * on the ground truth will be interrupted if the task
                         * is cancelled such that the groundTruth and the
                         * scale-out index do not agree.
                         */

                        groundTruth.submit(0/* fromIndex */, nops/* toIndex */,
                                keys, null/* vals */,//
                                BatchRemoveConstructor.RETURN_MUTATION_COUNT,//
                                null// handler
                                );

                    }

                } finally {

                    if (lock != null) {

                        lock.unlock();

                    }

                }

            }
            
            return null;
            
        }
        
    }

    /**
     * Runs a single instance of the test as configured in the code.
     * <p>
     * Every property that {@link #doComparisonTest(Properties)} reads is
     * given a value here, since that method has no defaults for its numeric
     * properties.
     * 
     * @param args
     *            Ignored.
     * 
     * @todo try running the test out more than 30 seconds. Note that a larger
     *       journal maximum extent is required since the journal will otherwise
     *       overflow.
     * 
     * @todo compute the bytes/second rate (read/written) (its in the counters
     *       for the {@link DiskOnlyStrategy}).
     * 
     * @todo Try to make this a correctness test since there are lots of little
     *       ways in which things can go wrong. Note that the actual execution
     *       order is important....
     * 
     * @todo Test for correct aborts. E.g., seed some tasks with keys or values
     *       that are never allowed to enter the index - the presence of those
     *       data means that the operation will choose to abort rather than to
     *       continue. Since we have written the data on the index this will let
     *       us test that abort() correctly rolls back the index writes. If we
     *       observe those keys/values in an index then we know that either
     *       abort is not working correctly or concurrent operations are being
     *       executed on the _same_ named index.
     * 
     * @see ExperimentDriver, which parameterizes the use of this stress test.
     *      That information should be used to limit the #of transactions
     *      allowed to start at one time on the server and should guide a search
     *      for thinning down resource consumption, e.g., memory usage by
     *      btrees, the node serializer, etc.
     * 
     * @see GenerateExperiment, which may be used to generate a set of
     *      conditions to be run by the {@link ExperimentDriver}.
     */
    public static void main(String[] args) throws Exception {

        final Properties properties = new Properties();

//        properties.setProperty(Options.FORCE_ON_COMMIT, ForceEnum.No.toString());

//        properties.setProperty(Options.BUFFER_MODE, BufferMode.Transient.toString());

        // properties.setProperty(Options.BUFFER_MODE, BufferMode.Direct.toString());

        // properties.setProperty(Options.BUFFER_MODE, BufferMode.Mapped.toString());

        properties.setProperty(Options.BUFFER_MODE, BufferMode.Disk.toString());

        properties.setProperty(Options.CREATE_TEMP_FILE, "true");

        properties.setProperty(TestOptions.TIMEOUT, "10");

        properties.setProperty(TestOptions.NCLIENTS, "10");

        properties.setProperty(TestOptions.NTRIALS, "10000");

        properties.setProperty(TestOptions.KEYLEN, "4");

        properties.setProperty(TestOptions.NOPS, "4");

        /*
         * The remaining properties are also read by doComparisonTest(). The
         * numeric ones have no default there, so leaving them unset makes the
         * run fail while parsing the configuration. The insert rate is given
         * as a whole number so that it is accepted whether the value is
         * parsed as an integer or as a floating point number.
         */
        properties.setProperty(TestOptions.INSERT_RATE, "1");

        properties.setProperty(TestOptions.NINDICES, "1");

        properties.setProperty(TestOptions.TEST_CORRECTNESS, "false");

        final IComparisonTest test = new StressTestConcurrent();
        
        test.setUpComparisonTest(properties);
        
        try {

            test.doComparisonTest(properties);
        
        } finally {

            try {
                
                test.tearDownComparisonTest();
                
            } catch(Throwable t) {

                // Best effort: report the tear down problem, do not mask the
                // outcome of the test itself.
                log.warn("Tear down problem: "+t, t);
                
            }
            
        }

    }

    /**
     * Names of the additional properties understood by this test. Fields
     * declared in an interface are implicitly <code>public static final</code>,
     * so the modifiers are omitted throughout.
     */
    public static interface TestOptions extends Options {

        /**
         * Name of the property giving the timeout for the test.
         */
        String TIMEOUT = "timeout";

        /**
         * Name of the property giving the #of concurrent clients to run.
         */
        String NCLIENTS = "nclients";

        /**
         * Name of the property giving the #of trials (aka transactions) to
         * run.
         */
        String NTRIALS = "ntrials";

        /**
         * Name of the property giving the length of the keys used in the
         * test. This directly impacts the likelihood of a write-write
         * conflict: shorter keys mean more conflicts. Note, however, that
         * conflicts are only possible when at least two concurrent clients
         * are running.
         */
        String KEYLEN = "keyLen";

        /**
         * Name of the property giving the #of operations in each trial.
         */
        String NOPS = "nops";

        /**
         * Name of the property giving the rate of insert operations (each
         * inserting <i>nops</i> tuples) in [0.0:1.0]. The balance of the
         * operations will remove <i>nops</i> tuples.
         */
        String INSERT_RATE = "insertRate";

        /**
         * Name of the property giving the #of distinct scale-out indices that
         * will be used during the run. Each index may be split over time as
         * the run progresses, eventually yielding multiple index partitions.
         */
        String NINDICES = "nindices";

        /**
         * Name of the property which, when <code>true</code>, causes ground
         * truth to be maintained and verified against the post-condition of
         * the index(s) under test.
         * <p>
         * Note: This option may be used to verify index partition
         * split/join/move semantics and the correctness of
         * {@link ClientIndexView} views.
         * <p>
         * Note: All operations on a ground truth index are serialized, so
         * this option can not be used when you are doing performance
         * testing.
         */
        String TEST_CORRECTNESS = "testCorrectness";

    }

    /**
     * Setup and run a test.
     * <p>
     * The configuration is read from the {@link TestOptions} properties and
     * handed to <code>doConcurrentClientTest</code>.
     * 
     * @param properties
     *            There are no "optional" numeric properties - you must make
     *            sure that each of them has a defined value. The insert rate
     *            is parsed as a floating point value, so fractional rates
     *            such as <code>0.5</code> are accepted.
     *            {@link TestOptions#TEST_CORRECTNESS} is read using
     *            {@link Boolean#parseBoolean(String)}, so it is
     *            <code>false</code> when not set.
     * 
     * @return The result of the test run.
     * 
     * @throws NumberFormatException
     *             if a numeric property is missing or can not be parsed.
     */
    public Result doComparisonTest(Properties properties) throws Exception {

        final long timeout = Long.parseLong(properties
                .getProperty(TestOptions.TIMEOUT));

        final int nclients = Integer.parseInt(properties
                .getProperty(TestOptions.NCLIENTS));

        final int ntrials = Integer.parseInt(properties
                .getProperty(TestOptions.NTRIALS));

        final int keyLen = Integer.parseInt(properties
                .getProperty(TestOptions.KEYLEN));

        final int nops = Integer.parseInt(properties
                .getProperty(TestOptions.NOPS));

        final String insertRateStr = properties
                .getProperty(TestOptions.INSERT_RATE);

        if (insertRateStr == null) {

            /*
             * Double.parseDouble(null) would throw a NullPointerException.
             * Report a missing value the same way as the other numeric
             * properties, and name the property.
             */
            throw new NumberFormatException("Required property not set: "
                    + TestOptions.INSERT_RATE);

        }

        // The rate is a fraction in [0.0:1.0], so it must be parsed as a double.
        final double insertRate = Double.parseDouble(insertRateStr);

        final int nindices = Integer.parseInt(properties
                .getProperty(TestOptions.NINDICES));

        final boolean testCorrectness = Boolean.parseBoolean(properties
                .getProperty(TestOptions.TEST_CORRECTNESS));

        Result result = doConcurrentClientTest(client, nclients, timeout,
                ntrials, keyLen, nops, insertRate, nindices, testCorrectness);

        return result;

    }

    /**
     * Utility class that generates an experiment description for this stress
     * test.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     * @version $Id$
     */
    public static class GenerateExperiment extends ExperimentDriver {

        /**
         * Generates an XML file that can be run by {@link ExperimentDriver}.
         * The XML is written on {@link System#err}. One group of basic
         * conditions is generated for each client count, in the order 1, 2,
         * 10, 20, 100, 200.
         * 
         * @param args
         *            Ignored.
         */
        public static void main(String[] args) throws Exception {

            // The #of concurrent clients for each group of conditions.
            final String[] clientCounts = { "1", "2", "10", "20", "100", "200" };

            final Map<String, String> defaults = new HashMap<String, String>();

            // force delete of the files on close of the journal under test.
            defaults.put(Options.CREATE_TEMP_FILE, "true");

            // avoids journal overflow when running out to 60 seconds.
            defaults.put(Options.MAXIMUM_EXTENT,
                    String.valueOf(Bytes.megabyte32 * 400));

            /*
             * Defaults shared by every condition. Note that NCLIENTS is not
             * given a default: it is the variable of the experiment.
             */
            defaults.put(TestOptions.TIMEOUT, "30");
            defaults.put(TestOptions.NTRIALS, "10000");
            defaults.put(TestOptions.KEYLEN, "4");
            defaults.put(TestOptions.NOPS, "100");

            final List<Condition> allConditions = new ArrayList<Condition>();

            for (String nclients : clientCounts) {

                final NV[] overrides = new NV[] { new NV(TestOptions.NCLIENTS,
                        nclients) };

                allConditions.addAll(BasicExperimentConditions
                        .getBasicConditions(defaults, overrides));

            }

            // this is the test to be run.
            final String testClassName = StressTestConcurrent.class.getName();

            final Experiment experiment = new Experiment(testClassName,
                    defaults, allConditions);

            // copy the output into a file and then you can run it later.
            System.err.println(experiment.toXML());

        }

    }

}