/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Mar 11, 2008
 */

package com.bigdata.service;

import java.io.IOException;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.ExecutionException;

import com.bigdata.btree.BTree;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.ScatterSplitConfiguration;
import com.bigdata.btree.keys.TestKeyBuilder;
import com.bigdata.btree.proc.BatchInsert.BatchInsertConstructor;
import com.bigdata.io.SerializerUtil;
import com.bigdata.journal.BufferMode;
import com.bigdata.journal.ITx;
import com.bigdata.journal.TemporaryRawStore;
import com.bigdata.mdi.IMetadataIndex;
import com.bigdata.mdi.PartitionLocator;
import com.bigdata.resources.ResourceManager;
import com.bigdata.resources.ResourceManager.Options;
import com.bigdata.service.ndx.ClientIndexView;
import com.bigdata.service.ndx.RawDataServiceTupleIterator;
import com.bigdata.util.Bytes;

/**
 * Some unit tests for the scatter split of an index partition.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 */
public class TestScatterSplit extends AbstractEmbeddedFederationTestCase {

    public TestScatterSplit() {
        super();
    }

    public TestScatterSplit(String name) {
        super(name);
    }

    /**
     * Overridden to specify the {@link BufferMode#Disk} mode and to lower the
     * threshold at which an overflow operation will be selected.
     */
    public Properties getProperties() {

        final Properties properties = new Properties(super.getProperties());

        // overrides Transient in the base class.
        properties.setProperty(Options.BUFFER_MODE, BufferMode.Disk.toString());

        // this test relies on 2 or more data services.
        properties.setProperty(EmbeddedClient.Options.NDATA_SERVICES, "2");

        // Note: disable copy of small index segments to the new journal
        // during overflow.
        properties.setProperty(Options.COPY_INDEX_THRESHOLD, "0");

//        // set low minimum #of active partitions per data service.
//        properties.setProperty(Options.MINIMUM_ACTIVE_INDEX_PARTITIONS, "1");

//        // enable moves (one per target).
//        properties.setProperty(ResourceManager.Options.MAXIMUM_MOVES_PER_TARGET, "1");
//
//        // allow move of shards which would otherwise be split.
//        properties.setProperty(ResourceManager.Options.MAXIMUM_MOVE_PERCENT_OF_SPLIT, "2.0");
//
//        // disable the CPU threshold for moves.
//        properties.setProperty(ResourceManager.Options.MOVE_PERCENT_CPU_TIME_THRESHOLD, ".0");

        // enable scatter splits.
        properties.setProperty(ResourceManager.Options.SCATTER_SPLIT_ENABLED, "true");

//        /*
//         * Note: Disables the initial round robin policy for the load balancer
//         * service so that it will use our fake scores.
//         */
//        properties.setProperty(LoadBalancerService.Options.INITIAL_ROUND_ROBIN_UPDATE_COUNT, "0");

        // turn off acceleration features.
        properties.setProperty(Options.ACCELERATE_OVERFLOW_THRESHOLD, "0");
        properties.setProperty(Options.ACCELERATE_SPLIT_THRESHOLD, "0");

        // Note: Set a low maximum shard size.
        properties.setProperty(Options.NOMINAL_SHARD_SIZE, "" + Bytes.megabyte);

//        properties.setProperty(Options.INITIAL_EXTENT, "" + 1 * Bytes.megabyte);
//        properties.setProperty(Options.MAXIMUM_EXTENT, "" + 1 * Bytes.megabyte);

        return properties;

    }
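
    /*
     * A minimal sketch (not used by the test) of how the scatter split target
     * could be derived from a desired #of shards per data service rather than
     * hard-coded as in test_scatterSplit() below. The helper name is our own
     * illustration; the ScatterSplitConfiguration constructor arguments are
     * exactly those used by the test.
     */
    private static ScatterSplitConfiguration newScatterSplitConfig(
            final int dataServiceCount, final int shardsPerService) {

        return new ScatterSplitConfiguration(//
                true, // enabled
                .25, // percentOfSplitThreshold
                dataServiceCount, // #of data services to scatter across
                dataServiceCount * shardsPerService // total #of index partitions
        );

    }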

    /**
     * Test writes on a scale-out index until it has enough data to undergo a
     * scatter split, validates that the index was distributed into N shards
     * per data service, and validates the scale-out index after the scatter
     * split against ground truth.
     * 
     * @throws IOException
     * @throws InterruptedException
     * @throws ExecutionException
     */
    public void test_scatterSplit() throws IOException, InterruptedException,
            ExecutionException {

        final int dataServiceCount = 2;
        final int expectedIndexPartitionCount = 4;

        /*
         * Register the index.
         */
        final String name = "testIndex";
        final UUID indexUUID = UUID.randomUUID();
        {

            final IndexMetadata indexMetadata = new IndexMetadata(name,
                    indexUUID);

            // must support delete markers.
            indexMetadata.setDeleteMarkers(true);

            /*
             * Explicitly setup the scatter split operation to distribute 4
             * index partitions across 2 data services (this is done
             * explicitly in case the configuration defaults are changed).
             */
            indexMetadata
                    .setScatterSplitConfiguration(new ScatterSplitConfiguration(
                            true, // enabled
                            .25, // percentOfSplitThreshold
                            dataServiceCount, //
                            expectedIndexPartitionCount//
                    ));

            // register the scale-out index, creating a single index partition.
            fed.registerIndex(indexMetadata, dataService0.getServiceUUID());

        }

        /*
         * Verify the initial index partition.
         */
        final PartitionLocator pmd0;
        {

            final ClientIndexView ndx = (ClientIndexView) fed.getIndex(name,
                    ITx.UNISOLATED);

            final IMetadataIndex mdi = ndx.getMetadataIndex();

            assertEquals("#index partitions", 1, mdi.rangeCount());

            // This is the initial partition locator metadata record.
            pmd0 = mdi.get(new byte[] {});

            assertEquals("partitionId", 0L, pmd0.getPartitionId());

            assertEquals("dataServiceUUID", dataService0.getServiceUUID(),
                    pmd0.getDataServiceUUID());

        }

        assertEquals("partitionCount", 1, getPartitionCount(name));

        /*
         * Setup the ground truth B+Tree.
         */
        final BTree groundTruth;
        {

            final IndexMetadata indexMetadata = new IndexMetadata(indexUUID);

            groundTruth = BTree.create(new TemporaryRawStore(), indexMetadata);

        }
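
        /*
         * For reference, a minimal sketch of the kind of check performed by
         * assertSameEntryIterator(...) inside the loop below (the real helper
         * is inherited from the test base class): visit both indices in key
         * order and compare the tuples. This sketch assumes the byte[]-aware
         * assertEquals(...) from the base test class and is illustrative
         * only, hence left commented out.
         */
//        final ITupleIterator expectedItr = groundTruth.rangeIterator(
//                null/* fromKey */, null/* toKey */);
//        final ITupleIterator actualItr = fed.getIndex(name, ITx.UNISOLATED)
//                .rangeIterator(null/* fromKey */, null/* toKey */);
//        while (expectedItr.hasNext()) {
//            assertTrue(actualItr.hasNext());
//            assertEquals(expectedItr.next().getValue(), actualItr.next()
//                    .getValue());
//        }
//        assertFalse(actualItr.hasNext());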

        /*
         * Populate the index with data until the journal for the data service
         * on which the initial index partition resides overflows.
         * 
         * Note: The index split will occur asynchronously once (a) the index
         * partition has a sufficient #of entries; and (b) a group commit
         * occurs. However, this loop will continue to run, so writes will
         * continue to accumulate on the index partition on the live journal.
         * Once the overflow process completes, the client will be notified
         * that the index partition which it has been addressing no longer
         * exists on the data service. At that point the client SHOULD re-try
         * the operation. Once the client returns from the retry we will
         * notice that the partition count has increased and exit this loop.
         */
        final int batchSize = 5000;

        long overflowCounter = dataService0.getAsynchronousOverflowCounter();

        int npartitions = -1;
        {

            if (log.isInfoEnabled())
                log.info("Writing on indices to provoke overflow");

            int nrounds = 0;
            long nwritten = 0L;

            while (npartitions <= 1) {

                final byte[][] keys = new byte[batchSize][];
                final byte[][] vals = new byte[batchSize][];

                for (int i = 0; i < batchSize; i++) {

                    keys[i] = TestKeyBuilder.asSortKey(nwritten + i);

                    vals[i] = SerializerUtil.serialize(nwritten + i);

                }

                // insert the data into the ground truth index.
                groundTruth.submit(0/* fromIndex */, batchSize/* toIndex */,
                        keys, vals, BatchInsertConstructor.RETURN_NO_VALUES,
                        null/* handler */);

                // Set flag to force overflow on group commit.
                dataService0.forceOverflow(false/* immediate */,
                        false/* compactingMerge */);

                // insert the data into the scale-out index.
                fed.getIndex(name, ITx.UNISOLATED).submit(0/* fromIndex */,
                        batchSize/* toIndex */, keys, vals,
                        BatchInsertConstructor.RETURN_NO_VALUES,
                        null/* handler */);

                overflowCounter = awaitAsynchronousOverflow(dataService0,
                        overflowCounter);

                assertEquals("rangeCount", groundTruth.getEntryCount(), fed
                        .getIndex(name, ITx.UNISOLATED).rangeCount());

                nrounds++;

                nwritten += batchSize;

                npartitions = getPartitionCount(name);

//                if (log.isInfoEnabled())
//                    log.info
                System.err.println
                        ("Populating the index: overflowCounter="
                                + overflowCounter + ", nrounds=" + nrounds
                                + ", nwritten=" + nwritten + ", nentries="
                                + groundTruth.getEntryCount() + " ("
                                + fed.getIndex(name, ITx.UNISOLATED)
                                        .rangeCount() + "), npartitions="
                                + npartitions);

                /*
                 * Compare the index against ground truth after overflow.
                 */

                if (log.isInfoEnabled())
                    log.info("Verifying scale-out index against ground truth");

                assertSameEntryIterator(groundTruth, fed.getIndex(name,
                        ITx.UNISOLATED));

            }

        }

        /*
         * Figure out how the index partitions were distributed and verify
         * that there is now (at least) one index partition on each data
         * service.
         */
        {

            int ndataService0 = 0; // #of index partitions on data service 0.
            int ndataService1 = 0; // #of index partitions on data service 1.

            final ITupleIterator itr = new RawDataServiceTupleIterator(
                    fed.getMetadataService(),//
                    MetadataService.getMetadataIndexName(name), //
                    ITx.READ_COMMITTED,//
                    true, // readConsistent
                    null, // fromKey
                    null, // toKey
                    0, // capacity,
                    IRangeQuery.DEFAULT,// flags
                    null // filter
            );

            int n = 0;

            while (itr.hasNext()) {

                final PartitionLocator locator = (PartitionLocator) SerializerUtil
                        .deserialize(itr.next().getValue());

                System.err.println("locators[" + n + "]=" + locator);

                if (locator.getDataServiceUUID().equals(
                        dataService0.getServiceUUID())) {

                    ndataService0++;

                } else if (locator.getDataServiceUUID().equals(
                        dataService1.getServiceUUID())) {

                    ndataService1++;

                } else {

                    fail("Not expecting partition move to this service: "
                            + locator);

                }

                n++;

            }

            npartitions = getPartitionCount(name);

            System.err.println("npartitions=" + npartitions);
            System.err.println("npartitions(ds0)=" + ndataService0);
            System.err.println("npartitions(ds1)=" + ndataService1);

            // Verify expected #of partitions.
            assertEquals("partitionCount=" + npartitions,
                    expectedIndexPartitionCount, npartitions);

            assertEquals("#dataService0=" + ndataService0,
                    expectedIndexPartitionCount / 2, ndataService0);

            assertEquals("#dataService1=" + ndataService1,
                    expectedIndexPartitionCount / 2, ndataService1);

        }

    }
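
    /*
     * A small sketch (not invoked by the test) of how the per-data-service
     * shard tally above could be factored into a reusable helper: scan the
     * metadata index for the named scale-out index and count the locators
     * homed on the given data service. It uses only the APIs exercised by
     * test_scatterSplit(); the helper itself is our illustration.
     */
    private int countPartitionsOnService(final String name,
            final UUID serviceUUID) throws IOException {

        int count = 0;

        final ITupleIterator itr = new RawDataServiceTupleIterator(
                fed.getMetadataService(),//
                MetadataService.getMetadataIndexName(name), //
                ITx.READ_COMMITTED,//
                true, // readConsistent
                null, // fromKey
                null, // toKey
                0, // capacity,
                IRangeQuery.DEFAULT,// flags
                null // filter
        );

        while (itr.hasNext()) {

            final PartitionLocator locator = (PartitionLocator) SerializerUtil
                    .deserialize(itr.next().getValue());

            if (locator.getDataServiceUUID().equals(serviceUUID)) {

                count++;

            }

        }

        return count;

    }

}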