/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Mar 11, 2008
 */

package com.bigdata.service;

import java.io.IOException;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.ExecutionException;

import org.apache.log4j.Level;

import com.bigdata.btree.BTree;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.keys.KV;
import com.bigdata.btree.keys.TestKeyBuilder;
import com.bigdata.btree.proc.BatchInsert.BatchInsertConstructor;
import com.bigdata.counters.AbstractStatisticsCollector;
import com.bigdata.io.SerializerUtil;
import com.bigdata.journal.BufferMode;
import com.bigdata.journal.ITx;
import com.bigdata.journal.TemporaryRawStore;
import com.bigdata.mdi.IMetadataIndex;
import com.bigdata.mdi.PartitionLocator;
import com.bigdata.resources.ResourceManager;
import com.bigdata.resources.ResourceManager.Options;
import com.bigdata.service.ndx.ClientIndexView;
import com.bigdata.service.ndx.RawDataServiceTupleIterator;
import com.bigdata.util.Bytes;

/**
 * Some unit tests for moving an index partition.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 */
public class TestMove extends AbstractEmbeddedFederationTestCase {

    public TestMove() {
        super();
    }

    public TestMove(String name) {
        super(name);
    }

    /**
     * Overridden to specify the {@link BufferMode#Disk} mode and to lower the
     * threshold at which an overflow operation will be selected.
     */
    public Properties getProperties() {

        final Properties properties = new Properties(super.getProperties());

        // overrides Transient in the base class.
        properties.setProperty(Options.BUFFER_MODE, BufferMode.Disk.toString());

        // this test relies on 2 or more data services.
        properties.setProperty(EmbeddedClient.Options.NDATA_SERVICES, "2");

        // Note: disable copy of small index segments to the new journal during overflow.
        properties.setProperty(Options.COPY_INDEX_THRESHOLD, "0");

        // set a low minimum #of active index partitions per data service.
        properties.setProperty(Options.MINIMUM_ACTIVE_INDEX_PARTITIONS, "1");

        // enable moves (one per target).
        properties.setProperty(ResourceManager.Options.MAXIMUM_MOVES_PER_TARGET, "1");

        // allow move of shards which would otherwise be split.
        properties.setProperty(ResourceManager.Options.MAXIMUM_MOVE_PERCENT_OF_SPLIT, "2.0");

        // disable the CPU threshold for moves.
        properties.setProperty(ResourceManager.Options.MOVE_PERCENT_CPU_TIME_THRESHOLD, ".0");

        // disable scatter split.
        properties.setProperty(ResourceManager.Options.SCATTER_SPLIT_ENABLED, "false");

        /*
         * Note: Disables the initial round robin policy for the load balancer
         * service so that it will use our fake scores.
         */
        properties.setProperty(
                LoadBalancerService.Options.INITIAL_ROUND_ROBIN_UPDATE_COUNT, "0");

        // turn off acceleration features.
        properties.setProperty(Options.ACCELERATE_OVERFLOW_THRESHOLD, "0");
        properties.setProperty(Options.ACCELERATE_SPLIT_THRESHOLD, "0");

        // Note: Set a low maximum shard size.
        properties.setProperty(Options.NOMINAL_SHARD_SIZE, "" + Bytes.megabyte);

//        properties.setProperty(Options.INITIAL_EXTENT, ""+1*Bytes.megabyte);
//        properties.setProperty(Options.MAXIMUM_EXTENT, ""+1*Bytes.megabyte);

        return properties;

    }

    /**
     * Test forces a move of an index partition and validates the scale-out
     * index after the move against ground truth.
     * 
     * @throws IOException
     * @throws ExecutionException
     * @throws InterruptedException
     */
    public void test_move() throws IOException, InterruptedException,
            ExecutionException {

        /*
         * Register the index.
         */
        final String name = "testIndex";

        final UUID indexUUID = UUID.randomUUID();
        {

            final IndexMetadata indexMetadata = new IndexMetadata(name, indexUUID);

            // must support delete markers
            indexMetadata.setDeleteMarkers(true);

            // register the scale-out index, creating a single index partition.
            fed.registerIndex(indexMetadata, dataService0.getServiceUUID());

        }

        /*
         * Verify the initial index partition.
         */
        final PartitionLocator pmd0;
        {

            final ClientIndexView ndx = (ClientIndexView) fed.getIndex(name,
                    ITx.UNISOLATED);

            final IMetadataIndex mdi = ndx.getMetadataIndex();

            assertEquals("#index partitions", 1, mdi.rangeCount());

            // This is the initial partition locator metadata record.
            pmd0 = mdi.get(new byte[] {});

            assertEquals("partitionId", 0L, pmd0.getPartitionId());

            assertEquals("dataServiceUUID", dataService0.getServiceUUID(),
                    pmd0.getDataServiceUUID());

        }

        assertEquals("partitionCount", 1, getPartitionCount(name));

        /*
         * Setup the ground truth B+Tree.
         */
        final BTree groundTruth;
        {

            final IndexMetadata indexMetadata = new IndexMetadata(indexUUID);

            groundTruth = BTree.create(new TemporaryRawStore(), indexMetadata);

        }

        /*
         * Populate the index with data until the journal for the data service
         * on which the initial partition resides overflows.
         * 
         * Note: The index split will occur asynchronously once (a) the index
         * partition has a sufficient #of entries; and (b) a group commit
         * occurs. However, this loop will continue to run, so writes will
         * continue to accumulate on the index partition on the live journal.
         * Once the overflow process completes, the client will be notified
         * that the index partition which it has been addressing no longer
         * exists on the data service. At that point the client SHOULD re-try
         * the operation. Once the client returns from the retry we will
         * notice that the partition count has increased and exit this loop.
         */
        final int batchSize = 5000;

        long overflowCounter = dataService0.getAsynchronousOverflowCounter();

        int npartitions = -1;
        {

            if (log.isInfoEnabled())
                log.info("Writing on indices to provoke overflow");

            int nrounds = 0;
            long nwritten = 0L;
            while (npartitions < 2) {

                final byte[][] keys = new byte[batchSize][];
                final byte[][] vals = new byte[batchSize][];

                for (int i = 0; i < batchSize; i++) {

                    keys[i] = TestKeyBuilder.asSortKey(nwritten + i);

                    vals[i] = SerializerUtil.serialize(nwritten + i);

                }

                // insert the data into the ground truth index.
                groundTruth.submit(0/* fromIndex */, batchSize/* toIndex */,
                        keys, vals, BatchInsertConstructor.RETURN_NO_VALUES,
                        null/* handler */);

                // Set flag to force overflow on group commit.
                dataService0.forceOverflow(false/* immediate */,
                        false/* compactingMerge */);

                // insert the data into the scale-out index.
                fed.getIndex(name, ITx.UNISOLATED).submit(0/* fromIndex */,
                        batchSize/* toIndex */, keys, vals,
                        BatchInsertConstructor.RETURN_NO_VALUES,
                        null/* handler */);

                overflowCounter = awaitAsynchronousOverflow(dataService0,
                        overflowCounter);

                assertEquals("rangeCount", groundTruth.getEntryCount(), fed
                        .getIndex(name, ITx.UNISOLATED).rangeCount());

                nrounds++;

                nwritten += batchSize;

                npartitions = getPartitionCount(name);

                if (log.isInfoEnabled())
                    log.info("Populating the index: overflowCounter="
                            + overflowCounter + ", nrounds=" + nrounds
                            + ", nwritten=" + nwritten + ", nentries="
                            + groundTruth.getEntryCount() + " ("
                            + fed.getIndex(name, ITx.UNISOLATED).rangeCount()
                            + "), npartitions=" + npartitions);

                /*
                 * Compare the index against ground truth after overflow.
                 */
                if (log.isInfoEnabled())
                    log.info("Verifying scale-out index against ground truth");

                assertSameEntryIterator(groundTruth, fed.getIndex(name,
                        ITx.UNISOLATED));

            }

        }

        npartitions = getPartitionCount(name);

        // Verify at least 2 partitions.
        assertTrue("partitionCount=" + npartitions, npartitions >= 2);

        /*
         * Fake out the load balancer so that it will report that the source
         * data service (dataService0) is "highly utilized" and the target
         * data service (dataService1) is "under utilized".
         */
        {

            if (log.isInfoEnabled())
                log.info("Setting up LBS for move.");

            // explicitly set the log level for the load balancer.
            LoadBalancerService.log.setLevel(Level.INFO);

            final AbstractEmbeddedLoadBalancerService lbs = ((AbstractEmbeddedLoadBalancerService) ((EmbeddedFederation) fed)
                    .getLoadBalancerService());

            final ServiceScore[] fakeServiceScores = new ServiceScore[2];

            fakeServiceScores[0] = new ServiceScore(
                    AbstractStatisticsCollector.fullyQualifiedHostName,
                    dataService0.getServiceUUID(), "dataService0",
                    1.0/* rawScore */);

            fakeServiceScores[1] = new ServiceScore(
                    AbstractStatisticsCollector.fullyQualifiedHostName,
                    dataService1.getServiceUUID(), "dataService1",
                    0.0/* rawScore */);

            // set the fake scores on the load balancer.
            lbs.setServiceScores(fakeServiceScores);

        }

        /*
         * Continue to populate the index until we can provoke another
         * overflow.
         * 
         * Since we have configured the various thresholds appropriately, this
         * overflow should select one of the index partitions to move over to
         * the other data service.
         */
        {

            if (log.isInfoEnabled())
                log.info("Writing on indices to provoke overflow");

//            int nrounds = 0;
//            long nwritten = 0L;
//            boolean done = false;
//            while (!done) {

            /*
             * Just a little random data.
             * 
             * Note: We have to write enough data that the new updates are not
             * simply copied onto the new journal; otherwise the index
             * partition(s) on which we write would not be eligible for a
             * move.
             */
            final int nentries = 5000;

            final KV[] data = getRandomKeyValues(nentries);

            final byte[][] keys = new byte[nentries][];
            final byte[][] vals = new byte[nentries][];

            for (int i = 0; i < nentries; i++) {

                keys[i] = data[i].key;

                vals[i] = data[i].val;

            }

            // insert the data into the ground truth index.
            groundTruth.submit(0/* fromIndex */, nentries/* toIndex */, keys,
                    vals, BatchInsertConstructor.RETURN_NO_VALUES,
                    null/* handler */);

            /*
             * Set flag to force overflow on group commit.
             */
            dataService0.forceOverflow(false/* immediate */,
                    true/* compactingMerge */);

            // insert the data into the scale-out index.
            fed.getIndex(name, ITx.UNISOLATED).submit(0/* fromIndex */,
                    nentries/* toIndex */, keys, vals,
                    BatchInsertConstructor.RETURN_NO_VALUES,
                    null/* handler */);

            // wait until overflow processing is done.
            overflowCounter = awaitAsynchronousOverflow(dataService0,
                    overflowCounter);

            /*
             * Compare the index against ground truth after overflow.
             */
            if (log.isInfoEnabled())
                log.info("Verifying scale-out index against ground truth");

            assertSameEntryIterator(groundTruth, fed.getIndex(name,
                    ITx.UNISOLATED));

//            }

        }

        /*
         * Figure out which index partition was moved and verify that there is
         * now (at least) one index partition on each data service.
         */
        {

            int ndataService0 = 0; // #of index partitions on data service 0.
            int ndataService1 = 0; // #of index partitions on data service 1.

            final ITupleIterator<?> itr = new RawDataServiceTupleIterator(
                    fed.getMetadataService(), //
                    MetadataService.getMetadataIndexName(name), //
                    ITx.READ_COMMITTED, //
                    true, // readConsistent
                    null, // fromKey
                    null, // toKey
                    0, // capacity,
                    IRangeQuery.DEFAULT, // flags
                    null // filter
                    );

            int n = 0;

            while (itr.hasNext()) {

                final PartitionLocator locator = (PartitionLocator) SerializerUtil
                        .deserialize(itr.next().getValue());

                if (log.isInfoEnabled())
                    log.info("locators[" + n + "]=" + locator);

                if (locator.getDataServiceUUID().equals(
                        dataService0.getServiceUUID())) {

                    ndataService0++;

                } else if (locator.getDataServiceUUID().equals(
                        dataService1.getServiceUUID())) {

                    ndataService1++;

                } else {

                    fail("Not expecting partition move to this service: "
                            + locator);

                }

                n++;

            }

            if (log.isInfoEnabled()) {

                log.info("npartitions=" + getPartitionCount(name));
                log.info("npartitions(ds0)=" + ndataService0);
                log.info("npartitions(ds1)=" + ndataService1);

            }

            assertEquals("#dataService0=" + ndataService0, 1, ndataService0);

            assertEquals("#dataService1=" + ndataService1, 1, ndataService1);

        }

    }

}