TestFixedLengthPrefixShardSplits.java example

Explorer
blazegraph-master
- database-master
/*

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/
/*
 * Created on Dec 20, 2009
 */

package com.bigdata.resources;

import java.util.Random;
import java.util.UUID;

import com.bigdata.btree.BTree;
import com.bigdata.btree.FixedLengthPrefixSplits;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.IndexSegment;
import com.bigdata.btree.IndexSegmentBuilder;
import com.bigdata.btree.IndexSegmentStore;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.btree.keys.TestKeyBuilder;
import com.bigdata.journal.IJournal;
import com.bigdata.mdi.IResourceMetadata;
import com.bigdata.mdi.LocalPartitionMetadata;
import com.bigdata.service.Split;
import com.bigdata.util.Bytes;
import com.bigdata.util.BytesUtil;

/**
 * Unit tests for splitting an index segment based on its size on the disk, the
 * nominal size of an index partition, and an optional application level
 * constraint on the choice of the separator keys; this suite specifically
 * exercises the {@link FixedLengthPrefixSplits} constraint. This approach
 * presumes a compacting merge has been performed such that all history other
 * than the buffered writes is on a single index segment. The buffered writes
 * are not considered when choosing the #of splits to make and the separator
 * keys for those splits. They are simply copied afterwards onto the new index
 * partition which covers their key-range.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 * 
 * @see src/architecture/SplitMath.xls
 */
public class TestFixedLengthPrefixShardSplits extends AbstractTestSegSplitter {

    /**
     * No-argument constructor; no test name is supplied to the superclass.
     */
    public TestFixedLengthPrefixShardSplits() {

        super();

    }

    /**
     * Creates the test case, passing <i>name</i> through to the superclass
     * constructor.
     * 
     * @param name
     *            The name handed to the superclass constructor.
     */
    public TestFixedLengthPrefixShardSplits(String name) {
        super(name);
    }

//    protected IJournal getStore() throws IOException {
//        
//        Properties p = new Properties();
//        
//        p.setProperty(Options.FILE, File.createTempFile(getName(), Options.JNL).toString());
//        
//        return new Journal(p);
//        
//    }
//    
//    protected IPartitionIdFactory pidFactory = new MockPartitionIdFactory();
//    
//    /**
//     * Always accepts the recommended separator key.
//     */
//    protected static final ISimpleSplitHandler acceptAllSplits = new ISimpleSplitHandler() {
//        
//        public byte[] getSeparatorKey(IndexSegment seg, int fromIndex, int toIndex,
//                int splitAt) {
//
//            return seg.keyAt(splitAt);
//            
//        }
//    };
//    
//    /**
//     * Always returns <code>null</code> (never accepts any splits).
//     */
//    protected static final ISimpleSplitHandler rejectAllSplits = new ISimpleSplitHandler() {
//        
//        public byte[] getSeparatorKey(IndexSegment seg, int fromIndex, int toIndex,
//                int splitAt) {
//
//            return null;
//            
//        }
//    };
//
//    /**
//     * Accepts the recommended separator key unless it is GTE the key given to
//     * the constructor, in which case it refuses to accept any splits.
//     */
//    private static class RejectSplitsAfterKey implements ISimpleSplitHandler {
//
//        private final byte[] rejectKey;
//
//        public RejectSplitsAfterKey(final byte[] rejectKey) {
//
//            if (rejectKey == null)
//                throw new IllegalArgumentException();
//
//            this.rejectKey = rejectKey;
//            
//        }
//        
//        public byte[] getSeparatorKey(final IndexSegment seg,
//                final int fromIndex, final int toIndex, final int splitAt) {
//
//            final byte[] a = seg.keyAt(splitAt);
//
//            if (BytesUtil.compareBytes(a, rejectKey) >= 0) {
//
//                // reject any split GTE the key specified to the ctor.
//                return null;
//
//            }
//
//            // Otherwise accept the recommended separator key.
//            return a;
//            
//        }
//    };
//
//    /**
//     * Generate an {@link IndexSegment} from the given BTree.
//     * 
//     * @param src
//     *            The source {@link BTree}.
//     * 
//     * @return The {@link IndexSegmentBuilder}.
//     */
//    static public IndexSegmentBuilder doBuild(final String name,
//            final ILocalBTreeView src, final long commitTime,
//            final byte[] fromKey, final byte[] toKey) throws Exception {
//
//        if (src == null)
//            throw new IllegalArgumentException();
//
//        // final String name = getName();
//
//        final File tmpDir = new File(System.getProperty("java.io.tmpdir"));
//
//        File outFile = null;
//        final IndexSegmentBuilder builder;
//        try {
//
//            // the file to be generated.
//            outFile = File.createTempFile(name, Options.SEG, tmpDir);
//
//            // new builder.
//            builder = IndexSegmentBuilder.newInstance(/*name, */src, outFile,
//                    tmpDir, true/* compactingMerge */, commitTime, fromKey,
//                    toKey);
//
//            // build the index segment.
//            builder.call();
//
//            return builder;
//        
//        } catch (Throwable t) {
//
//            if (outFile != null && outFile.exists()) {
//
//                try {
//
//                    outFile.delete();
//
//                } catch (Throwable t2) {
//
//                    log.warn(t2.getLocalizedMessage(), t2);
//
//                }
//
//            }
//
//            if (t instanceof Exception)
//                throw (Exception) t;
//
//            throw new RuntimeException(t);
//
//        }
//
//    }
//
//    /**
//     * Register a {@link BTree} against the journal, generate some data in the
//     * specified key range, and commit the data.
//     *<p>
//     * Note: this uses int values to generate the keys. If you specify the
//     * fromKey as anything other than an empty byte[] or the toKey as anything
//     * other than null then the key must be at least 4 bytes long and the 1st 4
//     * bytes will be decoded as an integer.
//     * 
//     * @param store
//     * @param ntuples
//     * @param pmd
//     */
//    private BTree generateData(final IJournal store, final int ntuples,
//            final LocalPartitionMetadata pmd) {
//        
//        assert store!=null;
//        assert pmd!=null;
//        
//        final byte[] fromKey = pmd.getLeftSeparatorKey();
//        final byte[] toKey = pmd.getRightSeparatorKey();
//
//        // inclusive lower bound.
//        final int low;
//        if (fromKey.length > 0) {
//            low = KeyBuilder.decodeInt(fromKey/* buf */, 0/* off */);
//        } else
//            low = Integer.MIN_VALUE;
//
//        // exclusive upper bound.
//        final int high;
//        if (toKey != null) {
//            high = KeyBuilder.decodeInt(toKey/* buf */, 0/* off */);
//        } else
//            high = Integer.MAX_VALUE;
//
//        /*
//         * @todo to apply an application constraint we need to pass it in here
//         * when we register the BTree (alternatively, you can work around this
//         * with how the unit test is setup).
//         */
//        final BTree btree;
//        {
//
//            final IndexMetadata md = new IndexMetadata(UUID.randomUUID());
//            
//            md.setPartitionMetadata(pmd);
//            
//            btree = (BTree) store.registerIndex(getName(), md);
//            
//        }
//
//        // generate evenly space tuples.
//        final long inc = ((long) high - (long) low) / ntuples;
//        assert inc > 0 : "inc=" + inc;
//        int ninsert = 0;
//        for (int i = low; i <= high && ninsert < ntuples; i += inc) {
//
//            btree.insert(TestKeyBuilder.asSortKey(i), i);
//
//            ninsert++;
//
//        }
//
//        // verify generated correct #of tuples.
//        assertEquals(ntuples, btree.getEntryCount());
//        
//        store.commit();
//        
//        // return view with lastCommitTime set.
//        return (BTree) store.getIndex(getName());
//        
//    }
//
//    /**
//     * Register a {@link BTree} against the journal, generate some data and
//     * commit the data (the data corresponds to a simple triple index schema but
//     * does not handle statement indices with variable length keys).
//     * 
//     * @param store
//     * @param ntuples
//     * @param pmd
//     * @param splitHandler
//     */
//    private BTree generateSPOData(final IJournal store, final int ntuples,
//            final LocalPartitionMetadata pmd) {
//        
//        assert store!=null;
//        assert pmd!=null;
//        
//        final byte[] fromKey = pmd.getLeftSeparatorKey();
//        final byte[] toKey = pmd.getRightSeparatorKey();
//        assert fromKey != null;
////        assert toKey != null;
//
//        final BTree btree;
//        {
//
//            final IndexMetadata md = new IndexMetadata(UUID.randomUUID());
//            
//            md.setPartitionMetadata(pmd);
//            
//            btree = (BTree) store.registerIndex(getName(), md);
//            
//        }
//
//        final Random r = new Random();
//        
//        final long[] s = new long[Math.max(ntuples/4,20)];
//        final long[] p = new long[Math.max(ntuples/50,5)];
//        final long[] o = new long[Math.max(ntuples/10,100)];
//        
//        long v = 0;
//        for (int i = 0; i < s.length; i++) {
//            v = (s[i] = v + r.nextInt(100));
//        }
//        for (int i = 0; i < p.length; i++) {
//            v = (p[i] = v + r.nextInt(100));
//        }
//        for (int i = 0; i < o.length; i++) {
//            v = (o[i] = v + r.nextInt(100));
//        }
//
//        final IKeyBuilder keyBuilder = KeyBuilder
//                .newInstance(Bytes.SIZEOF_LONG * 3);
//        
//        int ninsert = 0;
//        int ntries = 0;
//        while (ninsert < ntuples) {
//
//            final byte[] key = keyBuilder.reset()//
//                    .append(s[r.nextInt(s.length)])//
//                    .append(p[r.nextInt(p.length)])//
//                    .append(o[r.nextInt(o.length)])//
//                    .getKey();
//            
//            if (!btree.contains(key)) {
//
//                btree.insert(key, null/* val */);
//
//                ninsert++;
//                
//            }
//            
//            ntries++;
//
//            if (ntries > ntuples * 4) {
//
//                throw new RuntimeException(
//                        "Test setup is not terminating: ntuples=" + ntuples
//                                + ", ntries=" + ntries + ", ninsert=" + ninsert);
//
//            }
//            
//        }
//
//        // verify generated correct #of tuples.
//        assertEquals(ntuples, btree.getEntryCount());
//        
//        store.commit();
//        
//        // return view with lastCommitTime set.
//        return (BTree) store.getIndex(getName());
//        
//    }

//    /**
//     * A {@link SparseRowStore} schema used by some unit tests.
//     */
//    private final Schema schema = new Schema("Employee", "Id", KeyType.Long);
//
//    /**
//     * Register a {@link BTree} against the journal, generate some data and
//     * commit the data.
//     * <p>
//     * Note: This is a bit slow since it does a commit after each logical row
//     * inserted into the B+Tree.
//     * 
//     * @param store
//     * @param nrows
//     * @param pmd
//     */
//    private BTree generateSparseRowStoreData(final IJournal store,
//            final int nrows, final LocalPartitionMetadata pmd) {
//
//        assert store!=null;
//        assert pmd!=null;
//        
//        final byte[] fromKey = pmd.getLeftSeparatorKey();
//        final byte[] toKey = pmd.getRightSeparatorKey();
//        assert fromKey != null;
////        assert toKey != null;
//
//        final BTree btree;
//        {
//
//            final IndexMetadata md = new IndexMetadata(UUID.randomUUID());
//            
//            md.setPartitionMetadata(pmd);
//            
//            btree = (BTree) store.registerIndex(getName(), md);
//            
//        }
//
//        final SparseRowStore srs = new SparseRowStore(btree);
//
//        final String[] names = new String[] { "Bryan", "Mike", "Martyn",
//                "Paul", "Julie" };
//
//        final String[] states = new String[] {"MD","DC","UT","MA","NC"};
//        
//        // put a little randomness in to exercise the split logic more.
//        final Random r = new Random();
//        long id = 0;
//        long nadded = 0;
//        for (int i = 0; i < nrows; i++) {
//
//            final Map<String, Object> propertySet = new HashMap<String, Object>();
//
//            id += r.nextInt(20);
//            
//            propertySet.put("Id", id); // primary key.
//            propertySet.put("Name", names[i % names.length] + id);
//            propertySet.put("State", states[i % states.length] + id);
//            nadded += 3;
//
//            final int nextra = r.nextInt(10);
//            for (int j = 0; j < nextra; j++) {
//                propertySet.put("attr" + j, r.nextInt(20));
//                nadded++;
//            }
//
//            srs.write(schema, propertySet);
//
//        }
//
//        // verify generated correct #of tuples.
//        assertEquals(nadded, btree.getEntryCount());
//
//        store.commit();
//        
//        // return view with lastCommitTime set.
//        return (BTree) store.getIndex(getName());
//        
//    }
//
//    /**
//     * Mock implementation assigns index partitions from a counter beginning
//     * with ZERO (0), which is the first legal index partition identifier. The
//     * name parameter is ignored.
//     * 
//     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
//     * @version $Id$
//     */
//    static class MockPartitionIdFactory implements IPartitionIdFactory {
//
//        private int i = 0;
//
//        public int nextPartitionId(String nameIsIgnored) {
//
//            return i++;
//
//        }
//
//    }

    /**
     * Register a {@link BTree} against the journal, generate some data in the
     * specified key range, and commit the data.
     * <p>
     * Note: this uses int values to generate the keys. If you specify the
     * fromKey as anything other than an empty byte[] or the toKey as anything
     * other than null then the key must be at least 4 bytes long and the 1st 4
     * bytes will be decoded as an integer.
     * <p>
     * The generated keys are evenly spaced over the half-open range
     * <code>[low, high)</code>, where <code>low</code> is decoded from the left
     * separator key of <i>pmd</i> (or is {@link Integer#MIN_VALUE} when that
     * key is empty) and <code>high</code> is decoded from the right separator
     * key (or is {@link Integer#MAX_VALUE} when that key is <code>null</code>).
     * The value stored under each key is the int from which the key was
     * generated.
     * 
     * @param store
     *            The journal on which the index is registered and committed.
     * @param name
     *            The name under which the index is registered.
     * @param ntuples
     *            The #of tuples to generate (must be positive and must not
     *            exceed the #of int values spanned by the key range).
     * @param pmd
     *            The partition metadata. Its separator keys define the key
     *            range and it is attached to the {@link IndexMetadata} of the
     *            new index.
     * 
     * @return The index as resolved from the store by <i>name</i> after the
     *         commit.
     * 
     * @throws IllegalArgumentException
     *             if <i>store</i> or <i>pmd</i> is <code>null</code>, if the
     *             left separator key is <code>null</code>, if <i>ntuples</i>
     *             is not positive, or if the key range is too narrow to hold
     *             <i>ntuples</i> distinct keys.
     */
    protected BTree generateData(final IJournal store, final String name,
            final int ntuples, final LocalPartitionMetadata pmd) {

        /*
         * Note: These are explicit checks rather than [assert]s since
         * assertions are disabled unless the JVM is started with -ea.
         */
        if (store == null) {
            throw new IllegalArgumentException("store is null");
        }
        if (pmd == null) {
            throw new IllegalArgumentException("pmd is null");
        }
        if (ntuples <= 0) {
            // also avoids a divide by zero when computing the increment.
            throw new IllegalArgumentException("ntuples=" + ntuples);
        }

        final byte[] fromKey = pmd.getLeftSeparatorKey();
        final byte[] toKey = pmd.getRightSeparatorKey();

        if (fromKey == null) {
            throw new IllegalArgumentException("left separator key is null");
        }

        // inclusive lower bound.
        final int low = fromKey.length > 0 //
                ? KeyBuilder.decodeInt(fromKey/* buf */, 0/* off */)
                : Integer.MIN_VALUE;

        // exclusive upper bound.
        final int high = toKey != null //
                ? KeyBuilder.decodeInt(toKey/* buf */, 0/* off */)
                : Integer.MAX_VALUE;

        // the distance between successive keys (computed as a long since the
        // full int range does not fit in an int).
        final long inc = ((long) high - (long) low) / ntuples;

        if (inc <= 0) {
            // empty/inverted range or more tuples than distinct keys.
            throw new IllegalArgumentException("inc=" + inc);
        }

        /*
         * @todo to apply an application constraint we need to pass it in here
         * when we register the BTree (alternatively, you can work around this
         * with how the unit test is setup).
         */
        final BTree btree;
        {

            final IndexMetadata md = new IndexMetadata(UUID.randomUUID());

            md.setPartitionMetadata(pmd);

            btree = (BTree) store.registerIndex(name, md);

        }

        /*
         * Generate evenly spaced tuples. The cursor is a long so that adding
         * the increment can never wrap around; each value which is actually
         * inserted lies in [low, high) and therefore fits in an int.
         */
        long next = low;

        for (int ninsert = 0; ninsert < ntuples && next <= high; ninsert++, next += inc) {

            final int i = (int) next;

            btree.insert(TestKeyBuilder.asSortKey(i), i);

        }

        // verify generated correct #of tuples.
        assertEquals(ntuples, btree.getEntryCount());

        store.commit();

        // return view with lastCommitTime set.
        return (BTree) store.getIndex(name);

    }

    /**
     * Register a {@link BTree} against the journal, generate some data and
     * commit the data (the data corresponds to a simple triple index schema but
     * does not handle statement indices with variable length keys).
     * <p>
     * Each key is the concatenation of three longs drawn at random from three
     * pools of values (one pool per key component). The pool values are built
     * by accumulating random increments in <code>[0,100)</code>, with the
     * running total carried over from one pool to the next. The values stored
     * under the keys are <code>null</code>. The index is registered under
     * {@link #getName()}.
     * 
     * @param store
     *            The journal on which the index is registered and committed.
     * @param ntuples
     *            The #of distinct keys to insert.
     * @param pmd
     *            The partition metadata attached to the {@link IndexMetadata}
     *            of the new index.
     * 
     * @return The index as resolved from the store by name after the commit.
     * 
     * @throws IllegalArgumentException
     *             if <i>store</i> or <i>pmd</i> is <code>null</code> or if the
     *             left separator key is <code>null</code>.
     * @throws RuntimeException
     *             if more than <code>4 * ntuples</code> keys had to be drawn
     *             (guards against a test setup which does not terminate).
     */
    protected BTree generateSPOData(final IJournal store, final int ntuples,
            final LocalPartitionMetadata pmd) {

        /*
         * Note: These are explicit checks rather than [assert]s since
         * assertions are disabled unless the JVM is started with -ea.
         */
        if (store == null) {
            throw new IllegalArgumentException("store is null");
        }
        if (pmd == null) {
            throw new IllegalArgumentException("pmd is null");
        }
        if (pmd.getLeftSeparatorKey() == null) {
            throw new IllegalArgumentException("left separator key is null");
        }

        final BTree btree;
        {

            final IndexMetadata md = new IndexMetadata(UUID.randomUUID());

            md.setPartitionMetadata(pmd);

            btree = (BTree) store.registerIndex(getName(), md);

        }

        final Random r = new Random();

        // The pools of values for each of the three key components.
        final long[] s = new long[Math.max(ntuples/4,20)];
        final long[] p = new long[Math.max(ntuples/50,5)];
        final long[] o = new long[Math.max(ntuples/10,100)];

        // Fill the pools. The running total [v] carries across the pools.
        long v = 0;
        for (int i = 0; i < s.length; i++) {
            v = (s[i] = v + r.nextInt(100));
        }
        for (int i = 0; i < p.length; i++) {
            v = (p[i] = v + r.nextInt(100));
        }
        for (int i = 0; i < o.length; i++) {
            v = (o[i] = v + r.nextInt(100));
        }

        final IKeyBuilder keyBuilder = KeyBuilder
                .newInstance(Bytes.SIZEOF_LONG * 3);

        // Upper bound on the #of draws (a long so the product can not wrap).
        final long maxTries = 4L * ntuples;

        int ninsert = 0;
        int ntries = 0;
        while (ninsert < ntuples) {

            final byte[] key = keyBuilder.reset()//
                    .append(s[r.nextInt(s.length)])//
                    .append(p[r.nextInt(p.length)])//
                    .append(o[r.nextInt(o.length)])//
                    .getKey();

            // Only distinct keys count towards the target.
            if (!btree.contains(key)) {

                btree.insert(key, null/* val */);

                ninsert++;

            }

            ntries++;

            if (ntries > maxTries) {

                throw new RuntimeException(
                        "Test setup is not terminating: ntuples=" + ntuples
                                + ", ntries=" + ntries + ", ninsert=" + ninsert);

            }

        }

        // verify generated correct #of tuples.
        assertEquals(ntuples, btree.getEntryCount());

        store.commit();

        // return view with lastCommitTime set.
        return (BTree) store.getIndex(getName());

    }

    /**
     * Unit test for the logic handling application constraints on the choice of
     * the separator key, which can also change the #of splits which may be
     * generated from a given input B+Tree. Specifically, this tests the
     * {@link FixedLengthPrefixSplits} constraint.
     * <p>
     * The test generates an index whose keys are each formed from three longs,
     * builds an index segment from that index, and then computes the splits
     * for that segment twice: once with a handler which accepts any separator
     * key and once with a {@link FixedLengthPrefixSplits} handler configured
     * for a prefix of {@link Bytes#SIZEOF_LONG} bytes. For the latter it also
     * verifies that the chosen separator key has exactly that length and that
     * the keys on either side of it differ within that prefix.
     */
    public void test_split_applicationConstraint_nbytePrefix() throws Exception {

        // The separator keys of the source index partition.
        final byte[] fromKey = new byte[0];
        final byte[] toKey = null;

        // The #of tuples to be generated.
        final int ntuples = 1000;

        // The #of splits expected from each of the two scenarios below.
        final int expectedSplitCount = 2;

        // The length of the prefix imposed on the separator keys.
        final int prefixLength = Bytes.SIZEOF_LONG;

        // The length of each generated key (three longs).
        final int keyLength = Bytes.SIZEOF_LONG * 3;

        IndexSegmentBuilder segBuilder = null;
        final IJournal journal = getStore();

        try {

            final LocalPartitionMetadata pmd = new LocalPartitionMetadata(
                    pidFactory.nextPartitionId(getName()),//
                    -1, // sourcePartitionId
                    fromKey, //
                    toKey,//
                    new IResourceMetadata[] { journal.getResourceMetadata() }, //
                    null // cause
            );

            // Populate the source index and commit it on the journal.
            final BTree btree = generateSPOData(journal, ntuples, pmd);

            // Compacting merge of the source index onto an index segment.
            segBuilder = doBuild(getName(), btree, btree.getLastCommitTime(),
                    fromKey, toKey);

            final IndexSegmentStore segmentStore = new IndexSegmentStore(
                    segBuilder.outFile);

            /*
             * Scenario 1: two splits are expected when the split handler will
             * accept any separator key.
             */
            try {

                final long shardSize = (long) (segmentStore.size() / (expectedSplitCount / 2.0));

                final IndexSegment segment = segmentStore.loadIndexSegment();

                final Split[] result = SplitUtility.getSplits(pidFactory, pmd,
                        segment, shardSize, acceptAllSplits);

                SplitUtility.validateSplits(pmd, result, true/* checkStuff */);

                assertEquals("#splits", expectedSplitCount, result.length);

            } finally {

                segmentStore.close();

            }

            /*
             * Scenario 2: two splits are expected when the split handler will
             * only accept a separator key on an 8 byte prefix boundary.
             */
            try {

                final long shardSize = (long) (segmentStore.size() / (expectedSplitCount / 2.0));

                final IndexSegment segment = segmentStore.loadIndexSegment();

                final Split[] result = SplitUtility.getSplits(pidFactory, pmd,
                        segment, shardSize, new FixedLengthPrefixSplits(
                                prefixLength));

                SplitUtility.validateSplits(pmd, result, true/* checkStuff */);

                assertEquals("#splits", expectedSplitCount, result.length);

                // The right separator of the 1st split divides the two splits.
                final byte[] separatorKey = result[0].pmd.getRightSeparatorKey();

                // It must exist and must be exactly as long as the prefix.
                assertNotNull(separatorKey);
                assertEquals("prefix length", prefixLength, separatorKey.length);

                /*
                 * Locate the separator key in the segment. Since every key in
                 * the segment is longer than the prefix, the separator key is
                 * not itself in the index and the lookup reports an insertion
                 * point. The key at the insertion point and the key just
                 * before it must differ in their first 8 bytes, which is the
                 * constraint being imposed.
                 * 
                 * Note: A split handler which does nothing could satisfy this
                 * by chance, but the odds are against it, so this will nearly
                 * always correctly fail a broken split handler.
                 */
                final long insertionPoint = segment.indexOf(separatorKey);
                assertTrue(insertionPoint < 0);

                // Decode the insertion point into a tuple index.
                final long indexAfter = -(insertionPoint) - 1;
                final long indexBefore = indexAfter - 1;

                // The last key to the left of the separator key.
                final byte[] leftKey = segment.keyAt(indexBefore);
                assertEquals(keyLength, leftKey.length);

                // The first key to the right of the separator key.
                final byte[] rightKey = segment.keyAt(indexAfter);
                assertEquals(keyLength, rightKey.length);

                // Unsigned byte[] comparison restricted to the key prefixes.
                final int prefixOrder = BytesUtil.compareBytesWithLenAndOffset(//
                        0/* aoff */, prefixLength/* alen */, leftKey,//
                        0/* boff */, prefixLength/* blen */, rightKey//
                        );

                // The left prefix must be strictly LT the right prefix.
                assertTrue(prefixOrder < 0);

            } finally {

                segmentStore.close();

            }

        } finally {

            // Remove the generated index segment file (if one was built).
            if (segBuilder != null && !segBuilder.outFile.delete()) {

                log.warn("Could not delete: "+segBuilder.outFile);

            }

            journal.destroy();

        }

    }

}