AbstractRabaCoderTestCase.java example

Explorer
blazegraph-master
- database-master
/*

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Aug 6, 2009
 */

package com.bigdata.btree.raba.codec;

import it.unimi.dsi.compression.CanonicalFast64CodeWordDecoder;
import it.unimi.dsi.compression.HuffmanCodec;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Random;
import java.util.concurrent.TimeUnit;

import junit.framework.AssertionFailedError;
import junit.framework.TestCase;
import junit.framework.TestCase2;

import com.bigdata.btree.AbstractBTreeTestCase;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.btree.keys.TestKeyBuilder;
import com.bigdata.btree.raba.IRaba;
import com.bigdata.btree.raba.ReadOnlyKeysRaba;
import com.bigdata.btree.raba.ReadOnlyValuesRaba;
import com.bigdata.io.AbstractFixedByteArrayBuffer;
import com.bigdata.io.DataOutputBuffer;
import com.bigdata.io.FixedByteArrayBuffer;
import com.bigdata.io.SerializerUtil;
import com.bigdata.util.BytesUtil;
import com.bigdata.util.BytesUtil.UnsignedByteArrayComparator;

/**
 * Abstract test suite for {@link IRabaCoder} implementations.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 */
abstract public class AbstractRabaCoderTestCase extends TestCase2 {

    /**
     * 
     */
    public AbstractRabaCoderTestCase() {
        // No state to initialize here; delegate to the no-arg super constructor.
        super();
    }

    /**
     * @param name
     */
    public AbstractRabaCoderTestCase(final String name) {
        // The test name is passed through to the superclass unchanged.
        super(name);
    }

    /**
     * Return <code>true</code> if the {@link IRabaCoder} only handles fixed
     * length values (the default always returns <code>false</code>). This is
     * used to skip tests which have variable length byte[]s when testing the
     * {@link FixedLengthValueRabaCoder}.
     */
    protected boolean isFixedLength() {
        // Default: the tests using variable length byte[]s are NOT skipped.
        return false;
    }

    /**
     * The fixture under test. This will be <code>null</code> unless you
     * explicitly set it in {@link #setUp()}.
     */
    protected IRabaCoder rabaCoder = null; // the tests dereference this without a null check.

    /**
     * A simple unit test.
     */
    public void test_mike_personick() throws UnsupportedEncodingException {

        // Skipped when the coder only handles fixed length values.
        if (isFixedLength()) {
            return;
        }

        final String charset = "US-ASCII";

        // Two elements, already in sorted order.
        final byte[][] names = new byte[][] { //
                "mike".getBytes(charset), //
                "personick".getBytes(charset) //
        };

        // Round trip the data as B+Tree values.
        if (rabaCoder.isValueCoder()) {
            doRoundTripTest(rabaCoder, new ReadOnlyValuesRaba(names));
        }

        // The remainder of the test applies only to coders for B+Tree keys.
        if (!rabaCoder.isKeyCoder()) {
            return;
        }

        final IRaba keys = new ReadOnlyKeysRaba(names);

        doRoundTripTest(rabaCoder, keys);

        /*
         * Spot check the value reported by search() for a variety of probe
         * keys, none of which is present in the coded keys.
         */
        final AbstractFixedByteArrayBuffer coded = rabaCoder.encode(keys,
                new DataOutputBuffer());

        final ICodedRaba decoded = rabaCoder.decode(coded);

        // an empty byte[].
        final byte[] emptyProbe = new byte[] {};
        assertEquals(-1, decoded.search(emptyProbe));

        // "mik" : a proper prefix of the first key.
        final byte[] mikProbe = new byte[] { 'm', 'i', 'k' };
        assertEquals(-1, decoded.search(mikProbe));

        // "mikes" : the first key plus one more byte.
        final byte[] mikesProbe = new byte[] { 'm', 'i', 'k', 'e', 's' };
        assertEquals(-2, decoded.search(mikesProbe));

        // "personic" : a proper prefix of the second key.
        final byte[] personicProbe = "personic".getBytes(charset);
        assertEquals(-2, decoded.search(personicProbe));

        // "personicks" : the second key plus one more byte.
        final byte[] personicksProbe = "personicks".getBytes(charset);
        assertEquals(-3, decoded.search(personicksProbe));

    }

    /**
     * Test with byte values which are negative values when interpreted as
     * signed 8 bit integers.
     */
    public void test_negativeByteValues() throws UnsupportedEncodingException {

        // Skipped when the coder only handles fixed length values.
        if (isFixedLength()) {
            return;
        }

        // A single element whose second byte is negative as a signed byte.
        final byte[][] elements = new byte[][] { { 64, -64 } };

        // Round trip as B+Tree keys.
        if (rabaCoder.isKeyCoder()) {
            final IRaba keys = new ReadOnlyKeysRaba(elements);
            doRoundTripTest(rabaCoder, keys);
        }

        // Round trip as B+Tree values.
        if (rabaCoder.isValueCoder()) {
            final IRaba vals = new ReadOnlyValuesRaba(elements);
            doRoundTripTest(rabaCoder, vals);
        }

    }

    /**
     * Test with an empty byte[] element.
     * 
     * @throws UnsupportedEncodingException
     */
    public void test_emptyElement() throws UnsupportedEncodingException {

        // Skipped when the coder only handles fixed length values.
        if (isFixedLength()) {
            return;
        }

        // The first element is a zero length byte[].
        final byte[][] elements = new byte[][] { //
                new byte[0], //
                "mike".getBytes("US-ASCII"), //
                "personick".getBytes("US-ASCII") //
        };

        // Round trip as B+Tree keys.
        if (rabaCoder.isKeyCoder()) {
            final IRaba keys = new ReadOnlyKeysRaba(elements);
            doRoundTripTest(rabaCoder, keys);
        }

        // Round trip as B+Tree values.
        if (rabaCoder.isValueCoder()) {
            final IRaba vals = new ReadOnlyValuesRaba(elements);
            doRoundTripTest(rabaCoder, vals);
        }

    }

    /**
     * Test with a single byte value (nsymbols:=1). This test was written to
     * exercise a known bug in {@link HuffmanCodec} and
     * {@link CanonicalFast64CodeWordDecoder}. A workaround for that bug has
     * been implemented in the {@link CanonicalHuffmanRabaCoder}.
     */
    public void test_nsymbolsOne() {

        // Skipped when the coder only handles fixed length values.
        if (isFixedLength()) {
            return;
        }

        // One element consisting of one byte, hence exactly one distinct symbol.
        final byte[][] elements = new byte[][] { { 1 } };

        // Round trip as B+Tree keys.
        if (rabaCoder.isKeyCoder()) {
            final IRaba keys = new ReadOnlyKeysRaba(elements);
            doRoundTripTest(rabaCoder, keys);
        }

        // Round trip as B+Tree values.
        if (rabaCoder.isValueCoder()) {
            final IRaba vals = new ReadOnlyValuesRaba(elements);
            doRoundTripTest(rabaCoder, vals);
        }

    }

    /**
     * Test with a single byte value (nsymbols=1) and some nulls. This test was
     * written to exercise a known bug in {@link HuffmanCodec} and
     * {@link CanonicalFast64CodeWordDecoder}. A workaround for that bug has
     * been implemented in the {@link CanonicalHuffmanRabaCoder}.
     */
    public void test_nsymbolsOne_nulls() {

        // Skipped when the coder only handles fixed length values.
        if (isFixedLength()) {
            return;
        }

        // One single-byte element followed by two nulls.
        final byte[][] elements = new byte[][] { { 1 }, null, null };

        // The data includes nulls, so it is only round tripped as values.
        if (rabaCoder.isValueCoder()) {
            final IRaba vals = new ReadOnlyValuesRaba(elements);
            doRoundTripTest(rabaCoder, vals);
        }

    }

    /**
     * Test with a null value.
     * 
     * @throws UnsupportedEncodingException
     */
    public void test_withNulls() throws UnsupportedEncodingException {

        // Only run for value coders: the test data includes a null.
        if (rabaCoder.isValueCoder()) {

            // Two random values followed by a null.
            final byte[][] values = new byte[][] { //
                    getRandomValue(rabaCoder), //
                    getRandomValue(rabaCoder), //
                    null //
            };

            doRoundTripTest(rabaCoder, new ReadOnlyValuesRaba(values));

        }

    }

    /**
     * Test with a single value, which is null.
     * 
     * FIXME Due to a bug in the {@link CanonicalFast64CodeWordDecoder} ctor
     * there is a problem handling a logical byte[][] consisting solely of
     * <code>null</code>s. We handle this for the case of an empty logical
     * byte[][] using an {@link EmptyRabaValueDecoder}. I suppose that could be
     * parameterized to identify the <code>null</code>s or else just fix the
     * ctor.
     */
    public void test_withNulls2() {

        // Only run for value coders: the test data is a null.
        if (rabaCoder.isValueCoder()) {

            // A logical byte[][] whose only element is a null.
            final byte[][] values = new byte[][] { null };

            doRoundTripTest(rabaCoder, new ReadOnlyValuesRaba(values));

        }

    }

    /**
     * Source of random data for the randomized tests. It is created without an
     * explicit seed.
     */
    final Random r = new Random();

    /**
     * Test with {@link IRaba} having a size of ZERO (0) and a variety of
     * capacities.
     */
    public void test_empty() throws IOException {

        final int[] capacities = new int[] { 0, 1, 2, 10 };

        for (int capacity : capacities) {

            doRandomRoundTripTest(rabaCoder, 0/* size */, capacity);

        }

    }

    /**
     * Test with {@link IRaba} having a size of ONE (1) and a variety of
     * capacities.
     */
    public void test_entryCount1() throws IOException {

        // The capacities to be tested, in order.
        final int[] capacities = new int[] { 1, 2, 10 };

        for (int capacity : capacities) {

            doRandomRoundTripTest(rabaCoder, 1/* n */, capacity);

        }

    }

    /**
     * Test with {@link IRaba} having a size of TWO (2) and a variety of
     * capacities.
     */
    public void test_entryCount2() throws IOException {

        // The capacities to be tested, in order.
        final int[] capacities = new int[] { 2, 3, 10 };

        for (int capacity : capacities) {

            doRandomRoundTripTest(rabaCoder, 2/* n */, capacity);

        }

    }
    
    /**
     * This test case was developed for the {@link FrontCodedRabaCoder}.
     */
    public void test_error1() throws IOException {

        // Skipped when the coder only handles fixed length values.
        if (isFixedLength()) {
            return;
        }

        // Byte values above 127 are passed through KeyBuilder.encodeByte().
        final byte b187 = KeyBuilder.encodeByte(187);
        final byte b146 = KeyBuilder.encodeByte(146);
        final byte b207 = KeyBuilder.encodeByte(207);
        final byte b176 = KeyBuilder.encodeByte(176);
        final byte b236 = KeyBuilder.encodeByte(236);
        final byte b209 = KeyBuilder.encodeByte(209);
        final byte b238 = KeyBuilder.encodeByte(238);

        /*
         * Ten elements which share their first six bytes.
         *
         * Historical note: when this was being debugged, entry [4] was the
         * one whose rlen/clen appeared to be incorrect and search() for
         * entry [5] was the one that was broken.
         */
        final byte[][] elements = new byte[][] { //
                { 121, b187, b146, b207, 99, 112, 24, 116 }, //
                { 121, b187, b146, b207, 99, 112, 43, 68 }, //
                { 121, b187, b146, b207, 99, 112, 46, 78 }, //
                { 121, b187, b146, b207, 99, 112, 54, b176 }, //
                { 121, b187, b146, b207, 99, 112, 54, b236 }, //
                { 121, b187, b146, b207, 99, 112, 55, b209 }, //
                { 121, b187, b146, b207, 99, 112, 62, 85 }, //
                { 121, b187, b146, b207, 99, 112, 63, b238 }, //
                { 121, b187, b146, b207, 99, 112, 71, 124 }, //
                { 121, b187, b146, b207, 99, 112, 73, 49 } //
        };

        // Round trip as B+Tree keys.
        if (rabaCoder.isKeyCoder()) {
            final IRaba keys = new ReadOnlyKeysRaba(elements);
            doRoundTripTest(rabaCoder, keys);
        }

        // Round trip as B+Tree values.
        if (rabaCoder.isValueCoder()) {
            final IRaba vals = new ReadOnlyValuesRaba(elements);
            doRoundTripTest(rabaCoder, vals);
        }

    }

    /**
     * This test case was developed for the {@link CanonicalHuffmanRabaCoder}.
     * 
     * <pre>
     * junit.framework.AssertionFailedError: search([44, 186, 169, 175, 191, 31, 36, 227]): expectedIndex=1, actualIndex=-1,
     * expected=com.bigdata.btree.raba.ReadOnlyKeysRaba{ capacity=3, size=2, isKeys=true, isReadOnly=true, [
     * [44, 186, 169, 175, 191, 31, 36, 12],
     * [44, 186, 169, 175, 191, 31, 36, 227]]},
     * actual=com.bigdata.btree.raba.codec.CanonicalHuffmanRabaCoder$RabaDecoder{ capacity=2, size=2, isKeys=true, isReadOnly=true, [
     * [44, 186, 169, 175, 191, 31, 36, 12],
     * [44, 186, 169, 175, 191, 31, 36, 227]]}
     *     at junit.framework.Assert.fail(Assert.java:47)
     *     at com.bigdata.btree.AbstractBTreeTestCase.assertSameRaba(AbstractBTreeTestCase.java:583)
     *     at com.bigdata.btree.raba.codec.AbstractRabaCoderTestCase.doRoundTripTest(AbstractRabaCoderTestCase.java:499)
     *     at com.bigdata.btree.raba.codec.AbstractRabaCoderTestCase.doRandomRoundTripTest(AbstractRabaCoderTestCase.java:487)
     *     at com.bigdata.btree.raba.codec.AbstractRabaCoderTestCase.test_entryCount2(AbstractRabaCoderTestCase.java:247)
     * </pre>
     * 
     * <pre>
     * junit.framework.AssertionFailedError: search([44, 197, 214, 208, 192, 31, 36, 156]): expectedIndex=1, actualIndex=-1,
     * expected=com.bigdata.btree.raba.ReadOnlyKeysRaba{ capacity=2, size=2, isKeys=true, isReadOnly=true, [
     * [44, 197, 214, 208, 192, 31, 36, 12],
     * [44, 197, 214, 208, 192, 31, 36, 156]]},
     * actual=com.bigdata.btree.raba.codec.CanonicalHuffmanRabaCoder$RabaDecoder{ capacity=2, size=2, isKeys=true, isReadOnly=true, [
     * [44, 197, 214, 208, 192, 31, 36, 12],
     * [44, 197, 214, 208, 192, 31, 36, 156]]}
     *     at junit.framework.Assert.fail(Assert.java:47)
     *     at com.bigdata.btree.AbstractBTreeTestCase.assertSameRaba(AbstractBTreeTestCase.java:583)
     *     at com.bigdata.btree.raba.codec.AbstractRabaCoderTestCase.doRoundTripTest(AbstractRabaCoderTestCase.java:538)
     *     at com.bigdata.btree.raba.codec.AbstractRabaCoderTestCase.test_error2(AbstractRabaCoderTestCase.java:325)
     * </pre>
     */
    public void test_error2() {

        // Skipped when the coder only handles fixed length values.
        if (isFixedLength()) {
            return;
        }

        /*
         * The bytes which the two elements have in common. The values are
         * written as (127 - v). Note: an alternative form written as
         * (v - 127) was also tried for this data at one point.
         */
        final byte p1 = 127 - 186;
        final byte p2 = 127 - 169;
        final byte p3 = 127 - 175;
        final byte p4 = 127 - 191;

        // The final byte of the second element.
        final byte last = 127 - 227;

        // Two elements which differ only in their final byte.
        final byte[][] elements = new byte[][] { //
                { 44, p1, p2, p3, p4, 31, 36, 12 }, //
                { 44, p1, p2, p3, p4, 31, 36, last } //
        };

        // Round trip as B+Tree keys.
        if (rabaCoder.isKeyCoder()) {
            final IRaba keys = new ReadOnlyKeysRaba(elements);
            doRoundTripTest(rabaCoder, keys);
        }

        // Round trip as B+Tree values.
        if (rabaCoder.isValueCoder()) {
            final IRaba vals = new ReadOnlyValuesRaba(elements);
            doRoundTripTest(rabaCoder, vals);
        }

    }
    
    /**
     * A single random round trip test. The #of elements is random in [0:99]
     * and the capacity is random in [size:2*size].
     */
    public void test_randomOnce() throws IOException {

        // #of elements.
        final int size = r.nextInt(100);

        // #of unused slots beyond the elements.
        final int unused = r.nextInt(size + 1);

        doRandomRoundTripTest(rabaCoder, size, size + unused/* capacity */);

    }

    /**
     * Runs 1000 random round trip trials. For each trial the #of elements is
     * random in [0:99] and the capacity is random in [size:2*size].
     */
    public void testStress() throws IOException {

        final int ntrials = 1000;

        int trial = 0;

        while (trial < ntrials) {

            // #of elements.
            final int size = r.nextInt(100);

            // #of unused slots beyond the elements.
            final int unused = r.nextInt(size + 1);

            doRandomRoundTripTest(rabaCoder, size, size + unused/* capacity */);

            trial++;

        }

    }

    /**
     * Test using a sequence of random URIs (formed with successive prefixes).
     * 
     * @throws Exception
     */
    public void test_randomURIs() throws Exception {

        // Skipped when the coder only handles fixed length values.
        if (isFixedLength()) {
            return;
        }

        // random, distinct, unordered w/o nulls.
        final byte[][] uris = new RandomURIGenerator(r).generateValues(100);

        final int n = uris.length;

        if (rabaCoder.isValueCoder()) {

            // Values are tested in the order in which they were generated.
            final IRaba vals = new ReadOnlyValuesRaba(0/* fromIndex */,
                    n/* toIndex */, n/* capacity */, uris);

            doRoundTripTest(rabaCoder, vals);

        }

        if (rabaCoder.isKeyCoder()) {

            // Keys are sorted in place (unsigned byte[] order) first.
            Arrays.sort(uris, 0, n, UnsignedByteArrayComparator.INSTANCE);

            final IRaba keys = new ReadOnlyKeysRaba(0/* fromIndex */,
                    n/* toIndex */, n/* capacity */, uris);

            doRoundTripTest(rabaCoder, keys);

        }

    }

    /**
     * Return a random byte array. The byte array will also have a random length
     * in [0:511] unless the {@link IRabaCoder} is a
     * {@link FixedLengthValueRabaCoder}, in which case a byte[] having the
     * appropriate length will be returned.
     * 
     * @param dataCoder
     *            The coder.
     *            
     * @return The random byte[].
     */
    protected byte[] getRandomValue(final IRabaCoder dataCoder) {

        final boolean fixedLength = dataCoder instanceof FixedLengthValueRabaCoder;

        // Either the length imposed by the coder or a random length.
        final int nbytes = fixedLength //
                ? ((FixedLengthValueRabaCoder) dataCoder).getLength() //
                : r.nextInt(512);

        final byte[] bytes = new byte[nbytes];

        // fill with random data.
        r.nextBytes(bytes);

        return bytes;

    }
    
    /**
     * Generates a random byte[][] and verifies round-trip encoding and
     * decoding.
     * <p>
     * Note: This uses a uniform random distribution. Therefore the huffman
     * codes tend to occupy MORE space than the original byte[][] since the
     * canonical huffman code can not be shorter than the original byte values
     * on average when all values are equally likely. When you want to measure
     * the expected compression ratio you need to test with a gaussian
     * distribution (bell curve) over the values, a USASCII distribution, etc.
     * 
     * @param size
     *            The #of entries in the byte[][].
     * @param capacity
     *            The capacity of the byte[][].
     * @throws IOException
     */
    protected void doRandomRoundTripTest(final IRabaCoder dataCoder,
            final int size, final int capacity) throws IOException {

        assert capacity >= size;

        if (dataCoder.isValueCoder()) {

            /*
             * Note: random values are not ordered and may contain nulls.
             */

            final byte[][] data = new byte[capacity][];

            for (int i = 0; i < size; i++) {

                // roughly 3% of the values are nulls.
                final boolean isNull = r.nextFloat() < .03;

                if (isNull) {

                    data[i] = null;

                } else {

                    data[i] = getRandomValue(dataCoder);

                }

            }

            // layer on interface.
            final IRaba raba = new ReadOnlyValuesRaba(0/* fromIndex */,
                    size/* toIndex */, capacity, data);

            doRoundTripTest(dataCoder, raba);

        }

        if (dataCoder.isKeyCoder()) {

            /*
             * Note: B+Tree keys based on random values. The keys must be
             * ordered, may not contain duplicates, and may not contain nulls.
             */

            final byte[][] data = new byte[capacity][];

            /*
             * The nominal maximum possible increment between successive keys.
             * The actual increment will be a random number in [1:nominal],
             * or less when there is not that much room left below
             * Long.MAX_VALUE.
             */
            final int nominalIncRange = 5000;

            // any integer value.
            long lastKey = r.nextLong();

            // The #of keys that we actually generated.
            int nactual = 0;

            for (int i = 0; i < size; i++) {

                data[i] = TestKeyBuilder.asSortKey(lastKey);

                // The key is counted as soon as it has been generated.
                nactual++;

                /*
                 * The room remaining above lastKey. (MAX_VALUE - lastKey)
                 * overflows when lastKey is negative, but in that case there
                 * is always more room than the nominal increment so the
                 * remainder is simply saturated.
                 */
                final long remainder = lastKey < 0 ? Long.MAX_VALUE
                        : Long.MAX_VALUE - lastKey;

                if (remainder == 0) {

                    // out of room in the long value space.
                    break;

                }

                /*
                 * Clamp the increment to the remaining room so lastKey can
                 * not overflow, which would break the key ordering.
                 */
                final int incRange = (int) Math.min(nominalIncRange, remainder);

                // increment is always at least by one to avoid duplicate keys.
                final int inc = r.nextInt(incRange) + 1;

                lastKey += inc;

            }

            // layer on interface.
            final IRaba raba = new ReadOnlyKeysRaba(0/* fromIndex */,
                    nactual/* toIndex */, capacity, data);

            doRoundTripTest(dataCoder, raba);

        }

    }

    /**
     * Verifies round-trip encoding and decoding of some data. The following
     * are checked in order: (de-)serialization of the coder itself; the
     * coded raba returned by <code>encodeLive()</code>; that
     * <code>encode()</code> and <code>encodeLive()</code> produce the same
     * bytes; that the coded data can be decoded; and that both encode and
     * decode work with a non-zero offset into the backing byte[].
     * <p>
     * Anything thrown along the way is reported through <code>fail()</code>
     * with a message which includes the <i>expected</i> raba.
     * 
     * @param rabaCoder
     *            The coder under test.
     * @param expected
     *            The data to be coded.
     */
    static public void doRoundTripTest(final IRabaCoder rabaCoder,
            final IRaba expected) {

        try {

            // The coder itself must be (de-)serializable.
            {
                final byte[] serializedCoder = SerializerUtil
                        .serialize(rabaCoder);

                final IRabaCoder deserializedCoder = (IRabaCoder) SerializerUtil
                        .deserialize(serializedCoder);

            }

            // The live coded path (coded raba instance for immediate use).
            final ICodedRaba live = rabaCoder.encodeLive(expected,
                    new DataOutputBuffer());

            final AbstractFixedByteArrayBuffer liveData = live.data();

            AbstractBTreeTestCase.assertSameRaba(expected, live);

            // The path which returns just the coded data.
            final AbstractFixedByteArrayBuffer encoded = rabaCoder.encode(
                    expected, new DataOutputBuffer());

            {

                /*
                 * Read the coded bytes back out through getDataInput(). This
                 * is really a test of the returned DataInputBuffer.
                 */

                assertEquals(0, encoded.off());

                final byte[] copy = new byte[encoded.len()];

                encoded.getDataInput().readFully(copy);

                // same bytes as the slice returned by encode().
                assertTrue(BytesUtil.compareBytesWithLenAndOffset(//
                        encoded.off(), encoded.len(), encoded.array(), //
                        0, copy.length, copy) == 0);

                // same bytes as the data for encodeLive().
                assertTrue(BytesUtil.compareBytesWithLenAndOffset(//
                        encoded.off(), encoded.len(), encoded.array(), //
                        liveData.off(), liveData.len(), liveData.array()) == 0);

            }

            // The coded data must be decodable.
            {

                final ICodedRaba decoded = rabaCoder.decode(encoded);

                // decoder built from the output of encode().
                AbstractBTreeTestCase.assertSameRaba(expected, decoded);

                // decoder built from the data reported by the decoded raba.
                final ICodedRaba redecoded = rabaCoder.decode(decoded.data());

                AbstractBTreeTestCase.assertSameRaba(expected, redecoded);

            }

            // Encode onto a buffer having a non-zero offset.
            {

                final int offset = 10;

                final DataOutputBuffer buf = new DataOutputBuffer(offset,
                        new byte[100 + offset]);

                final AbstractFixedByteArrayBuffer encodedAtOffset = rabaCoder
                        .encode(expected, buf);

                // the returned slice must have the same coded data.
                assertEquals(encoded.toByteArray(), encodedAtOffset
                        .toByteArray());

            }

            // Decode from a slice having a non-zero offset.
            {

                final int offset = 10;

                // copy the coded data into a larger array at that offset.
                final byte[] shifted = new byte[offset + encoded.len()];

                System.arraycopy(encoded.array(), encoded.off(), shifted,
                        offset, encoded.len());

                final FixedByteArrayBuffer shiftedSlice = new FixedByteArrayBuffer(
                        shifted, offset, encoded.len());

                // the slice must expose the same coded data.
                assertEquals(encoded.toByteArray(), shiftedSlice.toByteArray());

                final IRaba decodedFromSlice = rabaCoder.decode(shiftedSlice);

                AbstractBTreeTestCase.assertSameRaba(expected,
                        decodedFromSlice);

            }

        } catch (Throwable t) {

            fail("Cause=" + t + ", expectedRaba=" + expected, t);

        }

    }

    /**
     * Performance stress test for keys. Performance tuning should give more
     * weight to coded raba access times, including search and key retrieval or
     * copy, than coding times since most use will be access on the coded data.
     * Those costs are not factored apart in the stress test times. They are
     * parameterized here by a normalized vector of the rates of the different
     * operations (search(), get(), length(), etc).
     * 
     * <dl>
     * <dt>nops</dt>
     * <dd>
     * The #of random operations to be performed. Large values for <i>nops</i>
     * need to be used to get beyond the initial JVM performance tuning so you
     * can more accurately compare the performance of the different coders. For
     * example, a value of 1M (1000000) will run for ~ 30-40s for the
     * front-coded coders. For shorter run times, the order in which we test the
     * coders will dominate their performance.</dd>
     * <dt>size</dt>
     * <dd>The #of entries in the raba to be tested (must be LTE the capacity)</dd>
     * </dl>
     * 
     * @param args
     *            [nops [generator [size]]]
     * 
     *            FIXME parameterize the generator choice.
     */
    static public void main(final String[] args) {

        final Random r = new Random();

        // default nops.
        int nops = 200000;
//        int nops = 1000000; // ~30-40s per coder @ 1M.
        if (args.length > 0)
            nops = Integer.parseInt(args[0]);
        if (nops <= 0)
            throw new IllegalArgumentException("nops must be positive: "
                    + nops);

        /*
         * Note: args[1] is reserved for the generator choice, which has not
         * been parameterized yet. It is currently ignored.
         */

        // default size (#of keys).
        int size = 256;
        if (args.length > 2)
            size = Integer.parseInt(args[2]); // was assigned to [nops] by mistake.
        if (size <= 0)
            throw new IllegalArgumentException("size must be positive: "
                    + size);

        // The coders to be tested.
        final IRabaCoder[] coders = new IRabaCoder[] {
                new MutableRabaCoder(), // provides performance baseline.
                SimpleRabaCoder.INSTANCE, // simplest coding.
////                new FrontCodedRabaCoder(2/* ratio */),
                new FrontCodedRabaCoder(8/* ratio */), // front-coding.
////                new FrontCodedRabaCoder(32/* ratio */),
                CanonicalHuffmanRabaCoder.INSTANCE // huffman coding.
                };

        System.out.println("nops=" + nops + ", size=" + size + ", ncoders="
                + coders.length);

        /*
         * Generate a raba.  The same data is used for each coder. 
         */

        // The raw data.
        final byte[][] a;

        // Random keys based on random variable length byte[]s.
//        a = new RandomKeysGenerator(r, size + r.nextInt(size)/* maxKeys */, 20/* maxKeyLength */)
//                .generateKeys(size);

        // Random URIs in sorted order.
//        a = new RandomURIGenerator(r).generateKeys(size);

        // based on a tokenized source code file.
        a = new TokenizeKeysGenerator(
                "bigdata/src/test/com/bigdata/btree/raba/codec/AbstractRabaCoderTestCase.java")
                .generateKeys(size);
        
        /*
         * isNull, length, get, copy, search, iterator, recode.
         * 
         * Note: isNull is not used for keys!
         */
        final Op op = new Op(0.0f, .01f, .4f, .2f, .6f, .2f, .04f);

        /*
         * Test each IRabaCoder.
         * 
         * @todo should also test on coded B+Tree values, which would be a
         * different [expected] instance.
         */
        for (IRabaCoder rabaCoder : coders) {

            // the read-only raba.
            final ReadOnlyKeysRaba expected = new ReadOnlyKeysRaba(size, a);

            final long begin = System.nanoTime();

            // remains -1 if the coder fails.
            int recordLength = -1;
            try {

                recordLength = doRabaCoderPerformanceTest(expected, rabaCoder,
                        size, nops, r, op);
                
            } catch (Throwable t) {

                /*
                 * Best effort: report the failure and continue with the next
                 * coder rather than aborting the comparison.
                 */
                System.err.println("coder failed: " + rabaCoder);
                
                t.printStackTrace(System.err);
                
            }

            final long elapsed = System.nanoTime() - begin;

            System.out.println(rabaCoder.toString() + " : elapsed="
                    + TimeUnit.NANOSECONDS.toMillis(elapsed)
                    + ", recordLength="
                    + (recordLength == -1 ? "N/A" : recordLength));

        }
        
    }
    
    /**
     * Performance oriented stress test of the {@link IRabaCoder} under test
     * when it is used to code B+Tree keys (this includes search). The test is
     * a NOP unless the coder reports that it can code keys.
     */
    public void test_keyCoderPerformance() {

        if (rabaCoder.isKeyCoder()) {

            // Candidate branching factors; one is chosen at random.
            final int[] candidates = new int[] { 3, 4, 8, 16, 27, 32, 48, 64,
                    96, 99, 112, 128, 256, 512, 1024, 4096 };

            final int branchingFactor = candidates[r.nextInt(candidates.length)];

            // #of keys actually populated, in [1:branchingFactor].
            final int nkeys = 1 + r.nextInt(branchingFactor);

            // The ground truth: a read-only raba over random keys.
            final byte[][] keys = AbstractBTreeTestCase.getRandomKeys(
                    branchingFactor, nkeys);

            final ReadOnlyKeysRaba groundTruth = new ReadOnlyKeysRaba(nkeys,
                    keys);

            /*
             * Operation mix, in this order: isNull, length, get, copy,
             * search, iterator, recode.
             * 
             * Note: isNull is not used for keys!
             */
            final Op mix = new Op(0.0f, .01f, .4f, .2f, .6f, .2f, .04f);

            doRabaCoderPerformanceTest(groundTruth, rabaCoder, nkeys,
                    50000/* nops */, r, mix);

        }

    }

    /**
     * Do a performance stress test consisting of random operations on an
     * {@link IRaba}. The <i>expected</i> raba is coded and decoded using the
     * given coder and each operation on the coded raba is checked against the
     * same operation on the <i>expected</i> raba (ground truth). When done, a
     * table with the #of operations, the cumulative elapsed nanoseconds, the
     * share of the total time and the throughput per operation type is
     * written onto {@link System#out}.
     * 
     * @param expected
     *            The ground truth raba.
     * @param rabaCoder
     *            The coder to be tested.
     * @param size
     *            The #of keys (used as the exclusive upper bound when
     *            choosing a random index).
     * @param nops
     *            The #of operations to perform.
     * @param r
     *            The random number generator.
     * @param op
     *            The distribution of the operations to be performed.
     * 
     * @return The size of the coded record.
     */
    static public int doRabaCoderPerformanceTest(final IRaba expected,
            final IRabaCoder rabaCoder, final int size, final int nops,
            final Random r, final Op op) {

        // The raba under test.  This can be recoded by one of the ops.
        ICodedRaba actual;
        final byte[] originalData;
        {

            // encode the record.
            final AbstractFixedByteArrayBuffer data = rabaCoder.encode(
                    expected, new DataOutputBuffer());

            // save off a copy of the original coded record.
            originalData = data.toByteArray();
            
            // decode the record.
            actual = rabaCoder.decode(data);

            // verify correct initial coding.
            AbstractBTreeTestCase.assertSameRaba(expected, actual);

        }

        // reused buffer.
        final DataOutputBuffer os = new DataOutputBuffer();

        // #of operations per operation type.
        final long[] count = new long[op._dist.length];

        // cumulative elapsed ns per operation type.
        final long[] elapsed = new long[op._dist.length];
        
        for (int i = 0; i < nops; i++) {

            final long begin = System.nanoTime();
            final int code;
            switch (code = op.nextOp(r)) {
            case Op.ISNULL: {
                if (expected.isKeys()) {
                    // method not defined for keys (op is neither timed nor counted).
                    continue;
                }
                final int index = r.nextInt(size);
                if (log.isDebugEnabled())
                    log.debug(op.getName(code) + "(" + index + ") : expected="
                            + expected.isNull(index));
                assertEquals(op.getName(code), expected.isNull(index), actual
                        .isNull(index));
                break;
            }
            case Op.LENGTH: {
                final int index = r.nextInt(size);
                if (log.isDebugEnabled())
                    log.debug(op.getName(code) + "(" + index + ") : expected="
                            + expected.length(index));
                assertEquals(op.getName(code), expected.length(index), actual
                        .length(index));
                break;
            }
            case Op.GET: {
                final int index = r.nextInt(size);
                if (log.isDebugEnabled())
                    log.debug(op.getName(code) + "(" + index + ") : expected="
                            + BytesUtil.toString(expected.get(index)));
                assertEquals(op.getName(code), expected.get(index), actual
                        .get(index));
                break;
            }
            case Op.COPY: {
                /*
                 * Note: We reuse the same output buffer all the time for this.
                 * This is not a problem since we are not overwriting the data
                 * backing the raba.
                 */
                final int index = r.nextInt(size);
                final int len = expected.length(index);
                if (log.isDebugEnabled())
                    log.debug(op.getName(code) + "(" + index + ") : expected="
                            + BytesUtil.toString(expected.get(index)));
                // reset the buffer.
                os.reset();
                assertEquals(op.getName(code), len, actual.copy(index, os));
                assertTrue(0 == BytesUtil.compareBytesWithLenAndOffset(0, len,
                        os.array(), 0, len, expected.get(index)));
                break;
            }
            case Op.SEARCH: {
                /*
                 * Search with a key chosen randomly from the original data.
                 */
                if (!expected.isKeys()) {
                    // method not defined for values (op is neither timed nor counted).
                    continue;
                }
                final int index = r.nextInt(size);
                final byte[] key = expected.get(index);
                if (log.isDebugEnabled())
                    log.debug(op.getName(code) + "(" + index + ") : key="
                            + BytesUtil.toString(key));
                { // search at the key.

                    assertEquals(op.getName(code), index, actual.search(key));
                    
                }
                { // search at key plus a random byte[] suffix.
                    
                    // random suffix length (at least one byte).
                    final int suffixLength = r.nextInt(1 + (key.length / 2)) + 1;
                    
                    // random fill of entire key.
                    final byte[] key2 = new byte[key.length + suffixLength];
                    r.nextBytes(key2);
                    
                    // copy shared prefix (all of the original key).
                    System.arraycopy(key, 0, key2, 0, key.length);
                    
                    // expected insert position (or index iff found).
                    final int epos = expected.search(key2);
                    
                    // actual result from search on the coded raba.
                    final int apos = actual.search(key2);
                    
                    assertEquals(op.getName(code), epos, apos);
                    
                }
                { // search at random length prefix of the key.
                    
                    // random prefix length (may be zero).
                    final int prefixLength = Math.max(0, r.nextInt(Math.max(1,
                            key.length)) - 1);
                    
                    // copy shared prefix.
                    final byte[] key2 = new byte[prefixLength];
                    System.arraycopy(key, 0, key2, 0, prefixLength);

                    // expected insert position (or index iff found).
                    final int epos = expected.search(key2);
                    
                    // actual result from search on the coded raba.
                    final int apos = actual.search(key2);
                    
                    assertEquals(op.getName(code), epos, apos);
                    
                }
                break;
            }
            case Op.ITERATOR: {
                if (log.isDebugEnabled())
                    log.debug(op.getName(code) + "()");
                assertSameIterator(expected.iterator(), actual.iterator());
                break;
            }
            case Op.RECODE: {

                /*
                 * Note: this uses a new buffer instance so we do not stomp on
                 * the existing coded representation backing the raba. The
                 * backing array for the buffer is preallocated to a modest size
                 * and filled with random data. When we setup the buffer, we
                 * then advance it a random #of bytes into the buffer so the
                 * raba will frequently be coded at a non-zero offset in the
                 * buffer.
                 */

                // backing byte[]. sometimes empty. will be extended on demand.
                final byte[] tmp = (r.nextFloat() < .1 ? new byte[0]
                        : new byte[r.nextInt(100) * size]);

                // fill it with random data.
                r.nextBytes(tmp);

                /*
                 * Start at random (but small) offset into the buffer with a
                 * bias to start at zero.
                 * 
                 * Note: A lot of re-coding errors are linked to a non-zero
                 * starting offset. If you set [start] to zero explicitly and
                 * the re-coding problem goes away, then the problem is a
                 * non-zero offset. Likewise, you can explicitly choose a
                 * non-zero start to debug a problem.
                 */

                // random start offset (never beyond the end of the byte[]).
                final int start = Math.min(tmp.length, (r.nextFloat() < .2 ? 0
                        : r.nextInt(20)));
//                final int start = 0;

                if (log.isDebugEnabled())
                    log.debug(op.getName(code) + "(): start=" + start
                            + ", buf.len=" + tmp.length);

                // output buffer wrapping that byte[].
                final DataOutputBuffer buf = new DataOutputBuffer(start, tmp);

                try {
                    
                    // recode onto the buffer.
                    final AbstractFixedByteArrayBuffer data = rabaCoder.encode(
                            actual, buf);

                    // verify the same coding was produced.
                    assertEquals(originalData, data.toByteArray());
                    
                    // new instance wrapping the buffer.
                    actual = rabaCoder.decode(data);

                    // verify recoded raba.
                    AbstractBTreeTestCase.assertSameRaba(expected, actual);
                    
                } catch (AssertionFailedError ex) {
                    // rethrow with the parameters needed to reproduce the problem.
                    fail(op.getName(code) + "(): start=" + start + ", buf.len="
                            + tmp.length, ex);
                }

                break;
            }
            default:
                throw new AssertionError();
            }
            /*
             * Accumulate (rather than overwrite) the elapsed time so the
             * report reflects all operations of this type and not just the
             * last one.
             */
            elapsed[code] += System.nanoTime() - begin;
            count[code]++;

        }

        // total elapsed ns across all operation types.
        double totalNS = 0;
        for (long ns : elapsed)
            totalNS += ns;
        
        final NumberFormat percentF = NumberFormat.getPercentInstance();
        percentF.setMinimumFractionDigits(2);
        final NumberFormat rateF = NumberFormat.getInstance();
        rateF.setMinimumFractionDigits(0);
        rateF.setMaximumFractionDigits(0);
        System.out.println("op\tcount\tnanos\t%time\tops/ms");
        for (int i = 0; i < count.length; i++) {

            // do not report operations which were never executed.
            if (count[i] == 0)
                continue;
            
            System.out.println(//
                    op.getName(i) + "\t"
                    + count[i]
                    + "\t"
                    + elapsed[i]
                    + "\t"
                    + percentF.format(elapsed[i] / totalNS)//
                    + "\t"
                    + (elapsed[i] == 0 ? "N/A" : rateF.format(count[i]
                                    / (elapsed[i] * scalingFactor))) //
                    );

        }
        
        // The size of the coded record.
        return originalData.length;
        
    }

    /**
     * Verify that two iterators visit the same sequence of byte[]s. The
     * elements are compared pairwise and both iterators must be exhausted at
     * the same time.
     * 
     * @param eitr
     *            The expected iterator.
     * @param aitr
     *            The actual iterator.
     */
    static protected void assertSameIterator(final Iterator<byte[]> eitr,
            final Iterator<byte[]> aitr) {

        for (int position = 0; eitr.hasNext(); position++) {

            // the actual iterator must not run dry first.
            assertTrue("hasNext", aitr.hasNext());

            final byte[] expectedElement = eitr.next();

            final byte[] actualElement = aitr.next();

            // verify same byte[] (compare data, may both be null).
            assertEquals("byte[" + position + "]", expectedElement,
                    actualElement);

        }

        // the actual iterator must not visit anything else.
        assertFalse("hasNext", aitr.hasNext());

    }
    
    /**
     * Multiplier which turns a duration in nanoseconds into the equivalent
     * duration in milliseconds (the reciprocal of the #of nanoseconds in one
     * millisecond).
     */
    static protected final double scalingFactor = 1d / TimeUnit.MILLISECONDS
            .toNanos(1);

    /**
     * Helper class generates a random sequence of operation codes obeying the
     * probability distribution described in the constructor call.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     * @version $Id$
     */
    static class Op {
        
        static public final int ISNULL = 0;
        static public final int LENGTH = 1;
        static public final int GET = 2;
        static public final int COPY = 3;
        static public final int SEARCH = 4;
        static public final int ITERATOR = 5;
        static public final int RECODE = 6;
        
        /**
         * The last defined operator.
         */
        static final int lastOp = RECODE;
        
        /**
         * The normalized probability of each operator, indexed by the
         * operator code.
         */
        final private float[] _dist;

        /**
         * Create a generator from the relative rates of the operators. The
         * rates do not need to sum to one, they are normalized here.
         * 
         * @param isNullRate
         *            The relative rate of {@link #ISNULL}.
         * @param lengthRate
         *            The relative rate of {@link #LENGTH}.
         * @param getRate
         *            The relative rate of {@link #GET}.
         * @param copyRate
         *            The relative rate of {@link #COPY}.
         * @param searchRate
         *            The relative rate of {@link #SEARCH}.
         * @param iteratorRate
         *            The relative rate of {@link #ITERATOR}.
         * @param recodeRate
         *            The relative rate of {@link #RECODE}.
         * 
         * @throws IllegalArgumentException
         *             if any rate is negative or not a finite number.
         * @throws IllegalArgumentException
         *             if all rates are zero.
         */
        public Op(float isNullRate, float lengthRate, float getRate,
                float copyRate, float searchRate, float iteratorRate,
                float recodeRate)
        {
            /*
             * isNull, length, get, copy, search, iterator, recode (the order
             * MUST agree with the operator codes).
             */
            final float[] rates = new float[] { isNullRate, lengthRate,
                    getRate, copyRate, searchRate, iteratorRate, recodeRate };
            float total = 0f;
            for (float rate : rates) {
                // NaN and infinity would silently poison the distribution.
                if (Float.isNaN(rate) || Float.isInfinite(rate)) {
                    throw new IllegalArgumentException("rate is not finite");
                }
                if (rate < 0) {
                    throw new IllegalArgumentException("negative rate");
                }
                total += rate;
            }
            if( total == 0.0 ) {
                throw new IllegalArgumentException("all rates are zero.");
            }
            /*
             * Convert to normalized distribution in [0:1] and save it.
             */
            _dist = new float[lastOp+1];
            for (int i = 0; i < _dist.length; i++) {
                _dist[ i ] = rates[ i ] / total;
            }

            /*
             * Checksum.
             */
            float sum = 0f;
            for (int i = 0; i < _dist.length; i++) {
                sum += _dist[ i ];
            }
            if( Math.abs( sum - 1f) > 0.01 ) {
                throw new AssertionError("sum of distribution is: "+sum+", but expecting 1.0");
            }
            
        }
        
        /**
         * Return the name of the operator. The names are padded to a common
         * width.
         * 
         * @param op
         *            The operator code.
         * 
         * @return The name of that operator.
         * 
         * @throws IllegalArgumentException
         *             if the argument is not a declared operator code.
         */
        public String getName( final int op ) {
            if( op < 0 || op > lastOp ) {
                throw new IllegalArgumentException();
            }
            /*
             * isNull, length, get, copy, search, iterator, recode.
             */
            switch( op ) {
            case ISNULL:  return "isNull";
            case LENGTH:  return "length";
            case GET:     return "get   ";
            case COPY:    return "copy  ";
            case SEARCH:  return "search";
            case ITERATOR:return "itr   ";
            case RECODE:  return "recode";
            default:
                throw new AssertionError();
            }
        }
        
        /**
         * An array of normalized probabilities assigned to each operator. The
         * array may be indexed by the operator, e.g., dist[{@link #GET}]
         * would be the probability of a get operation.
         * <p>
         * Note: This is the internal array, not a copy. The caller MUST NOT
         * modify it.
         * 
         * @return The probability distribution over the defined operators.
         */
        public float[] getDistribution() {
            return _dist;
        }

        /**
         * Generate a random operator according to the distribution described
         * to the constructor. An operator whose rate is zero is never
         * selected.
         * 
         * @param r
         *            The source of randomness.
         * 
         * @return A declared operator selected according to a probability
         *         distribution.
         */
        public int nextOp(final Random r) {
            final float rand = r.nextFloat(); // [0:1)
            float cumprob = 0f;
            int lastCandidate = -1;
            for( int i=0; i<_dist.length; i++ ) {
                if (_dist[ i ] <= 0f) {
                    // zero rate operators are never candidates.
                    continue;
                }
                cumprob += _dist[ i ];
                /*
                 * Strict comparison since rand is in the half-open range
                 * [0:1), otherwise rand == 0 could select an operator whose
                 * rate is zero.
                 */
                if( rand < cumprob ) {
                    return i;
                }
                lastCandidate = i;
            }
            if (lastCandidate == -1) {
                // the constructor guarantees at least one non-zero rate.
                throw new AssertionError();
            }
            /*
             * Rounding can leave the cumulative probability just short of
             * 1.0, in which case a draw close to 1.0 runs off the end of the
             * distribution. Attribute it to the last candidate operator.
             */
            return lastCandidate;
        }
        
    }

    /**
     * Tests of the {@link Op} test helper class.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     * @version $Id$
     */
    public static class TestOp extends TestCase {

        private final Random r = new Random();
        
        /**
         * Test with a distribution in which every operator has a non-zero
         * rate (the rates are normalized by {@link Op}).
         */
        public void test_Op() {
            /*
             * isNull, length, get, copy, search, iterator, recode.
             * 
             * Note: the copy rate used to be written as [05f], which is the
             * literal 5.0f rather than .05f.
             */
            Op gen = new Op(.2f, .05f, .2f, .05f, .1f, .05f, .001f);
            doOpTest(gen);
        }

        /**
         * Test with a distribution in which only one operator (copy) has a
         * non-zero rate.
         */
        public void test_Op2() {
            /*
             * isNull, length, get, copy, search, iterator, recode.
             */
            Op gen = new Op(0f,0f,0f,1f,0f,0f,0f);
            doOpTest(gen);
        }

        /**
         * Correct rejection test when all rates are zero.
         */
        public void test_correctRejectionAllZero() {
            /*
             * isNull, length, get, copy, search, iterator, recode.
             */
            try {
                new Op(0f,0f,0f,0f,0f,0f,0f);
                fail("Expecting: "+IllegalArgumentException.class);
            }
            catch(IllegalArgumentException ex) {
                log.info("Ignoring expected exception: "+ex);
            }
        }

        /**
         * Correct rejection test when one or more rates are negative.
         */
        public void test_correctRejectionNegativeRate() {
            /*
             * isNull, length, get, copy, search, iterator, recode.
             */
            try {
                new Op(0f,0f,0f,-1f,0f,1f,0f);
                fail("Expecting: "+IllegalArgumentException.class);
            }
            catch(IllegalArgumentException ex) {
                log.info("Ignoring expected exception: "+ex);
            }
        }

        /**
         * Verifies the {@link Op} class given an instance with some probability
         * distribution. A fixed number of operators is drawn and the observed
         * frequency of each operator must be within 2% of its probability. The
         * expected and observed values are written onto {@link System#err}.
         * 
         * @param gen
         *            The generator under test.
         */
        void doOpTest(final Op gen) {
            // #of operators to draw.
            final int limit = 10000;
            // #of times each operator was drawn.
            final int[] sums = new int[Op.lastOp + 1];
            for (int i = 0; i < limit; i++) {
                final int op = gen.nextOp(r);
                assertTrue(op >= 0);
                assertTrue(op <= Op.lastOp);
                sums[op]++;
            }
            final float[] expectedProbDistribution = gen.getDistribution();
            final float[] actualProbDistribution = new float[Op.lastOp + 1];
            float sum = 0f;
            for (int i = 0; i <= Op.lastOp; i++) {
                sum += expectedProbDistribution[i];
                actualProbDistribution[i] = (float) ((double) sums[i] / (double) limit);
                final float diff = Math.abs(actualProbDistribution[i]
                        - expectedProbDistribution[i]);
                System.err.println("expected[i=" + i + "]="
                        + expectedProbDistribution[i] + ", actual[i=" + i
                        + "]=" + actualProbDistribution[i] + ", diff="
                        + ((int) (diff * 1000)) / 10f + "%");
                assertTrue(diff < 0.02); // difference is less than 2%.
            }
            assertTrue(Math.abs(sum - 1f) < 0.01); // essentially 1.0
        }

    }
    
}