/* Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on Aug 6, 2009 */ package com.bigdata.btree.raba.codec; import it.unimi.dsi.compression.CanonicalFast64CodeWordDecoder; import it.unimi.dsi.compression.HuffmanCodec; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.text.NumberFormat; import java.util.Arrays; import java.util.Iterator; import java.util.Random; import java.util.concurrent.TimeUnit; import junit.framework.AssertionFailedError; import junit.framework.TestCase; import junit.framework.TestCase2; import com.bigdata.btree.AbstractBTreeTestCase; import com.bigdata.btree.keys.KeyBuilder; import com.bigdata.btree.keys.TestKeyBuilder; import com.bigdata.btree.raba.IRaba; import com.bigdata.btree.raba.ReadOnlyKeysRaba; import com.bigdata.btree.raba.ReadOnlyValuesRaba; import com.bigdata.io.AbstractFixedByteArrayBuffer; import com.bigdata.io.DataOutputBuffer; import com.bigdata.io.FixedByteArrayBuffer; import com.bigdata.io.SerializerUtil; import com.bigdata.util.BytesUtil; import com.bigdata.util.BytesUtil.UnsignedByteArrayComparator; /** * Abstract test suite for {@link IRabaCoder} implementations. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ */ abstract public class AbstractRabaCoderTestCase extends TestCase2 { /** * */ public AbstractRabaCoderTestCase() { } /** * @param name */ public AbstractRabaCoderTestCase(String name) { super(name); } /** * Return <code>true</code> if the {@link IRabaCoder} only handles fixed * length values (the default always returns <code>false</code>). This is * used to skip tests which have variable length byte[]s when testing the * {@link FixedLengthValueRabaCoder}. */ protected boolean isFixedLength() { return false; } /** * The fixture under test. This will be <code>null</code> unless you * explicitly set it in {@link #setUp()}. */ protected IRabaCoder rabaCoder = null; /** * A simple unit test. */ public void test_mike_personick() throws UnsupportedEncodingException { if(isFixedLength()) return; final byte[][] a = new byte[2][]; a[0] = "mike".getBytes("US-ASCII"); a[1] = "personick".getBytes("US-ASCII"); if (rabaCoder.isValueCoder()) { doRoundTripTest(rabaCoder, new ReadOnlyValuesRaba(a)); } if (rabaCoder.isKeyCoder()) { final IRaba expected = new ReadOnlyKeysRaba(a); doRoundTripTest(rabaCoder, expected); { /* * Spot check the correct computation of the insertion point for * a variety of search keys. */ final AbstractFixedByteArrayBuffer data = rabaCoder.encode( expected, new DataOutputBuffer()); final ICodedRaba actual = rabaCoder.decode(data); // verify correct insertion point for an empty byte[]. assertEquals(-1, actual.search(new byte[] {})); assertEquals(-1, actual.search(new byte[] { 'm', 'i', 'k' })); assertEquals(-2, actual.search(new byte[] { 'm', 'i', 'k', 'e', 's' })); assertEquals(-2, actual.search("personic".getBytes("US-ASCII"))); assertEquals(-3, actual.search("personicks" .getBytes("US-ASCII"))); } } } /** * Test with byte values which are negative values when interpreted as as * signed 8 bit integers. */ public void test_negativeByteValues() throws UnsupportedEncodingException { if(isFixedLength()) return; final byte[][] a = new byte[1][]; a[0] = new byte[] { 64, -64 }; if (rabaCoder.isKeyCoder()) { doRoundTripTest(rabaCoder, new ReadOnlyKeysRaba(a)); } if (rabaCoder.isValueCoder()) { doRoundTripTest(rabaCoder, new ReadOnlyValuesRaba(a)); } } /** * Test with an empty byte[] element. * * @throws UnsupportedEncodingException */ public void test_emptyElement() throws UnsupportedEncodingException { if(isFixedLength()) return; final byte[][] a = new byte[3][]; a[0] = new byte[0]; a[1] = "mike".getBytes("US-ASCII"); a[2] = "personick".getBytes("US-ASCII"); if (rabaCoder.isKeyCoder()) { doRoundTripTest(rabaCoder, new ReadOnlyKeysRaba(a)); } if (rabaCoder.isValueCoder()) { doRoundTripTest(rabaCoder, new ReadOnlyValuesRaba(a)); } } /** * Test with a single byte value (nsymbols:=1). This test was written to a * known bug in {@link HuffmanCodec} and * {@link CanonicalFast64CodeWordDecoder}. A workaround for that bug has * been implemented in the {@link CanonicalHuffmanRabaCoder}. */ public void test_nsymbolsOne() { if(isFixedLength()) return; final byte[][] a = new byte[1][]; a[0] = new byte[]{1}; if (rabaCoder.isKeyCoder()) { doRoundTripTest(rabaCoder, new ReadOnlyKeysRaba(a)); } if (rabaCoder.isValueCoder()) { doRoundTripTest(rabaCoder, new ReadOnlyValuesRaba(a)); } } /** * Test with a single byte value (nsymbols=1) and some nulls. This test was * written to a known bug in {@link HuffmanCodec} and * {@link CanonicalFast64CodeWordDecoder}. A workaround for that bug has * been implemented in the {@link CanonicalHuffmanRabaCoder}. */ public void test_nsymbolsOne_nulls() { if(isFixedLength()) return; final byte[][] a = new byte[3][]; a[0] = new byte[]{1}; a[1] = null; a[2] = null; if (rabaCoder.isValueCoder()) { doRoundTripTest(rabaCoder, new ReadOnlyValuesRaba(a)); } } /** * Test with a null value. * * @throws UnsupportedEncodingException */ public void test_withNulls() throws UnsupportedEncodingException { if (!rabaCoder.isValueCoder()) { // coded does not allow nulls. return; } final byte[][] a = new byte[3][]; a[0] = getRandomValue(rabaCoder); a[1] = getRandomValue(rabaCoder); a[2] = null; final IRaba expected = new ReadOnlyValuesRaba(a); doRoundTripTest(rabaCoder, expected); } /** * Test with a single value, which is null. * * FIXME Due to a bug in the {@link CanonicalFast64CodeWordDecoder} ctor * there is a problem handling a logical byte[][] consisting solely of * <code>null</code>s. We handle this for the case of an empty logical * byte[][] using an {@link EmptyRabaValueDecoder}. I suppose that could be * parameterized to identify the <code>null</code>s or else just fix the * ctor. */ public void test_withNulls2() { if (!rabaCoder.isValueCoder()) { // coded does not allow nulls. return; } final byte[][] a = new byte[1][]; a[0] = null; final IRaba expected = new ReadOnlyValuesRaba(a); doRoundTripTest(rabaCoder, expected); } final Random r = new Random(); public void test_empty() throws IOException { doRandomRoundTripTest(rabaCoder, 0/* size */, 0/*capacity*/); doRandomRoundTripTest(rabaCoder, 0/* size */, 1/*capacity*/); doRandomRoundTripTest(rabaCoder, 0/* size */, 2/*capacity*/); doRandomRoundTripTest(rabaCoder, 0/* size */, 10/*capacity*/); } /** * Test with {@link IRaba} having a size of ONE (1) and a variety of * capacities. */ public void test_entryCount1() throws IOException { doRandomRoundTripTest(rabaCoder, 1/* n */, 1/* capacity */); doRandomRoundTripTest(rabaCoder, 1/* n */, 2/* capacity */); doRandomRoundTripTest(rabaCoder, 1/* n */, 10/* capacity */); } /** * Test with {@link IRaba} having a size of TWO (2) and a variety of * capacities. */ public void test_entryCount2() throws IOException { doRandomRoundTripTest(rabaCoder, 2/* n */, 2/* capacity */); doRandomRoundTripTest(rabaCoder, 2/* n */, 3/* capacity */); doRandomRoundTripTest(rabaCoder, 2/* n */, 10/* capacity */); } /** * This test case was developed for the {@link FrontCodedRabaCoder}. */ public void test_error1() throws IOException { if(isFixedLength()) return; final byte b187 = KeyBuilder.encodeByte(187); final byte b146 = KeyBuilder.encodeByte(146); final byte b207 = KeyBuilder.encodeByte(207); final byte[][] a = new byte[][] {// new byte[]{121, b187, b146, b207, 99, 112, 24, 116},// new byte[]{121, b187, b146, b207, 99, 112, 43, 68},// new byte[]{121, b187, b146, b207, 99, 112, 46, 78},// new byte[]{121, b187, b146, b207, 99, 112, 54, KeyBuilder.encodeByte(176)},// new byte[]{121, b187, b146, b207, 99, 112, 54, KeyBuilder.encodeByte(236)},// new byte[]{121, b187, b146, b207, 99, 112, 55, KeyBuilder.encodeByte(209)},// new byte[]{121, b187, b146, b207, 99, 112, 62, 85},// new byte[]{121, b187, b146, b207, 99, 112, 63, KeyBuilder.encodeByte(238)},// new byte[]{121, b187, b146, b207, 99, 112, 71, 124},// new byte[]{121, b187, b146, b207, 99, 112, 73, 49}// }; if(rabaCoder.isKeyCoder()) { final IRaba expected = new ReadOnlyKeysRaba(a); // final IRaba actual = rabaCoder.encode(expected); // System.err.println(actual.toString()); // // this is the one whose rlen/clen appear to be incorrect. // assertEquals(expected.get(4),actual.get(4)); // assertEquals(0, actual.search(expected.get(0))); // assertEquals(1, actual.search(expected.get(1))); // assertEquals(2, actual.search(expected.get(2))); // assertEquals(3, actual.search(expected.get(3))); // assertEquals(8, actual.search(expected.get(8))); // assertEquals(4, actual.search(expected.get(4))); // assertEquals(5, actual.search(expected.get(5)));//broken doRoundTripTest(rabaCoder, expected); } if(rabaCoder.isValueCoder()) { doRoundTripTest(rabaCoder, new ReadOnlyValuesRaba(a)); } } /** * This test case was developed for the {@link CanonicalHuffmanRabaCoder}. * * <pre> * junit.framework.AssertionFailedError: search([44, 186, 169, 175, 191, 31, 36, 227]): expectedIndex=1, actualIndex=-1, * expected=com.bigdata.btree.raba.ReadOnlyKeysRaba{ capacity=3, size=2, isKeys=true, isReadOnly=true, [ * [44, 186, 169, 175, 191, 31, 36, 12], * [44, 186, 169, 175, 191, 31, 36, 227]]}, * actual=com.bigdata.btree.raba.codec.CanonicalHuffmanRabaCoder$RabaDecoder{ capacity=2, size=2, isKeys=true, isReadOnly=true, [ * [44, 186, 169, 175, 191, 31, 36, 12], * [44, 186, 169, 175, 191, 31, 36, 227]]} * at junit.framework.Assert.fail(Assert.java:47) * at com.bigdata.btree.AbstractBTreeTestCase.assertSameRaba(AbstractBTreeTestCase.java:583) * at com.bigdata.btree.raba.codec.AbstractRabaCoderTestCase.doRoundTripTest(AbstractRabaCoderTestCase.java:499) * at com.bigdata.btree.raba.codec.AbstractRabaCoderTestCase.doRandomRoundTripTest(AbstractRabaCoderTestCase.java:487) * at com.bigdata.btree.raba.codec.AbstractRabaCoderTestCase.test_entryCount2(AbstractRabaCoderTestCase.java:247) * </pre> * * <pre> * junit.framework.AssertionFailedError: search([44, 197, 214, 208, 192, 31, 36, 156]): expectedIndex=1, actualIndex=-1, * expected=com.bigdata.btree.raba.ReadOnlyKeysRaba{ capacity=2, size=2, isKeys=true, isReadOnly=true, [ * [44, 197, 214, 208, 192, 31, 36, 12], * [44, 197, 214, 208, 192, 31, 36, 156]]}, * actual=com.bigdata.btree.raba.codec.CanonicalHuffmanRabaCoder$RabaDecoder{ capacity=2, size=2, isKeys=true, isReadOnly=true, [ * [44, 197, 214, 208, 192, 31, 36, 12], * [44, 197, 214, 208, 192, 31, 36, 156]]} * at junit.framework.Assert.fail(Assert.java:47) * at com.bigdata.btree.AbstractBTreeTestCase.assertSameRaba(AbstractBTreeTestCase.java:583) * at com.bigdata.btree.raba.codec.AbstractRabaCoderTestCase.doRoundTripTest(AbstractRabaCoderTestCase.java:538) * at com.bigdata.btree.raba.codec.AbstractRabaCoderTestCase.test_error2(AbstractRabaCoderTestCase.java:325) * </pre> */ public void test_error2() { if(isFixedLength()) return; final int n = 2; final byte[][] a = new byte[n][]; a[0] = new byte[]{44, 127-186, 127-169, 127-175, 127-191, 31, 36, 12}; a[1] = new byte[]{44, 127-186, 127-169, 127-175, 127-191, 31, 36, 127-227}; // a[0] = new byte[]{44, 186-127, 169-127, 175-127, 191-127, 31, 36, 12}; // a[1] = new byte[]{44, 186-127, 169-127, 175-127, 191-127, 31, 36, 227-127}; if(rabaCoder.isKeyCoder()) { final IRaba expected = new ReadOnlyKeysRaba(a); doRoundTripTest(rabaCoder, expected); } if(rabaCoder.isValueCoder()) { doRoundTripTest(rabaCoder, new ReadOnlyValuesRaba(a)); } } public void test_randomOnce() throws IOException { // #of elements. final int n = r.nextInt(100); // capacity of the array. final int capacity = n + r.nextInt(n + 1); doRandomRoundTripTest(rabaCoder, n, capacity); } public void testStress() throws IOException { for (int i = 0; i < 1000; i++) { // #of elements. final int n = r.nextInt(100); // capacity of the array. final int capacity = n + r.nextInt(n + 1); doRandomRoundTripTest(rabaCoder, n, capacity); } } /** * Test using a sequence of random URIs (formed with successive prefixes). * * @throws Exception */ public void test_randomURIs() throws Exception { if(isFixedLength()) return; // random, distinct, unordered w/o nulls. final byte[][] data = new RandomURIGenerator(r).generateValues(100); if (rabaCoder.isValueCoder()) { // layer on interface. final IRaba raba = new ReadOnlyValuesRaba(0/* fromIndex */, data.length/* toIndex */, data.length/* capacity */, data); doRoundTripTest(rabaCoder, raba); } if (rabaCoder.isKeyCoder()) { // put into sorted order. Arrays.sort(data, 0, data.length, UnsignedByteArrayComparator.INSTANCE); // layer on interface. final IRaba raba = new ReadOnlyKeysRaba(0/* fromIndex */, data.length/* toIndex */, data.length/* capacity */, data); doRoundTripTest(rabaCoder, raba); } } /** * Return a random byte array. The byte array will also have a random length * in [0:512] unless the {@link IRabaCoder} is a * {@link FixedLengthValueRabaCoder}, in which case a byte[] having the * appropriate length will be returned. * * @param dataCoder * The coder. * * @return The random byte[]. */ protected byte[] getRandomValue(final IRabaCoder dataCoder) { final int len; if (dataCoder instanceof FixedLengthValueRabaCoder) { len = ((FixedLengthValueRabaCoder) dataCoder).getLength(); } else { len = r.nextInt(512); } final byte[] a = new byte[len]; r.nextBytes(a); return a; } /** * Generates a random byte[][] and verifies round-trip encoding and * decoding. * <p> * Note: This uses a uniform random distribution. Therefore the huffman * codes tend to occupy MORE space than the original byte[][] since the * canonical huffman code can not be shorter than the original byte values * on average when all values are equally likely. When you want to measure * the expected compression ratio you need to test with a gaussian * distribution (bell curve) over the values, a USASCII distribution, etc. * * @param size * The #of entries in the byte[][]. * @param capacity * The capacity of the byte[][]. * @throws IOException */ protected void doRandomRoundTripTest(final IRabaCoder dataCoder, final int size, final int capacity) throws IOException { assert capacity >= size; if (dataCoder.isValueCoder()) { /* * Note: random values are not ordered and may contain nulls. */ final byte[][] data = new byte[capacity][]; for (int i = 0; i < size; i++) { final boolean isNull = r.nextFloat() < .03; if(isNull) { data[i] = null; } else { data[i] = getRandomValue(dataCoder); } } // layer on interface. final IRaba raba = new ReadOnlyValuesRaba(0/* fromIndex */, size/* toIndex */, capacity, data); doRoundTripTest(dataCoder, raba); } if (dataCoder.isKeyCoder()) { /* * Note: B+Tree keys based on random values. The keys must be * ordered, may not contain duplicates, and may not contain nulls. */ final byte[][] data = new byte[capacity][]; /* * The nominal maximum possible increment between successive keys. * The actual increment will be a random number in [1:nominal]. */ final int nominalIncRange = 5000; // any integer value. long lastKey = r.nextLong(); // The #of keys that we actually generated. int nactual = 0; for (int i = 0; i < size; i++, nactual++) { data[i] = TestKeyBuilder.asSortKey(lastKey); final long remainder = Long.MAX_VALUE - lastKey; if (remainder == 1) { // out of room in the long value space. break; } final int incRange = (int) Math.max(nominalIncRange, Math .min(nominalIncRange, remainder)); // increment is always at least by one to avoid duplicate keys. final int inc = r.nextInt(incRange) + 1; lastKey += inc; } // layer on interface. final IRaba raba = new ReadOnlyKeysRaba(0/* fromIndex */, nactual/* toIndex */, capacity, data); doRoundTripTest(dataCoder, raba); } } static public void doRoundTripTest(final IRabaCoder rabaCoder, final IRaba expected) { try { /* * Verify that we can (de-)serialize the coder itself. */ { final byte[] a = SerializerUtil.serialize(rabaCoder); final IRabaCoder b = (IRabaCoder)SerializerUtil.deserialize(a); } // Test the live coded path (returns coded raba instance for immediate use). final ICodedRaba liveCodedRaba = rabaCoder.encodeLive(expected, new DataOutputBuffer()); final AbstractFixedByteArrayBuffer liveCodedData = liveCodedRaba.data(); AbstractBTreeTestCase.assertSameRaba(expected, liveCodedRaba); final AbstractFixedByteArrayBuffer originalData = rabaCoder.encode( expected, new DataOutputBuffer()); { /* * Verify that we can read the byte[] out of [data]. This is * really a test of the data.getDataInput() and the returned * DataInputBuffer. */ assertEquals(0, originalData.off()); final byte[] tmp = new byte[originalData.len()]; originalData.getDataInput().readFully(tmp); // compare against result from encode() assertTrue(BytesUtil.compareBytesWithLenAndOffset(originalData.off(), originalData.len(), originalData.array(), 0, tmp.length, tmp) == 0); // compare against result from encodeLive. assertTrue(BytesUtil.compareBytesWithLenAndOffset(originalData .off(), originalData.len(), originalData.array(), liveCodedData.off(), liveCodedData.len(), liveCodedData .array()) == 0); } // verify we can decode the encoded data. { // decode. final ICodedRaba actual0 = rabaCoder.decode(originalData); // Verify encode() results in object which can decode the // byte[]s. AbstractBTreeTestCase.assertSameRaba(expected, actual0); // Verify decode when we build the decoder from the serialized // format. AbstractBTreeTestCase.assertSameRaba(expected, rabaCoder .decode(actual0.data())); } // Verify encode with a non-zero offset for the DataOutputBuffer // returns a slice which has the same data. { // buffer w/ non-zero offset. final int off = 10; final DataOutputBuffer out = new DataOutputBuffer(off, new byte[100 + off]); // encode onto that buffer. final AbstractFixedByteArrayBuffer slice = rabaCoder.encode( expected, out); // verify same encoded data for the slice. assertEquals(originalData.toByteArray(), slice.toByteArray()); } // Verify decode when we build the decoder from a slice with a // non-zero offset { final int off = 10; final byte[] tmp = new byte[off + originalData.len()]; System.arraycopy(originalData.array(), originalData.off(), tmp, off, originalData.len()); // create slice final FixedByteArrayBuffer slice = new FixedByteArrayBuffer( tmp, off, originalData.len()); // verify same slice. assertEquals(originalData.toByteArray(), slice.toByteArray()); // decode the slice. final IRaba actual = rabaCoder.decode(slice); // verify same raba. AbstractBTreeTestCase.assertSameRaba(expected, actual); } } catch (Throwable t) { fail("Cause=" + t + ", expectedRaba=" + expected, t); } } /** * Performance stress test for keys. Performance tuning should give more * weight to coded raba access times, including search and key retrieval or * copy, than coding times since most use will be access on the coded data. * Those costs are not factored apart in the stress test times. They are * parameterized here by a normalized vector of the rates of the different * operations (search(), get(), length(), etc). * * <dl> * <dt>nops</dt> * <dd> * The #of random operations to be performed. Large values for <i>nops</i> * need to be used to get beyond the initial JVM performance tuning so you * can more accurately compare the performance of the different coders. For * example, a value of 1M (1000000) will run for ~ 30-40s for the * front-coded coders. For shorter run times, the order in which we test the * coders will dominate their performance.</dd> * <dt>size</dt> * <dd>The #of entries in the raba to be tested (must be LTE the capacity)</dd> * </dl> * * @param args * [nops [generator [size]]] * * FIXME parameterize the generator choice. */ static public void main(final String[] args) { final Random r = new Random(); // default nops. int nops = 200000; // int nops = 1000000; // ~30-40s per coder @ 1M. if (args.length > 0) nops = Integer.valueOf(args[0]); if (nops <= 0) throw new IllegalArgumentException(); // // default capacity (branching factor). // int capacity = 256; // if (args.length > 1) // capacity = Integer.valueOf(args[1]); // if (capacity <= 0) // throw new IllegalArgumentException(); // default size (#of keys). int size = 256; if (args.length > 2) nops = Integer.valueOf(args[2]); if (size <= 0) throw new IllegalArgumentException(); // The coders to be tested. final IRabaCoder[] coders = new IRabaCoder[] { new MutableRabaCoder(), // provides performance baseline. SimpleRabaCoder.INSTANCE, // simplest coding. //// new FrontCodedRabaCoder(2/* ratio */), new FrontCodedRabaCoder(8/* ratio */), // front-coding. //// new FrontCodedRabaCoder(32/* ratio */), CanonicalHuffmanRabaCoder.INSTANCE // huffman coding. }; System.out.println("nops=" + nops + ", size=" + size + ", ncoders=" + coders.length); /* * Generate a raba. The same data is used for each coder. */ // The raw data. final byte[][] a; // Random keys based on random variable length byte[]s. // a = new RandomKeysGenerator(r, size + r.nextInt(size)/* maxKeys */, 20/* maxKeyLength */) // .generateKeys(size); // Random URIs in sorted order. // a = new RandomURIGenerator(r).generateKeys(size); // based on a tokenized source code file. a = new TokenizeKeysGenerator( "bigdata/src/test/com/bigdata/btree/raba/codec/AbstractRabaCoderTestCase.java") .generateKeys(size); /* * isNull, length, get, copy, search, iterator, recode. * * Note: isNull is not used for keys! */ final Op op = new Op(0.0f, .01f, .4f, .2f, .6f, .2f, .04f); /* * Test each IRabaCoder. * * @todo should also test on coded B+Tree values, which would be a * different [expected] instance. */ for(IRabaCoder rabaCoder : coders) { // the read-only raba. final ReadOnlyKeysRaba expected = new ReadOnlyKeysRaba(size, a); final long begin = System.nanoTime(); int recordLength = -1; try { recordLength = doRabaCoderPerformanceTest(expected, rabaCoder, size, nops, r, op); } catch (Throwable t) { System.err.println("coder failed: " + rabaCoder); t.printStackTrace(System.err); } final long elapsed = System.nanoTime() - begin; System.out.println(rabaCoder.toString() + " : elapsed=" + TimeUnit.NANOSECONDS.toMillis(elapsed) + ", recordLength=" + (recordLength == -1 ? "N/A" : recordLength)); } } /** * A test designed to measure the performance of an {@link IRabaCoder} for * operations on B+Tree keys, including search. */ public void test_keyCoderPerformance() { // test is only for coders which can code keys. if(!rabaCoder.isKeyCoder()) return; /* * Some branching factors to choose from. */ final int[] branchingFactors = new int[] { 3, 4, 8, 16, 27, 32, 48, 64, 96, 99, 112, 128, 256, 512, 1024, 4096 }; final int capacity = branchingFactors[r .nextInt(branchingFactors.length)]; final int size = r.nextInt(capacity) + 1; // Generate a read-only raba. final ReadOnlyKeysRaba expected = new ReadOnlyKeysRaba(size, AbstractBTreeTestCase.getRandomKeys(capacity, size)); final int nops = 50000; /* * isNull, length, get, copy, search, iterator, recode. * * Note: isNull is not used for keys! */ final Op op = new Op(0.0f, .01f, .4f, .2f, .6f, .2f, .04f); doRabaCoderPerformanceTest(expected, rabaCoder, size, nops, r, op); } /** * Do a performance stress test consisting of random operations on a * randomly generated B+Tree keys {@link IRaba}. The operations will be * checked against ground truth. * * @param rabaCoder * The coder to be tested. * @param size * The #of keys. * @param nops * The #of operations to perform. * @param r * The random number generator. * @param op * The distribution of the operations to be performed. * * @return The size of the coded record. */ static public int doRabaCoderPerformanceTest(final IRaba expected, final IRabaCoder rabaCoder, final int size, final int nops, final Random r, final Op op) { // The raba under test. This can be recoded by one of the ops. ICodedRaba actual; final byte[] originalData; { // encode the record. final AbstractFixedByteArrayBuffer data = rabaCoder.encode( expected, new DataOutputBuffer()); // save off a copy of the original coded record. originalData = data.toByteArray(); // decode the record. actual = rabaCoder.decode(data); // verify correct initial coding. AbstractBTreeTestCase.assertSameRaba(expected, actual); } // reused buffer. final DataOutputBuffer os = new DataOutputBuffer(); // #of operations per operation type. final long[] count = new long[op._dist.length]; // elapsed ns per operation type. final long[] elapsed = new long[op._dist.length]; for (int i = 0; i < nops; i++) { final long begin = System.nanoTime(); final int code; switch (code = op.nextOp(r)) { case Op.ISNULL: { if (expected.isKeys()) { // method not defined for keys. continue; } final int index = r.nextInt(size); if (log.isDebugEnabled()) log.debug(op.getName(code) + "(" + index + ") : expected=" + expected.isNull(index)); assertEquals(op.getName(code), expected.isNull(index), actual .isNull(index)); break; } case Op.LENGTH: { final int index = r.nextInt(size); if (log.isDebugEnabled()) log.debug(op.getName(code) + "(" + index + ") : expected=" + expected.length(index)); assertEquals(op.getName(code), expected.length(index), actual .length(index)); break; } case Op.GET: { final int index = r.nextInt(size); if (log.isDebugEnabled()) log.debug(op.getName(code) + "(" + index + ") : expected=" + BytesUtil.toString(expected.get(index))); assertEquals(op.getName(code), expected.get(index), actual .get(index)); break; } case Op.COPY: { /* * Note: We reuse the same output buffer all the time for this. * This is not a problem since we are not overwriting the data * backing the raba. */ final int index = r.nextInt(size); final int len = expected.length(index); if (log.isDebugEnabled()) log.debug(op.getName(code) + "(" + index + ") : expected=" + BytesUtil.toString(expected.get(index))); // reset the buffer. os.reset(); assertEquals(op.getName(code), len, actual.copy(index, os)); assertTrue(0 == BytesUtil.compareBytesWithLenAndOffset(0, len, os.array(), 0, len, expected.get(index))); break; } case Op.SEARCH: { /* * Search with a key chosen randomly from the original data. */ if (!expected.isKeys()) { // method not defined for values. continue; } final int index = r.nextInt(size); final byte[] key = expected.get(index); if (log.isDebugEnabled()) log.debug(op.getName(code) + "(" + index + ") : key=" + BytesUtil.toString(key)); { // search at the key. assertEquals(op.getName(code), index, actual.search(key)); } { // search at key plus a random byte[] suffix. // random suffix length. final int suffixLength = r.nextInt(1 + (key.length / 2)) + 1; // random fill of entire key. final byte[] key2 = new byte[key.length + suffixLength]; r.nextBytes(key2); // copy shared prefix (all of the original key). System.arraycopy(key, 0, key2, 0, key.length); // expected insert position (or index iff found). final int epos = expected.search(key2); // actual result from search on the coded raba. final int apos = actual.search(key2); assertEquals(op.getName(code), epos, apos); } { // search at random length prefix of the key. // random prefix length (may be zero). final int prefixLength = Math.max(0, r.nextInt(Math.max(1, key.length)) - 1); // copy shared prefix. final byte[] key2 = new byte[prefixLength]; System.arraycopy(key, 0, key2, 0, prefixLength); // expected insert position (or index iff found). final int epos = expected.search(key2); // actual result from search on the coded raba. final int apos = actual.search(key2); assertEquals(op.getName(code), epos, apos); } break; } case Op.ITERATOR: { if (log.isDebugEnabled()) log.debug(op.getName(code) + "()"); assertSameIterator(expected.iterator(), actual.iterator()); break; } case Op.RECODE: { /* * Note: this uses a new buffer instance so we do not stomp on * the existing coded representation backing the raba. The * backing array for the buffer is preallocated to a modest size * and filled with random data. When we setup the buffer, we * then advance it a random #of bytes into the buffer so the * raba will frequently be coded at a non-zero offset in the * buffer. */ // backing byte[]. sometimes empty. will be extended on demand. final byte[] tmp = (r.nextFloat() < .1 ? new byte[0] : new byte[r.nextInt(100) * size]); // fill it with random data. r.nextBytes(tmp); /* * Start at random (but small) offset into the buffer with a * bias to start at zero. * * Note: A lot of re-coding errors are linked to a non-zero * starting offset. If you set [start] to zero explicitly and * the re-coding problem goes away, then the problem is a * non-zero offset. Likewise, you can explicitly choose a * non-zero start to debug a problem. */ // random start offset. final int start = Math.min(tmp.length, (r.nextFloat() < .2 ? 0 : r.nextInt(20))); // final int start = 0; if (log.isDebugEnabled()) log.debug(op.getName(code) + "(): start=" + start + ", buf.len=" + tmp.length); // output buffer wrapping that byte[]. final DataOutputBuffer buf = new DataOutputBuffer(start, tmp); try { // recode onto the buffer. final AbstractFixedByteArrayBuffer data = rabaCoder.encode( actual, buf); // verify the same coding was produced. assertEquals(originalData, data.toByteArray()); // new instance wrapping the buffer. actual = rabaCoder.decode(data); // verify recoded raba. AbstractBTreeTestCase.assertSameRaba(expected, actual); } catch (AssertionFailedError ex) { fail(op.getName(code) + "(): start=" + start + ", buf.len=" + tmp.length, ex); } break; } default: throw new AssertionError(); } elapsed[code] = System.nanoTime() - begin; count[code]++; } double totalNS = 0; for (long ns : elapsed) totalNS += ns; final NumberFormat percentF = NumberFormat.getPercentInstance(); percentF.setMinimumFractionDigits(2); final NumberFormat rateF = NumberFormat.getInstance(); rateF.setMinimumFractionDigits(0); rateF.setMaximumFractionDigits(0); System.out.println("op\tcount\tnanos\t%time\tops/ms"); for (int i = 0; i < count.length; i++) { if (count[i] == 0) continue; System.out.println(// op.getName(i) + "\t" + count[i] + "\t" + elapsed[i] + "\t" + percentF.format(elapsed[i] / totalNS)// + "\t" + (elapsed[i] == 0 ? "N/A" : rateF.format(count[i] / (elapsed[i] * scalingFactor))) // ); } // The size of the coded record. return originalData.length; } /** * Verify same byte[] iterators. * * @param eitr * The expected iterator. * @param aitr * The actual iterator. */ static protected void assertSameIterator(final Iterator<byte[]> eitr, final Iterator<byte[]> aitr) { int i = 0; while (eitr.hasNext()) { assertTrue("hasNext", aitr.hasNext()); // verify same byte[] (compare data, may both be null). assertEquals("byte[" + i + "]", eitr.next(), aitr.next()); i++; } assertFalse("hasNext", aitr.hasNext()); } /** * Scaling factor converts nanoseconds to milliseconds. */ static protected final double scalingFactor = 1d / TimeUnit.NANOSECONDS .convert(1, TimeUnit.MILLISECONDS); /** * Helper class generates a random sequence of operation codes obeying the * probability distribution described in the constructor call. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ */ static class Op { static public final int ISNULL = 0; static public final int LENGTH = 1; static public final int GET = 2; static public final int COPY = 3; static public final int SEARCH = 4; static public final int ITERATOR = 5; static public final int RECODE = 6; /** * The last defined operator. */ static final int lastOp = RECODE; // final private Random r = new Random(); final private float[] _dist; /* * isNull, length, get, copy, search, iterator, recode. */ public Op(float isNullRate, float lengthRate, float getRate, float copyRate, float searchRate, float iteratorRate, float recodeRate) { if (isNullRate < 0 || lengthRate < 0 || getRate < 0 || copyRate < 0 || searchRate < 0 || iteratorRate < 0 || recodeRate < 0) { throw new IllegalArgumentException("negative rate"); } float total = isNullRate + lengthRate + getRate + copyRate + searchRate + iteratorRate + recodeRate; if( total == 0.0 ) { throw new IllegalArgumentException("all rates are zero."); } /* * Convert to normalized distribution in [0:1]. */ isNullRate /= total; lengthRate /= total; getRate /= total; copyRate /= total; searchRate /= total; iteratorRate /= total; recodeRate /= total; /* * Save distribution. */ int i = 0; _dist = new float[lastOp+1]; _dist[ i++ ] = isNullRate; _dist[ i++ ] = lengthRate; _dist[ i++ ] = getRate; _dist[ i++ ] = copyRate; _dist[ i++ ] = searchRate; _dist[ i++ ] = iteratorRate; _dist[ i++ ] = recodeRate; /* * Checksum. */ float sum = 0f; for( i = 0; i<_dist.length; i++ ) { sum += _dist[ i ]; } if( Math.abs( sum - 1f) > 0.01 ) { throw new AssertionError("sum of distribution is: "+sum+", but expecting 1.0"); } } /** * Return the name of the operator. * * @param op * @return */ public String getName( final int op ) { if( op < 0 || op > lastOp ) { throw new IllegalArgumentException(); } /* * isNull, length, get, copy, search, iterator, recode. */ switch( op ) { case ISNULL: return "isNull"; case LENGTH: return "length"; case GET: return "get "; case COPY: return "copy "; case SEARCH: return "search"; case ITERATOR:return "itr "; case RECODE: return "recode"; default: throw new AssertionError(); } } /** * An array of normalized probabilities assigned to each operator. The * array may be indexed by the operator, e.g., dist[{@link #fetch}] * would be the probability of a fetch operation. * * @return The probability distribution over the defined operators. */ public float[] getDistribution() { return _dist; } /** * Generate a random operator according to the distribution described to * to the constructor. * * @return A declared operator selected according to a probability * distribution. */ public int nextOp(final Random r) { final float rand = r.nextFloat(); // [0:1) float cumprob = 0f; for( int i=0; i<_dist.length; i++ ) { cumprob += _dist[ i ]; if( rand <= cumprob ) { return i; } } throw new AssertionError(); } } /** * Tests of the {@link Op} test helper class. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ */ public static class TestOp extends TestCase { private final Random r = new Random(); public void test_Op() { /* * isNull, length, get, copy, search, iterator, recode. */ Op gen = new Op(.2f, .05f, .2f, 05f, .1f, .05f, .001f); doOpTest(gen); } public void test_Op2() { /* * isNull, length, get, copy, search, iterator, recode. */ Op gen = new Op(0f,0f,0f,1f,0f,0f,0f); doOpTest(gen); } /** * Correct rejection test when all rates are zero. */ public void test_correctRejectionAllZero() { /* * isNull, length, get, copy, search, iterator, recode. */ try { new Op(0f,0f,0f,0f,0f,0f,0f); fail("Expecting: "+IllegalArgumentException.class); } catch(IllegalArgumentException ex) { log.info("Ignoring expected exception: "+ex); } } /** * Correct rejection test when one or more rates are negative. */ public void test_correctRejectionNegativeRate() { /* * isNull, length, get, copy, search, iterator, recode. */ try { new Op(0f,0f,0f,-1f,0f,1f,0f); fail("Expecting: "+IllegalArgumentException.class); } catch(IllegalArgumentException ex) { log.info("Ignoring expected exception: "+ex); } } /** * Verifies the {@link Op} class given an instance with some probability * distribution. */ void doOpTest(final Op gen) { final int limit = 10000; int[] ops = new int[limit]; int[] sums = new int[Op.lastOp + 1]; for (int i = 0; i < limit; i++) { int op = gen.nextOp(r); assertTrue(op >= 0); assertTrue(op <= Op.lastOp); ops[i] = op; sums[op]++; } float[] expectedProbDistribution = gen.getDistribution(); float[] actualProbDistribution = new float[Op.lastOp + 1]; float sum = 0f; for (int i = 0; i <= Op.lastOp; i++) { sum += expectedProbDistribution[i]; actualProbDistribution[i] = (float) ((double) sums[i] / (double) limit); float diff = Math.abs(actualProbDistribution[i] - expectedProbDistribution[i]); System.err.println("expected[i=" + i + "]=" + expectedProbDistribution[i] + ", actual[i=" + i + "]=" + actualProbDistribution[i] + ", diff=" + ((int) (diff * 1000)) / 10f + "%"); assertTrue(diff < 0.02); // difference is less than 2% // percent. } assertTrue(Math.abs(sum - 1f) < 0.01); // essential 1.0 } } }