/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Oct 19, 2011
*/
package com.bigdata.htree;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.UUID;
import org.apache.log4j.Logger;
import com.bigdata.btree.BTreeCounters;
import com.bigdata.btree.DefaultTupleSerializer;
import com.bigdata.btree.HTreeIndexMetadata;
import com.bigdata.btree.ITupleSerializer;
import com.bigdata.btree.keys.ASCIIKeyBuilderFactory;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.btree.raba.codec.SimpleRabaCoder;
import com.bigdata.io.DirectBufferPool;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.rwstore.sector.IMemoryManager;
import com.bigdata.rwstore.sector.MemStore;
import com.bigdata.rwstore.sector.MemoryManager;
import com.bigdata.util.Bytes;
import com.bigdata.util.PseudoRandom;
/**
* A simple demonstration which may be used to compare the {@link HTree}
* performance against a Java collections class such as {@link HashMap} or
* {@link LinkedHashMap}. This demonstration is focused on the performance curve
* when inserting a large number of keys into a collection. The Java collection
* classes are faster for small numbers of keys, but the {@link HTree} rapidly
outperforms them as the #of keys grows larger.
* <p>
* The {@link HTree} is run against the {@link MemoryManager}. This means that
* the data for the {@link HTree} is mostly stored on the Java native process
* heap using {@link java.nio.ByteBuffer#allocateDirect(int)}. Thus even a very
* large {@link HTree} instance can be run with a very small JVM object heap and
* creates nearly no GC pressure.
* <p>
* The {@link HTree} permits multiple entries for the same key. While variable
* length keys are supported, the key for the {@link HTree} is typically a 32
* (or 64) bit hash code for the value stored in the {@link HTree}. Normally,
* the application will lookup all tuples having the same hash code and then
* enforce {@link Set} or {@link Map} semantics by scanning those tuples for the
* presence of the same application object.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @author <a href="mailto:martyncutcher@users.sourceforge.net">Martyn
* Cutcher</a>
* @version $Id$
*/
public class HTreeVersusHashMapDemo {

    private static final Logger log = Logger
            .getLogger(HTreeVersusHashMapDemo.class);

    /**
     * Provision and return an {@link HTree} instance.
     *
     * @param store
     *            The backing store.
     * @param addressBits
     *            The #of address bits (10 is typical and gives you directory
     *            pages with 2^10 slots, which is a 1024 fan-out).
     * @param rawRecords
     *            <code>true</code> iff raw record support will be enabled.
     * @param writeRetentionQueueCapacity
     *            The write retention queue capacity controls how long mutable
     *            htree nodes will be buffered on the JVM heap before being
     *            incrementally evicted to the backing store.
     *
     * @return The {@link HTree} instance.
     */
    static private HTree getHTree(final IRawStore store, final int addressBits,
            final boolean rawRecords, final int writeRetentionQueueCapacity) {

        final ITupleSerializer<?, ?> tupleSer = new DefaultTupleSerializer(
                new ASCIIKeyBuilderFactory(Bytes.SIZEOF_INT),
                // new FrontCodedRabaCoder(),// TODO FrontCodedRaba
                new SimpleRabaCoder(), // keys
                new SimpleRabaCoder() // vals
                // EmptyRabaValueCoder.INSTANCE // if no values.
        );

        final HTreeIndexMetadata metadata = new HTreeIndexMetadata(
                UUID.randomUUID());

        if (rawRecords) {
            // Enable raw record support with no inline record length.
            metadata.setRawRecords(true);
            metadata.setMaxRecLen(0);
        }

        metadata.setAddressBits(addressBits);

        metadata.setTupleSerializer(tupleSer);

        /*
         * Note: A low retention queue capacity will drive evictions, which is
         * good from the perspective of stressing the persistence store
         * integration.
         */
        metadata.setWriteRetentionQueueCapacity(writeRetentionQueueCapacity);
        metadata.setWriteRetentionQueueScan(10); // Must be LTE capacity.

        return HTree.create(store, metadata);

    }

    /** The #of insert operations between progress reports. */
    static final int REPORT_INTERVAL = 100000;

    /**
     * Interface for reporting on progress.
     */
    interface IReport {

        /**
         * Report hook.
         *
         * @param nops
         *            The #of operations performed.
         * @param elapsed
         *            The elapsed time in milliseconds.
         * @param mmgr
         *            The memory manager backing the {@link HTree} store
         *            (<code>null</code> for the Java collection run).
         * @param store
         *            The backing store for the {@link HTree}
         *            (<code>null</code> for the Java collection run).
         */
        void report(long nops, long elapsed, IMemoryManager mmgr,
                IRawStore store);

    }

    /**
     * Writes a tab-delimited progress report on stdout (header row is written
     * by the constructor).
     */
    private static class ReportListener implements IReport {

        public ReportListener() {
            // Header row for the tab-delimited output.
            System.out
                    .println("inserts\telapsed(ms)\tinserts/sec\tfreeMemory\ttotalMemory\tuserBytes\tmmgrBytes");
        }

        @Override
        public void report(final long nops, final long elapsed,
                final IMemoryManager mmgr, final IRawStore store) {

            /*
             * Guard against division by zero: when elapsed is 0ms the original
             * expression produced Infinity, which casts to Long.MAX_VALUE.
             */
            final long insertsPerSec = elapsed == 0 ? 0L
                    : (long) (((double) nops) / (elapsed / 1000d));

            final long freeMemory = Runtime.getRuntime().freeMemory();
            final long totalMemory = Runtime.getRuntime().totalMemory();

            // #of application bytes in the store.
            final long userBytes = store == null ? 0L : store.size();

            // total extent of the store (1M increments).
            final long mmgrBytes = mmgr == null ? 0L : ((MemoryManager) mmgr)
                    .getExtent();

            System.out.println(nops + "\t" + elapsed + "\t" + insertsPerSec
                    + "\t" + freeMemory + "\t" + totalMemory + "\t" + userBytes
                    + "\t" + mmgrBytes);

        }

    }

    /**
     * Version stuff the keys into a Java collection.
     */
    private static class JavaCollectionDemo implements Runnable {

        private final int nkeys;
        private final int vectorSize;
        private final IGenerator gen;
        private final Map<Object, Object> c;
        private final IReport report;

        /**
         * @param report
         *            The progress report listener (optional).
         * @param nkeys
         *            The #of keys to insert.
         * @param vectorSize
         *            The #of keys which are sorted (vectored) as per the
         *            {@link HTree} variant.
         * @param gen
         *            The key generator.
         * @param c
         *            The Java collection.
         */
        JavaCollectionDemo(final IReport report, final int nkeys,
                final int vectorSize, final IGenerator gen,
                final Map<Object, Object> c) {

            this.report = report;
            this.nkeys = nkeys;
            this.vectorSize = vectorSize;
            this.gen = gen;
            this.c = c;

        }

        @Override
        public void run() {

            final long start = System.currentTimeMillis();

            final int[] keys = new int[vectorSize];

            int alen;
            for (int i = 0; i < nkeys;) {

                // The #of keys in this chunk (may be a partial chunk).
                alen = Math.min(vectorSize, nkeys - i);

                for (int j = 0; j < alen; j++) {

                    keys[j] = gen.next();

                }

                i += alen;

                // Vector the chunk.
                Arrays.sort(keys, 0, alen);

                for (int j = 0; j < alen; j++) {

                    final Integer key = keys[j];

                    // if (!c.containsKey(key)) {
                    // /*
                    // * Do not store duplicate entries since we will compare
                    // * the performance to a Set.
                    // */
                    c.put(key, key);
                    // }

                    // Approximate op count (exact when vectorSize == 1).
                    final long nops = i + j;

                    if (report != null && (nops % REPORT_INTERVAL) == 0L) {

                        final long elapsed = System.currentTimeMillis() - start;

                        report.report(nops, elapsed, null/* mmgr */,
                                null/* store */);

                    }

                }

            }

            final long load = System.currentTimeMillis();

            if (log.isInfoEnabled()) {
                log.info("\nEntries: " + c.size() + "; Load took "
                        + (load - start) + "ms, Generator="
                        + gen.getClass().getSimpleName() + ", class="
                        + this.getClass().getSimpleName());
            }

        }

    }

    /**
     * Version stuff the keys into an {@link HTree}.
     */
    private static class HTreeDemo implements Runnable {

        private final int nkeys;
        private final int vectorSize;
        private final IGenerator gen;
        private final int addressBits;
        private final int writeRetentionQueueCapacity;
        private final IReport report;

        /**
         * @param report
         *            The progress report listener (optional).
         * @param nkeys
         *            The #of keys to insert.
         * @param vectorSize
         *            The #of keys which are sorted (vectored) to improve IO
         *            efficiency.
         * @param gen
         *            The key generator.
         * @param addressBits
         *            The address bits for the {@link HTree}.
         * @param writeRetentionQueueCapacity
         *            The capacity of the write retention queue.
         */
        HTreeDemo(final IReport report, final int nkeys, final int vectorSize,
                final IGenerator gen, final int addressBits,
                final int writeRetentionQueueCapacity) {

            this.report = report;
            this.nkeys = nkeys;
            this.vectorSize = vectorSize;
            this.gen = gen;
            this.addressBits = addressBits;
            this.writeRetentionQueueCapacity = writeRetentionQueueCapacity;

        }

        @Override
        public void run() {

            final long start = System.currentTimeMillis();

            // Backing store on the native heap (direct buffers).
            final MemStore store = new MemStore(DirectBufferPool.INSTANCE);

            try {

                final HTree htree = getHTree(store, addressBits,
                        false/* rawRecords */, writeRetentionQueueCapacity);

                final IKeyBuilder keyBuilder = new KeyBuilder();

                // final byte[] val = null; // no value stored under the key.

                final int[] keys = new int[vectorSize];

                int alen;
                for (int i = 0; i < nkeys;) {

                    // The #of keys in this chunk (may be a partial chunk).
                    alen = Math.min(vectorSize, nkeys - i);

                    for (int j = 0; j < alen; j++) {

                        keys[j] = gen.next();

                    }

                    i += alen;

                    // Vector the chunk.
                    Arrays.sort(keys, 0, alen);

                    for (int j = 0; j < alen; j++) {

                        final byte[] key = keyBuilder.reset().append(keys[j])
                                .getKey();

                        // if (!htree.contains(key)) {
                        // /*
                        // * Do not store duplicate entries since we will
                        // * compare the performance to a Set.
                        // */
                        htree.insert(key, key);
                        // }

                        // Approximate op count (exact when vectorSize == 1).
                        final long nops = i + j;

                        if (report != null && (nops % REPORT_INTERVAL) == 0L) {

                            final long elapsed = System.currentTimeMillis()
                                    - start;

                            report.report(nops, elapsed,
                                    store.getMemoryManager(), store);

                        }

                    }

                }

                final long load = System.currentTimeMillis();

                final BTreeCounters counters = htree.getBtreeCounters();

                if (log.isInfoEnabled()) {
                    log.info("\nEntries: " + htree.nentries + ", Leaves: "
                            + htree.nleaves + ", Evicted: "
                            + counters.leavesWritten + ", Nodes: "
                            + htree.nnodes + ", Evicted: "
                            + counters.nodesWritten + "; Load took "
                            + (load - start) + "ms, Generator="
                            + gen.getClass().getSimpleName() + ", class="
                            + this.getClass().getSimpleName());
                }

                // htree.writeCheckpoint();

            } finally {

                // Release the backing native buffers.
                store.destroy();

            }

        }

    }

    /** Source of <code>int</code> keys for a demo run. */
    private interface IGenerator {

        /** Return the next key. */
        int next();

    }

    /**
     * Sequential numbers starting from zero.
     */
    static private class SequentialGenerator implements IGenerator {

        private int next = 0;

        @Override
        public int next() {
            return next++;
        }

    }

    /**
     * Random numbers in the half-open range [0:range). Values may repeat and
     * negative values are never generated, so the full 32-bit key space is not
     * covered.
     */
    static private class RandomGenerator implements IGenerator {

        /** The exclusive upper bound on the generated values. */
        private final int range;

        private final Random r;

        public RandomGenerator(final long seed, final int range) {
            this.range = range;
            this.r = new Random(seed);
        }

        @Override
        public int next() {
            return r.nextInt(range);
        }

    }

    /** Pseudo random numbers without replacement covering a half-open range. */
    static private class PseudoRandomGenerator implements IGenerator {

        private final PseudoRandom pr;

        /**
         * @param range
         *            The half-open range (0:range].
         */
        public PseudoRandomGenerator(final int range) {
            this(range, 0/* next */);
        }

        /**
         * @param range
         *            The half-open range (0:range].
         * @param next
         *            The next value to visit.
         */
        public PseudoRandomGenerator(final int range, final int next) {
            this.pr = new PseudoRandom(range, next);
        }

        @Override
        public int next() {
            return pr.next();
        }

    }

    /**
     * Run the demonstration. Edit the constants and the toggles below to
     * select the key generation order and the HTree versus Java collection
     * variant.
     *
     * @param args
     *            Ignored.
     */
    public static void main(final String[] args) {

        final int nkeys = 2 * Bytes.megabyte32;

        final int vectorSize = 1;// 0000;

        // Toggle for the key generation order.
        final IGenerator gen = true ? new SequentialGenerator()
                : new PseudoRandomGenerator(nkeys);
        // final IGenerator gen = new RandomGenerator(-91L/* seed */, nkeys);

        final int addressBits = 8; // pages with 2^8 slots.

        final int writeRetentionQueueCapacity = 50;

        // Toggle to select the HTree versus the Java collection run.
        if (false) {

            new HTreeDemo(new ReportListener(), nkeys, vectorSize, gen,
                    addressBits, writeRetentionQueueCapacity).run();

        } else {

            /*
             * NOTE(review): HashMap(nkeys) sets the initial table capacity,
             * not the entry capacity; with the default 0.75 load factor the
             * table still resizes during the run.
             */
            new JavaCollectionDemo(new ReportListener(), nkeys, vectorSize,
                    gen, new HashMap<Object, Object>(nkeys)).run();

        }

    }

}