/** Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on Jun 6, 2011 */ package com.bigdata.rdf.lexicon; import java.io.IOException; import java.util.concurrent.atomic.AtomicInteger; import org.apache.log4j.Logger; import org.openrdf.model.BNode; import org.openrdf.model.Value; import com.bigdata.btree.IIndex; import com.bigdata.btree.IRangeQuery; import com.bigdata.btree.ITuple; import com.bigdata.btree.ITupleIterator; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.keys.KVO; import com.bigdata.btree.keys.KeyBuilder; import com.bigdata.btree.keys.SuccessorUtil; import com.bigdata.io.ByteArrayBuffer; import com.bigdata.io.DataOutputBuffer; import com.bigdata.rdf.internal.INonInlineExtensionCodes; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.VTE; import com.bigdata.rdf.internal.impl.BlobIV; import com.bigdata.rdf.model.BigdataValue; import com.bigdata.rdf.model.BigdataValueFactory; import com.bigdata.rdf.model.BigdataValueSerializer; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.util.Bytes; import com.bigdata.util.BytesUtil; /** * Helper class for operations on the BLOBS index. */ public class BlobsIndexHelper { private static final Logger log = Logger.getLogger(BlobsIndexHelper.class); public static final transient int SIZEOF_HASH = Bytes.SIZEOF_INT; /** * The size of the hash collision counter. */ public static final transient int SIZEOF_COUNTER = Bytes.SIZEOF_SHORT; /** The maximum value of the hash collision counter (unsigned short). */ public static final transient int MAX_COUNTER = ((1 << 16) - 1); /** The offset at which the counter occurs in the key. */ public static final transient int OFFSET_COUNTER = 1/* flags */+ 1/* extension */+ SIZEOF_HASH /* hashCode */; /** The size of a prefix key (a key without a hash collision counter). */ public static final transient int SIZEOF_PREFIX_KEY = OFFSET_COUNTER; /** * The size of a key in the TERMS index. * <P> * Note: The key is size is ONE (1) byte for the [flags] byte, ONE (1) for * the extension byte (which describes what kind of non-inline IV this is), * FOUR (4) bytes for the hash code, plus a TWO (2) byte counter (to break * ties within a collision bucket). * <p> * Note: The counter size was increased when the design purpose of this * index was changed to handling large RDF {@link Value}s only. In practice, * the hash codes of the RDF {@link Value} are well distributed and * collisions within a hash bucket (same hash code) are rare. A ONE (1) byte * counter is probably all the distinctions that we could require and * permits up to 256 hash collisions. However, when operating in scale-out * the TWO (2) byte (aka short) counter provides additional confidence that * hash collisions will not result in a hash bucket overflow. Given that the * terms index will be used only with larger RDF {@link Value}s and the * necessity for the "extension" byte, it seems a small added cost to have * the TWO (2) byte counter and provides additional peace of mind. However, * note that scanning large collision buckets is expensive. But by allowing * for large collision buckets, we will pay that cost only when the hash * codes have an unusual distribution for some specific value. * <p> * The total key size is only 8 bytes. Since only large values are being * stored under the TERMS index, they will always be written as raw records * on the backing store. This means that we have an 8 byte key paired with * an 8 byte address. That allows for practical branching factors of between * 512 and 1024 to obtain an expected average page sizes of ~ 8k (after * prefix compression, etc.). */ public static final transient int TERMS_INDEX_KEY_SIZE = 1 + 1 + SIZEOF_HASH + SIZEOF_COUNTER; /** * Arbitrary threshold for the collision counter for a given hash code at * which we will log @ WARN. This provides notice when there are large hash * collision buckets which can effect performance. */ public static final transient int LOG_WARN_COUNTER_THRESHOLD = 127; /** * Used to signal that the {@link Value} was not found on a read-only * request. */ public static final transient int NOT_FOUND = Integer.MIN_VALUE; /** * Generate the sort keys for {@link BigdataValue}s to be represented as * {@link BlobIV}s. The sort key is formed from the {@link VTE} of the * {@link BigdataValue} followed by the hashCode of the {@link BigdataValue} * . Note that the sort key formed in this manner is only a prefix key for * the TERMS index. The fully formed key also includes a counter to breaks * ties when the sort key formed from {@link VTE} and hashCode results in a * collision (different {@link BigdataValue}s having the same prefix key). * * @param valSer * The object used to generate the values to be written onto the * index. * @param terms * The terms whose sort keys will be generated. * @param numTerms * The #of terms in that array. * * @return An array of correlated key-value-object tuples. */ @SuppressWarnings("unchecked") public KVO<BigdataValue>[] generateKVOs( final BigdataValueSerializer<BigdataValue> valSer, final BigdataValue[] terms, final int numTerms) { if (valSer == null) throw new IllegalArgumentException(); if (terms == null) throw new IllegalArgumentException(); if (numTerms <= 0 || numTerms > terms.length) throw new IllegalArgumentException(); final KVO<BigdataValue>[] a = new KVO[numTerms]; final IKeyBuilder keyBuilder = newKeyBuilder(); final ByteArrayBuffer tmp = new ByteArrayBuffer(); final DataOutputBuffer out = new DataOutputBuffer(); try { for (int i = 0; i < numTerms; i++) { final BigdataValue term = terms[i]; final VTE vte = VTE.valueOf(term); final int hashCode = term.hashCode(); final byte[] key = makePrefixKey(keyBuilder.reset(), vte, hashCode); final byte[] val = valSer.serialize(term, out.reset(), tmp); a[i] = new KVO<BigdataValue>(key, val, term); } } finally { try { /* * Note: Both the outer and inner try/catch are just to please * find bugs. DataOutputStream.close() is a NOP. */ out.close(); } catch (IOException ex) { throw new RuntimeException(ex); } } return a; } /** * Resolve an existing record in the TERMS index and insert the record if * none is found. * * @param termsIndex * The TERMS index. * @param readOnly * <code>true</code> iff the operation is read only. * @param keyBuilder * The buffer will be reset as necessary. * @param baseKey * The base key for the hash code (without the counter suffix). * @param val * The (serialized and compressed) RDF Value. * @param tmp * The buffer used to format the <i>toKey</i> (optional). A new * byte[] will be allocated if this is <code>null</code>, but the * same byte[] can be reused for multiple invocations. The buffer * MUST be dimensioned to * {@link BlobsIndexHelper#SIZEOF_PREFIX_KEY}. * @param bucketSize * The size of the collision bucket is reported as a side-effect * (optional). * * @return The collision counter for the key under which the {@link Value} * was found (if pre-existing), the collision counter assigned to * the {@link Value} iff the value was not found and the operation * permitted writes -or- {@link Integer#MIN_VALUE} iff the * {@link Value} is not in the index and the operation is read-only. * * @throws CollisionBucketSizeException * if an attempt is made to insert a {@link Value} into a * collision bucket which is full. */ public int resolveOrAddValue(final IIndex termsIndex, final boolean readOnly, final IKeyBuilder keyBuilder, final byte[] baseKey, final byte[] val, final byte[] tmp, final AtomicInteger bucketSize) { assert baseKey.length == BlobsIndexHelper.SIZEOF_PREFIX_KEY : "Expecting " + BlobsIndexHelper.SIZEOF_PREFIX_KEY + " bytes, not " + baseKey.length; /* * This is the fixed length hash code prefix. When a collision * exists we can either append a counter -or- use more bits from the * prefix. An extensible hash index works by progressively * increasing the #of bits from the hash code which are used to * create a distinction in the index. Records with identical hash * values are stored in an (unordered, and possibly chained) bucket. * We can approximate this by using N-bits of the hash code for the * key and then increasing the #of bits in the key when there is a * hash collision. Unless a hash function is used which has * sufficient bits available to ensure that there are no collisions, * we may be forced eventually to append a counter to impose a * distinction among records which are hash identical but whose * values differ. * * In the case of a hash collision, we can determine the records * which have already collided using the fast range count between * the hash code key and the fixed length successor of that key. We * can create a guaranteed distinct key by creating a BigInteger * whose values is (#collisions+1) and appending it to the key. This * approach will give us keys whose byte length increases slowly as * the #of collisions grows (though these might not be the minimum * length keys - depending on how we are encoding the BigInteger in * the key.) * * When we have a hash collision, we first need to scan all of the * collision records and make sure that none of those records has * the same value as the given record. This is done using the fixed * length successor of the hash code key as the exclusive upper * bound of a key range scan. Each record associated with a tuple in * that key range must be compared for equality with the given * record to decide whether or not the given record already exists * in the index. * * The fromKey is strictly LT any full key for the hash code of this * val but strictly GT any key have a hash code LT the hash code of * this val. */ final byte[] fromKey = baseKey; // key strictly LT any successor of the hash code of this val. final byte[] toKey = makeToKey(fromKey, tmp); // fast range count. this tells us how many collisions there are. // this is an exact collision count since we are not deleting tuples // from the TERMS index. final long rangeCount = termsIndex.rangeCount(fromKey, toKey); if (bucketSize != null) bucketSize.set((int) rangeCount); if (rangeCount == 0 && readOnly) { // Fast path. return NOT_FOUND; } if (rangeCount >= MAX_COUNTER) { /* * Impose a hard limit on the #of hash collisions we will accept in * this utility. */ throw new CollisionBucketSizeException(rangeCount); } // // Force range count into (signed) byte // final byte counter = (byte) rangeCount; if (rangeCount == 0) { assert !readOnly; /* * This is the first time we have observed a Value which * generates this hash code, so append a [short] ZERO (0) to * generate the actual key and then insert the Value into the * index. Since there is nothing in the index for this hash * code, no collision is possible and we do not need to test the * index for the value before inserting the value into the * index. */ final byte[] key = makeKey(keyBuilder.reset(), baseKey, (int) rangeCount); if (termsIndex.insert(key, val) != null) { throw new AssertionError(); } return (int) rangeCount; } /* * Iterator over that key range * * Note: We need to visit the keys in case we have a match since we will * need to return that key (it will become wrapped as an IV). * * Note: We need to visit the values so we can test tuples within the * same collision bucket (same key up to the counter) to determine * whether or not the Value is in fact the same. */ final ITupleIterator<?> itr = termsIndex .rangeIterator(fromKey, toKey, 0/* capacity */, IRangeQuery.KEYS | IRangeQuery.VALS, null/* filter */); while(itr.hasNext()) { final ITuple<?> tuple = itr.next(); if (tuple.isNull()) { // Can not match a NullIV. continue; } final ByteArrayBuffer tb = tuple.getValueBuffer(); // final byte[] tmp2 = tuple.getValue(); // // // Note: Compares the compressed values ;-) // if(BytesUtil.bytesEqual(val, tmp2)) { // compare without materializing the tuple's value if (0 == BytesUtil.compareBytesWithLenAndOffset( 0/* aoff */, val.length/* alen */, val,// 0/* boff */, tb.limit()/* blen */, tb.array()/* b */ )) { // Already in the index. final short asFoundCounter = KeyBuilder.decodeShort(tuple .getKeyBuffer().array(), OFFSET_COUNTER); return asFoundCounter; } } if (readOnly) { // Not found. return NOT_FOUND; } /* * Hash collision. */ final byte[] key = makeKey(keyBuilder.reset(), baseKey, (int) rangeCount); // Insert into the index. if (termsIndex.insert(key, val) != null) { throw new AssertionError(); } if (rangeCount >= LOG_WARN_COUNTER_THRESHOLD) { log.warn("Collision: hashCode=" + BytesUtil.toString(key) + ", collisionBucketSize=" + rangeCount); } return (int) rangeCount; } /** * Add an entry for a {@link BNode} to the TERMS index (do NOT use when told * blank node semantics apply). * <p> * All {@link BNode}s entered by this method are distinct regardless of * their {@link BNode#getID()}. Since blank nodes can not be unified with * the TERMS index (unless we are using told blank node semantics) we simply * add another entry for the caller's {@link BNode} and return the key for * that entry which will be wrapped as an {@link IV}. That entry will be * made distinct from all other entries for the same {@link VTE} and * hashCode by appending the current collision counter (which is just the * range count). * * @param termsIndex * The TERMS index. * @param keyBuilder * The buffer will be reset as necessary. * @param baseKey * The base key for the hash code (without the counter suffix). * @param val * The (serialized and compressed) RDF {@link BNode}. * @param tmp * The buffer used to format the <i>toKey</i> (optional). A new * byte[] will be allocated if this is <code>null</code>, but the * same byte[] can be reused for multiple invocations. The buffer * MUST be dimensioned to * {@link BlobsIndexHelper#SIZEOF_PREFIX_KEY}. * * @return The collision counter. * * @throws CollisionBucketSizeException * if an attempt is made to insert a {@link Value} into a * collision bucket which is full. */ public int addBNode(final IIndex ndx, final IKeyBuilder keyBuilder, final byte[] baseKey, final byte[] val, final byte[] tmp) { /* * The fromKey is strictly LT any full key for the hash code of this val * but strictly GT any key have a hash code LT the hash code of this * val. */ final byte[] fromKey = baseKey; // key strictly LT any successor of the hash code of this val. final byte[] toKey = makeToKey(fromKey, tmp); // fast range count. this tells us how many collisions there are. // this is an exact collision count since we are not deleting tuples // from the TERMS index. final long rangeCount = ndx.rangeCount(fromKey, toKey); if (rangeCount >= MAX_COUNTER) { /* * Impose a hard limit on the #of hash collisions we will accept in * this utility. */ throw new CollisionBucketSizeException(rangeCount); } // // Force range count into (signed) byte // final byte counter = (byte) rangeCount; // Form a key using the collision counter (guaranteed distinct). final byte[] key = makeKey(keyBuilder.reset(), baseKey, (int) rangeCount); // Insert into the index. if (ndx.insert(key, val) != null) { throw new AssertionError(); } if (rangeCount >= LOG_WARN_COUNTER_THRESHOLD) { log.warn("Collision: hashCode=" + BytesUtil.toString(key) + ", collisionBucketSize=" + rangeCount); } return (int) rangeCount; } /** * Return the value associated with the {@link BlobIV} in the TERMS index. * <p> * Note: The returned <code>byte[]</code> may be decoded using the * {@link BigdataValueSerializer} associated with the * {@link BigdataValueFactory} for the namespace of the owning * {@link AbstractTripleStore}. * * @param ndx * The index. * @param iv * The {@link IV}. * @param keyBuilder * An object used to format the {@link IV} as a key for the * index. * * @return The byte[] value -or- <code>null</code> if there is no entry for * that {@link IV} in the index. */ public byte[] lookup(final IIndex ndx, final BlobIV<?> iv, final IKeyBuilder keyBuilder) { final byte[] key = iv.encode(keyBuilder.reset()).getKey(); return ndx.lookup(key); } /** * Create a fully formed key for the TERMS index from a baseKey and a hash * collision counter. * * @param keyBuilder * The caller is responsible for resetting the buffer as * required. * @param baseKey * The base key (including the flags byte and the hashCode). * @param counter * The counter value. * * @return The fully formed key. */ public byte[] makeKey(final IKeyBuilder keyBuilder, final byte[] baseKey, final int counter) { final byte[] key = keyBuilder.append(baseKey).append((short) counter) .getKey(); assert key.length == TERMS_INDEX_KEY_SIZE; return key; } /** * Create a fully formed key for the TERMS index from the {@link VTE}, the * hashCode of the {@link BigdataValue}, and the hash collision counter. * * @param keyBuilder * The caller is responsible for resetting the buffer as * required. * @param vte * The {@link VTE}. * @param hashCode * The hash code of the {@link BigdataValue}. * @param counter * The hash collision counter. * * @return The fully formed key. */ // Note: Only used by the unit tests. public byte[] makeKey(final IKeyBuilder keyBuilder, final VTE vte, final int hashCode, final int counter) { /* * Note: This MUST agree with TermId#encode(). */ keyBuilder.appendSigned(BlobIV.toFlags(vte)); // flags byte keyBuilder.appendSigned(INonInlineExtensionCodes.BlobIV); // extension byte. keyBuilder.append(hashCode); // hashCode keyBuilder.append((short) counter); // hash collision counter. final byte[] key = keyBuilder.getKey(); assert key.length == TERMS_INDEX_KEY_SIZE; return key; } /** * Create a prefix key for the TERMS index from the {@link VTE} and hashCode * of the {@link BigdataValue}. * * @param keyBuilder * The caller is responsible for resetting the buffer as * required. * @param vte * The {@link VTE}. * @param hashCode * The hash code of the {@link BigdataValue}. * * @return The prefix key. */ public byte[] makePrefixKey(final IKeyBuilder keyBuilder, final VTE vte, final int hashCode) { /* * Note: This MUST agree with TermId#encode(). */ // flags byte keyBuilder.appendSigned(BlobIV.toFlags(vte)); // extension byte. keyBuilder.appendSigned(INonInlineExtensionCodes.BlobIV); // hashCode keyBuilder.append(hashCode); final byte[] prefixKey = keyBuilder.getKey(); assert prefixKey.length == SIZEOF_PREFIX_KEY; return prefixKey; } /** * Create a prefix key for the TERMS index from the {@link BigdataValue}. * * @param keyBuilder * The caller is responsible for resetting the buffer as * required. * @param value * The {@link BigdataValue} * * @return The prefix key. */ public byte[] makePrefixKey(final IKeyBuilder keyBuilder, final BigdataValue value) { final VTE vte = VTE.valueOf(value); return makePrefixKey(keyBuilder, vte, value.hashCode()); } /** * Generate the successor of the fromKey. * * @param fromKey * The fromKey. * @param tmp * The buffer used to format the <i>toKey</i> (optional). A new * byte[] will be allocated if this is <code>null</code>, but the * same byte[] can be reused for multiple invocations. The buffer * MUST be dimensioned to * {@link BlobsIndexHelper#TERMS_INDEX_KEY_SIZE}. * * @return The toKey. */ byte[] makeToKey(final byte[] fromKey, final byte[] tmp) { assert fromKey.length == SIZEOF_PREFIX_KEY; final byte[] toKey; if (tmp == null) { // Allocate a temporary buffer. toKey = new byte[SIZEOF_PREFIX_KEY]; } else if (tmp.length != SIZEOF_PREFIX_KEY) { // Caller's buffer is the wrong size. throw new IllegalArgumentException(); } else { // Use the caller's buffer. toKey = tmp; } // Copy the fromKey into the temporary buffer. System.arraycopy(fromKey, 0/* srcPos */, toKey/* dest */, 0/* destPos */, SIZEOF_PREFIX_KEY/* length */); // Form the successor (side-effect on the toKey buffer). SuccessorUtil.successor(toKey); // Return the successor of the fromKey. return tmp; } /** * Return a new {@link IKeyBuilder} suitable for formatting keys for the * TERMS index. * * @return The {@link IKeyBuilder}. */ public IKeyBuilder newKeyBuilder() { return new KeyBuilder(TERMS_INDEX_KEY_SIZE); } /** * Exception thrown if the maximum size of the collision bucket would be * exceeded for some {@link BigdataValue}. * * @author thompsonbry */ public static class CollisionBucketSizeException extends RuntimeException { /** * */ private static final long serialVersionUID = 1L; public CollisionBucketSizeException(final long rangeCount) { super("ncoll=" + rangeCount); } } }