package com.bigdata.bop.rdf.filter; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicBoolean; import com.bigdata.bop.BOp; import com.bigdata.bop.BTreeAnnotations; import com.bigdata.bop.HTreeAnnotations; import com.bigdata.bop.HashMapAnnotations; import com.bigdata.bop.ap.filter.BOpFilterBase; import com.bigdata.bop.ap.filter.DistinctFilter; import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.btree.BTree; import com.bigdata.btree.BloomFilterFactory; import com.bigdata.btree.DefaultTupleSerializer; import com.bigdata.btree.HTreeIndexMetadata; import com.bigdata.btree.ICheckpointProtocol; import com.bigdata.btree.ITupleSerializer; import com.bigdata.btree.IndexMetadata; import com.bigdata.btree.keys.ASCIIKeyBuilderFactory; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.raba.codec.EmptyRabaValueCoder; import com.bigdata.btree.raba.codec.FrontCodedRabaCoder; import com.bigdata.htree.HTree; import com.bigdata.io.DirectBufferPool; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.sparql.ast.eval.ASTConstructIterator; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPO; import com.bigdata.rdf.spo.SPOKeyOrder; import com.bigdata.rwstore.sector.MemStore; import com.bigdata.rwstore.sector.MemoryManager; import com.bigdata.util.BytesUtil.UnsignedByteArrayComparator; import cutthecrap.utils.striterators.Filter; import cutthecrap.utils.striterators.Filterator; import cutthecrap.utils.striterators.ICloseable; import cutthecrap.utils.striterators.IPropertySet; /** * A scalable DISTINCT operator based for {@link SPO}s. * <p> * Note: While highly scalable, this class will absorb a minimum of one direct * buffer per use. 
This is because we do not have access to the memory manager
 * of the {@link IRunningQuery} on which the distinct filter is being run. For
 * this reason, it is allocating a private {@link MemStore} and using a
 * finalizer pattern to ensure the eventual release of that {@link MemStore} and
 * the backing direct buffers.
 * <p>
 * Note: This can not be used with pipelined joins because it would allocate one
 * instance per as-bound evaluation of the pipeline join.
 * <p>
 * Note: You can change the code over the HTree/BTree by modifying only a few
 * lines. See the comments in the file.
 *
 * TODO Reads against the index will eventually degrade since we can not use
 * ordered reads because the iterator filter pattern itself is not vectored. We
 * might be able to fix this with a chunked filter pattern. Otherwise fixing
 * this will require a more significant refactor.
 *
 * TODO It would be nicer if we left the MRU 10k in the map and evicted the LRU
 * 10k each time the map reached 20k. This can not be done with the
 * {@link LinkedHashMap} as its API is not sufficient for this purpose. However,
 * similar batch LRU update classes have been defined in the
 * <code>com.bigdata.cache</code> package and could be adapted here for that
 * purpose.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z
 *          thompsonbry $
 */
public class NativeDistinctFilter extends BOpFilterBase {

    private static final long serialVersionUID = 1L;

    /**
     * Annotations understood by the {@link NativeDistinctFilter}.
     */
    public interface Annotations extends
            com.bigdata.bop.ap.filter.BOpFilter.Annotations, BTreeAnnotations,
            HashMapAnnotations {

        /**
         * Overrides the default initial capacity to be relatively large. Up to
         * this many entries will be permitted. Once the map reaches that
         * threshold, the entries will be flushed through to the backing index
         * in order to vector updates against that index.
         */
        int DEFAULT_INITIAL_CAPACITY = 10000;

        /**
         * The default maximum length of an inlined {@link IV} before it is
         * converted into a raw record reference. Encoded {@link IV}s tend to
         * be relatively small so we are generally better off inlining them
         * into the bucket page of the {@link HTree}.
         */
        int DEFAULT_MAX_RECLEN = 32;

        /**
         * The {@link SPOKeyOrder} of the index whose natural order determines
         * the order in which {@link ISPO}s arrive at this filter (required).
         *
         * @see NativeDistinctFilter#getFilterKeyOrder(SPOKeyOrder)
         */
        String KEY_ORDER = "keyOrder";

    }

    /**
     * An instance using the default configuration for the in memory hash map.
     *
     * @param indexKeyOrder
     *            The natural order in which the {@link ISPO}s will arrive at
     *            this filter. This is used to decide on the filter key order
     *            which will have the best locality given the order of arrival.
     */
    public static NativeDistinctFilter newInstance(
            final SPOKeyOrder indexKeyOrder) {

        return new NativeDistinctFilter(BOp.NOARGS, Collections.singletonMap(
                Annotations.KEY_ORDER, (Object) indexKeyOrder));

    }

    /**
     * Constructor required for {@link com.bigdata.bop.BOpUtility#deepCopy(FilterNode)}.
     */
    public NativeDistinctFilter(final NativeDistinctFilter op) {

        super(op);

    }

    /**
     * Required shallow copy constructor.
     */
    public NativeDistinctFilter(final BOp[] args,
            final Map<String, Object> annotations) {

        super(args, annotations);

        // MUST be specified (fail fast if the caller forgot it).
        getRequiredProperty(Annotations.KEY_ORDER);

    }

    @SuppressWarnings("rawtypes")
    @Override
    final protected Iterator filterOnce(Iterator src, final Object context) {

        return new Filterator(src, context, new DistinctFilterImpl(this));

    }

    /**
     * Return the 3-component key order which has the best locality given that
     * the SPOs will be arriving in the natural order of the
     * <i>indexKeyOrder</i>. This is the keyOrder that we will use for the
     * filter. This gives the filter index structure the best possible locality
     * in terms of the order in which the SPOs are arriving.
     * <p>
     * The return value is an <code>int[3]</code>. The index is the ordinal
     * position of the triples mode key component for the filter keys. The
     * value at that index is the position in the {@link SPOKeyOrder} of the
     * quads mode index whose natural order determines the order of arrival of
     * the {@link ISPO} objects at this filter.
     * <p>
     * Thus, given indexKeyOrder = {@link SPOKeyOrder#CSPO}, the array:
     *
     * <pre>
     * int[] = {1,2,3}
     * </pre>
     *
     * would correspond to the filter key order SPO, which is the best possible
     * filter key order for the natural order of the {@link SPOKeyOrder#CSPO}
     * index.
     * <p>
     * Note, however, that key orders can be expressed in this manner which are
     * not defined by {@link SPOKeyOrder}. For example, given
     * {@link SPOKeyOrder#PCSO} the best filter key order is <code>PSO</code>.
     * While there is no <code>PSO</code> key order declared by the
     * {@link SPOKeyOrder} class, we can use
     *
     * <pre>
     * int[] = {0,2,3}
     * </pre>
     *
     * which models the <code>PSO</code> key order for the purposes of this
     * class.
     * <p>
     * Note: This method now accepts triples in support of the
     * {@link ASTConstructIterator}
     *
     * @see Annotations#INDEX_KEY_ORDER
     *
     * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/579">
     *      CONSTRUCT should apply DISTINCT (s,p,o) filter </a>
     */
    public static int[] getFilterKeyOrder(final SPOKeyOrder indexKeyOrder) {

        if (indexKeyOrder == null)
            throw new IllegalArgumentException();

        final int[] filterKeyOrder;
        switch (indexKeyOrder.index()) {
        // TRIPLES
        case SPOKeyOrder._SPO:
            filterKeyOrder = new int[] { 0, 1, 2 };
            break;
        case SPOKeyOrder._POS:
            filterKeyOrder = new int[] { 1, 2, 0 };
            break;
        case SPOKeyOrder._OSP:
            filterKeyOrder = new int[] { 2, 0, 1 };
            break;
        // QUADS
        case SPOKeyOrder._SPOC:
            filterKeyOrder = new int[] { 0, 1, 2 };
            break;
        case SPOKeyOrder._CSPO:
            filterKeyOrder = new int[] { 1, 2, 3 };
            break;
        case SPOKeyOrder._POCS:
            filterKeyOrder = new int[] { 0, 1, 3 };
            break;
        case SPOKeyOrder._OCSP:
            filterKeyOrder = new int[] { 0, 2, 3 };
            break;
        case SPOKeyOrder._PCSO:
            filterKeyOrder = new int[] { 0, 2, 3 };
            break;
        case SPOKeyOrder._SOPC:
            filterKeyOrder = new int[] { 0, 1, 2 };
            break;
        default:
            throw new AssertionError();
        }

        return filterKeyOrder;

    }

    /**
     * A {@link Filter} which passes only the DISTINCT {@link ISPO}s and is
     * backed by a scalable data structure (BTree or HTree).
     *
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     */
    static public class DistinctFilterImpl extends Filter implements ICloseable {

        private static final long serialVersionUID = 1L;

        /**
         * The size at which we will batch evict things from the LRU.
         */
        private final int nominalCapacity;

        /**
         * The fast JVM based cache. This is always allocated.
         */
        private final LinkedHashMap<ISPO, byte[]> lru;

        /**
         * The metadata used to create the index.
         */
        private final IndexMetadata metadata;

        /**
         * The object used to format the keys for the index.
         */
        private final IKeyBuilder keyBuilder;

        /**
         * The key order used to build the triples mode keys for the DISTINCT
         * SPO filter.
         *
         * @see NativeDistinctFilter#getFilterKeyOrder(SPOKeyOrder)
         */
        private final int[] filterKeyOrder;

        /**
         * A persistence capable index for very large data sets. This is
         * allocated IFF the {@link #lru} overflows at least once.
         * <p>
         * Note: May be either a {@link BTree} or an {@link HTree}. The code
         * has paths for both.
         */
        private volatile ICheckpointProtocol index;

        /**
         * <code>true</code> until {@link #close() closed}.
         */
        private final AtomicBoolean open = new AtomicBoolean(true);

        /**
         * When <code>true</code>, the {@link BTree} will be used. When
         * <code>false</code> the {@link HTree}.
         * <p>
         * Note: Historical testing indicated that the {@link BTree} was faster
         * for this application.
         *
         * TODO Edit HTree/BTree here.
         */
        private final boolean isBTree = true;

        /**
         * The backing store (native memory).
         */
        private volatile MemStore store;

        /*
         * NOTE(review): finalize() is deprecated in modern Java; a
         * java.lang.ref.Cleaner would be preferable. Retained here since the
         * class documentation explicitly relies on the finalizer pattern to
         * release the direct buffers.
         */
        @Override
        protected void finalize() throws Throwable {

            close();

            super.finalize();

        }

        /**
         * Release resources associated with the filter.
         * <p>
         * Note: This is done automatically by {@link #finalize()}, but it
         * should be done pro-actively whenever possible.
         *
         * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/582">
         *      IStriterator does not support close() protocol for Ifilter </a>
         */
        @Override
        public void close() {

            if (open.compareAndSet(true/* expect */, false/* update */)) {

                /*
                 * Close when first invoked. Subsequent invocations (including
                 * the finalizer) are NOPs.
                 */

                if (index != null) {
                    index.close();
                    index = null;
                }

                if (store != null) {
                    store.close();
                    store = null;
                }

            }

        }

        /**
         * Return the named property, throwing an {@link IllegalStateException}
         * if it was not bound.
         */
        @SuppressWarnings("unchecked")
        static private <T> T getRequiredProperty(final IPropertySet pset,
                final String name) {

            final Object val = pset.getProperty(name);

            // Fixed: the original tested [val == null] twice in a row.
            if (val == null)
                throw new IllegalStateException("Required property: " + name
                        + " : " + NativeDistinctFilter.class);

            return (T) val;

        }

        /**
         * Return the named property, using the caller's default if it was not
         * bound.
         */
        @SuppressWarnings("unchecked")
        static private <T> T getProperty(final IPropertySet pset,
                final String name, final T defaultValue) {

            final Object val = pset.getProperty(name);

            if (val != null)
                return (T) val;

            return defaultValue;

        }

        /**
         * DISTINCT {@link ISPO} filter based on persistence capable data
         * structures.
         *
         * @param properties
         *            Used to configure the DISTINCT filter.
         *
         * @see DistinctFilter.Annotations
         */
        public DistinctFilterImpl(final IPropertySet properties) {

            final int initialCapacity = getProperty(properties,
                    Annotations.INITIAL_CAPACITY,
                    Annotations.DEFAULT_INITIAL_CAPACITY);

            final float loadFactor = getProperty(properties,
                    Annotations.LOAD_FACTOR, Annotations.DEFAULT_LOAD_FACTOR);

            lru = new LinkedHashMap<ISPO, byte[]>(initialCapacity, loadFactor);

            // Eviction threshold for the LRU (same as its initial capacity).
            this.nominalCapacity = initialCapacity;

            /*
             * Setup the index. It will be created on demand.
             *
             * Note: This sets up the keyBuilder. We need that even before the
             * index is created.
             */
            {

                final SPOKeyOrder indexKeyOrder = (SPOKeyOrder) getRequiredProperty(
                        properties, Annotations.KEY_ORDER);

                filterKeyOrder = getFilterKeyOrder(indexKeyOrder);

                // Setup BTree/HTree.
                metadata = isBTree ? new IndexMetadata(UUID.randomUUID())
                        : new HTreeIndexMetadata(UUID.randomUUID());

                // IFF BTree.
                // TODO 256 overrides BTreeAnnotations.DEFAULT_BRANCHING_FACTOR.
                metadata.setBranchingFactor(getProperty(properties,
                        BTreeAnnotations.BRANCHING_FACTOR, 256));

                // IFF HTree.
                if (metadata instanceof HTreeIndexMetadata) {

                    ((HTreeIndexMetadata) metadata)
                            .setAddressBits(getProperty(properties,
                                    HTreeAnnotations.ADDRESS_BITS,
                                    HTreeAnnotations.DEFAULT_ADDRESS_BITS));

                }

                metadata.setRawRecords(getProperty(properties,
                        Annotations.RAW_RECORDS,
                        Annotations.DEFAULT_RAW_RECORDS));

                // No values are stored under the keys.
                metadata.setMaxRecLen(0);

                metadata.setBloomFilterFactory(BloomFilterFactory.DEFAULT);

                metadata.setWriteRetentionQueueCapacity(getProperty(properties,
                        Annotations.WRITE_RETENTION_QUEUE_CAPACITY,
                        Annotations.DEFAULT_WRITE_RETENTION_QUEUE_CAPACITY));

                final int ratio = 32; // TODO Config/tune front-coding ratio.

                @SuppressWarnings("rawtypes")
                final ITupleSerializer<?, ?> tupleSer = new DefaultTupleSerializer(
                        new ASCIIKeyBuilderFactory(64/* initialCapacity */),//
                        new FrontCodedRabaCoder(ratio),// keys
                        EmptyRabaValueCoder.INSTANCE // vals
                );

                metadata.setTupleSerializer(tupleSer);

                // Used for building the index keys.
                keyBuilder = tupleSer.getKeyBuilder();

            }

        }

        /**
         * Evict everything in the LRU cache onto the persistence capable
         * index.
         */
        private void evictAll() {

            if (index == null) {

                // Lazily allocate the index.
                allocateIndex();

            }

            final int n = lru.size();

            final byte[][] a = new byte[n][];

            {

                // Evict everything into an array.
                final Iterator<Map.Entry<ISPO, byte[]>> itr = lru.entrySet()
                        .iterator();

                int i = 0;

                while (itr.hasNext()) {

                    a[i++] = itr.next().getValue();

                    itr.remove();

                }

            }

            // Vector: sort into unsigned byte[] order for index locality.
            Arrays.sort(a, 0, a.length, UnsignedByteArrayComparator.INSTANCE);

            // Insert
            if (index instanceof BTree) {

                for (int i = 0; i < a.length; i++) {

                    add((BTree) index, a[i]);

                }

            } else if (index instanceof HTree) {

                for (int i = 0; i < a.length; i++) {

                    add((HTree) index, a[i]);

                }

            } else
                throw new AssertionError();

        }

        /**
         * Create the persistence capable index.
         */
        private void allocateIndex() {

            if (index != null)
                throw new IllegalStateException();

            if (!open.get()) {
                // Explicitly closed.
                throw new IllegalStateException();
            }

            /*
             * This wraps an efficient raw store interface around a child memory
             * manager created from the IMemoryManager which is backing the
             * query.
             */
            store = new MemStore(new MemoryManager(DirectBufferPool.INSTANCE));

            /*
             * Create the index. It will support incremental eviction and
             * persistence (against the memory manager).
             */
            if (isBTree) {

                index = BTree.create(store, metadata);

            } else {

                index = HTree.create(store, (HTreeIndexMetadata) metadata);

            }

        }

        /**
         * Vectored DISTINCT filter.
         */
        @Override
        public boolean isValid(final Object obj) {

            final ISPO spo = (ISPO) obj;

            return add(spo);

        }

        /**
         * Add the {@link SPO} to the collection. This has the same semantics as
         * {@link Set#add(Object)}. The "collection" has two layers. A JVM
         * {@link LinkedHashMap}, which provides fast tests for recently
         * observed objects, and a persistence capable index, which scales to
         * very large data sets. The {@link #lru} is checked first. If the
         * object is not found, then the persistence capable index is checked.
         * If the object is not found there either, then object will be added
         * the {@link #lru}. Objects inserted into the {@link #lru} are
         * eventually batched through to the backing index.
         *
         * @param spo
         *            The {@link SPO}.
         *
         * @return <code>true</code> if the collection was modified.
         */
        private boolean add(final ISPO spo) {

            if (lru.containsKey(spo)) {

                // already in the LRU
                return false;

            }

            /*
             * Build the key in the order in which the hash join is visiting the
             * B+Tree AP. This gives the DISTINCT index the best possible
             * locality in terms of the natural order in which the SPOs will be
             * arriving.
             */
            keyBuilder.reset();

            for (int i = 0; i < 3; i++) {

                IVUtility.encode(keyBuilder, spo.get(filterKeyOrder[i]));

            }

            final byte[] key = keyBuilder.getKey();

            if (index != null) {

                // Test index for this key.
                if (index instanceof BTree) {

                    if (((BTree) index).contains(key)) {

                        // Already in the index.
                        return false;

                    }

                } else if (index instanceof HTree) {

                    if (((HTree) index).contains(key)) {

                        // Already in the index.
                        return false;

                    }

                } else
                    throw new AssertionError();

            }

            // Add to LRU.
            lru.put(spo, key);

            if (lru.size() >= nominalCapacity) {

                // Batch evict the entries into the index.
                evictAll();

            }

            // Return true since this is an SPO we had not seen before.
            return true;

        }

        /**
         * Add to {@link BTree}.
         *
         * @param members
         * @param key
         * @return <code>true</code> iff not already present.
         */
        private boolean add(final BTree members, final byte[] key) {

            if (members.contains(key)) {

                return false;

            }

            members.insert(key, null/* val */);

            return true;

        }

        /**
         * Add to {@link HTree}.
         *
         * @param members
         * @param key
         * @return <code>true</code> iff not already present.
         */
        private boolean add(final HTree members, final byte[] key) {

            if (members.contains(key)) {

                return false;

            }

            // Add to the map.
            members.insert(key, null/* val */);

            return true;

        }

    }

}