package com.bigdata.bop.rdf.filter; import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicBoolean; import com.bigdata.bop.BOp; import com.bigdata.bop.BTreeAnnotations; import com.bigdata.bop.HTreeAnnotations; import com.bigdata.bop.HashMapAnnotations; import com.bigdata.bop.ap.filter.BOpFilterBase; import com.bigdata.bop.ap.filter.DistinctFilter; import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.btree.BTree; import com.bigdata.btree.BloomFilterFactory; import com.bigdata.btree.DefaultTupleSerializer; import com.bigdata.btree.HTreeIndexMetadata; import com.bigdata.btree.ICheckpointProtocol; import com.bigdata.btree.ITupleSerializer; import com.bigdata.btree.IndexMetadata; import com.bigdata.btree.keys.ASCIIKeyBuilderFactory; import com.bigdata.btree.keys.IKeyBuilder; import com.bigdata.btree.raba.codec.EmptyRabaValueCoder; import com.bigdata.btree.raba.codec.FrontCodedRabaCoder; import com.bigdata.htree.HTree; import com.bigdata.io.DirectBufferPool; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.sparql.ast.eval.ASTConstructIterator; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPO; import com.bigdata.rdf.spo.SPOKeyOrder; import com.bigdata.rwstore.sector.MemStore; import com.bigdata.rwstore.sector.MemoryManager; import com.bigdata.util.BytesUtil.UnsignedByteArrayComparator; import cutthecrap.utils.striterators.Filter; import cutthecrap.utils.striterators.Filterator; import cutthecrap.utils.striterators.ICloseable; import cutthecrap.utils.striterators.IPropertySet; /** * A scalable DISTINCT operator based for {@link SPO}s. * <p> * Note: While highly scalable, this class will absorb a minimum of one direct * buffer per use. 
This is because we do not have access to the memory manager
 * of the {@link IRunningQuery} on which the distinct filter is being run. For
 * this reason, it is allocating a private {@link MemStore} and using a
 * finalizer pattern to ensure the eventual release of that {@link MemStore} and
 * the backing direct buffers.
 * <p>
 * Note: This can not be used with pipelined joins because it would allocate one
 * instance per as-bound evaluation of the pipeline join.
 * <p>
 * Note: You can change the code over the HTree/BTree by modifying only a few
 * lines. See the comments in the file.
 *
 * TODO Reads against the index will eventually degrade since we can not use
 * ordered reads because the iterator filter pattern itself is not vectored. We
 * might be able to fix this with a chunked filter pattern. Otherwise fixing
 * this will require a more significant refactor.
 *
 * TODO It would be nicer if we left the MRU 10k in the map and evicted the LRU
 * 10k each time the map reached 20k. This can not be done with the
 * {@link LinkedHashMap} as its API is not sufficient for this purpose. However,
 * similar batch LRU update classes have been defined in the
 * <code>com.bigdata.cache</code> package and could be adapted here for that
 * purpose.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id: DistinctElementFilter.java 3466 2010-08-27 14:28:04Z
 *          thompsonbry $
 */
public class NativeDistinctFilter extends BOpFilterBase {

    private static final long serialVersionUID = 1L;

    /**
     * Annotations understood by the {@link NativeDistinctFilter}.
     */
    public interface Annotations extends
            com.bigdata.bop.ap.filter.BOpFilter.Annotations, BTreeAnnotations,
            HashMapAnnotations {

        /**
         * Overrides the default initial capacity to be relatively large. Up to
         * this many entries will be permitted. Once the map reaches that
         * threshold, the entries will be flushed through to the backing index
         * in order to vector updates against that index.
         */
        int DEFAULT_INITIAL_CAPACITY = 10000;

        /**
         * The default maximum length of an inlined {@link IV} before it is
         * converted into a raw record reference. Encoded {@link IV}s tend to
         * be relatively small so we are generally better off inlining them
         * into the bucket page of the {@link HTree}.
         */
        int DEFAULT_MAX_RECLEN = 32;

        /**
         * The {@link SPOKeyOrder} of the index whose natural order determines
         * the order in which {@link ISPO}s arrive at this filter (required).
         *
         * @see NativeDistinctFilter#getFilterKeyOrder(SPOKeyOrder)
         */
        String KEY_ORDER = "keyOrder";

    }

    /**
     * An instance using the default configuration for the in memory hash map.
     *
     * @param indexKeyOrder
     *            The natural order in which the {@link ISPO}s will arrive at
     *            this filter. This is used to decide on the filter key order
     *            which will have the best locality given the order of arrival.
     */
    public static NativeDistinctFilter newInstance(
            final SPOKeyOrder indexKeyOrder) {

        return new NativeDistinctFilter(BOp.NOARGS, Collections.singletonMap(
                Annotations.KEY_ORDER, (Object) indexKeyOrder));

    }

    /**
     * Constructor required for {@link com.bigdata.bop.BOpUtility#deepCopy(FilterNode)}.
     */
    public NativeDistinctFilter(final NativeDistinctFilter op) {

        super(op);

    }

    /**
     * Required shallow copy constructor.
     */
    public NativeDistinctFilter(final BOp[] args,
            final Map<String, Object> annotations) {

        super(args, annotations);

        // MUST be specified (fail fast if the caller forgot it).
        getRequiredProperty(Annotations.KEY_ORDER);

    }

    @SuppressWarnings("rawtypes")
    @Override
    final protected Iterator filterOnce(Iterator src, final Object context) {

        return new Filterator(src, context, new DistinctFilterImpl(this));

    }

    /**
     * Return the 3-component key order which has the best locality given that
     * the SPOs will be arriving in the natural order of the
     * <i>indexKeyOrder</i>. This is the keyOrder that we will use for the
     * filter. This gives the filter index structure the best possible locality
     * in terms of the order in which the SPOs are arriving.
     * <p>
     * The return value is an <code>int[3]</code>. The index is the ordinal
     * position of the triples mode key component for the filter keys. The
     * value at that index is the position in the {@link SPOKeyOrder} of the
     * quads mode index whose natural order determines the order of arrival of
     * the {@link ISPO} objects at this filter.
     * <p>
     * Thus, given indexKeyOrder = {@link SPOKeyOrder#CSPO}, the array:
     *
     * <pre>
     * int[] = {1,2,3}
     * </pre>
     *
     * would correspond to the filter key order SPO, which is the best possible
     * filter key order for the natural order of the {@link SPOKeyOrder#CSPO}
     * index.
     * <p>
     * Note, however, that key orders can be expressed in this manner which are
     * not defined by {@link SPOKeyOrder}. For example, given
     * {@link SPOKeyOrder#PCSO} the best filter key order is <code>PSO</code>.
     * While there is no <code>PSO</code> key order declared by the
     * {@link SPOKeyOrder} class, we can use
     *
     * <pre>
     * int[] = {0,2,3}
     * </pre>
     *
     * which models the <code>PSO</code> key order for the purposes of this
     * class.
     * <p>
     * Note: This method now accepts triples in support of the
     * {@link ASTConstructIterator}
     *
     * @see Annotations#INDEX_KEY_ORDER
     *
     * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/579">
     *      CONSTRUCT should apply DISTINCT (s,p,o) filter </a>
     */
    public static int[] getFilterKeyOrder(final SPOKeyOrder indexKeyOrder) {

        if (indexKeyOrder == null)
            throw new IllegalArgumentException();

        final int[] filterKeyOrder;
        switch (indexKeyOrder.index()) {
        // TRIPLES
        case SPOKeyOrder._SPO:
            filterKeyOrder = new int[] { 0, 1, 2 };
            break;
        case SPOKeyOrder._POS:
            filterKeyOrder = new int[] { 1, 2, 0 };
            break;
        case SPOKeyOrder._OSP:
            filterKeyOrder = new int[] { 2, 0, 1 };
            break;
        // QUADS
        case SPOKeyOrder._SPOC:
            filterKeyOrder = new int[] { 0, 1, 2 };
            break;
        case SPOKeyOrder._CSPO:
            filterKeyOrder = new int[] { 1, 2, 3 };
            break;
        case SPOKeyOrder._POCS:
            filterKeyOrder = new int[] { 0, 1, 3 };
            break;
        case SPOKeyOrder._OCSP:
            filterKeyOrder = new int[] { 0, 2, 3 };
            break;
        case SPOKeyOrder._PCSO:
            filterKeyOrder = new int[] { 0, 2, 3 };
            break;
        case SPOKeyOrder._SOPC:
            filterKeyOrder = new int[] { 0, 1, 2 };
            break;
        default:
            throw new AssertionError();
        }

        return filterKeyOrder;

    }

    /**
     * A {@link Filter} which passes only the DISTINCT {@link ISPO}s and is
     * backed by a scalable data structure (BTree or HTree).
     *
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     */
    static public class DistinctFilterImpl extends Filter implements ICloseable {

        private static final long serialVersionUID = 1L;

        /**
         * The size at which we will batch evict things from the LRU.
         */
        private final int nominalCapacity;

        /**
         * The fast JVM based cache. This is always allocated.
         */
        private final LinkedHashMap<ISPO, byte[]> lru;

        /**
         * The metadata used to create the index.
         */
        private final IndexMetadata metadata;

        /**
         * The object used to format the keys for the index.
         */
        private final IKeyBuilder keyBuilder;

        /**
         * The key order used to build the triples mode keys for the DISTINCT
         * SPO filter.
         *
         * @see NativeDistinctFilter#getFilterKeyOrder(SPOKeyOrder)
         */
        private final int[] filterKeyOrder;

        /**
         * A persistence capable index for very large data sets. This is
         * allocated IFF the {@link #lru} overflows at least once.
         * <p>
         * Note: May be either a {@link BTree} or an {@link HTree}. The code
         * has paths for both.
         */
        private volatile ICheckpointProtocol index;

        /**
         * <code>true</code> until {@link #close() closed}.
         */
        private final AtomicBoolean open = new AtomicBoolean(true);

        /**
         * When <code>true</code>, the {@link BTree} will be used. When
         * <code>false</code> the {@link HTree}.
         * <p>
         * Note: Historical testing indicated that the {@link BTree} was faster
         * for this application.
         *
         * TODO Edit HTree/BTree here.
         */
        private final boolean isBTree = true;

        /**
         * The backing store (native memory).
         */
        private volatile MemStore store;

        /*
         * NOTE(review): finalize() is deprecated in modern Java; a
         * java.lang.ref.Cleaner would be preferable. Retained here since the
         * class documentation explicitly relies on the finalizer pattern to
         * release the direct buffers.
         */
        @Override
        protected void finalize() throws Throwable {

            close();

            super.finalize();

        }

        /**
         * Release resources associated with the filter.
         * <p>
         * Note: This is done automatically by {@link #finalize()}, but it
         * should be done pro-actively whenever possible.
         *
         * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/582">
         *      IStriterator does not support close() protocol for Ifilter </a>
         */
        @Override
        public void close() {

            if (open.compareAndSet(true/* expect */, false/* update */)) {

                /*
                 * Close when first invoked. Subsequent invocations (including
                 * the finalizer) are NOPs.
                 */

                if (index != null) {
                    index.close();
                    index = null;
                }

                if (store != null) {
                    store.close();
                    store = null;
                }

            }

        }

        /**
         * Return the named property, throwing an {@link IllegalStateException}
         * if it was not bound.
         */
        @SuppressWarnings("unchecked")
        static private <T> T getRequiredProperty(final IPropertySet pset,
                final String name) {

            final Object val = pset.getProperty(name);

            // Fixed: the original tested [val == null] twice in a row.
            if (val == null)
                throw new IllegalStateException("Required property: " + name
                        + " : " + NativeDistinctFilter.class);

            return (T) val;

        }

        /**
         * Return the named property, using the caller's default if it was not
         * bound.
         */
        @SuppressWarnings("unchecked")
        static private <T> T getProperty(final IPropertySet pset,
                final String name, final T defaultValue) {

            final Object val = pset.getProperty(name);

            if (val != null)
                return (T) val;

            return defaultValue;

        }

        /**
         * DISTINCT {@link ISPO} filter based on persistence capable data
         * structures.
         *
         * @param properties
         *            Used to configure the DISTINCT filter.
         *
         * @see DistinctFilter.Annotations
         */
        public DistinctFilterImpl(final IPropertySet properties) {

            final int initialCapacity = getProperty(properties,
                    Annotations.INITIAL_CAPACITY,
                    Annotations.DEFAULT_INITIAL_CAPACITY);

            final float loadFactor = getProperty(properties,
                    Annotations.LOAD_FACTOR, Annotations.DEFAULT_LOAD_FACTOR);

            lru = new LinkedHashMap<ISPO, byte[]>(initialCapacity, loadFactor);

            // Eviction threshold for the LRU (same as its initial capacity).
            this.nominalCapacity = initialCapacity;

            /*
             * Setup the index. It will be created on demand.
             *
             * Note: This sets up the keyBuilder. We need that even before the
             * index is created.
             */
            {

                final SPOKeyOrder indexKeyOrder = (SPOKeyOrder) getRequiredProperty(
                        properties, Annotations.KEY_ORDER);

                filterKeyOrder = getFilterKeyOrder(indexKeyOrder);

                // Setup BTree/HTree.
                metadata = isBTree ? new IndexMetadata(UUID.randomUUID())
                        : new HTreeIndexMetadata(UUID.randomUUID());

                // IFF BTree.
                // TODO 256 overrides BTreeAnnotations.DEFAULT_BRANCHING_FACTOR.
                metadata.setBranchingFactor(getProperty(properties,
                        BTreeAnnotations.BRANCHING_FACTOR, 256));

                // IFF HTree.
                if (metadata instanceof HTreeIndexMetadata) {

                    ((HTreeIndexMetadata) metadata)
                            .setAddressBits(getProperty(properties,
                                    HTreeAnnotations.ADDRESS_BITS,
                                    HTreeAnnotations.DEFAULT_ADDRESS_BITS));

                }

                metadata.setRawRecords(getProperty(properties,
                        Annotations.RAW_RECORDS,
                        Annotations.DEFAULT_RAW_RECORDS));

                // No values are stored under the keys.
                metadata.setMaxRecLen(0);

                metadata.setBloomFilterFactory(BloomFilterFactory.DEFAULT);

                metadata.setWriteRetentionQueueCapacity(getProperty(properties,
                        Annotations.WRITE_RETENTION_QUEUE_CAPACITY,
                        Annotations.DEFAULT_WRITE_RETENTION_QUEUE_CAPACITY));

                final int ratio = 32; // TODO Config/tune front-coding ratio.

                @SuppressWarnings("rawtypes")
                final ITupleSerializer<?, ?> tupleSer = new DefaultTupleSerializer(
                        new ASCIIKeyBuilderFactory(64/* initialCapacity */),//
                        new FrontCodedRabaCoder(ratio),// keys
                        EmptyRabaValueCoder.INSTANCE // vals
                );

                metadata.setTupleSerializer(tupleSer);

                // Used for building the index keys.
                keyBuilder = tupleSer.getKeyBuilder();

            }

        }

        /**
         * Evict everything in the LRU cache onto the persistence capable
         * index.
         */
        private void evictAll() {

            if (index == null) {

                // Lazily allocate the index.
                allocateIndex();

            }

            final int n = lru.size();

            final byte[][] a = new byte[n][];

            {

                // Evict everything into an array.
                final Iterator<Map.Entry<ISPO, byte[]>> itr = lru.entrySet()
                        .iterator();

                int i = 0;

                while (itr.hasNext()) {

                    a[i++] = itr.next().getValue();

                    itr.remove();

                }

            }

            // Vector: sort into unsigned byte[] order for index locality.
            Arrays.sort(a, 0, a.length, UnsignedByteArrayComparator.INSTANCE);

            // Insert
            if (index instanceof BTree) {

                for (int i = 0; i < a.length; i++) {

                    add((BTree) index, a[i]);

                }

            } else if (index instanceof HTree) {

                for (int i = 0; i < a.length; i++) {

                    add((HTree) index, a[i]);

                }

            } else
                throw new AssertionError();

        }

        /**
         * Create the persistence capable index.
         */
        private void allocateIndex() {

            if (index != null)
                throw new IllegalStateException();

            if (!open.get()) {
                // Explicitly closed.
                throw new IllegalStateException();
            }

            /*
             * This wraps an efficient raw store interface around a child memory
             * manager created from the IMemoryManager which is backing the
             * query.
             */
            store = new MemStore(new MemoryManager(DirectBufferPool.INSTANCE));

            /*
             * Create the index. It will support incremental eviction and
             * persistence (against the memory manager).
             */
            if (isBTree) {

                index = BTree.create(store, metadata);

            } else {

                index = HTree.create(store, (HTreeIndexMetadata) metadata);

            }

        }

        /**
         * Vectored DISTINCT filter.
         */
        @Override
        public boolean isValid(final Object obj) {

            final ISPO spo = (ISPO) obj;

            return add(spo);

        }

        /**
         * Add the {@link SPO} to the collection. This has the same semantics as
         * {@link Set#add(Object)}. The "collection" has two layers. A JVM
         * {@link LinkedHashMap}, which provides fast tests for recently
         * observed objects, and a persistence capable index, which scales to
         * very large data sets. The {@link #lru} is checked first. If the
         * object is not found, then the persistence capable index is checked.
         * If the object is not found there either, then object will be added
         * the {@link #lru}. Objects inserted into the {@link #lru} are
         * eventually batched through to the backing index.
         *
         * @param spo
         *            The {@link SPO}.
         *
         * @return <code>true</code> if the collection was modified.
         */
        private boolean add(final ISPO spo) {

            if (lru.containsKey(spo)) {

                // already in the LRU
                return false;

            }

            /*
             * Build the key in the order in which the hash join is visiting the
             * B+Tree AP. This gives the DISTINCT index the best possible
             * locality in terms of the natural order in which the SPOs will be
             * arriving.
             */
            keyBuilder.reset();

            for (int i = 0; i < 3; i++) {

                IVUtility.encode(keyBuilder, spo.get(filterKeyOrder[i]));

            }

            final byte[] key = keyBuilder.getKey();

            if (index != null) {

                // Test index for this key.
                if (index instanceof BTree) {

                    if (((BTree) index).contains(key)) {

                        // Already in the index.
                        return false;

                    }

                } else if (index instanceof HTree) {

                    if (((HTree) index).contains(key)) {

                        // Already in the index.
                        return false;

                    }

                } else
                    throw new AssertionError();

            }

            // Add to LRU.
            lru.put(spo, key);

            if (lru.size() >= nominalCapacity) {

                // Batch evict the entries into the index.
                evictAll();

            }

            // Return true since this is an SPO we had not seen before.
            return true;

        }

        /**
         * Add to {@link BTree}.
         *
         * @param members
         * @param key
         * @return <code>true</code> iff not already present.
         */
        private boolean add(final BTree members, final byte[] key) {

            if (members.contains(key)) {

                return false;

            }

            members.insert(key, null/* val */);

            return true;

        }

        /**
         * Add to {@link HTree}.
         *
         * @param members
         * @param key
         * @return <code>true</code> iff not already present.
         */
        private boolean add(final HTree members, final byte[] key) {

            if (members.contains(key)) {

                return false;

            }

            // Add to the map.
            members.insert(key, null/* val */);

            return true;

        }

    }

}