/*
 * DGExpander.java example
 *
 * Explorer: blazegraph-master - database-master
 */
package com.bigdata.rdf.sparql.ast.eval;

import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.Executor;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;

import org.apache.log4j.Logger;

import com.bigdata.bop.Constant;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.ap.Predicate;
import com.bigdata.btree.BTree;
import com.bigdata.btree.IIndex;
import com.bigdata.counters.CAT;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.SPOAccessPath;
import com.bigdata.relation.accesspath.BlockingBuffer;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.rule.IAccessPathExpander;
import com.bigdata.service.IBigdataClient;
import com.bigdata.striterator.ChunkedWrappedIterator;
import com.bigdata.striterator.IChunkedOrderedIterator;
import com.bigdata.striterator.IKeyOrder;
import com.bigdata.util.concurrent.LatchedExecutor;

import cutthecrap.utils.striterators.ICloseableIterator;
import cutthecrap.utils.striterators.IFilter;
import cutthecrap.utils.striterators.Striterator;

/**
 * Parallel subquery for a default graph access path. An expander pattern is
 * used to ensure that the "DISTINCT SPO" constraint is applied across the
 * subqueries rather than to each subquery individually.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 */
public class DGExpander implements IAccessPathExpander<ISPO> {

    protected static final Logger log = Logger.getLogger(DGExpander.class);

    private static final long serialVersionUID = 1L;

    final private int maxParallel;

    @SuppressWarnings("rawtypes")
    final private Collection<IV> graphs;

    final private long estimatedRangeCount;

    /**
     * Creates an expander which evaluates a default graph access path as a set
     * of per-graph subqueries run with limited parallelism.
     * 
     * @param maxParallel
     *            The parallelism limit handed to the {@link LatchedExecutor}
     *            on which the per-graph subqueries are run.
     * @param graphs
     *            The {@link IV}s of the graphs to be visited. One subquery is
     *            run for each element of this collection. The collection is
     *            retained by reference (it is not copied).
     * @param estimatedRangeCount
     *            The estimated range count for the subquery operation across
     *            those graphs.
     * 
     * @throws IllegalArgumentException
     *             if <i>graphs</i> is <code>null</code>.
     * 
     * @todo A parallelism limitation is placed on the ClientIndexView through
     *       the
     *       {@link IBigdataClient.Options#CLIENT_MAX_PARALLEL_TASKS_PER_REQUEST}
     *       . We should be able to override that through annotations on a query
     *       plan.
     */
    @SuppressWarnings("rawtypes")
    public DGExpander(final int maxParallel, final Collection<IV> graphs,
            final long estimatedRangeCount) {

        /*
         * Fail fast. A null collection would otherwise only be noticed later,
         * either in toString() or on the thread which schedules the per-graph
         * subqueries.
         */
        if (graphs == null)
            throw new IllegalArgumentException("graphs is null");

        this.maxParallel = maxParallel;

        this.graphs = graphs;

        this.estimatedRangeCount = estimatedRangeCount;

    }

    /**
     * Always returns <code>false</code>.
     * <p>
     * NOTE(review): presumably part of the {@link IAccessPathExpander}
     * contract - confirm against the interface.
     */
    public boolean backchain() {

        return false;

    }

    /**
     * Always returns <code>false</code>.
     * <p>
     * NOTE(review): presumably part of the {@link IAccessPathExpander}
     * contract - confirm against the interface.
     */
    public boolean runFirst() {

        return false;

    }

    /**
     * Wraps the given access path with one which evaluates it once per graph
     * and combines the results.
     * 
     * @param accessPath
     *            The original access path. It is cast to {@link SPOAccessPath}.
     * 
     * @return The wrapping access path.
     * 
     * @throws ClassCastException
     *             if <i>accessPath</i> is not an {@link SPOAccessPath}.
     */
    public IAccessPath<ISPO> getAccessPath(final IAccessPath<ISPO> accessPath) {

        // The wrapper needs the SPO specific access path type.
        final SPOAccessPath spoAccessPath = (SPOAccessPath) accessPath;

        return new DefaultGraphParallelEvaluationAccessPath(spoAccessPath);

    }

    /**
     * Human readable representation reporting the parallelism limit, the
     * number of graphs, the estimated range count and the graphs themselves.
     * <p>
     * Note: the complete graph collection is rendered, so the result can be
     * long when many graphs are visited.
     */
    @Override
    public String toString() {

        final StringBuilder sb = new StringBuilder(super.toString());

        sb.append("{maxParallel=").append(maxParallel);

        sb.append(",ngraphs=").append(graphs.size());

        sb.append(", estimatedRangeCount=").append(estimatedRangeCount);

        sb.append(", graphs=").append(graphs);

        sb.append("}");

        return sb.toString();

    }

    /**
     * Inner class evaluates the access path for each context using limited
     * parallelism, discarding the context argument for each {@link ISPO}, and
     * filtering out duplicate triples based on their (s,p,o) term identifiers.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     */
    private final class DefaultGraphParallelEvaluationAccessPath implements
            IAccessPath<ISPO> {

        /**
         * The original access path.
         */
        private final SPOAccessPath sourceAccessPath;

        final private Executor executor;

        /**
         * Human readable representation which includes the representation of
         * the wrapped (original) access path.
         */
        @Override
        public String toString() {

            final StringBuilder sb = new StringBuilder(super.toString());

            sb.append("{baseAccessPath=");

            sb.append(sourceAccessPath.toString());

            sb.append("}");

            return sb.toString();

        }

        /**
         * Wraps the original access path and creates the executor on which the
         * per-graph subqueries will be run.
         * 
         * @param accessPath
         *            The original access path.
         */
        public DefaultGraphParallelEvaluationAccessPath(
                final SPOAccessPath accessPath) {

            this.sourceAccessPath = accessPath;

            /*
             * Layer a latch over the executor service of the index manager so
             * that the subqueries are limited by [maxParallel].
             */
            this.executor = new LatchedExecutor(
                    accessPath.getIndexManager().getExecutorService(),
                    maxParallel);

        }

        /**
         * Delegates to the original access path.
         * 
         * @return The index reported by the original access path.
         */
        @Override
        public IIndex getIndex() {

            final IIndex index = sourceAccessPath.getIndex();

            return index;

        }

        /**
         * Delegates to the original access path.
         * 
         * @return The key order reported by the original access path.
         */
        @Override
        public IKeyOrder<ISPO> getKeyOrder() {

            final IKeyOrder<ISPO> keyOrder = sourceAccessPath.getKeyOrder();

            return keyOrder;

        }

        /**
         * Delegates to the original access path.
         * 
         * @return The predicate of the original access path (including its
         *         annotations).
         */
        @Override
        public IPredicate<ISPO> getPredicate() {

            final IPredicate<ISPO> predicate = sourceAccessPath.getPredicate();

            return predicate;

        }

        /**
         * Tests for emptiness by opening an iterator and checking whether it
         * can visit at least one element. The iterator is always closed before
         * returning.
         * 
         * @return <code>true</code> iff the iterator has nothing to visit.
         */
        @Override
        public boolean isEmpty() {

            final IChunkedOrderedIterator<ISPO> probe = iterator(
                    0L/* offset */, 1/* limit */, 1/* capacity */);

            try {

                final boolean foundOne = probe.hasNext();

                return !foundOne;

            } finally {

                // Release the iterator whether or not the probe succeeded.
                probe.close();

            }

        }

//        @Override
//        public ITupleIterator<ISPO> rangeIterator() {
//
//            return sourceAccessPath.rangeIterator();
//
//        }

        /**
         * Not supported by this access path.
         * <p>
         * Note: delegation would be a possible implementation, but SPARQL has
         * no SELECT ... INSERT or SELECT ... DELETE construction (at least at
         * this time) and hence nothing invokes this method from that context.
         * 
         * @throws UnsupportedOperationException
         *             always.
         */
        @Override
        public long removeAll() {

            throw new UnsupportedOperationException();

        }

        /**
         * Visits everything: delegates to
         * {@link #iterator(long, long, int)} with a zero offset, a zero limit
         * and a zero capacity.
         */
        @Override
        public IChunkedOrderedIterator<ISPO> iterator() {

            final long offset = 0L;

            final long limit = 0L;

            final int capacity = 0;

            return iterator(offset, limit, capacity);

        }

//        public IChunkedOrderedIterator<ISPO> iterator(final int limit,
//                final int capacity) {
//
//            return iterator(0L/* offset */, limit, capacity);
//
//        }

        /**
         * The common entry point for all iterator implementations. A new
         * {@link InnerIterator1} is created for each request and wrapped as a
         * chunked iterator.
         * <p>
         * NOTE(review): the arguments are handed to {@link InnerIterator1},
         * whose constructor (as written in this file) does not use them. It
         * looks like offset and limit are therefore not imposed on the visited
         * elements - confirm against the {@link IAccessPath} contract.
         * 
         * @param offset
         *            Passed through to {@link InnerIterator1}.
         * @param limit
         *            Passed through to {@link InnerIterator1}.
         * @param capacity
         *            Passed through to {@link InnerIterator1}.
         * 
         * @return The iterator. The caller is responsible for closing it.
         * 
         * @todo Consider an alternative implementation using fully parallel
         *       evaluation of the access paths, a merge sort to combine the
         *       chunks drawn from each access path, and an iterator which skips
         *       over duplicates by considering the last returned (s,p,o). That
         *       requires: (a) allocating a buffer each time we draw from the
         *       current chunks, sized by the total size of those chunks; and
         *       (b) only drawing keys from the current chunks up to the
         *       min(nextKey) of each chunk. The min(nextKey) constraint is
         *       what lets the merge sort get rid of duplicates: without it a
         *       later chunk from some access path could report an (s,p,o)
         *       which has already been visited. (The constraint provides a
         *       closed world assumption for filtering duplicates after the
         *       merge sort.)
         */
        @Override
        public IChunkedOrderedIterator<ISPO> iterator(final long offset,
                final long limit, final int capacity) {

            // Starts the asynchronous per-graph evaluation.
            final ICloseableIterator<ISPO> perGraphResults = new InnerIterator1(
                    offset, limit, capacity);

            // Adapt to the chunked iterator API.
            return new ChunkedWrappedIterator<ISPO>(perGraphResults);

        }

        /**
         * Iterator implementation based on limited parallelism over the
         * iterators for the {@link IAccessPath} associated with each graph in
         * the default graphs set and using a {@link BTree} to filter out
         * duplicate (s,p,o) tuples.
         * 
         * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
         *         Thompson</a>
         * @version $Id: DefaultGraphSolutionExpander.java 3678 2010-09-29
         *          15:48:34Z thompsonbry $
         */
        private class InnerIterator1 implements ICloseableIterator<ISPO> {

            // private final long offset;
            //
            // private final long limit;
            //
            // private final int capacity;

            private final CAT nAPsWithHits = new CAT();
            private final CAT nhits = new CAT();
            
            /**
             * @todo buffer chunks of {@link ISPO}s for more efficiency (lock
             *       amortization) and better alignment with the chunked source
             *       iterators. (It used to be that the only issue was
             *       {@link #hasNext()} having to maintain a chunk of known
             *       distinct tuples to be visited, but I think that wrapping
             *       the DISTINCT filter around the DGExpander fixed that
             *       problem.)
             */
            private final BlockingBuffer<ISPO> buffer;

            /**
             * The source iterator.
             */
            private final ICloseableIterator<ISPO> src;

            /**
             * Starts the asynchronous evaluation and sets up the iterator
             * which drains its results.
             * <p>
             * A {@link BlockingBuffer} is allocated using the chunk capacity
             * of the source access path. The task returned by
             * {@link #newRunIteratorsTask(BlockingBuffer)} is wrapped as a
             * {@link FutureTask}, set on the buffer, and submitted to the
             * executor service of the index manager. {@link #src} then reads
             * from the buffer, through the access path filter of the source
             * predicate when there is one.
             * <p>
             * Note: none of the arguments is used by this constructor (the
             * assignments below are commented out).
             * 
             * @param offset
             *            Not used.
             * @param limit
             *            Not used.
             * @param capacity
             *            Not used.
             * 
             * @throws RuntimeException
             *             wrapping anything thrown during the setup, after
             *             closing the buffer and canceling the task (if it
             *             was created).
             */
            public InnerIterator1(final long offset, final long limit,
                    final int capacity) {

                // this.offset = offset;
                //
                // this.limit = limit;
                //
                // this.capacity = capacity;

                this.buffer = new BlockingBuffer<ISPO>(sourceAccessPath
                        .getChunkCapacity());

                // Remains null until the task has been wrapped (see catch).
                FutureTask<Void> future = null;
                try {

                    /*
                     * Note: We do NOT get() this Future. This task will run
                     * asynchronously.
                     * 
                     * The Future is canceled IF (hopefully WHEN) the iterator
                     * is closed.
                     * 
                     * If the task itself throws an error, then it will use
                     * buffer#abort(cause) to notify the buffer of the cause (it
                     * will be passed along to the iterator) and to close the
                     * buffer (the iterator will notice that the buffer has been
                     * closed as well as that the cause was set on the buffer).
                     *
                     * NOTE(review): the RunIteratorsTask visible in this file
                     * does not call buffer#abort(cause). It throws from call()
                     * and closes the buffer in its finally block. Confirm that
                     * the iterator reports the cause by way of the Future.
                     *
                     * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/707">
                     *      BlockingBuffer.close() does not unblock threads </a>
                     */

                    // Wrap task as FutureTask.
                    future = new FutureTask<Void>(newRunIteratorsTask(buffer));

                    // set the future on the BlockingBuffer.
                    buffer.setFuture(future);

                    // submit task for execution.
                    sourceAccessPath.getIndexManager().getExecutorService()
                            .submit(future);

                    /*
                     * The outer access path will impose the "DISTINCT SPO"
                     * constraint.
                     */
                    // /*
                    // * Wrap the asynchronous iterator with one that imposes
                    // * a distinct (s,p,o) filter.
                    // */
                    // src = sourceAccessPath.getRelation()
                    // .distinctSPOIterator(buffer.iterator());
                    final IFilter filter = sourceAccessPath.getPredicate()
                            .getAccessPathFilter();
                    if (filter != null) {
                        // Apply the access path filter to the combined results.
                        src = new ChunkedWrappedIterator<ISPO>(new Striterator(
                                buffer.iterator()).addFilter(filter));
                    } else {
                        // No filter: read directly from the buffer.
                        src = buffer.iterator();
                    }

                } catch (Throwable ex) {

                    /*
                     * Setup failed. Release the buffer and cancel the task,
                     * logging (rather than throwing) any problem encountered
                     * while doing so, and then rethrow the original cause.
                     */
                    try {

                        buffer.close();

                        if (future != null) {

                            future.cancel(true/* mayInterruptIfRunning */);

                        }

                    } catch (Throwable t) {

                        log.error(t, t);

                    }

                    throw new RuntimeException(ex);

                }

            }

            /**
             * Closes the source iterator. Per the notes on the constructor,
             * closing the iterator is what is expected to cancel the task
             * which is filling the buffer.
             */
            public void close() {

                src.close();

            }

            /**
             * Delegates to the source iterator. When the source reports that
             * it is exhausted, a summary (the number of graphs, the number of
             * elements drained, the number of access paths which had at least
             * one hit, and the predicate) is logged @ INFO.
             * <p>
             * Note: the summary is logged on each invocation which returns
             * <code>false</code>.
             * 
             * @return <code>true</code> iff the source iterator has another
             *         element.
             */
            public boolean hasNext() {

                if (src.hasNext()) {

                    return true;

                }

                if (log.isInfoEnabled()) {

                    // The label matches the counter which is reported.
                    log.info("#graphs=" + graphs.size() + ", nhits=" + nhits
                            + ", apsWithHits=" + nAPsWithHits + ", pred="
                            + getPredicate());

                }

                return false;

            }

            /**
             * Delegates to the source iterator.
             * 
             * @return The next element reported by the source iterator.
             */
            public ISPO next() {

                final ISPO spo = src.next();

                return spo;

            }

            /**
             * Not supported.
             * 
             * @throws UnsupportedOperationException
             *             always.
             */
            public void remove() {

                throw new UnsupportedOperationException();

            }

            /**
             * Factory for the task which schedules one
             * {@link DrainIteratorTask} per graph and waits for them.
             * 
             * @param buffer
             *            The buffer handed to the new task. The task closes it
             *            when it is done.
             * 
             * @return A new {@link RunIteratorsTask}. The caller wraps it as
             *         the future which is set on the buffer.
             */
            private Callable<Void> newRunIteratorsTask(
                    final BlockingBuffer<ISPO> buffer) {

                final Callable<Void> task = new RunIteratorsTask(buffer);

                return task;

            }

            /**
             * {@link Callable} which queues one {@link DrainIteratorTask} per
             * graph on the {@link Executor}, waits for all of them, and
             * finally closes the buffer.
             */
            private final class RunIteratorsTask implements Callable<Void> {

                /** Closed by {@link #call()} once the subqueries are done. */
                private final BlockingBuffer<ISPO> buffer;

                /**
                 * @param buffer
                 *            The buffer to be closed when the task ends.
                 */
                public RunIteratorsTask(final BlockingBuffer<ISPO> buffer) {

                    this.buffer = buffer;

                }

                /**
                 * Schedules the per-graph tasks and awaits their futures in
                 * the order in which they were scheduled.
                 * 
                 * @return <code>null</code>, always.
                 * 
                 * @throws RuntimeException
                 *             wrapping anything thrown while scheduling or
                 *             awaiting the tasks. All scheduled tasks are
                 *             canceled first.
                 */
                public Void call() throws Exception {

                    final List<Future<Void>> pending = new LinkedList<Future<Void>>();

                    try {

                        // One drain task per graph.
                        for (IV<?, ?> graph : graphs) {

                            final FutureTask<Void> drain = new FutureTask<Void>(
                                    new DrainIteratorTask(graph));

                            pending.add(drain);

                            executor.execute(drain);

                        }

                        // Block until each of them is done.
                        for (Future<Void> drain : pending) {

                            drain.get();

                        }

                        return null;

                    } catch (Throwable cause) {

                        // Interrupt whatever is still running.
                        for (Future<Void> drain : pending) {

                            drain.cancel(true/* mayInterruptIfRunning */);

                        }

                        throw new RuntimeException(cause);

                    } finally {

                        // No more elements will be added to the buffer.
                        buffer.close();

                    }

                }

            }

            /**
             * {@link Callable} which evaluates the access path for a single
             * graph and copies everything it visits onto the blocking buffer.
             * <p>
             * Note: the top-level offset, limit and capacity must not be
             * applied to the per-graph iterator. Doing so would skip over
             * <i>offset</i> results for each graph. Any limit has to be
             * imposed on what is read from the blocking buffer instead.
             */
            private final class DrainIteratorTask implements Callable<Void> {

                /** The graph which is bound onto the context position. */
                final IV<?, ?> termId;

                /**
                 * @param termId
                 *            The graph to be visited.
                 * 
                 * @throws IllegalArgumentException
                 *             if the argument is <code>null</code>.
                 */
                public DrainIteratorTask(final IV<?, ?> termId) {

                    if (termId == null) {

                        throw new IllegalArgumentException();

                    }

                    this.termId = termId;

                }

                /**
                 * Runs the subquery for the graph. The hit counters are only
                 * updated when the iterator was drained without error.
                 * 
                 * @return <code>null</code>, always.
                 */
                public Void call() throws Exception {

                    if (log.isDebugEnabled()) {

                        log.debug("Running iterator: c=" + termId);

                    }

                    /*
                     * Annotations removed from the source predicate:
                     * 
                     * expander: we are the expander.
                     * 
                     * accessPathFilter: it wraps the DGExpander and must not
                     * be applied to each subquery.
                     * 
                     * Note: the keyOrder annotation is NOT removed here.
                     */
                    final String[] cleared = new String[] {
                            IPredicate.Annotations.ACCESS_PATH_EXPANDER,
                            IPredicate.Annotations.ACCESS_PATH_FILTER };

                    final Predicate<ISPO> stripped = (Predicate<ISPO>) sourceAccessPath
                            .getPredicate().clearAnnotations(cleared);

                    // The variable in the context position of the predicate.
                    final IVariable<?> contextVar = (IVariable<?>) stripped
                            .get(3/* cvar */);

                    // Bind the graph onto that variable.
                    final Predicate<ISPO> bound = stripped.asBound(contextVar,
                            new Constant<IV<?, ?>>(termId));

                    // The access path for the predicate as bound.
                    final IAccessPath<ISPO> graphAccessPath = sourceAccessPath
                            .getRelation().getAccessPath(bound);

                    final IChunkedOrderedIterator<ISPO> graphItr = graphAccessPath
                            .iterator();

                    try {

                        long visited = 0;

                        // @todo chunk at a time processing.
                        for (; graphItr.hasNext(); visited++) {

                            buffer.add(graphItr.next());

                        }

                        if (log.isDebugEnabled()) {

                            log.debug("Ran iterator: c=" + termId
                                    + ", nvisited=" + visited);

                        }

                        if (visited > 0) {

                            nhits.add(visited);

                            nAPsWithHits.increment();

                        }

                    } finally {

                        graphItr.close();

                    }

                    return null;

                }

            } // class DrainIteratorTask

        } // class InnerIterator

        /**
         * Reports the estimate which was given to the expander. Nothing is
         * counted.
         * 
         * @param exactIsIgnored
         *            Ignored. The same estimate is returned either way.
         * 
         * @return The estimated range count.
         */
        @Override
        public long rangeCount(final boolean exactIsIgnored) {

            final long estimate = estimatedRangeCount;

            return estimate;

        }

    } // class DefaultGraphAccessPath

}