JustificationIterator.java example

Explorer
blazegraph-master
- database-master
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Nov 14, 2007
 */

package com.bigdata.rdf.inf;

import java.util.NoSuchElementException;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.FutureTask;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.log4j.Logger;

import com.bigdata.btree.IIndex;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.journal.IIndexManager;
import com.bigdata.relation.accesspath.IElementFilter;
import com.bigdata.util.Bytes;

/**
 * Iterator visits {@link Justification}s reading from the justification index.
 * The iterator optionally supports asynchronous read ahead.
 * <p>
 * {@link Justification}s are staged in a bounded {@link ArrayBlockingQueue}.
 * In asynchronous mode a {@link Reader} task, submitted to the executor
 * service of the {@link IIndexManager}, fills the queue while the caller
 * drains it. In synchronous mode the queue is (re-)filled in the caller's
 * thread by {@link #fillBuffer()}.
 * <p>
 * Note: The consumer-side methods ({@link #hasNext()}, {@link #next()},
 * {@link #nextChunk()}, {@link #close()}) are not synchronized.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 * 
 * @deprecated replace with appropriate access path reading on the
 *             justifications index for SPORelation (or treat as a secondary
 *             relation).
 */
@Deprecated
public class JustificationIterator implements IJustificationIterator {

    private static final Logger log = Logger
            .getLogger(JustificationIterator.class);

    /**
     * The maximum #of statements that will be buffered by the iterator when
     * the caller does not specify a capacity.
     */
    private static final int MAXIMUM_CAPACITY = 10 * Bytes.kilobyte32; // was 100k

    /**
     * The minimum desirable chunk size for {@link #nextChunk()}.
     */
    private static final int MIN_CHUNK_SIZE = 100;

    /**
     * When the range count of the index is less than this value the read is
     * always performed synchronously in the caller's thread.
     */
    private static final int MIN_ASYNC_RANGE_COUNT = 100;

    /**
     * <code>true</code> until {@link #close()} has been invoked.
     */
    private boolean open = true;

    /**
     * The actual capacity of the buffer (never zero).
     */
    private final int capacity;

    /**
     * The #of statements that have been read <strong>from the source</strong>
     * and placed into the buffer.
     * <p>
     * Note: This is <code>volatile</code> since it is written by the
     * {@link Reader} thread (async mode) and read by the caller's thread when
     * logging. There is only ever a single writer (either the {@link Reader}
     * or the caller's thread via {@link #fillBuffer()}, never both), so the
     * non-atomic increment is safe.
     */
    private volatile int numBuffered;

    /**
     * The #of statements that have been read by the caller using
     * {@link #next()}.
     */
    private int numReadByCaller;

    /**
     * The #of chunks that have been read by the caller using
     * {@link #nextChunk()}.
     */
    private int nchunks = 0;

    /**
     * A buffer holding {@link Justification}s that have not been visited.
     * {@link Justification}s that have been visited are taken from the buffer,
     * making room for new statements which can be filled in asynchronously by
     * the {@link Reader}. Cleared to <code>null</code> by {@link #close()}.
     */
    private ArrayBlockingQueue<Justification> buffer;

    /**
     * The source iterator reading on the selected justification index. Cleared
     * to <code>null</code> by {@link #close()}.
     */
    private ITupleIterator<?> src;

    /**
     * The future for the {@link Reader} and <code>null</code> if a synchronous
     * read was performed (fully buffered read in the caller's thread).
     */
    private final FutureTask<Object> ft;

    /**
     * Set to true iff an asynchronous {@link Reader} is used AND there is
     * nothing more to be read. This is NOT set when the {@link Reader}
     * terminates abnormally, which is how {@link #awaitReader()} tells a failed
     * reader apart from an exhausted one.
     */
    private final AtomicBoolean readerDone = new AtomicBoolean(false);

    /**
     * Create an iterator reading from the justifications index.
     * 
     * @param indexManager
     *            Used to obtain the executor service on which the asynchronous
     *            {@link Reader} (if any) is run.
     * @param ndx
     *            The justifications index. The entire key range is visited.
     * @param capacity
     *            The maximum #of statements that will be buffered. When ZERO
     *            (0) the buffer is sized from the range count of the index,
     *            limited to {@link #MAXIMUM_CAPACITY}. When non-zero, the
     *            caller's value is used (reduced to the range count if the
     *            range count is smaller) - this gives you control when you
     *            really, really want to have something fully buffered, e.g.,
     *            for an in-memory self-join.
     * 
     * @throws IllegalArgumentException
     *             if <i>indexManager</i> is <code>null</code>, if <i>ndx</i>
     *             is <code>null</code>, or if <i>capacity</i> is negative.
     */
    public JustificationIterator(final IIndexManager indexManager,
            final IIndex ndx, int capacity) {

        if (indexManager == null)
            throw new IllegalArgumentException("indexManager is null");

        if (ndx == null)
            throw new IllegalArgumentException("ndx is null");

        if (capacity < 0)
            throw new IllegalArgumentException("capacity is negative: "
                    + capacity);

        /*
         * When true, asynchronous read-ahead will be used to refill the buffer
         * as it becomes depleted. When false, read-ahead will be synchronous
         * (this is useful when you want to read at most N statements from the
         * index).
         */
        boolean async = true;

        // The fast range count (upper bound)
        final long rangeCount = ndx.rangeCount();

        if (capacity == 0) {

            /*
             * No capacity was given: size the buffer from the range count
             * (upper bound), but never more than the maximum capacity.
             */

            capacity = rangeCount > MAXIMUM_CAPACITY ? MAXIMUM_CAPACITY
                    : (int) rangeCount;

        } else if (capacity > rangeCount) {

            /*
             * If the caller has over-estimated the actual range count for the
             * index then reduce the capacity to the real range count. This
             * makes it safe for the caller to request a capacity of 1M SPOs and
             * only a "right-sized" buffer will be allocated.
             * 
             * Note: The range count is generally an upper bound rather than an
             * exact value.
             */

            capacity = (int) rangeCount;

            /*
             * Note: If the caller is making a best effort attempt to read
             * everything into memory AND the data will fit within the caller's
             * specified capacity, then we disable asynchronous reads so that
             * they will get everything in one chunk.
             */

            async = false;

        }

        if (rangeCount < MIN_ASYNC_RANGE_COUNT) {

            // Disable async reads if we are not reading much data.

            async = false;

        }

        if (capacity == 0) {

            /*
             * Note: The ArrayBlockingQueue has a minimum capacity of ONE (1).
             */

            capacity = 1;

        }

        this.capacity = capacity;

        this.src = ndx.rangeIterator(null/* fromKey */, null/* toKey */,
                0/* capacity */, IRangeQuery.KEYS, null/* filter */);

        this.buffer = new ArrayBlockingQueue<Justification>(capacity);

        if (async) {

            // wrap reader as Future (the Reader captures [src] and [buffer]).
            ft = new FutureTask<Object>(new Reader());

            // submit for asynchronous read ahead
            indexManager.getExecutorService().submit(ft);

        } else {

            // Fill the buffer (synchronous).

            ft = null;

            fillBuffer();

        }

    }

    /**
     * Reads from the statement index, filling the {@link #buffer}.
     * <p>
     * The task holds its own references to the source iterator and the buffer
     * so that it is not affected when {@link JustificationIterator#close()}
     * clears the corresponding fields of the outer class while the task is
     * still running.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     * @version $Id$
     */
    private class Reader implements Callable<Object> {

        /** The source iterator as of the time the task was created. */
        private final ITupleIterator<?> source = src;

        /** The buffer as of the time the task was created. */
        private final ArrayBlockingQueue<Justification> queue = buffer;

        /**
         * Runs the {@link Reader}, copying every {@link Justification} visited
         * by the source iterator into the buffer and then setting
         * {@link JustificationIterator#readerDone}.
         * 
         * @return <code>null</code>.
         * 
         * @throws InterruptedException
         *             if interrupted while blocked on a full buffer (this is
         *             how {@link JustificationIterator#close()} terminates the
         *             task).
         */
        public Object call() throws Exception {

            while (source.hasNext()) {

                final Justification t = (Justification) source.next()
                        .getObject();

                /*
                 * Note: This will block if the buffer is at capacity.
                 */

                queue.put(t);

                numBuffered++;

            }

            // Nothing left to read.

            readerDone.set(true);

            return null;

        }

    }

    /**
     * (Re-)fills the buffer up to its capacity or the exhaustion of the source
     * iterator. This is only used for synchronous reads.
     * 
     * @return false if the buffer is still empty.
     * 
     * @throws IllegalStateException
     *             if the iterator has been closed.
     * @throws RuntimeException
     *             wrapping an {@link InterruptedException} if the caller's
     *             thread is interrupted (the interrupt status is restored).
     */
    private boolean fillBuffer() {

        assertOpen();

        if (ft != null) {

            // This method MUST NOT be invoked when using the async reader.
            throw new AssertionError();

        }

        try {

            while (src.hasNext() && buffer.remainingCapacity() > 0) {

                final Justification jst = (Justification) src.next()
                        .getObject();

                try {

                    buffer.put(jst);

                } catch (InterruptedException ex) {

                    throw interrupted(ex);

                }

                numBuffered++;

            }

            // false if the buffer is still empty.

            return !buffer.isEmpty();

        } finally {

            if (log.isDebugEnabled())
                log.debug("(Re-)filled buffer: size=" + buffer.size()
                        + ", remainingCapacity=" + buffer.remainingCapacity()
                        + ", done=" + !src.hasNext());

        }

    }

    /**
     * Return <code>true</code> iff there is at least one {@link Justification}
     * in the buffer, (re-)filling the buffer (synchronous reads) or waiting on
     * the {@link Reader} (asynchronous reads) if the buffer is empty.
     * 
     * @return <code>false</code> if the iterator is closed or exhausted.
     * 
     * @throws RuntimeException
     *             if the asynchronous {@link Reader} failed or if the caller's
     *             thread is interrupted while waiting.
     */
    public boolean hasNext() {

        if (!open)
            return false;

        if (buffer.isEmpty()) {

            /*
             * The buffer is empty, but there may be more data available from
             * the underlying iterator.
             */

            if (ft != null) {

                // async reader - so wait on it.
                awaitReader();

            } else {

                // sync reader - so fill the buffer in this thread.
                fillBuffer();

            }

            if (buffer.isEmpty()) {

                // the buffer is still empty, so the iterator is exhausted.
                return false;

            }

        }

        // at least one Justification in the buffer.
        return true;

    }

    /**
     * Return the next {@link Justification} from the buffer.
     * 
     * @throws NoSuchElementException
     *             if the iterator is closed or exhausted.
     * @throws RuntimeException
     *             wrapping an {@link InterruptedException} if the caller's
     *             thread is interrupted (the interrupt status is restored).
     */
    public Justification next() {

        if (!hasNext()) {

            throw new NoSuchElementException();

        }

        final Justification t;

        try {

            t = buffer.take();

        } catch (InterruptedException ex) {

            throw interrupted(ex);

        }

        numReadByCaller++;

        return t;

    }

    /**
     * Returns a chunk whose size is the #of statements currently in the buffer.
     * <p>
     * Note: When asynchronous reads are used, the buffer will be transparently
     * refilled and should be ready for a next chunk by the time you are done
     * with this one.
     * 
     * @throws NoSuchElementException
     *             if the iterator is closed or exhausted.
     * @throws RuntimeException
     *             if the asynchronous {@link Reader} failed or if the caller's
     *             thread is interrupted while waiting.
     */
    public Justification[] nextChunk() {

        if (!hasNext()) {

            throw new NoSuchElementException();

        }

        if (ft != null) {

            // make sure that we fill the buffer before we deliver a chunk.
            awaitReader();

        }

        // there are at least this many in the buffer.
        final int n = buffer.size();

        // allocate the array.
        final Justification[] stmts = new Justification[n];

        for (int i = 0; i < n; i++) {

            stmts[i] = next();

        }

        // one more chunk has been delivered to the caller.
        nchunks++;

        if (log.isDebugEnabled())
            log.debug("chunkSize=" + n + ", nchunks=" + nchunks
                    + ", #read(caller)=" + numReadByCaller + ", #read(src)="
                    + numBuffered);

        return stmts;

    }

    /**
     * Await some data from the {@link Reader}.
     * <p>
     * Note: If there is some data available this will continue to wait until at
     * least {@link #MIN_CHUNK_SIZE} statements (or {@link #capacity}
     * statements if that is smaller) are available from the {@link Reader}
     * -or- until the reader signals that it is {@link #readerDone done} -or-
     * until the {@link Reader}'s future is done. This helps to keep up the
     * chunk size and hence the efficiency of batch operations when we might
     * otherwise get into a race with the {@link Reader}.
     * <p>
     * If the {@link Reader} terminated without signaling {@link #readerDone}
     * then it failed (or was cancelled) and the failure is rethrown here
     * rather than waiting forever or silently reporting a truncated result.
     * 
     * @throws RuntimeException
     *             wrapping the {@link ExecutionException} if the
     *             {@link Reader} failed, or wrapping an
     *             {@link InterruptedException} if the caller's thread is
     *             interrupted while waiting (the interrupt status is
     *             restored).
     * @throws java.util.concurrent.CancellationException
     *             if the {@link Reader} was cancelled.
     */
    private void awaitReader() {

        if (ft == null) {

            /*
             * This method MUST NOT be invoked unless you are using the async
             * reader.
             */

            throw new AssertionError();

        }

        /*
         * Wait for at least N records to show up.
         */

        final int minSize = Math.min(capacity, MIN_CHUNK_SIZE);

        while (buffer.size() < minSize && !readerDone.get() && !ft.isDone()) {

            try {

                /*
                 * TODO This uses a Thread.sleep() to avoid a lock ordering
                 * problem because we did not have access to the lock used
                 * internally by the blocking queue when this code was written.
                 * However, we now have incorporated at least one JSR166
                 * blocking queue class which can use the caller's lock. That
                 * makes it possible to setup conditions which can be awaited
                 * for full/not-full, etc., but you have to be careful not to
                 * violate the manner in which the lock signal/notify semantics
                 * are used internally by the blocking queue implementation.
                 */

                Thread.sleep(10/* ms */);

            } catch (InterruptedException ex) {

                throw interrupted(ex);

            }

        }

        /*
         * Note: isDone() is tested first. On normal completion [readerDone] is
         * set before the future becomes done, so (done && !readerDone) can only
         * be observed when the Reader did not complete normally.
         */
        if (ft.isDone() && !readerDone.get()) {

            try {

                // does not block since the future is done.
                ft.get();

            } catch (ExecutionException ex) {

                throw new RuntimeException(ex);

            } catch (InterruptedException ex) {

                throw interrupted(ex);

            }

        }

    }

    /**
     * Not supported.
     * 
     * @throws IllegalStateException
     *             if the iterator has been closed.
     * @throws UnsupportedOperationException
     *             if the iterator is open.
     */
    public void remove() {

        assertOpen();

        throw new UnsupportedOperationException();

    }

    /**
     * Close the iterator, cancelling the asynchronous {@link Reader} (if any)
     * and discarding the buffer and the source iterator. This method has no
     * effect if the iterator is already closed.
     */
    public void close() {

        if (!open) {

            // Already closed.
            return;

        }

        log.debug("Closing iterator");

        open = false;

        if (ft != null) {

            // terminate the Reader.
            ft.cancel(true/* mayInterruptIfRunning */);

        }

        // discard buffer.
        buffer.clear();

        buffer = null;

        // discard the source iterator.
        src = null;

    }

    /**
     * @throws IllegalStateException
     *             if the iterator has been closed.
     */
    private final void assertOpen() {

        if (!open)
            throw new IllegalStateException();

    }

    /**
     * Restores the interrupt status of the current thread and wraps the
     * exception so that it can be thrown from methods which do not declare
     * {@link InterruptedException}.
     * 
     * @param ex
     *            The exception.
     * 
     * @return The wrapped exception (to be thrown by the caller).
     */
    private static RuntimeException interrupted(final InterruptedException ex) {

        Thread.currentThread().interrupt();

        return new RuntimeException(ex);

    }

}