/** Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on Nov 14, 2007 */ package com.bigdata.rdf.inf; import java.util.NoSuchElementException; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.Callable; import java.util.concurrent.FutureTask; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.log4j.Logger; import com.bigdata.btree.IIndex; import com.bigdata.btree.IRangeQuery; import com.bigdata.btree.ITupleIterator; import com.bigdata.journal.IIndexManager; import com.bigdata.relation.accesspath.IElementFilter; import com.bigdata.util.Bytes; /** * Iterator visits {@link Justification}s reading from the justification index. * The iterator optionally supports asynchronous read ahead. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ * * @deprecated replace with appropriate access path reading on the * justifications index for SPORelation (or treat as a secondary * relation). */ public class JustificationIterator implements IJustificationIterator { private static final transient Logger log = Logger .getLogger(JustificationIterator.class); /** * The maximum #of statements that will be buffered by the iterator. */ private static final transient int MAXIMUM_CAPACITY = 10 * Bytes.kilobyte32; // was 100k private boolean open = true; /** * The actual capacity of the buffer (never zero). */ private final int capacity; /** * The #of statements that have been read <strong>from the source</strong> * and placed into the buffer. All such statements will also have passed the * optional {@link IElementFilter}. */ private int numBuffered; /** * The #of statements that have been read by the caller using * {@link #next()}. */ private int numReadByCaller; /** * The #of chunks that have been read by the caller. */ private int nchunks = 0; /** * A buffer holding {@link Justification}s that have not been visited. * {@link Justification}s that have been visited are taken from the buffer, * making room for new statements which can be filled in asynchronously by * the {@link Reader}. */ private ArrayBlockingQueue<Justification> buffer; /** * The source iterator reading on the selected justification index. */ private ITupleIterator<?> src; // /** // * The executor service for the {@link Reader} (iff the {@link Reader} runs // * asynchronously). // */ // private final ExecutorService readService; /** * The future for the {@link Reader} and <code>null</code> if a synchronous * read was performed (fully buffered read in the caller's thread). */ private final FutureTask<Object> ft; /** * Set to true iff an asynchronous {@link Reader} is used AND there is * nothing more to be read. */ private final AtomicBoolean readerDone = new AtomicBoolean(false); /** * The minimum desirable chunk size for {@link #nextChunk()}. */ static private final int MIN_CHUNK_SIZE = 100; // /** // * If NO results show up within this timeout then {@link #nextChunk()} will // * throw a {@link RuntimeException} to abort the reader - the probably cause // * is a network outage. // */ // static private final long TIMEOUT = Long.MAX_VALUE; /** * Create an iterator reading from the justifications index. * * @param capacity * The maximum #of statements that will be buffered. When ZERO * (0) the iterator will range count the access path fully buffer * if there are less than {@link #MAXIMUM_CAPACITY} statements * selected by the triple pattern. When non-zero, the caller's * value is used - this gives you control when you really, really * want to have something fully buffered, e.g., for an in-memory * self-join. */ public JustificationIterator(final IIndexManager indexManager, final IIndex ndx, int capacity) { if (indexManager == null) throw new IllegalArgumentException(); if (ndx == null) throw new IllegalArgumentException(); if (capacity < 0) throw new IllegalArgumentException(); /* * When true, asynchronous read-ahead will be used to refill the buffer * as it becomes depleted. When false, read-ahead will be synchronous * (this is useful when you want to read at most N statements from the * index). */ boolean async = true; // The fast range count (upper bound) final long rangeCount = ndx.rangeCount(); if (capacity == 0) { /* * Attempt to fully buffer the justifications. */ if (capacity > MAXIMUM_CAPACITY || rangeCount > MAXIMUM_CAPACITY) { /* * If the capacity would exceed the maximum then we limit * the capacity to the maximum. */ capacity = MAXIMUM_CAPACITY; } else { // Otherwise use the range count (upper bound). capacity = (int) rangeCount; } } else { if (capacity > rangeCount) { /* * If the caller has over-estimated the actual range count for * the index then reduce the capacity to the real range count. * This makes it safe for the caller to request a capacity of 1M * SPOs and only a "right-sized" buffer will be allocated. * * Note: The range count is generally an upper bound rather than * an exact value. */ capacity = (int) rangeCount; /* * Note: If the caller is making a best effort attempt to read * everything into memory AND the data will fit within the * caller's specified capacity, then we disable asynchronous * reads so that they will get everything in one chunk. */ async = false; } } if (rangeCount < 100) { // Disable async reads if we are not reading much data. async = false; } if (capacity == 0) { /* * Note: The ArrayBlockingQueue has a minimum capacity of ONE (1). */ capacity = 1; } this.capacity = capacity; this.src = ndx.rangeIterator(null/* fromKey */, null/* toKey */, 0/* capacity */, IRangeQuery.KEYS, null/* filter */); this.buffer = new ArrayBlockingQueue<Justification>(capacity); if (async) { // wrap reader as Future ft = new FutureTask<Object>(new Reader()); // submit for asynchronous read ahead indexManager.getExecutorService().submit(ft); } else { // Fill the buffer (synchronous). ft = null; fillBuffer(); } } /** * Reads from the statement index, filling the {@link #buffer}. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ */ private class Reader implements Callable<Object> { /** * Runs the {@link Reader}. * * @return <code>null</code>. */ public Object call() throws Exception { while (src.hasNext()) { final Justification t = (Justification)src.next().getObject(); try { /* * Note: This will block if the buffer is at capacity. */ buffer.put(t); numBuffered++; } catch (InterruptedException ex) { throw new RuntimeException(ex); } } // Nothing left to read. readerDone.set(true); return null; } } /** * (Re-)fills the buffer up to its capacity or the exhaustion of the source * iterator. * * @return false if the buffer is still empty. */ private boolean fillBuffer() { assertOpen(); if (ft != null) { // This method MUST NOT be invoked when using the async reader. throw new AssertionError(); } try { // if(log.isDebugEnabled()) log.debug("(Re-)filling buffer: remainingCapacity=" // + buffer.remainingCapacity()); while (src.hasNext() && buffer.remainingCapacity() > 0) { final Justification jst = (Justification) src.next() .getObject(); try { buffer.put(jst); numBuffered++; } catch (InterruptedException ex) { throw new RuntimeException(ex); } } // false if the buffer is still empty. return !buffer.isEmpty(); } finally { if (log.isDebugEnabled()) log.debug("(Re-)filled buffer: size=" + buffer.size() + ", remainingCapacity=" + buffer.remainingCapacity() + ", done=" + !src.hasNext()); } } public boolean hasNext() { if (!open) return false; if (buffer.isEmpty()) { /* * The buffer is empty, but there may be more data available from * the underlying iterator. */ if (ft != null) { // async reader - so wait on it. awaitReader(); } else { // sync reader - so fill the buffer in this thread. fillBuffer(); } if (buffer.isEmpty()) { // the buffer is still empty, so the iterator is exhausted. return false; } } // at least one Justification in the buffer. return true; } public Justification next() { if (!hasNext()) { throw new NoSuchElementException(); } final Justification t; try { t = buffer.take(); } catch (InterruptedException ex) { throw new RuntimeException(ex); } numReadByCaller++; return t; } /** * Returns a chunk whose size is the #of statements currently in the buffer. * <p> * Note: When asynchronous reads are used, the buffer will be transparently * refilled and should be ready for a next chunk by the time you are done * with this one. */ public Justification[] nextChunk() { if (!hasNext()) { throw new NoSuchElementException(); } if (ft != null) { // make sure that we fill the buffer before we deliver a chunk. awaitReader(); } // there are at least this many in the buffer. final int n = buffer.size(); // allocate the array. final Justification[] stmts = new Justification[n]; for (int i = 0; i < n; i++) { stmts[i] = next(); } if (log.isDebugEnabled()) log.debug("chunkSize=" + n + ", nchunks=" + nchunks + ", #read(caller)=" + numReadByCaller + ", #read(src)=" + numBuffered); return stmts; } /** * Await some data from the {@link Reader}. * <p> * Note: If there is some data available this will continue to wait until at * least {@link #MIN_CHUNK_SIZE} statements are available from the * {@link Reader} -or- until the reader signals that it is * {@link #readerDone done}. This helps to keep up the chunk size and hence * the efficiency of batch operations when we might otherwise get into a * race with the {@link Reader}. */ private void awaitReader() { if (ft == null) { /* * This method MUST NOT be invoked unless you are using the async * reader. */ throw new AssertionError(); } // final long begin = System.currentTimeMillis(); /* * Wait for at least N records to show up. */ final int N = capacity < MIN_CHUNK_SIZE ? capacity : MIN_CHUNK_SIZE; while (buffer.size() < N && !readerDone.get()) { try { /* * TODO This uses a Thread.sleep() to avoid a lock ordering * problem because we did not have access to the lock used * internally by the blocking queue when this code was written. * However, we now have incorporated at least one JSR166 * blocking queue class which can use the caller's lock. That * makes it possible to setup conditions which can be awaited * for full/not-full, etc., but you have to be careful not to * violate the manner in which the lock signal/notify semantics * are used internally by the blocking queue implementation. */ Thread.sleep(10/*ms*/); } catch (InterruptedException ex) { throw new RuntimeException(ex); } // final long elapsed = System.currentTimeMillis() - begin; // // if (elapsed > TIMEOUT && buffer.isEmpty()) { // // throw new RuntimeException("Timeout after " + elapsed + "ms"); // // } } } /** * @throws UnsupportedOperationException always */ public void remove() { assertOpen(); throw new UnsupportedOperationException(); } public void close() { if (!open) { // Already closed. return; } log.debug("Closing iterator"); open = false; if (ft != null) { // terminate the Reader. ft.cancel(true/* mayInterruptIfRunning */); // try { // // readService.awaitTermination(500, TimeUnit.MILLISECONDS); // // } catch (InterruptedException e) { // // log.warn("Read service did not terminate: " + e); // // } } // discard buffer. buffer.clear(); buffer = null; // discard the source iterator. src = null; } private final void assertOpen() { if (!open) throw new IllegalStateException(); } }