/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Jun 25, 2009
 */

package com.bigdata.rdf.rio;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.AbstractExecutorService;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipInputStream;

import org.apache.log4j.Logger;
import org.openrdf.model.BNode;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFParser;

import com.bigdata.btree.AsynchronousIndexWriteConfiguration;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.keys.KVO;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.btree.proc.IAsyncResultHandler;
import com.bigdata.btree.proc.LongAggregator;
import com.bigdata.counters.CounterSet;
import com.bigdata.counters.Instrument;
import com.bigdata.counters.OneShotInstrument;
import com.bigdata.io.ByteArrayBuffer;
import com.bigdata.io.DataOutputBuffer;
import com.bigdata.journal.AbstractTask;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.VTE;
import com.bigdata.rdf.internal.impl.BlobIV;
import com.bigdata.rdf.lexicon.AssignTermId;
import com.bigdata.rdf.lexicon.BigdataValueCentricFullTextIndex;
import com.bigdata.rdf.lexicon.BlobsIndexHelper;
import com.bigdata.rdf.lexicon.BlobsWriteProc;
import com.bigdata.rdf.lexicon.BlobsWriteProc.BlobsWriteProcConstructor;
import com.bigdata.rdf.lexicon.Id2TermWriteProc.Id2TermWriteProcConstructor;
import com.bigdata.rdf.lexicon.LexiconKeyBuilder;
import com.bigdata.rdf.lexicon.LexiconKeyOrder;
import com.bigdata.rdf.lexicon.LexiconRelation;
import com.bigdata.rdf.lexicon.Term2IdTupleSerializer;
import com.bigdata.rdf.lexicon.Term2IdWriteProc;
import com.bigdata.rdf.lexicon.Term2IdWriteProc.Term2IdWriteProcConstructor;
import com.bigdata.rdf.model.BigdataBNode;
import com.bigdata.rdf.model.BigdataBNodeImpl;
import com.bigdata.rdf.model.BigdataLiteral;
import com.bigdata.rdf.model.BigdataResource;
import com.bigdata.rdf.model.BigdataStatement;
import com.bigdata.rdf.model.BigdataURI;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.BigdataValueFactory;
import com.bigdata.rdf.model.BigdataValueImpl;
import com.bigdata.rdf.model.BigdataValueSerializer;
import com.bigdata.rdf.model.StatementEnum;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.SPOIndexWriteProc;
import com.bigdata.rdf.spo.SPOIndexWriter;
import com.bigdata.rdf.spo.SPOKeyOrder;
import com.bigdata.rdf.spo.SPORelation;
import com.bigdata.rdf.spo.SPOTupleSerializer;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.ScaleOutTripleStore;
import com.bigdata.relation.accesspath.BlockingBuffer;
import com.bigdata.relation.accesspath.IBuffer;
import com.bigdata.relation.accesspath.IRunnableBuffer;
import com.bigdata.relation.accesspath.UnsynchronizedUnboundedChunkBuffer;
import com.bigdata.search.TextIndexWriteProc;
import com.bigdata.service.AbstractFederation;
import com.bigdata.service.Split;
import com.bigdata.service.ndx.IScaleOutClientIndex;
import com.bigdata.service.ndx.pipeline.DefaultDuplicateRemover;
import com.bigdata.service.ndx.pipeline.KVOC;
import com.bigdata.service.ndx.pipeline.KVOLatch;
import com.bigdata.service.ndx.pipeline.KVOList;
import com.bigdata.striterator.ChunkedWrappedIterator;
import com.bigdata.striterator.IChunkedIterator;
import com.bigdata.striterator.IChunkedOrderedIterator;
import com.bigdata.striterator.IKeyOrder;
import com.bigdata.util.Bytes;
import com.bigdata.util.DaemonThreadFactory;
import com.bigdata.util.concurrent.Latch;
import com.bigdata.util.concurrent.ShutdownHelper;
import com.bigdata.util.concurrent.ThreadPoolExecutorBaseStatisticsTask;

import cutthecrap.utils.striterators.Filter;
import cutthecrap.utils.striterators.Striterator;

/**
 * Factory object for high-volume RDF data load.
 * <p>
 * The asynchronous statement buffer w/o SIDs is much simpler than w/. If we
 * require that the document is fully buffered in memory, then we can simplify
 * this to just:
 * 
 * <pre>
 * 
 * Given:
 * 
 * value[] - RDF Values observed in the S,P,O, or C positions.
 * 
 * statement[] - RDF Statements reported by the parser.
 * 
 * Do:
 * 
 * value[] => TERM2ID (Sync RPC, assigning TIDs)
 * value[] => BLOBS (Sync RPC, assigning TIDs)
 * 
 * value[] => ID2TERM (Async)
 * 
 * value[] => Text (Async, iff enabled)
 * 
 * statement[] => (SPO,POS,OSP) (Async)
 * </pre>
 * 
 * Note: This DOES NOT support truth maintenance. Truth maintenance requires
 * that the term identifiers are resolved against the database's lexicon while
 * the statements are written onto a local (and temporary) triple store. There
 * is no (or at least less) reason to use asynchronous writes against a local
 * store. However, TM could use this to copy the data from the temporary triple
 * store to the database. This should be plugged in transparently in the
 * copyStatements() API for the tripleStore.
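 * <p>
 * A minimal usage sketch is shown below. It is illustrative only: the
 * directory name and retry interval are assumptions, and the constructor
 * arguments are elided (see the constructor javadoc for the full parameter
 * list).
 * 
 * <pre>
 * AsynchronousStatementBufferFactory<BigdataStatement, File> factory = ...;
 * try {
 *     // Submit all files in a directory (a null filter accepts all files),
 *     // retrying every 100ms while the parser work queue is full.
 *     factory.submitAll(new File("data"), null, 100L);
 *     // Close the write buffers and await restart-safe writes.
 *     factory.awaitAll();
 * } catch (Throwable t) {
 *     factory.cancelAll(true); // mayInterruptIfRunning
 * }
 * </pre>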
* <p> * Note: This DOES NOT support SIDS. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ * * @param <S> * The generic type of the statement objects. * @param <R> * The generic type of the resource identifier (File, URL, etc). * * FIXME Modify to support SIDs. We basically need to loop in the * {@link #workflowLatch_bufferTids} workflow state until all SIDs have been * assigned. However, the termination conditions will be a little more complex. * During termination, if we have the TIDs but not yet the SIDs then we need to * flush the SID requests rather than allowing them to timeout. Since SID * processing is cyclic, we may have to do this one or more times. * * <pre> * AsynchronousStatementBufferWithSids: * * When SIDs are enabled, we must identify the minimum set of statements * whose SIDs are referenced by blank nodes in the S, P, O positions of * other statements. Since we can not make that determination until we * reach the end of the document, all statements which use blank nodes * are placed into the deferredStatements container. * * Further, and unlike the synchronous StatementBuffer, we must defer * writes of grounded statements until we know whether or not their SID * appears in a blank node reference by another statement. We MUST use * synchronous RPC to obtain the SIDs for those statements. This means * that the entire document MUST be parsed into memory. Since we must * buffer the entire document in memory when SIDs are enabled (when using * asynchronous writes), distinct implementations of the asynchronous * statement buffer are used depending on whether or not SIDs are * enabled. [Actually, we fully buffer anyway so we can use the same * implementation class.] * * Once the end of the document has been reached, we iteratively divide * the parsed statements into three collections. This process ends once * all three collections are empty. * * 1. groundedStatements : These are statements which are not * referenced by other statements using their SID and which do not * contain references to the SIDs of other statements. The * groundedStatements are written asynchronously since there is no * dependency on their SIDs. * * 2. referencedStatements : These are statements whose SID has not * been assigned yet and which do not reference other statements * but which are themselves referenced by other statements using a * blank node. These statements are written using synchronous RPC * so that we may obtain their SIDs and thereby convert one or more * deferredStatements to either groundedStatements or * referencedStatements. * * 3. deferredStatements : These are statements using a blank node to * reference another statement whose SID has not been assigned yet. * These statements MAY also be referenced by other deferred * statements. However, those references MAY NOT form a cycle. * Deferred statements are moved to either the groundedStatements * or the referencedStatements collection once their blank node * references have been assigned SIDs. * * Given: * * value[] - RDF Values observed in the S, P, O, and C positions. * * unresolvedRefs[] - RDF blank nodes observed in the C position are * entered into this collection. They are removed * from the collection as they are resolved. * * statement[] - RDF Statements reported by the parser. * * Do: * * // remove blank nodes serving as SIDs from the value[]. 
 * value[] := value[] - unresolvedRefs[];
 * 
 * value[] => TERM2ID (Sync RPC, assigning TIDs)
 * 
 * value[] => ID2TERM (Async)
 * 
 * value[] => Text (Async, iff enabled)
 * 
 * // initially, all statements are deferred.
 * deferredStatements := statements;
 * 
 * while(!groundedStatements.isEmpty() || !referencedStatements.isEmpty()
 *       || !deferredStatements.isEmpty()) {
 * 
 *     groundedStatement[] => TERM2ID (async)
 * 
 *     groundedStatement[] := []; // empty.
 * 
 *     referencedStatement[] => TERM2ID (Sync RPC, assigning SIDs)
 * 
 *     foreach spo : referencedStatements {
 * 
 *         unresolvedRefs.remove( spo.c );
 * 
 *     }
 * 
 *     referencedStatement[] := []; // empty.
 * 
 *     foreach spo : deferredStatements {
 * 
 *         if(spo.isGrounded) {
 * 
 *             // true iff S.tid, P.tid, and O.tid are bound, implying that
 *             // this statement does not have any unresolved references to
 *             // other statements.
 * 
 *             if(unresolvedRefs.contains(spo.c)) {
 * 
 *                 // will be written synchronously.
 *                 referencedStatements.add( spo );
 * 
 *             } else {
 * 
 *                 // will be written asynchronously.
 *                 groundedStatement.add( spo );
 * 
 *             }
 * 
 *         }
 * 
 *     }
 * 
 * }
 * </pre>
 * 
 * @todo Evaluate this approach for writing on a local triple store. If there
 *       is a performance benefit then refactor accordingly (requires an
 *       asynchronous write API for BTree and friends).
 */
public class AsynchronousStatementBufferFactory<S extends BigdataStatement, R>
        implements IAsynchronousWriteStatementBufferFactory<S> {

    final private transient static Logger log = Logger
            .getLogger(AsynchronousStatementBufferFactory.class);

    /**
     * The database into which the statements will be written.
     */
    private final ScaleOutTripleStore tripleStore;

    /**
     * The lexicon.
     */
    private final LexiconRelation lexiconRelation;

    /**
     * The triples.
     */
    private final SPORelation spoRelation;

    /**
     * The initial capacity of the canonicalizing mapping for RDF
     * {@link Value}s.
     */
    private final int valuesInitialCapacity;

    /**
     * The initial capacity of the canonicalizing mapping for RDF
     * {@link BNode}s.
     */
    private final int bnodesInitialCapacity;

    /**
     * The chunk size used by the producer to break the terms and statements
     * into chunks before writing them onto the {@link BlockingBuffer} for
     * the master.
     */
    private final int producerChunkSize;

    /**
     * The default {@link RDFFormat}.
     */
    private final RDFFormat defaultFormat;

    /**
     * The value that will be used for the graph/context co-ordinate when
     * loading data represented in a triple format into a quad store.
     */
    private final String defaultGraph;

    /**
     * Options for the {@link RDFParser}.
     */
    private final RDFParserOptions parserOptions;

    /**
     * Delete files after they have been successfully loaded when
     * <code>true</code>.
     */
    private final boolean deleteAfter;

    /**
     * Delete files after they have been successfully loaded when
     * <code>true</code>.
     */
    protected boolean isDeleteAfter() {

        return deleteAfter;

    }

    /**
     * The default RDF interchange format that will be used when the format
     * can not be determined.
     */
    protected RDFFormat getDefaultRDFFormat() {

        return defaultFormat;

    }

    /**
     * When <code>true</code> and the full text index is enabled, then also
     * index datatype literals.
     */
    private final boolean indexDatatypeLiterals;

    /*
     * Asynchronous index write buffers.
     */

    final private IRunnableBuffer<KVO<BigdataValue>[]> buffer_t2id;

    final private IRunnableBuffer<KVO<BigdataValue>[]> buffer_id2t;

    final private IRunnableBuffer<KVO<BigdataValue>[]> buffer_blobs;

    final private IRunnableBuffer<KVO<BigdataValue>[]> buffer_text;

    /**
     * A map containing an entry for each statement index on which this
     * class will write.
*/ private final Map<SPOKeyOrder, IRunnableBuffer<KVO<ISPO>[]>> buffer_stmts; /** * Counts statements written on the database (applied only to the SPO index * so we do not double count). */ private final LongAggregator statementResultHandler = new LongAggregator(); /** * Counts tuples written on the full text index. */ private final LongAggregator textResultHandler = new LongAggregator(); /** * The timestamp set when {@link #notifyStart()} is invoked. This is done * when the factory is created. */ private volatile long startTime; /** * The timestamp set when {@link #notifyEnd()} is invoked. This is done when * the factory is {@link #close()}d or when execution is * {@link #cancelAll(boolean) cancelled}. */ private long endTime; /** * Notify that the factory will begin running tasks. This sets the * {@link #startTime} used by {@link #getElapsedMillis()} to report the run * time of the tasks. */ protected void notifyStart() { /* * Note: uses the lock to make this atomic since we do this when we * accept each document and we already own the lock at that point. */ if (!lock.isHeldByCurrentThread()) throw new IllegalMonitorStateException(); if (startTime == 0L) { endTime = 0L; startTime = System.currentTimeMillis(); } } /** * Notify that the factory is done running tasks (for now). This places a * cap on the time reported by {@link #elapsed()}. */ protected void notifyEnd() { endTime = System.currentTimeMillis(); parserService.shutdownNow(); tidsWriterService.shutdownNow(); otherWriterService.shutdownNow(); notifyService.shutdownNow(); if (serviceStatisticsTask != null) { serviceStatisticsTask.cancel(); } } /** * The elapsed milliseconds, counting only the time between * {@link #notifyStart()} and {@link #notifyEnd()}. */ public long getElapsedMillis() { if (startTime == 0L) return 0L; if (endTime == 0L) { return System.currentTimeMillis() - startTime; } return endTime - startTime; } /* * Cumulative counters. These do not need to be protected by the lock as * they do not guard transitions between workflow states. */ /** * The #of documents that have been parsed (cumulative total). */ private final AtomicLong documentsParsedCount = new AtomicLong(0L); /** * The #of documents whose TIDs have been assigned (cumulative total). */ private final AtomicLong documentTIDsReadyCount = new AtomicLong(0L); /** * The #of documents that are waiting on their TIDs (current value). The * counter is incremented when a document begins to buffer writes on the * TERM2ID/BLOBS indices. The counter is decremented as soon as those writes are * restart safe. * <p> * Note: The {@link #workflowLatch_bufferTids} is only decremented when * the document begins to write on the other indices, so * {@link #documentTIDsWaitingCount} will be decremented before * {@link #workflowLatch_bufferTids}. The two counters will track very * closely unless the {@link #otherWriterService} has a backlog. * <p> * Note: The {@link #workflowLatch_bufferOther} is decremented as soon as * the writes on the other indices are restart safe since there is no * transition to another workflow state. This is why there is no counter for * "documentOtherWaitingCount". */ private final AtomicLong documentTIDsWaitingCount = new AtomicLong(0L); /** * The #of told triples parsed from documents using this factory and * made restart safe on the database. This is incremented each time a * document has been made restart safe by the #of distinct told triples * parsed from that document. 
     * <p>
     * Note: The same triple can occur in more than one document, and
     * documents having duplicate triples may be loaded by distributed
     * clients. The actual #of triples on the database is only available by
     * querying the database.
     */
    private final AtomicLong toldTriplesRestartSafeCount = new AtomicLong();

    /**
     * The #of documents which have been fully processed and are
     * restart-safe on the database (cumulative total).
     */
    private final AtomicLong documentRestartSafeCount = new AtomicLong();

    /**
     * The #of documents for which the {@link BufferOtherWritesTask} failed.
     */
    private final AtomicLong documentErrorCount = new AtomicLong();

    /*
     * Latches. The latches guard transitions between workflow states and must
     * be protected by the lock.
     */

    /**
     * The {@link #lock} is used to make the observable state changes for the
     * factory atomic and guards the termination conditions in
     * {@link #close()}. You MUST own the {@link #lock} when incrementing or
     * decrementing any of the {@link Latch}s. The {@link Latch} transitions
     * must be accomplished while you are holding the lock. For example, the
     * transition between <i>parsing</i> and <i>buffering TERM2ID/BLOBS
     * writes</i> requires that we decrement one latch and increment the other
     * while holding the {@link #lock}.
     * <p>
     * The counter associated with each {@link Latch} indicates the total #of
     * documents associated with that workflow state but does not differentiate
     * between documents waiting on the work queue for the corresponding thread
     * pool (e.g., the {@link #parserService}), documents assigned to a worker
     * thread and running in the thread pool, and documents waiting for some
     * state change (e.g., the return from an asynchronous write) before they
     * can be transferred to the next workflow state. However, you can gain
     * additional information about the various thread pools from their
     * counters, including their work queue size, the #of active tasks, etc.
     */
    private final ReentrantLock lock = new ReentrantLock();

    /**
     * A global {@link Latch} guarding all documents which have been accepted
     * for processing and have not yet reached an absorbing state (either an
     * error state or been made restart safe).
     */
    private final Latch workflowLatch_document = new Latch("document", lock);

    /**
     * A {@link Latch} guarding documents which have been accepted for parsing
     * but have not been transferred to the {@link #workflowLatch_bufferTids}.
     * 
     * @todo We could add a resolver latch for network IO required to buffer
     *       the document locally. E.g., a read from a DFS or a web page.
     */
    private final Latch workflowLatch_parser = new Latch("parser", lock);

    /**
     * A {@link Latch} guarding documents that have begun buffering their
     * writes on the TERM2ID/BLOBS indices but have not been transferred to the
     * {@link #workflowLatch_bufferOther}.
     */
    private final Latch workflowLatch_bufferTids = new Latch(
            "bufferTids", lock);

    /**
     * A {@link Latch} guarding documents that have begun to buffer their
     * writes on the other indices but have not yet completed their processing.
     */
    private final Latch workflowLatch_bufferOther = new Latch("bufferOther",
            lock);

    /*
     * Latches used to guard tasks buffering writes. There is one such latch
     * for TERM2ID/BLOBS and one for the rest of the buffers. During close()
     * we will close the buffers to flush their writes as soon as these
     * latches hit zero.
     * 
     * Note: These latches allow us to close the buffers in a timely manner.
     * The other latches guard the workflow state transitions.
However, if we do not * close the buffers in a timely manner then close() will hang until a chunk * or idle timeout (if any) causes the buffers to be flushed! */ /** * {@link Latch} guarding tasks until they have buffered their writes on the * TERM2ID/BLOBS indices. This latch is decremented as soon as the writes for a given * document have been buffered. This is used to close the TERM2ID/BLOBS buffers in * a timely manner in {@link #close()}. */ private final Latch guardLatch_term2Id = new Latch("guard_term2Id", lock); /** * {@link Latch} guarding tasks until they have buffered their writes on the * remaining index buffers. This latch is decremented as soon as the writes * for a given document have been buffered. This is used to close the other * buffers in a timely manner in {@link #close()}. */ private final Latch guardLatch_other = new Latch("guard_other", lock); /** * {@link Latch} guarding the notify service until all notices have been * delivered. */ private final Latch guardLatch_notify = new Latch("guard_notify", lock); /* * Parser service pause/resume. */ /** * New parser tasks submitted to the {@link #parserService} will block when * the {@link #unbufferedStatementCount} is GT this value. This is used to * control the RAM demand of the parsed (but not yet buffered) statements. * The RAM demand of the buffered statements is controlled by the capacity * of the master and sink queues on which those statements are buffered. * * @todo it is possible that the buffered writes on term2id/blobs could limit * throughput when the parser pool is paused since the decision to * pause the parser pool is based on the #of unbuffered statements * overall not just those staged for the term2id/blobs or the other indices. */ private final long pauseParserPoolStatementThreshold; /** * The #of statements which have been parsed but not yet written onto the * asynchronous index write buffers. This is incremented when all statements * for a given document have been parsed by the #of distinct statements in * that document. This is decremented when all statements for that document * have been placed onto the asynchronous index write buffers, or if * processing fails for that document. This is used to prevent new parser * threads from overrunning the database when the parsers are faster than * the database. */ private final AtomicLong unbufferedStatementCount = new AtomicLong(); /** * The #of RDF {@link Statement}s that have been parsed but which are not * yet restart safe on the database. This is incremented when all statements * for a given document have been parsed by the #of distinct statements in * that document. This is decremented when all writes for that document have * been made restart safe on the database, or if processing fails for that * document. This may be used as a proxy for the amount of data which is * unavailable for garbage collection and thus for the size of the heap * entailed by processing. */ private final AtomicLong outstandingStatementCount = new AtomicLong(); /** * In order to prevent runaway demand on RAM, new parser tasks must await * this {@link Condition} if the #of parsed but not yet buffered statements * is GTE the configured {@link #pauseParserPoolStatementThreshold} * threshold. */ private Condition unpaused = lock.newCondition(); /** * The #of threads which are currently paused awaiting the {@link #unpaused} * {@link Condition}. */ private AtomicLong pausedThreadCount = new AtomicLong(); /** * The #of times the {@link #parserService} has been paused. 
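     * <p>
     * In sketch form, the pause protocol which this counter (and
     * {@link #pausedThreadCount}) instruments looks like the following. This
     * is an illustration only; the actual logic lives in the
     * {@link ParserThreadPoolExecutor} and may differ in detail.
     * 
     * <pre>
     * lock.lock();
     * try {
     *     boolean paused = false;
     *     while (unbufferedStatementCount.get() > pauseParserPoolStatementThreshold) {
     *         if (!paused) {
     *             paused = true;
     *             pausedThreadCount.incrementAndGet();
     *             poolPausedCount.incrementAndGet();
     *         }
     *         unpaused.await(); // wait for the backlog to clear.
     *     }
     *     if (paused)
     *         pausedThreadCount.decrementAndGet();
     * } finally {
     *     lock.unlock();
     * }
     * </pre>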
     */
    private AtomicLong poolPausedCount = new AtomicLong();

    /**
     * Verify counters for latches which must sum atomically to the
     * {@link #workflowLatch_document}.
     */
    private void assertSumOfLatchs() {

        if (!lock.isHeldByCurrentThread())
            throw new IllegalMonitorStateException();

        /*
         * Sum the latches for the distinct workflow states for a document
         * across all documents.
         */
        final long n1 = workflowLatch_parser.get()//
                + workflowLatch_bufferTids.get()//
                + workflowLatch_bufferOther.get()//
        ;

        final long n2 = workflowLatch_document.get();

        if (n1 != n2) {

            throw new AssertionError("Sum of Latches=" + n1
                    + ", but unfinished=" + n2 + " : "
                    + getCounters().toString());

        }

    }

    /**
     * Bounded thread pool using a bounded work queue to run the parser tasks.
     * If a backlog develops, then the thread pool is <em>paused</em>, and new
     * tasks will not start until the backlog is cleared. This will cause the
     * work queue to fill up, and the threads feeding that work queue to
     * block. This is done to place bounds on the memory demands of the total
     * pipeline.
     */
    private final ParserThreadPoolExecutor parserService;

    /**
     * Bounded thread pool using an unbounded work queue to buffer writes for
     * the TERM2ID/BLOBS indices (these are the indices which assign tids).
     * Tasks are added to the work queue by the parser task in
     * {@link AsynchronousStatementBufferImpl#flush()}.
     */
    private final ThreadPoolExecutor tidsWriterService;

    /**
     * Bounded thread pool using an unbounded work queue to run
     * {@link BufferOtherWritesTask}s. Tasks are added to the work queue by
     * the "TIDs Ready" {@link KVOLatch}. Once the index writes have been
     * buffered, the statement buffer is placed onto the
     * {@link #docsWaitingQueue}. This {@link ExecutorService} MUST be
     * unbounded since tasks will be assigned by {@link KVOLatch#signal()}
     * and that method MUST NOT block.
     */
    private final ThreadPoolExecutor otherWriterService;

    /**
     * Bounded thread pool with an unbounded work queue used to process
     * per-file success or failure notices.
     */
    private final ThreadPoolExecutor notifyService;

    /**
     * {@link Runnable} which collects performance counters on services used
     * by the factory.
     */
    private final ServiceStatisticsTask serviceStatisticsTask;

    /**
     * Return an estimate of the #of statements written on the indices.
     * <p>
     * This value is aggregated across any {@link IStatementBuffer} obtained
     * from {@link #newStatementBuffer()} for this instance.
     * <p>
     * This value actually reports the #of statements written on the SPO index
     * for the database. Statements are written asynchronously in chunks and
     * the writes MAY proceed at different rates for each of the statement
     * indices. The counter value will be stable once {@link #awaitAll()}
     * returns normally.
     * 
     * @see SPOIndexWriteProc
     */
    public long getStatementCount() {

        return statementResultHandler.getResult().longValue();

    }

    /**
     * The #of documents submitted to the factory which could not be processed
     * due to some error.
     */
    public long getDocumentErrorCount() {

        return documentErrorCount.get();

    }

    /**
     * The #of documents submitted to the factory which have been processed
     * successfully.
     */
    public long getDocumentDoneCount() {

        return documentRestartSafeCount.get();

    }

    /**
     * Note: Do not invoke this directly. It does not know how to set the
     * resource identifier on the statement buffer impl. Use
     * <code>submitOne()</code> or <code>submitAll()</code> to submit
     * resources for processing.
     */
    public IStatementBuffer<S> newStatementBuffer() {

        return newStatementBuffer(null/* resource */);

    }

    protected AsynchronousStatementBufferImpl newStatementBuffer(
            final R resource) {

        return new AsynchronousStatementBufferImpl(resource);

    }

    /**
     * Submit a resource for processing.
     * 
     * @param resource
     *            The resource (file or URL, but not a directory).
     * 
     * @throws Exception
     *             if there is a problem creating the parser task.
     * @throws RejectedExecutionException
     *             if the work queue for the parser service is full.
     */
    public void submitOne(final R resource) throws Exception {

        lock.lock();
        try {

            // Note: the parser task will obtain the lock when it runs.
            final Callable<?> task = newParserTask(resource);

            submitOne(resource, task);

        } finally {

            lock.unlock();

        }

    }

    /**
     * Inner method which allows the caller to allocate the task once and
     * retry if there is a {@link RejectedExecutionException}.
     * 
     * @param resource
     *            The resource (file or URL, but not a directory).
     * @param task
     *            The parser task to run.
     * 
     * @throws Exception
     *             if there is a problem creating the parser task.
     * @throws RejectedExecutionException
     *             if the work queue for the parser service is full.
     */
    private void submitOne(final R resource, final Callable<?> task)
            throws Exception {

        if (resource == null)
            throw new IllegalArgumentException();

        if (task == null)
            throw new IllegalArgumentException();

        lock.lock();
        try {

            assertSumOfLatchs();

            notifyStart();

            /*
             * Note: The total processing of the documents will not terminate
             * until this latch has been decremented back to zero.
             */
            workflowLatch_document.inc();

            workflowLatch_parser.inc();

            assertSumOfLatchs();

            try {

                /*
                 * Submit resource for parsing.
                 * 
                 * @todo it would be nice to return a Future here that tracked
                 * the document through the workflow.
                 */
                parserService.submit(task);

            } catch (RejectedExecutionException ex) {

                /*
                 * Back out the document since the task was not accepted for
                 * execution.
                 */
                // lock.lock();
                // try {
                assertSumOfLatchs();
                workflowLatch_document.dec();
                workflowLatch_parser.dec();
                assertSumOfLatchs();
                // } finally {
                // lock.unlock();
                // }

                throw ex;

            }

        } finally {

            lock.unlock();

        }

    }

    /**
     * Submit a resource for processing.
     * 
     * @param resource
     *            The resource (file or URL, but not a directory).
     * @param retryMillis
     *            The number of milliseconds to wait between retries when the
     *            parser service work queue is full. When ZERO (0L), a
     *            {@link RejectedExecutionException} will be thrown out
     *            instead.
     * 
     * @throws Exception
     *             if there is a problem creating the parser task.
     * @throws RejectedExecutionException
     *             if the service is shutdown -or- the retryMillis is ZERO
     *             (0L).
     */
    public void submitOne(final R resource, final long retryMillis)
            throws InterruptedException, Exception {

        if (resource == null)
            throw new IllegalArgumentException();

        if (retryMillis < 0)
            throw new IllegalArgumentException();

        int retryCount = 0;

        final long begin = System.currentTimeMillis();

        long lastLogTime = begin;

        // Note: the parser task will obtain the lock when it runs.
        final Callable<?> task = newParserTask(resource);

        while (true) {

            try {

                // submit resource for processing.
                submitOne(resource, task);

                return;

            } catch (RejectedExecutionException ex) {

                if (parserService.isShutdown()) {

                    // Do not retry since the service is closed.
                    throw ex;

                }

                if (retryMillis == 0L) {

                    // Do not retry since the retry interval is 0L.
                    throw ex;

                }

                // sleep for the retry interval.
                Thread.sleep(retryMillis);

                retryCount++;

                if (log.isInfoEnabled()) {

                    final long now = System.currentTimeMillis();

                    final long elapsedSinceLastLogTime = now - lastLogTime;

                    if (elapsedSinceLastLogTime > 5000) {

                        final long elapsed = now - begin;

                        lastLogTime = now;

                        log.info("Parser pool blocking: retryCount="
                                + retryCount + ", elapsed=" + elapsed
                                + "ms, resource=" + resource);

                        // log.info(getCounters().toString());

                    }

                }

                // retry
                continue;

            } catch (InterruptedException ex) {

                throw ex;

            } catch (Exception ex) {

                log.error(resource, ex);

            }

        }

    }

    /**
     * Submit all files in a directory for processing via
     * <code>submitOne(resource, retryMillis)</code>.
     * 
     * @param fileOrDir
     *            The file or directory.
     * @param filter
     *            An optional filter. Only the files selected by the filter
     *            will be processed.
     * @param retryMillis
     *            The number of milliseconds to wait between retries when the
     *            parser service work queue is full. When ZERO (0L), a
     *            {@link RejectedExecutionException} will be thrown out
     *            instead.
     * 
     * @return The #of files that were submitted for processing.
     * 
     * @throws Exception
     */
    public int submitAll(final File fileOrDir, final FilenameFilter filter,
            final long retryMillis) throws Exception {

        return new RunnableFileSystemLoader(fileOrDir, filter, retryMillis)
                .call();

    }

    /**
     * Open an input stream reading from the resource. If the resource ends
     * with <code>.gz</code> or <code>.zip</code> then the appropriate
     * decompression will be applied.
     * 
     * @param resource
     *            The resource identifier.
     * 
     * @todo This will only read the first entry from a ZIP file. Archives
     *       need to be recognized as such by the driver and expanded into a
     *       sequence of parser calls with the input stream. That will
     *       require a different entry point since we can't close the
     *       {@link ZipInputStream} until we have read all the entries in
     *       that file. The {@link ZipInputStream} is likely not thread safe
     *       so the same parser thread would have to consume each of the
     *       entries even though they must also be dealt with as distinct
     *       documents. Given all that, reading more than the first entry
     *       might not be worth it.
     */
    protected InputStream getInputStream(final R resource) throws IOException {

        InputStream is;

        if (resource instanceof File) {

            is = new FileInputStream((File) resource);

            final String name = ((File) resource).getName();

            if (name.endsWith(".gz")) {

                is = new GZIPInputStream(is);

            } else if (name.endsWith(".zip")) {

                is = new ZipInputStream(is);

            }

        } else if (resource instanceof URL) {

            is = ((URL) resource).openStream();

        } else {

            throw new UnsupportedOperationException();

        }

        return is;

    }

    /**
     * Return a task to parse the document. The task should allocate an
     * {@link AsynchronousStatementBufferImpl} for the document. When that
     * buffer is flushed, the document will be queued for further processing.
     * 
     * @param resource
     *            The resource to be parsed.
     * 
     * @return The task to execute.
     * 
     * @throws Exception
     */
    protected Callable<?> newParserTask(final R resource) throws Exception {

        final String resourceStr = resource.toString();

        if (log.isInfoEnabled())
            log.info("resource=" + resourceStr);

        final RDFFormat defaultFormat = getDefaultRDFFormat();

        /*
         * @todo This might be ignorant of .gz and .zip extensions.
         * 
         * @todo when resource is URL use the reported MIME type also.
         */
        final RDFFormat rdfFormat = (defaultFormat == null //
                ? RDFFormat.forFileName(resourceStr) //
                : RDFFormat.forFileName(resourceStr, defaultFormat)//
        );

        if (rdfFormat == null) {

            final String msg = "Could not determine interchange syntax - skipping : file="
                    + resource;

            log.error(msg);

            throw new RuntimeException(msg);

        }

        // Convert the resource identifier to a URL.
        final String baseURI;
        if (getClass().getResource(resourceStr) != null) {

            baseURI = getClass().getResource(resourceStr).toURI().toString();

        } else {

            baseURI = new File(resourceStr).toURI().toString();

        }

        return new ParserTask(resource, baseURI, rdfFormat);

    }

    /**
     * This task either loads an RDF resource or verifies that the told
     * triples found in that resource are present in the database. The
     * difference between data load and data verify is just the behavior of
     * the {@link IStatementBuffer}.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     */
    protected class ParserTask implements Callable<Void> {

        /**
         * The resource to be loaded.
         */
        private final R resource;

        /**
         * The base URL for that resource.
         */
        private final String baseURL;

        /**
         * The RDF interchange syntax that the file uses.
         */
        private final RDFFormat rdfFormat;

        /**
         * @param resource
         *            The resource to be loaded (a plain file or URL, but not
         *            a directory).
         * @param baseURL
         *            The base URL for that resource.
         * @param rdfFormat
         *            The RDF interchange syntax that the file uses.
         */
        public ParserTask(final R resource, final String baseURL,
                final RDFFormat rdfFormat) {

            if (resource == null)
                throw new IllegalArgumentException();

            if (baseURL == null)
                throw new IllegalArgumentException();

            this.resource = resource;

            this.baseURL = baseURL;

            this.rdfFormat = rdfFormat;

        }

        public Void call() throws Exception {

            // Note: the buffer will be passed along from queue to queue.
            final AsynchronousStatementBufferImpl buffer = AsynchronousStatementBufferFactory.this
                    .newStatementBuffer(resource);

            try {

                // open the input stream on the resource.
                final InputStream rdfStream = getInputStream(resource);

                try {

                    // Obtain a buffered reader on the input stream.
                    final Reader reader = new BufferedReader(
                            new InputStreamReader(rdfStream));

                    try {

                        // run the parser.
                        new PresortRioLoader(buffer).loadRdf(reader, baseURL,
                                rdfFormat, defaultGraph == null ? baseURL
                                        : defaultGraph, parserOptions);

                    } finally {

                        reader.close();

                    }

                } finally {

                    rdfStream.close();

                }

                lock.lock();
                try {

                    // done parsing this document.
                    documentsParsedCount.incrementAndGet();

                    // new workflow state (code lifted from BufferTerm2IdWrites).
                    guardLatch_term2Id.inc();
                    // if (ENABLE_BLOBS)
                    // guardLatch_term2Id.inc();

                    workflowLatch_parser.dec();

                    workflowLatch_bufferTids.inc();

                    documentTIDsWaitingCount.incrementAndGet();

                    assertSumOfLatchs();

                    // queue tasks to buffer writes on TERM2ID/BLOBS indices.
                    tidsWriterService.submit(new BufferTidWrites(buffer));
                    // if (ENABLE_BLOBS)
                    // tidsWriterService.submit(new BufferBlobsWrites(buffer));

                    // increment #of outstanding statements (parsed but not
                    // restart safe).
                    outstandingStatementCount.addAndGet(buffer.statementCount);

                    // increment #of unbuffered statements.
                    unbufferedStatementCount.addAndGet(buffer.statementCount);

                } finally {

                    lock.unlock();

                }

            } catch (Throwable ex) {

                // error state.
                lock.lock();
                try {

                    workflowLatch_parser.dec();

                    documentError(resource, ex);

                    throw new Exception(ex);

                } finally {

                    lock.unlock();

                }

            }

            // done.
if (log.isInfoEnabled()) log.info("resource=" + resource + " : " + this); return null; } } // ParserTask public String toString() { return super.toString() + "::" + getCounters(); } /** * * @param tripleStore * @param producerChunkSize * The chunk size used when writing chunks onto the master for * the asynchronous index write API. If this value is on the * order of the #of terms or statements in the parsed documents, * then all terms / statements will be written onto the master in * one chunk. The master will split the chunk based on the * separator keys for the index partitions and write splits onto * the sink for each index partition. The master and sink * configuration is specified via the {@link IndexMetadata} when * the triple store indices are created. * @param valuesInitialCapacity * The initial capacity of the map of the distinct RDF * {@link Value}s parsed from a single document. * @param bnodesInitialCapacity * The initial capacity of the map of the distinct RDF * {@link BNode}s parsed from a single document. * @param defaultFormat * The default {@link RDFFormat} which will be assumed. * @param defaultGraph * The value that will be used for the graph/context co-ordinate when * loading data represented in a triple format into a quad store. If * not given, then the context will be the resource identifier for * the resource being parsed. * @param parserOptions * Options for the {@link RDFParser}. * @param deleteAfter * <code>true</code> if the resource should be deleted once the * statements from that resource are restart safe on the target * database. * @param parserPoolSize * The #of worker threads in the thread pool for parsing RDF * documents. * @param parserQueueCapacity * The capacity of the bounded work queue for the service running * the parser tasks. * @param term2IdWriterPoolSize * The #of worker threads in the thread pool for buffering * asynchronous writes on the TERM2ID/BLOBS indices. * @param otherWriterPoolSize * The #of worker threads in the thread pool for buffering * asynchronous index writes on the other indices. * @param notifyPoolSize * The #of worker threads in the thread pool for handling * document success and document error notices. * @param pauseParsedPoolStatementThreshold * The maximum #of statements which can be parsed but not yet * buffered before requests for new parser tasks are paused [0: * {@link Long#MAX_VALUE}]. This allows you to place a constraint * on the RAM of the parsers. The RAM demand of the asynchronous * index write buffers is controlled by their master and sink * queue capacity and chunk size. 
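     * <p>
     * For example (the values below are illustrative assumptions only, not
     * tuned recommendations):
     * 
     * <pre>
     * new AsynchronousStatementBufferFactory<BigdataStatement, File>(
     *         tripleStore,            // the target ScaleOutTripleStore
     *         20000,                  // producerChunkSize
     *         10000,                  // valuesInitialCapacity
     *         16,                     // bnodesInitialCapacity
     *         RDFFormat.RDFXML,       // defaultFormat
     *         null,                   // defaultGraph (context defaults to the document URI)
     *         new RDFParserOptions(), // parserOptions
     *         false,                  // deleteAfter
     *         5,                      // parserPoolSize
     *         20,                     // parserQueueCapacity
     *         5,                      // term2IdWriterPoolSize
     *         5,                      // otherWriterPoolSize
     *         5,                      // notifyPoolSize
     *         Long.MAX_VALUE          // pauseParsedPoolStatementThreshold
     *         );
     * </pre>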
*/ public AsynchronousStatementBufferFactory(// final ScaleOutTripleStore tripleStore,// final int producerChunkSize, // final int valuesInitialCapacity,// final int bnodesInitialCapacity, // final RDFFormat defaultFormat,// final String defaultGraph,// final RDFParserOptions parserOptions,// final boolean deleteAfter,// final int parserPoolSize,// final int parserQueueCapacity,// final int term2IdWriterPoolSize,// final int otherWriterPoolSize,// final int notifyPoolSize,// final long pauseParsedPoolStatementThreshold// ) { if (tripleStore == null) throw new IllegalArgumentException(); if (parserOptions== null) throw new IllegalArgumentException(); if (producerChunkSize <= 0) throw new IllegalArgumentException(); if (valuesInitialCapacity <= 0) throw new IllegalArgumentException(); if (bnodesInitialCapacity <= 0) throw new IllegalArgumentException(); if (pauseParsedPoolStatementThreshold < 0) throw new IllegalArgumentException(); this.tripleStore = tripleStore; this.lexiconRelation = tripleStore.getLexiconRelation(); this.spoRelation = tripleStore.getSPORelation(); this.producerChunkSize = producerChunkSize; this.valuesInitialCapacity = valuesInitialCapacity; this.bnodesInitialCapacity = bnodesInitialCapacity; this.defaultFormat = defaultFormat; this.defaultGraph = defaultGraph; this.parserOptions = parserOptions; this.deleteAfter = deleteAfter; this.pauseParserPoolStatementThreshold = pauseParsedPoolStatementThreshold; if (tripleStore.isStatementIdentifiers()) { throw new UnsupportedOperationException("SIDs not supported"); } /* * Open the necessary buffers. * * Note: Lock is required by reopenBuffer_xxx() methods. */ lock.lock(); try { // TERM2ID/ID2TERM { final AsynchronousIndexWriteConfiguration config = tripleStore .getLexiconRelation().getTerm2IdIndex() .getIndexMetadata() .getAsynchronousIndexWriteConfiguration(); assertLiveness(lexiconRelation.getTerm2IdIndex() .getIndexMetadata().getName(), config); buffer_t2id = ((IScaleOutClientIndex) lexiconRelation .getTerm2IdIndex()) .newWriteBuffer( new Term2IdWriteProcAsyncResultHandler(false/* readOnly */), new DefaultDuplicateRemover<BigdataValue>(true/* testRefs */), new Term2IdWriteProcConstructor( false/* readOnly */, lexiconRelation .isStoreBlankNodes(), lexiconRelation .getTermIdBitsToReverse())); buffer_id2t = ((IScaleOutClientIndex) lexiconRelation .getId2TermIndex()) .newWriteBuffer( null/* resultHandler */, new DefaultDuplicateRemover<BigdataValue>(true/* testRefs */), Id2TermWriteProcConstructor.INSTANCE); } // BLOBS { final AsynchronousIndexWriteConfiguration config = tripleStore .getLexiconRelation().getBlobsIndex() .getIndexMetadata() .getAsynchronousIndexWriteConfiguration(); assertLiveness(lexiconRelation.getBlobsIndex() .getIndexMetadata().getName(), config); buffer_blobs = ((IScaleOutClientIndex) lexiconRelation .getBlobsIndex()) .newWriteBuffer( new BlobsWriteProcAsyncResultHandler(false/* readOnly */), new DefaultDuplicateRemover<BigdataValue>(true/* testRefs */), new BlobsWriteProcConstructor( false/* readOnly */, lexiconRelation .isStoreBlankNodes())); } // TEXT { if (lexiconRelation.isTextIndex()) { /* * FIXME Must hook in once the tids are available so we can * tokenize the RDF Literals (Note: only the literals, and * only those literals that will be indexed) and write out * the tuples on the text index. * * TODO Unit tests. Must enable the full text index and must * verify that both TermIds and BlobIVs were indexed. 
Inline * Unicode IVs also need to be indexed (they are small, * unless we change to [s] centric full text indexing). */ final BigdataValueCentricFullTextIndex tmp = (BigdataValueCentricFullTextIndex) lexiconRelation .getSearchEngine(); buffer_text = ((IScaleOutClientIndex) tmp.getIndex()).newWriteBuffer( textResultHandler,// counts tuples written on index new DefaultDuplicateRemover<BigdataValue>(true/* testRefs */), TextIndexWriteProc.IndexWriteProcConstructor.NO_OVERWRITE); indexDatatypeLiterals = Boolean .parseBoolean(lexiconRelation.getProperties().getProperty( AbstractTripleStore.Options.TEXT_INDEX_DATATYPE_LITERALS, AbstractTripleStore.Options.DEFAULT_TEXT_INDEX_DATATYPE_LITERALS)); } else { buffer_text = null; indexDatatypeLiterals = false; } } /* * STATEMENT INDICES * * Allocate and populate map with the SPOKeyOrders that we will be * using. */ { buffer_stmts = new LinkedHashMap<SPOKeyOrder, IRunnableBuffer<KVO<ISPO>[]>>( tripleStore.isQuads() ? 6 : 3); final Iterator<SPOKeyOrder> itr = tripleStore.getSPORelation() .statementKeyOrderIterator(); while (itr.hasNext()) { final SPOKeyOrder keyOrder = itr.next(); final IRunnableBuffer<KVO<ISPO>[]> buffer = ((IScaleOutClientIndex) spoRelation .getIndex(keyOrder)) .newWriteBuffer( keyOrder.isPrimaryIndex() ? statementResultHandler : null, new DefaultDuplicateRemover<ISPO>(true/* testRefs */), SPOIndexWriteProc.IndexWriteProcConstructor.INSTANCE); buffer_stmts.put(keyOrder, buffer); } } } finally { lock.unlock(); } /* * Set iff this is a federation based triple store. The various queue * statistics are reported only for this case. */ final AbstractFederation<?> fed; if (tripleStore.getIndexManager() instanceof AbstractFederation) { fed = (AbstractFederation<?>) tripleStore.getIndexManager(); } else { fed = null; } /* * Note: This service must not reject tasks as long as the statement * buffer factory is open. It is configured with a bounded workQueue and * a bounded thread pool. The #of threads in the pool should build up to * the maximumPoolSize and idle threads will be retired, but only after * several minutes. */ parserService = new ParserThreadPoolExecutor(// 1, // corePoolSize parserPoolSize, // maximumPoolSize 1, // keepAliveTime TimeUnit.MINUTES, // keepAlive units. new LinkedBlockingQueue<Runnable>(parserQueueCapacity),// workQueue new DaemonThreadFactory(getClass().getName()+"_parserService") // threadFactory ); /* * Note: This service MUST NOT block or reject tasks as long as the * statement buffer factory is open. It is configured with an * unbounded workQueue and a bounded thread pool. The #of threads in * the pool should build up to the maximumPoolSize and idle threads * will be retired, but only after several minutes. * * Note: Since we are using an unbounded queue, at most corePoolSize * threads will be created. Therefore we interpret the caller's argument * as both the corePoolSize and the maximumPoolSize. */ tidsWriterService = new ThreadPoolExecutor(// term2IdWriterPoolSize, // corePoolSize term2IdWriterPoolSize, // maximumPoolSize 1, // keepAliveTime TimeUnit.MINUTES, // keepAlive units. new LinkedBlockingQueue<Runnable>(/* unbounded */),// workQueue new DaemonThreadFactory(getClass().getName()+"_term2IdWriteService") // threadFactory ); /* * Note: This service MUST NOT block or reject tasks as long as the * statement buffer factory is open. It is configured with an unbounded * workQueue and a bounded thread pool. 
The #of threads in the pool * should build up to the maximumPoolSize and idle threads will be * retired, but only after several minutes. * * Note: Since we are using an unbounded queue, at most corePoolSize * threads will be created. Therefore we interpret the caller's argument * as both the corePoolSize and the maximumPoolSize. */ otherWriterService = new ThreadPoolExecutor(// otherWriterPoolSize, // corePoolSize otherWriterPoolSize, // maximumPoolSize 1, // keepAliveTime TimeUnit.MINUTES, // keepAlive units. new LinkedBlockingQueue<Runnable>(/* unbounded */),// workQueue new DaemonThreadFactory(getClass().getName()+"_otherWriteService") // threadFactory ); /* * Note: This service MUST NOT block or reject tasks as long as the * statement buffer factory is open. It is configured with an unbounded * workQueue and a bounded thread pool. The #of threads in the pool * should build up to the maximumPoolSize and idle threads will be * retired, but only after several minutes. * * Note: Since we are using an unbounded queue, at most corePoolSize * threads will be created. Therefore we interpret the caller's argument * as both the corePoolSize and the maximumPoolSize. */ notifyService = new ThreadPoolExecutor(// notifyPoolSize, // corePoolSize notifyPoolSize, // maximumPoolSize 1, // keepAliveTime TimeUnit.MINUTES, // keepAlive units. new LinkedBlockingQueue<Runnable>(/* unbounded */),// workQueue new DaemonThreadFactory(getClass().getName()+"_notifyService") // threadFactory ); /* * @todo If sampling should be done for non-federation cases then we * need to pass in the ScheduledExecutorService, expose a method to * start sampling on the caller's service, or create a * ScheduledExecutorService within this factory class. */ serviceStatisticsTask = (fed == null ? null : new ServiceStatisticsTask(fed.getScheduledExecutorService())); } // ctor /** * Note: If there is a large sink idle timeout on the TERM2ID index then the * sink will not flush itself automatically once its master is no longer * pushing data. This situation can occur any time the parser pool is * paused. A low sink idle timeout is required for the TERM2ID sink to flush * its writes to the database, so the TIDs will be assigned, statements for * the parsed documents will be buffered, and new parser threads can begin. * * @todo This should probably be automatically overridden for this use case. * However, the asynchronous index configuration is not currently * passed through with the requests but is instead global (on the * IndexMetadata object for the index on the MDS). */ private static void assertLiveness( final String name, final AsynchronousIndexWriteConfiguration config) { if (config.getSinkIdleTimeoutNanos() > TimeUnit.SECONDS.toNanos(60)) { log.error("Large idle timeout will not preserve liveness: index=" + name + ", config=" + config); } } /** * {@link Runnable} samples the services and provides reporting via * {@link #getCounters()}. */ private class ServiceStatisticsTask implements Runnable { private final Map<String, ThreadPoolExecutorBaseStatisticsTask> tasks = new LinkedHashMap<String, ThreadPoolExecutorBaseStatisticsTask>(); private final ScheduledFuture<?> serviceStatisticsFuture; public ServiceStatisticsTask(final ScheduledExecutorService scheduledService) { /* * Add scheduled tasks to report the moving average of the queue * length, active count, etc. for the various services used by this * factory. 
*/ tasks.put("parserService", new ThreadPoolExecutorBaseStatisticsTask(parserService)); tasks.put("term2IdWriterService", new ThreadPoolExecutorBaseStatisticsTask( tidsWriterService)); tasks.put("otherWriterService", new ThreadPoolExecutorBaseStatisticsTask( otherWriterService)); tasks.put("notifyService", new ThreadPoolExecutorBaseStatisticsTask(notifyService)); // schedule this task to sample performance counters. serviceStatisticsFuture = scheduledService.scheduleWithFixedDelay( this, 0/* initialDelay */, 1000/* delay */, TimeUnit.MILLISECONDS); } protected void finalize() throws Exception { cancel(); } public void cancel() { serviceStatisticsFuture.cancel(true/* mayInterruptIfRunning */); } public void run() { for(Runnable r : tasks.values()) { try { r.run(); } catch(Throwable t) { log.error(r,t); } } } public CounterSet getCounters() { final CounterSet counterSet = new CounterSet(); for (Map.Entry<String, ThreadPoolExecutorBaseStatisticsTask> e : tasks .entrySet()) { counterSet.makePath(e.getKey()).attach( e.getValue().getCounters()); } return counterSet; } } public boolean isAnyDone() { /* * Note: lock is required to make this test atomic with respect to * re-opening of buffers. */ lock.lock(); try { if (buffer_blobs != null) if (buffer_blobs.getFuture().isDone()) return true; if (buffer_t2id != null) if (buffer_t2id.getFuture().isDone()) return true; if (buffer_id2t.getFuture().isDone()) return true; if (buffer_text != null) if (buffer_text.getFuture().isDone()) return true; for (Map.Entry<SPOKeyOrder, IRunnableBuffer<KVO<ISPO>[]>> e : buffer_stmts .entrySet()) { final IRunnableBuffer<KVO<ISPO>[]> buffer = e.getValue(); if (buffer != null && buffer.getFuture().isDone()) return true; } if (parserService.isTerminated()) return true; if (tidsWriterService.isTerminated()) return true; if (otherWriterService.isTerminated()) return true; if (notifyService != null && notifyService.isTerminated()) return true; return false; } finally { lock.unlock(); } } public void cancelAll(final boolean mayInterruptIfRunning) { if(log.isInfoEnabled()) log.info("Cancelling futures."); if (buffer_blobs != null) buffer_blobs.getFuture().cancel(mayInterruptIfRunning); if (buffer_t2id != null) buffer_t2id.getFuture().cancel(mayInterruptIfRunning); buffer_id2t.getFuture().cancel(mayInterruptIfRunning); if (buffer_text != null) buffer_text.getFuture().cancel(mayInterruptIfRunning); for (Map.Entry<SPOKeyOrder, IRunnableBuffer<KVO<ISPO>[]>> e : buffer_stmts .entrySet()) { final IRunnableBuffer<KVO<ISPO>[]> buffer = e.getValue(); if (buffer != null) buffer.getFuture().cancel(mayInterruptIfRunning); } notifyEnd(); } /** * Awaits a signal that all documents which have queued writes are * finished and then closes the remaining buffers. */ public void close() { log.info(""); try { lock.lockInterruptibly(); try { assertSumOfLatchs(); // not decremented until doc fails parse or is doing TERM2ID writes. workflowLatch_parser.await(); assertSumOfLatchs(); /* * No more tasks will request TIDs, so close the TERM2ID and * BLOBS masters. It will flush its writes. 
*/ guardLatch_term2Id.await(); { if (buffer_t2id != null) { if (log.isInfoEnabled()) { log.info("Closing TERM2ID buffer."); } buffer_t2id.close(); } if (buffer_blobs != null) { if (log.isInfoEnabled()) { log.info("Closing BLOBS buffer."); } buffer_blobs.close(); } workflowLatch_bufferTids.await(); tidsWriterService.shutdown(); new ShutdownHelper(tidsWriterService, 10L, TimeUnit.SECONDS) { protected void logTimeout() { log .warn("Waiting for term2Id write service shutdown."); } }; assertSumOfLatchs(); } /* * No new index write tasks may start (and all should have * terminated by now). */ guardLatch_other.await(); { if (log.isInfoEnabled()) log.info("Closing remaining buffers."); buffer_id2t.close(); if (buffer_text != null) buffer_text.close(); for (Map.Entry<SPOKeyOrder, IRunnableBuffer<KVO<ISPO>[]>> e : buffer_stmts .entrySet()) { final IRunnableBuffer<KVO<ISPO>[]> buffer = e.getValue(); if(buffer!=null) buffer.close(); } workflowLatch_bufferOther.await(); otherWriterService.shutdown(); new ShutdownHelper(otherWriterService, 10L, TimeUnit.SECONDS) { protected void logTimeout() { log.warn("Waiting for other write service shutdown."); } }; assertSumOfLatchs(); } // wait for the global latch. workflowLatch_document.await(); assertSumOfLatchs(); if (notifyService != null) { // wait until no notifications are pending. guardLatch_notify.await(); // note: shutdown should be immediate since nothing should // be pending. notifyService.shutdown(); new ShutdownHelper(notifyService, 10L, TimeUnit.SECONDS) { protected void logTimeout() { log.warn("Waiting for delete service shutdown."); } }; } } finally { lock.unlock(); notifyEnd(); } } catch (InterruptedException ex) { // @todo should declare this exception in the API. throw new RuntimeException(ex); } } public void awaitAll() throws InterruptedException, ExecutionException { if(log.isInfoEnabled()) log.info("Start"); // Close the asynchronous write buffers. close(); // Await futures for the asynchronous write buffers. if(log.isInfoEnabled()) log.info("Awaiting futures."); if (buffer_blobs != null) buffer_blobs.getFuture().get(); if (buffer_t2id != null) buffer_t2id.getFuture().get(); buffer_id2t.getFuture().get(); if (buffer_text != null) buffer_text.getFuture().get(); for (Map.Entry<SPOKeyOrder, IRunnableBuffer<KVO<ISPO>[]>> e : buffer_stmts .entrySet()) { final IRunnableBuffer<KVO<ISPO>[]> buffer = e.getValue(); if (buffer != null) { buffer.getFuture().get(); } } if(log.isInfoEnabled()) log.info("Done."); } /** * Invoked after a document has become restart safe. If * {@link #newSuccessTask(Object)} returns a {@link Runnable} then that will * be executed on the {@link #notifyService}. * * @param resource * The document identifier. */ final protected void documentDone(final R resource) { if (!lock.isHeldByCurrentThread()) throw new IllegalMonitorStateException(); try { final Runnable task = newSuccessTask(resource); if (task != null) { // increment before we submit the task. // lock.lock(); // try { guardLatch_notify.inc(); // } finally { // lock.unlock(); // } try { // queue up success notice. notifyService.submit(new Runnable() { public void run() { try { task.run(); } finally { lock.lock(); // acquire latch w/in task. try { // decrement after the task is done. guardLatch_notify.dec(); } finally { lock.unlock(); } } } }); } catch (RejectedExecutionException ex) { // decrement latch since tasks did not run. // lock.lock(); // try { guardLatch_notify.dec(); // } finally { // lock.unlock(); // } // rethrow exception (will be logged below). 
throw ex; } } } catch (Throwable t) { // Log @ ERROR and ignore. log.error(t, t); } } /** * Invoked after a document has failed. If * {@link #newFailureTask(Object, Throwable)} returns a {@link Runnable} * then that will be executed on the {@link #notifyService}. * * @param resource * The document identifier. * @param t * The exception. */ final protected void documentError(final R resource, final Throwable t) { if (!lock.isHeldByCurrentThread()) throw new IllegalMonitorStateException(); documentErrorCount.incrementAndGet(); /* * Note: this is responsible for decrementing the #of documents whose * processing is not yet complete. This must be done for each task whose * future is not watched. However, we MUST NOT do this twice for any * given document since that would mess with the counter. That counter * is critical as it forms part of the termination condition for the * total data load operation. */ workflowLatch_document.dec(); try { final Runnable task = newFailureTask(resource, t); if (task != null) { // increment before we submit the task. guardLatch_notify.inc(); try { // queue up failure notice. notifyService.submit(new Runnable() { public void run() { try { task.run(); } finally { lock.lock(); // acquire latch w/in task. try { // decrement after the task is done. guardLatch_notify.dec(); } finally { lock.unlock(); } } } }); } catch (RejectedExecutionException ex) { // decrement latch since tasks did not run. // lock.lock(); // try { guardLatch_notify.dec(); // } finally { // lock.unlock(); // } // rethrow exception (will be logged below). throw ex; } } } catch (Throwable ex) { log.error(ex, ex); } } /** * Return the optional task to be executed for a resource which has been * successfully processed and whose assertions are now restart safe on the * database. The task, if any, will be run on the {@link #notifyService}. * <p> * The default implementation runs a {@link DeleteTask} IFF * <i>deleteAfter</i> was specified as <code>true</code> to the ctor and * otherwise returns <code>null</code>. The event is logged @ INFO. * * @param resource * The resource. * * @return The task to run -or- <code>null</code> if no task should be run. */ protected Runnable newSuccessTask(final R resource) { if (log.isInfoEnabled()) log.info("resource=" + resource); if (deleteAfter) { return new DeleteTask(resource); } return null; } /** * Return the optional task to be executed for a resource for which * processing has failed. The task, if any, will be run on the * {@link #notifyService}. * <p> * The default implementation logs a message @ ERROR. * * @param resource * The resource. * @param cause * The cause. * * @return The task to run -or- <code>null</code> if no task should be * run. */ protected Runnable newFailureTask(final R resource, final Throwable cause) { return new Runnable() { public void run() { log.error(resource, cause); } }; } /** * Task deletes a resource from the local file system. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> */ protected class DeleteTask implements Runnable { private final R resource; public DeleteTask(final R resource) { if (resource == null) throw new IllegalArgumentException(); this.resource = resource; } public void run() { deleteResource(resource); } } /** * Delete a file whose data have been made restart safe on the database from * the local file system (this must be overridden to handle resources which * are not {@link File}s). * * @param resource * The resource. 
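 *
 *            For a resource type other than {@link File}, a subclass
 *            would override this method; a hypothetical sketch:
 *
 *            <pre>
 *            protected void deleteResource(final URL resource) {
 *                // e.g., archive or de-queue the document instead of
 *                // deleting it from the file system.
 *            }
 *            </pre>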
 */
protected void deleteResource(final R resource) {

    if (resource instanceof File) {

        if (!((File) resource).delete()) {

            log.warn("Could not delete: " + resource);

        }

    }

}

public CounterSet getCounters() {

    final CounterSet counterSet = new CounterSet();

    /**
     * The elapsed milliseconds.
     */
    counterSet.addCounter("elapsedMillis", new Instrument<Long>() {
        @Override
        protected void sample() {
            setValue(getElapsedMillis());
        }
    });

    /**
     * The #of documents that have been parsed.
     */
    counterSet.addCounter("documentsParsedCount", new Instrument<Long>() {
        @Override
        protected void sample() {
            setValue(documentsParsedCount.get());
        }
    });

    /**
     * The #of documents whose TERM2ID/BLOBS writes have begun to be
     * buffered but are not yet restart-safe on the database.
     */
    counterSet.addCounter("documentTIDsWaitingCount", new Instrument<Long>() {
        @Override
        protected void sample() {
            setValue(documentTIDsWaitingCount.get());
        }
    });

    /**
     * The #of documents whose TERM2ID/BLOBS writes are restart-safe on the
     * database.
     */
    counterSet.addCounter("documentTIDsReadyCount", new Instrument<Long>() {
        @Override
        protected void sample() {
            setValue(documentTIDsReadyCount.get());
        }
    });

    /**
     * The #of tuples written on the full text index (this does not count
     * tuples that were already present on the index).
     */
    counterSet.addCounter("fullTextTupleWriteCount", new Instrument<Long>() {
        @Override
        protected void sample() {
            setValue(textResultHandler.getResult().longValue());
        }
    });

    /**
     * The #of triples written on the SPO index (this does not count triples
     * that were already present on the index).
     */
    counterSet.addCounter("toldTriplesWriteCount", new Instrument<Long>() {
        @Override
        protected void sample() {
            setValue(getStatementCount());
        }
    });

    /**
     * The #of told triples parsed from documents using this factory and
     * made restart safe on the database. This is incremented each time a
     * document has been made restart safe by the #of distinct told triples
     * parsed from that document.
     * <p>
     * Note: The same triple can occur in more than one document, and
     * documents having duplicate triples may be loaded by distributed
     * clients. The actual #of triples on the database is only available by
     * querying the database.
     */
    counterSet.addCounter("toldTriplesRestartSafeCount",
            new Instrument<Long>() {
                @Override
                protected void sample() {
                    setValue(toldTriplesRestartSafeCount.get());
                }
            });

    /**
     * The told triples per second rate which have been made restart safe by
     * this factory object. When you are loading using multiple clients, the
     * total told triples per second rate is the aggregation across all of
     * those instances.
     */
    counterSet.addCounter("toldTriplesRestartSafePerSec",
            new Instrument<Long>() {
                @Override
                protected void sample() {
                    final long elapsed = getElapsedMillis();
                    // guard against division by zero during startup.
                    final long tps = elapsed == 0 ? 0L
                            : (long) (toldTriplesRestartSafeCount.get()
                                    * 1000d / elapsed);
                    setValue(tps);
                }
            });

    /**
     * The #of documents which have been processed by this client and are
     * now restart safe on the database.
     */
    counterSet.addCounter("documentRestartSafeCount", new Instrument<Long>() {
        @Override
        protected void sample() {
            setValue(documentRestartSafeCount.get());
        }
    });

    /**
     * The #of documents for which the buffered index write tasks failed.
     */
    counterSet.addCounter("documentErrorCount", new Instrument<Long>() {
        @Override
        protected void sample() {
            setValue(documentErrorCount.get());
        }
    });

    /*
     * The latches are used to guard the termination conditions for the
     * factory. If they are non-zero the factory cannot terminate normally.
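     *
     * (assertSumOfLatchs() presumably verifies the invariant that the
     * [document] latch equals the sum of the per-stage latches:
     * parser + bufferTids + bufferOther.)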
     */
    {
        final CounterSet workflowLatchSet = counterSet
                .makePath("workflowLatch");

        workflowLatchSet.addCounter("parser", new Instrument<Long>() {
            @Override
            protected void sample() {
                setValue(workflowLatch_parser.get());
            }
        });

        workflowLatchSet.addCounter("bufferTids", new Instrument<Long>() {
            @Override
            protected void sample() {
                setValue(workflowLatch_bufferTids.get());
            }
        });

        workflowLatchSet.addCounter("bufferOther", new Instrument<Long>() {
            @Override
            protected void sample() {
                setValue(workflowLatch_bufferOther.get());
            }
        });

        // latch over the total life cycle for a document.
        workflowLatchSet.addCounter("document", new Instrument<Long>() {
            @Override
            protected void sample() {
                setValue(workflowLatch_document.get());
            }
        });

    } // latches

    /**
     * Latches used to guard the buffers and close them in a timely manner.
     */
    {
        final CounterSet bufferGuardSet = counterSet.makePath("bufferGuard");

        bufferGuardSet.addCounter("guardTerm2Id", new Instrument<Long>() {
            @Override
            protected void sample() {
                setValue(guardLatch_term2Id.get());
            }
        });

        bufferGuardSet.addCounter("guardOther", new Instrument<Long>() {
            @Override
            protected void sample() {
                setValue(guardLatch_other.get());
            }
        });

        bufferGuardSet.addCounter("guardNotify", new Instrument<Long>() {
            @Override
            protected void sample() {
                setValue(guardLatch_notify.get());
            }
        });

    }

    /*
     * Counters pertaining to the logic which suspends new parser task
     * requests if too many statements are currently buffered.
     */
    {
        final CounterSet pauseSet = counterSet.makePath("pause");

        /*
         * The #of parsed or buffered RDF Statements not yet restart safe
         * (current value).
         */
        pauseSet.addCounter("outstandingStatementCount",
                new Instrument<Long>() {
                    @Override
                    protected void sample() {
                        setValue(outstandingStatementCount.get());
                    }
                });

        /*
         * The #of parsed but not yet buffered RDF Statements (current
         * value).
         */
        pauseSet.addCounter("unbufferedStatementCount",
                new Instrument<Long>() {
                    @Override
                    protected void sample() {
                        setValue(unbufferedStatementCount.get());
                    }
                });

        /*
         * The maximum #of statements parsed but not yet buffered before we
         * suspend new parse requests.
         */
        pauseSet.addCounter("pauseParserPoolStatementThreshold",
                new OneShotInstrument<Long>(
                        pauseParserPoolStatementThreshold));

        // The #of suspended parse request threads (current value).
        pauseSet.addCounter("pausedThreadCount", new Instrument<Long>() {
            @Override
            protected void sample() {
                setValue(pausedThreadCount.get());
            }
        });

        // The #of suspended parse request threads (cumulative).
        pauseSet.addCounter("poolPausedCount", new Instrument<Long>() {
            @Override
            protected void sample() {
                setValue(poolPausedCount.get());
            }
        });

    }

    // services
    {
        counterSet.makePath("services").attach(
                serviceStatisticsTask.getCounters());
    }

    // if(log.isInfoEnabled())
    // {
    // @todo this is just for debugging problems with parser blocking.
    //
    // final String fqn = tripleStore.getLexiconRelation().getFQN(
    //         LexiconKeyOrder.TERM2ID);
    //
    // counterSet.makePath("TERM2ID").attach(
    //         ((AbstractFederation) tripleStore.getIndexManager())
    //                 .getIndexCounters(fqn).getCounters());
    //
    // }

    return counterSet;

}

/**
 * {@link Callable} class applies the factory to either a single file or to
 * all files within a directory.
 */
private class RunnableFileSystemLoader implements Callable<Integer> {

    // volatile boolean done = false;

    private int count = 0;

    // private long retryCount = 0L;

    final File fileOrDir;

    final FilenameFilter filter;

    final long retryMillis;

    /**
     *
     * @param fileOrDir
     *            The file or directory to be loaded.
     * @param filter
     *            An optional filter on files that will be accepted when
     *            processing a directory.
     * @param retryMillis
     *            The number of milliseconds to wait between retries when
     *            the parser service work queue is full. When ZERO (0L), a
     *            {@link RejectedExecutionException} will be thrown instead.
     */
    public RunnableFileSystemLoader(final File fileOrDir,
            final FilenameFilter filter, final long retryMillis) {

        if (fileOrDir == null)
            throw new IllegalArgumentException();

        if (retryMillis < 0)
            throw new IllegalArgumentException();

        this.fileOrDir = fileOrDir;

        this.filter = filter; // MAY be null.

        this.retryMillis = retryMillis;

    }

    /**
     * Creates a task using the {@link #taskFactory}, submits it to the
     * {@link #loader} and waits for the task to complete. Errors are
     * logged, but not thrown.
     *
     * @throws RuntimeException
     *             if interrupted.
     */
    public Integer call() throws Exception {

        process2(fileOrDir);

        return count;

    }

    /**
     * Scans file(s) recursively starting with the named file, and, for each
     * file that passes the filter, submits the task.
     *
     * @param file
     *            A plain file or a directory containing files to be
     *            processed.
     *
     * @throws InterruptedException
     *             if the thread is interrupted while queuing tasks.
     */
    private void process2(final File file) throws InterruptedException {

        if (file.isHidden()) {
            // ignore hidden files.
            return;
        }

        if (file.isDirectory()) {

            if (log.isInfoEnabled())
                log.info("Scanning directory: " + file);

            // filter is optional.
            final File[] files = filter == null ? file.listFiles() : file
                    .listFiles(filter);

            for (final File f : files) {
                process2(f);
            }

        } else {

            /*
             * Processing a standard file.
             */

            if (log.isInfoEnabled())
                log.info("Will load: " + file);

            try {
                submitOne((R) file, retryMillis);
                count++;
                return;
            } catch (InterruptedException ex) {
                throw ex;
            } catch (Exception ex) {
                log.error(file, ex);
            }

        }

    }

}

/**
 * Class applies the term identifiers assigned by the
 * {@link Term2IdWriteProc} to the {@link BigdataValue} references in the
 * {@link KVO} correlated with each {@link Split} of data processed by that
 * procedure.
 * <p>
 * Note: Of necessity, this requires access to the {@link BigdataValue}s
 * whose term identifiers are being resolved. This implementation presumes
 * that the array specified to the ctor and the array returned for each
 * chunk that is processed have correlated indices and that the offset into
 * the array is given by {@link Split#fromIndex}.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
static private class Term2IdWriteProcAsyncResultHandler
        implements
        IAsyncResultHandler<Term2IdWriteProc.Result, Void, BigdataValue, KVO<BigdataValue>> {

    private final boolean readOnly;

    /**
     * @param readOnly
     *            if readOnly was specified for the
     *            {@link Term2IdWriteProc}.
     */
    public Term2IdWriteProcAsyncResultHandler(final boolean readOnly) {

        this.readOnly = readOnly;

    }

    /**
     * NOP
     *
     * @see #aggregateAsync(KVO[],
     *      com.bigdata.rdf.lexicon.Term2IdWriteProc.Result, Split)
     */
    public void aggregate(final Term2IdWriteProc.Result result,
            final Split split) {
    }

    /**
     * Copy the assigned / discovered term identifiers onto the
     * corresponding elements of the terms[].
     */
    public void aggregateAsync(final KVO<BigdataValue>[] chunk,
            final Term2IdWriteProc.Result result, final Split split) {

        for (int i = 0; i < chunk.length; i++) {

            @SuppressWarnings("rawtypes")
            final IV iv = result.ivs[i];

            if (iv == null) {

                if (!readOnly)
                    throw new AssertionError();

            } else {

                // assign the term identifier.
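                // (the same IV is then mapped onto any eliminated
                // duplicates via KVOList, as coded below)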
chunk[i].obj.setIV(iv); if (chunk[i] instanceof KVOList) { final KVOList<BigdataValue> tmp = (KVOList<BigdataValue>) chunk[i]; if (!tmp.isDuplicateListEmpty()) { // assign the term identifier to the duplicates. tmp.map(new AssignTermId(iv)); } } if (log.isDebugEnabled()) { log .debug("termId=" + iv + ", term=" + chunk[i].obj); } } } } public Void getResult() { return null; } } /** * Class applies the term identifiers assigned by the * {@link BlobsWriteProc} to the {@link BigdataValue} references in the * {@link KVO} correlated with each {@link Split} of data processed by that * procedure. * <p> * Note: Of necessity, this requires access to the {@link BigdataValue}s * whose term identifiers are being resolved. This implementation presumes * that the array specified to the ctor and the array returned for each * chunk that is processed have correlated indices and that the offset into * the array is given by {@link Split#fromIndex}. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> */ static private class BlobsWriteProcAsyncResultHandler implements IAsyncResultHandler<BlobsWriteProc.Result, Void, BigdataValue, KVO<BigdataValue>> { private final boolean readOnly; /** * * @param readOnly * if readOnly was specified for the {@link BlobsWriteProc} * . */ public BlobsWriteProcAsyncResultHandler(final boolean readOnly) { this.readOnly = readOnly; } /** * NOP * * @see #aggregateAsync(KVO[], * com.bigdata.rdf.lexicon.BlobsWriteProc.Result, Split) */ public void aggregate(final BlobsWriteProc.Result result, final Split split) { } /** * Copy the assigned / discovered term identifiers onto the * corresponding elements of the terms[]. */ public void aggregateAsync(final KVO<BigdataValue>[] chunk, final BlobsWriteProc.Result result, final Split split) { for (int i = 0; i < chunk.length; i++) { final int counter = result.counters[i]; if (counter == BlobsIndexHelper.NOT_FOUND) { if (!readOnly) throw new AssertionError(); } else { // The value whose IV we have discovered/asserted. final BigdataValue value = chunk[i].obj; // Rebuild the IV. @SuppressWarnings("rawtypes") final BlobIV<?> iv = new BlobIV(VTE.valueOf(value), value .hashCode(), (short) counter); // assign the term identifier. value.setIV(iv); if (chunk[i] instanceof KVOList) { final KVOList<BigdataValue> tmp = (KVOList<BigdataValue>) chunk[i]; if (!tmp.isDuplicateListEmpty()) { // assign the term identifier to the duplicates. tmp.map(new AssignTermId(iv)); } } if (log.isDebugEnabled()) { log .debug("termId=" + iv + ", term=" + chunk[i].obj); } } } } public Void getResult() { return null; } } /** * Wrap a {@link BigdataValue}[] with a chunked iterator. * <p> * Note: This resolves inline {@link IV}s and filters them out of the * visited {@link BigdataValue}s as a side-effect. */ @SuppressWarnings({ "unchecked", "rawtypes" }) static <V extends BigdataValue> IChunkedIterator<V> newValuesIterator( final LexiconRelation r, final Iterator<V> itr, final int chunkSize) { return new ChunkedWrappedIterator(new Striterator(itr) .addFilter(new Filter() { private static final long serialVersionUID = 1L; @Override public boolean isValid(final Object obj) { /* * Assigns the IV as a side effect iff the RDF Value can * be inlined according to the governing lexicon * configuration and returns true iff the value CAN NOT * be inlined. Thus, inlining is done as a side effect * while the caller sees only those Values which need to * be written onto the TERM2ID/BLOBS index. 
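 *
 * For example, under a typical inline configuration a literal such
 * as "5"^^xsd:int is assigned an inline IV here (and filtered
 * out), while URIs and large literals pass through to be written
 * on the TERM2ID or BLOBS index.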
*/ return r.getInlineIV((Value) obj) == null; } }), chunkSize, BigdataValue.class); } /** * Wrap a {@link BigdataValue}[] with a chunked iterator which filters out * blank nodes and blobs (neither of which is written onto the reverse * index). */ @SuppressWarnings({ "unchecked", "rawtypes" }) private static <V extends BigdataValue> IChunkedIterator<V> newId2TIterator( final LexiconRelation r, final Iterator<V> itr, final int chunkSize) { return new ChunkedWrappedIterator(new Striterator(itr) .addFilter(new Filter() { private static final long serialVersionUID = 1L; /* * Filter hides blank nodes since we do not write them onto * the reverse index. * * Filter does not visit blobs since we do not want to write * those onto the reverse index either. */ @Override public boolean isValid(final Object obj) { final BigdataValue v = (BigdataValue) obj; if (v instanceof BNode) return false; if (r.isBlob(v)) return false; return true; } }), chunkSize, BigdataValue.class); } /** * Return iterator visiting only the {@link BigdataLiteral}s that we want * to write on the full text index. * @param r * @param itr * @param chunkSize * @return */ @SuppressWarnings({ "unchecked", "rawtypes" }) private static <V extends BigdataValue> IChunkedIterator<V> newTextIterator( final LexiconRelation r, final Iterator<V> itr, final int chunkSize, final boolean indexDatatypeLiterals) { return new ChunkedWrappedIterator( new Striterator(itr).addFilter(new Filter() { private static final long serialVersionUID = 1L; /* * Filter hides blank nodes since we do not write them onto * the TEXT index. */ @Override public boolean isValid(final Object obj) { if (!(obj instanceof BigdataLiteral)) { // Only index Literals. return false; } final BigdataLiteral lit = (BigdataLiteral) obj; if (!indexDatatypeLiterals && lit.getDatatype() != null) { // Ignore datatype literals. return false; } return true; } }), chunkSize, BigdataValue.class); } /** * Asynchronous writes on the TERM2ID index. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan * Thompson</a> * * @todo something similar for the SIDs */ static class AsyncTerm2IdIndexWriteTask implements Callable<Void> { final protected transient static Logger log = Logger .getLogger(AsyncTerm2IdIndexWriteTask.class); private final KVOLatch latch; private final IChunkedIterator<BigdataValue> src; private final LexiconRelation lexiconRelation; private final Term2IdTupleSerializer tupleSerTerm2Id; // private final BlobsTupleSerializer tupleSerBlobs; private final IRunnableBuffer<KVO<BigdataValue>[]> bufferTerm2Id; private final IRunnableBuffer<KVO<BigdataValue>[]> bufferBlobs; /** * * @param latch * @param r * @param src * The visits chunks of distinct {@link Value}s. * @param bufferTerm2Id * @param bufferBlobs */ public AsyncTerm2IdIndexWriteTask(final KVOLatch latch, final LexiconRelation r, final IChunkedIterator<BigdataValue> src, final IRunnableBuffer<KVO<BigdataValue>[]> bufferTerm2Id, final IRunnableBuffer<KVO<BigdataValue>[]> bufferBlobs) { if (latch == null) throw new IllegalArgumentException(); if (r == null) throw new IllegalArgumentException(); if (src == null) throw new IllegalArgumentException(); if (bufferTerm2Id == null && bufferBlobs == null) throw new IllegalArgumentException(); this.latch = latch; this.lexiconRelation = r; this.tupleSerTerm2Id = bufferTerm2Id == null ? 
null : (Term2IdTupleSerializer) r .getIndex(LexiconKeyOrder.TERM2ID) .getIndexMetadata().getTupleSerializer(); this.src = src; this.bufferTerm2Id = bufferTerm2Id; this.bufferBlobs = bufferBlobs; } /** * Return <code>true</code> if the {@link BigdataValue} will be stored * against the BLOBS index. */ private boolean isBlob(final BigdataValue v) { return lexiconRelation.isBlob(v); } // /** // * Return <code>true</code> iff the {@link BigdataValue} is fully inline // * (in which case the {@link IV} is set as a side-effect on the // * {@link BigdataValue}). // */ // private boolean isInline(final BigdataValue v) { // // return lexiconRelation.getInlineIV(v) != null; // // } /** * Reshapes the {@link #src} into {@link KVOC}[]s a chunk at a time and * submits each chunk to the write buffer for the TERM2ID index. */ public Void call() throws Exception { /* * This is a thread-local instance, which is why we defer obtaining * this object until call() is executing. */ final LexiconKeyBuilder keyBuilderTerm2Id = bufferTerm2Id == null ? null : tupleSerTerm2Id.getLexiconKeyBuilder(); // BLOBS stuff. final BigdataValueSerializer<BigdataValue> valSer = lexiconRelation .getValueFactory().getValueSerializer(); final BlobsIndexHelper h = new BlobsIndexHelper(); final IKeyBuilder keyBuilder = h.newKeyBuilder(); final DataOutputBuffer out = new DataOutputBuffer(512); final ByteArrayBuffer tmp = new ByteArrayBuffer(512); latch.inc(); try { List<KVOC<BigdataValue>> terms = null; List<KVOC<BigdataValue>> blobs = null; while (src.hasNext()) { final BigdataValue[] chunkIn = src.nextChunk(); for (BigdataValue v : chunkIn) { /* * Note: The iterator we are visiting has already had * the IVs for fully inline Values resolved and set as a * side-effect and the inline Values have been filtered * out. We will only see non-inline values here, but * they may wind up as TermIds or BlobIVs. */ // if (isInline(v)) { // // Immediately resolve the IV via a side-effect. // System.err.println("inline: "+v); // continue; // } if (bufferBlobs != null && isBlob(v)) { final byte[] key = h.makePrefixKey(keyBuilder.reset(), v); final byte[] val = valSer.serialize(v, out.reset(), tmp); if (blobs == null) { // Lazily allocate. blobs = new ArrayList<KVOC<BigdataValue>>(); } // Assign a sort key to each Value. blobs.add(new KVOC<BigdataValue>(key, val, v, latch)); // System.err.println("blob : "+v); } else { if (terms == null) { // Lazily allocate to chunkSize. terms = new ArrayList<KVOC<BigdataValue>>( chunkIn.length); } // Assign a sort key to each Value. terms.add(new KVOC<BigdataValue>(keyBuilderTerm2Id .value2Key(v), null/* val */, v, latch)); // System.err.println("term : "+v); } } if (terms != null && !terms.isEmpty()) { @SuppressWarnings("unchecked") final KVOC<BigdataValue>[] a = terms .toArray(new KVOC[terms.size()]); // Place in KVO sorted order (by the byte[] keys). Arrays.sort(a); if (log.isInfoEnabled()) log.info("Adding chunk to TERM2ID master: chunkSize=" + a.length); // add chunk to async write buffer bufferTerm2Id.add(a); // Clear list. terms.clear(); } if (blobs != null && !blobs.isEmpty()) { @SuppressWarnings("unchecked") final KVOC<BigdataValue>[] a = blobs .toArray(new KVOC[blobs.size()]); // Place in KVO sorted order (by the byte[] keys). Arrays.sort(a); if (log.isInfoEnabled()) log.info("Adding chunk to BLOBS master: chunkSize=" + a.length); // add chunk to async write buffer bufferBlobs.add(a); // Clear list. blobs.clear(); } } } finally { latch.dec(); } // Done. 
        return null;

    }

}

/**
 * Asynchronous writes on the ID2TERM index.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
 *         Thompson</a>
 */
static class AsyncId2TermIndexWriteTask implements Callable<Void> {

    final protected transient static Logger log = Logger
            .getLogger(AsyncId2TermIndexWriteTask.class);

    private final KVOLatch latch;

    private final BigdataValueFactory valueFactory;

    private final IChunkedIterator<BigdataValue> src;

    private final IRunnableBuffer<KVO<BigdataValue>[]> buffer;

    /**
     * @param src
     *            Visits chunks of distinct {@link Value}s with their TIDs
     *            assigned. Blank nodes will automatically be filtered out.
     */
    public AsyncId2TermIndexWriteTask(final KVOLatch latch,
            final BigdataValueFactory valueFactory,
            final IChunkedIterator<BigdataValue> src,
            final IRunnableBuffer<KVO<BigdataValue>[]> buffer) {

        if (latch == null)
            throw new IllegalArgumentException();

        if (valueFactory == null)
            throw new IllegalArgumentException();

        if (src == null)
            throw new IllegalArgumentException();

        if (buffer == null)
            throw new IllegalArgumentException();

        this.latch = latch;

        this.valueFactory = valueFactory;

        this.src = src;

        this.buffer = buffer;

    }

    public Void call() throws Exception {

        // used to serialize the Values for the BTree.
        final BigdataValueSerializer<BigdataValue> ser = valueFactory
                .getValueSerializer();

        // thread-local key builder removes single-threaded constraint.
        final IKeyBuilder tmp = KeyBuilder.newInstance(Bytes.SIZEOF_LONG);

        // buffer is reused for each serialized term.
        final DataOutputBuffer out = new DataOutputBuffer();

        final ByteArrayBuffer tbuf = new ByteArrayBuffer();

        latch.inc();

        try {

            while (src.hasNext()) {

                final BigdataValue[] chunkIn = src.nextChunk();

                @SuppressWarnings("unchecked")
                final KVOC<BigdataValue>[] chunkOut = new KVOC[chunkIn.length];

                int i = 0;

                for (BigdataValue v : chunkIn) {

                    assert v != null;

                    if (v instanceof BNode) {
                        // Do not write blank nodes on the reverse index.
                        continue;
                    }

                    if (v.getIV() == null) {
                        throw new RuntimeException("No TID: " + v);
                    }

                    if (v.getIV().isInline()) {
                        // Do not write inline values on the reverse index.
                        continue;
                    }

                    final byte[] key = v.getIV().encode(tmp.reset())
                            .getKey();

                    // Serialize the term.
                    final byte[] val = ser.serialize((BigdataValueImpl) v,
                            out.reset(), tbuf);

                    /*
                     * Note: The BigdataValue instance is NOT supplied to
                     * the KVO since we do not want it to be retained and
                     * since there is no side-effect on the BigdataValue
                     * for writes on ID2TERM (unlike the writes on
                     * TERM2ID).
                     */
                    chunkOut[i++] = new KVOC<BigdataValue>(key, val,
                            null/* v */, latch);

                }

                // make dense.
                final KVO<BigdataValue>[] dense = KVO.dense(chunkOut, i);

                /*
                 * Put into key order in preparation for writing on the
                 * reverse index.
                 */
                Arrays.sort(dense);

                // add chunk to asynchronous write buffer
                buffer.add(dense);

            }

        } finally {

            latch.dec();

        }

        // Done.
        return null;

    }

}

/**
 * Asynchronous writes on the TEXT index.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
 *         Thompson</a>
 */
static class AsyncTextIndexWriteTask implements Callable<Void> {

    final protected transient static Logger log = Logger
            .getLogger(AsyncTextIndexWriteTask.class);

    private final KVOLatch latch;

    private final BigdataValueCentricFullTextIndex textIndex;

    private final IChunkedIterator<BigdataValue> src;

    private final IRunnableBuffer<KVO<BigdataValue>[]> buffer;

    /**
     * @param src
     *            Visits chunks of distinct {@link BigdataLiteral}s with
     *            their TIDs assigned. Anything which should not be indexed
     *            has already been filtered out.
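     *            (That filtering is performed by newTextIterator(), which
     *            passes only {@link BigdataLiteral}s and excludes datatype
     *            literals unless datatype literal indexing is enabled.)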
     */
    public AsyncTextIndexWriteTask(final KVOLatch latch,
            final BigdataValueCentricFullTextIndex textIndex,
            final IChunkedIterator<BigdataValue> src,
            final IRunnableBuffer<KVO<BigdataValue>[]> buffer) {

        if (latch == null)
            throw new IllegalArgumentException();

        if (textIndex == null)
            throw new IllegalArgumentException();

        if (src == null)
            throw new IllegalArgumentException();

        if (buffer == null)
            throw new IllegalArgumentException();

        this.latch = latch;

        this.textIndex = textIndex;

        this.src = src;

        this.buffer = buffer;

    }

    /**
     * FIXME This will write on the full text index using the
     * {@link BigdataValueCentricFullTextIndex} class. That class will wind
     * up doing gathered batch inserts in chunks of up to the capacity set
     * inline in the method below. However, it will use Sync RPC rather
     * than the ASYNC [buffer_text] index write pipeline. While this should
     * be enough to write unit tests for the full text indexing feature, it
     * is not going to scale well.
     *
     * @see BigdataValueCentricFullTextIndex
     */
    public Void call() throws Exception {

        latch.inc();

        try {

            /*
             * TODO capacity for the full text index writes.
             */
            final int capacity = 100000;

            textIndex.index(capacity, src);

        } finally {

            latch.dec();

        }

        // Done.
        return null;

    }

}

/**
 * Writes the statement chunks onto the specified statement index using the
 * asynchronous write API.
 * <p>
 * Note: This is similar to the {@link SPOIndexWriter}, but the latter uses
 * synchronous RPC.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
 *         Thompson</a>
 */
static class AsyncSPOIndexWriteTask implements Callable<Void> {

    final protected transient static Logger log = Logger
            .getLogger(AsyncSPOIndexWriteTask.class);

    private final KVOLatch latch;

    private final IKeyOrder<ISPO> keyOrder;

    /* Note: problem with java 1.6.0_07 and _12 on linux when typed. */
    @SuppressWarnings("rawtypes")
    private final IChunkedOrderedIterator/* <ISPO> */src;

    private final IRunnableBuffer<KVO<ISPO>[]> writeBuffer;

    private final SPOTupleSerializer tupleSer;

    @SuppressWarnings("rawtypes")
    public AsyncSPOIndexWriteTask(final KVOLatch latch,
            final IKeyOrder<ISPO> keyOrder,
            final SPORelation spoRelation,
            /* Note: problem with java 1.6.0_07 and _12 on linux when typed. */
            final IChunkedOrderedIterator/* <ISPO> */src,
            final IRunnableBuffer<KVO<ISPO>[]> writeBuffer) {

        if (latch == null)
            throw new IllegalArgumentException();

        if (keyOrder == null)
            throw new IllegalArgumentException();

        if (writeBuffer == null)
            throw new IllegalArgumentException();

        this.latch = latch;

        this.keyOrder = keyOrder;

        this.src = src;

        this.writeBuffer = writeBuffer;

        // the tuple serializer for this access path.
        this.tupleSer = (SPOTupleSerializer) spoRelation.getIndex(keyOrder)
                .getIndexMetadata().getTupleSerializer();

    }

    public Void call() throws Exception {

        long chunksOut = 0;
        long elementsOut = 0;

        latch.inc();

        try {

            while (src.hasNext()) {

                // next chunk, in the specified order.
                @SuppressWarnings("unchecked")
                final ISPO[] chunk = (ISPO[]) src.nextChunk(keyOrder);

                // note: a[] will be dense since nothing is filtered.
                @SuppressWarnings("unchecked")
                final KVOC<ISPO>[] a = new KVOC[chunk.length];

                for (int i = 0; i < chunk.length; i++) {

                    final ISPO spo = chunk[i];

                    if (spo == null)
                        throw new IllegalArgumentException();

                    if (!spo.isFullyBound())
                        throw new IllegalArgumentException(
                                "Not fully bound: " + spo.toString());

                    // generate key for the index.
                    final byte[] key = tupleSer.serializeKey(spo);

                    // generate value for the index.
final byte[] val = tupleSer.serializeVal(spo); /* * Note: The SPO is deliberately not provided to the KVO * instance since it is not required (there is nothing * being passed back from the write via a side-effect on * the BigdataStatementImpl) and since it otherwise will * force the retention of the RDF Value objects in its * s/p/o/c positions. */ a[i] = new KVOC<ISPO>(key, val, null/* spo */, latch); } // put chunk into sorted order based on assigned keys. Arrays.sort(a); // write chunk on the buffer. writeBuffer.add(a); chunksOut++; elementsOut += a.length; if (log.isDebugEnabled()) log.debug("Wrote chunk: index=" + keyOrder + ", chunksOut=" + chunksOut + ", elementsOut=" + elementsOut + ", chunkSize=" + a.length); if (log.isTraceEnabled()) log.trace("Wrote: index=" + keyOrder + ", chunk=" + Arrays.toString(a)); } } finally { latch.dec(); } if (log.isDebugEnabled()) log.debug("Done: index=" + keyOrder + ", chunksOut=" + chunksOut + ", elementsOut=" + elementsOut); // done. return null; } } /** * Inner class provides the statement buffer. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan * Thompson</a> * @param <S> * @param <F> */ protected class AsynchronousStatementBufferImpl implements IStatementBuffer<S> { /** The document identifier. */ private final R resource; private final AbstractTripleStore database; private final BigdataValueFactory valueFactory; /** * A canonicalizing map for RDF {@link Value}s. The use of this map * provides a ~40% performance gain. */ private LinkedHashMap<Value, BigdataValue> values; /** * A canonicalizing map for blank nodes. This map MUST be cleared before * you begin to add statements to the buffer from a new "source" * otherwise it will co-reference blank nodes from distinct sources. The * life cycle of the map is the life cycle of the document being loaded, * so if you are loading a large document with a lot of blank nodes the * map will also become large. */ private final AtomicReference<Map<String, BigdataBNode>> bnodes = new AtomicReference<Map<String, BigdataBNode>>(); /** * The total #of parsed statements so far. * * {@link IBuffer} */ private int statementCount; /** * Buffer used to accumulate chunks of statements. */ private UnsynchronizedUnboundedChunkBuffer<S> statements; public final AbstractTripleStore getDatabase() { return database; } /** * Returns <code>null</code>. * <p> * Note: This implementation does not support the concept of a * focusStore so it can not be used for truth maintenance. */ public AbstractTripleStore getStatementStore() { return null; } public boolean isEmpty() { return statementCount == 0; } public int size() { return statementCount; } /** * Return the identifier for the document. */ public R getDocumentIdentifier() { return resource; } /** * @param resource * The document identifier. */ protected AsynchronousStatementBufferImpl(final R resource) { this.resource = resource; this.database = AsynchronousStatementBufferFactory.this.tripleStore; this.valueFactory = database.getValueFactory(); } /** * Note: this implementation always returns ZERO (0). * * @see ParserTask */ public long flush() { return 0L; } /** * Clears all buffered data, including the canonicalizing mapping for * blank nodes and deferred provenance statements. */ public void reset() { if (log.isInfoEnabled()) log.info("resource=" + getDocumentIdentifier()); /* * Note: clear the reference NOT the contents of the map! This makes * it possible for the caller to reuse the same map across multiple * StatementBuffer instances. 
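 *
 * (Clearing the contents here instead could corrupt a map that is
 * still shared with another StatementBuffer instance.)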
*/ bnodes.set(null); values = null; statements = null; statementCount = 0; } public void setBNodeMap(final Map<String, BigdataBNode> bnodes) { if (bnodes == null) throw new IllegalArgumentException(); if (!this.bnodes .compareAndSet(null/* expect */, bnodes/* update */)) { throw new IllegalStateException(); } } /** * Add an "explicit" statement to the buffer (flushes on overflow, no * context). * * @param s * @param p * @param o */ public void add(Resource s, URI p, Value o) { add(s, p, o, null, StatementEnum.Explicit); } /** * Add an "explicit" statement to the buffer (flushes on overflow). * * @param s * @param p * @param o * @param c */ public void add(Resource s, URI p, Value o, Resource c) { add(s, p, o, c, StatementEnum.Explicit); } /** * Add a statement to the buffer (core impl). */ public void add(final Resource s, final URI p, final Value o, final Resource c, final StatementEnum type) { // add to the buffer. handleStatement(s, p, o, c, type); } public void add(final S e) { add(e.getSubject(), e.getPredicate(), e.getObject(), e.getContext(), (e instanceof BigdataStatement ? ((BigdataStatement) e) .getStatementType() : null)); } /** * Canonicalizing mapping for blank nodes. * <p> * Note: This map MUST stay in effect while reading from a given source * and MUST be cleared (or set to null) before reading from another * source. */ private BigdataBNode getCanonicalBNode(final BigdataBNodeImpl bnode) { // the BNode's ID. final String id = bnode.getID(); Map<String, BigdataBNode> bnodes = this.bnodes.get(); if (bnodes == null) { /* * Allocate a canonicalizing map for blank nodes. Since this * will be a private map it does not need to be thread-safe. */ setBNodeMap(new HashMap<String, BigdataBNode>( bnodesInitialCapacity)); // fall through. bnodes = this.bnodes.get(); if (bnodes == null) throw new AssertionError(); } /* * Specialized for a concurrent hash map. */ if (bnodes instanceof ConcurrentHashMap) { final BigdataBNode tmp = ((ConcurrentHashMap<String, BigdataBNode>) bnodes) .putIfAbsent(id, bnode); if (tmp != null) { // already exists in the map. return tmp; } if (log.isTraceEnabled()) log.trace("added: " + bnode); // was inserted into the map. return bnode; } /* * Synchronized on the map to make the conditional insert atomic. */ synchronized (bnodes) { final BigdataBNode tmp = bnodes.get(id); if (tmp != null) { // already exists in the map. return tmp; } // insert this blank node into the map. bnodes.put(id, bnode); if (log.isTraceEnabled()) log.trace("added: " + bnode); // was inserted into the map. return bnode; } // synchronized } /** * Canonicalizing mapping for a term. * <p> * Note: Blank nodes are made canonical with the scope of the source * from which the data are being read. See {@link #bnodes}. All other * kinds of terms are made canonical within the scope of the buffer's * current contents in order to keep down the demand on the heap with * reading either very large documents or a series of small documents. * * @param term * A term. * * @return Either the term or the pre-existing term in the buffer with * the same data. */ private BigdataValue getCanonicalValue(final BigdataValue term0) { if (term0 == null) { // Note: This handles an empty context position. return term0; } final BigdataValue term; if (term0 instanceof BNode) { // impose canonicalizing mapping for blank nodes. term = getCanonicalBNode((BigdataBNodeImpl) term0); /* * Fall through. 
             *
             * Note: This also records the blank node in the values map so
             * that we can process the values map without having to
             * consider the blank nodes as well.
             */
        } else {

            // not a blank node.
            term = term0;

        }

        if (values == null) {

            /*
             * Create a private (non-thread safe) canonicalizing mapping
             * for RDF Values.
             *
             * Note: A linked hash map is used to make the iterator faster.
             */
            values = new LinkedHashMap<Value, BigdataValue>(
                    valuesInitialCapacity);

        }

        /*
         * Impose a canonicalizing mapping on the term.
         */
        final BigdataValue tmp = values.get(term);

        if (tmp != null) {

            // already exists.
            return tmp;

        }

        // add to the map.
        if (values.put(term, term) != null) {

            throw new AssertionError();

        }

        if (log.isTraceEnabled())
            log.trace("n=" + values.size() + ", added: " + term);

        // return the new term.
        return term;

    }

    /**
     * Adds the values and the statement into the buffer.
     *
     * @param s
     *            The subject.
     * @param p
     *            The predicate.
     * @param o
     *            The object.
     * @param c
     *            The context (may be null).
     * @param type
     *            The statement type.
     *
     * @throws IndexOutOfBoundsException
     *             if the buffer capacity is exceeded.
     *
     * @see #nearCapacity()
     */
    private void handleStatement(final Resource s, final URI p,
            final Value o, final Resource c, final StatementEnum type) {

        _handleStatement(
                (Resource) getCanonicalValue((BigdataResource) valueFactory
                        .asValue(s)),//
                (URI) getCanonicalValue((BigdataURI) valueFactory
                        .asValue(p)),//
                (Value) getCanonicalValue((BigdataValue) valueFactory
                        .asValue(o)),//
                (Resource) getCanonicalValue((BigdataResource) valueFactory
                        .asValue(c)), //
                type);

    }

    /**
     * Form the BigdataStatement object using the valueFactory now that we
     * have bindings which were (a) allocated by the valueFactory and (b)
     * are canonical for the scope of this document.
     */
    @SuppressWarnings("unchecked")
    private void _handleStatement(final Resource s, final URI p,
            final Value o, final Resource c, final StatementEnum type) {

        final BigdataStatement stmt = valueFactory.createStatement(
                (BigdataResource) s, (BigdataURI) p, (BigdataValue) o,
                (BigdataResource) c, type);

        if (statements == null) {

            statements = new UnsynchronizedUnboundedChunkBuffer<S>(
                    producerChunkSize,
                    (Class<? extends S>) BigdataStatement.class);

        }

        statements.add((S) stmt);

        // total #of statements accepted.
        statementCount++;

        if (log.isTraceEnabled())
            log.trace("n=" + statementCount + ", added: " + stmt);

    }

    /**
     * Buffers the asynchronous writes on the TERM2ID and BLOBS indices.
     * Those indices will assign tids. If a {@link BigdataValue} is fully
     * inline, then its {@link IV} is resolved immediately. If the
     * {@link BigdataValue} will be stored as a BLOB, then it is written
     * onto the buffer for the BLOBS index. Otherwise it is written onto
     * the buffer for the TERM2ID index.
     */
    private void bufferTidWrites() throws Exception {

        if (log.isInfoEnabled()) {

            final Map<String, BigdataBNode> bnodes = this.bnodes.get();

            final int bnodeCount = (bnodes == null ? 0 : bnodes.size());

            log.info("bnodeCount=" + bnodeCount + ", values="
                    + values.size() + ", statementCount=" + statementCount);

        }

        if (isAnyDone()) {

            throw new RuntimeException("Factory closed?");

        }

        /*
         * Run task which will queue BigdataValue[] chunks onto the TERM2ID
         * async write buffers.
         *
         * Note: This is responsible for assigning the TIDs (term
         * identifiers) to the {@link BigdataValue}s. We CAN NOT write on
         * the other indices until we have those TIDs.
         *
         * Note: If there is not enough load being placed on the async
         * index write then it can wait up to its idle/chunk timeout.
Normally we * want to use an infinite chunk timeout so that all chunks written * on the index partitions are as full as possible. Therefore, the * TERM2ID async writer should use a shorter idle timeout or it can * live lock. Ideally, there should be some explicit notice when we * are done queuing writes on TERM2ID across all source documents. * Even then we can live lock if the input queue is not large * enough. */ /* * Latch notifies us when all writes for _this_ document on TERM2ID * are complete such that we have the assigned term identifiers for * all BigdataValues appearing in the document. This event is used * to transfer the document to another queue. */ final KVOLatch tidsLatch = new KVOLatch() { public String toString() { return super.toString() + " : tidsLatch"; } @Override protected void signal() throws InterruptedException { super.signal(); /* * Note: There is no requirement for an atomic state * transition for these two counters so there is no reason * to take the lock here. */ // lock.lock(); // try { documentTIDsWaitingCount.decrementAndGet(); documentTIDsReadyCount.incrementAndGet(); // } finally { // // lock.unlock(); // // } // Note: otherWriterService MUST have unbounded queue. otherWriterService.submit(new BufferOtherWritesTask( AsynchronousStatementBufferImpl.this)); } }; // pre-increment to avoid notice on transient zeros. tidsLatch.inc(); // Note: decremented in the finally{} clause. try { final Callable<Void> task1 = new AsyncTerm2IdIndexWriteTask( tidsLatch, lexiconRelation, newValuesIterator(// lexiconRelation,// values.values().iterator(),// producerChunkSize), buffer_t2id, buffer_blobs); // queue chunks onto the write buffer. task1.call(); } finally { /* * Decrement now that all chunks have been queued for * asynchronous writes. */ tidsLatch.dec(); } /* * Note: At this point the writes on TERM2ID indices have been buffered. */ } // /** // * Buffers the asynchronous writes on the BLOBS index. // * // * @throws Exception // */ // private void bufferBlobsWrites() throws Exception { // // if (log.isInfoEnabled()) { // final Map<String, BigdataBNode> bnodes = this.bnodes.get(); // final int bnodeCount = (bnodes == null ? 0 : bnodes.size()); // log.info("bnodeCount=" + bnodeCount + ", values=" // + values.size() + ", statementCount=" + statementCount); // } // // if (isAnyDone()) { // // throw new RuntimeException("Factory closed?"); // // } // // /* // * Run task which will queue BigdataValue[] chunks onto the TERMS // * async write buffer. // * // * Note: This is responsible for assigning the TIDs (term // * identifiers) to the {@link BigdataValue}s. We CAN NOT write on // * the other indices until we have those TIDs. // * // * Note: If there is not enough load being placed the async index // * write then it can wait up to its idle/chunk timeout. Normally we // * want to use an infinite chunk timeout so that all chunks written // * on the index partitions are as full as possible. Therefore, the // * TERMS async writer should use a shorter idle timeout or it can // * live lock. Ideally, there should be some explicit notice when we // * are done queuing writes on TERMS across all source documents. // * Even then we can live lock if the input queue is not large // * enough. // */ // // /* // * Latch notifies us when all writes for _this_ document on TERMS // * are complete such that we have the assigned term identifiers for // * all BigdataValues appearing in the document. This event is used // * to transfer the document to another queue. 
// */ // final KVOLatch tidsLatch = new KVOLatch() { // // public String toString() { // // return super.toString() + " : tidsLatch"; // // } // // @Override // protected void signal() throws InterruptedException { // // super.signal(); // /* // * Note: There is no requirement for an atomic state // * transition for these two counters so there is no reason // * to take the lock here. // */ //// lock.lock(); //// try { // // documentTIDsWaitingCount.decrementAndGet(); // // documentTIDsReadyCount.incrementAndGet(); // //// } finally { //// //// lock.unlock(); //// //// } // // // Note: otherWriterService MUST have unbounded queue. // otherWriterService.submit(new BufferOtherWritesTask( // AsynchronousStatementBufferImpl.this)); // // } // // }; // // // pre-increment to avoid notice on transient zeros. // tidsLatch.inc(); // // try { // // final Callable<Void> task = new AsyncBlobsIndexWriteTask( // tidsLatch, lexiconRelation, newValuesIterator(// // lexiconRelation,// // values.values().iterator(),// // producerChunkSize), // buffer_blobs); // // // queue chunks onto the write buffer. // task.call(); // // } finally { // // /* // * Decrement now that all chunks have been queued for // * asynchronous writes. // */ // // tidsLatch.dec(); // // } // // /* // * Note: At this point the writes on TERMS have been buffered. // */ // // } /** * Buffers write requests for the remaining indices (everything except * TERM2ID/BLOBS indices). * * @throws InterruptedException * @throws ExecutionException */ private void bufferOtherWrites() throws InterruptedException, ExecutionException { if (log.isDebugEnabled()) { log.debug("Writing on remaining indices."); } /* * Setup tasks which can run asynchronously. These tasks have no * dependencies. They can each proceed at their own rate. However, * we can not return from within this method until they are all * done. * * Note: Each task runs in parallel. * * Note: Each task uses the asynchronous write API. When the Future * for that task is complete all it means is that the data are now * buffered on the asynchronous write buffer for the appropriate * index. It DOES NOT mean that those writes are complete. However, * the [documentStableLatch] DOES indicate when the data is restart * safe. * * Note: These tasks all process iterators. This approach was chosen * to isolate the tasks (which queue data for asynchronous writes) * from the data structures in this IStatementBuffer implementation. * An example of something which WOULD NOT work is if these tasks * were inner classes accessing the instance fields on this class * since reset() would clear those fields which might cause * spontaneous failures within ongoing processing. */ final List<Callable> tasks = new LinkedList<Callable>(); /* * The #of triples parsed from this document. This is added to the * total #of restart safe told triples loaded by this client when * the latch is triggered. Of course, the actual #of triples on the * database is only available by querying the database since the * same triple can occur in more than one document, and documents * are loaded by distributed clients so there is no way to correct * for such duplicate told triples short of querying the database. */ final int toldTriplesThisDocument = statementCount; /* * Latch is signaled when all data buffered for this document is * RESTART SAFE on the database. * * Note: In order for the latch to have those semantics we have to * include it on each KVO object buffered for all remaining indices. 
         * The semantics are valid in the presence of duplicate removers
         * IFF they obey the contract for KVOList and link together the
         * duplicates such that the latch is decremented for each distinct
         * KVOC instance, including those which were eliminated as
         * duplicates.
         */
        final KVOLatch documentRestartSafeLatch = new KVOLatch() {

            public String toString() {
                return super.toString() + " : documentRestartSafeLatch";
            }

            @Override
            protected void signal() throws InterruptedException {

                super.signal();

                lock.lock();
                try {

                    workflowLatch_bufferOther.dec();

                    workflowLatch_document.dec();

                    assertSumOfLatchs();

                    documentRestartSafeCount.incrementAndGet();

                    toldTriplesRestartSafeCount
                            .addAndGet(toldTriplesThisDocument);

                    outstandingStatementCount
                            .addAndGet(-toldTriplesThisDocument);

                    // notify that the document is done.
                    documentDone(getDocumentIdentifier());

                } finally {

                    lock.unlock();

                }

            }

        };

        tasks.add(new AsyncId2TermIndexWriteTask(documentRestartSafeLatch,
                valueFactory, newId2TIterator(lexiconRelation, values
                        .values().iterator(), producerChunkSize),
                buffer_id2t));

        if (buffer_text != null) {

            tasks.add(new AsyncTextIndexWriteTask(documentRestartSafeLatch,
                    (BigdataValueCentricFullTextIndex) lexiconRelation
                            .getSearchEngine(), newTextIterator(
                            lexiconRelation, values.values().iterator(),
                            producerChunkSize, indexDatatypeLiterals),
                    buffer_text));

        }

        for (Map.Entry<SPOKeyOrder, IRunnableBuffer<KVO<ISPO>[]>> e : buffer_stmts
                .entrySet()) {

            final SPOKeyOrder keyOrder = e.getKey();

            final IRunnableBuffer<KVO<ISPO>[]> buffer = e.getValue();

            tasks.add(new AsyncSPOIndexWriteTask(documentRestartSafeLatch,
                    keyOrder, spoRelation,
                    // (IChunkedOrderedIterator<ISPO>)
                    statements.iterator(), buffer));

        }

        /*
         * Submit all tasks. They will run in parallel. If they complete
         * successfully then all we know is that the data has been buffered
         * for asynchronous writes on the various indices.
         *
         * Note: java 1.6.0_07/12 build problems under linux when typed as
         * <Future> or any other combination that I have tried.
         */
        final List futures;

        /*
         * This latch is incremented _before_ buffering writes, and within
         * each routine that buffers writes, to avoid false triggering.
         * This is done to ensure that the latch will be positive until we
         * exit the try / finally block. We do this around the submit of
         * the tasks and do not decrement the latch until the futures are
         * available so we know that all data is buffered.
         */
        documentRestartSafeLatch.inc();

        try {

            futures = tripleStore.getExecutorService().invokeAll(
                    (List) tasks);

        } finally {

            // decrement so that the latch can be triggered.
            documentRestartSafeLatch.dec();

        }

        try {

            /*
             * Make sure that no errors were reported by those tasks.
             */
            for (Object f : futures) {

                ((Future) f).get();

            }

        } finally {

            /*
             * At this point all writes have been buffered. We now discard
             * the buffered data (RDF Values and statements) since it will
             * no longer be used.
             */
            reset();

            lock.lock();
            try {

                if (unbufferedStatementCount
                        .addAndGet(-toldTriplesThisDocument) <= pauseParserPoolStatementThreshold) {

                    unpaused.signalAll();

                }

            } finally {

                lock.unlock();

            }

        }

    }

} // StatementBuffer impl.

/**
 * Task buffers the asynchronous writes on the TERM2ID index.
 */
private class BufferTidWrites implements Callable<Void> {

    private final AsynchronousStatementBufferImpl buffer;

    public BufferTidWrites(final AsynchronousStatementBufferImpl buffer) {

        if (buffer == null)
            throw new IllegalArgumentException();

        this.buffer = buffer;

    }

    public Void call() throws Exception {

        // // new workflow state.
// lock.lock(); // try { // guardLatch_term2Id.inc(); // workflowLatch_parser.dec(); // workflowLatch_bufferTerm2Id.inc(); // documentTIDsWaitingCount.incrementAndGet(); // assertSumOfLatchs(); // } finally { // lock.unlock(); // } try { buffer.bufferTidWrites(); lock.lock(); try { guardLatch_term2Id.dec(); } finally { lock.unlock(); } return null; } catch (Throwable t) { lock.lock(); try { guardLatch_term2Id.dec(); workflowLatch_bufferTids.dec(); documentTIDsWaitingCount.decrementAndGet(); documentError(buffer.getDocumentIdentifier(), t); outstandingStatementCount.addAndGet(-buffer.statementCount); if (unbufferedStatementCount .addAndGet(-buffer.statementCount) <= pauseParserPoolStatementThreshold) { unpaused.signalAll(); } throw new Exception(t); } finally { lock.unlock(); } } } } // /** // * Task buffers the asynchronous writes on the BLOBS index. // */ // private class BufferBlobsWrites implements Callable<Void> { // // private final AsynchronousStatementBufferImpl buffer; // // public BufferBlobsWrites(final AsynchronousStatementBufferImpl buffer) { // // if (buffer == null) // throw new IllegalArgumentException(); // // this.buffer = buffer; // // } // // public Void call() throws Exception { // //// // new workflow state. //// lock.lock(); //// try { //// guardLatch_term2Id.inc(); //// workflowLatch_parser.dec(); //// workflowLatch_bufferTerm2Id.inc(); //// documentTIDsWaitingCount.incrementAndGet(); //// assertSumOfLatchs(); //// } finally { //// lock.unlock(); //// } // // try { // // buffer.bufferBlobsWrites(); // // lock.lock(); // try { // guardLatch_term2Id.dec(); // } finally { // lock.unlock(); // } // // return null; // // } catch (Throwable t) { // // lock.lock(); // try { // guardLatch_term2Id.dec(); // workflowLatch_bufferTerm2Id.dec(); // documentTIDsWaitingCount.decrementAndGet(); // documentError(buffer.getDocumentIdentifier(), t); // outstandingStatementCount.addAndGet(-buffer.statementCount); // if (unbufferedStatementCount // .addAndGet(-buffer.statementCount) <= pauseParserPoolStatementThreshold) { // unpaused.signalAll(); // } // throw new Exception(t); // } finally { // lock.unlock(); // } // // } // // } // // } /** * Task which buffers index writes for the remaining indices (everything * other than TERM2ID). * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan * Thompson</a> */ private class BufferOtherWritesTask implements Callable<Void> { private final AsynchronousStatementBufferImpl buffer; public BufferOtherWritesTask(final AsynchronousStatementBufferImpl buffer) { if (buffer == null) throw new IllegalArgumentException(); this.buffer = buffer; } public Void call() throws Exception { // new workflow state. lock.lock(); try { guardLatch_other.inc(); workflowLatch_bufferTids.dec(); workflowLatch_bufferOther.inc(); assertSumOfLatchs(); } finally { lock.unlock(); } try { buffer.bufferOtherWrites(); lock.lock(); try { guardLatch_other.dec(); } finally { lock.unlock(); } return null; } catch (Throwable t) { lock.lock(); try { guardLatch_other.dec(); workflowLatch_bufferOther.dec(); documentError(buffer.getDocumentIdentifier(), t); outstandingStatementCount.addAndGet(-buffer.statementCount); if (unbufferedStatementCount .addAndGet(-buffer.statementCount) <= pauseParserPoolStatementThreshold) { unpaused.signalAll(); } throw new Exception(t); } finally { lock.unlock(); } } } } /** * Thread pool with pause/resume semantics based on the amount of buffered * state for the outer class. 
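 * <p>
 * Worker threads are held in beforeExecute() while
 * unbufferedStatementCount exceeds pauseParserPoolStatementThreshold and
 * are released when enough statements have been buffered and the
 * [unpaused] condition is signaled (see BufferTidWrites and
 * BufferOtherWritesTask).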
* * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan * Thompson</a> */ private class ParserThreadPoolExecutor extends ThreadPoolExecutor { /** * @param corePoolSize * @param maximumPoolSize * @param keepAliveTime * @param unit * @param workQueue * @param threadFactory */ public ParserThreadPoolExecutor(final int corePoolSize, final int maximumPoolSize, final long keepAliveTime, final TimeUnit unit, final BlockingQueue<Runnable> workQueue, final ThreadFactory threadFactory) { super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory); } /** * <code>true</code> if worker tasks must wait in * {@link #beforeExecute(Thread, Runnable)} */ private boolean isPaused() { return unbufferedStatementCount.get() > pauseParserPoolStatementThreshold; } /** * Overridden to have worker threads pause if {@link #isPaused()} * returns true. * * @param t * The thread that will run the task. * @param r * The {@link Runnable} wrapping the {@link AbstractTask} - * this is actually a {@link FutureTask}. See * {@link AbstractExecutorService}. */ protected void beforeExecute(final Thread t, final Runnable r) { // Note: [r] is the FutureTask. lock.lock(); try { if(isPaused()) { pausedThreadCount.incrementAndGet(); poolPausedCount.incrementAndGet(); if (log.isInfoEnabled()) log.info("PAUSE : " + AsynchronousStatementBufferFactory.this .toString()); while (isPaused()) { unpaused.await(); } // if (!unpaused.await(60, TimeUnit.SECONDS)) { // // /* // * Note: This was a trial workaround for a liveness // * problem. Unfortunately, it did not fix the // * problem. [The issue was a deadlock in the global // * LRU, which has been fixed.] // */ // // log.error("Flushing TERM2ID buffer: " // + AbstractStatisticsCollector.fullyQualifiedHostName); // // reopenBuffer_term2Id(); // // // fall through : while(isPaused()) will retest. // // } // // } pausedThreadCount.decrementAndGet(); if (log.isInfoEnabled()) log.info("RESUME: " + AsynchronousStatementBufferFactory.this .toString()); } } catch (InterruptedException ie) { t.interrupt(); } finally { lock.unlock(); } super.beforeExecute(t, r); } } } // StatementBufferFactory impl.
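// Usage sketch (hypothetical; the constructor and configuration options
// for this factory are defined earlier in this class and elided here, and
// the generic type parameters are illustrative):
//
//     final AsynchronousStatementBufferFactory<BigdataStatement, File>
//             factory = ...;
//     try {
//         // Queue a file for parsing; retry every 1000ms while the
//         // parser service work queue is full.
//         factory.submitOne(new File("data.nt.gz"), 1000L/* retryMillis */);
//         // Close the buffers and block until all index writes are
//         // restart safe on the database.
//         factory.awaitAll();
//     } catch (Throwable t) {
//         factory.cancelAll(true/* mayInterruptIfRunning */);
//         throw t;
//     }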