/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Mar 17, 2012
 */

package com.bigdata.bop.rdf.update;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;

import org.apache.log4j.Logger;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.RDFParserFactory;
import org.openrdf.rio.RDFParserRegistry;
import org.openrdf.rio.RDFParser.DatatypeHandling;
import org.openrdf.rio.helpers.RDFHandlerBase;

import com.bigdata.bop.BOp;
import com.bigdata.bop.BOpContext;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.ILocatableResourceAnnotations;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.Var;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.VTE;
import com.bigdata.rdf.internal.impl.TermId;
import com.bigdata.rdf.lexicon.LexiconRelation;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.rio.IRDFParserOptions;
import com.bigdata.rdf.rio.PresortRioLoader;
import com.bigdata.rdf.rio.RDFParserOptions;
import com.bigdata.rdf.rio.StatementBuffer;
import com.bigdata.rdf.sail.webapp.client.MiniMime;
import com.bigdata.rdf.sparql.ast.LoadGraph;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.DataLoader;
import com.bigdata.rdf.store.DataLoader.ClosureEnum;
import com.bigdata.rdf.store.DataLoader.CommitEnum;
import com.bigdata.rdf.store.DataLoader.Options;
import com.bigdata.relation.accesspath.UnsyncLocalOutputBuffer;
import com.bigdata.util.Bytes;

/**
 * Operator parses an RDF data source, writing bindings which represent
 * statements onto the output sink. This operator is compatible with the
 * {@link ChunkedResolutionOp} and the {@link InsertStatementsOp}.
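 * <p>
 * For illustration only, a minimal sketch of how this operator might be
 * configured using the {@link com.bigdata.bop.NV} helper. The source URI
 * <code>"file:data.ttl"</code>, the namespace <code>"kb"</code>, and the use
 * of the unisolated view are assumptions made for the example, not
 * requirements of this class:
 * 
 * <pre>
 * final ParseOp parseOp = new ParseOp(new BOp[] {}, NV.asMap(new NV[] {//
 *         new NV(ParseOp.Annotations.SOURCE_URI, new URIImpl("file:data.ttl")),//
 *         new NV(ParseOp.Annotations.RELATION_NAME, new String[] { "kb" }),//
 *         new NV(ParseOp.Annotations.TIMESTAMP, ITx.UNISOLATED),//
 *         }));
 * </pre>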
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 * 
 *          TODO Examine the integration point for Truth Maintenance (TM).
 *          <p>
 *          {@link ClosureEnum} and {@link CommitEnum} shape the way in which
 *          the update plan is generated. They are not options on the
 *          {@link ParseOp} itself.
 *          <p>
 *          We need to set up the assertion and retraction buffers such that
 *          they have the appropriate scope or (for database-at-once closure)
 *          we do not set up those buffers but instead recompute the closure
 *          of the database afterwards.
 *          <p>
 *          The assertion buffers might be populated after the IV resolution
 *          step and before we write on the indices. We then compute the fixed
 *          point of the closure over the delta and then write that onto the
 *          database. We should be able to specify that some sources contain
 *          data to be removed (INSERT DATA and REMOVE DATA or UNLOAD src).
 *          The operation should combine assertions and retractions to be
 *          efficient.
 *          <p>
 *          See {@link DataLoader}.
 * 
 *          TODO Add an operator which handles a zip archive, creating a LOAD
 *          for each resource in that archive. Recursive directory processing
 *          is similar. Both should result in multiple ParseOp instances which
 *          can run in parallel. Those ParseOp instances will feed the IV
 *          resolution, optional TM, and statement writer operations.
 *          <p>
 *          If we can make the SOURCE_URI a value expression, then we could
 *          flow solutions into the LOAD operation which would be the bindings
 *          for the source URI. Very nice! Then we could hash partition the
 *          LOAD operator across a cluster and do a parallel load very easily.
 *          If the source for those solutions was the parse of a single RDF
 *          file (or streamed URI) containing the files to be loaded, then we
 *          could also gain the indirection necessary to load large numbers of
 *          files in parallel on a cluster.
 * 
 *          TODO In at least the SIDs mode, we need to do some special
 *          operations when the statement buffer is flushed. That statement
 *          buffer could either be fed directly by the {@link ParseOp} or
 *          indirectly through solutions modeling statements flowing through
 *          the query engine. I am inclined to the latter for better
 *          parallelism. Even though there is more stuff on the heap and more
 *          latency within the stages, I think that we will get more out of
 *          the increased parallelism.
 * 
 *          TODO Any annotation here should be configurable from the
 *          {@link LoadGraph} AST node and (ideally) the SPARQL UPDATE syntax.
 * 
 *          FIXME This does not handle SIDs. The {@link StatementBuffer} logic
 *          needs to get into {@link InsertStatementsOp} for that to work, or
 *          the plan needs to be slightly different and hit a different insert
 *          operator for statements altogether.
 * 
 *          FIXME This does not handle Truth Maintenance.
 * 
 * @see PresortRioLoader
 * @see StatementBuffer
 * @see DataLoader
 * @see DataLoader.Options
 * @see RDFParserOptions
 * @see ClosureEnum
 * @see CommitEnum
 */
public class ParseOp extends PipelineOp {

    private static final transient Logger log = Logger
            .getLogger(ParseOp.class);

    /**
     * 
     */
    private static final long serialVersionUID = 1L;

    /**
     * Note: {@link BOp.Annotations#TIMEOUT} is respected to limit the read
     * time on an HTTP connection.
     * <p>
     * Note: The {@link RDFParserOptions} are initialized based on
     * {@link AbstractTripleStore#getProperties()}. Those defaults are then
     * overridden by any {@link RDFParserOptions.Options} which are specified
     * as annotations. When {@link LexiconRelation#isStoreBlankNodes()} is
     * <code>true</code>, blank node IDs will be preserved unless that option
     * has been explicitly overridden.
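     * <p>
     * For example (an illustrative sketch, using the {@link com.bigdata.bop.NV}
     * helper), blank node IDs could be preserved for a single operation by
     * overriding the database default with an annotation. The code below
     * casts the annotation value to a {@link Boolean}, so a
     * <code>Boolean</code> value is assumed:
     * 
     * <pre>
     * new NV(DataLoader.Options.PRESERVE_BNODE_IDS, Boolean.TRUE)
     * </pre>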
     */
    public interface Annotations extends PipelineOp.Annotations,
            ILocatableResourceAnnotations, RDFParserOptions.Options {

        /**
         * The data source to be parsed.
         */
        String SOURCE_URI = ParseOp.class.getName() + ".sourceUri";

        /**
         * The base URI for that data source (defaults to the
         * {@link #SOURCE_URI}).
         */
        String BASE_URI = ParseOp.class.getName() + ".baseUri";

        /**
         * The target graph (optional).
         * <p>
         * Note: This is ignored unless we are in quads mode. If we are in
         * quads mode and the data is not quads, then it is an error.
         */
        String TARGET_URI = ParseOp.class.getName() + ".targetUri";

        /**
         * When <code>true</code>, a parse error will be ignored.
         */
        String SILENT = ParseOp.class.getName() + ".silent";

        boolean DEFAULT_SILENT = false;

//        /**
//         * Optional property specifying the capacity of the
//         * {@link StatementBuffer} (default is {@value #DEFAULT_BUFFER_CAPACITY}
//         * statements).
//         */
//        String BUFFER_CAPACITY = ParseOp.class.getName() + ".bufferCapacity";
//
//        int DEFAULT_BUFFER_CAPACITY = 100000;

        /**
         * The name of the fallback {@link RDFFormat} (default
         * {@link #DEFAULT_FALLBACK}).
         * <p>
         * Note: {@link RDFFormat} is not {@link Serializable}, so this
         * annotation must be the format name rather than the
         * {@link RDFFormat} instance.
         */
        String FALLBACK = ParseOp.class.getName() + ".fallback";

        String DEFAULT_FALLBACK = RDFFormat.RDFXML.getName();

        /**
         * When <code>true</code>, the context position will be stripped from
         * the statements. This may be used to remove context from quads data
         * when loading into a triples or SIDs mode database. It may also be
         * used to force quads data into a specific context in quads mode.
         */
        String STRIP_CONTEXT = ParseOp.class.getName() + ".stripContext";

        boolean DEFAULT_STRIP_CONTEXT = false;

        /**
         * <code>true</code> iff HTTP requests may use cached responses.
         */
        String USES_CACHE = ParseOp.class.getName() + ".usesCache";

        boolean DEFAULT_USES_CACHE = true;

        /**
         * The {@link BufferedReader}'s internal buffer size (default is
         * {@value #DEFAULT_READ_BUFFER_SIZE}).
         */
        String READ_BUFFER_SIZE = ParseOp.class.getName() + ".readBufferSize";

        /**
         * Note: 8k is the default buffer size for {@link BufferedReader}, so
         * this default is the same as not specifying an override.
         */
        int DEFAULT_READ_BUFFER_SIZE = Bytes.kilobyte32 * 8;

    }

    public ParseOp(final BOp[] args, final Map<String, Object> annotations) {

        super(args, annotations);

        getRequiredProperty(Annotations.SOURCE_URI);

        getRequiredProperty(Annotations.RELATION_NAME);

        getRequiredProperty(Annotations.TIMESTAMP);

    }

    public ParseOp(final ParseOp op) {

        super(op);

    }

    /**
     * The s, p, o, and c variable names.
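     * <p>
     * Each statement visited by the parser is emitted as one binding set
     * over these variables. For example (illustrative data only):
     * 
     * <pre>
     * { s=&lt;http://example.org/a&gt;, p=rdf:type, o=&lt;http://example.org/B&gt;, c=&lt;http://example.org/g&gt; }
     * </pre>
     * 
     * The context variable [c] is bound iff a context is available after
     * {@link Annotations#STRIP_CONTEXT} and {@link Annotations#TARGET_URI}
     * have been applied.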
     */
    static protected final Var<?> s = Var.var("s"), p = Var.var("p"),
            o = Var.var("o"), c = Var.var("c");

    @Override
    public ParserStats newStats() {

        return new ParserStats();

    }

    @Override
    public FutureTask<Void> eval(final BOpContext<IBindingSet> context) {

        return new FutureTask<Void>(new ChunkTask(context, this));

    }

    static private class ChunkTask implements Callable<Void> {

        private final BOpContext<IBindingSet> context;

        private final ParserStats stats;

        private final URI sourceUri;

        private final String uriStr;

        private final String baseUri;

        private final URI targetUri;

        /**
         * When <code>true</code>, attempt to resolve the resource against the
         * CLASSPATH.
         * 
         * TODO Should we permit this?
         */
        private final boolean allowClassPath = true;

        /**
         * The default {@link RDFFormat}.
         */
        private final RDFFormat fallback;

        private final boolean sids, quads, silent;

        private final boolean stripContext;

        /**
         * The connection timeout (ms) -or- ZERO (0) for an infinite timeout.
         */
        private final int timeout;

        /**
         * When <code>true</code>, HTTP caching of the request is allowed.
         */
        private final boolean usesCache;

        /**
         * The {@link BufferedReader}'s internal buffer size (default is 8k).
         */
        private final int readBufferSize;

        /**
         * Output chunk size.
         */
        private final int chunkCapacity;

        /**
         * TODO Javadoc for annotations (which need to be defined) and their
         * interaction with the triple store properties.
         */
        private final IRDFParserOptions parserOptions;

        /**
         * The {@link AbstractTripleStore} on which the statements will
         * eventually be written.
         */
        private final AbstractTripleStore database;

        /*
         * FIXME Both the {@link #buffer} and the {@link #tm} objects need to
         * be part of shared state in order to support truth maintenance. That
         * state needs to be shared across the operations used to add/resolve
         * IVs, compute the closure over the statements to be added and/or
         * removed, and then insert/remove the statements and entailments,
         * updating the proof chains.
         * 
         * This does not belong on the ParseOp, but as something which wraps
         * ANY sequence of UPDATE operations (INSERT DATA, REMOVE DATA, LOAD
         * DATA, DELETE/INSERT, CLEAR, COPY, ADD, etc). TM is only available
         * for triples and SIDs on a Journal.
         */
//
//        /** StatementBuffer capacity. */
//        private final int bufferCapacity;
//
//        /**
//         * The object used to compute entailments for the database.
//         */
//        private final InferenceEngine inferenceEngine;
//
//        /**
//         * The object used to compute entailments for the database.
//         */
//        public InferenceEngine getInferenceEngine() {
//
//            return inferenceEngine;
//
//        }
//
//        /**
//         * Used to buffer writes.
//         * 
//         * @see #getAssertionBuffer()
//         */
//        private StatementBuffer<?> buffer;
//
//        /**
//         * The object used to maintain the closure for the database iff
//         * incremental truth maintenance is enabled.
//         */
//        private final TruthMaintenance tm;
//
//        /**
//         * Return the assertion buffer.
//         * <p>
//         * The assertion buffer is used to buffer statements that are being
//         * asserted so as to maximize the opportunity for batch writes. Truth
//         * maintenance (if enabled) will be performed no later than the
//         * commit of the transaction.
//         * <p>
//         * Note: The same {@link #buffer} is reused by each loader so that we
//         * can on the one hand minimize heap churn and on the other hand
//         * disable auto-flush when loading a series of small documents.
//         * However, we obtain a new buffer each time we perform incremental
//         * truth maintenance.
//         * <p>
//         * Note: When non-<code>null</code> and non-empty, the buffer MUST be
//         * flushed (a) if a transaction completes (otherwise writes will not
//         * be stored on the database); or (b) if there is a read against the
//         * database during a transaction (otherwise reads will not see the
//         * unflushed statements).
//         * <p>
//         * Note: if {@link #truthMaintenance} is enabled then this buffer is
//         * backed by a temporary store which accumulates the {@link SPO}s to
//         * be asserted. Otherwise it will write directly on the database each
//         * time it is flushed, including when it overflows.
//         * 
//         * @todo this should be refactored as an {@link IStatementBufferFactory}
//         *       where the appropriate factory is required for TM vs non-TM
//         *       scenarios (or where the factory is parameterized for TM vs
//         *       non-TM).
//         */
//        private StatementBuffer<?> getAssertionBuffer() {
//
//            if (buffer == null) {
//
//                if (tm != null) {
//
//                    buffer = new StatementBuffer(tm.newTempTripleStore(),
//                            database, bufferCapacity);
//
//                } else {
//
//                    buffer = new StatementBuffer(database, bufferCapacity) {
//                    };
//
//                }
//
//            }
//
//            return buffer;
//
//        }

        public ChunkTask(final BOpContext<IBindingSet> context,
                final ParseOp op) {

            this.context = context;

            this.stats = (ParserStats) context.getStats();

            this.sourceUri = (URI) op
                    .getRequiredProperty(Annotations.SOURCE_URI);

            // String value of that URI.
            this.uriStr = sourceUri.stringValue();

            // The base URI defaults to the source URI.
            this.baseUri = op.getProperty(Annotations.BASE_URI,
                    sourceUri.stringValue());

            final String namespace = ((String[]) op
                    .getRequiredProperty(Annotations.RELATION_NAME))[0];

            final long timestamp = (Long) op
                    .getRequiredProperty(Annotations.TIMESTAMP);

            this.database = (AbstractTripleStore) context.getResource(
                    namespace, timestamp);

            this.sids = database.isStatementIdentifiers();

            this.quads = database.isQuads();

            this.silent = op.getProperty(Annotations.SILENT,
                    Annotations.DEFAULT_SILENT);

            /*
             * Note: This is used as the default context position for the
             * statement if the database mode is quads. It needs to be a
             * BigdataURI from the value factory for the target database - the
             * same factory that is used by the parser.
             */
            this.targetUri = (URI) database.asValue((URI) op
                    .getProperty(Annotations.TARGET_URI));

            this.chunkCapacity = op.getChunkCapacity();

            /*
             * Note: RDFFormat is not Serializable, so this annotation must be
             * the format name rather than the RDFFormat instance.
             */
            this.fallback = RDFFormat.valueOf(op.getProperty(
                    Annotations.FALLBACK, Annotations.DEFAULT_FALLBACK));

            this.stripContext = op.getProperty(Annotations.STRIP_CONTEXT,
                    Annotations.DEFAULT_STRIP_CONTEXT);

//            this.inferenceEngine = database.getInferenceEngine();
//
//            if (database.getAxioms().isNone()) {
//
//                tm = null;
//
//            } else {
//
//                /*
//                 * Truth maintenance: buffer will write on a tempStore.
//                 */
//
//                tm = new TruthMaintenance(inferenceEngine);
//
//            }
//
//            this.bufferCapacity = op.getProperty(Annotations.BUFFER_CAPACITY,
//                    Annotations.DEFAULT_BUFFER_CAPACITY);

            // HTTP connection timeout (ms).
            this.timeout = op.getProperty(Annotations.TIMEOUT, 0/* infinite */);

            // HTTP caching allowed.
            this.usesCache = op.getProperty(Annotations.USES_CACHE,
                    Annotations.DEFAULT_USES_CACHE);

            // BufferedReader buffer size.
            this.readBufferSize = op.getProperty(Annotations.READ_BUFFER_SIZE,
                    Annotations.DEFAULT_READ_BUFFER_SIZE);

            /*
             * Set up the parser options.
             */
            {

                final Properties properties = database.getProperties();

                /*
                 * Initialize the parser options based on the database
                 * properties.
                 */
                this.parserOptions = new RDFParserOptions(properties);

                /*
                 * Now apply explicit annotations, which override anything
                 * picked up above.
                 */

                if (op.getProperty(Options.VERIFY_DATA) != null) {

                    parserOptions.setVerifyData((Boolean) op
                            .getProperty(Options.VERIFY_DATA));

                }

                if (op.getProperty(Options.STOP_AT_FIRST_ERROR) != null) {

                    parserOptions.setStopAtFirstError((Boolean) op
                            .getProperty(Options.STOP_AT_FIRST_ERROR));

                }

                if (op.getProperty(Options.DATATYPE_HANDLING) != null) {

                    parserOptions.setDatatypeHandling((DatatypeHandling) op
                            .getProperty(Options.DATATYPE_HANDLING));

                }

                if (op.getProperty(Options.PRESERVE_BNODE_IDS) != null) {

                    parserOptions.setPreserveBNodeIDs((Boolean) op
                            .getProperty(Options.PRESERVE_BNODE_IDS));

                }

                if ((properties.getProperty(Options.PRESERVE_BNODE_IDS) == null)
                        && op.getProperty(Options.PRESERVE_BNODE_IDS) == null
                        && database.getLexiconRelation().isStoreBlankNodes()) {

                    /*
                     * Note: preserveBNodeIDs is overridden based on whether or
                     * not the target is storing the blank node identifiers
                     * (unless the property was explicitly set - this amounts
                     * to a conditional default).
                     */

                    parserOptions.setPreserveBNodeIDs(true);

                }

            }

        }

        @Override
        public Void call() throws Exception {

            InputStream is = null;
            Reader reader = null;
            RDFFormat fmt = null;

            try {

                /*
                 * The expected format based on the file name component of the
                 * URL.
                 */
                fmt = RDFFormat.forFileName(uriStr, fallback);

                String contentEncoding = null;

                if (allowClassPath) {

                    // Try the classpath.
                    is = getClass().getResourceAsStream(uriStr);

                    if (is == null) {

                        /*
                         * Searching for the resource from the root of the
                         * class returned by getClass() (relative to the
                         * class' package) failed. Next try searching for the
                         * desired resource from the root of the jar; that is,
                         * search the jar file for an exact match of the input
                         * string.
                         */
                        is = getClass().getClassLoader().getResourceAsStream(
                                uriStr);

                    }

                }

                if (is == null) {

                    /*
                     * TODO Refactor to use the Apache HTTP components and
                     * provide for managed connection pools, authentication,
                     * etc. However, make sure that we preserve the ability to
                     * read from the local file system.
                     */

                    final URL url = new URL(uriStr);

                    final URLConnection conn = url.openConnection();

                    final HttpURLConnection conn2 = (conn instanceof HttpURLConnection) ? (HttpURLConnection) conn
                            : null;

                    if (conn2 != null) {

                        /*
                         * Only if using HTTP.
                         */

                        conn2.setRequestMethod("GET");

                        /*
                         * Set the Accept header based on the expected format.
                         */
                        final String acceptHeader;
                        {

                            final StringBuilder sb = new StringBuilder();

                            final List<String> acceptParams = RDFFormat
                                    .getAcceptParams(RDFFormat.values(),
                                            quads/* requireContext */,
                                            fmt/* preferredRDFFormat */);

                            for (String acceptParam : acceptParams) {

                                if (sb.length() > 0) {

                                    sb.append(",");

                                }

                                sb.append(acceptParam);

                            }

                            acceptHeader = sb.toString();

                            if (log.isDebugEnabled())
                                log.debug("Accept: " + acceptHeader);

                        }

                        conn2.setRequestProperty("Accept", acceptHeader);

                        conn.setUseCaches(usesCache);

                        conn.setReadTimeout(timeout);

                    }

                    conn.setDoInput(true);

                    // Connect.
                    conn.connect();

                    if (conn2 != null) {

                        // Extract the MIME type from the Content-Type header.
                        final String mimeType = new MiniMime(
                                conn.getContentType()).getMimeType();

                        // Figure out the RDFFormat from that.
                        fmt = RDFFormat.forMIMEType(mimeType, fallback);

                        contentEncoding = conn2.getContentEncoding();

                    }

                    is = conn.getInputStream();

                }

                /*
                 * Obtain a buffered reader on the input stream.
                 */

                if (contentEncoding == null) {

                    /*
                     * Assume the format's default charset if we have no
                     * better information.
                     */
                    contentEncoding = fmt.getCharset().name();

                }

                reader = new BufferedReader(new InputStreamReader(is,
                        contentEncoding), readBufferSize);

                parse(reader, baseUri, fmt, targetUri);

            } catch (Exception ex) {

                final String msg = "While loading: " + uriStr
                        + (fmt != null ? ", fmt=" + fmt : "");

                if (silent) {

                    log.warn(msg);

                } else {

                    throw new RuntimeException(msg, ex);

                }

            } finally {

                if (reader != null)
                    reader.close();

                if (is != null) {

                    is.close();

                }

            }

            // Done.
            return null;

        }

        /**
         * Run the parser against the source, writing one binding set per
         * statement onto the output sink.
         * 
         * @param source
         *            The reader from which the data will be parsed.
         * @param baseURL
         *            The base URL for the parser.
         * @param fmt
         *            The {@link RDFFormat} to be parsed.
         * @param defaultGraph
         *            The default graph (optional).
         * 
         * @throws IOException
         * @throws RDFHandlerException
         * @throws RDFParseException
         */
        private void parse(final Reader source, final String baseURL,
                final RDFFormat fmt, final URI defaultGraph)
                throws RDFParseException, RDFHandlerException, IOException {

            final RDFParserFactory rdfParserFactory = RDFParserRegistry
                    .getInstance().get(fmt);

            if (rdfParserFactory == null) {

                throw new RuntimeException("Parser factory not found: source="
                        + uriStr + ", fmt=" + fmt);

            }

            final RDFParser rdfParser = rdfParserFactory.getParser();

            rdfParser.setValueFactory(database.getValueFactory());

            rdfParser.setVerifyData(parserOptions.getVerifyData());

            rdfParser.setStopAtFirstError(parserOptions.getStopAtFirstError());

            rdfParser.setDatatypeHandling(parserOptions.getDatatypeHandling());

            rdfParser.setPreserveBNodeIDs(parserOptions.getPreserveBNodeIDs());

            /*
             * Note: The vector size should be pretty big for this. 100k might
             * be good (that is the DataLoader default).
             */
            final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer = new UnsyncLocalOutputBuffer<IBindingSet>(
                    chunkCapacity, context.getSink());

            rdfParser.setRDFHandler(new AddStatementHandler(unsyncBuffer));

            /*
             * Run the parser, which will cause binding sets representing the
             * parsed statements to be written onto the output sink.
             */
            rdfParser.parse(source, baseURL);

            unsyncBuffer.flush();

        }

        /**
         * Helper class converts statements visited by the parser into binding
         * sets and adds them to the output buffer.
         */
        private class AddStatementHandler extends RDFHandlerBase {

            private final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer;

            public AddStatementHandler(
                    final UnsyncLocalOutputBuffer<IBindingSet> unsyncBuffer) {

                this.unsyncBuffer = unsyncBuffer;

            }

            /**
             * Wrap the parsed {@link Value} as a mock {@link IV} whose cached
             * value is the parsed value. The mock IVs are resolved against the
             * lexicon downstream (e.g., by the {@link ChunkedResolutionOp}).
             */
            @SuppressWarnings({ "rawtypes", "unchecked" })
            private IConstant<IV> asConst(final Value v) {

                final IV iv = TermId.mockIV(VTE.valueOf(v));

                iv.setValue((BigdataValue) v);

                return new Constant<IV>(iv);

            }

            @Override
            public void handleStatement(final Statement stmt)
                    throws RDFHandlerException {

                final ListBindingSet bset = new ListBindingSet();

                bset.set(s, asConst(stmt.getSubject()));

                bset.set(p, asConst(stmt.getPredicate()));

                bset.set(o, asConst(stmt.getObject()));

                Resource context = stmt.getContext();

                if (stripContext) {

                    // Strip off the context position.
                    context = null;

                }

                if (quads && context == null) {

                    // Use the default context.
                    context = targetUri;

                }

                if (quads && context == null) {

                    throw new RuntimeException(
                            "Quads mode, but data are triples and the target graph was not specified: "
                                    + uriStr);

                }

                if (context != null) {

                    /*
                     * Bind [c] if available regardless of the database mode.
                     * This stops people from loading quads data into a triples
                     * or SIDs database in a way which throws away the context
                     * when they are not expecting that.
                     */

                    bset.set(c, asConst(context));

                }

                unsyncBuffer.add(bset);

                stats.toldTriples.increment();

            }

        }

    } // ChunkTask

}