/** Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on Mar 17, 2012 */ package com.bigdata.rdf.sparql.ast.eval; import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLConnection; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicLong; import java.util.zip.GZIPInputStream; import org.apache.log4j.Logger; import org.openrdf.model.Resource; import org.openrdf.model.Statement; import org.openrdf.model.URI; import org.openrdf.model.Value; import org.openrdf.model.impl.URIImpl; import org.openrdf.query.BindingSet; import org.openrdf.query.GraphQueryResult; import org.openrdf.query.QueryEvaluationException; import org.openrdf.query.UpdateExecutionException; import org.openrdf.query.algebra.StatementPattern.Scope; //import org.openrdf.query.impl.MutableTupleQueryResult; import org.openrdf.repository.RepositoryException; import org.openrdf.repository.RepositoryResult; import org.openrdf.rio.RDFFormat; import org.openrdf.rio.RDFHandlerException; import org.openrdf.rio.RDFParseException; import org.openrdf.rio.RDFParser; import org.openrdf.rio.RDFParser.DatatypeHandling; import org.openrdf.rio.RDFParserFactory; import org.openrdf.rio.RDFParserRegistry; import org.openrdf.rio.helpers.RDFHandlerBase; import org.openrdf.sail.SailException; import com.bigdata.bop.BOp; import com.bigdata.bop.BOpUtility; import com.bigdata.bop.Constant; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IVariable; import com.bigdata.bop.NV; import com.bigdata.bop.PipelineOp; import com.bigdata.bop.Var; import com.bigdata.bop.bindingSet.ListBindingSet; import com.bigdata.bop.engine.IRunningQuery; import com.bigdata.bop.rdf.update.ChunkedResolutionOp; import com.bigdata.bop.rdf.update.CommitOp; import com.bigdata.bop.rdf.update.InsertStatementsOp; import com.bigdata.bop.rdf.update.ParseOp; import com.bigdata.bop.rdf.update.RemoveStatementsOp; import com.bigdata.rdf.error.SparqlDynamicErrorException.GraphEmptyException; import com.bigdata.rdf.error.SparqlDynamicErrorException.GraphExistsException; import com.bigdata.rdf.error.SparqlDynamicErrorException.SolutionSetDoesNotExistException; import com.bigdata.rdf.error.SparqlDynamicErrorException.SolutionSetExistsException; import com.bigdata.rdf.error.SparqlDynamicErrorException.UnknownContentTypeException; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.lexicon.LexiconRelation; import com.bigdata.rdf.model.BigdataStatement; import com.bigdata.rdf.model.BigdataURI; import com.bigdata.rdf.rio.IRDFParserOptions; import 
com.bigdata.rdf.rio.RDFParserOptions; import com.bigdata.rdf.sail.BigdataSail; import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection; import com.bigdata.rdf.sail.SPARQLUpdateEvent; import com.bigdata.rdf.sail.SPARQLUpdateEvent.DeleteInsertWhereStats; import com.bigdata.rdf.sail.Sesame2BigdataIterator; import com.bigdata.rdf.sail.webapp.client.MiniMime; import com.bigdata.rdf.sparql.ast.ASTContainer; import com.bigdata.rdf.sparql.ast.AbstractGraphDataUpdate; import com.bigdata.rdf.sparql.ast.AddGraph; import com.bigdata.rdf.sparql.ast.ConstantNode; import com.bigdata.rdf.sparql.ast.ConstructNode; import com.bigdata.rdf.sparql.ast.CopyGraph; import com.bigdata.rdf.sparql.ast.CreateGraph; import com.bigdata.rdf.sparql.ast.DatasetNode; import com.bigdata.rdf.sparql.ast.DeleteInsertGraph; import com.bigdata.rdf.sparql.ast.DropGraph; import com.bigdata.rdf.sparql.ast.JoinGroupNode; import com.bigdata.rdf.sparql.ast.LoadGraph; import com.bigdata.rdf.sparql.ast.MoveGraph; import com.bigdata.rdf.sparql.ast.NamedSubqueryInclude; import com.bigdata.rdf.sparql.ast.ProjectionNode; import com.bigdata.rdf.sparql.ast.QuadData; import com.bigdata.rdf.sparql.ast.QuadsDataOrNamedSolutionSet; import com.bigdata.rdf.sparql.ast.QueryRoot; import com.bigdata.rdf.sparql.ast.QueryType; import com.bigdata.rdf.sparql.ast.StatementPatternNode; import com.bigdata.rdf.sparql.ast.StaticAnalysis; import com.bigdata.rdf.sparql.ast.TermNode; import com.bigdata.rdf.sparql.ast.Update; import com.bigdata.rdf.sparql.ast.UpdateRoot; import com.bigdata.rdf.sparql.ast.UpdateType; import com.bigdata.rdf.sparql.ast.VarNode; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.store.AbstractTripleStore; import com.bigdata.rdf.store.BD; import com.bigdata.rdf.store.BigdataOpenRDFBindingSetsResolverator; import com.bigdata.striterator.Chunkerator; import cutthecrap.utils.striterators.ICloseableIterator; import cutthecrap.utils.striterators.Resolver; import cutthecrap.utils.striterators.Striterator; import info.aduna.iteration.CloseableIteration; /** * Class handles SPARQL update query plan generation. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ */ public class AST2BOpUpdate extends AST2BOpUtility { private static final Logger log = Logger.getLogger(AST2BOpUpdate.class); /** * When <code>true</code>, convert the SPARQL UPDATE into a physical * operator plan and execute it on the query engine. When <code>false</code> * , the UPDATE is executed using the {@link BigdataSail} API. * * TODO By coming in through the SAIL, we automatically pick up truth * maintenance and related logics. All of that needs to be integrated into * the generated physical operator plan before we can run updates on the * query engine. However, there will be advantages to running updates on the * query engine, including declarative control of parallelism, more * similarity for code paths between a single machine and cluster * deployments, and a unified operator model for query and update * evaluation. */ private final static boolean runOnQueryEngine = false; /** * */ public AST2BOpUpdate() { super(); } /** * The s,p,o, and c variable names used for binding sets which model * {@link Statement}s. */ private static final Var<?> s = Var.var("s"), p = Var.var("p"), o = Var .var("o"), c = Var.var("c"); /** * Convert the query * <p> * Note: This is currently a NOP. 
*/ protected static void optimizeUpdateRoot(final AST2BOpUpdateContext context) { // final ASTContainer astContainer = context.astContainer; // // // Clear the optimized AST. // // astContainer.clearOptimizedUpdateAST(); // // /* // * Build up the optimized AST for the UpdateRoot for each Update to // * be executed. Maybe do this all up front before we run anything since // * we might reorder or regroup some operations (e.g., parallelized LOAD // * operations, parallelized INSERT data operations, etc). // */ // final UpdateRoot updateRoot = astContainer.getOriginalUpdateAST(); // // /* // * Evaluate each update operation in the optimized UPDATE AST in turn. // */ // for (Update op : updateRoot) { // // ... // // } } /** * Convert and/or execute the update request. * * @throws Exception */ protected static PipelineOp convertUpdate(final AST2BOpUpdateContext context) throws Exception { if (context.db.isReadOnly()) throw new UnsupportedOperationException("Not a mutable view."); if (context.conn.isReadOnly()) throw new UnsupportedOperationException("Not a mutable view."); if (log.isTraceEnabled()) log.trace("beforeUpdate:\n" + context.getAbstractTripleStore().dumpStore()); final ASTContainer astContainer = context.astContainer; /* * Note: Change this to the optimized AST if we start doing AST * optimizations for UPDATE. */ final UpdateRoot updateRoot = astContainer.getOriginalUpdateAST(); // Set as annotation on the ASTContainer. // astContainer.setQueryPlan(left); /* * Evaluate each update operation in the optimized UPDATE AST in turn. */ PipelineOp left = null; int updateIndex = 0; for (Update op : updateRoot) { // log.error("\nbefore op=" + op + "\n" + context.conn.getTripleStore().dumpStore()); long connectionFlushNanos = 0L; long batchResolveNanos = 0L; if (updateIndex > 0) { /* * There is more than one update operation in this request. */ /* * Note: We need to flush the assertion / retraction buffers if * the Sail is local since some of the code paths supporting * UPDATEs do not go through the BigdataSail and would otherwise * not have their updates flushed until the commit (which does * go through the BigdataSail). * * @see https://sourceforge.net/apps/trac/bigdata/ticket/558 */ final long t1 = System.nanoTime(); context.conn.flush(); final long t2 = System.nanoTime(); connectionFlushNanos = t2 - t1; /* * We need to re-resolve any RDF Values appearing in this UPDATE * operation which have a 0L term identifier in case they have * become defined through the previous update(s). * * Note: Since 2.0, also re-resolves RDF Values appearing in the * binding sets or data set. * * @see BLZG-1176 SPARQL Parsers should not be db mode aware * * @see https://sourceforge.net/apps/trac/bigdata/ticket/558 */ ASTDeferredIVResolution.resolveUpdate(context.db, op, context.getQueryBindingSet(), context.getDataset()); batchResolveNanos = System.nanoTime() - t2; } final long begin = System.nanoTime(); final DeleteInsertWhereStats deleteInsertWhereStats = new DeleteInsertWhereStats(); // @see BLZG-1446. Throwable cause = null; try { // convert/run the update operation. 
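                /*
                 * Illustrative example (not part of the original source): a
                 * single request with two operations, where the second
                 * operation reuses a term first introduced by the first one:
                 *
                 *   INSERT DATA { <http://example.org/book1> <http://example.org/title> "A" } ;
                 *   DELETE { <http://example.org/book1> <http://example.org/title> ?t }
                 *   INSERT { <http://example.org/book1> <http://example.org/title> "B" }
                 *   WHERE  { <http://example.org/book1> <http://example.org/title> ?t }
                 *
                 * The flush() and ASTDeferredIVResolution.resolveUpdate()
                 * calls above are what allow the second operation to see the
                 * data and the resolved IVs produced by the first operation
                 * within the same request.
                 */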
left = convertUpdateSwitch(left, op, context, deleteInsertWhereStats); } catch (Throwable t) { cause = t; log.error("SPARQL UPDATE failure: op=" + op + ", ex=" + t, t); // notify listener(s) final long elapsed = System.nanoTime() - begin; context.conn.getSailConnection().fireEvent( new SPARQLUpdateEvent(op, elapsed, connectionFlushNanos, batchResolveNanos, cause, deleteInsertWhereStats)); if (t instanceof Exception) throw (Exception) t; if (t instanceof RuntimeException) throw (RuntimeException) t; throw new RuntimeException(t); } final long elapsed = System.nanoTime() - begin; // notify listener(s) context.conn.getSailConnection().fireEvent( new SPARQLUpdateEvent(op, elapsed, connectionFlushNanos, batchResolveNanos, cause, deleteInsertWhereStats)); updateIndex++; } /* * Commit mutation. */ left = convertCommit(left, context); if (log.isTraceEnabled()) log.trace("afterCommit:\n" + context.getAbstractTripleStore().dumpStore()); return left; } /** * MP: Make SPARQL Update an auto-commit operation by default. * * TODO: Make this private once merged down. */ public static boolean AUTO_COMMIT = Boolean.parseBoolean(System .getProperty(AST2BOpBase.Annotations.AUTO_COMMIT, "true")); /** * Commit. * <p> * Note: Not required on cluster (Shard-wise ACID updates). * <p> * Note: Not required unless the end of the update sequence or we desire a * checkpoint on the sequences of operations. * <p> * Note: The commit really must not happen until the update plan(s) are * known to execute successfully. We can do that with an AT_ONCE annotation * on the {@link CommitOp} or we can just invoke commit() at appropriate * checkpoints in the UPDATE operation. */ private static PipelineOp convertCommit(PipelineOp left, final AST2BOpUpdateContext context) throws Exception { /* * Note: Since we are using the BigdataSail interface, we DO have to * do a commit on the cluster. It is only if we are running on the * query engine that things could be different (but that requires a * wholly different plan). */ // if (!context.isCluster()) if (AUTO_COMMIT) { if (runOnQueryEngine) { left = new CommitOp(leftOrEmpty(left), NV.asMap( // new NV(BOp.Annotations.BOP_ID, context.nextId()),// new NV(CommitOp.Annotations.TIMESTAMP, context .getTimestamp()),// new NV(CommitOp.Annotations.PIPELINED, false)// )); } else { final long commitTime = context.conn.commit2(); context.setCommitTime(commitTime); if (log.isDebugEnabled()) log.debug("COMMIT: commitTime=" + commitTime); } } return left; } /** * Method provides the <code>switch()</code> for handling the different * {@link UpdateType}s. * * @param left * @param op * @param context * @return * @throws Exception */ private static PipelineOp convertUpdateSwitch(PipelineOp left, final Update op, final AST2BOpUpdateContext context, final DeleteInsertWhereStats deleteInsertWhereStats) throws Exception { final UpdateType updateType = op.getUpdateType(); switch (updateType) { case Create: { left = convertCreateGraph(left, (CreateGraph) op, context); break; } case Add: { // Copy all statements from source to target. left = convertAddGraph(left, (AddGraph) op, context); break; } case Copy: { // Drop() target, then Add(). left = convertCopyGraph(left, (CopyGraph) op, context); break; } case Move: { // Drop() target, Add(source,target), Drop(source). 
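            /*
             * Illustrative example (not part of the original source):
             *
             *   MOVE GRAPH <http://example.org/g1> TO <http://example.org/g2>
             *
             * clears g2, copies all statements from g1 into g2, and then
             * clears g1 (see convertMoveGraph()); if source and target are the
             * same graph the operation is a NOP.
             */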
            left = convertMoveGraph(left, (MoveGraph) op, context);
            break;
        }
        case Clear:
        case Drop: {
            left = convertClearOrDropGraph(left, (DropGraph) op, context);
            break;
        }
        case InsertData:
        case DeleteData: {
            left = convertInsertOrDeleteData(left, (AbstractGraphDataUpdate) op,
                    context);
            break;
        }
        case Load: {
            left = convertLoadGraph(left, (LoadGraph) op, context);
            break;
        }
        case DeleteInsert: {
            left = convertDeleteInsert(left, (DeleteInsertGraph) op, context,
                    deleteInsertWhereStats);
            break;
        }
        case DropEntailments: {
            left = convertDropEntailments(left, context);
            break;
        }
        case CreateEntailments: {
            left = convertCreateEntailments(left, context);
            break;
        }
        case EnableEntailments: {
            left = convertEnableEntailments(left, context);
            break;
        }
        case DisableEntailments: {
            left = convertDisableEntailments(left, context);
            break;
        }
        default:
            throw new UnsupportedOperationException("updateType=" + updateType);
        }

        return left;

    }

    /**
     * <pre>
     * ( WITH IRIref )?
     * ( ( DeleteClause InsertClause? ) | InsertClause )
     * ( USING ( NAMED )? IRIref )*
     * WHERE GroupGraphPattern
     * </pre>
     *
     * @param left
     * @param op
     * @param context
     * @return
     *
     * @throws QueryEvaluationException
     * @throws RepositoryException
     * @throws SailException
     */
    private static PipelineOp convertDeleteInsert(PipelineOp left,
            final DeleteInsertGraph op, final AST2BOpUpdateContext context,
            final DeleteInsertWhereStats deleteInsertWhereStats)
            throws QueryEvaluationException, RepositoryException, SailException {

        if (runOnQueryEngine)
            throw new UnsupportedOperationException();

        /*
         * This models the DELETE/INSERT request as a QUERY. The data from the
         * query are fed into a handler which adds or removes the statements
         * (as appropriate) from the [conn].
         */
        {
            /*
             * Create a new query using the WHERE clause.
             */
            final JoinGroupNode whereClause = new JoinGroupNode(
                    op.getWhereClause());

            final QueryRoot queryRoot = new QueryRoot(QueryType.SELECT);

            queryRoot.setWhereClause(whereClause);

            final DatasetNode dataset = op.getDataset();

            if (dataset != null)
                queryRoot.setDataset(dataset);

            /*
             * Setup the PROJECTION for the new query.
             *
             * TODO retainAll() for only those variables used in the template
             * for the InsertClause or RemoveClause (less materialization, more
             * efficient).
             */
            {
                final StaticAnalysis sa = new StaticAnalysis(queryRoot, context);

                final Set<IVariable<?>> projectedVars = sa
                        .getMaybeProducedBindings(whereClause,
                                new LinkedHashSet<IVariable<?>>()/* vars */,
                                true/* recursive */);

                for (IBindingSet bs : context.getBindings()) {
                    @SuppressWarnings("rawtypes")
                    final Iterator<IVariable> it = bs.vars();
                    while (it.hasNext()) {
                        projectedVars.add(it.next());
                    }
                }

                final ProjectionNode projection = new ProjectionNode();

                for (IVariable<?> var : projectedVars) {
                    projection.addProjectionVar(new VarNode(var.getName()));
                }

                queryRoot.setProjection(projection);
            }

            final ASTContainer astContainer = new ASTContainer(queryRoot);

            /*
             * Inherit the 'RESOLVED' flag so that resolution is not run again
             * for an ASTContainer constructed from parts of an already
             * resolved update.
             */
            astContainer.setProperty(ASTContainer.Annotations.RESOLVED,
                    context.astContainer
                            .getProperty(ASTContainer.Annotations.RESOLVED));

            final QuadsDataOrNamedSolutionSet insertClause = op.getInsertClause();

            if (insertClause == null && op.getDeleteClause() == null) {

                /*
                 * DELETE WHERE QuadPattern
                 *
                 * We need to build the appropriate CONSTRUCT clause from the
                 * WHERE clause.
                 *
                 * Note: This could be lifted into an AST optimizer, but we are
                 * not yet running those against the UPDATE AST.
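                 *
                 * Illustrative example (not part of the original source): the
                 * shorthand form
                 *
                 *   DELETE WHERE { ?s <http://example.org/knows> ?o }
                 *
                 * is handled as if it had been written
                 *
                 *   DELETE { ?s <http://example.org/knows> ?o }
                 *   WHERE  { ?s <http://example.org/knows> ?o }
                 *
                 * i.e. the delete template below is built by cloning each
                 * statement pattern found in the WHERE clause.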
*/ final QuadData deleteTemplate = new QuadData(); final Iterator<StatementPatternNode> itr = BOpUtility.visitAll( whereClause, StatementPatternNode.class); while (itr.hasNext()) { final StatementPatternNode t = (StatementPatternNode) itr .next().clone(); deleteTemplate.addChild(t); } final QuadsDataOrNamedSolutionSet deleteClause = new QuadsDataOrNamedSolutionSet( deleteTemplate); op.setDeleteClause(deleteClause); } final QuadsDataOrNamedSolutionSet deleteClause = op.getDeleteClause(); // Just the insert clause. /* * TODO FIXME Forcing all updates through the delete+insert code path. * * https://jira.blazegraph.com/browse/BLZG-1913 */ final boolean isInsertOnly = false; //insertClause != null && deleteClause == null; // // Just the delete clause. // final boolean isDeleteOnly = insertClause == null // && deleteClause != null; // Both the delete clause and the insert clause. /* * TODO FIXME Forcing all updates through the delete+insert code path. * * https://jira.blazegraph.com/browse/BLZG-1913 */ final boolean isDeleteInsert = true; //insertClause != null && deleteClause != null; /* * Run the WHERE clause. */ if (isDeleteInsert) { /* * DELETE + INSERT. * * Note: The semantics of DELETE + INSERT are that the WHERE * clause is executed once. The solutions to that need to be fed * once through the DELETE clause. After the DELETE clause has * been processed for all solutions to the WHERE clause, the * INSERT clause is then processed. So, we need to materialize * the WHERE clause results when both the DELETE clause and the * INSERT clause are present. * * FIXME For large intermediate results, we would be much better * off putting the data onto an HTree (or, better yet, a chain * of blocks) and processing the bindings as IVs rather than * materializing them as RDF Values (and even for small data * sets, we would be better off avoiding materialization of the * RDF Values and using an ASTConstructIterator which builds * ISPOs using IVs rather than Values). * * Note: Unlike operations against a graph, we do NOT perform * truth maintenance for updates against solution sets, * therefore we could get by nicely with operations on * IBindingSet[]s without RDF Value materialization. * * @see https://sourceforge.net/apps/trac/bigdata/ticket/524 * (SPARQL Cache) */ final LexiconRelation lexicon = context .getAbstractTripleStore().getLexiconRelation(); final int chunkSize = 100; // TODO configure. /* * Run as a SELECT query. * * Note: This *MUST* use the view of the tripleStore which is * associated with the SailConnection in case the view is * isolated by a transaction. */ // Note: Blocks until the result set is materialized. final long beginWhereClauseNanos = System.nanoTime(); final MutableTupleQueryResult result = new MutableTupleQueryResult( ASTEvalHelper.evaluateTupleQuery( context.conn.getTripleStore(), astContainer, context.getQueryBindingSet()/* bindingSets */, null /* dataset */)); deleteInsertWhereStats.whereNanos.set(System.nanoTime() - beginWhereClauseNanos); // If the query contains a nativeDistinctSPO query hint then // the line below unfortunately isolates the query so that the hint does // not impact any other execution, this is hacked by putting a property on the query root. final boolean nativeDistinct = astContainer.getOptimizedAST().getProperty(ConstructNode.Annotations.NATIVE_DISTINCT, ConstructNode.Annotations.DEFAULT_NATIVE_DISTINCT); try { // Play it once through the DELETE clause. if (deleteClause != null) { final long beginDeleteNanos = System.nanoTime(); // rewind. 
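                    /*
                     * Illustrative example (not part of the original source):
                     * for
                     *
                     *   DELETE { ?s <http://example.org/status> "old" }
                     *   INSERT { ?s <http://example.org/status> "new" }
                     *   WHERE  { ?s <http://example.org/status> "old" }
                     *
                     * the WHERE clause solutions captured in [result] are
                     * replayed once here for the DELETE template and then
                     * rewound again below for the INSERT template.
                     */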
result.beforeFirst(); // Figure out if operating on solutions or graphs. final boolean isSolutionSet = deleteClause.isSolutions(); if (isSolutionSet) { /* * Target is solution set. * * @see * https://sourceforge.net/apps/trac/bigdata/ticket * /524 (SPARQL Cache) * * FIXME [Is this fixed now?] The DELETE+INSERT code * path is failing because it is based on the DELETE * FROM SELECT code path below and attempts to * rewrite the query to use a MINUS operator. * However, the setup is different in this case * since we have already run the original WHERE * clause into a rewindable tuple result set. * * The best way to fix this would be to stay within * the native IBindingSet[] model and write the * solutions from the WHERE clause onto a chained * list of blocks, just as we do when writing on a * named solution set (or an htree with appropriate * join variables). That could then be joined into * the query with an INCLUDE. Since we do not want * this "temporary" solution set to be visible, we * could prefix it with a UUID and make sure that it * is written onto a memory manager, and also make * sure that we eventually delete the named solution * set since it should be temporary. */ // The named solution set on which we will write. final String solutionSet = deleteClause.getName(); // A unique named solution set used to INCLUDE the // solutions to be deleted. final String tempSolutionSet = "-" + solutionSet + "-" + UUID.randomUUID(); // Write solutions to be deleted onto temp set. context.solutionSetManager.putSolutions( tempSolutionSet, asBigdataIterator(lexicon, chunkSize, result)); try { /* * Replace WHERE clause with an join group * containing an INCLUDE for the solutions to be * removed. * * WHERE := { INCLUDE %namedSet MINUS {INCLUDE %temp} } */ // final JoinGroupNode oldWhereClause = (JoinGroupNode) queryRoot // .getWhereClause(); final JoinGroupNode newWhereClause = new JoinGroupNode(); queryRoot.setWhereClause(newWhereClause); // Include the source solutions. newWhereClause.addArg(new NamedSubqueryInclude( solutionSet)); // MINUS solutions to be removed. final JoinGroupNode minusOp = new JoinGroupNode( new NamedSubqueryInclude( tempSolutionSet)); newWhereClause.addArg(minusOp); minusOp.setMinus(true); // log.error("oldWhereClause="+oldWhereClause); // log.error("newWhereClause="+newWhereClause); // /* // * Re-write the AST to handle DELETE solutions. // */ // convertQueryForDeleteSolutions(queryRoot, // solutionSet); // Set the projection node. queryRoot.setProjection(deleteClause .getProjection()); /* * Run as a SELECT query : Do NOT materialize * IVs. * * Note: This *MUST* use the view of the * tripleStore which is associated with the * SailConnection in case the view is isolated * by a transaction. */ final ICloseableIterator<IBindingSet[]> titr = ASTEvalHelper .evaluateTupleQuery2( context.conn.getTripleStore(), astContainer, context.getQueryBindingSet()/* bindingSets */, false/* materialize */); try { // Write onto named solution set. context.solutionSetManager.putSolutions( solutionSet, titr); } finally { titr.close(); } } finally { /* * Make sure that we do not leave this hanging * around. */ context.solutionSetManager .clearSolutions(tempSolutionSet); } } else { /* * DELETE triples/quads constructed from the * solutions. 
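                         *
                         * Illustrative example (not part of the original
                         * source): with
                         *
                         *   DELETE { ?s ?p ?o }
                         *   WHERE  { ?s ?p ?o ; <http://example.org/retired> true }
                         *
                         * each WHERE solution is pushed through the delete
                         * template below and the resulting concrete statements
                         * are removed via addOrRemoveStatement(..., false).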
*/ final ConstructNode template = op.getDeleteClause() .getQuadData().flatten(new ConstructNode(context)); template.setDistinctQuads(true); if (nativeDistinct) { template.setNativeDistinct(true); } final ASTConstructIterator itr = new ASTConstructIterator( context,// context.conn.getTripleStore(), template, op.getWhereClause(), null/* bnodesMap */, result); while (itr.hasNext()) { final BigdataStatement stmt = itr.next(); addOrRemoveStatement( context.conn.getSailConnection(), stmt, false/* insert */); } } deleteInsertWhereStats.deleteNanos.set(System.nanoTime() - beginDeleteNanos); } // End DELETE clause. // Play it once through the INSERT clause. if (insertClause != null) { final long beginInsertNanos = System.nanoTime(); // rewind. result.beforeFirst(); // Figure out if operating on solutions or graphs. final boolean isSolutionSet = insertClause.isSolutions(); if (isSolutionSet) { /* * Target is solution set. * * @see * https://sourceforge.net/apps/trac/bigdata/ticket * /524 (SPARQL Cache) */ // The named solution set on which we will write. final String solutionSet = insertClause.getName(); // Set the projection node. queryRoot.setProjection(insertClause.getProjection()); final ICloseableIterator<IBindingSet[]> titr = asBigdataIterator( lexicon, chunkSize, result); try { // Write the solutions onto the named solution // set. context.solutionSetManager.putSolutions(solutionSet, titr); } finally { titr.close(); } } else { /* * INSERT triples/quads CONSTRUCTed from solutions. */ final ConstructNode template = op.getInsertClause() .getQuadData().flatten(new ConstructNode(context)); template.setDistinctQuads(true); if (nativeDistinct) { template.setNativeDistinct(true); } final ASTConstructIterator itr = new ASTConstructIterator( context,// context.conn.getTripleStore(), template, op.getWhereClause(), null/* bnodesMap */, result); while (itr.hasNext()) { final BigdataStatement stmt = itr.next(); addOrRemoveStatement( context.conn.getSailConnection(), stmt, true/* insert */); } } deleteInsertWhereStats.insertNanos.set(System.nanoTime() - beginInsertNanos); } // End INSERT clause } finally { // Close the result set. result.close(); } } else { /* * DELETE/INSERT. * * Note: For this code path, only the INSERT clause -or- the * DELETE clause was specified. We handle the case where BOTH * clauses were specified above. */ // true iff this is an INSERT final boolean isInsert = insertClause != null; // final boolean isDelete = deleteClause != null; // The clause (either for INSERT or DELETE) final QuadsDataOrNamedSolutionSet clause = isInsert ? insertClause : deleteClause; assert clause != null; // Figure out if operating on solutions or graphs. final boolean isSolutionSet = clause.isSolutions(); if(isSolutionSet) { /* * Target is solution set. * * @see https://sourceforge.net/apps/trac/bigdata/ticket/524 * (SPARQL Cache) */ // The named solution set on which we will write. final String solutionSet = clause.getName(); // Set the projection node. queryRoot.setProjection(clause.getProjection()); if (!isInsert) { /* * Re-write the AST to handle DELETE solutions. */ convertQueryForDeleteSolutions(queryRoot, solutionSet); } /* * Run as a SELECT query : Do NOT materialize IVs. * * Note: This *MUST* use the view of the tripleStore which * is associated with the SailConnection in case the view is * isolated by a transaction. 
*/ final ICloseableIterator<IBindingSet[]> result = ASTEvalHelper .evaluateTupleQuery2(context.conn.getTripleStore(), astContainer, context.getQueryBindingSet()/* bindingSets */, false/* materialize */); try { // Write the solutions onto the named solution set. context.solutionSetManager.putSolutions(solutionSet, result); } finally { result.close(); } } else { /* * Target is graph. */ final QuadData quadData = (insertClause == null ? deleteClause : insertClause).getQuadData(); // Flatten the original WHERE clause into a CONSTRUCT // template. final ConstructNode template = quadData .flatten(new ConstructNode(context)); template.setDistinctQuads(true); // Set the CONSTRUCT template (quads patterns). queryRoot.setConstruct(template); /* * Run as a CONSTRUCT query * * FIXME Can we avoid IV materialization for this code path? * Note that we have to do Truth Maintenance. However, I * suspect that we do not need to do IV materialization if * we can tunnel into the Sail's assertion and retraction * buffers. * * Note: This *MUST* use the view of the tripleStore which * is associated with the SailConnection in case the view is * isolated by a transaction. */ final GraphQueryResult result = ASTEvalHelper .evaluateGraphQuery(context.conn.getTripleStore(), astContainer, context.getQueryBindingSet()/* bindingSets */, null /* dataset */); try { while (result.hasNext()) { final BigdataStatement stmt = (BigdataStatement) result .next(); addOrRemoveStatement( context.conn.getSailConnection(), stmt, isInsertOnly); } } finally { result.close(); } } } } return null; } /** * Efficiently resolve openrdf {@link BindingSet} into a chunked bigdata * {@link IBindingSet}[] iterator. The closeable semantics of the iteration * pattern are preserved. * * @param r * The {@link LexiconRelation}. * @param chunkSize * When converting the openrdf binding set iteration pattern into * a chunked iterator pattern, this will be the target chunk * size. * @param result * The openrdf solutions. * @return An iterator visiting chunked bigdata solutions. * * TODO We should not have to do this. We should stay within native * bigdata IBindingSet[]s and the native bigdata iterators */ private static ICloseableIterator<IBindingSet[]> asBigdataIterator( final LexiconRelation r, final int chunkSize, final CloseableIteration<BindingSet, QueryEvaluationException> result) { // Wrap with streaming iterator pattern. final Striterator sitr = new Striterator( // Chunk up the openrdf solutions. new Chunkerator<BindingSet>( // Convert the Sesame iteration into a Bigdata iterator. new Sesame2BigdataIterator<BindingSet, QueryEvaluationException>( result), chunkSize)); // Add filter to batch resolve BindingSet[] => IBindingSet[]. sitr.addFilter(new Resolver() { private static final long serialVersionUID = 1L; @Override protected Object resolve(Object obj) { // Visiting openrdf BindingSet[] chunks. final BindingSet[] in = (BindingSet[]) obj; // Batch resolve to IBindingSet[]. final IBindingSet[] out = BigdataOpenRDFBindingSetsResolverator .resolveChunk(r, in); // Return Bigdata IBindingSet[]. return out; } }); return sitr; } /** * We need to find and remove the matching solutions. We handle this by * transforming the WHERE clause with a MINUS joining against the target * solution set via an INCLUDE. The solutions which are produced by the * query can then be written directly onto the named solution set. That way, * both DELETE and INSERT will wind up as putSolutions(). 
* * <pre> * WHERE {...} * </pre> * * is rewritten as * * <pre> * WHERE { INCLUDE %namedSet MINUS { ... } } * </pre> * * TODO If there is a BIND() to a constant in the SELECT expression, then * this transform will not capture that binding. We would also need to * process the PROJECTION and pull down any constants into BIND()s in the * WHERE clause. */ private static void convertQueryForDeleteSolutions( final QueryRoot queryRoot, final String solutionSet) { final JoinGroupNode oldWhereClause = (JoinGroupNode) queryRoot .getWhereClause(); final JoinGroupNode newWhereClause = new JoinGroupNode(); queryRoot.setWhereClause(newWhereClause); final NamedSubqueryInclude includeOp = new NamedSubqueryInclude(solutionSet); newWhereClause.addArg(includeOp); final JoinGroupNode minusOp = new JoinGroupNode(); minusOp.setMinus(true); newWhereClause.addArg(minusOp); minusOp.addArg(oldWhereClause.clone()); // log.error("oldWhereClause="+oldWhereClause); // log.error("newWhereClause="+newWhereClause); } /** * Copy all statements from source to target. * * @param left * @param op * @param context * @return * @throws RepositoryException */ private static PipelineOp convertAddGraph(PipelineOp left, final AddGraph op, final AST2BOpUpdateContext context) throws RepositoryException { if (runOnQueryEngine) throw new UnsupportedOperationException(); final BigdataURI sourceGraph = (BigdataURI) (op.getSourceGraph() == null ? null : op.getSourceGraph().getValue()); final BigdataURI targetGraph = (BigdataURI) (op.getTargetGraph() == null ? null : op.getTargetGraph().getValue()); copyStatements(// context, // op.isSilent(), // sourceGraph,// targetGraph// ); return null; } /** * Copy all statements from the sourceGraph to the targetGraph. * <p> * Note: The SILENT keyword for ADD, COPY, and MOVE indicates that the * implementation SHOULD/MAY report an error if the source graph does not * exist (the spec is not consistent here across those operations). Further, * there is no explicit create/drop of graphs in bigdata so it WOULD be Ok * if we just ignored the SILENT keyword. */ private static void copyStatements(final AST2BOpUpdateContext context, final boolean silent, final BigdataURI sourceGraph, final BigdataURI targetGraph) throws RepositoryException { if (log.isDebugEnabled()) log.debug("sourceGraph=" + sourceGraph + ", targetGraph=" + targetGraph); if (!silent) { // assertGraphNotEmpty(context, sourceGraph); } final RepositoryResult<Statement> result = context.conn.getStatements( null/* s */, null/* p */, null/* o */, context.isIncludeInferred(), new Resource[] { sourceGraph }); try { context.conn.add(result, new Resource[] { targetGraph }); } finally { result.close(); } } /** * Drop() target, Add(source,target), Drop(source). * * @param left * @param op * @param context * @return * @throws RepositoryException * @throws SailException */ private static PipelineOp convertMoveGraph(PipelineOp left, final MoveGraph op, final AST2BOpUpdateContext context) throws RepositoryException, SailException { if (runOnQueryEngine) throw new UnsupportedOperationException(); final BigdataURI sourceGraph = (BigdataURI) (op.getSourceGraph() == null ? context.f .asValue(BD.NULL_GRAPH) : op.getSourceGraph().getValue()); final BigdataURI targetGraph = (BigdataURI) (op.getTargetGraph() == null ? 
                context.f.asValue(BD.NULL_GRAPH) : op.getTargetGraph().getValue());

        if (log.isDebugEnabled())
            log.debug("sourceGraph=" + sourceGraph + ", targetGraph="
                    + targetGraph);

        if (!sourceGraph.equals(targetGraph)) {
            clearOneGraph(targetGraph, context);
            copyStatements(context, op.isSilent(), sourceGraph, targetGraph);
            clearOneGraph(sourceGraph, context);
        }

        return null;
    }

    /**
     * Drop() target, then Add().
     *
     * @param left
     * @param op
     * @param context
     * @return
     * @throws RepositoryException
     * @throws SailException
     */
    private static PipelineOp convertCopyGraph(PipelineOp left,
            final CopyGraph op, final AST2BOpUpdateContext context)
            throws RepositoryException, SailException {

        if (runOnQueryEngine)
            throw new UnsupportedOperationException();

        final BigdataURI sourceGraph = (BigdataURI) (op.getSourceGraph() == null
                ? context.f.asValue(BD.NULL_GRAPH)
                : op.getSourceGraph().getValue());

        final BigdataURI targetGraph = (BigdataURI) (op.getTargetGraph() == null
                ? context.f.asValue(BD.NULL_GRAPH)
                : op.getTargetGraph().getValue());

        if (log.isDebugEnabled())
            log.debug("sourceGraph=" + sourceGraph + ", targetGraph="
                    + targetGraph);

        if (!sourceGraph.equals(targetGraph)) {
            clearOneGraph(targetGraph, context);
            copyStatements(context, op.isSilent(), sourceGraph, targetGraph);
        }

        return null;
    }

    /**
     * <pre>
     * LOAD ( SILENT )? IRIref_from ( INTO GRAPH IRIref_to )?
     * </pre>
     *
     * @param left
     * @param op
     * @param context
     * @return
     * @throws Exception
     */
    private static PipelineOp convertLoadGraph(PipelineOp left,
            final LoadGraph op, final AST2BOpUpdateContext context)
            throws Exception {

        if (!runOnQueryEngine) {

            final AtomicLong nmodified = new AtomicLong();

            final String urlStr = op.getSourceGraph().getValue().stringValue();

            try {

                final URL sourceURL = new URL(urlStr);

                final BigdataURI defaultContext = (BigdataURI) (op
                        .getTargetGraph() == null ? null : op.getTargetGraph()
                        .getValue());

                if (log.isDebugEnabled())
                    log.debug("sourceURI=" + urlStr + ", defaultContext="
                            + defaultContext);

                /*
                 * Take overrides from the LOAD request, defaults from the
                 * triple store, and fall back to the static defaults.
                 */
                final Properties defaults = context.getAbstractTripleStore()
                        .getProperties();

                final boolean verifyData = Boolean.parseBoolean(op.getProperty(
                        LoadGraph.Annotations.VERIFY_DATA,
                        defaults.getProperty(
                                RDFParserOptions.Options.VERIFY_DATA,
                                RDFParserOptions.Options.DEFAULT_VERIFY_DATA)));

                final boolean preserveBlankNodeIDs = Boolean.parseBoolean(op
                        .getProperty(
                                LoadGraph.Annotations.PRESERVE_BLANK_NODE_IDS,
                                defaults.getProperty(
                                        RDFParserOptions.Options.PRESERVE_BNODE_IDS,
                                        RDFParserOptions.Options.DEFAULT_PRESERVE_BNODE_IDS)));

                final boolean stopAtFirstError = Boolean.parseBoolean(op
                        .getProperty(
                                LoadGraph.Annotations.STOP_AT_FIRST_ERROR,
                                defaults.getProperty(
                                        RDFParserOptions.Options.STOP_AT_FIRST_ERROR,
                                        RDFParserOptions.Options.DEFAULT_STOP_AT_FIRST_ERROR)));

                final DatatypeHandling dataTypeHandling = DatatypeHandling
                        .valueOf(op.getProperty(
                                LoadGraph.Annotations.DATA_TYPE_HANDLING,
                                defaults.getProperty(
                                        RDFParserOptions.Options.DATATYPE_HANDLING,
                                        RDFParserOptions.Options.DEFAULT_DATATYPE_HANDLING)));

                final RDFParserOptions parserOptions = new RDFParserOptions(//
                        verifyData,//
                        preserveBlankNodeIDs,//
                        stopAtFirstError,//
                        dataTypeHandling//
                );

                doLoad(context.conn.getSailConnection(), sourceURL,
                        defaultContext, parserOptions, nmodified, op);

            } catch (Throwable t) {

                final String msg = "Could not load: url=" + urlStr + ", cause="
                        + t;

                if (op.isSilent()) {
                    log.warn(msg);
                } else {
                    throw new RuntimeException(msg, t);
                }

            }

            return null;

        }

        /*
         * Parse the file.
* * Note: After the parse step, the remainder of the steps are just like * INSERT DATA. */ { final Map<String, Object> anns = new HashMap<String, Object>(); anns.put(BOp.Annotations.BOP_ID, context.nextId()); // required. anns.put(ParseOp.Annotations.SOURCE_URI, op.getSourceGraph() .getValue()); if(op.isSilent()) anns.put(ParseOp.Annotations.SILENT, true); // optional. if (op.getTargetGraph() != null) anns.put(ParseOp.Annotations.TARGET_URI, op.getTargetGraph()); // required. anns.put(ParseOp.Annotations.TIMESTAMP, context.getTimestamp()); anns.put(ParseOp.Annotations.RELATION_NAME, new String[] { context.getNamespace() }); /* * TODO 100k is the historical default for the data loader. We * generally want to parse a lot of data at once and vector it in * big chunks. However, we could have a lot more parallelism with * the query engine. So, if there are multiple source URIs to be * loaded, then we might want to reduce the vector size (or maybe * not, probably depends on the JVM heap). */ anns.put(ParseOp.Annotations.CHUNK_CAPACITY, 100000); left = new ParseOp(leftOrEmpty(left), anns); } /* * Append the pipeline operations to add/resolve IVs against the lexicon * and insert/delete statemetns. */ left = addInsertOrDeleteDataPipeline(left, true/* insert */, context); /* * Execute the update. */ executeUpdate(left, context.getBindings()/* bindingSets */, context); // Return null since pipeline was evaluated. return null; } /** * * Utility method to get the {@link RDFFormat} for filename. * * It checks for compressed endings and is provided as a utility. * * @param fileName * @return */ public static RDFFormat rdfFormatForFile(final String fileName) { /* * Try to get the RDFFormat from the URL's file path. */ RDFFormat fmt = RDFFormat.forFileName(fileName); if (fmt == null && fileName.endsWith(".zip")) { fmt = RDFFormat.forFileName(fileName.substring(0, fileName.length() - 4)); } if (fmt == null && fileName.endsWith(".gz")) { fmt = RDFFormat.forFileName(fileName.substring(0, fileName.length() - 3)); } if (fmt == null) { // Default format. fmt = RDFFormat.RDFXML; } return fmt; } /** * Parse and load a document. * * @param conn * @param sourceURL * @param defaultContext * @param nmodified * @return * @throws IOException * @throws RDFHandlerException * @throws RDFParseException * * TODO See {@link ParseOp} for a significantly richer pipeline * operator which will parse a document. However, this method is * integrated into all of the truth maintenance mechanisms in * the Sail and is therefore easier to place into service. */ private static void doLoad(final BigdataSailConnection conn, final URL sourceURL, final URI defaultContext, final IRDFParserOptions parserOptions, final AtomicLong nmodified, final LoadGraph op) throws IOException, RDFParseException, RDFHandlerException { // Use the default context if one was given and otherwise // the URI from which the data are being read. final Resource defactoContext = defaultContext == null ? new URIImpl( sourceURL.toExternalForm()) : defaultContext; URLConnection hconn = null; try { hconn = sourceURL.openConnection(); if (hconn instanceof HttpURLConnection) { ((HttpURLConnection) hconn).setRequestMethod("GET"); } hconn.setDoInput(true); hconn.setDoOutput(false); hconn.setReadTimeout(0);// no timeout? http param? /* * There is a request body, so let's try and parse it. */ final String contentType = hconn.getContentType(); // The baseURL (passed to the parser). final String baseURL = sourceURL.toExternalForm(); // The file path. 
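            /*
             * Illustrative example (not part of the original source): for
             *
             *   LOAD SILENT <http://example.org/data/dump.ttl.gz> INTO GRAPH <http://example.org/g1>
             *
             * the Content-Type reported by the connection is consulted first;
             * if that does not identify an RDFFormat, the format is guessed
             * from the file path ("dump.ttl.gz" resolves to Turtle once
             * rdfFormatForFile() strips the ".gz" suffix), and the input
             * stream below is wrapped in a GZIPInputStream.
             */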
//BLZG-1929 final String n = sourceURL.getPath(); /** * Attempt to obtain the format from the Content-Type. * * <a href="https://sourceforge.net/apps/trac/bigdata/ticket/620"> * UpdateServlet fails to parse MIMEType when doing conneg. </a> */ RDFFormat format = RDFFormat.forMIMEType(new MiniMime(contentType) .getMimeType()); if (format == null) { format = rdfFormatForFile(n); } if (format == null) throw new UnknownContentTypeException(contentType); final RDFParserFactory rdfParserFactory = RDFParserRegistry .getInstance().get(format); if (rdfParserFactory == null) throw new UnknownContentTypeException(contentType); final RDFParser rdfParser = rdfParserFactory .getParser(); rdfParser.setValueFactory(conn.getTripleStore().getValueFactory()); /* * Apply the RDF parser options. */ rdfParser.setVerifyData(parserOptions.getVerifyData()); rdfParser.setPreserveBNodeIDs(parserOptions.getPreserveBNodeIDs()); rdfParser.setStopAtFirstError(parserOptions.getStopAtFirstError()); rdfParser.setDatatypeHandling(parserOptions.getDatatypeHandling()); rdfParser.setRDFHandler(new AddStatementHandler(conn, nmodified, defactoContext, op)); /* * Setup the input stream. */ InputStream is = hconn.getInputStream(); try { /* * Setup decompression. */ if (n.endsWith(".gz")) { is = new GZIPInputStream(is); // } else if (n.endsWith(".zip")) { // // /* // * TODO This will not process all entries in a zip input // * stream, just the first. // */ // is = new ZipInputStream(is); } } catch (Throwable t) { if (is != null) { try { is.close(); } catch (Throwable t2) { log.warn(t2, t2); } throw new RuntimeException(t); } } /* * Run the parser, which will cause statements to be * inserted. */ rdfParser.parse(is, baseURL); } finally { if (hconn instanceof HttpURLConnection) { /* * Disconnect, but only after we have loaded all the * URLs. Disconnect is optional for java.net. It is a * hint that you will not be accessing more resources on * the connected host. By disconnecting only after all * resources have been loaded we are basically assuming * that people are more likely to load from a single * host. */ ((HttpURLConnection) hconn).disconnect(); } } } /** * Helper class adds statements to the sail as they are visited by a parser. */ private static class AddStatementHandler extends RDFHandlerBase { private final LoadGraph op; private final long beginNanos; private final BigdataSailConnection conn; private final AtomicLong nmodified; private final Resource[] defaultContexts; public AddStatementHandler(final BigdataSailConnection conn, final AtomicLong nmodified, final Resource defaultContext, final LoadGraph op) { this.conn = conn; this.nmodified = nmodified; final boolean quads = conn.getTripleStore().isQuads(); if (quads && defaultContext != null) { // The default context may only be specified for quads. this.defaultContexts = new Resource[] { defaultContext }; } else { this.defaultContexts = new Resource[0]; } this.op = op; this.beginNanos = System.nanoTime(); } public void handleStatement(final Statement stmt) throws RDFHandlerException { try { conn.addStatement(// stmt.getSubject(), // stmt.getPredicate(), // stmt.getObject(), // (Resource[]) (stmt.getContext() == null ? 
defaultContexts : new Resource[] { stmt.getContext() })// ); } catch (SailException e) { throw new RDFHandlerException(e); } final long nparsed = nmodified.incrementAndGet(); if ((nparsed % 10000) == 0L) { final long elapsed = System.nanoTime() - beginNanos; // notify listener(s) conn.fireEvent(new SPARQLUpdateEvent.LoadProgress(op, elapsed, nparsed, false/* done */)); } } /** * Overridden to send out an incremental progress report for the end of * the LOAD operation. */ @Override public void endRDF() throws RDFHandlerException { final long nparsed = nmodified.get(); final long elapsed = System.nanoTime() - beginNanos; // notify listener(s) conn.fireEvent(new SPARQLUpdateEvent.LoadProgress(op, elapsed, nparsed, true/* done */)); } } /** * Note: Bigdata does not support empty graphs, so {@link UpdateType#Clear} * and {@link UpdateType#Drop} have the same semantics. * * <pre> * DROP ( SILENT )? (GRAPH IRIref | DEFAULT | NAMED | ALL ) * </pre> * * @param left * @param op * @param context * @return * @throws RepositoryException * @throws SailException */ private static PipelineOp convertClearOrDropGraph(PipelineOp left, final DropGraph op, final AST2BOpUpdateContext context) throws RepositoryException, SailException { if (runOnQueryEngine) throw new UnsupportedOperationException(); final TermNode targetGraphNode = op.getTargetGraph(); final BigdataURI targetGraph = targetGraphNode == null ? null : (BigdataURI) targetGraphNode.getValue(); clearGraph(op.isSilent(), op.getTargetSolutionSet(), targetGraph, op.getScope(), op.isAllGraphs(), op.isAllSolutionSets(), context); return left; } /** * Clear one graph (SILENT). * * @param targetGraph * The graph to be cleared -or- <code>null</code> if no target * graph was named. * @param context * The {@link AST2BOpUpdateContext} used to perform the * operation. * * @throws RepositoryException * @throws SailException */ private static final void clearOneGraph(final URI targetGraph, // final AST2BOpUpdateContext context// ) throws RepositoryException, SailException { clearGraph(true/* silent */, null/* targetSolutionSet */, targetGraph, null/* scope */, false/* allGraphs */, false/* allSolutionSets */, context); } /** * Clear one or more graphs and/or solution sets. * * @param silent * When <code>true</code>, some kinds of problems will not be * reported to the caller. * @param targetSolutionSet * The target solution set to be cleared -or- <code>null</code> * if no target solution set was named. * @param targetGraph * The graph to be cleared -or- <code>null</code> if no target * graph was named. * @param scope * The scope iff just the graphs in either the * {@link Scope#DEFAULT_CONTEXTS} or {@link Scope#NAMED_CONTEXTS} * should be cleared and otherwise <code>null</code>. * @param allGraphs * iff all graphs should be cleared. * @param allSolutionSets * iff all solution sets should be cleared. * @param context * The {@link AST2BOpUpdateContext} used to perform the * operation. * * @throws RepositoryException * @throws SailException */ // CLEAR/DROP ( SILENT )? 
(GRAPH IRIref | DEFAULT | NAMED | ALL | GRAPHS | SOLUTIONS | SOLUTIONS %VARNAME) private static void clearGraph(// final boolean silent,// final String solutionSet,// final URI targetGraph, // final Scope scope,// final boolean allGraphs,// final boolean allSolutionSets,// final AST2BOpUpdateContext context// ) throws RepositoryException, SailException { if (log.isDebugEnabled()) log.debug("targetGraph=" + targetGraph + ", scope=" + scope); /* * Note: removeStatements() is not exposed by the RepositoryConnection. */ final BigdataSailConnection sailConn = context.conn.getSailConnection(); if (solutionSet != null) { // Clear the named solution set. if (!context.solutionSetManager.clearSolutions(solutionSet) && !silent) { // Named solution set does not exists, but should exist. throw new SolutionSetDoesNotExistException(solutionSet); } } if (targetGraph != null) { /* * Addressing a specific graph. */ sailConn.removeStatements(null/* s */, null/* p */, null/* o */, targetGraph); } if (scope != null) { if (scope == Scope.DEFAULT_CONTEXTS) { /* * Addressing the defaultGraph (Sesame nullGraph). */ sailConn.removeStatements(null/* s */, null/* p */, null/* o */, BD.NULL_GRAPH); } else { /* * Addressing ALL NAMED GRAPHS. * * Note: This is everything EXCEPT the nullGraph. */ final RepositoryResult<Resource> result = context.conn .getContextIDs(); try { while (result.hasNext()) { final Resource c = result.next(); sailConn.removeStatements(null/* s */, null/* p */, null/* o */, c); } } finally { result.close(); } } } if(allGraphs) { /* * Addressing ALL graphs. * * TODO This should be optimized. If we are doing truth maintenance, * then we need to discard the buffers, drop all statements and also * drop the proof chains. If we are not doing truth maintenance and * this is the unisolated connection, then delete all statements and * also clear the lexicon. (We should really catch this optimization * in the BigdataSailConnection.) */ sailConn.removeStatements(null/* s */, null/* p */, null/* o */); } /* * Note: We need to verify that the backing data structure is enabled * since the default semantics of CLEAR ALL and DROP ALL also imply all * named solution sets. */ if (allSolutionSets && context.solutionSetManager != null) { // Delete all solution sets. 
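            /*
             * Illustrative examples (not part of the original source) of
             * requests that reach clearGraph():
             *
             *   DROP SILENT GRAPH <http://example.org/g1>   (targetGraph != null)
             *   CLEAR DEFAULT                               (scope == DEFAULT_CONTEXTS)
             *   CLEAR NAMED                                 (scope == NAMED_CONTEXTS)
             *   DROP ALL                                    (allGraphs == true; named
             *                                                solution sets are also
             *                                                cleared here)
             */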
context.solutionSetManager.clearAllSolutions(); } } private static PipelineOp convertDropEntailments(final PipelineOp left, final AST2BOpUpdateContext context) throws SailException { long stmtCount = 0; if (log.isDebugEnabled()) { stmtCount = context.conn.getSailConnection().getTripleStore().getStatementCount(true); log.info("begin drop entailments"); } context.conn.getSailConnection().removeAllEntailments(); if (log.isDebugEnabled()) { long removedCount = stmtCount - context.conn.getSailConnection().getTripleStore().getStatementCount(true); log.debug("Removed statements = " + removedCount); } return left; } private static PipelineOp convertDisableEntailments(PipelineOp left, AST2BOpUpdateContext context) { if (log.isDebugEnabled()) { log.debug("Going to disable truth maintenance"); } if (context.conn.getSailConnection().isTruthMaintenanceConfigured()) { context.conn.getSailConnection().setTruthMaintenance(false); } else { log.debug("Truth maintenance is not configured"); } if (log.isDebugEnabled()) { log.debug("truthMaintenance = " + context.conn.getSailConnection().getTruthMaintenance()); } return left; } private static PipelineOp convertEnableEntailments(PipelineOp left, AST2BOpUpdateContext context) { if (log.isDebugEnabled()) { log.debug("Going to enable truth maintenance"); } if (context.conn.getSailConnection().isTruthMaintenanceConfigured()) { context.conn.getSailConnection().setTruthMaintenance(true); } else { log.debug("Truth maintenance is not configured"); } if (log.isDebugEnabled()) { log.debug("truthMaintenance = " + context.conn.getSailConnection().getTruthMaintenance()); } return left; } private static PipelineOp convertCreateEntailments(PipelineOp left, AST2BOpUpdateContext context) throws SailException { long stmtCount = 0; if (log.isDebugEnabled()) { stmtCount = context.conn.getSailConnection().getTripleStore().getStatementCount(true); log.info("begin compute closure"); } context.conn.getSailConnection().computeClosure(); if (log.isDebugEnabled()) { long inferredCount = context.conn.getSailConnection().getTripleStore().getStatementCount(true) - stmtCount; log.debug("Inferred statements = " + inferredCount); } return left; } /** * GRAPHS : If the graph already exists (context has at least one * statement), then this is an error (unless SILENT). Otherwise it is a NOP. * <p> * SOLUTIONS : If the named solution set already exists (is registered, but * may be empty), then this is an error (unless SILENT). Otherwise, the * named solution set is provisioned according to the optional parameters. * * @param left * @param op * @param context * @return */ private static PipelineOp convertCreateGraph(final PipelineOp left, final CreateGraph op, final AST2BOpUpdateContext context) { if (op.isTargetSolutionSet()) { final String solutionSet = op.getTargetSolutionSet(); final boolean exists = context.solutionSetManager .existsSolutions(solutionSet); if (!op.isSilent() && exists) { // Named solution set exists, but should not. 
throw new SolutionSetExistsException(solutionSet); } if (!exists) { context.solutionSetManager .createSolutions(solutionSet, op.getParams()); } } else { final BigdataURI c = (BigdataURI) ((CreateGraph) op) .getTargetGraph().getValue(); if (log.isDebugEnabled()) log.debug("targetGraph=" + c); if (!op.isSilent()) { assertGraphExists(context, c); } } return left; } /** * <pre> * INSERT DATA -or- DELETE DATA * </pre> * * @param left * @param op * @param context * @return * @throws Exception */ private static PipelineOp convertInsertOrDeleteData(PipelineOp left, final AbstractGraphDataUpdate op, final AST2BOpUpdateContext context) throws Exception { final boolean insert; switch (op.getUpdateType()) { case InsertData: insert = true; break; case DeleteData: insert = false; break; default: throw new UnsupportedOperationException(op.getUpdateType().name()); } if (!runOnQueryEngine) { final BigdataStatement[] stmts = op.getData(); if (log.isDebugEnabled()) log.debug((insert ? "INSERT" : "DELETE") + " DATA: #stmts=" + stmts.length); final BigdataSailConnection conn = context.conn.getSailConnection(); for (BigdataStatement s : stmts) { addOrRemoveStatementData(conn, s, insert); } return null; } /* * Convert the statements to be asserted or retracted into an * IBindingSet[]. */ final IBindingSet[] bindingSets; { // Note: getTargetGraph() is not defined for INSERT/DELETE DATA. final ConstantNode c = null;// op.getTargetGraph(); @SuppressWarnings("rawtypes") IV targetGraphIV = null; if (c != null) { targetGraphIV = c.getValue().getIV(); } if (targetGraphIV == null && context.isQuads()) { targetGraphIV = context.getNullGraph().getIV(); } bindingSets = getData(op.getData(), targetGraphIV, context.isQuads()); } /* * Append the pipeline operations to add/resolve IVs against the lexicon * and insert/delete statemetns. */ left = addInsertOrDeleteDataPipeline(left, insert, context); /* * Execute the update. */ executeUpdate(left, bindingSets, context); // Return null since pipeline was evaluated. return null; } /** * Insert or remove a statement. * * @param conn * The connection on which to write the mutation. * @param spo * The statement. * @param insert * <code>true</code> iff the statement is to be inserted and * <code>false</code> iff the statement is to be removed. * @throws SailException */ private static void addOrRemoveStatement(final BigdataSailConnection conn, final BigdataStatement spo, final boolean insert) throws SailException { final Resource s = (Resource) spo.getSubject(); final URI p = (URI) spo.getPredicate(); final Value o = (Value) spo.getObject(); /* * If [c] is not bound, then using an empty Resource[] for the contexts. * * On insert, this will cause the data to be added to the null graph. * * On remove, this will cause the statements to be removed from all * contexts (on remove it is interpreted as a wildcard). */ final Resource c = (Resource) (spo.getContext() == null ? null : spo .getContext()); final Resource[] contexts = (Resource[]) (c == null ? NO_CONTEXTS : new Resource[] { c }); if(log.isTraceEnabled()) log.trace((insert ? "INSERT" : "DELETE") + ": <" + s + "," + p + "," + o + "," + Arrays.toString(contexts)); if (insert) { conn.addStatement(s, p, o, contexts); } else { // /* // * We need to handle blank nodes (which can appear in the subject or // * object position) as unbound variables. // */ // // final Resource s1 = s instanceof BNode ? null : s; // // final Value o1 = o instanceof BNode ? 
null : o; // // conn.removeStatements(s1, p, o1, contexts); /** * * @see <a * href="https://sourceforge.net/apps/trac/bigdata/ticket/571"> * DELETE/INSERT WHERE handling of blank nodes </a> */ conn.removeStatements(s, p, o, contexts); } } /** * Insert or remove a statement (INSERT DATA or DELETE DATA). * * @param conn * The connection on which to write the mutation. * @param spo * The statement. * @param insert * <code>true</code> iff the statement is to be inserted and * <code>false</code> iff the statement is to be removed. * @throws SailException */ private static void addOrRemoveStatementData(final BigdataSailConnection conn, final BigdataStatement stmt, final boolean insert) throws SailException { // final Resource s = (Resource) spo.s().getValue(); // // final URI p = (URI) spo.p().getValue(); // // final Value o = (Value) spo.o().getValue(); final Resource s = stmt.getSubject(); final URI p = stmt.getPredicate(); final Value o = stmt.getObject(); /* * If [c] is not bound, then using an empty Resource[] for the contexts. * * On insert, this will cause the data to be added to the null graph. * * On remove, this will cause the statements to be removed from all * contexts (on remove it is interpreted as a wildcard). */ final Resource c = (Resource) (stmt.getContext() == null ? null : stmt .getContext()); final Resource[] contexts = (Resource[]) (c == null ? NO_CONTEXTS : new Resource[] { c }); if(log.isTraceEnabled()) log.trace((insert ? "INSERT" : "DELETE") + ": <" + s + "," + p + "," + o + "," + Arrays.toString(contexts)); if (insert) { conn.addStatement(s, p, o, contexts); } else { conn.removeStatements(s, p, o, contexts); } } /** * @param left * @param b * @param context * @return */ private static PipelineOp addInsertOrDeleteDataPipeline(PipelineOp left, final boolean insert, final AST2BOpUpdateContext context) { /* * Resolve/add terms against the lexicon. * * TODO Must do SIDs support. Probably pass the database mode in as an * annotation. See StatementBuffer. */ left = new ChunkedResolutionOp(leftOrEmpty(left), NV.asMap( // new NV(BOp.Annotations.BOP_ID, context.nextId()),// new NV(ChunkedResolutionOp.Annotations.TIMESTAMP, context .getTimestamp()),// new NV(ChunkedResolutionOp.Annotations.RELATION_NAME, new String[] { context.getLexiconNamespace() })// )); /* * Insert / remove statements. * * Note: namespace is the triple store, not the spo relation. This is * because insert is currently on the triple store for historical SIDs * support. * * Note: This already does TM for SIDs mode. * * TODO This must to TM for the subject-centric text index. * * TODO This must be able to do TM for triples+inference. */ if (insert) { left = new InsertStatementsOp(leftOrEmpty(left), NV.asMap( new NV(BOp.Annotations.BOP_ID, context.nextId()),// new NV(ChunkedResolutionOp.Annotations.TIMESTAMP, context .getTimestamp()),// new NV(ChunkedResolutionOp.Annotations.RELATION_NAME, new String[] { context.getNamespace() })// )); } else { left = new RemoveStatementsOp(leftOrEmpty(left), NV.asMap( new NV(BOp.Annotations.BOP_ID, context.nextId()),// new NV(ChunkedResolutionOp.Annotations.TIMESTAMP, context .getTimestamp()),// new NV(ChunkedResolutionOp.Annotations.RELATION_NAME, new String[] { context.getNamespace() })// )); } return left; } /** * Convert an {@link ISPO}[] into an {@link IBindingSet}[]. * * @param data * The {@link ISPO}[]. * @param targetGraph * The target graph (optional, but required if quads). 
* @param quads * <code>true</code> iff the target {@link AbstractTripleStore} * is in quads mode. * * @return The {@link IBindingSet}[]. * * TODO Either we need to evaluate this NOW (rather than deferring * it to pipelined evaluation later) or this needs to be pumped into * a hash index associated with the query plan in order to be * available when there is more than one INSERT DATA or REMOVE DATA * operation (or simply more than one UPDATE operation). * <p> * That hash index could be joined into the solutions immediate * before we undertake the chunked resolution operation which then * flows into the add/remove statements operation. * <p> * Variables in the query can not be projected into this operation * without causing us to insert/delete the cross product of those * variables, which has no interesting effect. * <p> * The advantage of running one plan per {@link Update} is that the * data can be flowed naturally into the {@link IRunningQuery}. */ @SuppressWarnings({ "rawtypes", "unchecked" }) private static IBindingSet[] getData(final ISPO[] data, final IV<?, ?> targetGraph, final boolean quads) { final IBindingSet[] bsets = new IBindingSet[data.length]; for (int i = 0; i < data.length; i++) { final ISPO spo = data[i]; final IBindingSet bset = bsets[i] = new ListBindingSet(); bset.set(s, new Constant(spo.s())); bset.set(p, new Constant(spo.p())); bset.set(o, new Constant(spo.o())); Constant g = null; if (spo.c() != null) g = new Constant(spo.c()); if (quads && g == null) { g = new Constant(targetGraph); } if (g != null) { bset.set(c, g); } } return bsets; } /** * Execute the update plan. * * @param left * @param bindingSets * The source solutions. * @param context * * @throws UpdateExecutionException */ static private void executeUpdate(final PipelineOp left, IBindingSet[] bindingSets, final AST2BOpUpdateContext context) throws Exception { if (!runOnQueryEngine) throw new UnsupportedOperationException(); if (left == null) throw new IllegalArgumentException(); if(bindingSets == null) { bindingSets = EMPTY_BINDING_SETS; } if (context == null) throw new IllegalArgumentException(); IRunningQuery runningQuery = null; try { // Submit update plan for evaluation. runningQuery = context.queryEngine.eval(left, bindingSets); // Wait for the update plan to complete. runningQuery.get(); } finally { if (runningQuery != null) { // ensure query is halted. runningQuery.cancel(true/* mayInterruptIfRunning */); } } } /** * Throw an exception if the graph is empty. * * @throws GraphEmptyException * if the graph does not exist and/or is empty. */ static private void assertGraphNotEmpty(final AST2BOpUpdateContext context, final BigdataURI sourceGraph) { if (sourceGraph == null || sourceGraph.equals(BD.NULL_GRAPH)) { /* * The DEFAULT graph is considered non-empty. * * Note: [null] for the sourceGraph indicates the default graph. But * the nullGraph can also indicate the default graph (empirically * observed). */ return; } if (sourceGraph.getIV() == null) { // Proof that the graph does not exist. throw new GraphEmptyException(sourceGraph); } if (context.conn .getTripleStore() .getAccessPath(null/* s */, null/* p */, null/* o */, sourceGraph).isEmpty()) { // Proof that the graph is empty. throw new GraphEmptyException(sourceGraph); } } /** * Throw an exception unless the graph is non-empty. * <p> * Note: This *MUST* use the view of the tripleStore which is associated * with the SailConnection in case the view is isolated by a transaction. 
* <P> * Note: If the IV could not be resolved, then that is proof that there is * no named graph for that RDF Resource. * * @see https://sourceforge.net/apps/trac/bigdata/ticket/569 * (LOAD-CREATE-LOAD using virgin journal fails with "Graph exists" * exception) */ static private void assertGraphExists(final AST2BOpUpdateContext context, final BigdataURI c) { if (c.getIV() != null && context.conn .getTripleStore() .getAccessPath(null/* s */, null/* p */, null/* o */, c.getIV()).rangeCount(false/* exact */) != 0) { throw new GraphExistsException(c); } } private static final IBindingSet[] EMPTY_BINDING_SETS = new IBindingSet[0]; private static final Resource[] NO_CONTEXTS = new Resource[0]; }