/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Sep 1, 2011
*/
package com.bigdata.rdf.sparql.ast.eval;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.log4j.Logger;
import org.apache.log4j.MDC;
import org.openrdf.model.Value;
import org.openrdf.query.Binding;
import org.openrdf.query.BindingSet;
import org.openrdf.query.Dataset;
import org.openrdf.query.GraphQueryResult;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.query.UpdateExecutionException;
import org.openrdf.query.algebra.evaluation.QueryBindingSet;
import org.openrdf.query.impl.GraphQueryResultImpl;
import org.openrdf.query.impl.TupleQueryResultImpl;
import org.openrdf.sail.SailException;
import com.bigdata.bop.BOp;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.PipelineOp;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.bop.engine.IRunningQuery;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.bop.rdf.join.ChunkedMaterializationIterator;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVCache;
import com.bigdata.rdf.model.BigdataBNode;
import com.bigdata.rdf.model.BigdataStatement;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.sail.Bigdata2Sesame2BindingSetIterator;
import com.bigdata.rdf.sail.BigdataSailRepositoryConnection;
import com.bigdata.rdf.sail.RunningQueryCloseableIterator;
import com.bigdata.rdf.sparql.ast.ASTContainer;
import com.bigdata.rdf.sparql.ast.DatasetNode;
import com.bigdata.rdf.sparql.ast.DeleteInsertGraph;
import com.bigdata.rdf.sparql.ast.DescribeModeEnum;
import com.bigdata.rdf.sparql.ast.IDataSetNode;
import com.bigdata.rdf.sparql.ast.QueryRoot;
import com.bigdata.rdf.sparql.ast.QueryType;
import com.bigdata.rdf.sparql.ast.Update;
import com.bigdata.rdf.sparql.ast.UpdateRoot;
import com.bigdata.rdf.sparql.ast.cache.DescribeBindingsCollector;
import com.bigdata.rdf.sparql.ast.cache.DescribeCacheUpdater;
import com.bigdata.rdf.sparql.ast.cache.IDescribeCache;
import com.bigdata.rdf.sparql.ast.eval.ASTDeferredIVResolution.DeferredResolutionResult;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.BigdataBindingSetResolverator;
import com.bigdata.striterator.ChunkedWrappedIterator;
import com.bigdata.striterator.Dechunkerator;
import com.bigdata.striterator.IChunkedOrderedIterator;
import cutthecrap.utils.striterators.ICloseableIterator;
import info.aduna.iteration.CloseableIteration;
/**
* Helper class for evaluating SPARQL queries.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*/
public class ASTEvalHelper {
/**
* A logger whose sole purpose is to log the SPARQL queries which are being
* evaluated. <strong>DO NOT USE THIS FOR OTHER PURPOSES!</strong>
*/
private static final Logger log = Logger.getLogger(ASTEvalHelper.class);
/**
* Evaluate a boolean query.
*
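* A minimal usage sketch (the store and the parsed {@link ASTContainer} are
* assumed to be obtained elsewhere; variable names are illustrative only):
*
* <pre>
* // ASK: true iff there is at least one solution (the dataset may be null).
* final boolean exists = ASTEvalHelper.evaluateBooleanQuery(store,
*         astContainer, new QueryBindingSet(), null);
* </pre>
*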
* @param store
* The {@link AbstractTripleStore} having the data.
* @param astContainer
* The {@link ASTContainer}.
* @param globallyScopedBS
* The initial solution to kick things off.
* @param dataset
* The dataset to be used for the query (optional).
*
* @return <code>true</code> if there are any solutions to the query.
*
* @throws QueryEvaluationException
*/
static public boolean evaluateBooleanQuery(
final AbstractTripleStore store,
final ASTContainer astContainer,
final BindingSet globallyScopedBS,
final Dataset dataset)
throws QueryEvaluationException {
final AST2BOpContext context = new AST2BOpContext(astContainer, store);
final DeferredResolutionResult resolved;
try {
// @see https://jira.blazegraph.com/browse/BLZG-1176
resolved = ASTDeferredIVResolution.resolveQuery(
store, astContainer, globallyScopedBS, dataset, context);
} catch (MalformedQueryException e) {
throw new QueryEvaluationException(e.getMessage(), e);
}
if (resolved.dataset != null) {
astContainer.getOriginalAST().setDataset(
new DatasetNode(resolved.dataset, false/* update */));
}
// Clear the optimized AST.
astContainer.clearOptimizedAST();
// Batch resolve Values to IVs and convert to bigdata binding set.
final IBindingSet[] globallyScopedBSAsList = toBindingSet(resolved.bindingSet);
// Convert the query (generates an optimized AST as a side-effect).
AST2BOpUtility.convert(context, globallyScopedBSAsList);
// The optimized AST.
final QueryRoot optimizedQuery = astContainer.getOptimizedAST();
// Note: We do not need to materialize anything for ASK.
final boolean materializeProjectionInQuery = context.materializeProjectionInQuery
&& !optimizedQuery.hasSlice();
CloseableIteration<BindingSet, QueryEvaluationException> itr = null;
try {
itr = ASTEvalHelper.evaluateQuery(
astContainer,
context,
materializeProjectionInQuery,
new IVariable[0]// required
);
return itr.hasNext();
} finally {
if (itr != null) {
/**
* Ensure query is terminated. An interrupt during hasNext()
* should cause the query to terminate through itr.close().
*
* @see <a
* href="https://sourceforge.net/apps/trac/bigdata/ticket/707">
* BlockingBuffer.close() does not unblock threads </a>
*/
itr.close();
}
}
}
/**
* Evaluate a SELECT query.
*
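* A minimal usage sketch (hypothetical names; exception handling elided).
* The returned {@link TupleQueryResult} MUST be closed in order to
* terminate the underlying query:
*
* <pre>
* final TupleQueryResult result = ASTEvalHelper.evaluateTupleQuery(store,
*         astContainer, new QueryBindingSet(), null);
* try {
*     while (result.hasNext()) {
*         final BindingSet bset = result.next();
*         // consume the solution.
*     }
* } finally {
*     result.close();
* }
* </pre>
*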
* @param store
* The {@link AbstractTripleStore} having the data.
* @param astContainer
* The {@link ASTContainer}.
* @param globallyScopedBS
* The initial solution to kick things off.
* @param dataset
* The dataset to be used for the query (optional).
*
* @return An object from which the solutions may be drained.
*
* @throws QueryEvaluationException
*/
static public TupleQueryResult evaluateTupleQuery(
final AbstractTripleStore store,
final ASTContainer astContainer,
final QueryBindingSet globallyScopedBS,
final Dataset dataset) throws QueryEvaluationException {
final AST2BOpContext context = new AST2BOpContext(astContainer, store);
final QueryRoot optimizedQuery =
optimizeQuery(astContainer, context, globallyScopedBS, dataset);
// Get the projection for the query.
final IVariable<?>[] projected = astContainer.getOptimizedAST()
.getProjection().getProjectionVars();
final List<String> projectedSet = new LinkedList<String>();
for (IVariable<?> var : projected)
projectedSet.add(var.getName());
final boolean materializeProjectionInQuery = context.materializeProjectionInQuery
&& !optimizedQuery.hasSlice();
final CloseableIteration<BindingSet, QueryEvaluationException> itr = ASTEvalHelper
.evaluateQuery(astContainer, context,
materializeProjectionInQuery, projected);
TupleQueryResult r = null;
try {
r = new TupleQueryResultImpl(projectedSet, itr);
return r;
} finally {
if (r == null) {
/**
* Ensure the query is terminated if the assignment to [r] fails,
* e.g., if we are interrupted during the ctor.
*
* @see <a
* href="https://sourceforge.net/apps/trac/bigdata/ticket/707">
* BlockingBuffer.close() does not unblock threads </a>
*/
itr.close();
}
}
}
/**
* Evaluate a SELECT query without converting the results into openrdf
* solutions.
*
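* A minimal usage sketch (hypothetical names). The returned iterator
* visits {@link IBindingSet}[] chunks and MUST be closed in order to
* terminate the underlying query:
*
* <pre>
* final ICloseableIterator&lt;IBindingSet[]&gt; itr = ASTEvalHelper
*         .evaluateTupleQuery2(store, astContainer, new QueryBindingSet(),
*                 true); // materialize IVs as RDF Values.
* try {
*     while (itr.hasNext()) {
*         for (IBindingSet bset : itr.next()) {
*             // consume the solution.
*         }
*     }
* } finally {
*     itr.close();
* }
* </pre>
*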
* @param store
* The {@link AbstractTripleStore} having the data.
* @param astContainer
* The {@link ASTContainer}.
* @param globallyScopedBS
* The initial solution to kick things off.
* @param materialize
* When <code>true</code>, {@link IV}s will be materialized
* (their {@link IVCache} association will be set to the
* corresponding RDF {@link Value}). When <code>false</code>,
* this materialization step will be skipped. However, it is
* possible that {@link IV}s in the query plan will be
* materialized anyway (for example, materialization might be
* required to support FILTERs in the query).
*
* @return An object from which the solutions may be drained.
*
* @throws QueryEvaluationException
*/
static public ICloseableIterator<IBindingSet[]> evaluateTupleQuery2(
final AbstractTripleStore store, final ASTContainer astContainer,
final QueryBindingSet globallyScopedBS, final boolean materialize)
throws QueryEvaluationException {
final AST2BOpContext context = new AST2BOpContext(astContainer, store);
// Clear the optimized AST.
astContainer.clearOptimizedAST();
// Batch resolve Values to IVs and convert to bigdata binding set.
final IBindingSet[] globallyScopedBSAsList = toBindingSet(globallyScopedBS);
// Convert the query (generates an optimized AST as a side-effect).
AST2BOpUtility.convert(context, globallyScopedBSAsList);
// The optimized AST.
final QueryRoot optimizedQuery = astContainer.getOptimizedAST();
// true iff we can materialize the projection inside of the query plan.
final boolean materializeProjectionInQuery = materialize && context.materializeProjectionInQuery
&& !optimizedQuery.hasSlice();
final List<IVariable<?>> projectedSet;
if (materialize) {
/*
* Add a materialization step.
*/
// Get the projection for the query.
final IVariable<?>[] projected = astContainer.getOptimizedAST()
.getProjection().getProjectionVars();
projectedSet = new LinkedList<IVariable<?>>();
for (IVariable<?> var : projected)
projectedSet.add(var);
} else {
/*
* Do not add a materialization step.
*/
projectedSet = null;
}
doSparqlLogging(context);
final PipelineOp queryPlan = astContainer.getQueryPlan();
IRunningQuery runningQuery = null;
try {
// Submit query for evaluation.
runningQuery = context.queryEngine.eval(queryPlan, globallyScopedBSAsList);
// The iterator draining the query solutions.
final ICloseableIterator<IBindingSet[]> it1 = runningQuery
.iterator();
final ICloseableIterator<IBindingSet[]> it2;
if (materialize && !materializeProjectionInQuery
&& !projectedSet.isEmpty()) {
/*
* Materialize IVs as RDF Values.
*
* Note: This is the code path when we want to materialize the
* IVs and we can not do so within the query plan because the
* query uses a SLICE. If we want to materialize IVs and there
* is no slice, then the materialization step is done inside of
* the query plan.
*
* Note: This does not materialize the IVCache for inline IVs.
* The assumption is that the consumer is bigdata aware and can
* use inline IVs directly.
*/
// The variables to be materialized.
final IVariable<?>[] vars = projectedSet
.toArray(new IVariable[projectedSet.size()]);
// Wrap with chunked materialization logic.
it2 = new ChunkedMaterializationIterator(vars,
context.db.getLexiconRelation(),
false/* materializeInlineIVs */, it1);
} else {
it2 = it1;
}
return it2;
} catch (Throwable t) {
if (runningQuery != null) {
// ensure query is halted.
runningQuery.cancel(true/* mayInterruptIfRunning */);
}
throw new QueryEvaluationException(t);
}
}
/**
* Optimize a query (used for SELECT as well as CONSTRUCT/DESCRIBE).
*
* @param astContainer
* The {@link ASTContainer}.
* @param context
* The evaluation context.
* @param globallyScopedBS
* The initial solution to kick things off.
* @param dataset
* The dataset to be used for the query (optional).
*
* @return An optimized AST.
*
* @throws QueryEvaluationException
*/
static public QueryRoot optimizeQuery(
final ASTContainer astContainer,
final AST2BOpContext context,
final QueryBindingSet globallyScopedBS,
final Dataset dataset) throws QueryEvaluationException {
final AbstractTripleStore store = context.getAbstractTripleStore();
final DeferredResolutionResult resolved;
try {
// @see https://jira.blazegraph.com/browse/BLZG-1176
resolved = ASTDeferredIVResolution.resolveQuery(
store, astContainer, globallyScopedBS, dataset, context);
} catch (MalformedQueryException e) {
throw new QueryEvaluationException(e.getMessage(), e);
}
if (resolved.dataset != null) {
astContainer.getOriginalAST().setDataset(
new DatasetNode(resolved.dataset, false/* update */));
}
// Clear the optimized AST.
astContainer.clearOptimizedAST();
// Batch resolve Values to IVs and convert to bigdata binding set.
final IBindingSet[] globallyScopedBSAsList = toBindingSet(resolved.bindingSet);
// Convert the query (generates an optimized AST as a side-effect).
AST2BOpUtility.convert(context, globallyScopedBSAsList);
// The optimized AST.
final QueryRoot optimizedQuery = astContainer.getOptimizedAST();
return optimizedQuery;
}
/**
* Evaluate a CONSTRUCT/DESCRIBE query.
* <p>
* Note: For a DESCRIBE query, this also updates the DESCRIBE cache.
*
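* A minimal usage sketch (hypothetical names). The returned
* {@link GraphQueryResult} MUST be closed in order to terminate the
* underlying query:
*
* <pre>
* final GraphQueryResult result = ASTEvalHelper.evaluateGraphQuery(store,
*         astContainer, new QueryBindingSet(), null);
* try {
*     while (result.hasNext()) {
*         final BigdataStatement stmt = (BigdataStatement) result.next();
*         // consume the statement.
*     }
* } finally {
*     result.close();
* }
* </pre>
*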
* @param store
* The {@link AbstractTripleStore} having the data.
* @param astContainer
* The {@link ASTContainer}.
* @param globallyScopedBS
* The initial solution to kick things off.
* @param dataset
* The dataset to be used for the query (optional).
*
* @return An object from which the constructed statements may be drained.
*
* @throws QueryEvaluationException
*
* @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/584">
* DESCRIBE CACHE </a>
*/
public static GraphQueryResult evaluateGraphQuery(
final AbstractTripleStore store,
final ASTContainer astContainer,
final QueryBindingSet globallyScopedBS,
final Dataset dataset) throws QueryEvaluationException {
final AST2BOpContext context = new AST2BOpContext(astContainer, store);
// true iff the original query was a DESCRIBE.
final boolean isDescribe = astContainer.getOriginalAST().getQueryType() == QueryType.DESCRIBE;
/*
* A mapping that is used to preserve a consistent assignment from blank
* node IDs to BigdataBNodes scoped to the subgraph reported by the
* top-level DESCRIBE query.
*/
final Map<String, BigdataBNode> bnodes = (isDescribe ? new LinkedHashMap<String, BigdataBNode>()
: null);
final IDescribeCache describeCache;
final Set<IVariable<?>> describeVars;
if (isDescribe && context.describeCache != null) {
/*
* The DESCRIBE cache is enabled.
*/
describeCache = context.getDescribeCache();
/*
* The set of variables that were in the original DESCRIBE
* projection. This can include both variables explicitly given in
* the query (DESCRIBE ?foo WHERE {...}) and variables bound to
* constants by an AssignmentNode (DESCRIBE uri).
*/
describeVars = astContainer.getOriginalAST().getProjectedVars(
new LinkedHashSet<IVariable<?>>());
} else {
// DESCRIBE cache is not enabled.
describeCache = null;
describeVars = null;
}
final QueryRoot optimizedQuery =
optimizeQuery(astContainer, context, globallyScopedBS, dataset);
final boolean materializeProjectionInQuery = context.materializeProjectionInQuery
&& !optimizedQuery.hasSlice();
// The effective DescribeMode.
// final DescribeModeEnum describeMode = optimizedQuery.getProjection()
// .getDescribeMode() == null ? QueryHints.DEFAULT_DESCRIBE_MODE
// : optimizedQuery.getProjection().getDescribeMode();
final DescribeModeEnum describeMode = context
.getDescribeMode(optimizedQuery.getProjection());
final int describeIterationLimit = context
.getDescribeIterationLimit(optimizedQuery.getProjection());
final int describeStatementLimit = context
.getDescribeStatementLimit(optimizedQuery.getProjection());
// The final result to be returned.
GraphQueryResult result = null;
// Solutions to the WHERE clause (as projected).
final CloseableIteration<BindingSet, QueryEvaluationException> solutions = ASTEvalHelper
.evaluateQuery(astContainer, context, materializeProjectionInQuery//
, optimizedQuery.getProjection().getProjectionVars()//
);
try {
final CloseableIteration<BindingSet, QueryEvaluationException> solutions2;
final Set<BigdataValue> describedResources;
if (describeCache != null) {
/**
* If we are maintaining the DESCRIBE cache, then we need to know
* the distinct bindings that the projected variables in the
* original DESCRIBE query take on in the solutions. Those bound
* values identify the resources that were actually described by the
* query. This is necessary to handle cases such as
* <code>DESCRIBE ?foo WHERE {...}</code> or <code>DESCRIBE *</code>.
*
* Note: We only do this for the top-level DESCRIBE. This step is
* NOT done for the embedded DESCRIBE query(s) issued for Concise
* Bounded Description since we are only interested in caching the
* original resources that were being described.
*
* Note: [describedResources] is a concurrent set in order to
* provide thread safety, since new bindings for the DESCRIBE
* variable(s) may be discovered concurrently with newly constructed
* statements being observed. We need the new bindings to
* become immediately visible in order to avoid missing any
* statements involving a resource in the original projection. The
* [describedResources] were "described" and must be updated in the
* DESCRIBE cache.
*/
// Concurrency safe set.
describedResources = Collections
.newSetFromMap(new ConcurrentHashMap<BigdataValue, Boolean>());
// Collect the bindings on those variables.
solutions2 = new DescribeBindingsCollector(//
describeVars,// what to collect
describedResources,// where to put the bindings.
solutions// source solutions
);
} else {
// Pass through original iterator.
solutions2 = solutions;
describedResources = null;
}
// Constructed Statements.
final CloseableIteration<BigdataStatement, QueryEvaluationException> src =
new ASTConstructIterator(context, store, //
optimizedQuery.getConstruct(), //
optimizedQuery.getWhereClause(),//
bnodes,//
solutions2//
);
final CloseableIteration<BigdataStatement, QueryEvaluationException> src2;
if (isDescribe) {
switch (describeMode) {
case SymmetricOneStep: // No expansion step.
case ForwardOneStep: // No expansion step.
src2 = src;
break;
case CBD:
case SCBD:
// case CBDNR:
// case SCBDNR:
{
/*
* Concise Bounded Description (of any flavor) requires a fixed
* point expansion.
*
* TODO CBD : The expansion should monitor a returned iterator so
* the query can be cancelled by the openrdf client. Right now the
* expansion is performed before the iteration is returned to the
* client, so there is no opportunity to cancel a running CBD
* DESCRIBE.
*/
src2 = new CBD(store, describeMode, describeIterationLimit,
describeStatementLimit, bnodes).computeClosure(src);
break;
}
default:
throw new UnsupportedOperationException("describeMode="
+ describeMode);
}
} else {
src2 = src;
}
final CloseableIteration<BigdataStatement, QueryEvaluationException> src3;
if (describeCache != null) {
/*
* Wrap the Statement iteration with logic that will update the
* DESCRIBE cache.
*
* Note: [describedResources] is the set of BigdataValues that were
* "described" by the query and will have an entry asserted in the
* cache.
*
* TODO We do not need to update cache entries unless they have been
* invalidated (or are based on the open web and have become stale
* or invalidated by finding new assertions relevant to those
* resources during an open web query).
*/
src3 = new DescribeCacheUpdater(describeCache, describedResources,
src2);
} else {
src3 = src2;
}
result = new GraphQueryResultImpl(//
optimizedQuery.getPrefixDecls(), //
src3);
} finally {
if (result == null) {
/**
* Cancel the query since we are not returning the
* {@link GraphQueryResult} object to the caller.
*
* Note: This provides only partial resolution of the following
* ticket. There are other operations than the underlying query
* that would need to be canceled. I have NOT verified that
* closing the underlying query is sufficient to unwind those
* operations. Also, the CBD support is not written to be
* interruptable at this time (see the TODO above).
*
* @see <a
* href="https://sourceforge.net/apps/trac/bigdata/ticket/715"
* > Interrupt of thread submitting a query for evaluation
* does not always terminate the AbstractRunningQuery </a>
*/
solutions.close();
}
}
return result;
}
/**
* Evaluate a query plan (core method).
* <p>
* As explained in some depth at <a
* href="https://sourceforge.net/apps/trac/bigdata/ticket/707">
* BlockingBuffer.close() does not unblock threads </a> and <a
* href="http://trac.blazegraph.com/ticket/864"> Semantics of interrupting a
* running query</a>, there are two points to be aware of: (a) interrupting
* the thread that submits a query does not reliably terminate that query
* until the {@link CloseableIteration} has been returned to the caller
* submitting that query; and (b) an interrupt while draining the returned
* iteration is interpreted as normal termination. In more detail:
* <p>
* (a) If you interrupt the thread submitting the query, the query may
* continue to execute anyway. This can occur because the interrupt can
* arise between
* the time at which the query begins to execute on the {@link QueryEngine}
* and the time at which the {@link IRunningQuery} object is bound up inside
* of the returned {@link CloseableIteration} and returned to the caller.
* Until the caller has possession of the {@link CloseableIteration}, an
* interrupt will not cause the associated {@link IRunningQuery} to be
* terminated. See <a
* href="https://sourceforge.net/apps/trac/bigdata/ticket/707">
* BlockingBuffer.close() does not unblock threads </a>
* <p>
* (b) If you interrupt the thread draining the solutions from the
* {@link CloseableIteration} or otherwise cause
* {@link CloseableIteration#close()} to become invoked, then the
* {@link IRunningQuery} will be interrupted. Per <a
* href="http://trac.blazegraph.com/ticket/864"> Semantics of interrupting a
* running query</a>, that interrupt is interpreted as <em>normal</em>
* termination (this supports the use case of LIMIT and is built deeply into
* the {@link QueryEngine} semantics). In order for the application to
* distinguish between a case where it has interrupted the query and a case
* where the query has been interrupted by a LIMIT, the application MUST
* notice when it decides to interrupt a given query and then discard the
* outcome of that query.
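* <p>
* A sketch of the pattern implied by (a) and (b): always close the
* returned iteration in a finally block, and note for yourself whenever
* you deliberately interrupt a query (names are illustrative only):
*
* <pre>
* final CloseableIteration&lt;BindingSet, QueryEvaluationException&gt; itr =
*         ASTEvalHelper.evaluateQuery(astContainer, ctx, false, required);
* try {
*     while (itr.hasNext()) {
*         itr.next(); // consume solutions; an interrupt here halts the query.
*     }
* } finally {
*     itr.close(); // ensures the IRunningQuery is terminated.
* }
* </pre>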
*
* @param astContainer
* The query model.
* @param ctx
* The evaluation context.
* @param materializeProjectionInQuery
* When <code>true</code>, the projection was materialized within
* the query plan. When <code>false</code>, this method will take
* responsibility for that materialization step.
* @param required
* The variables which must be materialized. Only materialized
* variables will be reported in the output solutions. This MAY
* be <code>null</code> to materialize all variables in the
* solutions. It MAY be empty to materialize NONE of the
* variables in the solutions (in which case all solutions will
* be empty).
*
* @return An iteration which may be used to read Sesame {@link BindingSet}s
* containing the solutions for the query.
*
* @throws QueryEvaluationException
*
* @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/707">
* BlockingBuffer.close() does not unblock threads </a>
* @see <a href="http://trac.blazegraph.com/ticket/864"> Semantics of
* interrupting a running query</a>
*/
static // Note: formerly private. Exposed to the CBD class.
CloseableIteration<BindingSet, QueryEvaluationException> evaluateQuery(
final ASTContainer astContainer,
final AST2BOpContext ctx,
final boolean materializeProjectionInQuery,
final IVariable<?>[] required) throws QueryEvaluationException {
doSparqlLogging(ctx);
final PipelineOp queryPlan = astContainer.getQueryPlan();
IRunningQuery runningQuery = null;
try {
// Optional attributes to be attached to the query.
final Map<Object, Object> queryAttributes = ctx
.getQueryAttributes();
// Submit query for evaluation.
runningQuery = ctx.queryEngine.eval(queryPlan,
astContainer.getOptimizedASTBindingSets(), queryAttributes);
runningQuery.setStaticAnalysisStats(ctx.getStaticAnalysisStats());
/*
* Wrap up the native bigdata query solution iterator as Sesame
* compatible iteration with materialized RDF Values.
*/
return iterator(runningQuery, ctx.db,
materializeProjectionInQuery, required);
} catch (Throwable t) {
if (runningQuery != null) {
// ensure query is halted.
runningQuery.cancel(true/* mayInterruptIfRunning */);
}
throw new QueryEvaluationException(t);
}
}
/**
* Convert a Sesame {@link BindingSet} into a bigdata {@link IBindingSet}.
*
* @param src
* The {@link BindingSet} (optional).
*
* @return An {@link IBindingSet}[] containing a single binding set. When
* the source is <code>null</code> or empty, that binding set is an
* empty {@link ListBindingSet}.
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
private static IBindingSet[] toBindingSet(final BindingSet src) {
if (src == null || src.size() == 0) {
return new IBindingSet[] { new ListBindingSet() };
}
final ListBindingSet bindingSet = new ListBindingSet();
final Iterator<Binding> itr = src.iterator();
while (itr.hasNext()) {
final Binding binding = itr.next();
final IVariable<IV> var = com.bigdata.bop.Var.var(binding.getName());
final IV iv = ((BigdataValue) binding.getValue()).getIV();
final IConstant<IV> val = new Constant<IV>(iv);
bindingSet.set(var, val);
}
return new IBindingSet[]{ bindingSet };
}
/**
* Wrap {@link IRunningQuery} with the logic to materialize {@link IV}s as
* RDF {@link Value}s.
*
* @param runningQuery
* The {@link IRunningQuery}.
* @param db
* The view of the {@link AbstractTripleStore} against which the
* query is running.
* @param materializeProjectionInQuery
* When <code>true</code>, the projection was materialized within
* the query plan. When <code>false</code>, this method will take
* responsibility for that materialization step.
* @param required
* The variables which must be materialized (optional).
*
* @return A Sesame {@link CloseableIteration} which will drain
* {@link BindingSet}s of materialized RDF {@link Value}s.
*/
private static CloseableIteration<BindingSet, QueryEvaluationException> iterator(
final IRunningQuery runningQuery, final AbstractTripleStore db,
final boolean materializeProjectionInQuery,
final IVariable<?>[] required) {
/*
* FIXME We should not dechunk just to rechunk here. This is not very
* efficient.
*
* The basic API alignment problem is that the IRunningQuery#iterator()
* visits IBindingSet[] chunks while the BigdataBindingSetResolverator
* and Bigdata2SesameBindingSetIterator are IChunked(Ordered)Iterators.
* That is, they implement #nextChunk(). A very simple class could be
* used to align an IBindingSet[] returned by next() with nextChunk(). I
* would be surprised if this class did not already exist (in fact, the
* class is ChunkedArraysIterator -or- ChunkConsumerIterator).
*
* The other issue is that RunningQueryCloseableIterator would APPEAR to
* be redundant with QueryResultIterator. However, correct termination
* is a tricky business and the current layering obviously works. The
* differences in those two classes appear to be (a) whether or not we
* invoke cancel() on the IRunningQuery when the iterator is closed and
* (b) whether or not we are buffering the last element visited. It is
* quite possible that RunningQueryCloseableIterator simply layers on
* one or two fixes which SHOULD be incorporated into the
* QueryResultIterator.
*
* @see https://sourceforge.net/apps/trac/bigdata/ticket/483 (Eliminate
* unnecessary chunking and dechunking)
*/
// Dechunkify the running query and monitor the Sesame iterator.
final ICloseableIterator<IBindingSet> it1 = iterator(runningQuery);
final BOp query = runningQuery.getQuery();
final int chunkCapacity = query.getProperty(
PipelineOp.Annotations.CHUNK_CAPACITY,
PipelineOp.Annotations.DEFAULT_CHUNK_CAPACITY);
// Wrap in an IChunkedOrderedIterator
final IChunkedOrderedIterator<IBindingSet> it2 = new ChunkedWrappedIterator<IBindingSet>(
it1, chunkCapacity, IBindingSet.class);
final CloseableIteration<BindingSet, QueryEvaluationException> it3;
if(materializeProjectionInQuery) {
/*
* The projection of the query is being materialized by the query
* plan. All we have to do here is convert bigdata IBindingSets
* consisting of IVs having cached BigdataValues to Sesame
* BindingSets.
*/
// Convert IVs in IBindingSets to Sesame BindingSets with Values.
it3 = new Bigdata2Sesame2BindingSetIterator(it2);
} else {
/*
* The projection of the query was not materialized by the query
* plan. We need to layer in a chunked iterator which handles that
* materialization step and then do the conversion into Sesame
* BindingSet objects.
*/
/*
* Note: This captures the historical behavior, which was based on
* the AbstractTripleStore's configuration properties for
* chunkCapacity, chunkOfChunksCapacity, and the chunkTimeout. Those
* properties still affect the rules engine but do not otherwise
* affect query performance. Most query operators use the PipelineOp
* annotations to control these properties. RDF Value
* materialization is the exception. To correct this, I have lifted
* out all these parameters here so we can override it based on
* query annotations.
*
* There are two basic code paths for RDF Value materialization: One
* is the ChunkedMaterializationOp (it handles the "chunk" you feed
* it as a "chunk" and is used for materialization for FILTERs). The
* other is the BigdataBindingSetResolverator. Both call through to
* LexiconRelation#getTerms().
*
* Regarding [termsChunkSize] and [blobsChunkSize]: on a cluster,
* the operation is parallelized by the ClientIndexView regardless,
* so there is no reason to ever run materialization
* with more than one thread. Shard local resolution can be enabled
* by setting [materializeProjectionInQuery:=true], but at the cost
* of doing the materialization after a SLICE (if there is one in
* the query). However, when running through the
* BigdataBindingSetResolverator, there will be exactly one thread
* materializing RDF values (because the iterator pattern is single
* threaded) unless the chunkSize exceeds this threshold.
*/
// Historical values.
// final int chunkCapacity = db.getChunkCapacity();
// final int chunkOfChunksCapacity = db.getChunkOfChunksCapacity();
// final long chunkTimeout = db.getChunkTimeout();
// final int termsChunkSize = 4000;
// final int blobsChunkSize = 4000;
// Values set based on query hints.
// final BOp query = runningQuery.getQuery();
// final int chunkCapacity = query.getProperty(
// PipelineOp.Annotations.CHUNK_CAPACITY,
// PipelineOp.Annotations.DEFAULT_CHUNK_CAPACITY);
final int chunkOfChunksCapacity = query.getProperty(
PipelineOp.Annotations.CHUNK_OF_CHUNKS_CAPACITY,
PipelineOp.Annotations.DEFAULT_CHUNK_OF_CHUNKS_CAPACITY);
final long chunkTimeout = query.getProperty(
PipelineOp.Annotations.CHUNK_TIMEOUT,
(long)PipelineOp.Annotations.DEFAULT_CHUNK_TIMEOUT);
final int termsChunkSize = chunkCapacity;
final int blobsChunkSize = chunkCapacity;
// Convert bigdata binding sets to Sesame binding sets.
it3 = new Bigdata2Sesame2BindingSetIterator(
// Materialize IVs as RDF Values.
new BigdataBindingSetResolverator(db, it2,
runningQuery.getQueryId(), required, chunkCapacity,
chunkOfChunksCapacity, chunkTimeout,
termsChunkSize, blobsChunkSize).start(db
.getExecutorService()));
}
return it3;
}
/**
* Dechunkify the running query and monitor the Sesame iterator.
*
* @param runningQuery
* The {@link IRunningQuery}.
*
* @return An {@link ICloseableIterator} which has been dechunkified.
*/
private static ICloseableIterator<IBindingSet> iterator(
final IRunningQuery runningQuery) {
// The iterator draining the query solutions.
final ICloseableIterator<IBindingSet[]> it1 = runningQuery
.iterator();
// Dechunkify the original iterator
final ICloseableIterator<IBindingSet> it2 =
new Dechunkerator<IBindingSet>(it1);
// Monitor IRunningQuery and cancel if Sesame iterator is closed.
final ICloseableIterator<IBindingSet> it3 =
new RunningQueryCloseableIterator<IBindingSet>(runningQuery, it2);
return it3;
}
/**
* Evaluate a SPARQL UPDATE request (core method).
*
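* A minimal usage sketch (hypothetical names; the connection and the
* parsed update are assumed to be obtained elsewhere):
*
* <pre>
* final long commitTime = ASTEvalHelper.executeUpdate(conn, astContainer,
*         null, // no dataset override
*         true, // include inferences
*         new QueryBindingSet());
* </pre>
*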
* @param conn
* The connection on which the update will be executed.
* @param astContainer
* The update request model.
* @param dataset
* A dataset which will override the data set declaration for
* each {@link DeleteInsertGraph} operation in the update
* sequence (optional).
* @param includeInferred
* When <code>true</code>, inferences are included in the
* evaluation of the update operations.
* @param bs
* The initial bindings to be applied to the update (optional).
*
* @return The timestamp of the commit point.
*
* @throws UpdateExecutionException
*
* TODO timeout for update?
*/
static public long executeUpdate(//
final BigdataSailRepositoryConnection conn,//
final ASTContainer astContainer,//
final Dataset dataset,
final boolean includeInferred,//
final QueryBindingSet bs
) throws UpdateExecutionException {
if(conn == null)
throw new IllegalArgumentException();
if(astContainer == null)
throw new IllegalArgumentException();
final DeferredResolutionResult resolved;
try {
// @see https://jira.blazegraph.com/browse/BLZG-1176
resolved = ASTDeferredIVResolution.resolveUpdate(conn.getTripleStore(), astContainer, bs, dataset);
} catch (MalformedQueryException e) {
throw new UpdateExecutionException(e.getMessage(), e);
}
try {
if (dataset != null) {
/*
* Apply the optional data set override.
*/
applyDataSet(conn.getTripleStore(), astContainer, resolved.dataset);
}
final AST2BOpUpdateContext ctx = new AST2BOpUpdateContext(
astContainer, conn);
doSparqlLogging(ctx);
// Propagate attribute.
ctx.setIncludeInferred(includeInferred);
// Batch resolve Values to IVs and convert to bigdata binding set.
final IBindingSet[] bindingSets = toBindingSet(resolved.bindingSet);
// Propagate bindings
ctx.setQueryBindingSet(bs);
ctx.setBindings(bindingSets);
ctx.setDataset(dataset);
/*
* Convert the query (generates an optimized AST as a side-effect).
*/
AST2BOpUpdate.optimizeUpdateRoot(ctx);
/*
* Generate and execute physical plans for the update operations.
*/
AST2BOpUpdate.convertUpdate(ctx);
return ctx.getCommitTime();
} catch (Exception ex) {
ex.printStackTrace();
throw new UpdateExecutionException(ex);
}
}
/**
* Apply the {@link Dataset} to each {@link DeleteInsertGraph} in the UPDATE
* request.
* <p>
* The openrdf API here is somewhat at odds with the current LCWD for SPARQL
* UPDATE. In order to align them, setting the {@link Dataset} here causes
* it to be applied to each {@link DeleteInsertGraph} operation in the
* {@link UpdateRoot}. Note that the {@link Dataset} has no effect except
* for the {@link DeleteInsertGraph} operation in SPARQL 1.1 UPDATE (that is
* the only operation which has a WHERE clause and which implements the
* {@link IDataSetNode} interface).
*
* @param tripleStore
* The triple store.
* @param astContainer
* The update request model.
* @param dataset
* The dataset override to be applied.
*
* @see <a href="http://www.openrdf.org/issues/browse/SES-963"> Dataset
* assignment in update sequences not properly scoped </a>
*/
static private void applyDataSet(final AbstractTripleStore tripleStore,
final ASTContainer astContainer, final Dataset dataset) {
if (tripleStore == null)
throw new IllegalArgumentException();
if (astContainer == null)
throw new IllegalArgumentException();
if (dataset == null)
throw new IllegalArgumentException();
/*
* Batch resolve RDF Values to IVs and then set on the query model.
*/
// final Object[] tmp = new BigdataValueReplacer(tripleStore)
// .replaceValues(dataset, null/* bindings */);
/*
* Set the data set on the original AST.
*/
// final Dataset resolvedDataset = (Dataset) tmp[0];
final UpdateRoot updateRoot = astContainer.getOriginalUpdateAST();
for (Update op : updateRoot) {
if (op instanceof IDataSetNode) {
final IDataSetNode node = ((IDataSetNode) op);
node.setDataset(new DatasetNode(dataset, true/* update */));
}
}
}
/**
* Log SPARQL Query and SPARQL UPDATE requests.
* <p>
* Note: The SPARQL syntax is logged whenever possible. However, we
* sometimes generate the AST directly, in which case the SPARQL syntax is
* not available and the AST is logged instead.
*
* @param ctx
* The evaluation context.
*/
private static void doSparqlLogging(final AST2BOpContext ctx) {
if (!log.isInfoEnabled())
return;
/*
* Log timestamp of the view and the SPARQL query string.
*/
setupLoggingContext(ctx);
final ASTContainer astContainer = ctx.astContainer;
final String queryString = astContainer.getQueryString();
if (queryString != null) {
/*
* Log the query string when it is available.
*
* Note: We sometimes generate the AST directly, in which case there
* is no query string.
*/
log.info(queryString);
} else {
/*
* If there is no query string, then log the AST instead.
*/
if (astContainer.isQuery()) {
log.info(astContainer.getOriginalAST());
} else {
log.info(astContainer.getOriginalUpdateAST());
}
}
clearLoggingContext();
}
private static void setupLoggingContext(final IEvaluationContext context) {
MDC.put("tx", TimestampUtility.toString(context.getTimestamp()));
}
private static void clearLoggingContext() {
MDC.remove("tx");
}
}