/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 28, 2012
*/
package com.bigdata.rdf.sparql.ast.eval;
import info.aduna.iteration.CloseableIteration;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryEvaluationException;
import org.openrdf.query.algebra.evaluation.iterator.CollectionIteration;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.model.BigdataBNode;
import com.bigdata.rdf.model.BigdataStatement;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.sparql.ast.ASTContainer;
import com.bigdata.rdf.sparql.ast.AssignmentNode;
import com.bigdata.rdf.sparql.ast.ConstantNode;
import com.bigdata.rdf.sparql.ast.DescribeModeEnum;
import com.bigdata.rdf.sparql.ast.ProjectionNode;
import com.bigdata.rdf.sparql.ast.QueryRoot;
import com.bigdata.rdf.sparql.ast.QueryType;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.sparql.ast.optimizers.ASTDescribeOptimizer;
import com.bigdata.rdf.store.AbstractTripleStore;
/**
* Utility class for computing the Concise Bounded Description.
*
* @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/578"> Concise
* Bounded Description </a>
* @see <a href="http://www.w3.org/Submission/CBD/"> CBD - Concise Bounded
* Description </a>
* @see ASTDescribeOptimizer
* @see ASTConstructIterator
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
*
* FIXME Watch a timeout on the top-level query (if present)
*/
public class CBD {
private static final Logger log = Logger.getLogger(CBD.class);
/** The {@link AbstractTripleStore}. */
private final AbstractTripleStore store;
/**
* The {@link DescribeModeEnum} specifying how to evaluate the top-level
* DESCRIBE query.
*/
private final DescribeModeEnum describeMode;
/**
* The limit on the #of iterations (iff the statement limit is also
* reached) -or- ZERO (0) for no limit.
*/
private final int describeIterationLimit;
/**
* The limit on the #of statements (iff the iteration limit is also
* reached) -or- ZERO (0) for no limit.
*/
private final int describeStatementLimit;
/**
* The {@link DescribeModeEnum} specifying how to evaluate each expansion
* round of the DESCRIBE query.
*/
private final DescribeModeEnum describeExpansionMode;
/**
* A mapping that is used to preserve a consistent assignment from blank
* node IDs to {@link BigdataBNode}s scoped to the subgraph reported by the
* top-level DESCRIBE query.
*/
private final Map<String, BigdataBNode> bnodes;
/**
* @param store
* The {@link AbstractTripleStore}.
* @param describeMode
* The {@link DescribeModeEnum} specifying how to evaluate the
* DESCRIBE query.
* @param describeIterationLimit
* The limit on the #of iterations (iff the statement limit is
* also reached) -or- ZERO (0) for no limit.
* @param describeStatementLimit
* The limit on the #of statements (iff the iteration limit is
* also reached) -or- ZERO (0) for no limit..
* @param bnodes
* A mapping that is used to preserve a consistent assignment
* from blank node IDs to {@link BigdataBNode}s scoped to the
* subgraph reported by the top-level DESCRIBE query.
*/
public CBD(final AbstractTripleStore store,
final DescribeModeEnum describeMode,
final int describeIterationLimit,
final int describeStatementLimit,
final Map<String, BigdataBNode> bnodes) {
if (store == null)
throw new IllegalArgumentException();
if (describeMode == null)
throw new IllegalArgumentException();
if (describeIterationLimit < 0)
throw new IllegalArgumentException();
if (describeStatementLimit < 0)
throw new IllegalArgumentException();
if (bnodes == null)
throw new IllegalArgumentException();
this.store = store;
this.describeMode = describeMode;
this.describeIterationLimit = describeIterationLimit;
this.describeStatementLimit = describeStatementLimit;
this.bnodes = bnodes;
switch(describeMode) {
case CBD:
// case CBDNR:
// Expansion only explores the forward links.
describeExpansionMode = DescribeModeEnum.ForwardOneStep;
break;
case SCBD:
// case SCBDNR:
// Expansion explores both forward and reverse links.
describeExpansionMode = DescribeModeEnum.SymmetricOneStep;
break;
case ForwardOneStep:
case SymmetricOneStep:
// There are no expansion steps for these modes.
throw new UnsupportedOperationException();
default:
// Unknown describe mode.
throw new AssertionError();
}
}
/**
* The description of the original resource(s) is expanded for each blank
* node encountered in the constructed statements until no new blank nodes
* are encountered. The resulting set of statements is then reported as the
* description for the resources identified either as constants in the
* projection of the original query or as variables in the projection of the
* original query that became bound in the WHERE clause of that original
* query.
*
* @param src
* An iterator from which we can drain the solutions to the
* top-level DESCRIBE query. This is the input into the CBD
* expansion.
*
* @return An iterator from which the concise bounded description of the may
* be drained.
*
* @throws QueryEvaluationException
*/
CloseableIteration<BigdataStatement, QueryEvaluationException> computeClosure(
CloseableIteration<BigdataStatement, QueryEvaluationException> src)
throws QueryEvaluationException {
// Round ZERO (0) is the top-level describe.
int nrounds = 0;
// The blank nodes identified in the previous round.
final Set<IV<?,?>> bnodes_tm1 = new LinkedHashSet<IV<?,?>>();
// The statements identified so far.
final Set<BigdataStatement> stmts = new LinkedHashSet<BigdataStatement>();
while (true) {
// CBD expansion begins at round ONE (1).
nrounds++;
// #of statements on entry to this round.
final int nstmts = stmts.size();
if (cutoffQuery(nrounds - 1, nstmts)) {
src.close();
throw new QueryEvaluationException("CBD cutoff: nrounds="
+ nrounds + ", nstatements=" + nstmts + ".");
}
/*
* Build a collection of all distinct statements and all distinct
* blank node IVs encountered in the source statements. Any of the
* (s,o,c) positions can be blank nodes.
*/
// The blank nodes IVs identified in this round that were NOT known
// in the previous round(s).
final Set<IV<?, ?>> newBnodes = consumeStatements(src, stmts,
bnodes_tm1);
if (newBnodes.isEmpty()) {
// All done.
break;
}
/*
* We will have to do another CBD round since there is at least one
* new blank node IV that needs to be described.
*/
if (log.isInfoEnabled()) {
log.info("#rounds=" + nrounds + ", describeMode="
+ describeMode + ", #stmts(in)=" + stmts.size()
+ ", #bnodes(in)=" + bnodes_tm1.size()
+ ", #bnodes(new)=" + newBnodes.size() + " : "
+ newBnodes);
// Conditional logging.
logState(stmts, bnodes_tm1, newBnodes);
}
src = doRound(newBnodes);
// All of these blank nodes have been resolved.
bnodes_tm1.addAll(newBnodes);
}
// Done.
if (log.isInfoEnabled()) {
log.info("#rounds=" + nrounds + " (done), describeMode="
+ describeMode + ", #stmts(in)=" + stmts.size()
+ ", #bnodes(in)=" + bnodes_tm1.size());
// Conditional logging.
logState(stmts, bnodes_tm1, null/* newBNodes */);
}
/*
* Stream out the fixed point collection of statements that are the
* Concise Bounded Description of the resources identified in/by the
* top-level DESCRIBE query.
*/
return new CollectionIteration<BigdataStatement, QueryEvaluationException>(
stmts);
}
/**
* Return <code>true</code> iff the DESCRIBE query should be cutoff because
* the limits have been exceeded.
*
* @param nrounds
* The #of evaluation rounds that have already been computed and
* ZERO (0) if this is the ffirst round.
* @param nstmts
* The #of statements at the start of this round.
*
* @return <code>true</code> iff evaluation should be cutoff.
*/
private boolean cutoffQuery(int nrounds, int nstmts) {
// ZERO implies MAX_INT
final int describeIterationLimit = this.describeIterationLimit == 0 ? Integer.MAX_VALUE
: this.describeIterationLimit;
final int describeStatementLimit = this.describeStatementLimit == 0 ? Integer.MAX_VALUE
: this.describeStatementLimit;
final boolean cutoffRounds = nrounds >= describeIterationLimit;
final boolean cutoffStatements = nstmts >= describeStatementLimit;
return cutoffRounds && cutoffStatements;
}
/**
* Log the statements and bnode {@link IV}s @ DEBUG.
*
* @param stmts
* The statements.
* @param bnodes_tm1
* The bnode {@link IV}s from the last round (initially empty).
* @param newBnodes
* The bnode {@link IV}s (optional and <code>null</code> if we
* are done).
*/
private void logState(final Set<BigdataStatement> stmts,
final Set<IV<?, ?>> bnodes_tm1, final Set<IV<?, ?>> newBnodes) {
if (!log.isDebugEnabled())
return;
final StringBuilder sb = new StringBuilder(stmts.size() * 100);
{
sb.append("Statements: ("+stmts.size()+")\n");
for (BigdataStatement st : stmts) {
sb.append(st.toString());
sb.append("\n");
}
log.debug(sb.toString());
}
{
sb.setLength(0);// truncate.
sb.append("BNodes(t-1): ("+bnodes_tm1.size()+")\n");
for (IV<?, ?> iv : bnodes_tm1) {
sb.append(iv.toString());
sb.append("\n");
}
log.debug(sb.toString());
}
if (newBnodes != null) {
sb.setLength(0);// truncate.
sb.append("BNodes(new): ("+newBnodes.size()+")\n");
for (IV<?, ?> iv : newBnodes) {
sb.append(iv.toString());
sb.append("\n");
}
log.debug(sb.toString());
}
}
/**
* Consume statements from the source iterator, adding new statements into a
* collection and adding new blank node {@link IV}s into another collection.
*
* @param src
* The statements to be consumed.
* @param stmts
* The set of statements in the description of the resources from
* the previous round(s) (if any).
* @param bnodes_tm1
* The blank node {@link IV}s already known on entry to the
* current round. This is empty for on entry to the first
* expansion round.
*
* @return The set of blank node {@link IV}s not previous encountered in the
* CBD expansion.
*
* @throws QueryEvaluationException
*/
private static Set<IV<?, ?>> consumeStatements(
final CloseableIteration<BigdataStatement, QueryEvaluationException> src,
final Set<BigdataStatement> stmts, final Set<IV<?, ?>> bnodes_tm1)
throws QueryEvaluationException {
final Set<IV<?, ?>> newBnodes = new LinkedHashSet<IV<?, ?>>();
try {
while (src.hasNext()) {
final BigdataStatement stmt = src.next();
// /*
// * A statement of the form
// *
// * ?stmtN rdf:subject <term>
// *
// * where <term> is a blank node.
// */
// final boolean foo = stmt.getPredicate().equals(RDF.SUBJECT)
// && bnodes_tm1.contains(stmt.getObject());
if (stmts.add(stmt)) {
/*
* New blank node IVs can only be encountered for new
* statements.
*
* TODO Consider using an ISPO => BigdataStatement map for
* the statements so we can avoid duplicate entries for
* BigdataStatements having different blank nodes but the
* same IVs for those blank nodes.
*/
collectBNodeIVs(bnodes_tm1, newBnodes,
getBNodeIV(stmt.getSubject()));
collectBNodeIVs(bnodes_tm1, newBnodes,
getBNodeIV(stmt.getObject()));
collectBNodeIVs(bnodes_tm1, newBnodes,
getBNodeIV(stmt.getContext()));
}
}
return newBnodes;
} finally {
src.close();
}
}
/**
* Create a new DESCRIBE query to describe each new blank node identifier in
* the previous round. We need to tunnel the evaluation of the DESCRIBE
* query in order to: (a) ensure that the blank node {@link IV}s are
* attached to the blank nodes in the DESCRIBE clause; and (b) avoid the
* describe cache materialization logic since rounds GT ZERO (0) are not
* top-level DESCRIBE queries and do not describe top-level resources.
*
* @param bnodeIVs
* The blank nodes that need to be described.
* @return An iterator from which the description of those blank nodes may
* be read.
* @throws QueryEvaluationException
*/
private CloseableIteration<BigdataStatement, QueryEvaluationException> doRound(
final Set<IV<?, ?>> bnodeIVs) throws QueryEvaluationException {
/*
* Create the DESCRIBE query for the blank node IVs.
*/
final ASTContainer astContainer = getDescribeQuery(bnodeIVs);
final AST2BOpContext context = new AST2BOpContext(astContainer, store);
// Clear the optimized AST.
astContainer.clearOptimizedAST();
// Batch resolve Values to IVs and convert to bigdata binding set.
final IBindingSet[] bindingSets =
new IBindingSet[] { new ListBindingSet() };
// Convert the query (generates an optimized AST as a side-effect).
AST2BOpUtility.convert(context, bindingSets);
// The optimized AST.
final QueryRoot optimizedQuery = astContainer.getOptimizedAST();
if (log.isDebugEnabled()) {
log.debug("describeMode=" + describeMode + ", expansionMode="
+ describeExpansionMode);
log.debug("OriginalAST: " + astContainer.getOriginalAST());
log.debug("OptimizedAST: " + optimizedQuery);
}
final boolean materializeProjectionInQuery = context.materializeProjectionInQuery
&& !optimizedQuery.hasSlice();
// Solutions to the WHERE clause (as projected).
final CloseableIteration<BindingSet, QueryEvaluationException> solutions = ASTEvalHelper
.evaluateQuery(astContainer, context, materializeProjectionInQuery//
, optimizedQuery.getProjection().getProjectionVars()//
);
// Constructed Statements.
final CloseableIteration<BigdataStatement, QueryEvaluationException> src =
new ASTConstructIterator(context, store, //
optimizedQuery.getConstruct(), //
optimizedQuery.getWhereClause(),//
bnodes,//
solutions//
);
return src;
}
/**
* Generate a DESCRIBE query for one of the expansion rounds.
*
* @param bnodeIVs
* The blank nodes that need to be described.
* @return The {@link ASTContainer} wrapping that DESCRIBE query.
*/
@SuppressWarnings({ "unchecked", "rawtypes" })
private ASTContainer getDescribeQuery(final Set<IV<?, ?>> bnodeIVs) {
/*
* Ensure that the bnode IVs are resolved to the corresponding
* BigdataBlankNode objects and that the valueCache relation is set on
* those bnode IVs.
*/
final Map<IV<?, ?>, BigdataValue> terms = store.getLexiconRelation()
.getTerms(bnodeIVs);
for (Map.Entry<IV<?, ?>, BigdataValue> e : terms.entrySet()) {
((IV) e.getKey()).setValue(e.getValue());
}
final QueryRoot queryRoot = new QueryRoot(QueryType.DESCRIBE);
{
final ProjectionNode projection = new ProjectionNode();
queryRoot.setProjection(projection);
/*
* Specify the describe mode appropriate for the expansion given the
* top-level describe algorithm that we are evaluating.
*/
projection.setDescribeMode(describeExpansionMode);
int i=1;
for (IV<?, ?> iv : bnodeIVs) {
if (!iv.hasValue())
throw new AssertionError("valueCache not set : " + iv);
final VarNode anonvar = new VarNode("-cbd-bnode-"+i++);
anonvar.setAnonymous(true);
projection.addProjectionExpression(new AssignmentNode(anonvar,
new ConstantNode(iv)));
}
}
return new ASTContainer(queryRoot);
}
/**
* Collect blank nodes {@link IV}s not already declared in a previous round.
*
* @param bnodes_tm1
* The blank node {@link IV}s already declared in the previous
* round(s).
* @param newBnodes
* The set of blank node {@link IV}s that were discovered in this
* round.
* @param bNodeIV
* A blank node {@link IV} (may be <code>null</code>).
*/
private static void collectBNodeIVs(final Set<IV<?, ?>> bnodes_tm1,
final Set<IV<?, ?>> newBnodes, final IV<?, ?> bNodeIV) {
if (bNodeIV == null) {
/* The corresponding position in the statement was not a blank node. */
return;
}
if (bnodes_tm1.contains(bNodeIV)) {
/* This blank node was already declared in the previous round. */
return;
}
newBnodes.add(bNodeIV);
}
/**
* If the value is a blank node, then return the IV for that blank node and
* otherwise return <code>null</code>.
*
* @param v
* The value.
*
* @return The {@link IV} for that blank node and <code>null</code> iff the
* value is not a blank node.
*/
static private IV<?,?> getBNodeIV(final BigdataValue v) {
if (v == null) {
/*
* Note: This case is allowed for the context position of the
* statement.
*/
return null;
}
final BigdataBNode bnode = (BigdataBNode) ((v instanceof BigdataBNode) ? v
: null);
if (bnode == null) {
// Not a blank node.
return null;
}
final IV<?, ?> iv = bnode.getIV();
/*
* No. What we really need to do is ensure that the blank node variables
* (a) have their valueCache set when we create the DESCRIBE query for
* the expansion rounds; and (b) are projected out of the CONSTRUCT
* queries that are evaluated in the expansion rounds. That will ensure
* that the blank nodes are materialized.
*/
// /*
// * We need to set the valueCache relation on the IV. The
// * ASTConstructIterator depends on this in makeStatement().
// */
//
// ((IV) iv).setValue(bnode);
return iv;
}
}