/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Mar 3, 2012
*/
package com.bigdata.rdf.sparql.ast.service;
import java.util.HashSet;
import java.util.Set;
import org.openrdf.model.BNode;
import org.openrdf.model.Value;
import org.openrdf.query.Binding;
import org.openrdf.query.BindingSet;
/**
* Factory encapsulates the logic required to decide on the manner in which
* solutions will be vectored into the remote service end point and in which the
* solutions flowing back from that service will be interpreted.
* <p>
* The matter of interpretation is straightforward when the BINDINGS clause is
* accepted by the remote end point and there are no variables which are
* correlated within a solution through shared blank nodes.
* <p>
* Query generation and solution interpretation is significantly more complex
* when any of those things is not true. There are several strategies which may
* be used in these cases, including:
* <ol>
* <li>Issue one remote query per solution, imposing a FILTER to enforce the
* variable correlation since blank nodes cannot be sent via the BINDINGS
* clause. (CONS: Too many queries are issued.)</li>
* <li>Issue one remote query without specifying any BINDINGS. (CONS: The query
* could be very underconstrained, however we can not detect this case up front
* since (a) we do not know whether there will be correlated blank nodes until
* we are ready to generate the query; and (b) we can not know whether the end
* point supports BINDINGS until we try it at least once (but we could test for
* that capability and cache knowledge about whether the end point supports that
* or use the end point's service description for this information).)</li>
* <li>Vector the remote query through a rewrite using a UNION with distinct
* variables for each presentation of the original graph pattern and values
* substituted for those variables via BIND() or the like. (CONS: You have to be
* careful to get the SPARQL correct when the original SERVICE graph pattern is
* not used exactly as given.)</li>
* </ol>
* In fact, we wind up using a mixture of (1) and (3).
* <p>
* If there is only one source solution and it does not have any bindings, then
* we send the original SERVICE clause and DO NOT use the BINDINGS clause. This
* pattern works whether or not the service end point supports bindings and
* covers the case where the service is run without vectoring in any solutions.
* <p>
* When there are one or more non-empty solutions to be vectored and variable
* correlation is not present (or it is present but there is only one solution),
* and the service end point supports the BINDINGS clause, then we use the
* BINDINGS clause to vector the query against the remote SERVICE end point.
* <p>
* If there are multiple solutions and correlated variables -or- if the service
* does not support BINDINGS, then we use the UNION rewrite approach and BIND()
* the bindings within each alternative of the UNION (as a special case, no
* UNION is required if there is only one source solution (we can just BIND()
* the bindings) or if the source solution has no bound variables (we do not
* need to send any bindings)).
* <p>
* Note: We do not need to use a rowId to correlate the source solutions and the
* service's solution. We will always do a hash join of the service solutions
* with the source solutions. The only time the hash join could be a problem is
* when there are blank nodes and multiple source solutions. However, we are
* already required to rewrite the SERVICE clause using a UNION pattern in this
* case so we can maintain the correlation of the blank nodes through the unique
* variable names in each alternative of the UNION.
*
* TODO ASK query optimization when there is a single triple pattern which is
* fully bound AND there is only one solution flowing into the service. (This is
* a pretty minor optimization and a very special case since we are more likely
* to have a vector of fully bound solutions.)
*
* TODO If any of the source solutions is fully unbound, then the other source
* solutions could be eliminated since we will be running the service fully
* unbound anyway.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id: RemoteSparqlBuilderFactory.java 6068 2012-03-03 21:34:31Z
* thompsonbry $
*/
public class RemoteSparqlBuilderFactory {

    /**
     * Select the query builder used to vector the given source solutions
     * into the remote SERVICE end point.
     * <p>
     * The choice is made as follows:
     * <ol>
     * <li>If the service is declared as {@link SPARQLVersion#SPARQL_10}, the
     * SPARQL 1.0 query builder is always used and the source solutions are
     * not consulted.</li>
     * <li>Otherwise, if there is more than one source solution and at least
     * one of them binds the same blank node on two or more variables, the
     * SPARQL 1.0 query builder is used.</li>
     * <li>Otherwise, if the service is declared as
     * {@link SPARQLVersion#SPARQL_11_DRAFT_BINDINGS}, the SPARQL 1.1 draft
     * query builder is used.</li>
     * <li>Otherwise the SPARQL 1.1 query builder is used.</li>
     * </ol>
     *
     * @param serviceOptions
     *            The configuration options for the service.
     * @param serviceNode
     *            The SERVICE clause.
     * @param bindingSets
     *            The source solutions. These will be used to create a BINDINGS
     *            clause for the query. Not examined when the service is
     *            declared as SPARQL 1.0.
     *
     * @return The query builder to be used for the remote service request.
     *
     * @throws IllegalArgumentException
     *             if <i>serviceOptions</i> or <i>serviceNode</i> is
     *             <code>null</code>, or if <i>bindingSets</i> is
     *             <code>null</code> and the service is not declared as SPARQL
     *             1.0.
     */
    static public IRemoteSparqlQueryBuilder get(
            final IServiceOptions serviceOptions,
            final ServiceNode serviceNode, final BindingSet[] bindingSets) {

        if (serviceOptions == null)
            throw new IllegalArgumentException("serviceOptions is null");

        if (serviceNode == null)
            throw new IllegalArgumentException("serviceNode is null");

        /*
         * When the service is declared as SPARQL 1.0 we always use the SPARQL
         * 1.0 query builder, regardless of the source solutions.
         */
        if (serviceOptions.getSPARQLVersion().equals(SPARQLVersion.SPARQL_10)) {

            return new RemoteSparql10QueryBuilder(serviceNode);

        }

        /*
         * The source solutions are inspected from here on. Fail with a clear
         * error rather than a bare NullPointerException. (The check is made
         * after the SPARQL 1.0 case since that case never looks at the source
         * solutions.)
         */
        if (bindingSets == null)
            throw new IllegalArgumentException("bindingSets is null");

        /*
         * True when there is nothing to vector: either no source solutions at
         * all or exactly one source solution without any bindings.
         */
        final boolean singleEmptyBindingSet = (bindingSets.length == 0)
                || (bindingSets.length == 1 && bindingSets[0].size() == 0);

        if (!singleEmptyBindingSet
                && hasCorrelatedBlankNodeBindings(bindingSets)) {

            /*
             * Multiple source solutions with variables correlated through a
             * shared blank node: use the SPARQL 1.0 query builder.
             */
            return new RemoteSparql10QueryBuilder(serviceNode);

        }

        if (serviceOptions.getSPARQLVersion().equals(
                SPARQLVersion.SPARQL_11_DRAFT_BINDINGS)) {

            return new RemoteSparql11DraftQueryBuilder(serviceNode);

        }

        return new RemoteSparql11QueryBuilder(serviceNode);

    }

    /**
     * Return <code>true</code> iff (a) there is more than one solution; and (b)
     * any of the solutions has a blank node which is bound for more than one
     * variable. We need to use a different strategy for vectoring the solutions
     * to the remote service when this is true.
     *
     * @param bindingSets
     *            The solutions.
     *
     * @return <code>true</code> iff there are at least two solutions and some
     *         solution binds the same blank node on two or more variables.
     */
    static private boolean hasCorrelatedBlankNodeBindings(
            final BindingSet[] bindingSets) {

        if (bindingSets.length <= 1) {
            /*
             * Correlation in the variables through shared blank nodes is Ok as
             * long as there is only one solution flowing into the service end
             * point.
             */
            return false;
        }

        for (BindingSet bindingSet : bindingSets) {

            // The distinct blank nodes seen so far in this solution (lazily
            // allocated since most solutions will not have any blank nodes).
            Set<BNode> bnodes = null;

            for (Binding b : bindingSet) {

                final Value v = b.getValue();

                if (!(v instanceof BNode))
                    continue;

                if (bnodes == null)
                    bnodes = new HashSet<BNode>();

                final BNode t = (BNode) v;

                /*
                 * Set#add() returns false iff the element was already
                 * present, i.e., iff an earlier binding in this same solution
                 * had the same blank node.
                 */
                if (!bnodes.add(t)) {
                    /*
                     * This solution has at least two variable bindings for the
                     * same blank node.
                     */
                    return true;
                }

            }

        }

        /*
         * No solution has two or more variables which are bound in that
         * solution to the same blank node.
         */
        return false;

    }

}