RemoteSparqlBuilderFactory.java example

Explorer
blazegraph-master
- database-master
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Mar 3, 2012
 */

package com.bigdata.rdf.sparql.ast.service;

import java.util.HashSet;
import java.util.Set;

import org.openrdf.model.BNode;
import org.openrdf.model.Value;
import org.openrdf.query.Binding;
import org.openrdf.query.BindingSet;



/**
 * Factory encapsulates the logic required to decide on the manner in which
 * solutions will be vectored into the remote service end point and in which the
 * solutions flowing back from that service will be interpreted.
 * <p>
 * The matter of interpretation is straightforward when the BINDINGS clause is
 * accepted by the remote end point and there are no variables which are
 * correlated within a solution through shared blank nodes.
 * <p>
 * Query generation and solution interpretation is significantly more complex
 * when any of those things is not true. There are several strategies which may
 * be used in these cases, including:
 * <ol>
 * <li>Issue one remote query per solution, imposing a FILTER to enforce the
 * variable correlation since blank nodes cannot be sent via the BINDINGS
 * clause. (CONS: Too many queries are issued.)</li>
 * <li>Issue one remote query without specifying any BINDINGS. (CONS: The query
 * could be very underconstrained, however we can not detect this case up front
 * since (a) we do not know whether there will be correlated blank nodes until
 * we are ready to generate the query; and (b) we can not know whether the end
 * point supports BINDINGS until we try it at least once (but we could test for
 * that capability and cache knowledge about whether the end point supports that
 * or use the end point's service description for this information).)</li>
 * <li>Vector the remote query through a rewrite using a UNION with distinct
 * variables for each presentation of the original graph pattern and values
 * substituted for those variables via BIND() or the like. (CONS: You have to be
 * careful to get the SPARQL correct when the original SERVICE graph pattern is
 * not used exactly as given.)</li>
 * </ol>
 * In fact, we wind up using a mixture of (1) and (3).
 * <p>
 * If there is only one source solution and it does not have any bindings, then
 * we send the original SERVICE clause and DO NOT use the BINDINGS clause. This
 * pattern works whether or not the service end point supports bindings and
 * covers the case where the service is run without vectoring in any solutions.
 * <p>
 * When there are one or more non-empty solutions to be vectored and variable
 * correlation is not present (or it is present but there is only one solution),
 * and the service end point supports the BINDINGS clause, then we use the
 * BINDINGS clause to vector the query against the remote SERVICE end point.
 * <p>
 * If there are multiple solutions and correlated variables -or- if the service
 * does not support BINDINGS, then we use the UNION rewrite approach and BIND()
 * the bindings within each alternative of the UNION. As a special case, no
 * UNION is required if there is only one source solution (we can just BIND()
 * the bindings) or if the source solution has no bound variables (we do not
 * need to send any bindings).
 * <p>
 * Note: We do not need to use a rowId to correlate the source solutions and the
 * service's solution. We will always do a hash join of the service solutions
 * with the source solutions. The only time the hash join could be a problem is
 * when there are blank nodes and multiple source solutions. However, we are
 * already required to rewrite the SERVICE clause using a UNION pattern in this
 * case so we can maintain the correlation of the blank nodes through the unique
 * variable names in each alternative of the UNION.
 * 
 * TODO ASK query optimization when there is a single triple pattern which is
 * fully bound AND there is only one solution flowing into the service. (This is
 * a pretty minor optimization and a very special case since we are more likely
 * to have a vector of fully bound solutions.)
 * 
 * TODO If any of the source solutions is fully unbound, then the other source
 * solutions could be eliminated since we will be running the service fully
 * unbound anyway.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id: RemoteSparqlBuilderFactory.java 6068 2012-03-03 21:34:31Z
 *          thompsonbry $
 */
public class RemoteSparqlBuilderFactory {

    /**
     * Select the query builder used to vector the source solutions into the
     * remote SERVICE end point.
     * <p>
     * The builder is chosen as follows, in this order:
     * <ol>
     * <li>The service is declared as {@link SPARQLVersion#SPARQL_10}: a
     * {@link RemoteSparql10QueryBuilder} is returned (the source solutions are
     * not consulted for this decision).</li>
     * <li>There is more than one source solution and at least one of them
     * binds the same blank node to two or more variables: a
     * {@link RemoteSparql10QueryBuilder} is returned.</li>
     * <li>The service is declared as
     * {@link SPARQLVersion#SPARQL_11_DRAFT_BINDINGS}: a
     * {@link RemoteSparql11DraftQueryBuilder} is returned.</li>
     * <li>Otherwise a {@link RemoteSparql11QueryBuilder} is returned.</li>
     * </ol>
     * 
     * @param serviceOptions
     *            The configuration options for the service.
     * @param serviceNode
     *            The SERVICE clause.
     * @param bindingSets
     *            The source solutions. These will be used to create a BINDINGS
     *            clause for the query.
     * 
     * @return The query builder to use for the remote service.
     * 
     * @throws IllegalArgumentException
     *             if <i>serviceOptions</i> or <i>serviceNode</i> is
     *             <code>null</code>, or if <i>bindingSets</i> is
     *             <code>null</code> and the source solutions must be inspected
     *             (i.e., the service is not a SPARQL 1.0 service).
     */
    static public IRemoteSparqlQueryBuilder get(
            final IServiceOptions serviceOptions,
            final ServiceNode serviceNode, final BindingSet[] bindingSets) {

        if (serviceOptions == null)
            throw new IllegalArgumentException();

        if (serviceNode == null)
            throw new IllegalArgumentException();

        /*
         * A SPARQL 1.0 end point does not understand the BINDINGS clause, so
         * the SPARQL 1.0 query builder is always used for such services,
         * regardless of the source solutions.
         */

        if(serviceOptions.getSPARQLVersion().equals(SPARQLVersion.SPARQL_10)) {
            
            return new RemoteSparql10QueryBuilder(serviceNode);

        }

        /*
         * The source solutions are inspected from here on. Fail with the same
         * exception type as the other argument checks rather than with a
         * NullPointerException on the first dereference.
         */
        if (bindingSets == null)
            throw new IllegalArgumentException();

        /*
         * True when there is nothing to vector: either no source solutions at
         * all, or exactly one source solution which has no bindings.
         */
        final boolean singleEmptyBindingSet = (bindingSets.length == 0)
                || (bindingSets.length == 1 && bindingSets[0].size() == 0);
        
        /*
         * Multiple source solutions with variables correlated through a shared
         * blank node are handled by the SPARQL 1.0 query builder rather than
         * by a BINDINGS clause.
         */
        if (!singleEmptyBindingSet && hasCorrelatedBlankNodeBindings(bindingSets)) {

            return new RemoteSparql10QueryBuilder(serviceNode);

        }
        
        if(serviceOptions.getSPARQLVersion().equals(SPARQLVersion.SPARQL_11_DRAFT_BINDINGS)) {
            
            return new RemoteSparql11DraftQueryBuilder(serviceNode);

        }

        return new RemoteSparql11QueryBuilder(serviceNode);

    }

    /**
     * Return <code>true</code> iff (a) there is more than one solution; and (b)
     * any of the solutions has a blank node which is bound for more than one
     * variable. We need to use a different strategy for vectoring the solutions
     * to the remote service when this is true.
     * <p>
     * Blank nodes are compared using their <code>equals()</code> /
     * <code>hashCode()</code> contract (they are collected in a
     * {@link HashSet}). A blank node which appears in several different
     * solutions, but at most once within each solution, is NOT a correlation.
     * 
     * @param bindingSets
     *            The solutions.
     * 
     * @return <code>true</code> iff there are multiple solutions and at least
     *         one solution binds the same blank node to two or more variables.
     */
    static private boolean hasCorrelatedBlankNodeBindings(
            final BindingSet[] bindingSets) {
        
        if (bindingSets.length <= 1) {
            /*
             * Correlation in the variables through shared blank nodes is Ok as
             * long as there is only one solution flowing into the service end
             * point.
             */
            return false;
        }
        
        for (BindingSet bindingSet : bindingSets) {
            // Blank nodes seen so far in THIS solution (lazily allocated).
            Set<BNode> bnodes = null;
            for (Binding b : bindingSet) {
                final Value v = b.getValue();
                if (!(v instanceof BNode))
                    continue;
                if (bnodes == null)
                    bnodes = new HashSet<BNode>();
                final BNode t = (BNode) v;
                /*
                 * Set.add() returns false when the element was already
                 * present, i.e., when an earlier variable in this solution is
                 * bound to the same blank node.
                 */
                if (!bnodes.add(t)) {
                    /*
                     * This solution has at least two variable bindings for the
                     * same blank node.
                     */
                    return true;
                }
            }
        }
     
        /*
         * No solution has two or more variables which are bound in that
         * solution to the same blank node.
         */
        return false;
        
    }

}