// Bigdata2ASTSPARQLParser.java example
//
// Explorer
// blazegraph-master
// - database-master
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Portions of this code are:
 *
 * Copyright Aduna (http://www.aduna-software.com/) (c) 1997-2007.
 *
 * Licensed under the Aduna BSD-style license.
 */
package com.bigdata.rdf.sail.sparql;

import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.UUID;

import org.apache.log4j.Logger;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.parser.ParsedOperation;
import org.openrdf.query.parser.ParsedQuery;
import org.openrdf.query.parser.ParsedUpdate;
import org.openrdf.query.parser.QueryParser;
import org.openrdf.query.parser.QueryParserUtil;
import org.openrdf.query.parser.sparql.SPARQLParser;

import com.bigdata.bop.BOpUtility;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.sail.sparql.ast.ASTPrefixDecl;
import com.bigdata.rdf.sail.sparql.ast.ASTQueryContainer;
import com.bigdata.rdf.sail.sparql.ast.ASTUpdate;
import com.bigdata.rdf.sail.sparql.ast.ASTUpdateContainer;
import com.bigdata.rdf.sail.sparql.ast.ASTUpdateSequence;
import com.bigdata.rdf.sail.sparql.ast.ParseException;
import com.bigdata.rdf.sail.sparql.ast.SyntaxTreeBuilder;
import com.bigdata.rdf.sail.sparql.ast.TokenMgrError;
import com.bigdata.rdf.sail.sparql.ast.VisitorException;
import com.bigdata.rdf.sparql.ast.ASTBase;
import com.bigdata.rdf.sparql.ast.ASTContainer;
import com.bigdata.rdf.sparql.ast.QueryHints;
import com.bigdata.rdf.sparql.ast.QueryRoot;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.Update;
import com.bigdata.rdf.sparql.ast.UpdateRoot;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpUtility;
import com.bigdata.rdf.sparql.ast.hints.QueryHintScope;
import com.bigdata.rdf.sparql.ast.optimizers.ASTQueryHintOptimizer;

/**
 * Overridden version of the openrdf {@link SPARQLParser} class which extracts
 * additional information required by bigdata and associates it with the
 * {@link ParsedQuery} or {@link ParsedUpdate}.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @openrdf
 */
public class Bigdata2ASTSPARQLParser implements QueryParser {

    /** Logger for this class; the raw request strings are logged at INFO. */
    private static final Logger log =
            Logger.getLogger(Bigdata2ASTSPARQLParser.class);

    /**
     * The subject that a query-id hint is required to use: the query hints
     * namespace followed by the {@link QueryHintScope#Query} scope.
     */
    private static final URI queryScope =
            new URIImpl(QueryHints.NAMESPACE + QueryHintScope.Query);

    /**
     * The predicate that identifies a query-id hint: the query hints
     * namespace followed by {@link QueryHints#QUERYID}.
     */
    private static final URI queryIdHint =
            new URIImpl(QueryHints.NAMESPACE + QueryHints.QUERYID);

    /**
     * Creates a parser. The class declares no instance fields, so there is
     * nothing to configure here.
     */
    public Bigdata2ASTSPARQLParser() {
        // Nothing to initialize.
    }

    /**
     * Parse either a SPARQL QUERY or a SPARQL UPDATE request.
     * <p>
     * The request is classified by the keyword it starts with once
     * {@link QueryParserUtil#removeSPARQLQueryProlog(String)} has been applied:
     * <code>SELECT</code>, <code>CONSTRUCT</code>, <code>DESCRIBE</code> and
     * <code>ASK</code> are handed to {@link #parseQuery(String, String)};
     * everything else is handed to {@link #parseUpdate(String, String)}. The
     * keyword test is case-insensitive.
     * 
     * @param operation
     *            The request.
     * @param baseURI
     *            The base URI.
     * 
     * @return The {@link ParsedOperation}
     * 
     * @throws MalformedQueryException
     *             if the selected parser rejects the request.
     */
    public ParsedOperation parseOperation(final String operation,
            final String baseURI) throws MalformedQueryException {

        /*
         * Note: The case conversion must use a fixed locale. With the JVM
         * default locale a lower-case "describe" is upper-cased to "DESCRİBE"
         * (dotted capital I) under a Turkish or Azeri locale, which would fail
         * the keyword test below and send the query to the UPDATE parser.
         */
        final String strippedOperation = QueryParserUtil
                .removeSPARQLQueryProlog(operation).toUpperCase(Locale.ROOT);

        final boolean isQuery = strippedOperation.startsWith("SELECT")
                || strippedOperation.startsWith("CONSTRUCT")
                || strippedOperation.startsWith("DESCRIBE")
                || strippedOperation.startsWith("ASK");

        final ParsedOperation parsedOperation;

        if (isQuery) {

            parsedOperation = parseQuery(operation, baseURI);

        } else {

            parsedOperation = parseUpdate(operation, baseURI);

        }

        return parsedOperation;

    }

    /**
     * {@inheritDoc}
     * <p>
     * This is a thin adapter: the query is parsed by
     * {@link #parseQuery2(String, String)} and the resulting
     * {@link ASTContainer} is wrapped in a {@link BigdataParsedQuery}. Callers
     * that can work with the {@link ASTContainer} directly are strongly
     * encouraged to call {@link #parseQuery2(String, String)} instead.
     * 
     * @param queryStr
     *            The query.
     * @param baseURI
     *            The base URI.
     * 
     * @return An object which aligns the {@link ASTContainer} with the
     *         {@link ParsedQuery} interface.
     * 
     * @throws MalformedQueryException
     *             if {@link #parseQuery2(String, String)} rejects the query.
     */
    @Override
    public BigdataParsedQuery parseQuery(final String queryStr,
            final String baseURI) throws MalformedQueryException {

        final ASTContainer astContainer = parseQuery2(queryStr, baseURI);

        return new BigdataParsedQuery(astContainer);

    }

    /**
     * {@inheritDoc}
     * <p>
     * This is a thin adapter: the request is parsed by
     * {@link #parseUpdate2(String, String)} and the resulting
     * {@link ASTContainer} is wrapped in a {@link BigdataParsedUpdate}.
     * 
     * @param updateStr
     *            The SPARQL UPDATE request.
     * @param baseURI
     *            The base URI.
     * 
     * @throws MalformedQueryException
     *             if {@link #parseUpdate2(String, String)} rejects the request.
     * 
     * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/448">
     *      SPARQL 1.1 Update </a>
     */
    @Override
    public ParsedUpdate parseUpdate(final String updateStr, final String baseURI)
            throws MalformedQueryException {

        final ASTContainer astContainer = parseUpdate2(updateStr, baseURI);

        return new BigdataParsedUpdate(astContainer);

    }

    /**
     * Parse a SPARQL 1.1 UPDATE request.
     * <p>
     * The request is parsed as a sequence of UPDATE operations. Each operation
     * in the sequence is pre-processed (string escapes, BASE declaration,
     * PREFIX declarations, blank nodes, deferred IV resolution) and then
     * translated into a bigdata {@link Update}, which is added as a child of a
     * single {@link UpdateRoot}. The elapsed parse time, the original request
     * string and the parse tree are recorded on the returned container.
     * 
     * @param updateStr
     *            The SPARQL UPDATE request.
     * @param baseURI
     *            The base URI.
     * 
     * @return The Bigdata AST model for that request.
     * 
     * @throws MalformedQueryException
     *             if an empty operation appears in the interior of the
     *             sequence, or if a {@link ParseException},
     *             {@link TokenMgrError} or {@link VisitorException} is raised
     *             while processing the request.
     * 
     * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/448">
     *      SPARQL 1.1 Update </a>
     */
    public ASTContainer parseUpdate2(final String updateStr,
            final String baseURI) throws MalformedQueryException {

        final long beginNanos = System.nanoTime();

        if (log.isInfoEnabled()) {
            log.info(updateStr);
        }

        try {

            /*
             * Note: The update sequence sits *above* the update containers.
             * The ASTUpdateSequence becomes the UpdateRoot and every
             * ASTUpdateContainer becomes a bigdata Update which is a child of
             * that UpdateRoot. Update is abstract; there is a concrete
             * implementation for each of the SPARQL UPDATE operations (ADD,
             * DROP, CREATE, MOVE, COPY, INSERT DATA, REMOVE DATA,
             * DELETE/INSERT, etc).
             */
            final ASTUpdateSequence sequence = SyntaxTreeBuilder
                    .parseUpdateSequence(updateStr);

            final UpdateRoot root = new UpdateRoot();

            final ASTContainer container = new ASTContainer(root);

            // Remember the original request and its parse tree on the AST.
            container.setQueryString(updateStr);
            container.setParseTree(sequence);

            // Visitor which turns SPARQL UPDATE ops into bigdata Update ops.
            final UpdateExprBuilder builder = new UpdateExprBuilder(
                    new BigdataASTContext(
                            new LinkedHashMap<Value, BigdataValue>()));

            // The operations of the sequence, in order.
            final List<ASTUpdateContainer> operations = sequence
                    .getUpdateContainers();

            // Most recent non-empty PREFIX declaration list seen so far.
            List<ASTPrefixDecl> inheritedPrefixes = null;

            for (int pos = 0; pos < operations.size(); pos++) {

                final ASTUpdateContainer uc = operations.get(pos);

                final boolean isEmpty = uc.jjtGetNumChildren() == 0;

                if (isEmpty && pos > 0 && pos < operations.size() - 1) {
                    // An empty update is not allowed in the middle.
                    throw new MalformedQueryException("empty update in sequence not allowed");
                }

                StringEscapesProcessor.process(uc);

                BaseDeclProcessor.process(uc, baseURI);

                /*
                 * Prefix declarations in a sequence: an operation which
                 * declares its own prefixes uses those (and they become the
                 * ones inherited by later operations). An operation without
                 * any prefix declarations gets the declarations of a previous
                 * operation appended to it, if there are any.
                 */
                final List<ASTPrefixDecl> ownPrefixes = uc.getPrefixDeclList();

                final boolean declaresPrefixes = ownPrefixes != null
                        && !ownPrefixes.isEmpty();

                if (declaresPrefixes) {

                    inheritedPrefixes = ownPrefixes;

                } else if (inheritedPrefixes != null) {

                    for (final ASTPrefixDecl decl : inheritedPrefixes) {

                        uc.jjtAppendChild(decl);

                    }

                }

                PrefixDeclProcessor.process(uc);

                /*
                 * Note: Blank nodes in the query part of an update are
                 * treated as anonymous variables. In the data part of an
                 * update (as in a construct node) a new blank node is
                 * generated for each binding set in the solution list. For an
                 * update the generated bnode is stored in the server; for a
                 * construct node it is returned with the results.
                 */
                BlankNodeVarProcessor.process(uc);

                /*
                 * Prepare deferred batch IV resolution of ASTRDFValue to
                 * BigdataValues.
                 * 
                 * @see https://jira.blazegraph.com/browse/BLZG-1176
                 * 
                 * Note: IV resolution has to be done separately (or
                 * re-attempted) for every UPDATE operation of a sequence.
                 * Some operations cause new IVs to be declared in the
                 * lexicon, and resolving before such a declaration gives a
                 * different answer than resolving after it (a null IV before
                 * the Value is added to the lexicon, a TermId or BlobIV
                 * afterward).
                 * 
                 * @see https://sourceforge.net/apps/trac/bigdata/ticket/558
                 */
                new ASTDeferredIVResolutionInitializer().process(uc);

                final ASTUpdate updateNode = uc.getUpdate();

                if (updateNode == null) {
                    // Nothing to translate for this container.
                    continue;
                }

                // Translate the UPDATE operation and attach it to the root.
                final Update translated = (Update) updateNode.jjtAccept(
                        builder, null/* data */);

                translated.setDatasetClauses(updateNode.getDatasetClauseList());

                root.addChild(translated);

            }

            container.setQueryParseTime(System.nanoTime() - beginNanos);

            return container;

        } catch (final ParseException e) {

            throw new MalformedQueryException(e.getMessage(), e);

        } catch (final TokenMgrError e) {

            throw new MalformedQueryException(e.getMessage(), e);

        } catch (final VisitorException e) {

            throw new MalformedQueryException(e.getMessage(), e);

        }

    }

    /**
     * Parse a SPARQL query.
     * <p>
     * The query string is parsed into a parse tree, the parse tree is
     * pre-processed (string escapes, BASE declaration, PREFIX declarations,
     * blank nodes, deferred IV resolution) and then interpreted to build the
     * bigdata AST. The query-id hint (if any) is copied onto the container,
     * the prefix declarations are attached to the {@link QueryRoot}, and the
     * use of aggregates is verified. The elapsed parse time, the original
     * query string and the parse tree are recorded on the returned container.
     * 
     * @param queryStr
     *            The query.
     * @param baseURI
     *            The base URI.
     * 
     * @return The AST model for that query.
     * 
     * @throws MalformedQueryException
     *             if an {@link IllegalArgumentException},
     *             {@link VisitorException}, {@link ParseException} or
     *             {@link TokenMgrError} is raised while processing the query,
     *             or if the query-id hint is not well formed.
     */
    public ASTContainer parseQuery2(final String queryStr, final String baseURI)
            throws MalformedQueryException {

        final long beginNanos = System.nanoTime();

        if (log.isInfoEnabled()) {
            log.info(queryStr);
        }

        try {

            final ASTQueryContainer parseTree = SyntaxTreeBuilder
                    .parseQuery(queryStr);

            StringEscapesProcessor.process(parseTree);

            BaseDeclProcessor.process(parseTree, baseURI);

            final Map<String, String> prefixDecls = PrefixDeclProcessor
                    .process(parseTree);

//            WildcardProjectionProcessor.process(qc);

            BlankNodeVarProcessor.process(parseTree);

            /*
             * Prepare deferred batch IV resolution of ASTRDFValue to
             * BigdataValues.
             * 
             * @see https://jira.blazegraph.com/browse/BLZG-1176
             */
            final ASTDeferredIVResolutionInitializer ivResolution =
                    new ASTDeferredIVResolutionInitializer();

            ivResolution.process(parseTree);

            // Interpret the parse tree, producing the bigdata AST.
            final QueryRoot queryRoot = buildQueryModel(parseTree,
                    ivResolution.getValues());

            final ASTContainer container = new ASTContainer(queryRoot);

            // Remember the original query and its parse tree on the AST.
            container.setQueryString(queryStr);
            container.setParseTree(parseTree);

            // Copy a caller-supplied query id (if any) onto the container.
            doQueryIdHint(container, queryRoot);

//            final Properties queryHints = getQueryHints(qc);
//
//            if (queryHints != null) {
//
//               queryRoot.setQueryHints(queryHints);
//
//            }

            // Attach namespace declarations.
            queryRoot.setPrefixDecls(prefixDecls);

            VerifyAggregates.verifyAggregate(queryRoot);

            container.setQueryParseTime(System.nanoTime() - beginNanos);

            return container;

        } catch (final IllegalArgumentException e) {

            throw new MalformedQueryException(e.getMessage(), e);

        } catch (final VisitorException e) {

            throw new MalformedQueryException(e.getMessage(), e);

        } catch (final ParseException e) {

            throw new MalformedQueryException(e.getMessage(), e);

        } catch (final TokenMgrError e) {

            throw new MalformedQueryException(e.getMessage(), e);

        }

    }

    /**
     * Applies the {@link BigdataExprBuilder} visitor to interpret the parse
     * tree, building up a bigdata {@link ASTBase AST}.
     * <p>
     * The visitor is created over a new {@link BigdataASTContext} which wraps
     * the given <i>values</i>.
     * 
     * @param qc
     *            The root of the parse tree.
     * @param values
     *            Previously cached RDF values.
     * 
     * @return The root of the bigdata AST generated by interpreting the parse
     *         tree.
     * 
     * @throws MalformedQueryException
     *             if the visitor raises a {@link VisitorException}.
     */
    private QueryRoot buildQueryModel(final ASTQueryContainer qc,
            final Map<Value, BigdataValue> values) throws MalformedQueryException {

        final BigdataASTContext context = new BigdataASTContext(values);

        final BigdataExprBuilder visitor = new BigdataExprBuilder(context);

        final Object model;

        try {

            model = qc.jjtAccept(visitor, null);

        } catch (final VisitorException e) {

            throw new MalformedQueryException(e.getMessage(), e);

        }

        return (QueryRoot) model;

    }

    /**
     * Looks for the {@link QueryHints#QUERYID} and copies it to the
     * {@link ASTContainer}, which is where other code will look for a caller
     * given QueryID.
     * <p>
     * Only the first statement pattern whose predicate is the query-id hint is
     * considered. Its subject must be the {@link QueryHintScope#Query} scope
     * and its object must be the string form of a {@link UUID}. If no such
     * statement pattern exists, the container is left untouched.
     * <p>
     * Note: This needs to be done up very early on in the processing of the
     * query since several things expect this information to already be known
     * before the query is handed over to the {@link AST2BOpUtility}.
     * 
     * @param ast
     *            The {@link ASTContainer}.
     * @param queryRoot
     *            The root of the query.
     * 
     * @throws MalformedQueryException
     *             if the hint is not in the query scope, if its object has no
     *             value, or if that value is not a valid UUID.
     * 
     *             TODO This does not actually modify the AST. It could be
     *             modified to do that, but the code would have to be robust to
     *             modification (of the AST children) during traversal. For the
     *             moment I am just leaving the query hint in place here. It
     *             will be stripped out when the {@link ASTQueryHintOptimizer}
     *             runs.
     */
    private void doQueryIdHint(final ASTContainer ast, final QueryRoot queryRoot)
            throws MalformedQueryException {

        final Iterator<StatementPatternNode> itr = BOpUtility.visitAll(
                queryRoot, StatementPatternNode.class);

        while (itr.hasNext()) {

            final StatementPatternNode sp = itr.next();

            if (!queryIdHint.equals(sp.p().getValue())) {

                // Not the query-id hint.
                continue;

            }

            if (!queryScope.equals(sp.s().getValue())) {

                throw new MalformedQueryException(QueryHints.QUERYID
                        + " must be in scope " + QueryHintScope.Query);

            }

            /*
             * NOTE(review): getValue() presumably returns null when the term
             * is not a constant (e.g., a variable in the object position) -
             * confirm against TermNode. Report that as a malformed query
             * rather than letting a NullPointerException escape.
             */
            final Value queryIdValue = sp.o().getValue();

            if (queryIdValue == null) {

                throw new MalformedQueryException(QueryHints.QUERYID
                        + " must be given as a constant UUID");

            }

            final String queryIdStr = queryIdValue.stringValue();

            try {
                // Parse (validates that this is a UUID).
                UUID.fromString(queryIdStr);
            } catch (final IllegalArgumentException ex) {
                // Keep the cause so the original failure is not lost.
                throw new MalformedQueryException("Not a valid UUID: "
                        + queryIdStr, ex);
            }

            // Set the hint on the ASTContainer.
            ast.setQueryHint(QueryHints.QUERYID, queryIdStr);

            // Only the first query-id hint is used.
            return;

        }

    }
    
//    public static void main(String[] args)
//        throws java.io.IOException
//    {
//        System.out.println("Your SPARQL query:");
//
//        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
//
//        StringBuilder buf = new StringBuilder();
//        String line = null;
//        while ((line = in.readLine()) != null) {
//            if (line.length() > 0) {
//                buf.append(' ').append(line).append('\n');
//            }
//            else {
//                String queryStr = buf.toString().trim();
//                if (queryStr.length() > 0) {
//                    try {
//                        SPARQLParser parser = new SPARQLParser();
//                        parser.parseQuery(queryStr, null);
//                    }
//                    catch (Exception e) {
//                        System.err.println(e.getMessage());
//                        e.printStackTrace();
//                    }
//                }
//                buf.setLength(0);
//            }
//        }
//    }
}