/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Sep 10, 2011
 */
package com.bigdata.rdf.sparql.ast.optimizers;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.UUID;

import org.openrdf.query.algebra.StatementPattern.Scope;

import com.bigdata.bop.BOp;
import com.bigdata.bop.IBindingSet;
import com.bigdata.rdf.sparql.ast.ArbitraryLengthPathNode;
import com.bigdata.rdf.sparql.ast.ConstantNode;
import com.bigdata.rdf.sparql.ast.FilterNode;
import com.bigdata.rdf.sparql.ast.FunctionNode;
import com.bigdata.rdf.sparql.ast.FunctionRegistry;
import com.bigdata.rdf.sparql.ast.GraphPatternGroup;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.JoinGroupNode;
import com.bigdata.rdf.sparql.ast.PathNode.PathAlternative;
import com.bigdata.rdf.sparql.ast.PathNode.PathElt;
import com.bigdata.rdf.sparql.ast.PathNode.PathMod;
import com.bigdata.rdf.sparql.ast.PathNode.PathNegatedPropertySet;
import com.bigdata.rdf.sparql.ast.PathNode.PathOneInPropertySet;
import com.bigdata.rdf.sparql.ast.PathNode.PathSequence;
import com.bigdata.rdf.sparql.ast.PropertyPathNode;
import com.bigdata.rdf.sparql.ast.PropertyPathUnionNode;
import com.bigdata.rdf.sparql.ast.QueryHints;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.StaticAnalysis;
import com.bigdata.rdf.sparql.ast.TermNode;
import com.bigdata.rdf.sparql.ast.UnionNode;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.sparql.ast.ZeroLengthPathNode;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;

/**
 * AST optimizer that replaces every {@link PropertyPathNode} found in a join
 * group with nodes built from its path expression: statement patterns,
 * {@link PropertyPathUnionNode}s (for alternatives and mixed-direction
 * negated property sets), {@link ArbitraryLengthPathNode}s (for elements
 * carrying a path modifier), {@link ZeroLengthPathNode}s and NOT IN filters
 * (for negated property sets).
 * <p>
 * New nodes are inserted at the position of the property path node that is
 * being replaced (see
 * {@link #getPositionOfNodeInGroup(PropertyPathNode, GraphPatternGroup)}),
 * and the property path node itself is removed afterwards.
 */
public class ASTPropertyPathOptimizer extends AbstractJoinGroupOptimizer
        implements IASTOptimizer {

//    private static final transient Logger log = Logger.getLogger(ASTPropertyPathOptimizer.class);

    /**
     * Optimize the join group: rewrite each {@link PropertyPathNode} that is
     * a child of the group.
     *
     * @param ctx
     *            The evaluation context (passed through to the rewrite).
     * @param sa
     *            The static analysis helper (passed through to the rewrite).
     * @param bSets
     *            The exogenous binding sets (not used by this optimizer).
     * @param group
     *            The join group whose property path children are rewritten.
     */
    protected void optimizeJoinGroup(final AST2BOpContext ctx,
            final StaticAnalysis sa, final IBindingSet[] bSets,
            final JoinGroupNode group) {

        for (PropertyPathNode node : group.getChildren(PropertyPathNode.class)) {

            optimize(ctx, sa, group, node, null);

        }

    }

    /**
     * Optimize a single PropertyPathNode.
     *
     * @param ctx
     *            The evaluation context.
     * @param sa
     *            The static analysis helper.
     * @param group
     *            The join group containing the property path node.
     * @param ppNode
     *            The property path node to be replaced.
     * @param alpNode
     *            The enclosing arbitrary length path node, or
     *            <code>null</code> if there is none.
     */
    protected void optimize(final AST2BOpContext ctx, final StaticAnalysis sa,
            final JoinGroupNode group, final PropertyPathNode ppNode,
            final ArbitraryLengthPathNode alpNode) {

        final PathAlternative pathRoot = ppNode.p().getPathAlternative();

        final PropertyPathInfo sp = new PropertyPathInfo(ppNode.s(),
                ppNode.o(), ppNode.c(), ppNode.getScope());

        optimize(ctx, sa, group, sp, pathRoot, alpNode,
                ppNode.getQueryHints(), ppNode);

        /*
         * We always remove the PropertyPathNode. It has been replaced with
         * other executable nodes (joins, unions, paths, etc.)
         */
        group.removeChild(ppNode);

    }

    /**
     * Optimize a PathAlternative using UNIONs. A single alternative is
     * rewritten directly into the given group. Multiple alternatives are each
     * rewritten into their own {@link JoinGroupNode} under a new
     * {@link PropertyPathUnionNode}.
     *
     * @param ctx
     *            The evaluation context.
     * @param sa
     *            The static analysis helper.
     * @param group
     *            The group receiving the generated nodes.
     * @param ppInfo
     *            Subject, object, context and scope of the path.
     * @param pathAlt
     *            The alternative to rewrite.
     * @param alpNode
     *            The enclosing arbitrary length path node, or
     *            <code>null</code>.
     * @param queryHints
     *            The query hints of the property path node (may be
     *            <code>null</code>).
     * @param ppNode
     *            The property path node being replaced (used to find the
     *            insert position).
     */
    protected void optimize(final AST2BOpContext ctx, final StaticAnalysis sa,
            final GraphPatternGroup<? extends IGroupMemberNode> group,
            final PropertyPathInfo ppInfo, final PathAlternative pathAlt,
            final ArbitraryLengthPathNode alpNode,
            final Properties queryHints, final PropertyPathNode ppNode) {

        if (pathAlt.arity() == 1) {

            final PathSequence pathSeq = (PathSequence) pathAlt.get(0);

            optimize(ctx, sa, group, ppInfo, pathSeq, alpNode, queryHints,
                    ppNode);

        } else {

            final UnionNode union = new PropertyPathUnionNode();

            group.addArg(getPositionOfNodeInGroup(ppNode, group), union);

            final Iterator<BOp> it = pathAlt.argIterator();

            while (it.hasNext()) {

                final JoinGroupNode subgroup = new JoinGroupNode();

                union.addArg(subgroup);

                final PathSequence pathSeq = (PathSequence) it.next();

                optimize(ctx, sa, subgroup, ppInfo, pathSeq, alpNode,
                        queryHints, ppNode);

            }

        }

    }

    /**
     * Optimize a PathSequence. An empty sequence produces nothing. A
     * singleton sequence is rewritten as its only element. A longer sequence
     * is first rewritten so that no element other than the last one carries a
     * '?' or '*' modifier, and is then chained together through fresh
     * anonymous variables.
     *
     * @param ctx
     *            The evaluation context.
     * @param sa
     *            The static analysis helper.
     * @param group
     *            The group receiving the generated nodes.
     * @param ppInfo
     *            Subject, object, context and scope of the path.
     * @param pathSeq
     *            The sequence to rewrite.
     * @param alpNode
     *            The enclosing arbitrary length path node, or
     *            <code>null</code>. When non-null, the anonymous variables
     *            linking the sequence are registered on it as drop vars.
     * @param queryHints
     *            The query hints of the property path node (may be
     *            <code>null</code>).
     * @param ppNode
     *            The property path node being replaced.
     */
    protected void optimize(final AST2BOpContext ctx, final StaticAnalysis sa,
            final GraphPatternGroup<? extends IGroupMemberNode> group,
            final PropertyPathInfo ppInfo, final PathSequence pathSeq,
            final ArbitraryLengthPathNode alpNode,
            final Properties queryHints, final PropertyPathNode ppNode) {

        final int arity = pathSeq.arity();

        if (arity == 0) {

            return;

        }

        if (arity == 1) {

            final PathElt pathElt = (PathElt) pathSeq.get(0);

            /*
             * Note: An earlier attempt handled a singleton element with a
             * '?' or '*' modifier here by rewriting it into an alternative
             * of a ZeroLengthPathNode and the element with the "zero" case
             * stripped from its modifier. That attempt to solve the zero
             * length path problem with a separate kind of node and operator
             * failed because it leads to cardinality problems.
             */
            optimize(ctx, sa, group, ppInfo, pathElt, alpNode, queryHints,
                    ppNode);

            return;

        }

        /*
         * Look for an optional ('?' or '*') element that is not the last
         * element of the sequence.
         */
        for (int i = 0; i < arity - 1; i++) {

            final PathMod mod = ((PathElt) pathSeq.get(i)).getMod();

            if (mod == PathMod.ZERO_OR_ONE || mod == PathMod.ZERO_OR_MORE) {

                /*
                 * We need to create a new path sequence using an alt and
                 * then run the optimizer on the new sequence instead of
                 * this one.
                 *
                 * This will work even if there are multiple optional
                 * elements - we will just hit this recursively until
                 * they are gone.
                 *
                 * We cannot solve this with optionals or by using the
                 * arbitrary length path operator because of the ridiculous
                 * semantics of zero length paths.
                 */
                final PathSequence pathSeq2 = splitAtOptionalElement(pathSeq, i);

                optimize(ctx, sa, group, ppInfo, pathSeq2, alpNode,
                        queryHints, ppNode);

                return;

            }

        }

        /*
         * No optional element in a non-terminal position: chain the elements
         * s -> anon1 -> anon2 -> ... -> o.
         */
        TermNode last = ppInfo.s;

        for (int i = 0; i < arity; i++) {

            final TermNode next;

            if (i == arity - 1) {

                next = ppInfo.o;

            } else {

                final VarNode anon = anonVar();

                if (alpNode != null) {

                    alpNode.addDropVar(anon);

                }

                next = anon;

            }

            final PropertyPathInfo _ppInfo = new PropertyPathInfo(last, next,
                    ppInfo);

            final PathElt pathElt = (PathElt) pathSeq.get(i);

            optimize(ctx, sa, group, _ppInfo, pathElt, alpNode, queryHints,
                    ppNode);

            last = next;

        }

    }

    /**
     * Rewrite a sequence whose element at <code>index</code> is optional
     * ('?' or '*') into a sequence that starts with the elements before the
     * optional element and ends with an alternative (split) over the
     * subsequent elements, one branch that includes the optional element and
     * one that doesn't. For example:
     *
     * <pre>
     * a/b?/c -> a/((b/c)|c)
     * </pre>
     *
     * In the branch that includes the element, a '?' modifier is cleared
     * (exactly one) and a '*' modifier becomes '+'.
     *
     * @param pathSeq
     *            The original sequence (its elements are not modified).
     * @param index
     *            The index of the optional element. Must not be the last
     *            index of the sequence.
     *
     * @return The rewritten sequence.
     */
    private PathSequence splitAtOptionalElement(final PathSequence pathSeq,
            final int index) {

        final int arity = pathSeq.arity();

        final PathElt optionalElt = (PathElt) pathSeq.get(index);

        final ArrayList<PathElt> newSeq = new ArrayList<PathElt>(index + 1);

        final ArrayList<PathElt> with = new ArrayList<PathElt>(arity - index);

        final ArrayList<PathElt> without = new ArrayList<PathElt>(arity
                - index - 1);

        for (int j = 0; j < arity; j++) {

            final PathElt elt = (PathElt) pathSeq.get(j);

            if (j < index) {

                // add the original, no need to clone
                newSeq.add(elt);

            } else if (j == index) {

                final PathElt _pathElt = new PathElt(optionalElt);

                if (optionalElt.getMod() == PathMod.ZERO_OR_ONE) {

                    _pathElt.setMod(null); // exactly one

                } else { // mod == PathMod.ZERO_OR_MORE

                    _pathElt.setMod(PathMod.ONE_OR_MORE);

                }

                with.add(_pathElt);

            } else {

                /*
                 * After the splitting element we add a copy of
                 * the subsequent elements to the two alternatives.
                 */
                with.add(new PathElt(elt));

                without.add(new PathElt(elt));

            }

        }

        newSeq.add(new PathElt(new PathAlternative(
                new PathSequence(with.toArray(new PathElt[with.size()])),
                new PathSequence(without.toArray(new PathElt[without.size()])))));

        return new PathSequence(newSeq.toArray(new PathElt[newSeq.size()]));

    }

    /** Name prefix of the anonymous variables created by {@link #anonVar()}. */
    private final String anon = "--pp-anon-";

    /**
     * Create a fresh anonymous variable whose name is the {@link #anon}
     * prefix followed by a random UUID.
     */
    private VarNode anonVar() {
        VarNode v = new VarNode(anon + UUID.randomUUID().toString());
        v.setAnonymous(true);
        return v;
    }

    /**
     * Create a fresh anonymous variable. Override during testing to give
     * predictable results.
     *
     * @param anon
     *            The prefix of the variable name.
     *
     * @return A new anonymous variable whose name is the prefix followed by
     *         a random UUID.
     */
    protected VarNode anonVar(final String anon) {
        VarNode v = new VarNode(anon + UUID.randomUUID().toString());
        v.setAnonymous(true);
        return v;
    }

    /*
     * Counter left over from an earlier naming scheme for the transitivity
     * variables ("tVarLeft" + i). No live code in this class reads it; it is
     * retained only because it is package-visible.
     */
    int i = 1;

    /**
     * Optimize a PathElt. An inverse element flips subject and object. An
     * element with a path modifier is pushed down into the subgroup of a new
     * {@link ArbitraryLengthPathNode}. The element's content (nested path,
     * negated property set, zero length path or predicate constant) is then
     * rewritten into the resulting group.
     *
     * @param ctx
     *            The evaluation context.
     * @param sa
     *            The static analysis helper.
     * @param group
     *            The group receiving the generated nodes.
     * @param ppInfo
     *            Subject, object, context and scope for this element.
     * @param pathElt
     *            The element to rewrite.
     * @param alpNode
     *            The enclosing arbitrary length path node, or
     *            <code>null</code>. Replaced by the newly created node when
     *            the element has a modifier.
     * @param queryHints
     *            The query hints of the property path node (may be
     *            <code>null</code>).
     * @param ppNode
     *            The property path node being replaced.
     */
    protected void optimize(final AST2BOpContext ctx, final StaticAnalysis sa,
            GraphPatternGroup<? extends IGroupMemberNode> group,
            PropertyPathInfo ppInfo, final PathElt pathElt,
            ArbitraryLengthPathNode alpNode, final Properties queryHints,
            final PropertyPathNode ppNode) {

        ppInfo = pathElt.inverse() ? ppInfo.inverse() : ppInfo;

        final PathMod mod = pathElt.getMod();

        /*
         * Push expressions with an path length modifier down into a
         * ArbitraryLengthPathNode group.
         */
        if (mod != null) {

            final VarNode tVarLeft = new VarNode(anonVar("-tVarLeft-"));

            final VarNode tVarRight = new VarNode(anonVar("-tVarRight-"));

            alpNode = new ArbitraryLengthPathNode(ppInfo.s, ppInfo.o,
                    tVarLeft, tVarRight, mod);

            // Propagate the pipelined hash join hint, if one was given.
            final String pipelinedHashJoinHint = queryHints == null ? null
                    : queryHints.getProperty(QueryHints.PIPELINED_HASH_JOIN);

            if (pipelinedHashJoinHint != null) {

                alpNode.setQueryHint(QueryHints.PIPELINED_HASH_JOIN,
                        pipelinedHashJoinHint);

            }

            group.addArg(getPositionOfNodeInGroup(ppNode, group), alpNode);

            // Everything below is written into the ALP node's subgroup,
            // between the two transitivity variables.
            ppInfo = new PropertyPathInfo(tVarLeft, tVarRight, ppInfo);

            group = alpNode.subgroup();

        }

        if (pathElt.isNestedPath()) {

            final PathAlternative pathAlt = (PathAlternative) pathElt.get(0);

            optimize(ctx, sa, group, ppInfo, pathAlt, alpNode, queryHints,
                    ppNode);

        } else if (pathElt.isNegatedPropertySet()) {

            final PathNegatedPropertySet pathNPS = (PathNegatedPropertySet) pathElt
                    .get(0);

            optimize(ctx, sa, group, ppInfo, pathNPS, alpNode, ppNode);

        } else if (pathElt.isZeroLengthPath()) {

            final ZeroLengthPathNode zlpNode = (ZeroLengthPathNode) pathElt
                    .get(0);

            optimize(ctx, sa, group, ppInfo, zlpNode, ppNode);

        } else {

            final TermNode termNode = (ConstantNode) pathElt.get(0);

            optimize(ctx, sa, group, ppInfo, termNode, ppNode);

        }

    }

    /**
     * Optimize a TermNode (add a statement pattern to the group).
     *
     * @param ctx
     *            The evaluation context (unused).
     * @param sa
     *            The static analysis helper (unused).
     * @param group
     *            The group receiving the statement pattern.
     * @param ppInfo
     *            Subject, object, context and scope of the statement pattern.
     * @param termNode
     *            The predicate of the statement pattern.
     * @param ppNode
     *            The property path node being replaced.
     */
    protected void optimize(final AST2BOpContext ctx, final StaticAnalysis sa,
            final GraphPatternGroup<? extends IGroupMemberNode> group,
            final PropertyPathInfo ppInfo, final TermNode termNode,
            final PropertyPathNode ppNode) {

        final StatementPatternNode sp = ppInfo.toStatementPattern(termNode);

        group.addArg(getPositionOfNodeInGroup(ppNode, group), sp);

    }

    /**
     * Optimize a ZeroLengthPathNode (add it to the group with the left and
     * right properly set).
     *
     * @param ctx
     *            The evaluation context (unused).
     * @param sa
     *            The static analysis helper (unused).
     * @param group
     *            The group receiving the zero length path node.
     * @param ppInfo
     *            Supplies the left (subject) and right (object) terms.
     * @param zlpNode
     *            The zero length path node; it is modified in place.
     * @param ppNode
     *            The property path node being replaced.
     */
    protected void optimize(final AST2BOpContext ctx, final StaticAnalysis sa,
            final GraphPatternGroup<? extends IGroupMemberNode> group,
            final PropertyPathInfo ppInfo, final ZeroLengthPathNode zlpNode,
            final PropertyPathNode ppNode) {

        zlpNode.setLeft(ppInfo.s);

        zlpNode.setRight(ppInfo.o);

        group.addArg(getPositionOfNodeInGroup(ppNode, group), zlpNode);

    }

    /**
     * Optimize a PathNegatedPropertySet. This is done with a statement
     * pattern and filter. For example, the path:
     *
     * <pre>
     * ?s !(x|y|z) ?o .
     * </pre>
     *
     * Can be re-written into the simpler form:
     *
     * <pre>
     * ?s ?p ?o . filter(?p not in (x, y, z)) .
     * </pre>
     *
     * The more complicated case (where there are inverses involved) can be
     * run as a union of two of the above.
     * <p>
     * An empty negated property set (<code>!()</code>) excludes nothing and
     * is rewritten as the forward statement pattern <code>?s ?p ?o</code>
     * without a filter.
     *
     * @param ctx
     *            The evaluation context (unused).
     * @param sa
     *            The static analysis helper (unused).
     * @param group
     *            The group receiving the generated nodes.
     * @param ppInfo
     *            Subject, object, context and scope of the path.
     * @param pathNPS
     *            The negated property set.
     * @param alpNode
     *            The enclosing arbitrary length path node, or
     *            <code>null</code>.
     * @param ppNode
     *            The property path node being replaced.
     */
    protected void optimize(final AST2BOpContext ctx, final StaticAnalysis sa,
            final GraphPatternGroup<? extends IGroupMemberNode> group,
            final PropertyPathInfo ppInfo,
            final PathNegatedPropertySet pathNPS,
            final ArbitraryLengthPathNode alpNode,
            final PropertyPathNode ppNode) {

        // Partition the excluded IRIs by direction (lazily allocated).
        ArrayList<ConstantNode> forward = null;
        ArrayList<ConstantNode> back = null;

        for (BOp child : pathNPS.args()) {

            final PathOneInPropertySet pathOIPS = (PathOneInPropertySet) child;

            final ConstantNode iri = (ConstantNode) pathOIPS.get(0);

            if (pathOIPS.inverse()) {

                if (back == null) {
                    back = new ArrayList<ConstantNode>();
                }

                back.add(iri);

            } else {

                if (forward == null) {
                    forward = new ArrayList<ConstantNode>();
                }

                forward.add(iri);

            }

        }

        if (forward != null && back != null) {

            final UnionNode union = new PropertyPathUnionNode();

            final JoinGroupNode forwardGroup = new JoinGroupNode();

            final JoinGroupNode backGroup = new JoinGroupNode();

            union.addArg(forwardGroup);

            union.addArg(backGroup);

            group.addArg(getPositionOfNodeInGroup(ppNode, group), union);

            addNegateds(forwardGroup, forward, ppInfo, alpNode, ppNode);

            addNegateds(backGroup, back, ppInfo.inverse(), alpNode, ppNode);

        } else if (back != null) {

            addNegateds(group, back, ppInfo.inverse(), alpNode, ppNode);

        } else {

            /*
             * Either only forward IRIs, or no IRIs at all. In the latter case
             * [forward] is null and addNegateds() emits the bare forward
             * statement pattern. (Previously the empty set fell through to
             * the inverse branch with a null list and failed with a
             * NullPointerException.)
             */
            addNegateds(group, forward, ppInfo, alpNode, ppNode);

        }

    }

    /**
     * Add a statement pattern with a fresh anonymous predicate variable to
     * the group, followed by a NOT IN filter that excludes the given
     * constants from the bindings of that variable. When there are no
     * constants to exclude, only the statement pattern is added.
     *
     * @param group
     *            The group receiving the statement pattern and filter.
     * @param constants
     *            The excluded predicates. May be <code>null</code> or empty,
     *            in which case no filter is generated.
     * @param ppInfo
     *            Subject, object, context and scope of the statement pattern.
     * @param alpNode
     *            The enclosing arbitrary length path node, or
     *            <code>null</code>. When non-null, the predicate variable is
     *            registered on it as a drop var.
     * @param ppNode
     *            The property path node being replaced.
     */
    protected void addNegateds(
            final GraphPatternGroup<? extends IGroupMemberNode> group,
            final ArrayList<ConstantNode> constants,
            final PropertyPathInfo ppInfo,
            final ArbitraryLengthPathNode alpNode,
            final PropertyPathNode ppNode) {

        final VarNode p = anonVar();

        if (alpNode != null) {

            alpNode.addDropVar(p);

        }

        final StatementPatternNode sp = ppInfo.toStatementPattern(p);

        if (constants == null || constants.isEmpty()) {

            // Nothing is excluded, so the filter would be vacuous.
            group.addArg(getPositionOfNodeInGroup(ppNode, group), sp);

            return;

        }

        // NOT_IN arguments: the variable first, then the excluded constants.
        final TermNode[] args = new TermNode[constants.size() + 1];

        args[0] = p;

        System.arraycopy(
                constants.toArray(new ConstantNode[constants.size()]), 0,
                args, 1, constants.size());

        final FunctionNode function = new FunctionNode(
                FunctionRegistry.NOT_IN, null, args);

        final FilterNode filter = new FilterNode(function);

        /*
         * The position is recomputed for the filter: inserting the statement
         * pattern shifts the property path node, so the filter lands directly
         * after the statement pattern.
         */
        group.addArg(getPositionOfNodeInGroup(ppNode, group), sp);

        group.addArg(getPositionOfNodeInGroup(ppNode, group), filter);

    }

    /**
     * Returns the position in the group. If the node is not present in
     * the group, the last position in the group is returned.
     *
     * We calculate the position in order to make sure that we append
     * new nodes at the same position of the property path node that
     * will be replaced later on. This is important in order to keep
     * semantics of the query, e.g. in the context of OPTIONAL or
     * MINUS queries we cannot blindly append at the end. See BLZG-1498
     * and BLZG-1627 and the respective test cases in TestTickets for
     * examples that illustrate why this is indeed necessary.
     *
     * @param node
     *            The property path node to look for.
     * @param group
     *            The group whose children are searched.
     *
     * @return The index of the first child that equals the node, or the
     *         number of children if there is no such child.
     */
    protected int getPositionOfNodeInGroup(final PropertyPathNode node,
            final GraphPatternGroup<? extends IGroupMemberNode> group) {

        final List<? extends IGroupMemberNode> children = group.getChildren();

        for (int i = 0; i < children.size(); i++) {

            if (children.get(i).equals(node)) {
                return i;
            }

        }

        // if we don't find the property path node, we're in a nested scope
        // (e.g. for union subgroups). In that case, we append at the end.
        return children.size();

    }

    /**
     * Used during parsing to identify simple triple patterns.
     *
     * @param pathAlt
     *            The root of the path expression.
     *
     * @return <code>true</code> iff the path consists of exactly one
     *         alternative with exactly one element, and that element is an
     *         IRI that is neither inverted nor modified.
     */
    public static final boolean isSimpleIRI(final PathAlternative pathAlt) {

        if (pathAlt.arity() == 1) {

            final PathSequence pathSeq = (PathSequence) pathAlt.get(0);

            if (pathSeq.arity() == 1) {

                final PathElt pathElt = (PathElt) pathSeq.get(0);

                return !pathElt.inverse() && pathElt.getMod() == null
                        && pathElt.isIRI();

            }

        }

        return false;

    }

    /**
     * Used during parsing to identify simple triple patterns.
     *
     * @param pathAlt
     *            The root of the path expression.
     *
     * @return The IRI of the single path element when the path is a simple
     *         IRI (same conditions as
     *         {@link #isSimpleIRI(PathAlternative)}), otherwise
     *         <code>null</code>.
     */
    public static final ConstantNode getSimpleIRI(final PathAlternative pathAlt) {

        if (pathAlt.arity() == 1) {

            final PathSequence pathSeq = (PathSequence) pathAlt.get(0);

            if (pathSeq.arity() == 1) {

                final PathElt pathElt = (PathElt) pathSeq.get(0);

                if (!pathElt.inverse() && pathElt.getMod() == null
                        && pathElt.isIRI()) {

                    return (ConstantNode) pathElt.get(0);

                }

            }

        }

        return null;

    }

    /**
     * Immutable holder for everything in a triple or path pattern except the
     * predicate: subject, object, context and scope.
     */
    private static class PropertyPathInfo {

        public final TermNode s, o, c;

        public final Scope scope;

        public PropertyPathInfo(final TermNode s, final TermNode o,
                final TermNode c, final Scope scope) {
            this.s = s;
            this.o = o;
            this.c = c;
            this.scope = scope;
        }

        /**
         * Use the given subject and object with the context and scope of the
         * base.
         */
        public PropertyPathInfo(final TermNode s, final TermNode o,
                final PropertyPathInfo base) {
            this(s, o, base.c, base.scope);
        }

        /**
         * Return a copy with subject and object swapped.
         */
        public PropertyPathInfo inverse() {
            return new PropertyPathInfo(o, s, c, scope);
        }

        /**
         * Create a statement pattern using the given predicate.
         */
        public StatementPatternNode toStatementPattern(final TermNode p) {
            return new StatementPatternNode(s, p, o, c, scope);
        }

    }

}