/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on June 10, 2015
*/
package com.bigdata.rdf.sparql.ast.optimizers;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.openrdf.model.URI;
import com.bigdata.bop.BOp;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IVariable;
import com.bigdata.rdf.sparql.ast.FilterNode;
import com.bigdata.rdf.sparql.ast.FunctionNode;
import com.bigdata.rdf.sparql.ast.FunctionRegistry;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.IValueExpressionNode;
import com.bigdata.rdf.sparql.ast.JoinGroupNode;
import com.bigdata.rdf.sparql.ast.QueryHints;
import com.bigdata.rdf.sparql.ast.StaticAnalysis;
import com.bigdata.rdf.sparql.ast.ValueExpressionNode;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
/**
* <p>
* The {@link ASTFilterNormalizationOptimizer} is responsible for the static decomposition
* and optimization of filter expressions. In its first version, it decomposes
* conjunctive FILTER expressions, which (i) enables a more precise placement of
* filter expressions in join groups and (ii) clears the way for static
* binding analysis through the {@link ASTStaticBindingsOptimizer}.
* </p>
*
* <p>
* Further, it removes duplicate filters (inside the identical join group),
* including duplicates that pop up through the conversion to CNF, and eliminates
* certain redundant FILTER expressions based on heuristics.
* </p>
*
* <p>
* Note that this optimizer disregards filter placement issues; these should
* be tackled by optimizers running later in the pipeline.
* </p>
*
* <p>
* Possible extensions: (i) there are many more patterns for checking for
* trivially satisfied filters (however, I'm not sure it's worth investing upfront
* effort here); (ii) we may want to check for filters that are
* trivially not satisfied and void the whole join group if one is detected.
* </p>
*
* @author <a href="mailto:ms@metaphacts.com">Michael Schmidt</a>
*
* @version $Id$
*/
public class ASTFilterNormalizationOptimizer extends AbstractJoinGroupOptimizer {

    /**
     * Normalizes the FILTERs of the given join group: first the decomposable
     * FILTERs are brought into CNF and split at their top-level conjunctions,
     * then duplicate and trivially satisfied FILTERs are dropped. Nothing is
     * done if the {@link QueryHints#NORMALIZE_FILTER_EXPRESSIONS} property of
     * the group evaluates to <code>false</code>.
     *
     * @param ctx the evaluation context (passed through to the two phases)
     * @param sa the static analysis object used for the binding analysis
     * @param bSets the exogenous binding sets (passed through)
     * @param group the join group whose FILTERs are normalized in place
     */
    @Override
    protected void optimizeJoinGroup(
            final AST2BOpContext ctx, final StaticAnalysis sa,
            final IBindingSet[] bSets, final JoinGroupNode group) {

        // The FILTER normalization can be disabled with a query hint.
        if (!group.getProperty(
                QueryHints.NORMALIZE_FILTER_EXPRESSIONS,
                QueryHints.DEFAULT_NORMALIZE_FILTER_EXPRESSIONS)) {
            return;
        }

        normalizeAndDecomposeFilters(ctx, sa, bSets, group);
        removeDuplicatesAndTautologies(ctx, sa, bSets, group);
    }

    /**
     * Bring all FILTERs into CNF and split them at top-level to contain
     * simpler FILTER expressions. FILTERs that are not decomposable, or for
     * which the conversion yields no result, are left untouched.
     *
     * @param ctx the evaluation context (unused here)
     * @param sa the static analysis object (unused here)
     * @param bSets the exogenous binding sets (unused here)
     * @param group the join group that is modified in place
     */
    protected void normalizeAndDecomposeFilters(
            final AST2BOpContext ctx, final StaticAnalysis sa,
            final IBindingSet[] bSets, final JoinGroupNode group) {

        /*
         * Substitution map, collected while scanning the group: for every
         * decomposed FILTER it holds one list of replacement FILTERs per
         * occurrence of that FILTER in the group.
         */
        final Map<FilterNode, List<List<FilterNode>>> subst =
                new HashMap<FilterNode, List<List<FilterNode>>>();

        for (final IGroupMemberNode child : group) {

            if (!(child instanceof FilterNode)) {
                continue;
            }

            final FilterNode filterNode = (FilterNode) child;
            final IValueExpressionNode vexprNode =
                    filterNode.getValueExpressionNode();

            // don't even try if not optimizable
            if (!isDecomposable(vexprNode)) {
                continue;
            }

            // only convert if the expression is not yet in CNF
            final IValueExpressionNode filterAsCNF =
                    StaticAnalysis.isCNF(vexprNode)
                            ? vexprNode : StaticAnalysis.toCNF(vexprNode);
            if (filterAsCNF == null) {
                continue; // something went wrong in conversion, be conservative
            }

            final List<FilterNode> splitFilterNodes =
                    constructFiltersForValueExpressionNode(
                            filterAsCNF, new ArrayList<FilterNode>());
            if (splitFilterNodes == null) {
                continue; // something went wrong in conversion, be conservative
            }

            // single lookup; create the bucket on first encounter
            List<List<FilterNode>> replacements = subst.get(filterNode);
            if (replacements == null) {
                replacements = new ArrayList<List<FilterNode>>();
                subst.put(filterNode, replacements);
            }
            replacements.add(splitFilterNodes);
        }

        /*
         * Remove the original filter nodes that were decomposed. All removals
         * are carried out before any replacement is added.
         */
        for (final Map.Entry<FilterNode, List<List<FilterNode>>> entry
                : subst.entrySet()) {

            // remove child as often as we encountered the filter node
            final FilterNode original = entry.getKey();
            final int occurrences = entry.getValue().size();
            for (int i = 0; i < occurrences; i++) {
                group.removeChild(original);
            }
        }

        // append the replacement filters
        for (final List<List<FilterNode>> perOccurrence : subst.values()) {
            for (final List<FilterNode> replacementFilters : perOccurrence) {
                for (final FilterNode replacement : replacementFilters) {
                    group.addChild(replacement);
                }
            }
        }
    }

    /**
     * Remove duplicate FILTERs and tautologies from the given join group.
     * Of several equal FILTERs, the one at the highest position in the group
     * is retained (unless it is a tautology itself).
     *
     * @param ctx the evaluation context (unused here)
     * @param sa the static analysis object used to compute the definitely
     *           and maybe bound variables
     * @param bSets the exogenous binding sets (unused here)
     * @param group the join group that is modified in place
     */
    protected void removeDuplicatesAndTautologies(
            final AST2BOpContext ctx, final StaticAnalysis sa,
            final IBindingSet[] bSets, final JoinGroupNode group) {

        // variables that are definitely bound *after* executing the group
        final Set<IVariable<?>> definitelyProd = new HashSet<IVariable<?>>();
        sa.getDefinitelyIncomingBindings(group, definitelyProd);
        sa.getDefinitelyProducedBindings(group, definitelyProd, true);

        // variables that are maybe bound *after* executing the group
        final Set<IVariable<?>> maybeProd = new HashSet<IVariable<?>>();
        sa.getMaybeIncomingBindings(group, maybeProd);
        sa.getMaybeProducedBindings(group, maybeProd, true);

        // record the filters we've already seen, to remove duplicates
        final Set<FilterNode> alreadySeen = new HashSet<FilterNode>();
        final List<FilterNode> filtersToRemove = new ArrayList<FilterNode>();

        for (int i = group.size() - 1; i >= 0; i--) {

            final BOp child = group.get(i);
            if (!(child instanceof FilterNode)) {
                continue;
            }

            final FilterNode filterNode = (FilterNode) child;

            /*
             * Check whether the FILTER is trivially satisfied or has been
             * encountered before. Set#add marks the filter as seen and, via
             * its return value, tells us whether it was seen already.
             */
            final boolean tautology =
                    filterAlwaysSatisifed(filterNode, definitelyProd, maybeProd);
            final boolean duplicate = !alreadySeen.add(filterNode);

            if (tautology || duplicate) {
                filtersToRemove.add(filterNode);
            }
        }

        /*
         * Remove the duplicate and trivially satisfied filter nodes.
         *
         * The reason why we iterate in inverse order is a small problem (bug)
         * in the removeChild method: the method always removes the first
         * matching child, but resets the parent pointer of the passed argument.
         * To keep this in synch, we need to make sure that we pass arguments in
         * order; given that the arguments were extracted in inverse order, this
         * can be reached by inverse iteration again.
         */
        for (int i = filtersToRemove.size() - 1; i >= 0; i--) {
            group.removeChild(filtersToRemove.get(i));
        }
    }

    /**
     * Checks whether the given filter node is trivially satisfied, i.e.
     * whether it matches one of the tautology patterns known to this class
     * (<tt>bound(?var)</tt> over a definitely bound variable or
     * <tt>not(bound(?var))</tt> over a variable that is not even maybe bound).
     *
     * @param filterNode the filter to check
     * @param definiteVars the variables that are definitely bound
     * @param maybeVars the variables that are maybe bound
     * @return <code>true</code> if one of the patterns matches
     */
    boolean filterAlwaysSatisifed(final FilterNode filterNode,
            final Set<IVariable<?>> definiteVars,
            final Set<IVariable<?>> maybeVars) {

        final IValueExpressionNode vexp = filterNode.getValueExpressionNode();

        // note: you may add further special cases here in future ...
        return boundAlwaysSatisifed(vexp, definiteVars)
                || notBoundAlwaysSatisifed(vexp, maybeVars);
    }

    /**
     * Checks whether the value expression node vexp is of the form
     * <tt>bound(?var)</tt> where ?var is contained in the definite vars,
     * meaning that we encountered a bound expression that is always true.
     *
     * @param vexp the value expression to check
     * @param definiteVars the variables that are definitely bound
     * @return <code>true</code> if the pattern matches and the variable is
     *         contained in <code>definiteVars</code>
     */
    private boolean boundAlwaysSatisifed(
            final IValueExpressionNode vexp,
            final Set<IVariable<?>> definiteVars) {

        final VarNode varNode = extractBoundVarNode(vexp);
        return varNode != null
                && definiteVars.contains(varNode.getValueExpression());
    }

    /**
     * Checks whether the value expression node vexp is of the form
     * <tt>not(bound(?var))</tt> where ?var is not contained in the maybe vars,
     * meaning that we encountered a not bound expression that is always true.
     *
     * @param vexp the value expression to check
     * @param maybeVars the variables that are maybe bound
     * @return <code>true</code> if the pattern matches and the variable is
     *         not contained in <code>maybeVars</code>
     */
    private boolean notBoundAlwaysSatisifed(
            final IValueExpressionNode vexp,
            final Set<IVariable<?>> maybeVars) {

        if (!(vexp instanceof FunctionNode)) {
            return false; // wrong pattern
        }

        final FunctionNode notNode = (FunctionNode) vexp;
        if (!notNode.getFunctionURI().equals(FunctionRegistry.NOT)
                || notNode.arity() != 1) {
            // not a unary NOT; the arity guard protects the get(0) below
            return false;
        }

        final VarNode varNode = extractBoundVarNode(notNode.get(0));
        return varNode != null
                && !maybeVars.contains(varNode.getValueExpression());
    }

    /**
     * Matches the pattern <tt>bound(?var)</tt>.
     *
     * @param node the candidate node (may be <code>null</code>)
     * @return the variable node <tt>?var</tt> if the given node is a
     *         {@link FunctionNode} with function URI
     *         {@link FunctionRegistry#BOUND} and exactly one argument that is
     *         a {@link VarNode}; <code>null</code> otherwise
     */
    private VarNode extractBoundVarNode(final Object node) {

        if (!(node instanceof FunctionNode)) {
            return null;
        }

        final FunctionNode functionNode = (FunctionNode) node;
        if (!functionNode.getFunctionURI().equals(FunctionRegistry.BOUND)
                || functionNode.arity() != 1) {
            return null;
        }

        final BOp argument = functionNode.get(0);
        return (argument instanceof VarNode) ? (VarNode) argument : null;
    }

    /**
     * Checks if there might be potential for decomposing the filter
     * (overestimation): this is the case for AND and OR function nodes, as
     * well as for (possibly nested) unary NOT nodes wrapping such a node.
     *
     * @param vexpr the value expression to check
     * @return <code>true</code> if decomposition opportunities were identified
     */
    public boolean isDecomposable(final IValueExpressionNode vexpr) {

        if (!(vexpr instanceof FunctionNode)) {
            return false;
        }

        final FunctionNode functionNode = (FunctionNode) vexpr;
        final URI functionURI = functionNode.getFunctionURI();

        if (functionURI.equals(FunctionRegistry.AND)
                || functionURI.equals(FunctionRegistry.OR)) {
            return true;
        }

        // look through a unary NOT; the arity guard protects the get(0) call
        if (functionURI.equals(FunctionRegistry.NOT)
                && functionNode.arity() == 1) {

            final BOp negated = functionNode.get(0);
            if (negated instanceof FunctionNode) {
                return isDecomposable((FunctionNode) negated);
            }
        }

        // fallback: no decomposition opportunities identified
        return false;
    }

    /**
     * Construct FILTERs for the given value expression node, exploiting
     * AND nodes at the top to split the node into multiple filters (if
     * possible). Particularly useful to get filter expressions for all
     * conjuncts when given a {@link ValueExpressionNode} in CNF as input.
     *
     * @param vexp the value expression node
     * @param filters the list to which the constructed filters are appended
     *
     * @return the list passed in as <code>filters</code>, extended by one
     *         {@link FilterNode} per extracted top-level conjunct
     */
    public List<FilterNode> constructFiltersForValueExpressionNode(
            final IValueExpressionNode vexp, final List<FilterNode> filters) {

        final List<IValueExpressionNode> topLevelConjuncts =
                StaticAnalysis.extractToplevelConjuncts(
                        vexp, new ArrayList<IValueExpressionNode>());

        for (final IValueExpressionNode conjunct : topLevelConjuncts) {
            filters.add(new FilterNode(conjunct));
        }

        return filters;
    }
}