/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.atlas.gremlin.optimizer;

import java.util.ArrayList;
import java.util.List;

import org.apache.atlas.gremlin.GremlinExpressionFactory;
import org.apache.atlas.groovy.AbstractFunctionExpression;
import org.apache.atlas.groovy.GroovyExpression;
import org.apache.atlas.groovy.StatementListExpression;
import org.apache.atlas.groovy.TraversalStepType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.annotations.VisibleForTesting;

/**
 * Optimizer for gremlin queries.  This class provides a framework for applying
 * optimizations to gremlin queries.  Each optimization is implemented as a class
 * that implements {@link GremlinOptimization}.
 *
 * <p>The GremlinQueryOptimizer is the entry point for applying these optimizations.
 * It is a lazily created singleton obtained through {@link #getInstance()}; the
 * registered optimizations are applied in registration order by
 * {@link #optimize(GroovyExpression)}.
 */
public final class GremlinQueryOptimizer {

    private static final Logger LOGGER = LoggerFactory.getLogger(GremlinQueryOptimizer.class);

    /** Optimizations to run, in the order in which they are applied. */
    private final List<GremlinOptimization> optimizations = new ArrayList<>();

    //Allows expression factory to be substituted in unit tests.
    private static volatile GremlinExpressionFactory FACTORY = GremlinExpressionFactory.INSTANCE;

    /** Lazily created singleton; set back to null by {@link #reset()}. */
    private static volatile GremlinQueryOptimizer INSTANCE = null;

    private GremlinQueryOptimizer() {
    }

    /**
     * Appends an optimization to the end of the list of optimizations to apply.
     *
     * @param opt the optimization to register
     */
    private void addOptimization(GremlinOptimization opt) {
        optimizations.add(opt);
    }

    /**
     * Returns the shared optimizer, creating it on first use (or on first use
     * after {@link #reset()}).  Creation is guarded by double-checked locking
     * on the class object, and the instance is only published once all of its
     * optimizations have been registered.
     *
     * @return the shared optimizer instance
     */
    public static GremlinQueryOptimizer getInstance() {
        if (INSTANCE == null) {
            synchronized (GremlinQueryOptimizer.class) {
                if (INSTANCE == null) {
                    GremlinQueryOptimizer createdInstance = new GremlinQueryOptimizer();
                    //The order here is important.  If there is an "or" nested within an "and",
                    //that will not be found if ExpandOrsOptimization runs before ExpandAndsOptimization.
                    createdInstance.addOptimization(new ExpandAndsOptimization(FACTORY));
                    createdInstance.addOptimization(new ExpandOrsOptimization(FACTORY));
                    INSTANCE = createdInstance;
                }
            }
        }
        return INSTANCE;
    }

    /**
     * For testing only.  Replaces the expression factory used by this class.
     * The optimizations of an already created singleton were constructed with
     * the factory that was in effect at that time; call {@link #reset()} so that
     * the next {@link #getInstance()} builds them with the new factory.
     *
     * @param factory the factory to use from now on
     */
    @VisibleForTesting
    public static void setExpressionFactory(GremlinExpressionFactory factory) {
        GremlinQueryOptimizer.FACTORY = factory;
    }

    /**
     * For testing only.  Discards the singleton so that the next call to
     * {@link #getInstance()} creates a fresh one.
     */
    @VisibleForTesting
    public static void reset() {
        INSTANCE = null;
    }

    /**
     * Optimizes the provided groovy expression.  Note that the optimization
     * is a <i>destructive</i> process.  The source GroovyExpression will be
     * modified as part of the optimization process.  This is done to avoid
     * expensive copying operations where possible.
     *
     * @param source what to optimize
     * @return the optimized query: a statement list holding the initial statements
     *         collected in the optimization context followed by the optimized expression
     */
    public GroovyExpression optimize(GroovyExpression source) {
        //Parameterized logging: the (potentially large) expression trees are only
        //rendered to strings when debug logging is actually enabled.
        LOGGER.debug("Optimizing gremlin query: {}", source);
        OptimizationContext context = new OptimizationContext();
        GroovyExpression updatedExpression = source;
        for (GremlinOptimization opt : optimizations) {
            updatedExpression = optimize(updatedExpression, opt, context);
            LOGGER.debug("After {}, query = {}", opt.getClass().getSimpleName(), updatedExpression);
        }

        StatementListExpression result = new StatementListExpression();
        result.addStatements(context.getInitialStatements());
        result.addStatement(updatedExpression);
        LOGGER.debug("Final optimized query: {}", result);
        return result;
    }

    /**
     * Optimizes the expression using the given optimization.  The optimization is
     * applied to the expression itself when it reports that it applies; if the
     * optimization asks to be applied recursively, each child of the (possibly
     * replaced) expression is then optimized in the same way.
     *
     * @param source the expression to optimize
     * @param optimization the optimization to apply
     * @param context context shared by all optimizations of the current query
     * @return the optimized expression; this is {@code source} itself when the
     *         optimization did not apply and no child was replaced
     */
    private GroovyExpression optimize(GroovyExpression source, GremlinOptimization optimization,
                                      OptimizationContext context) {
        GroovyExpression result = source;
        if (optimization.appliesTo(source, context)) {
            //Apply the optimization to the expression.
            result = optimization.apply(source, context);
        }
        if (optimization.isApplyRecursively()) {
            //Visit the children, update result with the optimized
            //children.
            List<GroovyExpression> updatedChildren = new ArrayList<>();
            boolean changed = false;
            for (GroovyExpression child : result.getChildren()) {
                //Recursively optimize this child.
                GroovyExpression updatedChild = optimize(child, optimization, context);
                //Identity comparison: a child counts as changed only if a different object came back.
                changed |= updatedChild != child;
                updatedChildren.add(updatedChild);
            }
            if (changed) {
                //TBD - Can we update in place rather than making a copy?
                result = result.copy(updatedChildren);
            }
        }
        return result;
    }

    /**
     * Visits all expressions in the call hierarchy of an expression.  For example,
     * in the expression g.V().has('x','y'), the order would be
     * <ol>
     *    <li>pre-visit has('x','y')</li>
     *    <li>pre-visit V()</li>
     *    <li>visit g (non-function caller)</li>
     *    <li>post-visit V()</li>
     *    <li>post-visit has('x','y')</li>
     * </ol>
     * If a pre-visit returns {@code false}, neither the caller chain of that
     * function call nor its post-visit is processed.
     *
     * @param expr the expression whose call hierarchy should be visited; when
     *             {@code null}, only {@link CallHierarchyVisitor#visitNullCaller()} is invoked
     * @param visitor the visitor to notify
     */
    public static void visitCallHierarchy(GroovyExpression expr, CallHierarchyVisitor visitor) {
        if (expr == null) {
            visitor.visitNullCaller();
            return;
        }
        if (!(expr instanceof AbstractFunctionExpression)) {
            visitor.visitNonFunctionCaller(expr);
            return;
        }
        AbstractFunctionExpression functionCall = (AbstractFunctionExpression) expr;
        if (!visitor.preVisitFunctionCaller(functionCall)) {
            return;
        }
        visitCallHierarchy(functionCall.getCaller(), visitor);
        //Nothing follows the post-visit, so its boolean result does not affect this method.
        visitor.postVisitFunctionCaller(functionCall);
    }

    /**
     * Determines if the given expression is an "or" expression.
     *
     * @param expr the expression to test
     * @return the result of applying the {@link IsOr} predicate to the expression
     */
    public static boolean isOrExpression(GroovyExpression expr) {
        return IsOr.INSTANCE.apply(expr);
    }

    /**
     * Determines whether the given expression can safely
     * be pulled out of an and/or expression.
     *
     * @param expr an argument to an and or or function
     * @return {@code true} if no expression matching one of the forbidden traversal
     *         step types was found while visiting the call hierarchy of {@code expr}
     */
    public static boolean isExtractable(GroovyExpression expr) {

        HasForbiddenType hasForbiddenTypePredicate = new HasForbiddenType(FACTORY);

        //alias could conflict with alias in parent traversal
        hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.SIDE_EFFECT);

        //inlining out(), in() steps will change the result of calls after the and/or()
        hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.FLAT_MAP_TO_ELEMENTS);
        hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.FLAT_MAP_TO_VALUES);
        hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.BARRIER);
        hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.MAP_TO_ELEMENT);
        hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.MAP_TO_VALUE);

        //caller expects to be able to continue the traversal.  We can't end it
        hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.END);

        //we can't inline child traversals
        //(SIDE_EFFECT is also forbidden for this reason; it is already registered above)
        hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.SOURCE);
        hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.START);
        hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.NONE);
        hasForbiddenTypePredicate.addForbiddenType(TraversalStepType.BRANCH);

        ExpressionFinder forbiddenExpressionFinder = new ExpressionFinder(hasForbiddenTypePredicate);
        GremlinQueryOptimizer.visitCallHierarchy(expr, forbiddenExpressionFinder);
        return !forbiddenExpressionFinder.isExpressionFound();
    }

    /**
     * Recursively copies and follows the caller hierarchy of the expression until we come
     * to a function call with a null caller.  The caller of that expression is set
     * to newLeaf.  If the expression is a leaf anonymous traversal expression, it is
     * dropped and newLeaf itself is returned in its place.
     *
     * @param expr the function call at the top of the call hierarchy to copy
     * @param newLeaf the expression to install as the new leaf of the copied hierarchy
     * @return the updated (/copied) expression
     */
    public static GroovyExpression copyWithNewLeafNode(AbstractFunctionExpression expr, GroovyExpression newLeaf) {

        //remove leading anonymous traversal expression, if there is one
        if (FACTORY.isLeafAnonymousTraversalExpression(expr)) {
            //No copy of expr is needed here, and newLeaf is returned as is: the
            //declared return type does not require it to be a function expression.
            return newLeaf;
        }

        AbstractFunctionExpression result = (AbstractFunctionExpression) expr.copy();
        GroovyExpression newCaller;
        if (expr.getCaller() == null) {
            newCaller = newLeaf;
        } else {
            newCaller = copyWithNewLeafNode((AbstractFunctionExpression) result.getCaller(), newLeaf);
        }
        result.setCaller(newCaller);
        return result;
    }
}