/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.gremlin.optimizer;
import java.util.List;
import org.apache.atlas.gremlin.GremlinExpressionFactory;
import org.apache.atlas.groovy.AbstractFunctionExpression;
import org.apache.atlas.groovy.ClosureExpression;
import org.apache.atlas.groovy.ClosureExpression.VariableDeclaration;
import org.apache.atlas.groovy.FunctionCallExpression;
import org.apache.atlas.groovy.GroovyExpression;
import org.apache.atlas.groovy.IdentifierExpression;
/**
* Extracts common expressions from an or-containing expression
* into functions. These expressions would otherwise be duplicated
* as part of expanding the "or". Doing this shortens the overall length
* of the Gremlin script so we can maximize query performance.
*
*/
public class FunctionGenerator implements CallHierarchyVisitor {

    //Function length constants.
    //These assume we won't reach more than 9 function definitions. Even if we do, this is still
    //a reasonable approximation.

    /**
     * Approximate length of the definition boilerplate of a generated
     * function that takes no arguments.
     */
    private static final int INITIAL_FUNCTION_DEF_LENGTH = "def f1={};".length();

    /**
     * Approximate length of one call to a generated function.
     */
    private static final int FUNCTION_CALL_OVERHEAD = "f1()".length();

    /**
     * Name of the single parameter of generated functions that
     * take a traversal as their argument.
     */
    private static final String TRAVERSAL_PARAMETER_NAME = "x";

    /**
     * Approximate length of the definition boilerplate of a generated
     * function that takes one traversal argument. This depends on the
     * traversal class name reported by the factory, so it is computed
     * per instance in the constructor.
     */
    private final int functionDefLength;

    /**
     * The expression that should be the first (deepest) expression
     * in the body of the next generated function. As we go up the
     * expression tree in the post visit, this is updated based on the
     * expressions we see. During the post visits, if it is null,
     * the body expression is set to the expression we're visiting.
     * As we go up the tree, it is nulled out if we create a function
     * or encounter an or expression. This guarantees that the
     * next function body will not contain any or expressions
     * and that it will not have expressions that are already
     * part of some other function.
     */
    private GroovyExpression nextFunctionBodyStart;

    /**
     * The number of times expressions will be duplicated.
     */
    private int scaleFactor = 1;

    private final OptimizationContext context;

    /**
     * The current depth in the expression tree.
     */
    private int depth = 0;

    /**
     * The name of the last function that was generated. If set,
     * we can safely update this function instead of creating a new one.
     */
    private String currentFunctionName;

    /**
     * The updated expression we will pass back to the caller.
     */
    private GroovyExpression newRootExpression;

    private final GremlinExpressionFactory factory;

    /**
     * Creates a function generator.
     *
     * @param factory used to obtain the traversal class name that is used as
     *                the parameter type of generated one-argument functions
     * @param context receives the generated function definitions
     */
    public FunctionGenerator(GremlinExpressionFactory factory, OptimizationContext context) {
        this.context = context;
        this.factory = factory;
        functionDefLength = ("def f1={" + factory.getTraversalExpressionClass() + " "
                + TRAVERSAL_PARAMETER_NAME + "->};").length();
    }

    /**
     * Called on the way down the caller chain. Tracks the depth, multiplies
     * the scale factor by the number of arguments of every "or" that is seen
     * and remembers the first expression visited as the initial root.
     *
     * @param expr the function expression being entered
     * @return always true
     */
    @Override
    public boolean preVisitFunctionCaller(AbstractFunctionExpression expr) {
        depth++;
        if (IsOr.INSTANCE.apply(expr)) {
            FunctionCallExpression functionCall = (FunctionCallExpression) expr;
            scaleFactor *= functionCall.getArguments().size();
        }
        if (newRootExpression == null) {
            newRootExpression = expr;
        }
        return true;
    }

    /**
     * Called for a caller that is not a function expression. It becomes the
     * start of the next function body unless a start is already recorded.
     *
     * @param expr the non-function expression
     */
    @Override
    public void visitNonFunctionCaller(GroovyExpression expr) {
        if (nextFunctionBodyStart == null) {
            nextFunctionBodyStart = expr;
        }
    }

    @Override
    public void visitNullCaller() {
        //nothing to do
    }

    /**
     * Called on the way back up the caller chain. Examines the caller of
     * expr and, when expr is the root, additionally examines expr itself.
     *
     * @param expr the function expression being left
     * @return always true
     */
    @Override
    public boolean postVisitFunctionCaller(AbstractFunctionExpression expr) {
        boolean isRootExpr = depth == 1;
        visitParentExpression(expr);

        //The root expression has no parent. To simplify the logic, we create
        //a dummy expression so it does have a parent, then call visitParentExpression again
        //to examine the root expression.
        if (isRootExpr) {
            FunctionCallExpression dummyParent = new FunctionCallExpression(expr, "dummy");
            visitParentExpression(dummyParent);
            //the root may have been replaced by a call to a generated function
            newRootExpression = dummyParent.getCaller();
        }
        depth--;
        return true;
    }

    /**
     * Checks to see if the *caller* of this expression should become part
     * of a function. If so, either a new function is created, or the
     * expression becomes part of the last function we created.
     *
     * @param parentExpr the expression whose caller is examined
     */
    private void visitParentExpression(AbstractFunctionExpression parentExpr) {
        if (nextFunctionBodyStart == null) {
            nextFunctionBodyStart = parentExpr;
        }
        if (currentFunctionName != null) {
            updateCurrentFunction(parentExpr);
        } else {
            createFunctionIfNeeded(parentExpr);
        }
        if (GremlinQueryOptimizer.isOrExpression(parentExpr)) {
            //reset
            currentFunctionName = null;
            //don't include 'or' in generated functions
            nextFunctionBodyStart = null;
        }
    }

    /**
     * Creates a function whose body goes from nextFunctionBodyStart
     * up to (and including) the caller of parentExpr, provided that doing
     * so shortens the generated script. When a function is created, the
     * caller of parentExpr is replaced by a call to that function.
     *
     * @param parentExpr the expression whose caller is the last (outermost)
     *                   expression of the potential function body
     */
    private void createFunctionIfNeeded(AbstractFunctionExpression parentExpr) {
        GroovyExpression potentialFunctionBody = parentExpr.getCaller();
        if (potentialFunctionBody == null) {
            //parentExpr has no caller, so there is nothing to extract yet.
            //Without this guard the length estimate below would fail with a
            //NullPointerException.
            return;
        }
        if (!creatingFunctionShortensGremlin(potentialFunctionBody)) {
            return;
        }

        GroovyExpression functionCall;
        if (functionBodyStartHasCaller()) {
            //The function body start is a function call that has a caller. In this
            //case, we generate a function that takes one argument, which
            //is a graph traversal. We have an expression tree that
            //looks kind of like the following:
            //
            //                     parentExpr
            //                       /
            //                      / caller
            //                    |/_
            //           potentialFunctionBody
            //                   /
            //                  / caller
            //                |/_
            //               ...
            //               /
            //              / caller
            //            |/_
            //    nextFunctionBodyStart
            //           /
            //          / caller
            //        |/_
            //    oldCaller
            //
            //
            // Note that potentialFunctionBody and nextFunctionBodyStart
            // could be the same expression. Let's say that the next
            // function name is f1
            //
            // We reshuffle these expressions to the following:
            //
            //                     parentExpr
            //                       /
            //                      / caller
            //                    |/_
            //                f1(oldCaller)
            //
            //
            //           potentialFunctionBody   <- body of new function "f1(GraphTraversal x)"
            //                   /
            //                  / caller
            //                |/_
            //               ...
            //               /
            //              / caller
            //            |/_
            //    nextFunctionBodyStart
            //           /
            //          / caller
            //        |/_
            //        x
            //
            // As an example, suppose parentExpr is g.V().or(x,y).has(a).has(b).has(c)
            // where has(a) is nextFunctionBodyStart.
            //
            // We generate a function f1 = { GraphTraversal x -> x.has(a).has(b) }
            // parentExpr would become : f1(g.V().or(x,y)).has(c)
            AbstractFunctionExpression bodyStartFunction =
                    (AbstractFunctionExpression) nextFunctionBodyStart;
            IdentifierExpression var = new IdentifierExpression(TRAVERSAL_PARAMETER_NAME);
            GroovyExpression oldCaller = bodyStartFunction.getCaller();
            //detach the body from the rest of the chain by rooting it at the parameter
            bodyStartFunction.setCaller(var);
            currentFunctionName = context.addFunctionDefinition(
                    new VariableDeclaration(factory.getTraversalExpressionClass(), TRAVERSAL_PARAMETER_NAME),
                    potentialFunctionBody);
            functionCall = new FunctionCallExpression(potentialFunctionBody.getType(),
                    currentFunctionName, oldCaller);
        } else {
            //The function body start is not a function call, or it is a function
            //call without a caller. In this case the body is self-contained and
            //we generate a function that takes no arguments.
            // As an example, suppose parentExpr is g.V().has(a).has(b).has(c)
            // where g is nextFunctionBodyStart.
            //
            // We generate a function f1 = { g.V().has(a).has(b) }
            // parentExpr would become : f1().has(c)
            currentFunctionName = context.addFunctionDefinition(null, potentialFunctionBody);
            functionCall = new FunctionCallExpression(potentialFunctionBody.getType(), currentFunctionName);
        }

        //nextFunctionBodyStart is now part of a function definition, don't propagate it
        nextFunctionBodyStart = null;
        parentExpr.setCaller(functionCall);
    }

    /**
     * Adds the caller of parentExpr to the current body of the last
     * function that was created. The caller is unlinked from parentExpr,
     * which is re-pointed at the caller's own caller. Nothing happens if
     * the caller of parentExpr is not a function expression.
     *
     * @param parentExpr the expression whose caller is moved into the function
     */
    private void updateCurrentFunction(AbstractFunctionExpression parentExpr) {
        GroovyExpression expr = parentExpr.getCaller();
        if (expr instanceof AbstractFunctionExpression) {
            AbstractFunctionExpression exprAsFunction = (AbstractFunctionExpression) expr;
            GroovyExpression exprCaller = exprAsFunction.getCaller();
            parentExpr.setCaller(exprCaller);
            updateCurrentFunctionDefinition(exprAsFunction);
        }
    }

    /**
     * Appends exprToAdd to the last statement of the body of the current
     * function, so that the function now returns the result of exprToAdd.
     *
     * @param exprToAdd the expression to append to the function body
     * @throws IllegalStateException if the context has no body registered
     *                               for the current function name
     */
    private void updateCurrentFunctionDefinition(AbstractFunctionExpression exprToAdd) {
        ClosureExpression functionBodyClosure = context.getUserDefinedFunctionBody(currentFunctionName);
        if (functionBodyClosure == null) {
            throw new IllegalStateException("User-defined function " + currentFunctionName + " not found!");
        }
        List<GroovyExpression> exprs = functionBodyClosure.getStatements();
        int lastIndex = exprs.size() - 1;
        GroovyExpression currentFunctionBody = exprs.get(lastIndex);
        //Update the expression so it is called by the current return
        //value of the function.
        exprToAdd.setCaller(currentFunctionBody);
        functionBodyClosure.replaceStatement(lastIndex, exprToAdd);
    }

    /**
     * Determines if extracting this expression into a function will shorten
     * the overall length of the Groovy script.
     *
     * @param headExpr the last (outermost) expression of the potential
     *                 function body; must not be null
     * @return true if the estimated script is shorter with the function
     */
    private boolean creatingFunctionShortensGremlin(GroovyExpression headExpr) {
        //the text of headExpr includes everything below the body start, which
        //would not be part of the function, so subtract it
        int length = headExpr.toString().length() - getTailLength();
        int overhead = functionBodyStartHasCaller() ? functionDefLength : INITIAL_FUNCTION_DEF_LENGTH;
        overhead += FUNCTION_CALL_OVERHEAD * scaleFactor;
        //length * scaleFactor = space taken by having the expression be inlined [scaleFactor] times
        //overhead + length = space taken by the function definition and its calls
        return length * scaleFactor > overhead + length;
    }

    /**
     * Computes the length of the text of the expression that calls
     * nextFunctionBodyStart, i.e. the part of the chain that stays outside
     * of the function body.
     *
     * @return that length, or 0 if nextFunctionBodyStart is not a function
     *         expression with a caller
     */
    private int getTailLength() {
        if (!functionBodyStartHasCaller()) {
            return 0;
        }
        AbstractFunctionExpression bodyStartFunction = (AbstractFunctionExpression) nextFunctionBodyStart;
        return bodyStartFunction.getCaller().toString().length();
    }

    /**
     * Tells whether the generated function needs a traversal argument: true
     * only if nextFunctionBodyStart is a function expression that has a caller.
     * Both the length estimate and the function creation use this check so
     * that they always agree on the kind of function being generated.
     *
     * NOTE(review): this assumes getCaller() returns null for a function that
     * has no caller, as the existence of visitNullCaller() suggests - confirm.
     */
    private boolean functionBodyStartHasCaller() {
        return nextFunctionBodyStart instanceof AbstractFunctionExpression
                && ((AbstractFunctionExpression) nextFunctionBodyStart).getCaller() != null;
    }

    /**
     * Returns the root expression after function generation. This is the
     * original root, or the expression that replaced it if the root became
     * part of a generated function.
     */
    public GroovyExpression getNewRootExpression() {
        return newRootExpression;
    }
}