/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.tajo.engine.planner; import org.apache.tajo.algebra.*; import java.util.ArrayList; import java.util.List; import java.util.Stack; /** * ExprNormalizer performs two kinds of works: * * <h3>1. Duplication Removal.</h3> * * For example, assume a simple query as follows: * <pre> * select price * rate as total_price, ..., order by price * rate * </pre> * * The expression <code>price * rate</code> is duplicated in both select list and order by clause. * Against those cases, ExprNormalizer removes duplicated expressions and replaces one with one reference. * In the case, ExprNormalizer replaces price * rate with total_price reference. * * <h3>2. Dissection of Expression</h3> * * A expression can be a complex expressions, including a mixed of scalar and aggregation expressions. * For example, assume an aggregation query as follows: * <pre> * select sum(price * rate) * (1 - avg(discount_rate))), ... * </pre> * * In this case, ExprNormalizer dissects the expression 'sum(price * rate) * (1 - avg(discount_rate)))' * into the following expressions: * <ul> * <li>$1 = price * rage</li> * <li>$2 = sum($1)</li> * <li>$3 = avg(discount_rate)</li> * <li>$4 = $2 * (1 - $3)</li> * </ul> * * It mainly two advantages. Firstly, it makes complex expression evaluations easier across multiple physical executors. * Second, it gives move opportunities to remove duplicated expressions. * * <h3>3. Name Normalization</h3> * * Users can use qualified column names, unqualified column names or aliased column references. * * Consider the following example: * * <pre> * select rate_a as total_rate, rate_a * 100, table1.rate_a, ... WHERE total_rate * 100 * </pre> * * <code>total_rate</code>, <code>rate_a</code>, and <code>table1.rate_a</code> are all the same references. But, * they have different forms. Due to their different forms, duplication removal can be hard. * * In order to solve this problem, ExprNormalizer normalizes all column references as qualified names while it keeps * its points.. */ class ExprNormalizer extends SimpleAlgebraVisitor<ExprNormalizer.ExprNormalizedResult, Object> { public static class ExprNormalizedResult { private final LogicalPlan plan; private final LogicalPlan.QueryBlock block; Expr baseExpr; // outmost expressions, which can includes one or more references of the results of aggregation // function. List<NamedExpr> aggExprs = new ArrayList<NamedExpr>(); // aggregation functions List<NamedExpr> scalarExprs = new ArrayList<NamedExpr>(); // scalar expressions which can be referred private ExprNormalizedResult(LogicalPlanner.PlanContext context) { this.plan = context.plan; this.block = context.queryBlock; } @Override public String toString() { return baseExpr.toString() + ", agg=" + aggExprs.size() + ", scalar=" + scalarExprs.size(); } } public ExprNormalizedResult normalize(LogicalPlanner.PlanContext context, Expr expr) throws PlanningException { ExprNormalizedResult exprNormalizedResult = new ExprNormalizedResult(context); Stack<Expr> stack = new Stack<Expr>(); stack.push(expr); visit(exprNormalizedResult, new Stack<Expr>(), expr); exprNormalizedResult.baseExpr = stack.pop(); return exprNormalizedResult; } @Override public Object visitCaseWhen(ExprNormalizedResult ctx, Stack<Expr> stack, CaseWhenPredicate expr) throws PlanningException { stack.push(expr); for (CaseWhenPredicate.WhenExpr when : expr.getWhens()) { visit(ctx, stack, when.getCondition()); visit(ctx, stack, when.getResult()); if (OpType.isAggregationFunction(when.getCondition().getType())) { String referenceName = ctx.block.namedExprsMgr.addExpr(when.getCondition()); ctx.aggExprs.add(new NamedExpr(when.getCondition(), referenceName)); when.setCondition(new ColumnReferenceExpr(referenceName)); } if (OpType.isAggregationFunction(when.getResult().getType())) { String referenceName = ctx.block.namedExprsMgr.addExpr(when.getResult()); ctx.aggExprs.add(new NamedExpr(when.getResult(), referenceName)); when.setResult(new ColumnReferenceExpr(referenceName)); } } if (expr.hasElseResult()) { visit(ctx, stack, expr.getElseResult()); if (OpType.isAggregationFunction(expr.getElseResult().getType())) { String referenceName = ctx.block.namedExprsMgr.addExpr(expr.getElseResult()); ctx.aggExprs.add(new NamedExpr(expr.getElseResult(), referenceName)); expr.setElseResult(new ColumnReferenceExpr(referenceName)); } } stack.pop(); return expr; } @Override public Expr visitUnaryOperator(ExprNormalizedResult ctx, Stack<Expr> stack, UnaryOperator expr) throws PlanningException { super.visitUnaryOperator(ctx, stack, expr); if (OpType.isAggregationFunction(expr.getChild().getType())) { // Get an anonymous column name and replace the aggregation function by the column name String refName = ctx.block.namedExprsMgr.addExpr(expr.getChild()); ctx.aggExprs.add(new NamedExpr(expr.getChild(), refName)); expr.setChild(new ColumnReferenceExpr(refName)); } return expr; } @Override public Expr visitBinaryOperator(ExprNormalizedResult ctx, Stack<Expr> stack, BinaryOperator expr) throws PlanningException { super.visitBinaryOperator(ctx, stack, expr); //////////////////////// // For Left Term //////////////////////// if (OpType.isAggregationFunction(expr.getLeft().getType())) { String leftRefName = ctx.block.namedExprsMgr.addExpr(expr.getLeft()); ctx.aggExprs.add(new NamedExpr(expr.getLeft(), leftRefName)); expr.setLeft(new ColumnReferenceExpr(leftRefName)); } //////////////////////// // For Right Term //////////////////////// if (OpType.isAggregationFunction(expr.getRight().getType())) { String rightRefName = ctx.block.namedExprsMgr.addExpr(expr.getRight()); ctx.aggExprs.add(new NamedExpr(expr.getRight(), rightRefName)); expr.setRight(new ColumnReferenceExpr(rightRefName)); } return expr; } /////////////////////////////////////////////////////////////////////////////////////////////////////////// // Function Section /////////////////////////////////////////////////////////////////////////////////////////////////////////// @Override public Expr visitFunction(ExprNormalizedResult ctx, Stack<Expr> stack, FunctionExpr expr) throws PlanningException { stack.push(expr); Expr param; for (int i = 0; i < expr.getParams().length; i++) { param = expr.getParams()[i]; visit(ctx, stack, param); if (OpType.isAggregationFunction(param.getType())) { String referenceName = ctx.plan.generateUniqueColumnName(param); ctx.aggExprs.add(new NamedExpr(param, referenceName)); expr.getParams()[i] = new ColumnReferenceExpr(referenceName); } } stack.pop(); return expr; } @Override public Expr visitGeneralSetFunction(ExprNormalizedResult ctx, Stack<Expr> stack, GeneralSetFunctionExpr expr) throws PlanningException { stack.push(expr); Expr param; for (int i = 0; i < expr.getParams().length; i++) { param = expr.getParams()[i]; visit(ctx, stack, param); // If parameters are all constants, we don't need to dissect an aggregation expression into two parts: // function and parameter parts. if (!OpType.isLiteral(param.getType())) { String referenceName = ctx.block.namedExprsMgr.addExpr(param); ctx.scalarExprs.add(new NamedExpr(param, referenceName)); expr.getParams()[i] = new ColumnReferenceExpr(referenceName); } } stack.pop(); return expr; } /////////////////////////////////////////////////////////////////////////////////////////////////////////// // Literal Section /////////////////////////////////////////////////////////////////////////////////////////////////////////// @Override public Expr visitCastExpr(ExprNormalizedResult ctx, Stack<Expr> stack, CastExpr expr) throws PlanningException { super.visitCastExpr(ctx, stack, expr); if (OpType.isAggregationFunction(expr.getType())) { String referenceName = ctx.block.namedExprsMgr.addExpr(expr.getChild()); ctx.aggExprs.add(new NamedExpr(expr.getChild(), referenceName)); expr.setChild(new ColumnReferenceExpr(referenceName)); } return expr; } @Override public Expr visitColumnReference(ExprNormalizedResult ctx, Stack<Expr> stack, ColumnReferenceExpr expr) throws PlanningException { // normalize column references. if (!expr.hasQualifier()) { if (ctx.block.namedExprsMgr.contains(expr.getCanonicalName())) { NamedExpr namedExpr = ctx.block.namedExprsMgr.getNamedExpr(expr.getCanonicalName()); return new ColumnReferenceExpr(namedExpr.getAlias()); } else { String normalized = ctx.plan.getNormalizedColumnName(ctx.block, expr); expr.setName(normalized); } } return expr; } }