/* This file is part of VoltDB.
 * Copyright (C) 2008-2017 VoltDB Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
 */

package org.voltdb.planner;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.json_voltpatches.JSONException;
import org.voltdb.catalog.Column;
import org.voltdb.catalog.MaterializedViewHandlerInfo;
import org.voltdb.catalog.MaterializedViewInfo;
import org.voltdb.catalog.Table;
import org.voltdb.expressions.AbstractExpression;
import org.voltdb.expressions.AggregateExpression;
import org.voltdb.expressions.ExpressionUtil;
import org.voltdb.expressions.TupleValueExpression;
import org.voltdb.planner.parseinfo.BranchNode;
import org.voltdb.planner.parseinfo.JoinNode;
import org.voltdb.planner.parseinfo.StmtTableScan;
import org.voltdb.planner.parseinfo.StmtTargetTableScan;
import org.voltdb.plannodes.AbstractPlanNode;
import org.voltdb.plannodes.AbstractScanPlanNode;
import org.voltdb.plannodes.HashAggregatePlanNode;
import org.voltdb.plannodes.NodeSchema;
import org.voltdb.plannodes.ProjectionPlanNode;
import org.voltdb.plannodes.SchemaColumn;
import org.voltdb.types.ExpressionType;
import org.voltdb.utils.CatalogUtil;

/**
 * When a materialized view has a source table that is partitioned, and the source table's
 * partition key is not a group by key for the materialized view, we need to "re-aggregate"
 * the contents of the view in the coordinator fragment, in order to account for possible
 * duplicate keys coming from different sites. This "re-aggregation" is done by injecting
 * an extra aggregation node above the receive node on the coordinator fragment.
 *
 * This class encapsulates the info required for adding in re-aggregation so that scans
 * of the materialized view get correct answers.
 */
public class MaterializedViewFixInfo {
    // New inlined projection node for the scan node, containing the extra group-by columns.
    private ProjectionPlanNode m_scanInlinedProjectionNode = null;
    // New re-aggregation plan node on the coordinator to eliminate the duplicated rows.
    private HashAggregatePlanNode m_reAggNode = null;
    // Does this query on a partitioned materialized view need to be fixed?
    private boolean m_needed = false;
    // Materialized view table scan.
    private StmtTableScan m_mvTableScan = null;
    // Scan node for the join query.
    AbstractScanPlanNode m_scanNode = null;

    // ENG-5386: Edge case query.
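    // A sketch of the edge case (hypothetical schema; assumes P1 is partitioned on a
    // column that is NOT in the view's GROUP BY):
    //   CREATE VIEW V_P1 (C1, CNT, SUM_A) AS
    //       SELECT C1, COUNT(*), SUM(A) FROM P1 GROUP BY C1;
    //   SELECT C1, SUM(SUM_A) FROM V_P1 GROUP BY C1;
    // The query already re-aggregates by the view's own GROUP BY column with a
    // compatible aggregate (a SUM over a SUM column), so it gets correct answers
    // without the added re-aggregation node and the fix can be skipped.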
    private boolean m_edgeCaseQueryNoFixNeeded = true;

    public boolean needed() {
        return m_needed;
    }

    public void setNeeded(boolean need) {
        m_needed = need;
    }

    public String getMVTableName() {
        assert(m_mvTableScan != null);
        return m_mvTableScan.getTableName();
    }

    public String getMVTableAlias() {
        assert(m_mvTableScan != null);
        return m_mvTableScan.getTableAlias();
    }

    public HashAggregatePlanNode getReAggregationPlanNode() {
        return m_reAggNode;
    }

    public void setEdgeCaseQueryNoFixNeeded(boolean edgeCase) {
        m_edgeCaseQueryNoFixNeeded = edgeCase;
    }

    /**
     * Check whether the results from a materialized view need to be
     * re-aggregated on the coordinator by the view's GROUP BY columns
     * prior to any of the processing specified by the query.
     * This is normally the case when a mat view's source table is partitioned
     * and the view's GROUP BY does not include the partition key.
     * There is a special edge case where the query already contains the exact
     * reaggregations that the added-cost fix would introduce, so the fix can
     * be skipped as an optimization.
     * Set the m_needed flag to true only if the reaggregation fix is needed.
     * @return The value of m_needed
     */
    public boolean processMVBasedQueryFix(StmtTableScan mvTableScan,
            Set<SchemaColumn> scanColumns, JoinNode joinTree,
            List<ParsedColInfo> displayColumns, List<ParsedColInfo> groupByColumns) {
        // Check valid cases first.
        //@TODO
        if ( ! (mvTableScan instanceof StmtTargetTableScan)) {
            return false;
        }
        Table table = ((StmtTargetTableScan)mvTableScan).getTargetTable();
        assert (table != null);
        String mvTableName = table.getTypeName();

        Table srcTable = table.getMaterializer();
        if (srcTable == null) {
            return false;
        }
        if (table.getIsreplicated()) {
            return false;
        }
        // Check whether the partition column is in the group-by column list.
        // If the view table itself is partitioned (its partition column is set),
        // no fix is needed.
        if (table.getPartitioncolumn() != null) {
            return false;
        }
        m_mvTableScan = mvTableScan;

        Set<String> mvDDLGroupbyColumnNames = new HashSet<>();
        List<Column> mvColumnArray = CatalogUtil.getSortedCatalogItems(table.getColumns(), "index");
        String mvTableAlias = getMVTableAlias();

        // Get the number of group-by columns.
        int numOfGroupByColumns;
        MaterializedViewInfo mvInfo = srcTable.getViews().get(mvTableName);
        if (mvInfo != null) {
            // Single-table view.
            String complexGroupbyJson = mvInfo.getGroupbyexpressionsjson();
            if (complexGroupbyJson.length() > 0) {
                List<AbstractExpression> mvComplexGroupbyCols = null;
                try {
                    mvComplexGroupbyCols = AbstractExpression.fromJSONArrayString(complexGroupbyJson, null);
                } catch (JSONException e) {
                    e.printStackTrace();
                }
                numOfGroupByColumns = mvComplexGroupbyCols.size();
            }
            else {
                numOfGroupByColumns = mvInfo.getGroupbycols().size();
            }
        }
        else {
            // Joined-table view.
            MaterializedViewHandlerInfo mvHandlerInfo = table.getMvhandlerinfo().get("mvHandlerInfo");
            numOfGroupByColumns = mvHandlerInfo.getGroupbycolumncount();
        }

        if (scanColumns.isEmpty() && numOfGroupByColumns == 0) {
            // This is an edge case that can happen if the view
            // has no group by keys, and we are just
            // doing a count(*) on the output of the view.
            //
            // Having no GB keys or scan columns would cause us to
            // produce plan nodes that have a 0-column output schema.
            // We can't handle this in several places, so add the
            // count(*) column from the view to the scan columns.
            Column mvCol = mvColumnArray.get(0); // this is the "count(*)" column.
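            // For example (hypothetical): "SELECT COUNT(*) FROM V_NO_GB", where
            // V_NO_GB is a view defined without a GROUP BY clause. Its COUNT(*)
            // column is added to the scan columns below so that downstream plan
            // nodes have a non-empty output schema.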
            TupleValueExpression tve = new TupleValueExpression(mvTableName, mvTableAlias, mvCol, 0);
            tve.setOrigStmtId(mvTableScan.getStatementId());
            String colName = mvCol.getName();
            SchemaColumn scol = new SchemaColumn(mvTableName, mvTableAlias, colName, colName, tve);
            scanColumns.add(scol);
        }

        // Start the real materialized view processing to fix the duplicates problem.
        // (1) Construct new projection columns for the scan plan node.
        Set<SchemaColumn> mvDDLGroupbyColumns = new HashSet<>();
        NodeSchema inlineProjSchema = new NodeSchema();
        for (SchemaColumn scol : scanColumns) {
            inlineProjSchema.addColumn(scol);
        }

        for (int i = 0; i < numOfGroupByColumns; i++) {
            Column mvCol = mvColumnArray.get(i);
            String colName = mvCol.getName();
            TupleValueExpression tve = new TupleValueExpression(mvTableName, mvTableAlias, mvCol, i);
            tve.setOrigStmtId(mvTableScan.getStatementId());

            mvDDLGroupbyColumnNames.add(colName);
            SchemaColumn scol = new SchemaColumn(mvTableName, mvTableAlias, colName, colName, tve);
            mvDDLGroupbyColumns.add(scol);
            if (!scanColumns.contains(scol)) {
                scanColumns.add(scol);
                // Construct new projection columns for the scan plan node.
                inlineProjSchema.addColumn(scol);
            }
        }

        // Record the re-aggregation type for each scan column.
        Map<String, ExpressionType> mvColumnReAggType = new HashMap<>();
        for (int i = numOfGroupByColumns; i < mvColumnArray.size(); i++) {
            Column mvCol = mvColumnArray.get(i);
            ExpressionType reAggType = ExpressionType.get(mvCol.getAggregatetype());
            // COUNT and COUNT(*) re-aggregate as a SUM of the per-partition counts.
            if (reAggType == ExpressionType.AGGREGATE_COUNT_STAR ||
                    reAggType == ExpressionType.AGGREGATE_COUNT) {
                reAggType = ExpressionType.AGGREGATE_SUM;
            }
            mvColumnReAggType.put(mvCol.getName(), reAggType);
        }

        assert (inlineProjSchema.size() > 0);
        m_scanInlinedProjectionNode = new ProjectionPlanNode(inlineProjSchema);

        // (2) Construct the re-aggregation node.
        // Construct the re-aggregation plan node's aggSchema.
        m_reAggNode = new HashAggregatePlanNode();
        int outputColumnIndex = 0;
        // inlineProjSchema contains the group-by columns, while aggSchema may not.
        NodeSchema aggSchema = new NodeSchema();

        // Construct the re-aggregation node's aggregation and group-by list.
        for (SchemaColumn scol : inlineProjSchema.getColumns()) {
            if (mvDDLGroupbyColumns.contains(scol)) {
                // Add the group-by expression.
                m_reAggNode.addGroupByExpression(scol.getExpression());
            }
            else {
                ExpressionType reAggType = mvColumnReAggType.get(scol.getColumnName());
                assert(reAggType != null);
                AbstractExpression agg_input_expr = scol.getExpression();
                assert(agg_input_expr instanceof TupleValueExpression);
                // Add the aggregation information.
                m_reAggNode.addAggregate(reAggType, false, outputColumnIndex, agg_input_expr);
            }
            aggSchema.addColumn(scol);
            outputColumnIndex++;
        }
        assert (aggSchema.size() > 0);
        m_reAggNode.setOutputSchema(aggSchema);

        // Collect all TVEs that need to be re-aggregated on the coordinator.
        List<TupleValueExpression> needReAggTVEs = new ArrayList<>();
        List<AbstractExpression> aggPostExprs = new ArrayList<>();
        for (int i = numOfGroupByColumns; i < mvColumnArray.size(); i++) {
            Column mvCol = mvColumnArray.get(i);
            TupleValueExpression tve = new TupleValueExpression(mvTableName, mvTableAlias, mvCol, -1);
            tve.setOrigStmtId(mvTableScan.getStatementId());
            needReAggTVEs.add(tve);
        }

        collectReAggNodePostExpressions(joinTree, needReAggTVEs, aggPostExprs);
        AbstractExpression aggPostExpr = ExpressionUtil.combinePredicates(aggPostExprs);
        // Add post filters for the re-aggregation node.
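        // Filters that reference re-aggregated view columns (e.g., a hypothetical
        // "WHERE SUM_A > 100" against the view's SUM column) cannot be evaluated
        // per partition. collectReAggNodePostExpressions() pulled them out of the
        // join/WHERE clauses above, and they are applied here as a post-predicate
        // evaluated after the coordinator re-aggregation.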
        m_reAggNode.setPostPredicate(aggPostExpr);

        // ENG-5386
        if (m_edgeCaseQueryNoFixNeeded &&
                edgeCaseQueryNoFixNeeded(mvDDLGroupbyColumnNames, mvColumnReAggType,
                        displayColumns, groupByColumns)) {
            return false;
        }

        m_needed = true;
        return true;
    }

    /** ENG-5386: do not fix some cases in order to get better performance.
     * There is a special edge case when certain queries are applied to
     * partitioned materialized views that do not contain the partition key in
     * their GROUP BY columns. In this special case, where the query duplicates
     * the reaggregation behavior of the fix -- which must consist of MIN, MAX
     * and/or non-distinct SUM reaggregations -- the added-cost fix code can be
     * skipped as an optimization.
     */
    private boolean edgeCaseQueryNoFixNeeded(Set<String> mvDDLGroupbyColumnNames,
            Map<String, ExpressionType> mvColumnAggType,
            List<ParsedColInfo> displayColumns, List<ParsedColInfo> groupByColumns) {
        // Condition (1): Any group-by column on the view must be one of the MV DDL group-by TVEs.
        for (ParsedColInfo gcol : groupByColumns) {
            assert(gcol.expression instanceof TupleValueExpression);
            TupleValueExpression tve = (TupleValueExpression) gcol.expression;
            if (tve.getTableName().equals(getMVTableName()) &&
                    ! mvDDLGroupbyColumnNames.contains(tve.getColumnName())) {
                return false;
            }
        }

        // Condition (2): All the aggregations must qualify.
        for (ParsedColInfo dcol : displayColumns) {
            if (groupByColumns.contains(dcol)) {
                // Skip a group-by column pass-through.
                continue;
            }
            if ( ! (dcol.expression instanceof AggregateExpression)) {
                return false;
            }
            AggregateExpression aggExpr = (AggregateExpression) dcol.expression;
            if ( ! (aggExpr.getLeft() instanceof TupleValueExpression)) {
                return false;
            }
            ExpressionType type = aggExpr.getExpressionType();

            // Only MIN, MAX, and non-DISTINCT SUM
            // can tolerate a skipped reaggregation.
            if ((type != ExpressionType.AGGREGATE_SUM || aggExpr.isDistinct()) &&
                    type != ExpressionType.AGGREGATE_MIN &&
                    type != ExpressionType.AGGREGATE_MAX) {
                return false;
            }

            TupleValueExpression tve = (TupleValueExpression) aggExpr.getLeft();
            if (tve.getTableName().equals(getMVTableName())) {
                String columnName = tve.getColumnName();
                // The type of the aggregation in the query must match the
                // type of aggregation defined for the view column --
                // SUMming a SUM, MINning a MIN, or MAXing a MAX.
                if (mvColumnAggType.get(columnName) != type) {
                    return false;
                }
            }
            else {
                // The aggregate argument is a column from the
                // other (non-view) side of the join.
                // It's OK for its rows to get duplicated by joining
                // with multiple "partial group" rows ONLY if it is
                // feeding a MIN or MAX.
                // The duplication would corrupt a SUM.
                if (type == ExpressionType.AGGREGATE_SUM) {
                    return false;
                }
            }
        }

        // The edge case query can be optimized with correct answers without the MV reaggregation fix.
        return true;
    }

    /**
     * Find the scan node on the MV table and replace it with the reAggNode for a join query.
     * This scan node cannot be inlined, so it should be a direct child of a join node.
     * @param node
     */
    public boolean processScanNodeWithReAggNode(AbstractPlanNode node, AbstractPlanNode reAggNode) {
        // The MV table scan node cannot be among the inlined nodes.
        for (int i = 0; i < node.getChildCount(); i++) {
            AbstractPlanNode child = node.getChild(i);
            if (child instanceof AbstractScanPlanNode) {
                AbstractScanPlanNode scanNode = (AbstractScanPlanNode) child;
                if (!scanNode.getTargetTableName().equals(getMVTableName())) {
                    continue;
                }
                if (reAggNode != null) {
                    // Join query case.
                    node.setAndLinkChild(i, reAggNode);
                }
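                // In the join query case above, the re-aggregation node has taken
                // the MV scan's place as this node's i-th child. For a single-table
                // query (reAggNode == null) the scan stays in place, and the caller
                // can obtain the coordinator re-aggregation node from
                // getReAggregationPlanNode() instead.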
                // Process the scan node: set up its scan columns by adding the
                // inlined projection node.
                scanNode.addInlinePlanNode(m_scanInlinedProjectionNode);
                m_scanNode = scanNode;
                return true;
            }
            else {
                boolean replaced = processScanNodeWithReAggNode(child, reAggNode);
                if (replaced) {
                    return true;
                }
            }
        }
        return false;
    }

    private void collectReAggNodePostExpressions(JoinNode joinTree,
            List<TupleValueExpression> needReAggTVEs,
            List<AbstractExpression> aggPostExprs) {
        if (joinTree instanceof BranchNode) {
            collectReAggNodePostExpressions(((BranchNode)joinTree).getLeftNode(), needReAggTVEs, aggPostExprs);
            collectReAggNodePostExpressions(((BranchNode)joinTree).getRightNode(), needReAggTVEs, aggPostExprs);
            return;
        }

        joinTree.setJoinExpression(processFilters(joinTree.getJoinExpression(),
                needReAggTVEs, aggPostExprs));

        // For outer join filters. An inner join or single-table query will have a null WHERE expression here.
        joinTree.setWhereExpression(processFilters(joinTree.getWhereExpression(),
                needReAggTVEs, aggPostExprs));
    }

    private boolean fromMVTableOnly(List<TupleValueExpression> tves) {
        String mvTableName = getMVTableName();
        for (TupleValueExpression tve : tves) {
            String tveTableName = tve.getTableName();
            if (!mvTableName.equals(tveTableName)) {
                return false;
            }
        }
        return true;
    }

    private AbstractExpression processFilters(AbstractExpression filters,
            List<TupleValueExpression> needReAggTVEs,
            List<AbstractExpression> aggPostExprs) {
        if (filters == null) {
            return null;
        }

        // Collect the filter expressions that can remain pushed down below the re-aggregation.
        List<AbstractExpression> remainingExprs = new ArrayList<>();
        // Check each conjunct of the filter.
        List<AbstractExpression> exprs = ExpressionUtil.uncombinePredicate(filters);
        for (AbstractExpression expr : exprs) {
            List<TupleValueExpression> tves = expr.findAllTupleValueSubexpressions();

            boolean canPushdown = true;
            for (TupleValueExpression needReAggTVE : needReAggTVEs) {
                if (tves.contains(needReAggTVE)) {
                    m_edgeCaseQueryNoFixNeeded = false;

                    if (fromMVTableOnly(tves)) {
                        canPushdown = false;
                    }
                    break;
                }
            }
            if (canPushdown) {
                remainingExprs.add(expr);
            }
            else {
                aggPostExprs.add(expr);
            }
        }
        AbstractExpression remainingFilters = ExpressionUtil.combinePredicates(remainingExprs);
        // Return the updated filters for the scan node.
        return remainingFilters;
    }
}