/* This file is part of VoltDB.
 * Copyright (C) 2008-2017 VoltDB Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
 */

package org.voltdb.planner;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.json_voltpatches.JSONException;
import org.voltdb.catalog.Column;
import org.voltdb.catalog.MaterializedViewHandlerInfo;
import org.voltdb.catalog.MaterializedViewInfo;
import org.voltdb.catalog.Table;
import org.voltdb.expressions.AbstractExpression;
import org.voltdb.expressions.AggregateExpression;
import org.voltdb.expressions.ExpressionUtil;
import org.voltdb.expressions.TupleValueExpression;
import org.voltdb.planner.parseinfo.BranchNode;
import org.voltdb.planner.parseinfo.JoinNode;
import org.voltdb.planner.parseinfo.StmtTableScan;
import org.voltdb.planner.parseinfo.StmtTargetTableScan;
import org.voltdb.plannodes.AbstractPlanNode;
import org.voltdb.plannodes.AbstractScanPlanNode;
import org.voltdb.plannodes.HashAggregatePlanNode;
import org.voltdb.plannodes.NodeSchema;
import org.voltdb.plannodes.ProjectionPlanNode;
import org.voltdb.plannodes.SchemaColumn;
import org.voltdb.types.ExpressionType;
import org.voltdb.utils.CatalogUtil;

/**
 * When a materialized view has a source table that is partitioned, and the source table's
 * partition key is not a group by key for the materialized view, we need to "re-aggregate"
 * the contents of the view in the coordinator fragment, in order to account for possible
 * duplicate keys coming from different sites. This "re-aggregation" is done by injecting
 * an extra aggregation node above the receive node on the coordinator fragment.
 *
 * This class encapsulates the info required for adding in re-aggregation so that scans
 * of the materialized view get correct answers.
 */
public class MaterializedViewFixInfo {
    // New inlined projection node for the scan node, containing the extra group-by columns.
    private ProjectionPlanNode m_scanInlinedProjectionNode = null;
    // New re-aggregation plan node on the coordinator to eliminate the duplicated rows.
    private HashAggregatePlanNode m_reAggNode = null;
    // Does this query on a partitioned materialized view need to be fixed?
    private boolean m_needed = false;
    // Materialized view table scan.
    private StmtTableScan m_mvTableScan = null;
    // Scan node for the join query.
    AbstractScanPlanNode m_scanNode = null;

    // ENG-5386: Edge case query.
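    // A sketch of the edge case (hypothetical schema; assumes P1 is partitioned on a
    // column that is NOT in the view's GROUP BY):
    //   CREATE VIEW V_P1 (C1, CNT, SUM_A) AS
    //       SELECT C1, COUNT(*), SUM(A) FROM P1 GROUP BY C1;
    //   SELECT C1, SUM(SUM_A) FROM V_P1 GROUP BY C1;
    // The query already re-aggregates by the view's own GROUP BY column with a
    // compatible aggregate (a SUM over a SUM column), so it gets correct answers
    // without the added re-aggregation node and the fix can be skipped.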
    private boolean m_edgeCaseQueryNoFixNeeded = true;

    public boolean needed() {
        return m_needed;
    }

    public void setNeeded(boolean need) {
        m_needed = need;
    }

    public String getMVTableName() {
        assert(m_mvTableScan != null);
        return m_mvTableScan.getTableName();
    }

    public String getMVTableAlias() {
        assert(m_mvTableScan != null);
        return m_mvTableScan.getTableAlias();
    }

    public HashAggregatePlanNode getReAggregationPlanNode() {
        return m_reAggNode;
    }

    public void setEdgeCaseQueryNoFixNeeded(boolean edgeCase) {
        m_edgeCaseQueryNoFixNeeded = edgeCase;
    }

    /**
     * Check whether the results from a materialized view need to be
     * re-aggregated on the coordinator by the view's GROUP BY columns
     * prior to any of the processing specified by the query.
     * This is normally the case when a mat view's source table is partitioned
     * and the view's GROUP BY does not include the partition key.
     * There is a special edge case where the query already contains the exact
     * reaggregations that the added-cost fix would introduce, so the fix can
     * be skipped as an optimization.
     * Set the m_needed flag to true only if the reaggregation fix is needed.
     * @return The value of m_needed
     */
    public boolean processMVBasedQueryFix(StmtTableScan mvTableScan,
            Set<SchemaColumn> scanColumns, JoinNode joinTree,
            List<ParsedColInfo> displayColumns, List<ParsedColInfo> groupByColumns) {
        // Check valid cases first.
        //@TODO
        if ( ! (mvTableScan instanceof StmtTargetTableScan)) {
            return false;
        }
        Table table = ((StmtTargetTableScan)mvTableScan).getTargetTable();
        assert (table != null);
        String mvTableName = table.getTypeName();

        Table srcTable = table.getMaterializer();
        if (srcTable == null) {
            return false;
        }
        if (table.getIsreplicated()) {
            return false;
        }
        // Check whether the partition column is in the group-by column list.
        // If the view table itself is partitioned (its partition column is set),
        // no fix is needed.
        if (table.getPartitioncolumn() != null) {
            return false;
        }
        m_mvTableScan = mvTableScan;

        Set<String> mvDDLGroupbyColumnNames = new HashSet<>();
        List<Column> mvColumnArray = CatalogUtil.getSortedCatalogItems(table.getColumns(), "index");
        String mvTableAlias = getMVTableAlias();

        // Get the number of group-by columns.
        int numOfGroupByColumns;
        MaterializedViewInfo mvInfo = srcTable.getViews().get(mvTableName);
        if (mvInfo != null) {
            // Single-table view.
            String complexGroupbyJson = mvInfo.getGroupbyexpressionsjson();
            if (complexGroupbyJson.length() > 0) {
                List<AbstractExpression> mvComplexGroupbyCols = null;
                try {
                    mvComplexGroupbyCols = AbstractExpression.fromJSONArrayString(complexGroupbyJson, null);
                } catch (JSONException e) {
                    e.printStackTrace();
                }
                numOfGroupByColumns = mvComplexGroupbyCols.size();
            }
            else {
                numOfGroupByColumns = mvInfo.getGroupbycols().size();
            }
        }
        else {
            // Joined-table view.
            MaterializedViewHandlerInfo mvHandlerInfo = table.getMvhandlerinfo().get("mvHandlerInfo");
            numOfGroupByColumns = mvHandlerInfo.getGroupbycolumncount();
        }

        if (scanColumns.isEmpty() && numOfGroupByColumns == 0) {
            // This is an edge case that can happen if the view
            // has no group by keys, and we are just
            // doing a count(*) on the output of the view.
            //
            // Having no GB keys or scan columns would cause us to
            // produce plan nodes that have a 0-column output schema.
            // We can't handle this in several places, so add the
            // count(*) column from the view to the scan columns.
            Column mvCol = mvColumnArray.get(0); // this is the "count(*)" column.
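            // For example (hypothetical): "SELECT COUNT(*) FROM V_NO_GB", where
            // V_NO_GB is a view defined without a GROUP BY clause. Its COUNT(*)
            // column is added to the scan columns below so that downstream plan
            // nodes have a non-empty output schema.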
            TupleValueExpression tve = new TupleValueExpression(mvTableName, mvTableAlias, mvCol, 0);
            tve.setOrigStmtId(mvTableScan.getStatementId());
            String colName = mvCol.getName();
            SchemaColumn scol = new SchemaColumn(mvTableName, mvTableAlias, colName, colName, tve);
            scanColumns.add(scol);
        }

        // Start the real materialized view processing to fix the duplicates problem.
        // (1) Construct new projection columns for the scan plan node.
        Set<SchemaColumn> mvDDLGroupbyColumns = new HashSet<>();
        NodeSchema inlineProjSchema = new NodeSchema();
        for (SchemaColumn scol : scanColumns) {
            inlineProjSchema.addColumn(scol);
        }

        for (int i = 0; i < numOfGroupByColumns; i++) {
            Column mvCol = mvColumnArray.get(i);
            String colName = mvCol.getName();
            TupleValueExpression tve = new TupleValueExpression(mvTableName, mvTableAlias, mvCol, i);
            tve.setOrigStmtId(mvTableScan.getStatementId());

            mvDDLGroupbyColumnNames.add(colName);
            SchemaColumn scol = new SchemaColumn(mvTableName, mvTableAlias, colName, colName, tve);
            mvDDLGroupbyColumns.add(scol);
            if (!scanColumns.contains(scol)) {
                scanColumns.add(scol);
                // Construct new projection columns for the scan plan node.
                inlineProjSchema.addColumn(scol);
            }
        }

        // Record the re-aggregation type for each scan column.
        Map<String, ExpressionType> mvColumnReAggType = new HashMap<>();
        for (int i = numOfGroupByColumns; i < mvColumnArray.size(); i++) {
            Column mvCol = mvColumnArray.get(i);
            ExpressionType reAggType = ExpressionType.get(mvCol.getAggregatetype());
            // COUNT and COUNT(*) re-aggregate as a SUM of the per-partition counts.
            if (reAggType == ExpressionType.AGGREGATE_COUNT_STAR ||
                    reAggType == ExpressionType.AGGREGATE_COUNT) {
                reAggType = ExpressionType.AGGREGATE_SUM;
            }
            mvColumnReAggType.put(mvCol.getName(), reAggType);
        }

        assert (inlineProjSchema.size() > 0);
        m_scanInlinedProjectionNode = new ProjectionPlanNode(inlineProjSchema);

        // (2) Construct the re-aggregation node.
        // Construct the re-aggregation plan node's aggSchema.
        m_reAggNode = new HashAggregatePlanNode();
        int outputColumnIndex = 0;
        // inlineProjSchema contains the group-by columns, while aggSchema may not.
        NodeSchema aggSchema = new NodeSchema();

        // Construct the re-aggregation node's aggregation and group-by list.
        for (SchemaColumn scol : inlineProjSchema.getColumns()) {
            if (mvDDLGroupbyColumns.contains(scol)) {
                // Add the group-by expression.
                m_reAggNode.addGroupByExpression(scol.getExpression());
            }
            else {
                ExpressionType reAggType = mvColumnReAggType.get(scol.getColumnName());
                assert(reAggType != null);
                AbstractExpression agg_input_expr = scol.getExpression();
                assert(agg_input_expr instanceof TupleValueExpression);
                // Add the aggregation information.
                m_reAggNode.addAggregate(reAggType, false, outputColumnIndex, agg_input_expr);
            }
            aggSchema.addColumn(scol);
            outputColumnIndex++;
        }
        assert (aggSchema.size() > 0);
        m_reAggNode.setOutputSchema(aggSchema);

        // Collect all TVEs that need to be re-aggregated on the coordinator.
        List<TupleValueExpression> needReAggTVEs = new ArrayList<>();
        List<AbstractExpression> aggPostExprs = new ArrayList<>();
        for (int i = numOfGroupByColumns; i < mvColumnArray.size(); i++) {
            Column mvCol = mvColumnArray.get(i);
            TupleValueExpression tve = new TupleValueExpression(mvTableName, mvTableAlias, mvCol, -1);
            tve.setOrigStmtId(mvTableScan.getStatementId());
            needReAggTVEs.add(tve);
        }

        collectReAggNodePostExpressions(joinTree, needReAggTVEs, aggPostExprs);
        AbstractExpression aggPostExpr = ExpressionUtil.combinePredicates(aggPostExprs);
        // Add post filters for the re-aggregation node.
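        // Filters that reference re-aggregated view columns (e.g., a hypothetical
        // "WHERE SUM_A > 100" against the view's SUM column) cannot be evaluated
        // per partition. collectReAggNodePostExpressions() pulled them out of the
        // join/WHERE clauses above, and they are applied here as a post-predicate
        // evaluated after the coordinator re-aggregation.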
        m_reAggNode.setPostPredicate(aggPostExpr);

        // ENG-5386
        if (m_edgeCaseQueryNoFixNeeded &&
                edgeCaseQueryNoFixNeeded(mvDDLGroupbyColumnNames, mvColumnReAggType,
                        displayColumns, groupByColumns)) {
            return false;
        }

        m_needed = true;
        return true;
    }

    /** ENG-5386: do not fix some cases in order to get better performance.
     * There is a special edge case when certain queries are applied to
     * partitioned materialized views that do not contain the partition key in
     * their GROUP BY columns. In this special case, where the query duplicates
     * the reaggregation behavior of the fix -- which must consist of MIN, MAX
     * and/or non-distinct SUM reaggregations -- the added-cost fix code can be
     * skipped as an optimization.
     */
    private boolean edgeCaseQueryNoFixNeeded(Set<String> mvDDLGroupbyColumnNames,
            Map<String, ExpressionType> mvColumnAggType,
            List<ParsedColInfo> displayColumns, List<ParsedColInfo> groupByColumns) {
        // Condition (1): Any group-by column on the view must be one of the MV DDL group-by TVEs.
        for (ParsedColInfo gcol : groupByColumns) {
            assert(gcol.expression instanceof TupleValueExpression);
            TupleValueExpression tve = (TupleValueExpression) gcol.expression;
            if (tve.getTableName().equals(getMVTableName()) &&
                    ! mvDDLGroupbyColumnNames.contains(tve.getColumnName())) {
                return false;
            }
        }

        // Condition (2): All the aggregations must qualify.
        for (ParsedColInfo dcol : displayColumns) {
            if (groupByColumns.contains(dcol)) {
                // Skip a group-by column pass-through.
                continue;
            }
            if ( ! (dcol.expression instanceof AggregateExpression)) {
                return false;
            }
            AggregateExpression aggExpr = (AggregateExpression) dcol.expression;
            if ( ! (aggExpr.getLeft() instanceof TupleValueExpression)) {
                return false;
            }
            ExpressionType type = aggExpr.getExpressionType();

            // Only MIN, MAX, and non-DISTINCT SUM
            // can tolerate a skipped reaggregation.
            if ((type != ExpressionType.AGGREGATE_SUM || aggExpr.isDistinct()) &&
                    type != ExpressionType.AGGREGATE_MIN &&
                    type != ExpressionType.AGGREGATE_MAX) {
                return false;
            }

            TupleValueExpression tve = (TupleValueExpression) aggExpr.getLeft();
            if (tve.getTableName().equals(getMVTableName())) {
                String columnName = tve.getColumnName();
                // The type of the aggregation in the query must match the
                // type of aggregation defined for the view column --
                // SUMming a SUM, MINning a MIN, or MAXing a MAX.
                if (mvColumnAggType.get(columnName) != type) {
                    return false;
                }
            }
            else {
                // The aggregate argument is a column from the
                // other (non-view) side of the join.
                // It's OK for its rows to get duplicated by joining
                // with multiple "partial group" rows ONLY if it is
                // feeding a MIN or MAX.
                // The duplication would corrupt a SUM.
                if (type == ExpressionType.AGGREGATE_SUM) {
                    return false;
                }
            }
        }

        // The edge case query can be optimized with correct answers without the MV reaggregation fix.
        return true;
    }

    /**
     * Find the scan node on the MV table and replace it with the reAggNode for a join query.
     * This scan node cannot be inlined, so it should be a direct child of a join node.
     * @param node
     */
    public boolean processScanNodeWithReAggNode(AbstractPlanNode node, AbstractPlanNode reAggNode) {
        // The MV table scan node cannot be among the inlined nodes.
        for (int i = 0; i < node.getChildCount(); i++) {
            AbstractPlanNode child = node.getChild(i);
            if (child instanceof AbstractScanPlanNode) {
                AbstractScanPlanNode scanNode = (AbstractScanPlanNode) child;
                if (!scanNode.getTargetTableName().equals(getMVTableName())) {
                    continue;
                }
                if (reAggNode != null) {
                    // Join query case.
                    node.setAndLinkChild(i, reAggNode);
                }
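                // In the join query case above, the re-aggregation node has taken
                // the MV scan's place as this node's i-th child. For a single-table
                // query (reAggNode == null) the scan stays in place, and the caller
                // can obtain the coordinator re-aggregation node from
                // getReAggregationPlanNode() instead.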
                // Process the scan node: set up its scan columns by adding the
                // inlined projection node.
                scanNode.addInlinePlanNode(m_scanInlinedProjectionNode);
                m_scanNode = scanNode;
                return true;
            }
            else {
                boolean replaced = processScanNodeWithReAggNode(child, reAggNode);
                if (replaced) {
                    return true;
                }
            }
        }
        return false;
    }

    private void collectReAggNodePostExpressions(JoinNode joinTree,
            List<TupleValueExpression> needReAggTVEs,
            List<AbstractExpression> aggPostExprs) {
        if (joinTree instanceof BranchNode) {
            collectReAggNodePostExpressions(((BranchNode)joinTree).getLeftNode(), needReAggTVEs, aggPostExprs);
            collectReAggNodePostExpressions(((BranchNode)joinTree).getRightNode(), needReAggTVEs, aggPostExprs);
            return;
        }

        joinTree.setJoinExpression(processFilters(joinTree.getJoinExpression(),
                needReAggTVEs, aggPostExprs));

        // For outer join filters. An inner join or single-table query will have a null WHERE expression here.
        joinTree.setWhereExpression(processFilters(joinTree.getWhereExpression(),
                needReAggTVEs, aggPostExprs));
    }

    private boolean fromMVTableOnly(List<TupleValueExpression> tves) {
        String mvTableName = getMVTableName();
        for (TupleValueExpression tve : tves) {
            String tveTableName = tve.getTableName();
            if (!mvTableName.equals(tveTableName)) {
                return false;
            }
        }
        return true;
    }

    private AbstractExpression processFilters(AbstractExpression filters,
            List<TupleValueExpression> needReAggTVEs,
            List<AbstractExpression> aggPostExprs) {
        if (filters == null) {
            return null;
        }

        // Collect the filter expressions that can remain pushed down below the re-aggregation.
        List<AbstractExpression> remainingExprs = new ArrayList<>();
        // Check each conjunct of the filter.
        List<AbstractExpression> exprs = ExpressionUtil.uncombinePredicate(filters);
        for (AbstractExpression expr : exprs) {
            List<TupleValueExpression> tves = expr.findAllTupleValueSubexpressions();

            boolean canPushdown = true;
            for (TupleValueExpression needReAggTVE : needReAggTVEs) {
                if (tves.contains(needReAggTVE)) {
                    m_edgeCaseQueryNoFixNeeded = false;

                    if (fromMVTableOnly(tves)) {
                        canPushdown = false;
                    }
                    break;
                }
            }
            if (canPushdown) {
                remainingExprs.add(expr);
            }
            else {
                aggPostExprs.add(expr);
            }
        }
        AbstractExpression remainingFilters = ExpressionUtil.combinePredicates(remainingExprs);
        // Return the updated filters for the scan node.
        return remainingFilters;
    }
}