// WindowFunctionPlanNode.java example
//
// Explorer
// voltdb-master
/* This file is part of VoltDB.
 * Copyright (C) 2008-2017 VoltDB Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with VoltDB.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.voltdb.plannodes;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.json_voltpatches.JSONArray;
import org.json_voltpatches.JSONException;
import org.json_voltpatches.JSONObject;
import org.json_voltpatches.JSONStringer;
import org.voltdb.catalog.Database;
import org.voltdb.expressions.AbstractExpression;
import org.voltdb.expressions.ExpressionUtil;
import org.voltdb.expressions.TupleValueExpression;
import org.voltdb.expressions.WindowFunctionExpression;
import org.voltdb.planner.AbstractParsedStmt;
import org.voltdb.types.ExpressionType;
import org.voltdb.types.PlanNodeType;

/**
 * This plan node represents windowed aggregate computations.
 */
/**
 * Plan node for windowed aggregate computations.
 *
 * <p>The node currently holds exactly one window function (this is asserted
 * in several places).  Its output schema is the window function's output
 * column followed by a copy of every column of its single child.
 */
public class WindowFunctionPlanNode extends AbstractPlanNode {
    /** Keys used when this node is written to, and read back from, JSON. */
    public enum Members {
        AGGREGATE_COLUMNS,
        AGGREGATE_TYPE,
        AGGREGATE_OUTPUT_COLUMN,
        AGGREGATE_EXPRESSIONS,
        PARTITIONBY_EXPRESSIONS
    }

    // A list of aggregate types.  These are not programming language
    // types.  They are more like expression operator types, like
    // MIN, MAX etc.
    protected List<ExpressionType> m_aggregateTypes = new ArrayList<>();
    // A list of column offsets/indexes, not plan column guids.
    protected List<Integer> m_aggregateOutputColumns = new ArrayList<>();
    // One argument list per aggregate.  An entry is null when the window
    // function was registered with no arguments
    // (see setWindowFunctionExpression).
    protected List<List<AbstractExpression>> m_aggregateExpressions = new ArrayList<>();
    // There is one list of partition by expressions for this WindowFunctionPlanNode.
    // If there is more than one window we will need more than one WindowFunctionPlanNode.
    // At the moment these are guaranteed to be TVEs.  This might always be true.
    protected List<AbstractExpression> m_partitionByExpressions = new ArrayList<>();
    // This is the list of TVEs for the window functions.  There
    // will only be one of them for now.
    private final List<TupleValueExpression> m_outputTVEs = new ArrayList<>();

    // List of the order by expressions.  If there
    // are no order by expressions in one aggregate the list
    // is empty, not null.
    protected List<AbstractExpression> m_orderByExpressions = new ArrayList<>();

    /** @return the number of window functions registered on this node. */
    private int getAggregateFunctionCount() {
        return m_aggregateTypes.size();
    }

    /**
     * Build this node's output schema: first one column per window function
     * (taken from the TVEs recorded by {@link #setWindowFunctionExpression}),
     * then a clone of every column of the child's output schema.
     *
     * @param db catalog database, forwarded to the child's schema generation
     */
    @Override
    public void generateOutputSchema(Database db) {
        // We only expect one window function here.
        assert(getAggregateFunctionCount() == 1);
        if (m_outputSchema == null) {
            m_outputSchema = new NodeSchema();
        }
        else {
            // A pre-existing schema must still be empty, otherwise the
            // columns added below would be duplicates.
            assert(getOutputSchema().size() == 0);
        }
        assert(m_children.size() == 1);
        m_children.get(0).generateOutputSchema(db);
        NodeSchema inputSchema = m_children.get(0).getOutputSchema();
        // We already created the TVE for this expression.
        for (int ii = 0; ii < getAggregateFunctionCount(); ii += 1) {
            TupleValueExpression tve = m_outputTVEs.get(ii);
            getOutputSchema().addColumn(
                    tve.getTableName(), tve.getTableAlias(),
                    tve.getColumnName(), tve.getColumnAlias(),
                    tve);
        }
        // Just copy the input columns to the output schema.
        for (SchemaColumn col : inputSchema.getColumns()) {
            getOutputSchema().addColumn(col.clone());
        }
        m_hasSignificantOutputSchema = true;
    }

    /**
     * Register the single window function computed by this node.
     *
     * <p>The partition by and order by lists are taken directly from
     * {@code winExpr}; they replace whatever lists this node held before.
     *
     * @param winExpr the window function expression to compute
     */
    public final void setWindowFunctionExpression(WindowFunctionExpression winExpr) {
        // Currently, we can have only one window function in this kind of a
        // plan node.  So, if we are adding one here, we can't have
        // any already.
        assert(getAggregateFunctionCount() == 0);
        m_aggregateOutputColumns.add(getAggregateFunctionCount());
        m_aggregateTypes.add(winExpr.getExpressionType());
        if (winExpr.getAggregateArguments().size() > 0) {
            m_aggregateExpressions.add(winExpr.getAggregateArguments());
        }
        else {
            // A function without arguments is recorded as a null entry so
            // the three per-aggregate lists stay the same length.
            m_aggregateExpressions.add(null);
        }
        m_partitionByExpressions = winExpr.getPartitionByExpressions();
        m_orderByExpressions = winExpr.getOrderByExpressions();
        m_outputTVEs.add(winExpr.getDisplayListExpression());
    }

    /**
     * Serialize to JSON.  We only serialize the expressions, and not the
     * directions.  We won't need them in the executor.  The directions will
     * be in the order by plan node in any case.
     *
     * @param stringer the JSON writer to append to
     * @throws JSONException if the underlying writer rejects the output
     */
    @Override
    public void toJSONString(JSONStringer stringer) throws JSONException {
        super.toJSONString(stringer);
        // Use the enum for the key, exactly as loadFromJSONObject does.
        stringer.key(Members.AGGREGATE_COLUMNS.name())
                .array();
        for (int ii = 0; ii < m_aggregateTypes.size(); ii++) {
            stringer.object();
            stringer.keySymbolValuePair(Members.AGGREGATE_TYPE.name(), m_aggregateTypes.get(ii).name());
            stringer.keySymbolValuePair(Members.AGGREGATE_OUTPUT_COLUMN.name(), m_aggregateOutputColumns.get(ii));
            AbstractExpression.toJSONArray(stringer,
                                           Members.AGGREGATE_EXPRESSIONS.name(),
                                           m_aggregateExpressions.get(ii));
            stringer.endObject();
        }
        stringer.endArray();
        AbstractExpression.toJSONArray(stringer,
                                       Members.PARTITIONBY_EXPRESSIONS.name(),
                                       m_partitionByExpressions);
        // NOTE(review): the null argument is presumably the sort direction
        // list, which is deliberately not serialized -- confirm against
        // AbstractExpression.toJSONArrayFromSortList.
        AbstractExpression.toJSONArrayFromSortList(stringer,
                                                   m_orderByExpressions,
                                                   null);
    }

    /**
     * Deserialize a WindowFunctionPlanNode from JSON.  Since we don't need the
     * sort directions, and we don't serialize them in toJSONString, then we
     * can't in general get them here.
     *
     * @param jobj the JSON object produced by {@link #toJSONString}
     * @param db   catalog database, forwarded to the common plan node loader
     * @throws JSONException if a required key is missing or malformed
     */
    @Override
    public void loadFromJSONObject(JSONObject jobj, Database db)
            throws JSONException {
        helpLoadFromJSONObject(jobj, db);
        JSONArray jarray = jobj.getJSONArray( Members.AGGREGATE_COLUMNS.name() );
        int size = jarray.length();
        for (int i = 0; i < size; i++) {
            // We only expect one of these for now.
            assert(i == 0);
            JSONObject tempObj = jarray.getJSONObject( i );
            m_aggregateTypes.add( ExpressionType.get( tempObj.getString( Members.AGGREGATE_TYPE.name() )));
            m_aggregateOutputColumns.add( tempObj.getInt( Members.AGGREGATE_OUTPUT_COLUMN.name() ));
            m_aggregateExpressions.add(
                    AbstractExpression.loadFromJSONArrayChild(null,
                                                              tempObj,
                                                              Members.AGGREGATE_EXPRESSIONS.name(),
                                                              null));
        }
        m_partitionByExpressions
                = AbstractExpression.loadFromJSONArrayChild(null,
                                                            jobj,
                                                            Members.PARTITIONBY_EXPRESSIONS.name(),
                                                            null);
        m_orderByExpressions = new ArrayList<>();
        AbstractExpression.loadSortListFromJSONArray(m_orderByExpressions,
                                                     null,
                                                     jobj);
    }

    /** @return always {@code true} for this node class. */
    @Override
    public boolean planNodeClassNeedsProjectionNode() {
        return true;
    }

    /** @return {@link PlanNodeType#WINDOWFUNCTION}. */
    @Override
    public PlanNodeType getPlanNodeType() {
        return PlanNodeType.WINDOWFUNCTION;
    }

    /**
     * Resolve the column index of every TVE used by this node against the
     * given input schema: the output columns, the aggregate arguments, the
     * partition by expressions and the order by expressions, in that order.
     *
     * @param inputSchema the schema the TVEs are looked up in
     * @throws RuntimeException if an output column TVE cannot be found in
     *         {@code inputSchema} and is not a temp-table (aggregate output)
     *         column
     */
    public void resolveColumnIndexesUsingSchema(NodeSchema inputSchema) {
        // Get all the TVEs in the output columns.
        for (SchemaColumn col : m_outputSchema.getColumns()) {
            Collection<TupleValueExpression> outputTves =
                    ExpressionUtil.getTupleValueExpressions(col.getExpression());
            for (TupleValueExpression tve : outputTves) {
                int index = tve.setColumnIndexUsingSchema(inputSchema);
                // An unresolved TVE is acceptable only when it is the
                // aggregate output, which lives in the temp table.  The
                // constant-first comparison keeps a TVE with no table name
                // from raising a NullPointerException instead of the
                // intended error below.
                if (index == -1
                        && ! AbstractParsedStmt.TEMP_TABLE_NAME.equals(tve.getTableName())) {
                    throw new RuntimeException("Unable to find index for column: " +
                            tve.getColumnName());
                }
            }
        }

        // Aggregates also need to resolve indexes for aggregate inputs.
        // Entries are null for window functions without arguments.
        for (List<AbstractExpression> aggArgs : m_aggregateExpressions) {
            resolveTveIndexes(aggArgs, inputSchema);
        }

        // Aggregates also need to resolve indexes for partition by inputs.
        resolveTveIndexes(m_partitionByExpressions, inputSchema);

        // Resolve column indices for the order by expressions.  Not quite
        // sure these should be TVEs in the long term.
        resolveTveIndexes(m_orderByExpressions, inputSchema);

        /*
         * Is this needed?
         */
        resolveSubqueryColumnIndexes();
    }

    /**
     * Set the column index of every TVE found in the given expressions.
     * Unresolved TVEs are not reported here.
     *
     * @param exprs       expressions to scan; {@code null} is treated as empty
     * @param inputSchema the schema the TVEs are looked up in
     */
    private static void resolveTveIndexes(List<AbstractExpression> exprs,
                                          NodeSchema inputSchema) {
        if (exprs == null) {
            return;
        }
        for (AbstractExpression expr : exprs) {
            Collection<TupleValueExpression> tves =
                    ExpressionUtil.getTupleValueExpressions(expr);
            for (TupleValueExpression tve : tves) {
                tve.setColumnIndexUsingSchema(inputSchema);
            }
        }
    }

    /**
     * Resolve column indexes for the child first, then for this node using
     * the child's output schema as input.
     */
    @Override
    public void resolveColumnIndexes() {
        // Aggregates need to resolve indexes for the output schema but don't need
        // to reorder it.  Some of the outputs may be local aggregate columns and
        // won't have a TVE to resolve.
        assert (m_children.size() == 1);
        m_children.get(0).resolveColumnIndexes();
        NodeSchema inputSchema = m_children.get(0).getOutputSchema();

        resolveColumnIndexesUsingSchema(inputSchema);
    }

    /**
     * Describe this node for an explain plan.
     *
     * @param indent unused by this node
     * @return a one-line description naming each window function operator;
     *         for the usual single-function node this is
     *         {@code "WINDOW FUNCTION AGGREGATION: ops: NAME()"}
     */
    @Override
    protected String explainPlanForNode(String indent) {
        // List every aggregate rather than indexing element 0, so a node
        // with no function set yet does not throw while being explained.
        StringBuilder sb = new StringBuilder("WINDOW FUNCTION AGGREGATION: ops: ");
        String separator = "";
        for (ExpressionType aggType : m_aggregateTypes) {
            sb.append(separator).append(aggType.name()).append("()");
            separator = ", ";
        }
        return sb.toString();
    }

    /** @return the partition by expressions (the internal list, not a copy). */
    public List<AbstractExpression> getPartitionByExpressions() {
        return m_partitionByExpressions;
    }

    /** @return the aggregate operator types (the internal list, not a copy). */
    public final List<ExpressionType> getAggregateTypes() {
        return m_aggregateTypes;
    }

    /**
     * Replace the list of aggregate operator types.
     *
     * @param aggregateTypes the new list; it is stored as-is, not copied
     */
    public final void setAggregateTypes(List<ExpressionType> aggregateTypes) {
        m_aggregateTypes = aggregateTypes;
    }

    /**
     * @return one argument list per aggregate (the internal list, not a
     *         copy); an entry may be {@code null} for a function without
     *         arguments
     */
    public final List<List<AbstractExpression>> getAggregateExpressions() {
        return m_aggregateExpressions;
    }
}