/* This file is part of VoltDB.
 * Copyright (C) 2008-2017 VoltDB Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with VoltDB.  If not, see <http://www.gnu.org/licenses/>.
 */

package org.voltdb.plannodes;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;

import org.json_voltpatches.JSONArray;
import org.json_voltpatches.JSONException;
import org.json_voltpatches.JSONObject;
import org.json_voltpatches.JSONStringer;
import org.voltdb.catalog.Column;
import org.voltdb.catalog.Database;
import org.voltdb.catalog.Table;
import org.voltdb.expressions.AbstractExpression;
import org.voltdb.expressions.AbstractSubqueryExpression;
import org.voltdb.expressions.AggregateExpression;
import org.voltdb.expressions.ExpressionUtil;
import org.voltdb.expressions.TupleValueExpression;
import org.voltdb.planner.AbstractParsedStmt;
import org.voltdb.planner.parseinfo.StmtTargetTableScan;
import org.voltdb.types.ExpressionType;
import org.voltdb.types.PlanNodeType;
import org.voltdb.types.SortDirectionType;

/**
 * Plan node describing an aggregation: a list of aggregates (type, distinct
 * flag, output column, input expression), an optional list of group-by
 * expressions, and optional pre- and post-aggregation predicates.
 *
 * The four per-aggregate lists are parallel: entry {@code i} of each list
 * describes the same aggregate. {@link #validate()} checks that their sizes agree.
 */
public class AggregatePlanNode extends AbstractPlanNode {

    /** Keys used when this node is written to and read from JSON. */
    public enum Members {
        PRE_PREDICATE,   // ENG-1565: to accelerate min() / max() using index purpose only
        POST_PREDICATE,
        AGGREGATE_COLUMNS,
        AGGREGATE_TYPE,
        AGGREGATE_DISTINCT,
        AGGREGATE_OUTPUT_COLUMN,
        AGGREGATE_EXPRESSION,
        GROUPBY_EXPRESSIONS,
        PARTIAL_GROUPBY_COLUMNS
        ;
    }

    protected List<ExpressionType> m_aggregateTypes = new ArrayList<>();
    // a list of whether the aggregate is over distinct elements
    // 0 is not distinct, 1 is distinct
    protected List<Integer> m_aggregateDistinct = new ArrayList<>();
    // a list of column offsets/indexes not plan column guids.
    protected List<Integer> m_aggregateOutputColumns = new ArrayList<>();
    // List of the input TVEs into the aggregates.  Maybe should become
    // a list of SchemaColumns someday
    protected List<AbstractExpression> m_aggregateExpressions = new ArrayList<>();

    // At the moment these are guaranteed to be TVES.  This might always be true
    protected List<AbstractExpression> m_groupByExpressions = new ArrayList<>();

    // This list is only used for the special case of instances of PartialAggregatePlanNode.
    protected List<Integer> m_partialGroupByColumns = null;

    // True if this aggregate node is the coordinator summary aggregator
    // for an aggregator that was pushed down. Must know to correctly
    // decide if other nodes can be pushed down / past this node.
    public boolean m_isCoordinatingAggregator = false;

    protected AbstractExpression m_prePredicate;
    protected AbstractExpression m_postPredicate;

    public AggregatePlanNode() {
        super();
    }

    @Override
    public PlanNodeType getPlanNodeType() {
        return PlanNodeType.AGGREGATE;
    }

    /** @return the live (not copied) list of aggregate types held by this node */
    public List<ExpressionType> getAggregateTypes() {
        return m_aggregateTypes;
    }

    /**
     * Checks that the parallel aggregate lists have matching sizes, that at
     * least one aggregate is present, and that no aggregate type is
     * {@code ExpressionType.INVALID}.
     *
     * @throws Exception if any of those checks fails (or if the superclass check fails)
     */
    @Override
    public void validate() throws Exception {
        super.validate();
        //
        // We need to have an aggregate type and column
        // We're not checking that it's a valid ExpressionType because this plannode is a temporary hack
        //
        if (m_aggregateTypes.size() != m_aggregateDistinct.size() ||
            m_aggregateDistinct.size() != m_aggregateExpressions.size() ||
            m_aggregateExpressions.size() != m_aggregateOutputColumns.size()) {
            throw new Exception("ERROR: Mismatched number of aggregate expression column attributes for PlanNode '" + this + "'");
        }
        if (m_aggregateTypes.isEmpty()|| m_aggregateTypes.contains(ExpressionType.INVALID)) {
            throw new Exception("ERROR: Invalid Aggregate ExpressionType or No Aggregate Expression types for PlanNode '" + this + "'");
        }
        if (m_aggregateExpressions.isEmpty()) {
            throw new Exception("ERROR: No Aggregate Expressions for PlanNode '" + this + "'");
        }
    }

    /**
     * @return true if this node has no group-by expressions and exactly one
     *         aggregate of the given type
     */
    private boolean isSingleUngroupedAggregateOfType(ExpressionType aggType) {
        return m_groupByExpressions.isEmpty()
                && m_aggregateTypes.size() == 1
                && m_aggregateTypes.get(0) == aggType;
    }

    /** @return true for a single COUNT(*) with no GROUP BY */
    public boolean isTableCountStar() {
        return isSingleUngroupedAggregateOfType(ExpressionType.AGGREGATE_COUNT_STAR);
    }

    /** @return true for a single non-DISTINCT COUNT(expr) with no GROUP BY */
    public boolean isTableNonDistinctCount() {
        if ( ! isSingleUngroupedAggregateOfType(ExpressionType.AGGREGATE_COUNT)) {
            return false;
        }
        // Does it have a distinct keyword?
        return m_aggregateDistinct.get(0) != 1;
    }

    /**
     * @return true for a single non-DISTINCT COUNT with no GROUP BY whose
     *         argument is a parameter or a constant
     */
    public boolean isTableNonDistinctCountConstant() {
        if ( ! isTableNonDistinctCount()) {
            return false;
        }
        AbstractExpression aggArgument = m_aggregateExpressions.get(0);
        ExpressionType argumentType = aggArgument.getExpressionType();
        // Is the expression a constant?
        return argumentType == ExpressionType.VALUE_PARAMETER ||
               argumentType == ExpressionType.VALUE_CONSTANT;
    }

    /**
     * Tests for a single non-DISTINCT COUNT(column), with no GROUP BY, directly
     * over a scan of a target table.
     *
     * NOTE: despite the method name, this returns true only when the counted
     * column is NOT nullable; a nullable column yields false.
     *
     * @return true if all of the above hold and the column is declared non-nullable
     */
    public boolean isTableCountNonDistinctNullableColumn() {
        if ( ! isTableNonDistinctCount()) {
            return false;
        }
        // Is the expression a column?
        AbstractExpression aggArgument = m_aggregateExpressions.get(0);
        if (aggArgument.getExpressionType() != ExpressionType.VALUE_TUPLE) {
            return false;
        }
        // Need to go to its child node to see the table schema.
        // Normally it has to be a ScanPlanNode.
        // If the query is a join query then the child will be something like nested loop.
        assert (m_children.size() == 1);
        if ( ! (m_children.get(0) instanceof AbstractScanPlanNode)) {
            return false;
        }
        AbstractScanPlanNode asp = (AbstractScanPlanNode) m_children.get(0);
        if ( ! (asp.getTableScan() instanceof StmtTargetTableScan)) {
            return false;
        }
        StmtTargetTableScan sttscan = (StmtTargetTableScan) asp.getTableScan();
        Table tbl = sttscan.getTargetTable();
        TupleValueExpression tve = (TupleValueExpression) aggArgument;
        String columnName = tve.getColumnName();
        Column col = tbl.getColumns().get(columnName);
        // If the lookup yields nothing, the column cannot be shown to be
        // non-nullable, so answer conservatively rather than dereference null.
        if (col == null) {
            return false;
        }
        // Is the column nullable?
        return ! col.getNullable();
    }

    // single min() without GROUP BY?
    public boolean isTableMin() {
        // do not support GROUP BY for now
        return isSingleUngroupedAggregateOfType(ExpressionType.AGGREGATE_MIN);
    }

    // single max() without GROUP BY?
    public boolean isTableMax() {
        // do not support GROUP BY for now
        return isSingleUngroupedAggregateOfType(ExpressionType.AGGREGATE_MAX);
    }

    // set predicate for SELECT MAX(X) FROM T WHERE X > / >= ? case
    public void setPrePredicate(AbstractExpression predicate) {
        m_prePredicate = predicate;
    }

    public void setPostPredicate(AbstractExpression predicate) {
        m_postPredicate = predicate;
    }

    public AbstractExpression getPostPredicate() {
        return m_postPredicate;
    }

    // for single min() / max(), return the single aggregate expression
    public AbstractExpression getFirstAggregateExpression() {
        return m_aggregateExpressions.get(0);
    }

    public int getAggregateTypesSize () {
        return m_aggregateTypes.size();
    }

    /** @return the live (not copied) list of group-by expressions */
    public List<AbstractExpression> getGroupByExpressions() {
        return m_groupByExpressions;
    }

    public int getGroupByExpressionsSize () {
        return m_groupByExpressions.size();
    }

    /**
     * Stores a clone of the given schema as this node's output schema and
     * marks the output schema as significant.
     */
    public void setOutputSchema(NodeSchema schema) {
        // aggregates currently have their output schema specified
        m_outputSchema = schema.clone();
        m_hasSignificantOutputSchema = true;
    }

    /**
     * Does not compute this node's own schema (it is set through
     * {@link #setOutputSchema}); only asks the single child, if present, and any
     * subquery expressions found in this node to generate theirs.
     */
    @Override
    public void generateOutputSchema(Database db) {
        // aggregate's output schema is pre-determined
        if (m_children.size() == 1) {
            m_children.get(0).generateOutputSchema(db);

            assert(m_hasSignificantOutputSchema);
        }

        // Generate the output schema for subqueries
        Collection<AbstractExpression> subqueryExpressions = findAllSubquerySubexpressions();
        for (AbstractExpression subqueryExpression : subqueryExpressions) {
            assert(subqueryExpression instanceof AbstractSubqueryExpression);
            ((AbstractSubqueryExpression) subqueryExpression).generateOutputSchema(db);
        }
    }

    @Override
    public void resolveColumnIndexes() {
        // Aggregates need to resolve indexes for the output schema but don't need
        // to reorder it.  Some of the outputs may be local aggregate columns and
        // won't have a TVE to resolve.
        assert (m_children.size() == 1);
        m_children.get(0).resolveColumnIndexes();
        NodeSchema inputSchema = m_children.get(0).getOutputSchema();

        resolveColumnIndexesUsingSchema(inputSchema);
    }

    /**
     * Resolves the column indexes of the TVEs in the output columns, aggregate
     * inputs and group-by expressions against {@code inputSchema}, and those of
     * the post-predicate against this node's own output schema.
     *
     * @throws RuntimeException if an output-column TVE cannot be found in
     *         {@code inputSchema} and does not belong to the temp table
     */
    void resolveColumnIndexesUsingSchema(NodeSchema inputSchema) {
        Collection<TupleValueExpression> allTves;

        // get all the TVEs in the output columns
        for (SchemaColumn col : m_outputSchema.getColumns()) {
            AbstractExpression colExpr = col.getExpression();
            allTves = ExpressionUtil.getTupleValueExpressions(colExpr);
            for (TupleValueExpression tve : allTves) {
                int index = tve.setColumnIndexUsingSchema(inputSchema);
                if (index == -1) {
                    // check to see if this TVE is the aggregate output
                    if ( ! tve.getTableName().equals(AbstractParsedStmt.TEMP_TABLE_NAME)) {
                        throw new RuntimeException("Unable to find index for column: " +
                                tve.getColumnName());
                    }
                }
            }
        }

        // Aggregates also need to resolve indexes for aggregate inputs
        // Find the proper index for the sort columns.  Not quite
        // sure these should be TVEs in the long term.
        for (AbstractExpression agg_exp : m_aggregateExpressions) {
            allTves = ExpressionUtil.getTupleValueExpressions(agg_exp);
            for (TupleValueExpression tve : allTves) {
                tve.setColumnIndexUsingSchema(inputSchema);
            }
        }

        // Aggregates also need to resolve indexes for group_by inputs
        for (AbstractExpression group_exp : m_groupByExpressions) {
            allTves = ExpressionUtil.getTupleValueExpressions(group_exp);
            for (TupleValueExpression tve : allTves) {
                tve.setColumnIndexUsingSchema(inputSchema);
            }
        }

        // Post filter also needs to resolve indexes, but a little
        // differently since it applies to the OUTPUT tuple.
        allTves = ExpressionUtil.getTupleValueExpressions(m_postPredicate);
        for (TupleValueExpression tve : allTves) {
            int index = m_outputSchema.getIndexOfTve(tve);
            tve.setColumnIndex(index);
        }

        resolveSubqueryColumnIndexes();
    }

    @Override
    protected void resolveSubqueryColumnIndexes() {
        // Possible subquery expressions
        Collection<AbstractExpression> exprs = findAllSubquerySubexpressions();
        for (AbstractExpression expr: exprs) {
            ((AbstractSubqueryExpression) expr).resolveColumnIndexes();
        }
    }

    /**
     * Add an aggregate to this plan node.
     * @param aggType
     * @param isDistinct  Is distinct being applied to the argument of this aggregate?
     * @param aggOutputColumn  Which output column in the output schema this
     *        aggregate should occupy
     * @param aggInputExpr  The input expression which should get aggregated;
     *        expected to be null exactly when {@code aggType.isNullary()}.
     *        A non-null expression is cloned before it is stored.
     */
    public void addAggregate(ExpressionType aggType,
                             boolean isDistinct,
                             Integer aggOutputColumn,
                             AbstractExpression aggInputExpr) {
        m_aggregateTypes.add(aggType);
        m_aggregateDistinct.add(isDistinct ? 1 : 0);
        m_aggregateOutputColumns.add(aggOutputColumn);
        if (aggType.isNullary()) {
            assert(aggInputExpr == null);
            m_aggregateExpressions.add(null);
        }
        else {
            assert(aggInputExpr != null);
            m_aggregateExpressions.add(aggInputExpr.clone());
        }
    }

    /**
     * Changes the type of the aggregate at {@code index} and updates the value
     * type and size of the matching output schema column expression to those
     * of a fresh {@link AggregateExpression} of the new type.
     */
    public void updateAggregate(
            int index,
            ExpressionType aggType) {

        // Create a new aggregate expression which we'll use to update the
        // output schema (whose exprs are TVEs).
        AggregateExpression aggExpr = new AggregateExpression(aggType);
        aggExpr.finalizeValueTypes();

        int outputSchemaIndex = m_aggregateOutputColumns.get(index);
        SchemaColumn schemaCol = m_outputSchema.getColumns().get(outputSchemaIndex);
        AbstractExpression schemaExpr = schemaCol.getExpression();
        schemaExpr.setValueType(aggExpr.getValueType());
        schemaExpr.setValueSize(aggExpr.getValueSize());

        m_aggregateTypes.set(index, aggType);
    }

    /** Appends a clone of {@code expr} to the group-by list; a null argument is ignored. */
    public void addGroupByExpression(AbstractExpression expr) {
        if (expr == null) {
            return;
        }
        m_groupByExpressions.add(expr.clone());
    }

    /**
     * Writes the aggregate columns, then (only when present) the group-by
     * expressions, partial group-by columns, pre-predicate and post-predicate.
     * Partial group-by columns are written only when there are group-by expressions.
     */
    @Override
    public void toJSONString(JSONStringer stringer) throws JSONException {
        super.toJSONString(stringer);

        // Use the enum for the key, as loadFromJSONObject does when reading it back.
        stringer.key(Members.AGGREGATE_COLUMNS.name());
        stringer.array();
        for (int ii = 0; ii < m_aggregateTypes.size(); ii++) {
            stringer.object();
            stringer.keySymbolValuePair(Members.AGGREGATE_TYPE.name(), m_aggregateTypes.get(ii).name());
            stringer.keySymbolValuePair(Members.AGGREGATE_DISTINCT.name(), m_aggregateDistinct.get(ii));
            stringer.keySymbolValuePair(Members.AGGREGATE_OUTPUT_COLUMN.name(), m_aggregateOutputColumns.get(ii));
            AbstractExpression ae = m_aggregateExpressions.get(ii);
            // A null input expression is simply omitted from the JSON.
            if (ae != null) {
                stringer.key(Members.AGGREGATE_EXPRESSION.name());
                stringer.object();
                ae.toJSONString(stringer);
                stringer.endObject();
            }
            stringer.endObject();
        }
        stringer.endArray();

        if ( ! m_groupByExpressions.isEmpty()) {
            stringer.key(Members.GROUPBY_EXPRESSIONS.name()).array();
            for (AbstractExpression groupByExpr : m_groupByExpressions) {
                stringer.object();
                groupByExpr.toJSONString(stringer);
                stringer.endObject();
            }
            stringer.endArray();

            if (m_partialGroupByColumns != null) {
                assert(! m_partialGroupByColumns.isEmpty());
                stringer.key(Members.PARTIAL_GROUPBY_COLUMNS.name()).array();
                for (Integer ith: m_partialGroupByColumns) {
                    stringer.value(ith.longValue());
                }
                stringer.endArray();
            }
        }

        if (m_prePredicate != null) {
            stringer.key(Members.PRE_PREDICATE.name()).value(m_prePredicate);
        }
        if (m_postPredicate != null) {
            stringer.key(Members.POST_PREDICATE.name()).value(m_postPredicate);
        }
    }

    /** Maps an aggregate plan node type to the word used for it in explain output. */
    private static String planNodeTypeToAggDescString(PlanNodeType nodeType) {
        switch (nodeType) {
        case AGGREGATE:
            return "Serial";
        case PARTIALAGGREGATE:
            return "Partial";
        default:
            assert(nodeType == PlanNodeType.HASHAGGREGATE);
            return "Hash";
        }
    }

    @Override
    protected String explainPlanForNode(String indent) {
        StringBuilder sb = new StringBuilder();
        String optionalTableName = "*NO MATCH -- USE ALL TABLE NAMES*";
        String aggType = planNodeTypeToAggDescString(getPlanNodeType());

        sb.append(aggType).append(" AGGREGATION ops: ");
        String sep = "";
        int ii = 0;
        for (ExpressionType e : m_aggregateTypes) {
            sb.append(sep).append(e.symbol());
            sep = ", ";
            if (e != ExpressionType.AGGREGATE_COUNT_STAR) {
                if (m_aggregateDistinct.get(ii) == 1) {
                    sb.append(" DISTINCT");
                }
                AbstractExpression ae = m_aggregateExpressions.get(ii);
                assert(ae != null);
                sb.append("(");
                sb.append(ae.explain(optionalTableName));
                sb.append(")");
            }
            ++ii;
        }
        if (m_prePredicate != null) {
            sb.append(" ONLY IF ").append(m_prePredicate.explain(optionalTableName));
        }
        if (m_postPredicate != null) {
            // HAVING is always defined WRT to the current outputSchema (NOT inputschema).
            // This might be a little surprising to the user
            // -- maybe we can find some better way to describe the TVEs, here.
            sb.append(" HAVING ").append(m_postPredicate.explain(AbstractParsedStmt.TEMP_TABLE_NAME));
        }

        return sb.toString();
    }

    /**
     * Populates this node from its JSON form: the aggregate columns, the
     * group-by expressions, the optional partial group-by columns, and the
     * pre- and post-predicates.
     */
    @Override
    public void loadFromJSONObject( JSONObject jobj, Database db ) throws JSONException {
        helpLoadFromJSONObject(jobj, db);
        JSONArray jarray = jobj.getJSONArray( Members.AGGREGATE_COLUMNS.name() );
        int size = jarray.length();
        for (int i = 0; i < size; i++) {
            JSONObject tempObj = jarray.getJSONObject( i );
            m_aggregateTypes.add( ExpressionType.get( tempObj.getString( Members.AGGREGATE_TYPE.name() )));
            m_aggregateDistinct.add( tempObj.getInt( Members.AGGREGATE_DISTINCT.name() ) );
            m_aggregateOutputColumns.add( tempObj.getInt( Members.AGGREGATE_OUTPUT_COLUMN.name() ));

            // A missing input expression is stored as a null list entry.
            if (tempObj.isNull(Members.AGGREGATE_EXPRESSION.name())) {
                m_aggregateExpressions.add(null);
            }
            else {
                m_aggregateExpressions.add(
                    AbstractExpression.fromJSONChild(tempObj, Members.AGGREGATE_EXPRESSION.name()));
            }
        }
        AbstractExpression.loadFromJSONArrayChild(m_groupByExpressions, jobj,
                                                  Members.GROUPBY_EXPRESSIONS.name(), null);

        if ( ! jobj.isNull(Members.PARTIAL_GROUPBY_COLUMNS.name())) {
            JSONArray jarray2 = jobj.getJSONArray(Members.PARTIAL_GROUPBY_COLUMNS.name());
            int numCols = jarray2.length();
            m_partialGroupByColumns = new ArrayList<>(numCols);
            for (int ii = 0; ii < numCols; ++ii) {
                m_partialGroupByColumns.add(jarray2.getInt(ii));
            }
        }

        m_prePredicate = AbstractExpression.fromJSONChild(jobj, Members.PRE_PREDICATE.name());
        m_postPredicate = AbstractExpression.fromJSONChild(jobj, Members.POST_PREDICATE.name());
    }

    /**
     * Looks for an inline aggregate node on {@code node}, trying the AGGREGATE,
     * HASHAGGREGATE and PARTIALAGGREGATE node types in that order.
     *
     * @return the first one found, or null if none is present
     */
    public static AggregatePlanNode getInlineAggregationNode(AbstractPlanNode node) {
        AggregatePlanNode aggNode =
                (AggregatePlanNode) (node.getInlinePlanNode(PlanNodeType.AGGREGATE));
        if (aggNode == null) {
            aggNode = (HashAggregatePlanNode) (node.getInlinePlanNode(PlanNodeType.HASHAGGREGATE));
        }
        if (aggNode == null) {
            aggNode = (PartialAggregatePlanNode) (node.getInlinePlanNode(PlanNodeType.PARTIALAGGREGATE));
        }

        return aggNode;
    }

    /**
     * Adds to {@code collected} the subexpressions of class {@code aeClass}
     * found by the superclass and in this node's predicates, aggregate input
     * expressions and group-by expressions.
     */
    @Override
    public void findAllExpressionsOfClass(Class<? extends AbstractExpression> aeClass,
            Set<AbstractExpression> collected) {
        super.findAllExpressionsOfClass(aeClass, collected);
        if (m_prePredicate != null) {
            collected.addAll(m_prePredicate.findAllSubexpressionsOfClass(aeClass));
        }
        if (m_postPredicate != null) {
            collected.addAll(m_postPredicate.findAllSubexpressionsOfClass(aeClass));
        }
        for (AbstractExpression ae : m_aggregateExpressions) {
            if (ae == null) {
                // This is a place-holder for the "*" in "COUNT(*)".
                // There are no subexpressions to find here.
                continue;
            }
            collected.addAll(ae.findAllSubexpressionsOfClass(aeClass));
        }
        for (AbstractExpression ae : m_groupByExpressions) {
            collected.addAll(ae.findAllSubexpressionsOfClass(aeClass));
        }
    }

    /**
     * @return false for a hash aggregate; otherwise whatever the single child
     *         reports for the same sort expressions and directions
     */
    @Override
    public boolean isOutputOrdered (List<AbstractExpression> sortExpressions, List<SortDirectionType> sortDirections) {
        if (getPlanNodeType() == PlanNodeType.HASHAGGREGATE) {
            return false;
        }
        // the order for Serial and Partial aggregates is determined by the order
        // of the keys from the child node
        assert(getChildCount() == 1);
        AbstractPlanNode child = getChild(0);
        return child.isOutputOrdered(sortExpressions, sortDirections);
    }

    /**
     * Convert HashAggregate into a Serialized Aggregate
     *
     * @param hashAggregateNode HashAggregatePlanNode
     * @return AggregatePlanNode
     */
    public static AggregatePlanNode convertToSerialAggregatePlanNode(HashAggregatePlanNode hashAggregateNode) {
        AggregatePlanNode serialAggr = new AggregatePlanNode();
        return setAggregatePlanNode(hashAggregateNode, serialAggr);
    }

    /**
     * Convert HashAggregate into a Partial Aggregate
     *
     * @param hashAggregateNode HashAggregatePlanNode
     * @param aggrColumnIdxs partial aggregate column indexes
     * @return AggregatePlanNode
     */
    public static AggregatePlanNode convertToPartialAggregatePlanNode(HashAggregatePlanNode hashAggregateNode,
            List<Integer> aggrColumnIdxs) {
        AggregatePlanNode partialAggr = new PartialAggregatePlanNode();
        partialAggr = setAggregatePlanNode(hashAggregateNode, partialAggr);
        partialAggr.m_partialGroupByColumns = aggrColumnIdxs;
        return partialAggr;
    }

    /**
     * Copies the aggregation description from {@code origin} into
     * {@code destination}. Group-by and aggregate input expressions are added
     * through the add methods (which clone them); the two predicates are
     * shared by reference; the output schema is set via {@link #setOutputSchema}.
     *
     * @return {@code destination}
     */
    private static AggregatePlanNode setAggregatePlanNode(AggregatePlanNode origin, AggregatePlanNode destination) {
        destination.m_isCoordinatingAggregator = origin.m_isCoordinatingAggregator;
        destination.m_prePredicate = origin.m_prePredicate;
        destination.m_postPredicate = origin.m_postPredicate;
        for (AbstractExpression expr : origin.m_groupByExpressions) {
            destination.addGroupByExpression(expr);
        }

        List<ExpressionType> aggregateTypes = origin.m_aggregateTypes;
        List<Integer> aggregateDistinct = origin.m_aggregateDistinct;
        List<Integer> aggregateOutputColumns = origin.m_aggregateOutputColumns;
        List<AbstractExpression> aggregateExpressions = origin.m_aggregateExpressions;
        for (int i = 0; i < origin.getAggregateTypesSize(); i++) {
            destination.addAggregate(aggregateTypes.get(i),
                    aggregateDistinct.get(i) == 1,
                    aggregateOutputColumns.get(i),
                    aggregateExpressions.get(i));
        }
        destination.setOutputSchema(origin.getOutputSchema());
        return destination;
    }

    /**
     * AggregatePlanNodes don't need projection nodes.
     */
    @Override
    public boolean planNodeClassNeedsProjectionNode() {
        return false;
    }
}