/* This file is part of VoltDB.
* Copyright (C) 2008-2017 VoltDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
*/
package org.voltdb.plannodes;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import org.json_voltpatches.JSONArray;
import org.json_voltpatches.JSONException;
import org.json_voltpatches.JSONObject;
import org.json_voltpatches.JSONStringer;
import org.voltdb.catalog.Column;
import org.voltdb.catalog.Database;
import org.voltdb.catalog.Table;
import org.voltdb.expressions.AbstractExpression;
import org.voltdb.expressions.AbstractSubqueryExpression;
import org.voltdb.expressions.AggregateExpression;
import org.voltdb.expressions.ExpressionUtil;
import org.voltdb.expressions.TupleValueExpression;
import org.voltdb.planner.AbstractParsedStmt;
import org.voltdb.planner.parseinfo.StmtTargetTableScan;
import org.voltdb.types.ExpressionType;
import org.voltdb.types.PlanNodeType;
import org.voltdb.types.SortDirectionType;
public class AggregatePlanNode extends AbstractPlanNode {
/**
 * Key names used by toJSONString and loadFromJSONObject when this node is
 * written to, and read back from, its JSON form. The constant's name() is the
 * literal JSON key.
 */
public enum Members {
PRE_PREDICATE, // ENG-1565: used only to accelerate min() / max() via an index
POST_PREDICATE, // written only when a post-predicate is set
AGGREGATE_COLUMNS, // array holding one object per aggregate
AGGREGATE_TYPE, // per-aggregate: name of the ExpressionType
AGGREGATE_DISTINCT, // per-aggregate: 1 if DISTINCT, 0 otherwise
AGGREGATE_OUTPUT_COLUMN, // per-aggregate: index into the output schema
AGGREGATE_EXPRESSION, // per-aggregate: input expression; omitted when it is null
GROUPBY_EXPRESSIONS, // array of group-by expressions; omitted when there are none
PARTIAL_GROUPBY_COLUMNS // array of integers; written only when the partial group-by list is set
;
}
// The four lists below are parallel: entry i of each one describes aggregate i.
// validate() rejects a node whose four lists differ in length.

// The kind of each aggregate (one ExpressionType per aggregate).
protected List<ExpressionType> m_aggregateTypes = new ArrayList<>();
// Whether each aggregate is over distinct elements:
// 0 is not distinct, 1 is distinct.
protected List<Integer> m_aggregateDistinct = new ArrayList<>();
// Column offsets/indexes into the output schema, not plan column guids.
protected List<Integer> m_aggregateOutputColumns = new ArrayList<>();
// The input expressions of the aggregates. An entry is null for an aggregate
// that takes no argument (see addAggregate). Maybe should become
// a list of SchemaColumns someday.
protected List<AbstractExpression> m_aggregateExpressions =
new ArrayList<>();
// The GROUP BY expressions. The original author noted that these are
// currently guaranteed to be TVEs.
// NOTE(review): confirm whether that guarantee still holds.
protected List<AbstractExpression> m_groupByExpressions
= new ArrayList<>();
// This list is only used for the special case of instances of PartialAggregatePlanNode.
// It stays null otherwise.
protected List<Integer> m_partialGroupByColumns = null;
// True if this aggregate node is the coordinator summary aggregator
// for an aggregator that was pushed down. Must know to correctly
// decide if other nodes can be pushed down / past this node.
public boolean m_isCoordinatingAggregator = false;
// Optional predicate reported as "ONLY IF" by explainPlanForNode; may be null.
protected AbstractExpression m_prePredicate;
// Optional predicate reported as "HAVING" by explainPlanForNode; may be null.
// Its TVEs are resolved against this node's OUTPUT schema.
protected AbstractExpression m_postPredicate;
/**
 * Creates an aggregate node with no aggregates, no group-by expressions
 * and no predicates.
 */
public AggregatePlanNode() {
super();
}
/**
 * @return always PlanNodeType.AGGREGATE, which explainPlanForNode reports
 *         as a "Serial" aggregation.
 */
@Override
public PlanNodeType getPlanNodeType() {
return PlanNodeType.AGGREGATE;
}
/**
 * @return the node's own list of aggregate types (not a copy), one entry
 *         per aggregate.
 */
public List<ExpressionType> getAggregateTypes() {
return m_aggregateTypes;
}
/**
 * Checks that this node describes at least one aggregate and that its
 * per-aggregate lists agree with each other.
 *
 * @throws Exception if the superclass validation fails, if the type,
 *         distinct, expression and output-column lists differ in length,
 *         or if there is no aggregate type or an INVALID one.
 */
@Override
public void validate() throws Exception {
    super.validate();

    // The four lists are parallel, so they must all have the same length.
    final int typeCount = m_aggregateTypes.size();
    final int distinctCount = m_aggregateDistinct.size();
    final int expressionCount = m_aggregateExpressions.size();
    final int outputColumnCount = m_aggregateOutputColumns.size();
    final boolean sameLength = typeCount == distinctCount
            && distinctCount == expressionCount
            && expressionCount == outputColumnCount;
    if (!sameLength) {
        throw new Exception("ERROR: Mismatched number of aggregate expression column attributes for PlanNode '" + this + "'");
    }

    // Only the INVALID marker is rejected; the types are not otherwise vetted.
    final boolean typesUsable = !m_aggregateTypes.isEmpty()
            && !m_aggregateTypes.contains(ExpressionType.INVALID);
    if (!typesUsable) {
        throw new Exception("ERROR: Invalid Aggregate ExpressionType or No Aggregate Expression types for PlanNode '" + this + "'");
    }

    // Defensive only: once the lengths match and the type list is non-empty,
    // the expression list cannot be empty either.
    if (m_aggregateExpressions.isEmpty()) {
        throw new Exception("ERROR: No Aggregate Expressions for PlanNode '" + this + "'");
    }
}
/**
 * @return true when this node has no GROUP BY expressions and exactly one
 *         aggregate, and that aggregate is COUNT(*).
 */
public boolean isTableCountStar() {
    return m_groupByExpressions.isEmpty()
            && m_aggregateTypes.size() == 1
            && m_aggregateTypes.get(0).equals(ExpressionType.AGGREGATE_COUNT_STAR);
}
/**
 * @return true when this node has no GROUP BY expressions and exactly one
 *         aggregate, and that aggregate is a COUNT without the DISTINCT
 *         keyword.
 */
public boolean isTableNonDistinctCount() {
    return m_groupByExpressions.isEmpty()
            && m_aggregateTypes.size() == 1
            && m_aggregateTypes.get(0).equals(ExpressionType.AGGREGATE_COUNT)
            // the distinct flag is stored as 1 (distinct) or 0 (not distinct)
            && m_aggregateDistinct.get(0) != 1;
}
/**
 * @return true when this node is a single non-distinct COUNT without
 *         GROUP BY (see isTableNonDistinctCount) whose argument is a
 *         parameter or a constant value.
 */
public boolean isTableNonDistinctCountConstant() {
    if (isTableNonDistinctCount()) {
        final ExpressionType argumentKind = m_aggregateExpressions.get(0).getExpressionType();
        return argumentKind.equals(ExpressionType.VALUE_PARAMETER)
                || argumentKind.equals(ExpressionType.VALUE_CONSTANT);
    }
    return false;
}
/**
 * Tests whether this node is a single non-distinct COUNT, without GROUP BY,
 * over a column of the table scanned by this node's only child.
 * <p>
 * Despite the method name, the result is true only when that column is
 * declared NOT nullable; a nullable column yields false.
 *
 * @return true only if all of the following hold: the node is a single
 *         non-distinct COUNT without GROUP BY, the COUNT argument is a
 *         VALUE_TUPLE expression, the only child is an AbstractScanPlanNode
 *         scanning a StmtTargetTableScan, and the named column is found in
 *         the target table and is not nullable.
 */
public boolean isTableCountNonDistinctNullableColumn() {
    if (!isTableNonDistinctCount()) {
        return false;
    }
    // Is the expression a column?
    AbstractExpression aggArgument = m_aggregateExpressions.get(0);
    if (!aggArgument.getExpressionType().equals(ExpressionType.VALUE_TUPLE)) {
        return false;
    }
    // Need to go to its child node to see the table schema.
    // Normally it has to be a ScanPlanNode.
    // If the query is a join query then the child will be something like nested loop.
    assert (m_children.size() == 1);
    if (!(m_children.get(0) instanceof AbstractScanPlanNode)) {
        return false;
    }
    AbstractScanPlanNode asp = (AbstractScanPlanNode) m_children.get(0);
    if (!(asp.getTableScan() instanceof StmtTargetTableScan)) {
        return false;
    }
    StmtTargetTableScan sttscan = (StmtTargetTableScan) asp.getTableScan();
    Table tbl = sttscan.getTargetTable();
    TupleValueExpression tve = (TupleValueExpression) aggArgument;
    Column col = tbl.getColumns().get(tve.getColumnName());
    // A column that cannot be found cannot be shown to be non-nullable,
    // so answer false instead of dereferencing a null catalog entry.
    if (col == null) {
        return false;
    }
    return !col.getNullable();
}
/**
 * Single min() without GROUP BY? GROUP BY is not supported for now.
 *
 * @return true when this node has no GROUP BY expressions and exactly one
 *         aggregate, and that aggregate is MIN.
 */
public boolean isTableMin() {
    return m_groupByExpressions.isEmpty()
            && m_aggregateTypes.size() == 1
            && m_aggregateTypes.get(0).equals(ExpressionType.AGGREGATE_MIN);
}
/**
 * Single max() without GROUP BY? GROUP BY is not supported for now.
 *
 * @return true when this node has no GROUP BY expressions and exactly one
 *         aggregate, and that aggregate is MAX.
 */
public boolean isTableMax() {
    return m_groupByExpressions.isEmpty()
            && m_aggregateTypes.size() == 1
            && m_aggregateTypes.get(0).equals(ExpressionType.AGGREGATE_MAX);
}
/**
 * Sets the pre-predicate, e.g. for the SELECT MAX(X) FROM T WHERE X > / >= ? case.
 * The expression is stored as given (no copy is made).
 *
 * @param predicate the pre-predicate, or null for none
 */
public void setPrePredicate(AbstractExpression predicate) {
m_prePredicate = predicate;
}
/**
 * Sets the post-predicate, which explainPlanForNode reports as HAVING.
 * The expression is stored as given (no copy is made).
 *
 * @param predicate the post-predicate, or null for none
 */
public void setPostPredicate(AbstractExpression predicate) {
m_postPredicate = predicate;
}
/**
 * @return the post-predicate stored on this node, or null if none was set
 */
public AbstractExpression getPostPredicate() {
return m_postPredicate;
}
/**
 * For single min() / max(), return the single aggregate expression.
 * Callers must ensure at least one aggregate has been added; the list is
 * indexed at 0 without a size check.
 *
 * @return the input expression of the first aggregate; this is null when
 *         that aggregate was added without an argument (see addAggregate)
 */
public AbstractExpression getFirstAggregateExpression() {
return m_aggregateExpressions.get(0);
}
/**
 * @return the number of aggregates held by this node
 */
public int getAggregateTypesSize () {
return m_aggregateTypes.size();
}
/**
 * @return the node's own list of GROUP BY expressions (not a copy)
 */
public List<AbstractExpression> getGroupByExpressions() {
return m_groupByExpressions;
}
/**
 * @return the number of GROUP BY expressions held by this node
 */
public int getGroupByExpressionsSize () {
return m_groupByExpressions.size();
}
/**
 * Installs the output schema of this node and marks the node as having a
 * significant output schema. The node keeps a clone of the argument rather
 * than the argument itself.
 *
 * @param schema the schema to clone; must not be null
 */
public void setOutputSchema(NodeSchema schema) {
// aggregates currently have their output schema specified
m_outputSchema = schema.clone();
m_hasSignificantOutputSchema = true;
}
/**
 * Generates the output schemas beneath this node. The aggregate's own
 * output schema is pre-determined (see setOutputSchema), so it is not
 * rebuilt here; only the single child, if present, and any subquery
 * expressions are asked to generate theirs.
 *
 * @param db the catalog database handed on to the child and the subqueries
 */
@Override
public void generateOutputSchema(Database db) {
    if (m_children.size() == 1) {
        m_children.get(0).generateOutputSchema(db);
        // this node's schema must already have been installed
        assert(m_hasSignificantOutputSchema);
    }

    // Subquery expressions carry plans of their own that need schemas too.
    for (AbstractExpression candidate : findAllSubquerySubexpressions()) {
        assert(candidate instanceof AbstractSubqueryExpression);
        ((AbstractSubqueryExpression) candidate).generateOutputSchema(db);
    }
}
/**
 * Resolves column indexes for the single child first, then resolves this
 * node's own expressions against the child's output schema.
 * Aggregates resolve indexes for their output schema but do not reorder
 * it; some outputs may be local aggregate columns with no TVE to resolve.
 */
@Override
public void resolveColumnIndexes() {
    assert (m_children.size() == 1);
    m_children.get(0).resolveColumnIndexes();
    // the child's output is this node's input
    resolveColumnIndexesUsingSchema(m_children.get(0).getOutputSchema());
}
/**
 * Resolves the column index of every TVE used by this node.
 * <ul>
 * <li>TVEs in the output schema, the aggregate inputs and the group-by
 *     expressions are resolved against {@code inputSchema}.</li>
 * <li>TVEs in the post-predicate are resolved against this node's own
 *     output schema, because that filter applies to the OUTPUT tuple.</li>
 * <li>Finally, subquery expressions resolve their own indexes.</li>
 * </ul>
 *
 * @param inputSchema the schema of the tuples fed into this aggregate
 * @throws RuntimeException if an output-schema TVE cannot be found in
 *         {@code inputSchema} and does not belong to the temp table
 */
void resolveColumnIndexesUsingSchema(NodeSchema inputSchema) {
    // Output columns: a miss is acceptable only for a column that is itself
    // an aggregate output, which is recognised by the temp table name.
    for (SchemaColumn outputColumn : m_outputSchema.getColumns()) {
        for (TupleValueExpression tve :
                ExpressionUtil.getTupleValueExpressions(outputColumn.getExpression())) {
            if (tve.setColumnIndexUsingSchema(inputSchema) != -1) {
                continue;
            }
            if (!tve.getTableName().equals(AbstractParsedStmt.TEMP_TABLE_NAME)) {
                throw new RuntimeException("Unable to find index for column: " +
                        tve.getColumnName());
            }
        }
    }

    // Aggregate inputs. Entries may be null placeholders (see addAggregate);
    // they are handed to ExpressionUtil unchanged, exactly as before.
    for (AbstractExpression aggregateInput : m_aggregateExpressions) {
        for (TupleValueExpression tve : ExpressionUtil.getTupleValueExpressions(aggregateInput)) {
            tve.setColumnIndexUsingSchema(inputSchema);
        }
    }

    // Group-by inputs.
    for (AbstractExpression groupByInput : m_groupByExpressions) {
        for (TupleValueExpression tve : ExpressionUtil.getTupleValueExpressions(groupByInput)) {
            tve.setColumnIndexUsingSchema(inputSchema);
        }
    }

    // The post filter sees the OUTPUT tuple, so it uses the output schema.
    for (TupleValueExpression tve : ExpressionUtil.getTupleValueExpressions(m_postPredicate)) {
        tve.setColumnIndex(m_outputSchema.getIndexOfTve(tve));
    }

    resolveSubqueryColumnIndexes();
}
/**
 * Asks every subquery expression found on this node to resolve its own
 * column indexes.
 */
@Override
protected void resolveSubqueryColumnIndexes() {
    for (AbstractExpression subquery : findAllSubquerySubexpressions()) {
        ((AbstractSubqueryExpression) subquery).resolveColumnIndexes();
    }
}
/**
 * Appends one aggregate to this plan node by adding an entry to each of
 * the parallel per-aggregate lists.
 *
 * @param aggType the kind of aggregate
 * @param isDistinct is DISTINCT being applied to the argument of this
 *        aggregate? Stored as 1 for true and 0 for false.
 * @param aggOutputColumn which column of the output schema this aggregate
 *        should occupy
 * @param aggInputExpr the expression to aggregate. Expected to be null
 *        exactly when {@code aggType.isNullary()}; otherwise a clone of it
 *        is stored.
 */
public void addAggregate(ExpressionType aggType,
                         boolean isDistinct,
                         Integer aggOutputColumn,
                         AbstractExpression aggInputExpr)
{
    m_aggregateTypes.add(aggType);
    m_aggregateDistinct.add(isDistinct ? 1 : 0);
    m_aggregateOutputColumns.add(aggOutputColumn);

    // An aggregate without an argument keeps a null placeholder so that the
    // parallel lists stay the same length.
    final boolean takesNoArgument = aggType.isNullary();
    assert(takesNoArgument ? aggInputExpr == null : aggInputExpr != null);
    m_aggregateExpressions.add(takesNoArgument ? null : aggInputExpr.clone());
}
/**
 * Changes the type of an existing aggregate and updates the value type and
 * value size of the matching output-schema expression to those of the new
 * aggregate type.
 *
 * @param index position of the aggregate in this node's aggregate lists
 * @param aggType the new aggregate type
 */
public void updateAggregate(
        int index,
        ExpressionType aggType) {
    // A throw-away aggregate expression, used only to learn the value type
    // and size that belong to the new aggregate type.
    final AggregateExpression typeProbe = new AggregateExpression(aggType);
    typeProbe.finalizeValueTypes();

    // The output schema expressions are TVEs; patch the one for this aggregate.
    final int outputColumnIndex = m_aggregateOutputColumns.get(index);
    final AbstractExpression outputExpr =
            m_outputSchema.getColumns().get(outputColumnIndex).getExpression();
    outputExpr.setValueType(typeProbe.getValueType());
    outputExpr.setValueSize(typeProbe.getValueSize());

    m_aggregateTypes.set(index, aggType);
}
/**
 * Appends a clone of the given expression to the GROUP BY list.
 *
 * @param expr the expression to group by; a null argument is ignored
 */
public void addGroupByExpression(AbstractExpression expr)
{
    if (expr != null) {
        m_groupByExpressions.add(expr.clone());
    }
}
/**
 * Writes this node's JSON form: the superclass members, then an
 * AGGREGATE_COLUMNS array with one object per aggregate, then the optional
 * group-by expressions, partial group-by columns and predicates.
 * <p>
 * Every key comes from {@link Members}, the same names that
 * loadFromJSONObject reads back.
 *
 * @param stringer the JSON writer to append to
 * @throws JSONException if the writer rejects an operation
 */
@Override
public void toJSONString(JSONStringer stringer) throws JSONException {
    super.toJSONString(stringer);

    stringer.key(Members.AGGREGATE_COLUMNS.name());
    stringer.array();
    for (int ii = 0; ii < m_aggregateTypes.size(); ii++) {
        stringer.object();
        stringer.keySymbolValuePair(Members.AGGREGATE_TYPE.name(), m_aggregateTypes.get(ii).name());
        stringer.keySymbolValuePair(Members.AGGREGATE_DISTINCT.name(), m_aggregateDistinct.get(ii));
        stringer.keySymbolValuePair(Members.AGGREGATE_OUTPUT_COLUMN.name(), m_aggregateOutputColumns.get(ii));
        // A null expression is the placeholder for an aggregate without an
        // argument; the key is simply left out in that case.
        AbstractExpression ae = m_aggregateExpressions.get(ii);
        if (ae != null) {
            stringer.key(Members.AGGREGATE_EXPRESSION.name());
            stringer.object();
            ae.toJSONString(stringer);
            stringer.endObject();
        }
        stringer.endObject();
    }
    stringer.endArray();

    if (! m_groupByExpressions.isEmpty()) {
        stringer.key(Members.GROUPBY_EXPRESSIONS.name()).array();
        for (AbstractExpression groupByExpr : m_groupByExpressions) {
            stringer.object();
            groupByExpr.toJSONString(stringer);
            stringer.endObject();
        }
        stringer.endArray();

        // Partial group-by columns are only written alongside group-by expressions.
        if (m_partialGroupByColumns != null) {
            assert(! m_partialGroupByColumns.isEmpty());
            stringer.key(Members.PARTIAL_GROUPBY_COLUMNS.name()).array();
            for (Integer ith: m_partialGroupByColumns) {
                stringer.value(ith.longValue());
            }
            stringer.endArray();
        }
    }

    if (m_prePredicate != null) {
        stringer.key(Members.PRE_PREDICATE.name()).value(m_prePredicate);
    }
    if (m_postPredicate != null) {
        stringer.key(Members.POST_PREDICATE.name()).value(m_postPredicate);
    }
}
/**
 * Maps an aggregate plan node type to the word used for it in explain
 * output: AGGREGATE is "Serial", PARTIALAGGREGATE is "Partial", and any
 * other type (asserted to be HASHAGGREGATE) is "Hash".
 */
private static String planNodeTypeToAggDescString(PlanNodeType nodeType) {
    final String description;
    switch (nodeType) {
    case AGGREGATE:
        description = "Serial";
        break;
    case PARTIALAGGREGATE:
        description = "Partial";
        break;
    default:
        assert(nodeType == PlanNodeType.HASHAGGREGATE);
        description = "Hash";
        break;
    }
    return description;
}
/**
 * Builds the one-line explain text for this node: the aggregation flavour
 * (Serial / Partial / Hash), the list of aggregate operations with their
 * arguments, and the optional "ONLY IF" (pre-predicate) and "HAVING"
 * (post-predicate) clauses.
 * <p>
 * An aggregate whose stored argument is null is listed by its symbol
 * alone, so that explaining a plan never fails on a missing argument even
 * when assertions are disabled.
 *
 * @param indent not used by this implementation
 * @return the explain text for this node
 */
@Override
protected String explainPlanForNode(String indent) {
    final String optionalTableName = "*NO MATCH -- USE ALL TABLE NAMES*";
    StringBuilder sb = new StringBuilder();
    sb.append(planNodeTypeToAggDescString(getPlanNodeType()))
      .append(" AGGREGATION ops: ");

    String sep = "";
    for (int ii = 0; ii < m_aggregateTypes.size(); ++ii) {
        ExpressionType aggType = m_aggregateTypes.get(ii);
        sb.append(sep).append(aggType.symbol());
        sep = ", ";
        // COUNT(*) has no argument to describe.
        if (aggType == ExpressionType.AGGREGATE_COUNT_STAR) {
            continue;
        }
        // toJSONString and findAllExpressionsOfClass both tolerate a null
        // placeholder here; do the same instead of relying on an assert.
        AbstractExpression ae = m_aggregateExpressions.get(ii);
        if (ae == null) {
            continue;
        }
        if (m_aggregateDistinct.get(ii) == 1) {
            sb.append(" DISTINCT");
        }
        sb.append("(").append(ae.explain(optionalTableName)).append(")");
    }

    if (m_prePredicate != null) {
        sb.append(" ONLY IF ").append(m_prePredicate.explain(optionalTableName));
    }
    if (m_postPredicate != null) {
        // HAVING is always defined WRT to the current outputSchema (NOT inputschema).
        // This might be a little surprising to the user
        // -- maybe we can find some better way to describe the TVEs, here.
        sb.append(" HAVING ").append(m_postPredicate.explain(AbstractParsedStmt.TEMP_TABLE_NAME));
    }
    return sb.toString();
}
/**
 * Populates this node from its JSON form, the inverse of toJSONString.
 * Aggregates and group-by expressions read from {@code jobj} are appended
 * to this node's existing lists; the lists are not cleared first.
 *
 * @param jobj the JSON object describing this node
 * @param db the catalog database, passed on to helpLoadFromJSONObject
 * @throws JSONException if a required member is missing or malformed
 */
@Override
public void loadFromJSONObject( JSONObject jobj, Database db ) throws JSONException {
    helpLoadFromJSONObject(jobj, db);

    // One JSON object per aggregate, feeding the four parallel lists.
    final JSONArray aggregateColumns = jobj.getJSONArray(Members.AGGREGATE_COLUMNS.name());
    final String expressionKey = Members.AGGREGATE_EXPRESSION.name();
    for (int i = 0, count = aggregateColumns.length(); i < count; i++) {
        final JSONObject aggregateJson = aggregateColumns.getJSONObject(i);
        m_aggregateTypes.add(ExpressionType.get(aggregateJson.getString(Members.AGGREGATE_TYPE.name())));
        m_aggregateDistinct.add(aggregateJson.getInt(Members.AGGREGATE_DISTINCT.name()));
        m_aggregateOutputColumns.add(aggregateJson.getInt(Members.AGGREGATE_OUTPUT_COLUMN.name()));
        // A missing expression means the aggregate takes no argument.
        final AbstractExpression aggregateInput = aggregateJson.isNull(expressionKey)
                ? null
                : AbstractExpression.fromJSONChild(aggregateJson, expressionKey);
        m_aggregateExpressions.add(aggregateInput);
    }

    AbstractExpression.loadFromJSONArrayChild(m_groupByExpressions, jobj,
            Members.GROUPBY_EXPRESSIONS.name(), null);

    // The partial group-by list is replaced only when the member is present.
    final String partialKey = Members.PARTIAL_GROUPBY_COLUMNS.name();
    if (!jobj.isNull(partialKey)) {
        final JSONArray partialColumns = jobj.getJSONArray(partialKey);
        final int numCols = partialColumns.length();
        m_partialGroupByColumns = new ArrayList<>(numCols);
        for (int ii = 0; ii < numCols; ++ii) {
            m_partialGroupByColumns.add(partialColumns.getInt(ii));
        }
    }

    m_prePredicate = AbstractExpression.fromJSONChild(jobj, Members.PRE_PREDICATE.name());
    m_postPredicate = AbstractExpression.fromJSONChild(jobj, Members.POST_PREDICATE.name());
}
/**
 * Finds the aggregate node inlined in {@code node}, looking for a serial
 * aggregate first, then a hash aggregate, then a partial aggregate.
 *
 * @param node the node whose inline nodes are searched
 * @return the first inline aggregate found in that order, or null if
 *         {@code node} has none
 */
public static AggregatePlanNode getInlineAggregationNode(AbstractPlanNode node) {
    final AggregatePlanNode serial =
            (AggregatePlanNode) (node.getInlinePlanNode(PlanNodeType.AGGREGATE));
    if (serial != null) {
        return serial;
    }
    final AggregatePlanNode hashed =
            (HashAggregatePlanNode) (node.getInlinePlanNode(PlanNodeType.HASHAGGREGATE));
    if (hashed != null) {
        return hashed;
    }
    return (PartialAggregatePlanNode) (node.getInlinePlanNode(PlanNodeType.PARTIALAGGREGATE));
}
/**
 * Collects every subexpression of the given class that is reachable from
 * this node: whatever the superclass finds, plus those in the
 * pre-predicate, the post-predicate, the aggregate inputs and the group-by
 * expressions.
 *
 * @param aeClass the expression class to look for
 * @param collected the set that receives the matches
 */
@Override
public void findAllExpressionsOfClass(Class< ? extends AbstractExpression> aeClass, Set<AbstractExpression> collected) {
    super.findAllExpressionsOfClass(aeClass, collected);

    if (m_prePredicate != null) {
        collected.addAll(m_prePredicate.findAllSubexpressionsOfClass(aeClass));
    }
    if (m_postPredicate != null) {
        collected.addAll(m_postPredicate.findAllSubexpressionsOfClass(aeClass));
    }
    for (AbstractExpression aggregateInput : m_aggregateExpressions) {
        // A null entry is the place-holder for the "*" in "COUNT(*)";
        // there are no subexpressions to find in it.
        if (aggregateInput != null) {
            collected.addAll(aggregateInput.findAllSubexpressionsOfClass(aeClass));
        }
    }
    for (AbstractExpression groupByExpr : m_groupByExpressions) {
        collected.addAll(groupByExpr.findAllSubexpressionsOfClass(aeClass));
    }
}
/**
 * Reports whether this node's output is ordered as requested. A hash
 * aggregate is never ordered. For serial and partial aggregates the order
 * is determined by the order of the keys from the child node, so the
 * question is passed on to the single child.
 *
 * @param sortExpressions the requested sort expressions
 * @param sortDirections the requested direction of each sort expression
 * @return false for a hash aggregate, otherwise the child's answer
 */
@Override
public boolean isOutputOrdered (List<AbstractExpression> sortExpressions, List<SortDirectionType> sortDirections) {
    if (getPlanNodeType() == PlanNodeType.HASHAGGREGATE) {
        return false;
    }
    assert(getChildCount() == 1);
    return getChild(0).isOutputOrdered(sortExpressions, sortDirections);
}
/**
 * Builds a serial aggregate node carrying the same aggregates, group-by
 * expressions, predicates and output schema as the given hash aggregate.
 *
 * @param hashAggregateNode the hash aggregate to copy from
 * @return a new AggregatePlanNode
 */
public static AggregatePlanNode convertToSerialAggregatePlanNode(HashAggregatePlanNode hashAggregateNode) {
    return setAggregatePlanNode(hashAggregateNode, new AggregatePlanNode());
}
/**
 * Builds a partial aggregate node carrying the same aggregates, group-by
 * expressions, predicates and output schema as the given hash aggregate.
 *
 * @param hashAggregateNode the hash aggregate to copy from
 * @param aggrColumnIdxs partial aggregate column indexes; the list is
 *        stored as given, not copied
 * @return a new PartialAggregatePlanNode, typed as AggregatePlanNode
 */
public static AggregatePlanNode convertToPartialAggregatePlanNode(HashAggregatePlanNode hashAggregateNode,
        List<Integer> aggrColumnIdxs) {
    final AggregatePlanNode converted =
            setAggregatePlanNode(hashAggregateNode, new PartialAggregatePlanNode());
    converted.m_partialGroupByColumns = aggrColumnIdxs;
    return converted;
}
/**
 * Copies the aggregation state of {@code origin} into {@code destination}:
 * the coordinator flag, both predicates, the group-by expressions, every
 * aggregate, and the output schema.
 *
 * @param origin the node to copy from
 * @param destination the node to fill in
 * @return {@code destination}
 */
private static AggregatePlanNode setAggregatePlanNode(AggregatePlanNode origin, AggregatePlanNode destination) {
    destination.m_isCoordinatingAggregator = origin.m_isCoordinatingAggregator;
    // NOTE(review): the predicates are shared by reference, whereas the
    // group-by and aggregate expressions below are cloned by the add methods.
    destination.m_prePredicate = origin.m_prePredicate;
    destination.m_postPredicate = origin.m_postPredicate;

    for (AbstractExpression groupByExpr : origin.m_groupByExpressions) {
        destination.addGroupByExpression(groupByExpr);
    }

    for (int i = 0; i < origin.getAggregateTypesSize(); i++) {
        destination.addAggregate(origin.m_aggregateTypes.get(i),
                                 origin.m_aggregateDistinct.get(i) == 1,
                                 origin.m_aggregateOutputColumns.get(i),
                                 origin.m_aggregateExpressions.get(i));
    }

    // setOutputSchema stores a clone, so the two nodes do not share a schema.
    destination.setOutputSchema(origin.getOutputSchema());
    return destination;
}
/**
 * AggregatePlanNodes don't need projection nodes.
 *
 * @return always false
 */
@Override
public boolean planNodeClassNeedsProjectionNode() {
return false;
}
}