/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.optimizer.stats.annotation; import java.lang.reflect.Field; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.Stack; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.LimitOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper; import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInBloomFilter; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; public class StatsRulesProcFactory { private static final Logger LOG = LoggerFactory.getLogger(StatsRulesProcFactory.class.getName()); private static final boolean isDebugEnabled = LOG.isDebugEnabled(); /** * Collect basic statistics like number of rows, data size and column level statistics from the * table. Also sets the state of the available statistics. Basic and column statistics can have * one of the following states COMPLETE, PARTIAL, NONE. In case of partitioned table, the basic * and column stats are aggregated together to table level statistics. Column statistics will not * be collected if hive.stats.fetch.column.stats is set to false. If basic statistics is not * available then number of rows will be estimated from file size and average row size (computed * from schema). */ public static class TableScanStatsRule extends DefaultStatsRule implements NodeProcessor { @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { TableScanOperator tsop = (TableScanOperator) nd; AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; PrunedPartitionList partList = aspCtx.getParseContext().getPrunedPartitions(tsop); Table table = tsop.getConf().getTableMetadata(); try { // gather statistics for the first time and the attach it to table scan operator Statistics stats = StatsUtils.collectStatistics(aspCtx.getConf(), partList, table, tsop); tsop.setStatistics(stats.clone()); if (isDebugEnabled) { LOG.debug("[0] STATS-" + tsop.toString() + " (" + table.getTableName() + "): " + stats.extendedToString()); } } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } catch (HiveException e) { LOG.debug("Failed to retrieve stats ",e); throw new SemanticException(e); } return null; } } /** * SELECT operator doesn't change the number of rows emitted from the parent operator. It changes * the size of each tuple emitted. In a typical case, where only subset of columns are selected * the average row size will reduce as some of the columns will be pruned. In order to accurately * compute the average row size, column level statistics is required. Column level statistics * stores average size of values in column which can be used to more reliably estimate the * reduction in size of each tuple. In the absence of column level statistics, size of columns * will be based on data type. For primitive data types size from * {@link org.apache.hadoop.hive.ql.util.JavaDataModel} will be used and for variable length data * types worst case will be assumed. * <p> * <i>For more information, refer 'Estimating The Cost Of Operations' chapter in * "Database Systems: The Complete Book" by Garcia-Molina et. al.</i> * </p> */ public static class SelectStatsRule extends DefaultStatsRule implements NodeProcessor { @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { SelectOperator sop = (SelectOperator) nd; Operator<? extends OperatorDesc> parent = sop.getParentOperators().get(0); Statistics parentStats = parent.getStatistics(); AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; HiveConf conf = aspCtx.getConf(); Statistics stats = null; if (parentStats != null) { try { stats = parentStats.clone(); } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } } try { if (satisfyPrecondition(parentStats)) { // this will take care of mapping between input column names and output column names. The // returned column stats will have the output column names. List<ColStatistics> colStats = StatsUtils.getColStatisticsFromExprMap(conf, parentStats, sop.getColumnExprMap(), sop.getSchema()); stats.setColumnStats(colStats); // in case of select(*) the data size does not change if (!sop.getConf().isSelectStar() && !sop.getConf().isSelStarNoCompute()) { long dataSize = StatsUtils.getDataSizeFromColumnStats(stats.getNumRows(), colStats); stats.setDataSize(dataSize); } sop.setStatistics(stats); if (isDebugEnabled) { LOG.debug("[0] STATS-" + sop.toString() + ": " + stats.extendedToString()); } } else { if (parentStats != null) { sop.setStatistics(parentStats.clone()); if (isDebugEnabled) { LOG.debug("[1] STATS-" + sop.toString() + ": " + parentStats.extendedToString()); } } } } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } return null; } } /** * FILTER operator does not change the average row size but it does change the number of rows * emitted. The reduction in the number of rows emitted is dependent on the filter expression. * <ul> * <i>Notations:</i> * <li>T(S) - Number of tuples in relations S</li> * <li>V(S,A) - Number of distinct values of attribute A in relation S</li> * </ul> * <ul> * <i>Rules:</i> <b> * <li>Column equals a constant</li></b> T(S) = T(R) / V(R,A) * <p> * <b> * <li>Inequality conditions</li></b> T(S) = T(R) / 3 * <p> * <b> * <li>Not equals comparison</li></b> - Simple formula T(S) = T(R) * <p> * - Alternate formula T(S) = T(R) (V(R,A) - 1) / V(R,A) * <p> * <b> * <li>NOT condition</li></b> T(S) = 1 - T(S'), where T(S') is the satisfying condition * <p> * <b> * <li>Multiple AND conditions</li></b> Cascadingly apply the rules 1 to 3 (order doesn't matter) * <p> * <b> * <li>Multiple OR conditions</li></b> - Simple formula is to evaluate conditions independently * and sum the results T(S) = m1 + m2 * <p> * - Alternate formula T(S) = T(R) * ( 1 - ( 1 - m1/T(R) ) * ( 1 - m2/T(R) )) * <p> * where, m1 is the number of tuples that satisfy condition1 and m2 is the number of tuples that * satisfy condition2 * </ul> * <p> * <i>Worst case:</i> If no column statistics are available, then evaluation of predicate * expression will assume worst case (i.e; half the input rows) for each of predicate expression. * <p> * <i>For more information, refer 'Estimating The Cost Of Operations' chapter in * "Database Systems: The Complete Book" by Garcia-Molina et. al.</i> * </p> */ public static class FilterStatsRule extends DefaultStatsRule implements NodeProcessor { @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; FilterOperator fop = (FilterOperator) nd; Operator<? extends OperatorDesc> parent = fop.getParentOperators().get(0); Statistics parentStats = parent.getStatistics(); List<String> neededCols = null; if (parent instanceof TableScanOperator) { TableScanOperator tsop = (TableScanOperator) parent; neededCols = tsop.getNeededColumns(); } try { if (parentStats != null) { ExprNodeDesc pred = fop.getConf().getPredicate(); // evaluate filter expression and update statistics long newNumRows = evaluateExpression(parentStats, pred, aspCtx, neededCols, fop, 0); Statistics st = parentStats.clone(); if (satisfyPrecondition(parentStats)) { // update statistics based on column statistics. // OR conditions keeps adding the stats independently, this may // result in number of rows getting more than the input rows in // which case stats need not be updated if (newNumRows <= parentStats.getNumRows()) { updateStats(st, newNumRows, true, fop); } if (isDebugEnabled) { LOG.debug("[0] STATS-" + fop.toString() + ": " + st.extendedToString()); } } else { // update only the basic statistics in the absence of column statistics if (newNumRows <= parentStats.getNumRows()) { updateStats(st, newNumRows, false, fop); } if (isDebugEnabled) { LOG.debug("[1] STATS-" + fop.toString() + ": " + st.extendedToString()); } } fop.setStatistics(st); aspCtx.setAndExprStats(null); } } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } return null; } private long evaluateExpression(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException { long newNumRows = 0; Statistics andStats = null; if (stats.getNumRows() <= 1 || stats.getDataSize() <= 0) { if (isDebugEnabled) { LOG.debug("Estimating row count for " + pred + " Original num rows: " + stats.getNumRows() + " Original data size: " + stats.getDataSize() + " New num rows: 1"); } return 1; } if (pred instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred; GenericUDF udf = genFunc.getGenericUDF(); // for AND condition cascadingly update stats if (udf instanceof GenericUDFOPAnd) { andStats = stats.clone(); aspCtx.setAndExprStats(andStats); // evaluate children for (ExprNodeDesc child : genFunc.getChildren()) { newNumRows = evaluateChildExpr(aspCtx.getAndExprStats(), child, aspCtx, neededCols, fop, evaluatedRowCount); if (satisfyPrecondition(aspCtx.getAndExprStats())) { updateStats(aspCtx.getAndExprStats(), newNumRows, true, fop); } else { updateStats(aspCtx.getAndExprStats(), newNumRows, false, fop); } } } else if (udf instanceof GenericUDFOPOr) { // for OR condition independently compute and update stats. for (ExprNodeDesc child : genFunc.getChildren()) { // early exit if OR evaluation yields more rows than input rows if (evaluatedRowCount >= stats.getNumRows()) { evaluatedRowCount = stats.getNumRows(); } else { newNumRows = StatsUtils.safeAdd( evaluateChildExpr(stats, child, aspCtx, neededCols, fop, evaluatedRowCount), newNumRows); evaluatedRowCount = newNumRows; } } } else if (udf instanceof GenericUDFIn) { // for IN clause newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, fop); } else if (udf instanceof GenericUDFBetween) { // for BETWEEN clause newNumRows = evaluateBetweenExpr(stats, pred, aspCtx, neededCols, fop); } else if (udf instanceof GenericUDFOPNot) { newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop); } else if (udf instanceof GenericUDFOPNotNull) { return evaluateNotNullExpr(stats, genFunc); } else { // single predicate condition newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, fop, evaluatedRowCount); } } else if (pred instanceof ExprNodeColumnDesc) { // can be boolean column in which case return true count ExprNodeColumnDesc encd = (ExprNodeColumnDesc) pred; String colName = encd.getColumn(); String colType = encd.getTypeString(); if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { ColStatistics cs = stats.getColumnStatisticsFromColName(colName); if (cs != null) { newNumRows = cs.getNumTrues(); } else { // default newNumRows = stats.getNumRows() / 2; } } else { // if not boolean column return half the number of rows newNumRows = stats.getNumRows() / 2; } } else if (pred instanceof ExprNodeConstantDesc) { // special case for handling false constants ExprNodeConstantDesc encd = (ExprNodeConstantDesc) pred; if (Boolean.FALSE.equals(encd.getValue())) { newNumRows = 0; } else { newNumRows = stats.getNumRows(); } } if (isDebugEnabled) { LOG.debug("Estimating row count for " + pred + " Original num rows: " + stats.getNumRows() + " New num rows: " + newNumRows); } return newNumRows; } private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop) throws SemanticException { long numRows = stats.getNumRows(); ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred; // 1. It is an IN operator, check if it uses STRUCT List<ExprNodeDesc> children = fd.getChildren(); List<ExprNodeDesc> columns = Lists.newArrayList(); List<ColStatistics> columnStats = Lists.newArrayList(); List<Set<ExprNodeDescEqualityWrapper>> values = Lists.newArrayList(); ExprNodeDesc columnsChild = children.get(0); boolean multiColumn; if (columnsChild instanceof ExprNodeGenericFuncDesc && ((ExprNodeGenericFuncDesc) columnsChild).getGenericUDF() instanceof GenericUDFStruct) { for (int j = 0; j < columnsChild.getChildren().size(); j++) { ExprNodeDesc columnChild = columnsChild.getChildren().get(j); // If column is not column reference , we bail out if (!(columnChild instanceof ExprNodeColumnDesc)) { // Default return numRows / 2; } columns.add(columnChild); final String columnName = ((ExprNodeColumnDesc)columnChild).getColumn(); // if column name is not contained in needed column list then it // is a partition column. We do not need to evaluate partition columns // in filter expression since it will be taken care by partition pruner if (neededCols != null && !neededCols.contains(columnName)) { // Default return numRows / 2; } columnStats.add(stats.getColumnStatisticsFromColName(columnName)); values.add(Sets.<ExprNodeDescEqualityWrapper>newHashSet()); } multiColumn = true; } else { // If column is not column reference , we bail out if (!(columnsChild instanceof ExprNodeColumnDesc)) { // Default return numRows / 2; } columns.add(columnsChild); final String columnName = ((ExprNodeColumnDesc)columnsChild).getColumn(); // if column name is not contained in needed column list then it // is a partition column. We do not need to evaluate partition columns // in filter expression since it will be taken care by partition pruner if (neededCols != null && !neededCols.contains(columnName)) { // Default return numRows / 2; } columnStats.add(stats.getColumnStatisticsFromColName(columnName)); values.add(Sets.<ExprNodeDescEqualityWrapper>newHashSet()); multiColumn = false; } // 2. Extract columns and values for (int i = 1; i < children.size(); i++) { ExprNodeDesc child = children.get(i); // If value is not a constant, we bail out if (!(child instanceof ExprNodeConstantDesc)) { // Default return numRows / 2; } if (multiColumn) { ExprNodeConstantDesc constantChild = (ExprNodeConstantDesc) child; List<?> items = (List<?>) constantChild.getWritableObjectInspector().getWritableConstantValue(); List<TypeInfo> structTypes = ((StructTypeInfo) constantChild.getTypeInfo()).getAllStructFieldTypeInfos(); for (int j = 0; j < structTypes.size(); j++) { ExprNodeConstantDesc constant = new ExprNodeConstantDesc(structTypes.get(j), items.get(j)); values.get(j).add(new ExprNodeDescEqualityWrapper(constant)); } } else { values.get(0).add(new ExprNodeDescEqualityWrapper(child)); } } // 3. Calculate IN selectivity double factor = 1d; for (int i = 0; i < columnStats.size(); i++) { long dvs = columnStats.get(i) == null ? 0 : columnStats.get(i).getCountDistint(); // (num of distinct vals for col in IN clause / num of distinct vals for col ) double columnFactor = dvs == 0 ? 0.5d : ((double) values.get(i).size() / dvs); // max can be 1, even when ndv is larger in IN clause than in column stats factor *= columnFactor > 1d ? 1d : columnFactor; } float inFactor = HiveConf.getFloatVar(aspCtx.getConf(), HiveConf.ConfVars.HIVE_STATS_IN_CLAUSE_FACTOR); return Math.round( (double) numRows * factor * inFactor); } private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop) throws SemanticException, CloneNotSupportedException { final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred; final boolean invert = Boolean.TRUE.equals( ((ExprNodeConstantDesc) fd.getChildren().get(0)).getValue()); // boolean invert (not) final ExprNodeDesc comparisonExpression = fd.getChildren().get(1); // expression final ExprNodeDesc leftExpression = fd.getChildren().get(2); // left expression final ExprNodeDesc rightExpression = fd.getChildren().get(3); // right expression // Short circuit and return the current number of rows if this is a // synthetic predicate with dynamic values if (leftExpression instanceof ExprNodeDynamicValueDesc) { return stats.getNumRows(); } // We transform the BETWEEN clause to AND clause (with NOT on top in invert is true). // This is more straightforward, as the evaluateExpression method will deal with // generating the final row count relying on the basic comparator evaluation methods final ExprNodeDesc leftComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqualOrGreaterThan(), Lists.newArrayList(comparisonExpression, leftExpression)); final ExprNodeDesc rightComparator = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqualOrLessThan(), Lists.newArrayList(comparisonExpression, rightExpression)); ExprNodeDesc newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(), Lists.newArrayList(leftComparator, rightComparator)); if (invert) { newExpression = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPNot(), Lists.newArrayList(newExpression)); } return evaluateExpression(stats, newExpression, aspCtx, neededCols, fop, 0); } private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop) throws CloneNotSupportedException, SemanticException { long numRows = stats.getNumRows(); // if the evaluate yields true then pass all rows else pass 0 rows if (pred instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred; for (ExprNodeDesc leaf : genFunc.getChildren()) { if (leaf instanceof ExprNodeGenericFuncDesc) { // GenericUDF long newNumRows = 0; for (ExprNodeDesc child : genFunc.getChildren()) { newNumRows = evaluateChildExpr(stats, child, aspCtx, neededCols, fop, 0); } return numRows - newNumRows; } else if (leaf instanceof ExprNodeConstantDesc) { ExprNodeConstantDesc encd = (ExprNodeConstantDesc) leaf; if (Boolean.TRUE.equals(encd.getValue())) { return 0; } else { return numRows; } } else if (leaf instanceof ExprNodeColumnDesc) { // NOT on boolean columns is possible. in which case return false count. ExprNodeColumnDesc encd = (ExprNodeColumnDesc) leaf; String colName = encd.getColumn(); String colType = encd.getTypeString(); if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { ColStatistics cs = stats.getColumnStatisticsFromColName(colName); if (cs != null) { return cs.getNumFalses(); } } // if not boolean column return half the number of rows return numRows / 2; } } } // worst case return numRows / 2; } private long evaluateColEqualsNullExpr(Statistics stats, ExprNodeDesc pred) { long numRows = stats.getNumRows(); if (pred instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) pred; for (ExprNodeDesc leaf : genFunc.getChildren()) { if (leaf instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf; String colName = colDesc.getColumn(); ColStatistics cs = stats.getColumnStatisticsFromColName(colName); if (cs != null) { return cs.getNumNulls(); } } } } // worst case return numRows / 2; } private long evaluateNotNullExpr(Statistics parentStats, ExprNodeGenericFuncDesc pred) { long noOfNulls = getMaxNulls(parentStats, pred); long parentCardinality = parentStats.getNumRows(); long newPredCardinality = parentCardinality; if (parentCardinality > noOfNulls) { newPredCardinality = parentCardinality - noOfNulls; } else { LOG.error("Invalid column stats: No of nulls > cardinality"); } return newPredCardinality; } private long getMaxNulls(Statistics stats, ExprNodeDesc pred) { long tmpNoNulls = 0; long maxNoNulls = 0; if (pred instanceof ExprNodeColumnDesc) { ColStatistics cs = stats.getColumnStatisticsFromColName(((ExprNodeColumnDesc) pred) .getColumn()); if (cs != null) { tmpNoNulls = cs.getNumNulls(); } } else if (pred instanceof ExprNodeGenericFuncDesc || pred instanceof ExprNodeColumnListDesc) { long noNullsOfChild = 0; for (ExprNodeDesc childExpr : pred.getChildren()) { noNullsOfChild = getMaxNulls(stats, childExpr); if (noNullsOfChild > tmpNoNulls) { tmpNoNulls = noNullsOfChild; } } } else if (pred instanceof ExprNodeConstantDesc) { if (ExprNodeDescUtils.isNullConstant(pred)) { tmpNoNulls = stats.getNumRows(); } else { tmpNoNulls = 0; } } else if (pred instanceof ExprNodeDynamicListDesc) { tmpNoNulls = 0; } else if (pred instanceof ExprNodeFieldDesc) { // TODO Confirm this is safe tmpNoNulls = getMaxNulls(stats, ((ExprNodeFieldDesc) pred).getDesc()); } if (tmpNoNulls > maxNoNulls) { maxNoNulls = tmpNoNulls; } return maxNoNulls; } private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc genFunc) { long numRows = stats.getNumRows(); GenericUDF udf = genFunc.getGenericUDF(); ExprNodeColumnDesc columnDesc; ExprNodeConstantDesc constantDesc; boolean upperBound; String boundValue = null; if (genFunc.getChildren().get(0) instanceof ExprNodeColumnDesc && genFunc.getChildren().get(1) instanceof ExprNodeConstantDesc) { columnDesc = (ExprNodeColumnDesc) genFunc.getChildren().get(0); constantDesc = (ExprNodeConstantDesc) genFunc.getChildren().get(1); // Comparison to null will always return false if (constantDesc.getValue() == null) { return 0; } if (udf instanceof GenericUDFOPEqualOrGreaterThan || udf instanceof GenericUDFOPGreaterThan) { boundValue = constantDesc.getValue().toString(); upperBound = false; } else { boundValue = constantDesc.getValue().toString(); upperBound = true; } } else if (genFunc.getChildren().get(1) instanceof ExprNodeColumnDesc && genFunc.getChildren().get(0) instanceof ExprNodeConstantDesc) { columnDesc = (ExprNodeColumnDesc) genFunc.getChildren().get(1); constantDesc = (ExprNodeConstantDesc) genFunc.getChildren().get(0); // Comparison to null will always return false if (constantDesc.getValue() == null) { return 0; } if (udf instanceof GenericUDFOPEqualOrGreaterThan || udf instanceof GenericUDFOPGreaterThan) { boundValue = constantDesc.getValue().toString(); upperBound = true; } else { boundValue = constantDesc.getValue().toString(); upperBound = false; } } else { // default return numRows / 3; } ColStatistics cs = stats.getColumnStatisticsFromColName(columnDesc.getColumn()); if (cs != null && cs.getRange() != null && cs.getRange().maxValue != null && cs.getRange().minValue != null) { String colTypeLowerCase = columnDesc.getTypeString().toLowerCase(); try { if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME)) { byte value = new Byte(boundValue); byte maxValue = cs.getRange().maxValue.byteValue(); byte minValue = cs.getRange().minValue.byteValue(); if (upperBound) { if (maxValue < value) { return numRows; } if (minValue > value) { return 0; } } else { if (minValue >= value) { return numRows; } if (maxValue < value) { return 0; } } } else if (colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)) { short value = new Short(boundValue); short maxValue = cs.getRange().maxValue.shortValue(); short minValue = cs.getRange().minValue.shortValue(); if (upperBound) { if (maxValue < value) { return numRows; } if (minValue > value) { return 0; } } else { if (minValue >= value) { return numRows; } if (maxValue < value) { return 0; } } } else if (colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) { // Date is an integer internally int value = new Integer(boundValue); int maxValue = cs.getRange().maxValue.intValue(); int minValue = cs.getRange().minValue.intValue(); if (upperBound) { if (maxValue < value) { return numRows; } if (minValue > value) { return 0; } } else { if (minValue >= value) { return numRows; } if (maxValue < value) { return 0; } } } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) { long value = new Long(boundValue); long maxValue = cs.getRange().maxValue.longValue(); long minValue = cs.getRange().minValue.longValue(); if (upperBound) { if (maxValue < value) { return numRows; } if (minValue > value) { return 0; } } else { if (minValue >= value) { return numRows; } if (maxValue < value) { return 0; } } } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) { float value = new Float(boundValue); float maxValue = cs.getRange().maxValue.floatValue(); float minValue = cs.getRange().minValue.floatValue(); if (upperBound) { if (maxValue < value) { return numRows; } if (minValue > value) { return 0; } } else { if (minValue >= value) { return numRows; } if (maxValue < value) { return 0; } } } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) { double value = new Double(boundValue); double maxValue = cs.getRange().maxValue.doubleValue(); double minValue = cs.getRange().minValue.doubleValue(); if (upperBound) { if (maxValue < value) { return numRows; } if (minValue > value) { return 0; } } else { if (minValue >= value) { return numRows; } if (maxValue < value) { return 0; } } } } catch (NumberFormatException nfe) { return numRows / 3; } } // default return numRows / 3; } private long evaluateChildExpr(Statistics stats, ExprNodeDesc child, AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException { long numRows = stats.getNumRows(); if (child instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc genFunc = (ExprNodeGenericFuncDesc) child; GenericUDF udf = genFunc.getGenericUDF(); if (udf instanceof GenericUDFOPEqual || udf instanceof GenericUDFOPEqualNS) { String colName = null; boolean isConst = false; Object prevConst = null; for (ExprNodeDesc leaf : genFunc.getChildren()) { if (leaf instanceof ExprNodeConstantDesc) { // constant = constant expressions. We shouldn't be getting this // after constant folding if (isConst) { // special case: if both constants are not equal then return 0 if (prevConst != null && !prevConst.equals(((ExprNodeConstantDesc)leaf).getValue())) { return 0; } return numRows; } // if the first argument is const then just set the flag and continue if (colName == null) { isConst = true; prevConst = ((ExprNodeConstantDesc) leaf).getValue(); continue; } // if column name is not contained in needed column list then it // is a partition column. We do not need to evaluate partition columns // in filter expression since it will be taken care by partitio pruner if (neededCols != null && !neededCols.contains(colName)) { return numRows; } ColStatistics cs = stats.getColumnStatisticsFromColName(colName); if (cs != null) { long dvs = cs.getCountDistint(); numRows = dvs == 0 ? numRows / 2 : Math.round( (double)numRows / dvs); return numRows; } } else if (leaf instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf; colName = colDesc.getColumn(); // if const is first argument then evaluate the result if (isConst) { // if column name is not contained in needed column list then it // is a partition column. We do not need to evaluate partition columns // in filter expression since it will be taken care by partitio pruner if (neededCols != null && neededCols.indexOf(colName) == -1) { return numRows; } ColStatistics cs = stats.getColumnStatisticsFromColName(colName); if (cs != null) { long dvs = cs.getCountDistint(); numRows = dvs == 0 ? numRows / 2 : Math.round( (double)numRows / dvs); return numRows; } } } } } else if (udf instanceof GenericUDFOPNotEqual) { return numRows; } else if (udf instanceof GenericUDFOPEqualOrGreaterThan || udf instanceof GenericUDFOPEqualOrLessThan || udf instanceof GenericUDFOPGreaterThan || udf instanceof GenericUDFOPLessThan) { return evaluateComparator(stats, genFunc); } else if (udf instanceof GenericUDFOPNotNull) { return evaluateNotNullExpr(stats, genFunc); } else if (udf instanceof GenericUDFOPNull) { return evaluateColEqualsNullExpr(stats, genFunc); } else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr || udf instanceof GenericUDFIn || udf instanceof GenericUDFBetween || udf instanceof GenericUDFOPNot) { return evaluateExpression(stats, genFunc, aspCtx, neededCols, fop, evaluatedRowCount); } else if (udf instanceof GenericUDFInBloomFilter) { if (genFunc.getChildren().get(1) instanceof ExprNodeDynamicValueDesc) { // Synthetic predicates from semijoin opt should not affect stats. return numRows; } } } else if (child instanceof ExprNodeConstantDesc) { if (Boolean.FALSE.equals(((ExprNodeConstantDesc) child).getValue())) { return 0; } else { return stats.getNumRows(); } } // worst case return numRows / 2; } } /** * GROUPBY operator changes the number of rows. The number of rows emitted by GBY operator will be * atleast 1 or utmost T(R) (number of rows in relation T) based on the aggregation. A better * estimate can be found if we have column statistics on the columns that we are grouping on. * <p> * Suppose if we are grouping by attributes A,B,C and if statistics for columns A,B,C are * available then a better estimate can be found by taking the smaller of product of V(R,[A,B,C]) * (product of distinct cardinalities of A,B,C) and T(R)/2. * <p> * T(R) = min (T(R)/2 , V(R,[A,B,C]) ---> [1] * <p> * In the presence of grouping sets, map-side GBY will emit more rows depending on the size of * grouping set (input rows * size of grouping set). These rows will get reduced because of * map-side hash aggregation. Hash aggregation is an optimization in hive to reduce the number of * rows shuffled between map and reduce stage. This optimization will be disabled if the memory * used for hash aggregation exceeds 90% of max available memory for hash aggregation. The number * of rows emitted from map-side will vary if hash aggregation is enabled throughout execution or * disabled. In the presence of grouping sets, following rules will be applied * <p> * If <b>hash-aggregation is enabled</b>, for query SELECT * FROM table GROUP BY (A,B) WITH CUBE * <p> * T(R) = min(T(R)/2, T(R, GBY(A,B)) + T(R, GBY(A)) + T(R, GBY(B)) + 1)) * <p> * where, GBY(A,B), GBY(B), GBY(B) are the GBY rules mentioned above [1] * <p> * If <b>hash-aggregation is disabled</b>, apply the GBY rule [1] and then multiply the result by * number of elements in grouping set T(R) = T(R) * length_of_grouping_set. Since we do not know * if hash-aggregation is enabled or disabled during compile time, we will assume worst-case i.e, * hash-aggregation is disabled * <p> * NOTE: The number of rows from map-side GBY operator is dependent on map-side parallelism i.e, * number of mappers. The map-side parallelism is expected from hive config * "hive.stats.map.parallelism". If the config is not set then default parallelism of 1 will be * assumed. * <p> * <i>Worst case:</i> If no column statistics are available, then T(R) = T(R)/2 will be used as * heuristics. * <p> * <i>For more information, refer 'Estimating The Cost Of Operations' chapter in * "Database Systems: The Complete Book" by Garcia-Molina et. al.</i> * </p> */ public static class GroupByStatsRule extends DefaultStatsRule implements NodeProcessor { @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { GroupByOperator gop = (GroupByOperator) nd; Operator<? extends OperatorDesc> parent = gop.getParentOperators().get(0); Statistics parentStats = parent.getStatistics(); // parent stats are not populated yet if (parentStats == null) { return null; } AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; HiveConf conf = aspCtx.getConf(); long maxSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE); List<AggregationDesc> aggDesc = gop.getConf().getAggregators(); Map<String, ExprNodeDesc> colExprMap = gop.getColumnExprMap(); RowSchema rs = gop.getSchema(); Statistics stats = null; List<ColStatistics> colStats = StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs); long cardinality; long parallelism = 1L; boolean interReduction = false; boolean hashAgg = false; long inputSize = 1L; boolean containsGroupingSet = gop.getConf().isGroupingSetsPresent(); long sizeOfGroupingSet = containsGroupingSet ? gop.getConf().getListGroupingSets().size() : 1L; // There are different cases for Group By depending on map/reduce side, hash aggregation, // grouping sets and column stats. If we don't have column stats, we just assume hash // aggregation is disabled. Following are the possible cases and rule for cardinality // estimation // INTERMEDIATE REDUCTION: // Case 1: NO column stats, NO hash aggregation, NO grouping sets — numRows // Case 2: NO column stats, NO hash aggregation, grouping sets — numRows * sizeOfGroupingSet // Case 3: column stats, hash aggregation, NO grouping sets — Min(numRows / 2, ndvProduct * parallelism) // Case 4: column stats, hash aggregation, grouping sets — Min((numRows * sizeOfGroupingSet) / 2, ndvProduct * parallelism * sizeOfGroupingSet) // Case 5: column stats, NO hash aggregation, NO grouping sets — numRows // Case 6: column stats, NO hash aggregation, grouping sets — numRows * sizeOfGroupingSet // FINAL REDUCTION: // Case 7: NO column stats — numRows / 2 // Case 8: column stats, grouping sets — Min(numRows, ndvProduct * sizeOfGroupingSet) // Case 9: column stats, NO grouping sets - Min(numRows, ndvProduct) if (!gop.getConf().getMode().equals(GroupByDesc.Mode.MERGEPARTIAL) && !gop.getConf().getMode().equals(GroupByDesc.Mode.COMPLETE) && !gop.getConf().getMode().equals(GroupByDesc.Mode.FINAL)) { interReduction = true; // consider approximate map side parallelism to be table data size // divided by max split size TableScanOperator top = OperatorUtils.findSingleOperatorUpstream(gop, TableScanOperator.class); // if top is null then there are multiple parents (RS as well), hence // lets use parent statistics to get data size. Also maxSplitSize should // be updated to bytes per reducer (1GB default) if (top == null) { inputSize = parentStats.getDataSize(); maxSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.BYTESPERREDUCER); } else { inputSize = top.getConf().getStatistics().getDataSize(); } parallelism = (int) Math.ceil((double) inputSize / maxSplitSize); } if (isDebugEnabled) { LOG.debug("STATS-" + gop.toString() + ": inputSize: " + inputSize + " maxSplitSize: " + maxSplitSize + " parallelism: " + parallelism + " containsGroupingSet: " + containsGroupingSet + " sizeOfGroupingSet: " + sizeOfGroupingSet); } try { // satisfying precondition means column statistics is available if (satisfyPrecondition(parentStats)) { // check if map side aggregation is possible or not based on column stats hashAgg = checkMapSideAggregation(gop, colStats, conf); if (isDebugEnabled) { LOG.debug("STATS-" + gop.toString() + " hashAgg: " + hashAgg); } stats = parentStats.clone(); stats.setColumnStats(colStats); long ndvProduct = 1; final long parentNumRows = stats.getNumRows(); // compute product of distinct values of grouping columns for (ColStatistics cs : colStats) { if (cs != null) { long ndv = cs.getCountDistint(); if (cs.getNumNulls() > 0) { ndv = StatsUtils.safeAdd(ndv, 1); } ndvProduct = StatsUtils.safeMult(ndvProduct, ndv); } else { if (parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) { // the column must be an aggregate column inserted by GBY. We // don't have to account for this column when computing product // of NDVs continue; } else { // partial column statistics on grouping attributes case. // if column statistics on grouping attribute is missing, then // assume worst case. // GBY rule will emit half the number of rows if ndvProduct is 0 ndvProduct = 0; } break; } } // if ndvProduct is 0 then column stats state must be partial and we are missing // column stats for a group by column if (ndvProduct == 0) { ndvProduct = parentNumRows / 2; if (isDebugEnabled) { LOG.debug("STATS-" + gop.toString() + ": ndvProduct became 0 as some column does not" + " have stats. ndvProduct changed to: " + ndvProduct); } } if (interReduction) { if (hashAgg) { if (containsGroupingSet) { // Case 4: column stats, hash aggregation, grouping sets cardinality = Math.min( (StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet)) / 2, StatsUtils.safeMult(StatsUtils.safeMult(ndvProduct, parallelism), sizeOfGroupingSet)); if (isDebugEnabled) { LOG.debug("[Case 4] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } else { // Case 3: column stats, hash aggregation, NO grouping sets cardinality = Math.min(parentNumRows / 2, StatsUtils.safeMult(ndvProduct, parallelism)); if (isDebugEnabled) { LOG.debug("[Case 3] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } } else { if (containsGroupingSet) { // Case 6: column stats, NO hash aggregation, grouping sets cardinality = StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet); if (isDebugEnabled) { LOG.debug("[Case 6] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } else { // Case 5: column stats, NO hash aggregation, NO grouping sets cardinality = parentNumRows; if (isDebugEnabled) { LOG.debug("[Case 5] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } } } else { // in reduce side GBY, we don't know if the grouping set was present or not. so get it // from map side GBY GroupByOperator mGop = OperatorUtils.findSingleOperatorUpstream(parent, GroupByOperator.class); if (mGop != null) { containsGroupingSet = mGop.getConf().isGroupingSetsPresent(); } if (containsGroupingSet) { // Case 8: column stats, grouping sets sizeOfGroupingSet = mGop.getConf().getListGroupingSets().size(); cardinality = Math.min(parentNumRows, StatsUtils.safeMult(ndvProduct, sizeOfGroupingSet)); if (isDebugEnabled) { LOG.debug("[Case 8] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } else { // Case 9: column stats, NO grouping sets cardinality = Math.min(parentNumRows, ndvProduct); if (isDebugEnabled) { LOG.debug("[Case 9] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } } // update stats, but don't update NDV as it will not change updateStats(stats, cardinality, true, gop, false); } else { // NO COLUMN STATS if (parentStats != null) { stats = parentStats.clone(); final long parentNumRows = stats.getNumRows(); // if we don't have column stats, we just assume hash aggregation is disabled if (interReduction) { if (containsGroupingSet) { // Case 2: NO column stats, NO hash aggregation, grouping sets cardinality = StatsUtils.safeMult(parentNumRows, sizeOfGroupingSet); if (isDebugEnabled) { LOG.debug("[Case 2] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } else { // Case 1: NO column stats, NO hash aggregation, NO grouping sets cardinality = parentNumRows; if (isDebugEnabled) { LOG.debug("[Case 1] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } } else { // Case 7: NO column stats cardinality = parentNumRows / 2; if (isDebugEnabled) { LOG.debug("[Case 7] STATS-" + gop.toString() + ": cardinality: " + cardinality); } } updateStats(stats, cardinality, false, gop); } } // if UDAFs are present, new columns needs to be added if (!aggDesc.isEmpty() && stats != null) { List<ColStatistics> aggColStats = Lists.newArrayList(); for (ColumnInfo ci : rs.getSignature()) { // if the columns in row schema is not contained in column // expression map, then those are the aggregate columns that // are added GBY operator. we will estimate the column statistics // for those newly added columns if (!colExprMap.containsKey(ci.getInternalName())) { String colName = ci.getInternalName(); String colType = ci.getTypeName(); ColStatistics cs = new ColStatistics(colName, colType); cs.setCountDistint(stats.getNumRows()); cs.setNumNulls(0); cs.setAvgColLen(StatsUtils.getAvgColLenOf(conf, ci.getObjectInspector(), colType)); aggColStats.add(cs); } } // add the new aggregate column and recompute data size if (aggColStats.size() > 0) { stats.addToColumnStats(aggColStats); // only if the column stats is available, update the data size from // the column stats if (!stats.getColumnStatsState().equals(Statistics.State.NONE)) { updateStats(stats, stats.getNumRows(), true, gop); } } // if UDAF present and if column expression map is empty then it must // be full aggregation query like count(*) in which case number of // rows will be 1 if (colExprMap.isEmpty()) { stats.setNumRows(1); updateStats(stats, 1, true, gop); } } gop.setStatistics(stats); if (isDebugEnabled && stats != null) { LOG.debug("[0] STATS-" + gop.toString() + ": " + stats.extendedToString()); } } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } return null; } /** * This method does not take into account many configs used at runtime to * disable hash aggregation like HIVEMAPAGGRHASHMINREDUCTION. This method * roughly estimates the number of rows and size of each row to see if it * can fit in hashtable for aggregation. * @param gop - group by operator * @param colStats - column stats for key columns * @param conf - hive conf * @return */ private boolean checkMapSideAggregation(GroupByOperator gop, List<ColStatistics> colStats, HiveConf conf) { List<AggregationDesc> aggDesc = gop.getConf().getAggregators(); GroupByDesc desc = gop.getConf(); GroupByDesc.Mode mode = desc.getMode(); if (mode.equals(GroupByDesc.Mode.HASH)) { float hashAggMem = conf.getFloatVar(HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); float hashAggMaxThreshold = conf.getFloatVar(HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); // get available map memory long totalMemory = StatsUtils.getAvailableMemory(conf) * 1000L * 1000L; long maxMemHashAgg = Math.round(totalMemory * hashAggMem * hashAggMaxThreshold); // estimated number of rows will be product of NDVs long numEstimatedRows = 1; // estimate size of key from column statistics long avgKeySize = 0; for (ColStatistics cs : colStats) { if (cs != null) { numEstimatedRows = StatsUtils.safeMult(numEstimatedRows, cs.getCountDistint()); avgKeySize += Math.ceil(cs.getAvgColLen()); } } // average value size will be sum of all sizes of aggregation buffers long avgValSize = 0; // go over all aggregation buffers and see they implement estimable // interface if so they aggregate the size of the aggregation buffer GenericUDAFEvaluator[] aggregationEvaluators; aggregationEvaluators = new GenericUDAFEvaluator[aggDesc.size()]; // get aggregation evaluators for (int i = 0; i < aggregationEvaluators.length; i++) { AggregationDesc agg = aggDesc.get(i); aggregationEvaluators[i] = agg.getGenericUDAFEvaluator(); } // estimate size of aggregation buffer for (int i = 0; i < aggregationEvaluators.length; i++) { // each evaluator has constant java object overhead avgValSize += gop.javaObjectOverHead; GenericUDAFEvaluator.AggregationBuffer agg = null; try { agg = aggregationEvaluators[i].getNewAggregationBuffer(); } catch (HiveException e) { // in case of exception assume unknown type (256 bytes) avgValSize += gop.javaSizeUnknownType; } // aggregate size from aggregation buffers if (agg != null) { if (GenericUDAFEvaluator.isEstimable(agg)) { avgValSize += ((GenericUDAFEvaluator.AbstractAggregationBuffer) agg) .estimate(); } else { // if the aggregation buffer is not estimable then get all the // declared fields and compute the sizes from field types Field[] fArr = ObjectInspectorUtils .getDeclaredNonStaticFields(agg.getClass()); for (Field f : fArr) { long avgSize = StatsUtils .getAvgColLenOfFixedLengthTypes(f.getType().getName()); avgValSize += avgSize == 0 ? gop.javaSizeUnknownType : avgSize; } } } } // total size of each hash entry long hashEntrySize = gop.javaHashEntryOverHead + avgKeySize + avgValSize; // estimated hash table size long estHashTableSize = StatsUtils.safeMult(numEstimatedRows, hashEntrySize); if (estHashTableSize < maxMemHashAgg) { return true; } } // worst-case, hash aggregation disabled return false; } } /** * JOIN operator can yield any of the following three cases <li>The values of join keys are * disjoint in both relations in which case T(RXS) = 0 (we need histograms for this)</li> <li>Join * key is primary key on relation R and foreign key on relation S in which case every tuple in S * will have a tuple in R T(RXS) = T(S) (we need histograms for this)</li> <li>Both R & S relation * have same value for join-key. Ex: bool column with all true values T(RXS) = T(R) * T(S) (we * need histograms for this. counDistinct = 1 and same value)</li> * <p> * In the absence of histograms, we can use the following general case * <p> * <b>2 Relations, 1 attribute</b> * <p> * T(RXS) = (T(R)*T(S))/max(V(R,Y), V(S,Y)) where Y is the join attribute * <p> * <b>2 Relations, 2 attributes</b> * <p> * T(RXS) = T(R)*T(S)/max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2)), where y1 and y2 are the join * attributes * <p> * <b>3 Relations, 1 attributes</b> * <p> * T(RXSXQ) = T(R)*T(S)*T(Q)/top2largest(V(R,y), V(S,y), V(Q,y)), where y is the join attribute * <p> * <b>3 Relations, 2 attributes</b> * <p> * T(RXSXQ) = T(R)*T(S)*T(Q)/top2largest(V(R,y1), V(S,y1), V(Q,y1)) * top2largest(V(R,y2), V(S,y2), V(Q,y2)), * where y1 and y2 are the join attributes * <p> * <i>Worst case:</i> If no column statistics are available, then T(RXS) = joinFactor * max(T(R), * T(S)) * (numParents - 1) will be used as heuristics. joinFactor is from hive.stats.join.factor * hive config. In the worst case, since we do not know any information about join keys (and hence * which of the 3 cases to use), we let it to the user to provide the join factor. * <p> * <i>For more information, refer 'Estimating The Cost Of Operations' chapter in * "Database Systems: The Complete Book" by Garcia-Molina et. al.</i> * </p> */ public static class JoinStatsRule extends DefaultStatsRule implements NodeProcessor { @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { long newNumRows = 0; CommonJoinOperator<? extends JoinDesc> jop = (CommonJoinOperator<? extends JoinDesc>) nd; List<Operator<? extends OperatorDesc>> parents = jop.getParentOperators(); int numAttr = 1; AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; HiveConf conf = aspCtx.getConf(); boolean allSatisfyPreCondition = true; for (Operator<? extends OperatorDesc> op : parents) { if (op.getStatistics() == null) { return null; } } for (Operator<? extends OperatorDesc> op : parents) { if (!satisfyPrecondition(op.getStatistics())) { allSatisfyPreCondition = false; break; } } if (allSatisfyPreCondition) { // statistics object that is combination of statistics from all // relations involved in JOIN Statistics stats = new Statistics(); int numParent = parents.size(); Map<Integer, Long> rowCountParents = Maps.newHashMap(); Map<Integer, Statistics> joinStats = Maps.newHashMap(); Map<Integer, List<String>> joinKeys = Maps.newHashMap(); List<Long> rowCounts = Lists.newArrayList(); // detect if there are multiple attributes in join key ReduceSinkOperator rsOp = (ReduceSinkOperator) jop.getParentOperators().get(0); List<String> keyExprs = StatsUtils.getQualifedReducerKeyNames(rsOp.getConf() .getOutputKeyColumnNames()); numAttr = keyExprs.size(); // infer PK-FK relationship in single attribute join case long inferredRowCount = inferPKFKRelationship(numAttr, parents, jop); // get the join keys from parent ReduceSink operators for (int pos = 0; pos < parents.size(); pos++) { ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); Statistics parentStats; try { parentStats = parent.getStatistics().clone(); } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } keyExprs = StatsUtils.getQualifedReducerKeyNames(parent.getConf() .getOutputKeyColumnNames()); rowCountParents.put(pos, parentStats.getNumRows()); rowCounts.add(parentStats.getNumRows()); // internal name for expressions and estimate column statistics for expression. joinKeys.put(pos, keyExprs); // get column statistics for all output columns joinStats.put(pos, parentStats); // since new statistics is derived from all relations involved in // JOIN, we need to update the state information accordingly stats.updateColumnStatsState(parentStats.getColumnStatsState()); } if (numAttr == 0) { // It is a cartesian product, row count is easy to infer inferredRowCount = 1; for (int pos = 0; pos < parents.size(); pos++) { inferredRowCount = StatsUtils.safeMult(joinStats.get(pos).getNumRows(), inferredRowCount); } } List<Long> distinctVals = Lists.newArrayList(); long denom = 1; if (inferredRowCount == -1) { // failed to infer PK-FK relationship for row count estimation fall-back on default logic // compute denominator max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2)) // in case of multi-attribute join List<Long> perAttrDVs = Lists.newArrayList(); for (int idx = 0; idx < numAttr; idx++) { for (Integer i : joinKeys.keySet()) { String col = joinKeys.get(i).get(idx); ColStatistics cs = joinStats.get(i).getColumnStatisticsFromColName(col); if (cs != null) { perAttrDVs.add(cs.getCountDistint()); } } distinctVals.add(getDenominator(perAttrDVs)); perAttrDVs.clear(); } if (numAttr > 1 && conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_CORRELATED_MULTI_KEY_JOINS)) { denom = Collections.max(distinctVals); } else if (numAttr > numParent) { // To avoid denominator getting larger and aggressively reducing // number of rows, we will ease out denominator. denom = StatsUtils.addWithExpDecay(distinctVals); } else { for (Long l : distinctVals) { denom = StatsUtils.safeMult(denom, l); } } } // Update NDV of joined columns to be min(V(R,y), V(S,y)) updateJoinColumnsNDV(joinKeys, joinStats, numAttr); // column statistics from different sources are put together and // rename based on output schema of join operator Map<String, ExprNodeDesc> colExprMap = jop.getColumnExprMap(); RowSchema rs = jop.getSchema(); List<ColStatistics> outColStats = Lists.newArrayList(); for (ColumnInfo ci : rs.getSignature()) { String key = ci.getInternalName(); ExprNodeDesc end = colExprMap.get(key); if (end instanceof ExprNodeColumnDesc) { String colName = ((ExprNodeColumnDesc) end).getColumn(); int pos = jop.getConf().getReversedExprs().get(key); ColStatistics cs = joinStats.get(pos).getColumnStatisticsFromColName(colName); String outColName = key; if (cs != null) { cs.setColumnName(outColName); } outColStats.add(cs); } } // update join statistics stats.setColumnStats(outColStats); long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom, jop); updateColStats(conf, stats, newRowCount, jop, rowCountParents); jop.setStatistics(stats); if (isDebugEnabled) { LOG.debug("[0] STATS-" + jop.toString() + ": " + stats.extendedToString()); } } else { // worst case when there are no column statistics float joinFactor = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_JOIN_FACTOR); int numParents = parents.size(); long crossRowCount = 1; long crossDataSize = 1; long maxRowCount = 0; long maxDataSize = 0; for (Operator<? extends OperatorDesc> op : parents) { Statistics ps = op.getStatistics(); long rowCount = ps.getNumRows(); long dataSize = ps.getDataSize(); // Update cross size long newCrossRowCount = StatsUtils.safeMult(crossRowCount, rowCount); long newCrossDataSize = StatsUtils.safeAdd( StatsUtils.safeMult(crossDataSize, rowCount), StatsUtils.safeMult(dataSize, crossRowCount)); crossRowCount = newCrossRowCount; crossDataSize = newCrossDataSize; // Update largest relation if (rowCount > maxRowCount) { maxRowCount = rowCount; maxDataSize = dataSize; } } long newDataSize; // detect if there are attributes in join key boolean cartesianProduct = false; if (jop.getParentOperators().get(0) instanceof ReduceSinkOperator) { ReduceSinkOperator rsOp = (ReduceSinkOperator) jop.getParentOperators().get(0); List<String> keyExprs = StatsUtils.getQualifedReducerKeyNames(rsOp.getConf() .getOutputKeyColumnNames()); cartesianProduct = keyExprs.size() == 0; } else if (jop instanceof AbstractMapJoinOperator) { AbstractMapJoinOperator<? extends MapJoinDesc> mjop = (AbstractMapJoinOperator<? extends MapJoinDesc>) jop; List<ExprNodeDesc> keyExprs = mjop.getConf().getKeys().values().iterator().next(); cartesianProduct = keyExprs.size() == 0; } if (cartesianProduct) { // Cartesian product newNumRows = crossRowCount; newDataSize = crossDataSize; } else { newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor); newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor); } Statistics wcStats = new Statistics(); wcStats.setNumRows(newNumRows); wcStats.setDataSize(newDataSize); jop.setStatistics(wcStats); if (isDebugEnabled) { LOG.debug("[1] STATS-" + jop.toString() + ": " + wcStats.extendedToString()); } } return null; } private long inferPKFKRelationship(int numAttr, List<Operator<? extends OperatorDesc>> parents, CommonJoinOperator<? extends JoinDesc> jop) { long newNumRows = -1; if (numAttr != 1) { return newNumRows; } // If numAttr is 1, this means we join on one single key column. Map<Integer, ColStatistics> parentsWithPK = getPrimaryKeyCandidates(parents); // We only allow one single PK. if (parentsWithPK.size() != 1) { LOG.debug("STATS-" + jop.toString() + ": detects none/multiple PK parents."); return newNumRows; } Integer pkPos = parentsWithPK.keySet().iterator().next(); ColStatistics csPK = parentsWithPK.values().iterator().next(); // infer foreign key candidates positions Map<Integer, ColStatistics> csFKs = getForeignKeyCandidates(parents, csPK); // we allow multiple foreign keys (snowflake schema) // csfKs.size() + 1 == parents.size() means we have a single PK and all // the rest ops are FKs. if (csFKs.size() + 1 == parents.size()) { newNumRows = getCardinality(parents, pkPos, csPK, csFKs, jop); // some debug information if (isDebugEnabled) { List<String> parentIds = Lists.newArrayList(); // print primary key containing parents for (Integer i : parentsWithPK.keySet()) { parentIds.add(parents.get(i).toString()); } LOG.debug("STATS-" + jop.toString() + ": PK parent id(s) - " + parentIds); parentIds.clear(); // print foreign key containing parents for (Integer i : csFKs.keySet()) { parentIds.add(parents.get(i).toString()); } LOG.debug("STATS-" + jop.toString() + ": FK parent id(s) - " + parentIds); } } return newNumRows; } /** * Get cardinality of reduce sink operators. * @param csPK - ColStatistics for a single primary key * @param csFKs - ColStatistics for multiple foreign keys */ private long getCardinality(List<Operator<? extends OperatorDesc>> ops, Integer pkPos, ColStatistics csPK, Map<Integer, ColStatistics> csFKs, CommonJoinOperator<? extends JoinDesc> jop) { double pkfkSelectivity = Double.MAX_VALUE; int fkInd = -1; // 1. We iterate through all the operators that have candidate FKs and // choose the FK that has the minimum selectivity. We assume that PK and this FK // have the PK-FK relationship. This is heuristic and can be // improved later. for (Entry<Integer, ColStatistics> entry : csFKs.entrySet()) { int pos = entry.getKey(); Operator<? extends OperatorDesc> opWithPK = ops.get(pkPos); double selectivity = getSelectivitySimpleTree(opWithPK); double selectivityAdjustment = StatsUtils.getScaledSelectivity(csPK, entry.getValue()); selectivity = selectivityAdjustment * selectivity > 1 ? selectivity : selectivityAdjustment * selectivity; if (selectivity < pkfkSelectivity) { pkfkSelectivity = selectivity; fkInd = pos; } } long newrows = 1; List<Long> rowCounts = Lists.newArrayList(); List<Long> distinctVals = Lists.newArrayList(); // 2. We then iterate through all the operators that have candidate FKs again. // We assume the PK is first joining with the FK that we just selected. // And we apply the PK-FK relationship when we compute the newrows and ndv. // After that, we join the result with all the other FKs. // We do not assume the PK-FK relationship anymore and just compute the // row count using the classic formula. for (Entry<Integer, ColStatistics> entry : csFKs.entrySet()) { int pos = entry.getKey(); ColStatistics csFK = entry.getValue(); ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); Statistics parentStats = parent.getStatistics(); if (fkInd == pos) { // 2.1 This is the new number of rows after PK is joining with FK newrows = (long) Math.ceil(parentStats.getNumRows() * pkfkSelectivity); rowCounts.add(newrows); // 2.1 The ndv is the minimum of the PK and the FK. distinctVals.add(Math.min(csFK.getCountDistint(), csPK.getCountDistint())); } else { // 2.2 All the other FKs. rowCounts.add(parentStats.getNumRows()); distinctVals.add(csFK.getCountDistint()); } } long newNumRows; if (csFKs.size() == 1) { // there is only one FK newNumRows = newrows; } else { // there is more than one FK newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals), jop); } return newNumRows; } private float getSelectivitySimpleTree(Operator<? extends OperatorDesc> op) { TableScanOperator tsOp = OperatorUtils .findSingleOperatorUpstream(op, TableScanOperator.class); if (tsOp == null) { // complex tree with multiple parents return getSelectivityComplexTree(op); } else { // simple tree with single parent long inputRow = tsOp.getStatistics().getNumRows(); long outputRow = op.getStatistics().getNumRows(); return (float) outputRow / (float) inputRow; } } private float getSelectivityComplexTree(Operator<? extends OperatorDesc> op) { Operator<? extends OperatorDesc> multiParentOp = null; Operator<? extends OperatorDesc> currentOp = op; // TS-1 TS-2 // | | // RS-1 RS-2 // \ / // JOIN // | // FIL // | // RS-3 // // For the above complex operator tree, // selectivity(JOIN) = selectivity(RS-1) * selectivity(RS-2) and // selectivity(RS-3) = numRows(RS-3)/numRows(JOIN) * selectivity(JOIN) while(multiParentOp == null) { if (op.getParentOperators().size() > 1) { multiParentOp = op; } else { op = op.getParentOperators().get(0); } } // No need for overflow checks, assume selectivity is always <= 1.0 float selMultiParent = 1.0f; for(Operator<? extends OperatorDesc> parent : multiParentOp.getParentOperators()) { // In the above example, TS-1 -> RS-1 and TS-2 -> RS-2 are simple trees selMultiParent *= getSelectivitySimpleTree(parent); } float selCurrOp = ((float) currentOp.getStatistics().getNumRows() / (float) multiParentOp.getStatistics().getNumRows()) * selMultiParent; return selCurrOp; } /** * Returns the index of parents whose join key column statistics ranges are within the specified * primary key range (inferred as foreign keys). * @param ops - operators * @param csPK - column statistics of primary key * @return - a map which contains position ids and the corresponding column statistics */ private Map<Integer, ColStatistics> getForeignKeyCandidates(List<Operator<? extends OperatorDesc>> ops, ColStatistics csPK) { Map<Integer, ColStatistics> result = new HashMap<Integer, ColStatistics>(); if (csPK == null || ops == null) { return result; } for (int i = 0; i < ops.size(); i++) { Operator<? extends OperatorDesc> op = ops.get(i); if (op != null && op instanceof ReduceSinkOperator) { ReduceSinkOperator rsOp = (ReduceSinkOperator) op; List<String> keys = StatsUtils.getQualifedReducerKeyNames(rsOp.getConf().getOutputKeyColumnNames()); if (keys.size() == 1) { String joinCol = keys.get(0); if (rsOp.getStatistics() != null) { ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromColName(joinCol); if (cs != null && !cs.isPrimaryKey()) { if (StatsUtils.inferForeignKey(csPK, cs)) { result.put(i,cs); } } } } } } return result; } /** * Returns the index of parents whose join key columns are infer as primary keys * @param ops - operators * @return - list of primary key containing parent ids */ private Map<Integer, ColStatistics> getPrimaryKeyCandidates(List<Operator<? extends OperatorDesc>> ops) { Map<Integer, ColStatistics> result = new HashMap<Integer, ColStatistics>(); if (ops != null && !ops.isEmpty()) { for (int i = 0; i < ops.size(); i++) { Operator<? extends OperatorDesc> op = ops.get(i); if (op instanceof ReduceSinkOperator) { ReduceSinkOperator rsOp = (ReduceSinkOperator) op; List<String> keys = StatsUtils.getQualifedReducerKeyNames(rsOp.getConf().getOutputKeyColumnNames()); if (keys.size() == 1) { String joinCol = keys.get(0); if (rsOp.getStatistics() != null) { ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromColName(joinCol); if (cs != null && cs.isPrimaryKey()) { result.put(i, cs); } } } } } } return result; } private void updateColStats(HiveConf conf, Statistics stats, long newNumRows, CommonJoinOperator<? extends JoinDesc> jop, Map<Integer, Long> rowCountParents) { if (newNumRows < 0) { LOG.debug("STATS-" + jop.toString() + ": Overflow in number of rows. " + newNumRows + " rows will be set to Long.MAX_VALUE"); } if (newNumRows == 0) { LOG.debug("STATS-" + jop.toString() + ": Equals 0 in number of rows. " + newNumRows + " rows will be set to 1"); newNumRows = 1; } newNumRows = StatsUtils.getMaxIfOverflow(newNumRows); stats.setNumRows(newNumRows); // scale down/up the column statistics based on the changes in number of // rows from each parent. For ex: If there are 2 parents for JOIN operator // with 1st parent having 200 rows and 2nd parent having 2000 rows. Now if // the new number of rows after applying join rule is 10, then the column // stats for columns from 1st parent should be scaled down by 200/10 = 20x // and stats for columns from 2nd parent should be scaled down by 200x List<ColStatistics> colStats = stats.getColumnStats(); Set<String> colNameStatsAvailable = new HashSet<>(); for (ColStatistics cs : colStats) { colNameStatsAvailable.add(cs.getColumnName()); int pos = jop.getConf().getReversedExprs().get(cs.getColumnName()); long oldRowCount = rowCountParents.get(pos); double ratio = (double) newNumRows / (double) oldRowCount; long oldDV = cs.getCountDistint(); long newDV = oldDV; // if ratio is greater than 1, then number of rows increases. This can happen // when some operators like GROUPBY duplicates the input rows in which case // number of distincts should not change. Update the distinct count only when // the output number of rows is less than input number of rows. if (ratio <= 1.0) { newDV = (long) Math.ceil(ratio * oldDV); } // Assumes inner join // TODO: HIVE-5579 will handle different join types cs.setNumNulls(0); cs.setCountDistint(newDV); } stats.setColumnStats(colStats); long newDataSize = StatsUtils .getDataSizeFromColumnStats(newNumRows, colStats); // Add default size for columns for which stats were not available List<String> neededColumns = new ArrayList<>(); for (String colName : jop.getSchema().getColumnNames()) { if (!colNameStatsAvailable.contains(colName)) { neededColumns.add(colName); } } if (neededColumns.size() != 0) { int restColumnsDefaultSize = StatsUtils.estimateRowSizeFromSchema(conf, jop.getSchema().getSignature(), neededColumns); newDataSize = StatsUtils.safeAdd(newDataSize, StatsUtils.safeMult(restColumnsDefaultSize, newNumRows)); } stats.setDataSize(StatsUtils.getMaxIfOverflow(newDataSize)); } private long computeNewRowCount(List<Long> rowCountParents, long denom, CommonJoinOperator<? extends JoinDesc> join) { double factor = 0.0d; long result = 1; long max = rowCountParents.get(0); long maxIdx = 0; // To avoid long overflow, we will divide the max row count by denominator // and use that factor to multiply with other row counts for (int i = 1; i < rowCountParents.size(); i++) { if (rowCountParents.get(i) > max) { max = rowCountParents.get(i); maxIdx = i; } } denom = denom == 0 ? 1 : denom; factor = (double) max / (double) denom; for (int i = 0; i < rowCountParents.size(); i++) { if (i != maxIdx) { result = StatsUtils.safeMult(result, rowCountParents.get(i)); } } result = (long) (result * factor); if (join.getConf().getConds().length == 1) { JoinCondDesc joinCond = join.getConf().getConds()[0]; switch (joinCond.getType()) { case JoinDesc.INNER_JOIN: // only dealing with special join types here. break; case JoinDesc.LEFT_OUTER_JOIN : // all rows from left side will be present in resultset result = Math.max(rowCountParents.get(joinCond.getLeft()),result); break; case JoinDesc.RIGHT_OUTER_JOIN : // all rows from right side will be present in resultset result = Math.max(rowCountParents.get(joinCond.getRight()),result); break; case JoinDesc.FULL_OUTER_JOIN : // all rows from both side will be present in resultset result = Math.max(StatsUtils.safeAdd(rowCountParents.get(joinCond.getRight()), rowCountParents.get(joinCond.getLeft())),result); break; case JoinDesc.LEFT_SEMI_JOIN : // max # of rows = rows from left side result = Math.min(rowCountParents.get(joinCond.getLeft()),result); break; default: LOG.debug("Unhandled join type in stats estimation: " + joinCond.getType()); break; } } return result; } private void updateJoinColumnsNDV(Map<Integer, List<String>> joinKeys, Map<Integer, Statistics> joinStats, int numAttr) { int joinColIdx = 0; while (numAttr > 0) { long minNDV = Long.MAX_VALUE; // find min NDV for joining columns for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) { int pos = entry.getKey(); String key = entry.getValue().get(joinColIdx); ColStatistics cs = joinStats.get(pos).getColumnStatisticsFromColName(key); if (cs != null && cs.getCountDistint() < minNDV) { minNDV = cs.getCountDistint(); } } // set min NDV value to both columns involved in join if (minNDV != Long.MAX_VALUE) { for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) { int pos = entry.getKey(); String key = entry.getValue().get(joinColIdx); ColStatistics cs = joinStats.get(pos).getColumnStatisticsFromColName(key); if (cs != null) { cs.setCountDistint(minNDV); } } } joinColIdx++; numAttr--; } } private long getDenominator(List<Long> distinctVals) { if (distinctVals.isEmpty()) { // TODO: in union20.q the tab alias is not properly propagated down the // operator tree. This happens when UNION ALL is used as sub query. Hence, even // if column statistics are available, the tab alias will be null which will fail // to get proper column statistics. For now assume, worst case in which // denominator is 2. return 2; } // simple join from 2 relations: denom = max(v1, v2) if (distinctVals.size() <= 2) { return Collections.max(distinctVals); } else { // remember min value and ignore it from the denominator long minNDV = distinctVals.get(0); int minIdx = 0; for (int i = 1; i < distinctVals.size(); i++) { if (distinctVals.get(i) < minNDV) { minNDV = distinctVals.get(i); minIdx = i; } } // join from multiple relations: // denom = Product of all NDVs except the least of all long denom = 1; for (int i = 0; i < distinctVals.size(); i++) { if (i != minIdx) { denom = StatsUtils.safeMult(denom, distinctVals.get(i)); } } return denom; } } } /** * LIMIT operator changes the number of rows and thereby the data size. */ public static class LimitStatsRule extends DefaultStatsRule implements NodeProcessor { @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { LimitOperator lop = (LimitOperator) nd; Operator<? extends OperatorDesc> parent = lop.getParentOperators().get(0); Statistics parentStats = parent.getStatistics(); try { long limit = -1; limit = lop.getConf().getLimit(); if (satisfyPrecondition(parentStats)) { Statistics stats = parentStats.clone(); List<ColStatistics> colStats = StatsUtils.getColStatisticsUpdatingTableAlias( parentStats, lop.getSchema()); stats.setColumnStats(colStats); // if limit is greater than available rows then do not update // statistics if (limit <= parentStats.getNumRows()) { updateStats(stats, limit, true, lop); } lop.setStatistics(stats); if (isDebugEnabled) { LOG.debug("[0] STATS-" + lop.toString() + ": " + stats.extendedToString()); } } else { if (parentStats != null) { // in the absence of column statistics, compute data size based on // based on average row size Statistics wcStats = parentStats.clone(); limit = StatsUtils.getMaxIfOverflow(limit); if (limit <= parentStats.getNumRows()) { long numRows = limit; long avgRowSize = parentStats.getAvgRowSize(); long dataSize = StatsUtils.safeMult(avgRowSize, limit); wcStats.setNumRows(numRows); wcStats.setDataSize(dataSize); } lop.setStatistics(wcStats); if (isDebugEnabled) { LOG.debug("[1] STATS-" + lop.toString() + ": " + wcStats.extendedToString()); } } } } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } return null; } } /** * ReduceSink operator does not change any of the statistics. But it renames * the column statistics from its parent based on the output key and value * column names to make it easy for the downstream operators. This is different * from the default stats which just aggregates and passes along the statistics * without actually renaming based on output schema of the operator. */ public static class ReduceSinkStatsRule extends DefaultStatsRule implements NodeProcessor { @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { ReduceSinkOperator rop = (ReduceSinkOperator) nd; Operator<? extends OperatorDesc> parent = rop.getParentOperators().get(0); Statistics parentStats = parent.getStatistics(); if (parentStats != null) { AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; HiveConf conf = aspCtx.getConf(); List<String> outKeyColNames = rop.getConf().getOutputKeyColumnNames(); List<String> outValueColNames = rop.getConf().getOutputValueColumnNames(); Map<String, ExprNodeDesc> colExprMap = rop.getColumnExprMap(); try { Statistics outStats = parentStats.clone(); if (satisfyPrecondition(parentStats)) { List<ColStatistics> colStats = Lists.newArrayList(); for (String key : outKeyColNames) { String prefixedKey = Utilities.ReduceField.KEY.toString() + "." + key; ExprNodeDesc end = colExprMap.get(prefixedKey); if (end != null) { ColStatistics cs = StatsUtils .getColStatisticsFromExpression(conf, parentStats, end); if (cs != null) { cs.setColumnName(prefixedKey); colStats.add(cs); } } } for (String val : outValueColNames) { String prefixedVal = Utilities.ReduceField.VALUE.toString() + "." + val; ExprNodeDesc end = colExprMap.get(prefixedVal); if (end != null) { ColStatistics cs = StatsUtils .getColStatisticsFromExpression(conf, parentStats, end); if (cs != null) { cs.setColumnName(prefixedVal); colStats.add(cs); } } } outStats.setColumnStats(colStats); } rop.setStatistics(outStats); if (isDebugEnabled) { LOG.debug("[0] STATS-" + rop.toString() + ": " + outStats.extendedToString()); } } catch (CloneNotSupportedException e) { throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg()); } } return null; } } /** * Default rule is to aggregate the statistics from all its parent operators. */ public static class DefaultStatsRule implements NodeProcessor { @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd; OperatorDesc conf = op.getConf(); AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; HiveConf hconf = aspCtx.getConf(); if (conf != null) { Statistics stats = conf.getStatistics(); if (stats == null) { if (op.getParentOperators() != null) { // if parent statistics is null then that branch of the tree is not // walked yet. don't update the stats until all branches are walked if (isAllParentsContainStatistics(op)) { stats = new Statistics(); for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) { if (parent.getStatistics() != null) { Statistics parentStats = parent.getStatistics(); stats.addToNumRows(parentStats.getNumRows()); stats.addToDataSize(parentStats.getDataSize()); stats.updateColumnStatsState(parentStats.getColumnStatsState()); List<ColStatistics> colStats = StatsUtils.getColStatisticsFromExprMap(hconf, parentStats, op.getColumnExprMap(), op.getSchema()); stats.addToColumnStats(colStats); op.getConf().setStatistics(stats); if (isDebugEnabled) { LOG.debug("[0] STATS-" + op.toString() + ": " + stats.extendedToString()); } } } } } } } return null; } // check if all parent statistics are available private boolean isAllParentsContainStatistics(Operator<? extends OperatorDesc> op) { for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) { if (parent.getStatistics() == null) { return false; } } return true; } } public static NodeProcessor getTableScanRule() { return new TableScanStatsRule(); } public static NodeProcessor getSelectRule() { return new SelectStatsRule(); } public static NodeProcessor getFilterRule() { return new FilterStatsRule(); } public static NodeProcessor getGroupByRule() { return new GroupByStatsRule(); } public static NodeProcessor getJoinRule() { return new JoinStatsRule(); } public static NodeProcessor getLimitRule() { return new LimitStatsRule(); } public static NodeProcessor getReduceSinkRule() { return new ReduceSinkStatsRule(); } public static NodeProcessor getDefaultRule() { return new DefaultStatsRule(); } /** * Update the basic statistics of the statistics object based on the row number * @param stats * - statistics to be updated * @param newNumRows * - new number of rows * @param useColStats * - use column statistics to compute data size */ static void updateStats(Statistics stats, long newNumRows, boolean useColStats, Operator<? extends OperatorDesc> op) { updateStats(stats, newNumRows, useColStats, op, true); } static void updateStats(Statistics stats, long newNumRows, boolean useColStats, Operator<? extends OperatorDesc> op, boolean updateNDV) { if (newNumRows < 0) { LOG.debug("STATS-" + op.toString() + ": Overflow in number of rows. " + newNumRows + " rows will be set to Long.MAX_VALUE"); newNumRows = StatsUtils.getMaxIfOverflow(newNumRows); } if (newNumRows == 0) { LOG.debug("STATS-" + op.toString() + ": Equals 0 in number of rows. " + newNumRows + " rows will be set to 1"); newNumRows = 1; } long oldRowCount = stats.getNumRows(); double ratio = (double) newNumRows / (double) oldRowCount; stats.setNumRows(newNumRows); if (useColStats) { List<ColStatistics> colStats = stats.getColumnStats(); for (ColStatistics cs : colStats) { long oldNumNulls = cs.getNumNulls(); long oldDV = cs.getCountDistint(); long newNumNulls = Math.round(ratio * oldNumNulls); cs.setNumNulls(newNumNulls); if (updateNDV) { long newDV = oldDV; // if ratio is greater than 1, then number of rows increases. This can happen // when some operators like GROUPBY duplicates the input rows in which case // number of distincts should not change. Update the distinct count only when // the output number of rows is less than input number of rows. if (ratio <= 1.0) { newDV = (long) Math.ceil(ratio * oldDV); } cs.setCountDistint(newDV); } } stats.setColumnStats(colStats); long newDataSize = StatsUtils.getDataSizeFromColumnStats(newNumRows, colStats); stats.setDataSize(StatsUtils.getMaxIfOverflow(newDataSize)); } else { long newDataSize = (long) (ratio * stats.getDataSize()); stats.setDataSize(StatsUtils.getMaxIfOverflow(newDataSize)); } } static boolean satisfyPrecondition(Statistics stats) { return stats != null && stats.getBasicStatsState().equals(Statistics.State.COMPLETE) && !stats.getColumnStatsState().equals(Statistics.State.NONE); } }