/**
* Copyright (C) 2009-2013 FoundationDB, LLC
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.foundationdb.sql.optimizer.rule.cost;
import com.foundationdb.server.PersistitKeyValueTarget;
import com.foundationdb.server.store.statistics.Histogram;
import com.foundationdb.server.store.statistics.HistogramEntry;
import com.foundationdb.server.types.value.ValueSource;
import com.foundationdb.server.types.value.ValueSources;
import com.foundationdb.sql.optimizer.rule.SchemaRulesContext;
import com.foundationdb.sql.optimizer.plan.*;
import com.foundationdb.sql.optimizer.plan.TableGroupJoinTree.TableGroupJoinNode;
import com.foundationdb.ais.model.*;
import com.foundationdb.qp.rowtype.InternalIndexTypes;
import com.foundationdb.qp.rowtype.Schema;
import com.foundationdb.qp.rowtype.TableRowType;
import com.foundationdb.server.service.tree.KeyCreator;
import com.foundationdb.server.store.statistics.IndexStatistics;
import com.foundationdb.server.types.TInstance;
import com.persistit.Key;
import com.google.common.primitives.UnsignedBytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import static java.lang.Math.round;
/**
 * Estimates row counts and execution costs for query plan alternatives,
 * using index statistics (histograms) and a pluggable {@link CostModel}.
 * Subclasses supply the statistics source via {@link #getIndexStatistics}.
 */
public abstract class CostEstimator implements TableRowCounts
{
private static final Logger logger = LoggerFactory.getLogger(CostEstimator.class);
// Row types for the schema being planned against.
private final Schema schema;
// Compiler settings; see missingStatsSelectivity() for an example key.
private final Properties properties;
// Cost coefficients; built once by the supplied factory.
private final CostModel model;
// Scratch key used to encode constant values for histogram comparisons.
private final Key key;
// Target that writes encoded values into the scratch key.
private final PersistitKeyValueTarget keyPTarget;
// Unsigned lexicographic byte comparison, matching key encoding order.
private final Comparator<byte[]> bytesComparator;
protected CostEstimator(Schema schema, Properties properties, KeyCreator keyCreator,
CostModelFactory modelFactory) {
this.schema = schema;
this.properties = properties;
model = modelFactory.newCostModel(schema, this);
key = keyCreator.createKey();
keyPTarget = new PersistitKeyValueTarget(getClass().getSimpleName());
bytesComparator = UnsignedBytes.lexicographicalComparator();
}
/** Convenience constructor pulling schema and properties from the rules context. */
protected CostEstimator(SchemaRulesContext rulesContext, KeyCreator keyCreator,
CostModelFactory modelFactory) {
this(rulesContext.getSchema(), rulesContext.getProperties(),
keyCreator, modelFactory);
}
/** The cost model built at construction time. */
public CostModel getCostModel() {
return model;
}
/** Hook letting the model scale or otherwise adjust a raw estimate. */
protected CostEstimate adjustCostEstimate(CostEstimate costEstimate) {
return model.adjustCostEstimate(costEstimate);
}
@Override
public long getTableRowCount(Table table) {
    // Fall back to 1 when the table has never been analyzed (negative
    // sentinel from the statistics lookup), so estimates never hit zero.
    long fromStats = getTableRowCountFromStatistics(table);
    return (fromStats >= 0) ? fromStats : 1;
}
/** Row count recorded at analysis time, or -1 when unanalyzed.
 * This implementation is only for testing; the real server overrides it.
 * Returns the full row count (not the sample count) from the first
 * index of the table that has statistics.
 */
protected long getTableRowCountFromStatistics(Table table) {
    for (Index anIndex : table.getIndexes()) {
        IndexStatistics stats = getIndexStatistics(anIndex);
        if (stats != null) {
            return stats.getRowCount();
        }
    }
    return -1; // Not analyzed.
}
/** Statistics for the given index, or null when none exist. */
public abstract IndexStatistics getIndexStatistics(Index index);
/** For each column of the given index, locate a histogram and the index
 * that supplied it, filling the parallel output arrays.
 * Preference order: the requested index's own statistics for that
 * position, then a TableIndex led by the same column, then a GroupIndex.
 * histograms[i] == null means no statistics were found for column i;
 * indexColumnsIndexes[i] may still name a candidate index without stats.
 */
public void getIndexColumnStatistics(Index index, Index[] indexColumnsIndexes, Histogram[] histograms) {
List<IndexColumn> allIndexColumns = index.getAllColumns();
IndexStatistics statsForRequestedIndex = getIndexStatistics(index);
int nIndexColumns = allIndexColumns.size();
int nKeyColumns = index.getKeyColumns().size();
for (int i = 0; i < nIndexColumns; i++) {
Histogram histogram = null;
Index indexColumnsIndex = null;
// Use a histogram of the index itself, if possible.
if (i < nKeyColumns && statsForRequestedIndex != null) {
indexColumnsIndex = index;
histogram = statsForRequestedIndex.getHistogram(i, 1);
}
if (histogram == null) {
// For the leading column, the index itself is still a candidate
// even without stats; for later columns it is not.
indexColumnsIndex = (i == 0) ? index : null;
// If none, find a TableIndex whose first column is leadingColumn
IndexStatistics indexStatistics = null;
Column leadingColumn = allIndexColumns.get(i).getColumn();
for (TableIndex tableIndex : leadingColumn.getTable().getIndexes()) {
if (tableIndex.getKeyColumns().get(0).getColumn() == leadingColumn) {
indexStatistics = getIndexStatistics(tableIndex);
if (indexStatistics != null) {
indexColumnsIndex = tableIndex;
histogram = indexStatistics.getHistogram(0, 1);
break;
}
else if (indexColumnsIndex == null) {
// Remember a candidate index even though it lacks stats.
indexColumnsIndex = tableIndex;
}
}
}
// If none, find a GroupIndex whose first column is leadingColumn
if (indexStatistics == null) {
groupLoop: for (Group group : schema.ais().getGroups().values()) {
for (GroupIndex groupIndex : group.getIndexes()) {
if (groupIndex.getKeyColumns().get(0).getColumn() == leadingColumn) {
indexStatistics = getIndexStatistics(groupIndex);
if (indexStatistics != null) {
indexColumnsIndex = groupIndex;
histogram = indexStatistics.getHistogram(0, 1);
break groupLoop;
}
else if (indexColumnsIndex == null) {
indexColumnsIndex = groupIndex;
}
}
}
}
}
}
indexColumnsIndexes[i] = indexColumnsIndex;
histograms[i] = histogram;
}
}
/* Settings.
 * Note: these are compiler properties, so they start with
 * optimizer.cost. in the server.properties file.
 */
/** Selectivity assumed for a predicate when no histogram is available. */
protected static final double DEFAULT_MISSING_STATS_SELECTIVITY = 0.85;
/** Selectivity to use when statistics are missing; configurable via
 * the cost.missingStatsSelectivity compiler property. */
protected double missingStatsSelectivity() {
    String str = getProperty("cost.missingStatsSelectivity");
    if (str != null)
        return Double.parseDouble(str); // parseDouble avoids the needless boxing of Double.valueOf
    else
        return DEFAULT_MISSING_STATS_SELECTIVITY;
}
/** Look up a compiler property, or null when unset. */
protected String getProperty(String key) {
    return properties.getProperty(key);
}
/** Look up a compiler property with a default. */
protected String getProperty(String key, String defval) {
    return properties.getProperty(key, defval);
}
/** Estimate cost of scanning from this index. */
public CostEstimate costIndexScan(Index index,
List<ExpressionNode> equalityComparands,
ExpressionNode lowComparand, boolean lowInclusive,
ExpressionNode highComparand, boolean highInclusive) {
// Size the scan first, then cost that many rows.
return costIndexScan(index, sizeIndexScan(index, equalityComparands,
lowComparand, lowInclusive,
highComparand, highInclusive));
}
/** Estimate number of rows returned from this index, given equality
 * comparands on leading columns and an optional range on the next one. */
public long sizeIndexScan(Index index,
List<ExpressionNode> equalityComparands,
ExpressionNode lowComparand, boolean lowInclusive,
ExpressionNode highComparand, boolean highInclusive) {
if (index.isUnique()) {
if ((equalityComparands != null) &&
(equalityComparands.size() >= index.getKeyColumns().size())) {
// Exact match from unique index; probably one row.
return 1;
}
}
Table indexedTable = index.leafMostTable();
long rowCount = getTableRowCount(indexedTable);
// Get a histogram (and the index that provided it) per column via
// getIndexColumnStatistics. A non-null histograms[i] is a
// leading-column histogram usable for column i; a null means no index
// stats were found for that column.
int nidxcols = index.getAllColumns().size();
Index[] indexColumnsIndexes = new Index[nidxcols];
Histogram[] histograms = new Histogram[nidxcols];
getIndexColumnStatistics(index, indexColumnsIndexes, histograms);
// Number of constrained columns: equalities plus one if there is a range.
int columnCount = 0;
if (equalityComparands != null)
columnCount = equalityComparands.size();
if ((lowComparand != null) || (highComparand != null))
columnCount++;
if (columnCount == 0) {
// Index just used for ordering.
return rowCount;
}
boolean scaleCount = true;
double selectivity = 1.0;
if (equalityComparands != null && !equalityComparands.isEmpty()) {
selectivity = fractionEqual(index,
indexColumnsIndexes,
histograms,
equalityComparands);
}
if (lowComparand != null || highComparand != null) {
// The range applies to the column right after the equalities.
selectivity *= fractionBetween(index.getAllColumns().get(columnCount - 1).getColumn(),
indexColumnsIndexes[columnCount - 1],
histograms[columnCount - 1],
lowComparand, lowInclusive,
highComparand, highInclusive);
}
// Mostly-distinct columns don't scale with table growth the same way.
if (mostlyDistinct(histograms)) scaleCount = false;
// statsCount: Number of rows in the table based on an index of the table, according to index
// statistics, which may be stale.
// rowCount: Approximate number of rows in the table, reasonably up to date.
long statsCount;
IndexStatistics stats = tableIndexStatistics(indexedTable, indexColumnsIndexes, histograms);
if (stats != null) {
statsCount = stats.getSampledCount();
}
else {
statsCount = rowCount;
scaleCount = false;
}
long nrows = Math.max(1, round(selectivity * statsCount));
if (scaleCount) {
// Scale the (possibly stale) stats-based estimate to the current row count.
checkRowCountChanged(indexedTable, stats, rowCount);
if ((rowCount > 0) && (statsCount > 0))
nrows = simpleRound((nrows * rowCount), statsCount);
}
return nrows;
}
/** Find statistics whose leading column belongs to the indexed table
 * itself, or null when no such statistics are available.
 */
private IndexStatistics tableIndexStatistics(Table indexedTable,
                                             Index[] indexColumnsIndexes,
                                             Histogram[] histograms) {
    // At least one of the index columns must be from the indexed table.
    for (int pos = 0; pos < indexColumnsIndexes.length; pos++) {
        Index candidate = indexColumnsIndexes[pos];
        if (candidate == null)
            continue;
        Column leading = candidate.getKeyColumns().get(0).getColumn();
        if ((leading.getTable() == indexedTable) && (histograms[pos] != null)) {
            return histograms[pos].getIndexStatistics();
        }
    }
    // No index stats available.
    return null;
}
/** Estimate cost of scanning given number of rows from this index.
 * One random access to get there, then nrows-1 sequential accesses following,
 * plus a surcharge for copying something as wide as the index.
 */
public CostEstimate costIndexScan(Index index, long nrows) {
return new CostEstimate(nrows,
model.indexScan(schema.indexRowType(index), (int)nrows));
}
/** Combined selectivity of the given equality comparands against the
 * corresponding leading index columns; the product of the per-column
 * fractions.
 */
protected double fractionEqual(Index index,
                               Index[] indexColumnsIndexes,
                               Histogram[] histograms,
                               List<ExpressionNode> eqExpressions) {
    keyPTarget.attach(key);
    double result = 1.0;
    int ncomparands = eqExpressions.size();
    for (int pos = 0; pos < ncomparands; pos++) {
        result *= fractionEqual(index.getAllColumns().get(pos).getColumn(),
                                indexColumnsIndexes[pos],
                                histograms[pos],
                                eqExpressions.get(pos));
    }
    return result;
}
/** Fraction of sampled rows expected to equal the given expression's
 * value, per the histogram for this column. Falls back to
 * missingStatsSelectivity() when no usable statistics exist.
 */
protected double fractionEqual(Column column,
                               Index index,
                               Histogram histogram,
                               ExpressionNode expr) {
    if (histogram == null) {
        missingStats(index, column);
        return missingStatsSelectivity();
    } else {
        long indexStatsSampledCount = histogram.getIndexStatistics().getSampledCount();
        if (histogram.getEntries().isEmpty()) {
            missingStats(index, column);
            return missingStatsSelectivity();
        } else if ((expr instanceof ColumnExpression) &&
                   (((ColumnExpression)expr).getTable() instanceof ExpressionsSource)) {
            // Can do better than unknown if we know some actual values.
            // Compute the average selectivity among them.
            ColumnExpression toColumn = (ColumnExpression)expr;
            ExpressionsSource values = (ExpressionsSource)toColumn.getTable();
            int position = toColumn.getPosition();
            double sum = 0.0;
            int count = 0;
            for (List<ExpressionNode> row : values.getExpressions()) {
                sum += fractionEqual(column, index, histogram, row.get(position));
                count++;
            }
            if (count > 0) sum /= count;
            return sum;
        } else {
            key.clear();
            keyPTarget.attach(key);
            // encodeKeyValue evaluates non-null iff node is a constant expression. key is initialized as a side-effect.
            byte[] columnValue = encodeKeyValue(expr, index, histogram.getFirstColumn()) ? keyCopy() : null;
            if (columnValue == null) {
                // Variable expression. Use average selectivity for histogram.
                return
                    mostlyDistinct(histogram)
                    ? 1.0 / indexStatsSampledCount
                    : 1.0 / histogram.totalDistinctCount();
            } else {
                // TODO: Could use Collections.binarySearch if we had something that looked like a HistogramEntry.
                List<HistogramEntry> entries = histogram.getEntries();
                for (HistogramEntry entry : entries) {
                    // Constant expression: locate its bucket.
                    int compare = bytesComparator.compare(columnValue, entry.getKeyBytes());
                    if (compare == 0) {
                        // Exactly a bucket boundary value.
                        return ((double) entry.getEqualCount()) / indexStatsSampledCount;
                    } else if (compare < 0) {
                        // Inside this bucket: average rows per distinct value.
                        long d = entry.getDistinctCount();
                        return d == 0 ? 0.0 : ((double) entry.getLessCount()) / (d * indexStatsSampledCount);
                    }
                }
                // Value sorts after every histogram entry.
                HistogramEntry lastEntry = entries.get(entries.size() - 1);
                long d = lastEntry.getDistinctCount();
                if (d == 0) {
                    return 1;
                }
                // BUG FIX: previously returned the magic constant 0.00483 here.
                // Use the same average-rows-per-distinct-value estimate as the
                // in-bucket case above, based on the last bucket.
                return ((double) lastEntry.getLessCount()) / (d * indexStatsSampledCount);
            }
        }
    }
}
/** Fraction of sampled rows whose column value lies between lo and hi
 * (either bound may be null / non-constant for open-ended), per the
 * histogram. Falls back to missingStatsSelectivity() when there are no
 * usable statistics or neither bound is constant.
 */
protected double fractionBetween(Column column,
Index index,
Histogram histogram,
ExpressionNode lo, boolean lowInclusive,
ExpressionNode hi, boolean highInclusive)
{
if (histogram == null || histogram.getEntries().isEmpty()) {
missingStats(index, column);
return missingStatsSelectivity();
}
keyPTarget.attach(key);
key.clear();
// Bounds encode non-null only when they are constant expressions.
byte[] loBytes = encodeKeyValue(lo, index, histogram.getFirstColumn()) ? keyCopy() : null;
key.clear();
byte[] hiBytes = encodeKeyValue(hi, index, histogram.getFirstColumn()) ? keyCopy() : null;
if (loBytes == null && hiBytes == null) {
return missingStatsSelectivity();
}
// before: still walking buckets entirely below the low bound.
boolean before = (loBytes != null);
long rowCount = 0;
byte[] entryStartBytes, entryEndBytes = null;
for (HistogramEntry entry : histogram.getEntries()) {
entryStartBytes = entryEndBytes;
entryEndBytes = entry.getKeyBytes();
long portionStart = 0;
if (before) {
int compare = bytesComparator.compare(loBytes, entryEndBytes);
if (compare > 0) {
// Bucket entirely below the low bound.
continue;
}
if (compare == 0) {
if (lowInclusive) {
rowCount += entry.getEqualCount();
}
continue;
}
// Low bound falls inside this bucket; count only the part above it.
portionStart = uniformPortion(entryStartBytes,
entryEndBytes,
loBytes,
entry.getLessCount());
before = false; // Don't include uniformPortion for subsequent buckets.
// Fall through to check high in same entry.
}
if (hiBytes != null) {
int compare = bytesComparator.compare(hiBytes, entryEndBytes);
if (compare == 0) {
rowCount += entry.getLessCount() - portionStart;
if (highInclusive) {
rowCount += entry.getEqualCount();
}
break;
}
if (compare < 0) {
// High bound inside this bucket; take a proportional part.
rowCount += uniformPortion(entryStartBytes,
entryEndBytes,
hiBytes,
entry.getLessCount()) - portionStart;
break;
}
}
// Bucket wholly inside the range.
rowCount += entry.getLessCount() + entry.getEqualCount() - portionStart;
}
return ((double) Math.max(rowCount, 1)) / histogram.getIndexStatistics().getSampledCount();
}
// Must be provably mostly distinct: every histogram is available and mostly distinct.
private boolean mostlyDistinct(Histogram[] histograms)
{
    for (Histogram each : histograms) {
        // The scalar overload treats null as not-distinct, covering the
        // missing-histogram case as well as the < 90% distinct case.
        if (!mostlyDistinct(each))
            return false;
    }
    return true;
}
/** True when more than 90% of the sampled rows have distinct values. */
private boolean mostlyDistinct(Histogram histogram)
{
    if (histogram == null)
        return false;
    return histogram.totalDistinctCount() * 10 > histogram.getIndexStatistics().getSampledCount() * 9;
}
/** Assuming that byte strings are uniformly distributed, estimate the
 * position within [start, end] that middle corresponds to, scaled to
 * total. Callers guarantee start sorts strictly before end.
 */
protected static long uniformPortion(byte[] start, byte[] end, byte[] middle,
                                     long total) {
    // Skip the common prefix of start and end.
    int pos = 0;
    while (safeByte(start, pos) == safeByte(end, pos)) {
        pos++;
    }
    // Interpret the next four bytes of each as a 32-bit magnitude.
    long lo = 0;
    long hi = 0;
    long mid = 0;
    for (int off = 0; off < 4; off++) {
        lo = (lo << 8) | safeByte(start, pos + off);
        hi = (hi << 8) | safeByte(end, pos + off);
        mid = (mid << 8) | safeByte(middle, pos + off);
    }
    return simpleRound((mid - lo) * total, hi - lo);
}
/** Byte at idx as an unsigned value, or 0 past the end (or for a null array). */
private static int safeByte(byte[] ba, int idx) {
    return ((ba != null) && (idx < ba.length)) ? (ba[idx] & 0xFF) : 0;
}
/** Integer division of n by d, rounding half away from zero. */
protected static long simpleRound(long n, long d) {
    return (n + d / 2) / d;
}
/** Encode the given field expressions into a comparable key byte array,
 * or return null if some field is not a constant.
 * When upper is set and there is no extra field, Key.AFTER is appended
 * so the bytes act as an open upper bound on the encoded prefix.
 */
protected byte[] encodeKeyBytes(Index index,
                                List<ExpressionNode> fields,
                                ExpressionNode anotherField,
                                boolean upper) {
    key.clear();
    keyPTarget.attach(key);
    int pos = 0;
    if (fields != null) {
        for (ExpressionNode field : fields) {
            boolean encoded = encodeKeyValue(field, index, pos++);
            if (!encoded)
                return null;
        }
    }
    if (anotherField != null) {
        boolean encoded = encodeKeyValue(anotherField, index, pos++);
        if (!encoded)
            return null;
    }
    else if (upper) {
        key.append(Key.AFTER);
    }
    return keyCopy();
}
/** Whether the expression has a value known at plan time. */
protected boolean isConstant(ExpressionNode node)
{
return node.isConstant();
}
/** Append the value of a constant expression (or a set parameter, or an
 * explicit null marker) to the attached key via keyPTarget.
 * @return true if a value was encoded; false when the node is not a
 * plan-time constant, in which case the key is left unchanged.
 */
protected boolean encodeKeyValue(ExpressionNode node, Index index, int column) {
ValueSource value = null;
if (node instanceof ConstantExpression) {
if (node.getPreptimeValue() != null) {
if (node.getType() == null) { // Literal null
keyPTarget.putNull();
return true;
}
value = node.getPreptimeValue().value();
}
}
else if (node instanceof ParameterExpression && ((ParameterExpression)node).isSet()) {
if (((ParameterExpression)node).getValue() == null) {
keyPTarget.putNull();
return true;
}
value = ValueSources.fromObject(((ParameterExpression)node).getValue(),
node.getPreptimeValue().type()).value();
}
else if (node instanceof IsNullIndexKey) {
keyPTarget.putNull();
return true;
}
if (value == null)
return false;
// Pick the type to encode with, adjusting for spatial indexes whose
// single z-order column stands in for several declared columns.
TInstance type;
determine_type:
{
if (index.isSpatial()) {
int firstSpatialColumn = index.firstSpatialArgument();
if (column == firstSpatialColumn) {
type = InternalIndexTypes.LONG.instance(node.getPreptimeValue().isNullable());
break determine_type;
}
else if (column > firstSpatialColumn) {
// Skip over the declared columns collapsed into the spatial one.
column += index.spatialColumns() - 1;
}
}
type = index.getAllColumns().get(column).getColumn().getType();
}
type.writeCollating(value, keyPTarget);
return true;
}
/** Snapshot the currently encoded bytes of the scratch key. */
private byte[] keyCopy()
{
    int size = key.getEncodedSize();
    byte[] snapshot = new byte[size];
    System.arraycopy(key.getEncodedBytes(), 0, snapshot, 0, size);
    return snapshot;
}
/** Estimate the cost of intersecting a left-deep multi-index intersection. */
public CostEstimate costIndexIntersection(MultiIndexIntersectScan intersection, IndexIntersectionCoster coster)
{
    IntersectionCostRunner runner = new IntersectionCostRunner(coster);
    runner.buildIndexScanCost(intersection);
    return new CostEstimate(runner.rowCount, runner.cost);
}
/** Estimate the cost of starting at the given table's index and
 * fetching the given tables, then joining them with Flatten and
 * Product. */
public CostEstimate costFlatten(TableGroupJoinTree tableGroup,
TableSource indexTable,
Set<TableSource> requiredTables) {
TableGroupJoinNode startNode = tableGroup.getRoot().findTable(indexTable);
coverBranches(tableGroup, startNode, requiredTables);
long rowCount = 1;
double cost = 0.0;
// Collect the required ancestors of the start node for one AncestorLookup.
List<TableRowType> ancestorTypes = new ArrayList<>();
for (TableGroupJoinNode ancestorNode = startNode;
ancestorNode != null;
ancestorNode = ancestorNode.getParent()) {
if (isRequired(ancestorNode)) {
if ((ancestorNode == startNode) &&
(getSideBranches(ancestorNode) != 0)) {
continue; // Branch, not ancestor.
}
ancestorTypes.add(schema.tableRowType(ancestorNode.getTable().getTable().getTable()));
}
}
// Cost to get main branch.
cost += model.ancestorLookup(ancestorTypes);
for (TableGroupJoinNode branchNode : tableGroup) {
if (isSideBranchLeaf(branchNode)) {
// Walk up from the leaf to find where this side branch roots.
int branch = Long.numberOfTrailingZeros(getBranches(branchNode));
TableGroupJoinNode branchRoot = branchNode, nextToRoot = null;
while (true) {
TableGroupJoinNode parent = branchRoot.getParent();
if (parent == startNode) {
// Different kind of BranchLookup.
nextToRoot = branchRoot = parent;
break;
}
if ((parent == null) || !onBranch(parent, branch))
break;
nextToRoot = branchRoot;
branchRoot = parent;
}
assert (nextToRoot != null);
// Multiplier from this branch.
rowCount *= descendantCardinality(branchNode, branchRoot);
// Cost to get side branch.
cost += model.branchLookup(schema.tableRowType(nextToRoot.getTable().getTable().getTable()));
}
}
for (TableGroupJoinNode node : tableGroup) {
if (isFlattenable(node)) {
long nrows = tableCardinality(node);
// Cost of flattening these children with their ancestor.
cost += model.flatten((int)nrows);
}
}
if (rowCount > 1)
cost += model.product((int)rowCount);
return new CostEstimate(rowCount, cost);
}
/** Estimate the cost of starting from a group scan and joining
 * with Flatten and Product. */
public CostEstimate costFlattenGroup(TableGroupJoinTree tableGroup,
                                     Set<TableSource> requiredTables) {
    TableGroupJoinNode root = tableGroup.getRoot();
    coverBranches(tableGroup, root, requiredTables);
    int branches = 0;
    long rows = 1;
    double totalCost = 0.0;
    for (TableGroupJoinNode node : tableGroup) {
        if (!isFlattenable(node))
            continue;
        long nrows = getTableRowCount(node.getTable().getTable().getTable());
        // Cost of flattening these children with their ancestor.
        totalCost += model.flatten((int)nrows);
        if (isSideBranchLeaf(node)) {
            // Leaf of a new branch: contributes a row multiplier.
            branches++;
            rows *= nrows;
        }
    }
    if (branches > 1)
        totalCost += model.product((int)rows);
    return new CostEstimate(rows, totalCost);
}
/** Estimate the cost of getting the desired number of flattened
 * rows from a group scan. This combines costing of the partial
 * scan itself and the flatten, since they are tied together. */
public CostEstimate costPartialGroupScanAndFlatten(TableGroupJoinTree tableGroup,
Set<TableSource> requiredTables,
Map<Table,Long> tableCounts) {
TableGroupJoinNode rootNode = tableGroup.getRoot();
coverBranches(tableGroup, rootNode, requiredTables);
int branchCount = 0;
long rowCount = 1;
double cost = 0.0;
// Partial scan cost over each table's share of the group.
for (Map.Entry<Table,Long> entry : tableCounts.entrySet()) {
cost += model.partialGroupScan(schema.tableRowType(entry.getKey()),
entry.getValue());
}
for (TableGroupJoinNode node : tableGroup) {
if (isFlattenable(node)) {
// NOTE(review): assumes tableCounts has an entry for every
// flattenable table; a missing entry would NPE on unboxing — confirm with callers.
long nrows = tableCounts.get(node.getTable().getTable().getTable());
// Cost of flattening these children with their ancestor.
cost += model.flatten((int)nrows);
if (isSideBranchLeaf(node)) {
// Leaf of a new branch.
branchCount++;
rowCount *= nrows;
}
}
}
if (branchCount > 1)
cost += model.product((int)rowCount);
return new CostEstimate(rowCount, cost);
}
/** Estimate the cost of starting from outside the loop in the same group. */
public CostEstimate costFlattenNested(TableGroupJoinTree tableGroup,
TableSource outsideTable,
TableSource insideTable,
boolean insideIsParent,
Set<TableSource> requiredTables) {
TableGroupJoinNode startNode = tableGroup.getRoot().findTable(insideTable);
coverBranches(tableGroup, startNode, requiredTables);
int branchCount = 0;
long rowCount = 1;
double cost = 0.0;
if (insideIsParent) {
// Inner table is an ancestor of the outer: one ancestor lookup.
cost += model.ancestorLookup(Collections.singletonList(schema.tableRowType(insideTable.getTable().getTable())));
}
else {
// Inner table is a descendant: branch lookup multiplies rows.
rowCount *= descendantCardinality(insideTable, outsideTable);
cost += model.branchLookup(schema.tableRowType(insideTable.getTable().getTable()));
}
for (TableGroupJoinNode node : tableGroup) {
if (isFlattenable(node)) {
long nrows = tableCardinality(node);
// Cost of flattening these children with their ancestor.
cost += model.flatten((int)nrows);
if (isSideBranchLeaf(node)) {
// Leaf of a new branch.
branchCount++;
rowCount *= nrows;
}
}
}
if (branchCount > 1)
cost += model.product((int)rowCount);
return new CostEstimate(rowCount, cost);
}
/* Per-node state bits used while covering branches: bit 0 marks a
 * table required in flattens; bit 1 marks the main (ancestor) branch;
 * bits 2 and up identify individual side branches.
 */
/** This table needs to be included in flattens. */
protected static final long REQUIRED = 1;
/** This table is on the main branch. */
protected static final long ANCESTOR = 2;
protected static final int ANCESTOR_BRANCH = 1;
/** Mask for main or side branch. */
protected static final long BRANCH_MASK = ~1;
/** Mask for side branch. */
protected static final long SIDE_BRANCH_MASK = ~3;
protected static boolean isRequired(TableGroupJoinNode table) {
return ((table.getState() & REQUIRED) != 0);
}
protected static void setRequired(TableGroupJoinNode table) {
table.setState(table.getState() | REQUIRED);
}
protected static boolean isAncestor(TableGroupJoinNode table) {
return ((table.getState() & ANCESTOR) != 0);
}
protected static long getBranches(TableGroupJoinNode table) {
return (table.getState() & BRANCH_MASK);
}
protected static long getSideBranches(TableGroupJoinNode table) {
return (table.getState() & SIDE_BRANCH_MASK);
}
/** Is this table marked as being on branch bit b? */
protected static boolean onBranch(TableGroupJoinNode table, int b) {
return ((table.getState() & (1 << b)) != 0);
}
/** Mark this table as being on branch bit b. */
protected void setBranch(TableGroupJoinNode table, int b) {
table.setState(table.getState() | (1 << b));
}
/** Like {@link BranchJoiner#markBranches} but simpler without
 * having to worry about the exact <em>order</em> in which
 * operations are performed.
 * Assigns branch bits to every node: the path from startNode up to the
 * root becomes the ancestor branch; each side branch leaf gets its own
 * bit walked up to its join point. A join point shared by multiple
 * branches may itself become required.
 */
protected void coverBranches(TableGroupJoinTree tableGroup,
TableGroupJoinNode startNode,
Set<TableSource> requiredTables) {
// Reset state: only the REQUIRED bit survives from the request.
for (TableGroupJoinNode table : tableGroup) {
table.setState(requiredTables.contains(table.getTable()) ? REQUIRED : 0);
}
int nbranches = ANCESTOR_BRANCH;
boolean anyAncestorRequired = false;
// Mark the main (ancestor) branch from the start node up to the root.
for (TableGroupJoinNode table = startNode; table != null; table = table.getParent()) {
setBranch(table, nbranches);
if (isRequired(table))
anyAncestorRequired = true;
}
nbranches++;
for (TableGroupJoinNode table : tableGroup) {
if (isSideBranchLeaf(table)) {
// This is the leaf of a new side branch.
while (true) {
boolean onBranchAlready = (getBranches(table) != 0);
setBranch(table, nbranches);
if (onBranchAlready) {
// Reached a join point with an existing branch.
if (!isRequired(table)) {
// Might become required for joining of branches.
if (Long.bitCount(anyAncestorRequired ?
getBranches(table) :
getSideBranches(table)) > 1)
setRequired(table);
}
break;
}
table = table.getParent();
}
nbranches++;
}
}
}
/** A table is the leaf of some side branch if it's required but
 * none of its descendants are. */
protected boolean isSideBranchLeaf(TableGroupJoinNode table) {
    if (isAncestor(table) || !isRequired(table))
        return false;
    for (TableGroupJoinNode below : table) {
        boolean isSelf = (below == table);
        if (!isSelf && isRequired(below))
            return false;
    }
    return true;
}
/** A table is flattened in if it's required and one of its
 * ancestors is as well. */
protected boolean isFlattenable(TableGroupJoinNode table) {
    if (!isRequired(table))
        return false;
    // Walk up the ancestor chain looking for another required table.
    for (TableGroupJoinNode up = table.getParent(); up != null; up = up.getParent()) {
        if (isRequired(up))
            return true;
    }
    return false;
}
/** Number of rows of given table, total per index row.
 * Ancestor tables contribute 1; other tables scale by the
 * child-per-ancestor cardinality of the nearest ancestor-branch node.
 * NOTE(review): assumes the parent walk always reaches an
 * ancestor-marked node before running off the root; a node without one
 * would NPE — confirm coverBranches guarantees this for all callers.
 */
protected long tableCardinality(TableGroupJoinNode table) {
if (isAncestor(table))
return 1;
TableGroupJoinNode parent = table;
while (true) {
parent = parent.getParent();
if (isAncestor(parent))
return descendantCardinality(table, parent);
}
}
/** Number of child rows per ancestor row.
 * Never returns zero, to avoid contaminating product estimates.
 */
protected long descendantCardinality(TableGroupJoinNode childNode,
                                     TableGroupJoinNode ancestorNode) {
    return descendantCardinality(childNode.getTable(), ancestorNode.getTable());
}
protected long descendantCardinality(TableSource childTable,
                                     TableSource ancestorTable) {
    long children = getTableRowCount(childTable.getTable().getTable());
    long ancestors = getTableRowCount(ancestorTable.getTable().getTable());
    if (ancestors == 0)
        return 1; // guard against division by zero
    long ratio = simpleRound(children, ancestors);
    return Math.max(ratio, 1);
}
/** Estimate the cost of testing some conditions. */
// TODO: Assumes that each condition turns into a separate select.
public CostEstimate costSelect(Collection<ConditionExpression> conditions,
                               double selectivity,
                               long size) {
    // Approximate number of predicate tests: an IN list counts once per member.
    int npredicates = 0;
    for (ConditionExpression cond : conditions) {
        // TODO: Maybe various kinds of subquery predicate get high count?
        npredicates += (cond instanceof InListCondition)
            ? ((InListCondition)cond).getExpressions().size()
            : 1;
    }
    long outRows = Math.max(1, round(size * selectivity));
    return new CostEstimate(outRows, model.select((int)size) * npredicates);
}
/** As above, deriving selectivity from per-column condition statistics. */
public CostEstimate costSelect(Collection<ConditionExpression> conditions,
                               SelectivityConditions selectivityConditions,
                               long size) {
    return costSelect(conditions, conditionsSelectivity(selectivityConditions), size);
}
/** Accumulates, per column, the conditions that restrict it, for use
 * by {@link #conditionsSelectivity}. */
public static class SelectivityConditions {
    private Map<ColumnExpression,Collection<ConditionExpression>> map =
        new HashMap<>();
    /** Record that the given condition restricts the given column. */
    public void addCondition(ColumnExpression column, ConditionExpression condition) {
        Collection<ConditionExpression> conds = map.get(column);
        if (conds == null) {
            conds = new ArrayList<>();
            map.put(column, conds);
        }
        conds.add(condition);
    }
    /** Columns with at least one recorded condition. */
    public Iterable<ColumnExpression> getColumns() {
        return map.keySet();
    }
    /** Conditions recorded for the column, or null when none. */
    public Collection<ConditionExpression> getConditions(ColumnExpression column) {
        return map.get(column);
    }
}
/** Estimate the combined selectivity of the given per-column
 * conditions, using the leading-column histogram of some index on each
 * column. Columns for which no statistics are found contribute 1.0.
 */
public double conditionsSelectivity(SelectivityConditions conditions) {
    double selectivity = 1.0;
    for (ColumnExpression entry : conditions.getColumns()) {
        Index index = null;
        IndexStatistics indexStatistics = null;
        Column column = entry.getColumn();
        // Find a TableIndex whose first column is this column.
        for (TableIndex tableIndex : column.getTable().getIndexes()) {
            if (!tableIndex.isSpatial() && tableIndex.getKeyColumns().get(0).getColumn() == column) {
                indexStatistics = getIndexStatistics(tableIndex);
                if (indexStatistics != null) {
                    index = tableIndex;
                    break;
                }
            }
        }
        // If none, find a GroupIndex whose first column is this column.
        if (indexStatistics == null) {
            groupLoop: for (Group group : schema.ais().getGroups().values()) {
                for (GroupIndex groupIndex : group.getIndexes()) {
                    if (!groupIndex.isSpatial() && groupIndex.getKeyColumns().get(0).getColumn() == column) {
                        indexStatistics = getIndexStatistics(groupIndex);
                        if (indexStatistics != null) {
                            index = groupIndex;
                            break groupLoop;
                        }
                    }
                }
            }
        }
        if (indexStatistics == null) continue; // no stats; contributes 1.0
        // Classify the conditions on this column.
        ExpressionNode eq = null, ne = null, lo = null, hi = null;
        boolean loInc = false, hiInc = false;
        List<ExpressionNode> in = null;
        for (ConditionExpression cond : conditions.getConditions(entry)) {
            if (cond instanceof ComparisonCondition) {
                ComparisonCondition ccond = (ComparisonCondition)cond;
                switch (ccond.getOperation()) {
                case EQ:
                    eq = ccond.getRight();
                    break;
                case NE:
                    ne = ccond.getRight();
                    break;
                case LT:
                    hi = ccond.getRight();
                    hiInc = false;
                    break;
                case LE:
                    hi = ccond.getRight();
                    hiInc = true;
                    break;
                case GT:
                    lo = ccond.getRight();
                    loInc = false;
                    break;
                case GE:
                    lo = ccond.getRight();
                    loInc = true;
                    break;
                }
            }
            else if (cond instanceof InListCondition) {
                in = ((InListCondition)cond).getExpressions();
            }
        }
        Histogram histogram = indexStatistics.getHistogram(0, 1);
        if (eq != null) {
            selectivity *= fractionEqual(column, index, histogram, eq);
        }
        else if (ne != null)
            // BUG FIX: this previously passed eq, which is always null on
            // this path, so the NE comparand was ignored; the estimate must
            // be based on ne itself.
            selectivity *= (1.0 - fractionEqual(column, index, histogram, ne));
        else if ((lo != null) || (hi != null))
            selectivity *= fractionBetween(column, index, histogram, lo, loInc, hi, hiInc);
        else if (in != null) {
            // IN list: sum of equality fractions, capped at 1.
            double fraction = 0.0;
            for (ExpressionNode expr : in) {
                fraction += fractionEqual(column, index, histogram, expr);
            }
            if (fraction > 1.0) fraction = 1.0;
            selectivity *= fraction;
        }
    }
    return selectivity;
}
/** Estimate the cost of a sort of the given size. */
public CostEstimate costSort(long size) {
return new CostEstimate(size, model.sort((int)size, false));
}
/** Estimate the cost of a sort of the given size and limit.
 * Result cardinality is capped at the limit. */
public CostEstimate costSortWithLimit(long size, long limit, int nfields) {
return new CostEstimate(Math.min(size, limit),
model.sortWithLimit((int)size, nfields));
}
/** Estimate cost of scanning the whole group. */
// TODO: Need to account for tables actually wanted?
public CostEstimate costGroupScan(Group group) {
    long totalRows = 0;
    Table rootTable = null;
    for (Table table : group.getRoot().getAIS().getTables().values()) {
        if (table.getGroup() != group)
            continue;
        // The group's root table is the one without a parent join.
        if (table.getParentJoin() == null)
            rootTable = table;
        totalRows += getTableRowCount(table);
    }
    return new CostEstimate(totalRows, model.fullGroupScan(schema.tableRowType(rootTable)));
}
/** Cost of producing a single hkey row: a projection of its key expressions. */
public CostEstimate costHKeyRow(List<ExpressionNode> keys) {
    return new CostEstimate(1, model.project(keys.size(), 1));
}
/** Strategy callback for costing the single-index legs of an intersection. */
public interface IndexIntersectionCoster {
public CostEstimate singleIndexScanCost(SingleIndexScan scan, CostEstimator costEstimator);
}
/** Accumulates row count and cost while recursing over a left-deep
 * tree of index scans that are intersected together. */
private class IntersectionCostRunner {
private IndexIntersectionCoster coster;
// Running totals over the scans processed so far.
private double cost = 0;
private long rowCount = 0;
private IntersectionCostRunner(IndexIntersectionCoster coster) {
this.coster = coster;
}
public void buildIndexScanCost(IndexScan scan) {
if (scan instanceof SingleIndexScan) {
SingleIndexScan singleScan = (SingleIndexScan) scan;
CostEstimate estimate = coster.singleIndexScanCost(singleScan, CostEstimator.this);
long singleCount = estimate.getRowCount();
double singleCost = estimate.getCost();
if (rowCount == 0) {
// First index: start with its cost. This should
// be the output side, since nested intersections
// are left-deep. Its selectivity does not matter;
// subsequent ones filter it.
rowCount = singleCount;
cost = singleCost;
}
else {
// Add cost of this index and of intersecting its rows with rows so far.
cost += singleCost + model.intersect((int)rowCount, (int)singleCount);
long totalRowCount = getTableRowCount(singleScan.getIndex().leafMostTable());
if (totalRowCount > singleCount)
// Apply this index's selectivity to cumulative row count.
rowCount = simpleRound(rowCount * singleCount, totalRowCount);
}
// Never drop to zero; later products depend on this.
rowCount = Math.max(rowCount, 1);
}
else if (scan instanceof MultiIndexIntersectScan) {
MultiIndexIntersectScan multiScan = (MultiIndexIntersectScan) scan;
// Recurse: output side first, then the selector side.
buildIndexScanCost(multiScan.getOutputIndexScan());
buildIndexScanCost(multiScan.getSelectorIndexScan());
}
else {
throw new AssertionError("can't build scan of: " + scan);
}
}
}
/** Cost of a VALUES source: a projection of its rows, plus an optional
 * select pass over them when selectToo is set. */
public CostEstimate costValues(ExpressionsSource values, boolean selectToo) {
    int nfields = values.nFields();
    int nrows = values.getExpressions().size();
    double totalCost = model.project(nfields, nrows);
    if (selectToo)
        totalCost += model.select(nrows);
    return adjustCostEstimate(new CostEstimate(nrows, totalCost));
}
/** Cost of an already-bound row: one row, no additional work. */
public CostEstimate costBoundRow(){
return new CostEstimate(1,0);
}
/** Estimate the cost of filtering input rows through a Bloom filter
 * built from loaderCost's rows; checkSelectivity is the expected
 * fraction of input rows that pass the filter check.
 */
public CostEstimate costBloomFilter(CostEstimate loaderCost,
CostEstimate inputCost,
CostEstimate checkCost,
double checkSelectivity) {
long checkCount = Math.max(Math.round(inputCost.getRowCount() * checkSelectivity),1);
// Scan to load plus scan input plus check matching fraction
// plus filter setup and use.
CostEstimate estimate =
new CostEstimate(checkCount,
loaderCost.getCost() +
inputCost.getCost() +
// Model includes cost of one random access for check.
/* checkCost.getCost() * checkCount + */
model.selectWithFilter((int)inputCost.getRowCount(),
(int)loaderCost.getRowCount(),
checkSelectivity));
return adjustCostEstimate(estimate);
}
/** Cost of reading matching rows back out of a hash table. */
public CostEstimate costHashLookup(CostEstimate equivalentCost,
                                   int joinColumns,
                                   int columnCount) {
    long nrows = equivalentCost.getRowCount();
    double unloadCost = model.unloadHashTable((int)nrows, joinColumns, columnCount);
    return adjustCostEstimate(new CostEstimate(nrows, unloadCost));
}
/** Cost of a hash join: load the hash table once, then have each outer
 * row probe it (the nested lookup). */
public CostEstimate costHashJoin(CostEstimate loaderCost,
                                 CostEstimate outerCost,
                                 CostEstimate lookupCost,
                                 int joinColumns,
                                 int outerColumnCount,
                                 int innerColumnCount) {
    CostEstimate nested = outerCost.nest(lookupCost);
    double totalCost = loaderCost.getCost()
        + model.loadHashTable((int)loaderCost.getRowCount(),
                              joinColumns,
                              outerColumnCount)
        + nested.getCost();
    return adjustCostEstimate(new CostEstimate(nested.getRowCount(), totalCost));
}
/** Hook invoked when no statistics were found for a column.
 * Does nothing here; presumably overridden by the server to warn — confirm. */
protected void missingStats(Index index, Column column) {
}
/** Hook invoked when the stats-time row count may differ from the
 * current one. Does nothing here; presumably overridden to report — confirm. */
protected void checkRowCountChanged(Table table, IndexStatistics stats,
long rowCount) {
}
}