package com.tesora.dve.sql.transform.strategy; /* * #%L * Tesora Inc. * Database Virtualization Engine * %% * Copyright (C) 2011 - 2014 Tesora Inc. * %% * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * #L% */ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import com.tesora.dve.common.MultiMap; import com.tesora.dve.sql.expression.TableKey; import com.tesora.dve.sql.jg.CollapsedJoinGraph; import com.tesora.dve.sql.jg.DGJoin; import com.tesora.dve.sql.jg.JoinEdge; import com.tesora.dve.sql.node.LanguageNode; import com.tesora.dve.sql.node.expression.ColumnInstance; import com.tesora.dve.sql.node.test.EdgeTest; import com.tesora.dve.sql.node.test.EngineConstant; import com.tesora.dve.sql.schema.PEColumn; import com.tesora.dve.sql.schema.PEKey; import com.tesora.dve.sql.schema.SchemaContext; import com.tesora.dve.sql.schema.TempTable; import com.tesora.dve.sql.statement.dml.SelectStatement; import com.tesora.dve.sql.transform.ConstraintCollector; import com.tesora.dve.sql.transform.constraints.PlanningConstraint; import com.tesora.dve.sql.util.ListSet; import com.tesora.dve.sql.util.Pair; import com.tesora.dve.variables.KnownVariables; // we define a bunch of basic terms here: // the cardinality (card) of an index is the number of unique values in it. // the cardinality (card) ratio of an index is how many rows each unique value contributes. // we seek to find the maximal number of rows, which is the happiest case - i.e. all rows have match values. // // costing rules are based on the case. // no filters: // smaller cardinality * lhs card ratio * rhs card ratio // both sides with filters, no unique indexes // lhs row count * rhs row count // both sides with filters, unique indexes // smaller row count // when only one side is filtered: // unfiltered side rowcount is card ratio * filtered side rowcount // // filters are used to build row count estimates. index cards and card ratios are used to // propagate estimates when the corresponding side's row count is unknown. // // for a given set of joins A join B join C join D join E join F .... // we first bind all tables with filters, then we start considering joins // foreach join: if both sides have row counts, build the combined and register the resulting rowcount // with each table. if one side has a row count, propagate and register the rowcount with both tables. // keep going until all that's left are either joins with two unbound tables, or the last join. // for the unbound table - use the the no filters case. public final class Costing { public static ExecutionCost buildCombinedCost(SchemaContext sc, DGJoin join, ExecutionCost...costs) { return getDefaultStrategy(sc,null).buildCombinedCost(sc, join, costs); } public static ExecutionCost buildBasicCost(SchemaContext sc, SelectStatement ss, boolean singleSite) { return getDefaultStrategy(sc,ss).buildBasicCost(sc, ss, singleSite); } // we have two strategies - the full costing strategy when costing is turned on and cardinalities are known // and a structural costing strategy for all other cases private static abstract class CostingStrategy { abstract ExecutionCost buildCombinedCost(SchemaContext sc, DGJoin join, ExecutionCost...costs); abstract ExecutionCost buildBasicCost(SchemaContext sc, SelectStatement ss, boolean singleSite); ListSet<PlanningConstraint> getConstraints(SchemaContext sc, LanguageNode node, boolean partialOK) { ConstraintCollector collector = new ConstraintCollector(sc, node, false, true, partialOK); return collector.getConstraints(); } } private static final CostingStrategy dataStrategy = new CardinalityCosting(); private static final CostingStrategy structureStrategy = new StructureCosting(); private static CostingStrategy getDefaultStrategy(SchemaContext sc, SelectStatement ss) { boolean hasCardInfo = true; if (ss != null) { ListSet<TableKey> allTabs = ss.getAllTableKeys(); for(TableKey tk : allTabs) { if (!tk.getAbstractTable().hasCardinalityInfo(sc)) { hasCardInfo = false; break; } } } // use the server setting. note that if the server setting is for costing, but no cards are present // we'll fall back to structure if (hasCardInfo && KnownVariables.COST_BASED_PLANNING.getValue(sc.getConnection().getVariableSource()).booleanValue()) return dataStrategy; return structureStrategy; } private static class CardinalityCosting extends CostingStrategy { @Override public ExecutionCost buildCombinedCost(SchemaContext sc, DGJoin join, ExecutionCost...costs) { if (costs == null) return null; if (costs.length < 2) return null; if (costs.length > 2) { // multijoin - do this with structure only return structureStrategy.buildCombinedCost(sc, join, costs); } ExecutionCost leftCost = costs[0]; ExecutionCost rightCost = costs[1]; Long result = compute(sc, join, (leftCost.getRowCount() == -1 ? null : leftCost.getRowCount()), (rightCost.getRowCount() == -1 ? null : rightCost.getRowCount()), new Counters(), 10); if (result == null) return structureStrategy.buildCombinedCost(sc, join, costs); List<PlanningConstraint> all = new ArrayList<PlanningConstraint>(); if (leftCost.getConstraint() != null) all.add(leftCost.getConstraint()); if (rightCost.getConstraint() != null) all.add(rightCost.getConstraint()); PlanningConstraint best = ConstraintCollector.chooseBest(sc, all); return new ExecutionCost(best, (result == null ? -1 : result.longValue()),leftCost, rightCost); } @Override public ExecutionCost buildBasicCost(SchemaContext sc, SelectStatement ss, boolean singleSite) { ListSet<PlanningConstraint> constraints = getConstraints(sc,ss); CollapsedJoinGraph cjg = EngineConstant.PARTITIONS.getValue(ss, sc); if (cjg.getAllJoins().isEmpty()) { PlanningConstraint best = ConstraintCollector.chooseBest(sc,constraints); return new ExecutionCost(EngineConstant.WHERECLAUSE.has(ss), singleSite, best, (best == null ? -1 : best.getRowCount())); } return buildCost(sc,ss,constraints,cjg.getAllJoins(), singleSite); } private static final EdgeTest[] edges = new EdgeTest[] { EngineConstant.WHERECLAUSE, EngineConstant.FROMCLAUSE }; private ListSet<PlanningConstraint> getConstraints(SchemaContext sc, SelectStatement ss) { ListSet<PlanningConstraint> all = new ListSet<PlanningConstraint>(); for(EdgeTest et : edges) { if (et.has(ss)) all.addAll(getConstraints(sc,et.get(ss),true)); } return all; } private ExecutionCost buildCost(SchemaContext sc, SelectStatement ss, ListSet<PlanningConstraint> constraints, ListSet<DGJoin> joins, boolean singleSite) { MultiMap<TableKey,PlanningConstraint> constraintsByTable = new MultiMap<TableKey,PlanningConstraint>(); PlanningConstraint best = null; for(PlanningConstraint pc : constraints) { constraintsByTable.put(pc.getTableKey(),pc); if (best == null) best = pc; else if (best.compareTo(pc) >= 1) best = pc; } ListSet<TableKey> unbound = new ListSet<TableKey>(); for(DGJoin dgj : joins) { unbound.add(dgj.getRightTable()); unbound.addAll(dgj.getLeftTables()); } ListSet<TableKey> bound = new ListSet<TableKey>(); HashMap<TableKey,Long> estimates = new HashMap<TableKey,Long>(); // so on estimates, we first traverse the constraints by table key, and find a 'best' constraint for each // for temp tables we use the stored row count for(Iterator<TableKey> iter = unbound.iterator(); iter.hasNext();) { TableKey tk = iter.next(); Collection<PlanningConstraint> any = constraintsByTable.get(tk); if (any == null || any.isEmpty()) continue; // choose a best constraint PlanningConstraint tb = null; for(PlanningConstraint pc : any) { if (pc.getRowCount() == -1) continue; if (tb == null) tb = pc; else if (tb.compareTo(pc) >= 1) tb = pc; } if (tb == null) continue; estimates.put(tk,best.getRowCount()); iter.remove(); bound.add(tk); } int effort = 0; DGJoin lastJoin = null; while(joins.size() > 0) { int beforeSize = joins.size(); Counters counters = new Counters(); for(Iterator<DGJoin> iter = joins.iterator(); iter.hasNext();) { DGJoin dgj = iter.next(); TableKey ltk = dgj.getLeftTable(); TableKey rtk = dgj.getRightTable(); // if the estimates are missing but the tables are temp tables, use the row counts from the temp tables Long result = compute(sc, dgj, getRowCountEstimate(sc,ltk,estimates), getRowCountEstimate(sc,rtk,estimates), counters, effort); if (result != null) { estimates.put(ltk, result); estimates.put(rtk, result); iter.remove(); lastJoin = dgj; } } int afterSize = joins.size(); int effortWas = effort; if (afterSize != beforeSize) { effort = 0; continue; } effort = counters.computeEffort(); if (effortWas == effort) { lastJoin = null; break; } } long result = -1; if (lastJoin != null) { Long rc = estimates.get(lastJoin.getRightTable()); if (rc != null) result = rc.longValue(); } else if (best != null) { result = best.getRowCount(); } return new ExecutionCost(EngineConstant.WHERECLAUSE.has(ss), singleSite, best, result); } private Long compute(SchemaContext sc, DGJoin join, Long lrc, Long rrc, Counters counters, int effort) { if (lrc != null && rrc != null) { return computeBothFiltered(join.getLeftIndex(sc),lrc.longValue(),join.getRightIndex(sc),rrc); } else if (lrc == null && rrc == null) { counters.bumpTwoUnRes(); if (effort < 2) return null; return computeTwoUnfiltered(sc,join.getLeftIndex(sc),join.getRightIndex(sc)); } else { counters.bumpOneUnRes(); if (effort < 1) return null; return computeOneUnfiltered(sc,join.getLeftIndex(sc),lrc,join.getRightIndex(sc),rrc); } } private long computeBothFiltered(PEKey leftIndex, long lrc, PEKey rightIndex, long rrc) { long result = -1; if (leftIndex == null || rightIndex == null) result = lrc * rrc; else { if (leftIndex.isUnique() && rightIndex.isUnique()) result = (lrc < rrc ? lrc : rrc); else if (leftIndex.isUnique()) result = lrc; else if (rightIndex.isUnique()) result = rrc; else result = lrc * rrc; } return result; } private Long computeOneUnfiltered(SchemaContext sc, PEKey leftIndex, Long lrc, PEKey rightIndex, Long rrc) { if (lrc == null) { if (leftIndex == null) return null; long ratio = leftIndex.getCardRatio(sc); if (ratio == -1) return null; return rrc.longValue() * ratio; } else { if (rightIndex == null) return null; long ratio = rightIndex.getCardRatio(sc); if (ratio == -1) return null; return lrc.longValue() * ratio; } } private Long computeTwoUnfiltered(SchemaContext sc, PEKey leftIndex, PEKey rightIndex) { if (leftIndex == null || rightIndex == null) return null; long leftCard = leftIndex.getCardinality(); long rightCard = rightIndex.getCardinality(); long leftRatio = leftIndex.getCardRatio(sc); long rightRatio = rightIndex.getCardRatio(sc); if (leftCard == -1 || rightCard == -1) return null; if (leftRatio == -1 || rightRatio == -1) return null; return leftRatio * rightRatio * (leftCard < rightCard ? leftCard : rightCard); } private Long getRowCountEstimate(SchemaContext sc, TableKey tk, Map<TableKey,Long> estimates) { Long out = estimates.get(tk); if (out == null && tk.getAbstractTable().isTempTable()) { TempTable tt = (TempTable) tk.getAbstractTable().asTable(); long result = tt.getTableSizeEstimate(sc); if (result > -1) out = result; } return out; } private static class Counters { private int oneUnRes; private int twoUnRes; public Counters() { oneUnRes = 0; twoUnRes = 0; } public void bumpOneUnRes() { oneUnRes++; } public void bumpTwoUnRes() { twoUnRes++; } public int computeEffort() { if (oneUnRes > 0) return 1; if (twoUnRes > 0) return 2; return 0; } } } private static class StructureCosting extends CostingStrategy { @Override public ExecutionCost buildBasicCost(SchemaContext sc, SelectStatement ss, boolean singleSite) { ListSet<PlanningConstraint> constraints = getConstraints(sc,ss,false); PlanningConstraint best = ConstraintCollector.chooseBest(sc,constraints); return new ExecutionCost(EngineConstant.WHERECLAUSE.has(ss),singleSite,best,(best == null ? -1 : best.getRowCount())); } @Override public ExecutionCost buildCombinedCost(SchemaContext sc, DGJoin join, ExecutionCost...costs) { if (costs.length < 2) return null; int leftLimit = costs.length - 1; ExecutionCost rhc = costs[leftLimit]; if (rhc == null) return null; HashMap<TableKey,ExecutionCost> byKey = new HashMap<TableKey,ExecutionCost>(); for(int i = 0; i < leftLimit; i++) { if (costs[i] != null && costs[i].getConstraint() != null) byKey.put(costs[i].getConstraint().getTableKey(),costs[i]); } List<ExecutionCost> promoted = new ArrayList<ExecutionCost>(); boolean multiSite = false; boolean wc = false; for(JoinEdge je : join.getEdges()) { ExecutionCost lhc = byKey.get(je.getLHSTab()); if (lhc == null) continue; ExecutionCost c = promoteConstraint(sc, je, lhc, rhc); if (c != null) { if (!lhc.isSingleSite() || !rhc.isSingleSite()) multiSite = true; if (lhc.hasWhereClause() || rhc.hasWhereClause()) wc = true; promoted.add(c); } } if (promoted.isEmpty()) { List<PlanningConstraint> all = new ArrayList<PlanningConstraint>(); for(ExecutionCost ec : costs) if (ec.getConstraint() != null) all.add(ec.getConstraint()); PlanningConstraint best = ConstraintCollector.chooseBest(sc, all); return new ExecutionCost(best, -1, costs); } Collections.sort(promoted); if (join.getJoinType().isOuterJoin()) { return new ExecutionCost(wc,!multiSite,promoted.get(promoted.size() - 1)); } return new ExecutionCost(wc,!multiSite,promoted.get(0)); } private ListSet<PEKey> buildMatching(SchemaContext sc, ListSet<PEColumn> cols) { ListSet<PEKey> candidates = new ListSet<PEKey>(); for(PEColumn pec : cols) { List<PEKey> refs = pec.getReferencedBy(sc); for(PEKey pek : refs) { HashSet<PEColumn> uses = new HashSet<PEColumn>(pek.getColumns(sc)); uses.removeAll(cols); if (uses.isEmpty()) candidates.add(pek); } } return candidates; } private ExecutionCost promoteConstraint(SchemaContext sc, JoinEdge je, ExecutionCost lhc, ExecutionCost rhc) { ListSet<PEColumn> leftCols = new ListSet<PEColumn>(); ListSet<PEColumn> rightCols = new ListSet<PEColumn>(); for(Pair<ColumnInstance,ColumnInstance> p : je.getSimpleColumns()) { leftCols.add(p.getFirst().getPEColumn()); rightCols.add(p.getSecond().getPEColumn()); } ListSet<PEKey> lmatch = buildMatching(sc, leftCols); ListSet<PEKey> rmatch = buildMatching(sc, rightCols); PEKey luniq = null; PEKey runiq = null; PEKey lindex = null; PEKey rindex = null; for(PEKey p : lmatch) { if (luniq == null && (p.isUnique())) luniq = p; else if (!p.isUnique()) { if (lindex == null) lindex = p; else if (p.getCardRatio(sc) < lindex.getCardRatio(sc)) lindex = p; } } for(PEKey p : rmatch) { if (runiq == null && (p.isUnique())) runiq = p; else if (!p.isUnique()) { if (rindex == null) rindex = p; else if (p.getCardRatio(sc) < rindex.getCardRatio(sc)) rindex = p; } } long lrc = -1; long rrc = -1; if (lhc.getRowCount() > -1) { if (luniq != null) lrc = lhc.getRowCount(); else if (lindex != null) lrc = lhc.getRowCount() * lindex.getCardRatio(sc); } if (rhc.getRowCount() > -1) { if (runiq != null) rrc = rhc.getRowCount(); else if (rindex != null) rrc = rhc.getRowCount() * rindex.getCardRatio(sc); } if (luniq != null && runiq != null) { return (lhc.compareTo(rhc) < 0 ? lhc : rhc); } else if (luniq != null && rindex != null) { return new ExecutionCost(rhc,rrc); } else if (lindex != null && runiq != null) { return new ExecutionCost(lhc,lrc); } else if (lindex != null && rindex != null) { long nrc = -1; if (lrc > -1 && rrc > -1) nrc = lrc * rrc; return new ExecutionCost((lhc.compareTo(rhc) > 0 ? rhc : lhc),nrc); } else if (lindex == null) { return lhc; } else if (rindex == null) { return rhc; } else { return (lhc.compareTo(rhc) > 0 ? rhc : lhc); } } } }