package com.tesora.dve.sql.transform.strategy.correlated;
/*
* #%L
* Tesora Inc.
* Database Virtualization Engine
* %%
* Copyright (C) 2011 - 2014 Tesora Inc.
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import com.tesora.dve.exceptions.PEException;
import com.tesora.dve.sql.SchemaException;
import com.tesora.dve.sql.ParserException.Pass;
import com.tesora.dve.sql.expression.ColumnKey;
import com.tesora.dve.sql.expression.ExpressionUtils;
import com.tesora.dve.sql.expression.SetQuantifier;
import com.tesora.dve.sql.expression.TableKey;
import com.tesora.dve.sql.node.expression.ColumnInstance;
import com.tesora.dve.sql.node.expression.ExpressionNode;
import com.tesora.dve.sql.node.expression.FunctionCall;
import com.tesora.dve.sql.node.expression.Subquery;
import com.tesora.dve.sql.node.expression.Wildcard;
import com.tesora.dve.sql.node.structural.FromTableReference;
import com.tesora.dve.sql.node.structural.JoinSpecification;
import com.tesora.dve.sql.node.structural.JoinedTable;
import com.tesora.dve.sql.node.structural.SortingSpecification;
import com.tesora.dve.sql.node.test.EdgeTest;
import com.tesora.dve.sql.node.test.EngineConstant;
import com.tesora.dve.sql.schema.FunctionName;
import com.tesora.dve.sql.schema.PEAbstractTable;
import com.tesora.dve.sql.schema.PEKey;
import com.tesora.dve.sql.schema.PEStorageGroup;
import com.tesora.dve.sql.schema.SchemaContext;
import com.tesora.dve.sql.schema.TempTable;
import com.tesora.dve.sql.schema.DistributionVector.Model;
import com.tesora.dve.sql.schema.TempTableCreateOptions;
import com.tesora.dve.sql.statement.dml.DMLStatementUtils;
import com.tesora.dve.sql.statement.dml.ProjectingStatement;
import com.tesora.dve.sql.statement.dml.SelectStatement;
import com.tesora.dve.sql.transform.AggFunCollector;
import com.tesora.dve.sql.transform.ColumnInstanceCollector;
import com.tesora.dve.sql.transform.CopyVisitor;
import com.tesora.dve.sql.transform.behaviors.ComplexFeaturePlannerFilter;
import com.tesora.dve.sql.transform.behaviors.defaults.DefaultFeaturePlannerFilter;
import com.tesora.dve.sql.transform.strategy.ApplyOption;
import com.tesora.dve.sql.transform.strategy.CollapsingMutator;
import com.tesora.dve.sql.transform.strategy.ColumnMutator;
import com.tesora.dve.sql.transform.strategy.CompoundExpressionColumnMutator;
import com.tesora.dve.sql.transform.strategy.MutatorState;
import com.tesora.dve.sql.transform.strategy.PassThroughMutator;
import com.tesora.dve.sql.transform.strategy.PlannerContext;
import com.tesora.dve.sql.transform.strategy.ProjectionMutator;
import com.tesora.dve.sql.transform.strategy.RecallingMutator;
import com.tesora.dve.sql.transform.strategy.TransformFactory;
import com.tesora.dve.sql.transform.strategy.FeaturePlannerIdentifier;
import com.tesora.dve.sql.transform.strategy.featureplan.FeatureStep;
import com.tesora.dve.sql.transform.strategy.featureplan.ProjectingFeatureStep;
import com.tesora.dve.sql.transform.strategy.featureplan.RedistFeatureStep;
import com.tesora.dve.sql.transform.strategy.IndexCollector;
import com.tesora.dve.sql.util.ListSet;
import com.tesora.dve.sql.util.Pair;
/*
* Applies for correlated subqueries in the where clause. The strategy is:
* [1] Decompose the where clause, excise the filter enclosing the correlated subquery
* [2] Simplify the outer projection. Strip off order by, group by, limit.
* [3] Add the outer correlated columns to the projection if need be.
* [4] Plan the outer query, redist to temp table OT
* [5] Build lookup table LT from OT.
* [6] Plan the inner query, redist to temp table ST.
* [7] Join OT & ST on the correlated columns, apply the original filter,
* undo changes to the projection, group by, order by, limit.
*/
public class WhereClauseCorrelatedSubqueryTransformFactory extends
CorrelatedSubqueryTransformFactory {
/**
 * Identifies the where clause as the match location for this factory.
 *
 * @return {@link EngineConstant#WHERECLAUSE}
 */
@Override
protected EdgeTest getMatchLocation() {
	final EdgeTest whereClauseEdge = EngineConstant.WHERECLAUSE;
	return whereClauseEdge;
}
/**
 * Wraps a correlated subquery found in the where clause of {@code copy}.
 * <p>
 * The where clause is split into its AND-ed clauses; the clause that is an
 * ancestor of {@code subq} is recorded as the subquery's container, together
 * with its position among the clauses and the keys of the columns in that
 * clause whose enclosing select is {@code copy} itself.
 *
 * @param sc           schema context handed to the created descriptor
 * @param subq         the correlated subquery statement
 * @param copy         the select whose where clause contains the subquery
 * @param outerColumns outer columns, passed through to the created descriptor
 * @return a new {@code WhereClauseCorrelatedSubquery}
 * @throws PEException if none of the decomposed clauses encloses the subquery
 */
@Override
protected CorrelatedSubquery buildSubquery(SchemaContext sc, ProjectingStatement subq,
		SelectStatement copy, ListSet<ColumnKey> outerColumns) throws PEException {
	final List<ExpressionNode> andedClauses = ExpressionUtils.decomposeAndClause(copy.getWhereClause());
	final ExpressionNode enclosingClause = subq.ifAncestor(andedClauses);
	if (enclosingClause == null) {
		throw new PEException("Cannot find subquery within where clause");
	}

	// keep only the columns that belong directly to the outer select
	final ListSet<ColumnKey> localColumns = new ListSet<ColumnKey>();
	for (ColumnInstance column : ColumnInstanceCollector.getColumnInstances(enclosingClause)) {
		if (column.getEnclosing(SelectStatement.class, null) != copy) {
			continue;
		}
		localColumns.add(column.getColumnKey());
	}

	// position of the container among the AND-ed clauses (identity match, first hit wins)
	int position = -1;
	for (int idx = 0; position == -1 && idx < andedClauses.size(); idx++) {
		if (andedClauses.get(idx) == enclosingClause) {
			position = idx;
		}
	}

	final Subquery wrapper = (Subquery) subq.getParent();
	return new WhereClauseCorrelatedSubquery(sc, wrapper, outerColumns, position, enclosingClause, localColumns);
}
/**
 * Returns the identifier of this feature planner.
 *
 * @return {@link FeaturePlannerIdentifier#WC_CORSUB}
 */
@Override
public FeaturePlannerIdentifier getFeaturePlannerID() {
	final FeaturePlannerIdentifier id = FeaturePlannerIdentifier.WC_CORSUB;
	return id;
}
/**
 * A correlated subquery located inside one AND-ed clause of the parent's where clause.
 * Besides the state kept by {@link CorrelatedSubquery}, it remembers the clause that
 * contained the subquery, where that clause sat among the AND-ed clauses, and which
 * parent columns the clause references, so the filter can be removed before planning
 * and re-applied once the subquery result has been joined back in.
 */
private static class WhereClauseCorrelatedSubquery extends CorrelatedSubquery {
	// index of the original container within the decomposed (AND-split) where clause
	private final int origOffset;
	// the original container for the subq.
	// so if the subq is in the expression foo in ( subq )
	// then "foo in ( subq )" is the original container
	private final ExpressionNode originalContainer;
	// and then foo is a container column
	private final ListSet<ColumnKey> containerColumns;

	/**
	 * @param sc               schema context, forwarded to the superclass
	 * @param sq               the subquery node, forwarded to the superclass
	 * @param outerColumns     correlated outer columns, forwarded to the superclass
	 * @param origOffset       index of {@code origContainer} in the decomposed where clause
	 * @param origContainer    the where-clause conjunct that encloses the subquery
	 * @param containerColumns keys of the parent's columns referenced by the container
	 */
	public WhereClauseCorrelatedSubquery(SchemaContext sc, Subquery sq,
			ListSet<ColumnKey> outerColumns, int origOffset, ExpressionNode origContainer,
			ListSet<ColumnKey> containerColumns) {
		super(sc, sq, EngineConstant.WHERECLAUSE, outerColumns);
		this.origOffset = origOffset;
		this.originalContainer = origContainer;
		this.containerColumns = containerColumns;
	}

	/**
	 * Joins the temp table produced by {@code tempTableStep} into a copy of {@code into}
	 * and re-applies the original container filter on the result.
	 *
	 * @param pc            planner context
	 * @param into          the statement to paste into; it is copied, not modified
	 * @param tempTableStep the redistribution step holding the planned subquery result
	 * @return the combined, normalized statement
	 * @throws PEException declared by the overridden method and propagated from callees
	 */
	@Override
	public SelectStatement pasteTempTable(PlannerContext pc,
			SelectStatement into, RedistFeatureStep tempTableStep) throws PEException {
		TempTable tt = tempTableStep.getTargetTempTable();
		SelectStatement tq = tempTableStep.buildNewSelect(pc);
		SelectStatement fsq = CopyVisitor.copy(into);
		SelectStatement combined = DMLStatementUtils.compose(pc.getContext(),tq,fsq);

		// build "lhs = rhs" for every join condition, mapped into the combined statement
		List<ExpressionNode> mappedJoinCondition = new ArrayList<ExpressionNode>();
		IndexCollector ic = new IndexCollector();
		for(Pair<ColumnKey,ColumnKey> p : joinConditions) {
			ColumnInstance lhk = combined.getMapper().copyColumnKeyForward(p.getFirst()).toInstance();
			ColumnInstance rhk = combined.getMapper().copyColumnKeyForward(p.getSecond()).toInstance();
			FunctionCall fc = new FunctionCall(FunctionName.makeEquals(),lhk,rhk);
			fc.setGrouped();
			mappedJoinCondition.add(fc);
			ic.addColumnInstance(lhk);
			ic.addColumnInstance(rhk);
		}
		ic.setIndexes(pc.getContext());
		ExpressionNode jc = ExpressionUtils.safeBuildAnd(mappedJoinCondition);

		// turn the last from-entry into an inner join hanging off the first one
		FromTableReference lt = combined.getTablesEdge().get(0);
		FromTableReference rt = combined.getTablesEdge().getLast();
		combined.getTablesEdge().removeLast();
		JoinedTable jt = new JoinedTable(rt.getBaseTable(),jc,JoinSpecification.INNER_JOIN);
		lt.addJoinedTable(jt);

		// we also have to apply the original container again on the where clause
		List<ExpressionNode> decompAnd = ExpressionUtils.decomposeAndClause(combined.getWhereClause());
		// the subquery inside the container is replaced by the first column of the temp table
		ColumnInstance replcol = new ColumnInstance(tt.getColumns(pc.getContext()).get(0),lt.getBaseTable());
		sq.getParentEdge().set(replcol);
		ExpressionNode container = originalContainer;
		// container is an EXISTS function then it must contain a subquery so it is already handled so don't add to the WHERE clause
		if (originalContainer instanceof FunctionCall) {
			FunctionCall fc = (FunctionCall) originalContainer;
			if (fc.getFunctionName().isExists())
				container = null;
		}
		// only map and append the container when there is one; previously a null
		// container was still pushed through the mapper and into the AND list
		if (container != null) {
			ExpressionNode mapped = combined.getMapper().copyForward(container);
			decompAnd.add(mapped);
		}
		// same empty-list handling as removeFromParent
		if (decompAnd.isEmpty())
			combined.setWhereClause(null);
		else
			combined.setWhereClause(ExpressionUtils.safeBuildAnd(decompAnd));

		// drop the leading projection entries, one per entry in the temp table select's projection
		for(int i = 0; i < tq.getProjectionEdge().size(); i++)
			combined.getProjectionEdge().remove(0);
		combined.normalize(pc.getContext());
		return combined;
	}

	/**
	 * Removes the conjunct at the recorded offset from {@code parent}'s where clause,
	 * clearing the where clause entirely when nothing is left.
	 *
	 * @param parent the statement whose where clause is rewritten in place
	 * @throws PEException declared by the overridden method
	 */
	@Override
	public void removeFromParent(SelectStatement parent) throws PEException {
		List<ExpressionNode> decomp = ExpressionUtils.decomposeAndClause(parent.getWhereClause());
		decomp.remove(origOffset);
		if (decomp.isEmpty())
			parent.setWhereClause(null);
		else
			parent.setWhereClause(ExpressionUtils.safeBuildAnd(decomp));
	}

	/**
	 * @return the index of the original container within the decomposed where clause
	 */
	@Override
	public int getOffset() {
		return origOffset;
	}

	/**
	 * @return keys of the parent's columns referenced by the original container
	 */
	public ListSet<ColumnKey> getContainerColumns() {
		return containerColumns;
	}

	// simplification passes returned by getSimplifications()
	private static final SimplifyPass[] simplifications = new SimplifyPass[] {
		stripRedundantLimit,
		// forwardDeeplyCorrelated,
		yankDownRestrictions,
		propagateFixedCorrelation
	};

	/**
	 * @return the simplification passes used for where-clause correlated subqueries
	 */
	@Override
	protected SimplifyPass[] getSimplifications() {
		return simplifications;
	}
}
/**
 * Projection mutator that assigns one column mutator per projection entry:
 * {@code count(*)} gets a {@link CountStarMutator}, any other aggregate function
 * gets a {@link RecallingMutator}, and everything else passes through.
 */
private static class DelayAggFunProjectionMutator extends ProjectionMutator {

	public DelayAggFunProjectionMutator(SchemaContext sc) {
		super(sc);
	}

	/**
	 * Registers a column mutator for every entry of {@code proj}, in order, and
	 * then delegates to {@code applyAdapted}.
	 */
	@Override
	public List<ExpressionNode> adapt(MutatorState ms,
			List<ExpressionNode> proj) {
		for (int offset = 0; offset < proj.size(); offset++) {
			final ExpressionNode target = ExpressionUtils.getTarget(proj.get(offset));
			final ColumnMutator mutator = selectMutatorFor(ms, target);
			mutator.setBeforeOffset(offset);
			columns.add(mutator);
		}
		return applyAdapted(proj, ms);
	}

	// Picks the mutator kind for a single projection target.
	private ColumnMutator selectMutatorFor(MutatorState ms, ExpressionNode target) {
		if (!EngineConstant.AGGFUN.has(target)) {
			return new PassThroughMutator();
		}
		final FunctionCall aggregate = (FunctionCall) target;
		final boolean isCountStar = aggregate.getFunctionName().isCount()
				&& aggregate.getParametersEdge().get(0) instanceof Wildcard;
		if (!isCountStar) {
			return new RecallingMutator();
		}
		return new CountStarMutator(findCountStarStandin(ms));
	}

	// The standin for count(*) is the first column of the first unique key of the
	// first non-view base table in the from clause that has any unique key.
	private ColumnKey findCountStarStandin(MutatorState ms) {
		for (FromTableReference ref : ms.getStatement().getTablesEdge()) {
			if (ref.getBaseTable() == null) {
				continue;
			}
			final PEAbstractTable<?> table = ref.getBaseTable().getAbstractTable();
			if (table.isView()) {
				continue;
			}
			final List<PEKey> uniqueKeys = table.asTable().getUniqueKeys(context);
			if (uniqueKeys.isEmpty()) {
				continue;
			}
			return new ColumnKey(ref.getBaseTable().getTableKey(), uniqueKeys.get(0).getColumns(context).get(0));
		}
		throw new SchemaException(Pass.PLANNER,"Unable to find good standin for count(*) for where clause correlated subquery planner");
	}
}
/**
 * Column mutator for {@code count(*)}: on adapt the projection entry is swapped
 * for a plain standin column, on apply a count call over the remembered
 * wildcard (with the remembered set quantifier) is produced again.
 */
private static class CountStarMutator extends ColumnMutator {
	// captured from the count call during adapt, reused in apply
	private SetQuantifier quantifier;
	private Wildcard original;
	// column projected in place of count(*)
	private ColumnKey standin;

	public CountStarMutator(ColumnKey best) {
		super();
		this.standin = best;
	}

	/**
	 * Records the wildcard parameter and set quantifier of the count call at this
	 * mutator's offset and returns the standin column as the only replacement entry.
	 */
	@Override
	public List<ExpressionNode> adapt(SchemaContext sc, List<ExpressionNode> proj, MutatorState ms) {
		final FunctionCall countCall = (FunctionCall) getProjectionEntry(proj, getBeforeOffset());
		this.original = (Wildcard) countCall.getParametersEdge().get(0);
		this.quantifier = countCall.getSetQuantifier();
		// we're going to replace the wildcard with the standin
		final ArrayList<ExpressionNode> replacement = new ArrayList<ExpressionNode>();
		replacement.add(standin.toInstance());
		return replacement;
	}

	/**
	 * Builds a fresh count call over the remembered wildcard; the incoming
	 * projection and the apply option are not consulted.
	 */
	@Override
	public List<ExpressionNode> apply(List<ExpressionNode> proj,
			ApplyOption ignored) {
		final FunctionCall rebuilt = new FunctionCall(FunctionName.makeCount(), original);
		rebuilt.setSetQuantifier(quantifier);
		return Collections.<ExpressionNode>singletonList(rebuilt);
	}
}
/**
 * Projection mutator that assigns a {@link CompoundExpressionColumnMutator} to every
 * projection entry containing at least one aggregate function, and a
 * {@link PassThroughMutator} to all other entries.
 */
private static class ExplodeAggCompoundExpressionsProjectionMutator extends ProjectionMutator {

	public ExplodeAggCompoundExpressionsProjectionMutator(SchemaContext sc) {
		super(sc);
	}

	/**
	 * Registers one column mutator per entry of {@code proj}, in order, and then
	 * delegates to {@code applyAdapted}.
	 */
	@Override
	public List<ExpressionNode> adapt(MutatorState ms,
			List<ExpressionNode> proj) {
		for (int offset = 0; offset < proj.size(); offset++) {
			final ExpressionNode target = ExpressionUtils.getTarget(proj.get(offset));
			final boolean hasAggregates = !AggFunCollector.collectAggFuns(target).isEmpty();
			final ColumnMutator mutator;
			if (hasAggregates) {
				mutator = new CompoundExpressionColumnMutator();
			} else {
				mutator = new PassThroughMutator();
			}
			mutator.setBeforeOffset(offset);
			columns.add(mutator);
		}
		return applyAdapted(proj, ms);
	}
}
/**
 * Builds the feature plan for a select whose where clause contains correlated subqueries.
 * <p>
 * The subqueries are split into two groups by their outer columns:
 * "befores", where no outer column is on a non-temp table, and "afters", where at
 * least one outer column is on a non-temp table.
 * <ul>
 * <li>Befores are cut out of the working copy, grouped on their join columns, planned,
 * redistributed as broadcast temp tables and pasted back into the working copy.</li>
 * <li>Afters are cut out of the working copy; the outer and container columns they need
 * are appended to the projection, the projection mutators are run, the outer query is
 * planned and redistributed, a lookup table is built from it, each subquery is rewritten
 * against the lookup table, planned, redistributed and pasted back; finally the mutators
 * are undone and the projection is trimmed back to its original size.</li>
 * </ul>
 * The resulting working copy is then planned and the accumulated redistribution steps
 * are prefixed as its children.
 *
 * @param pc          planner context
 * @param subs        the correlated subqueries; each is cast to {@code WhereClauseCorrelatedSubquery}
 * @param origQuery   the original query; only its single storage group is read here
 * @param currentCopy the statement the rewrite starts from
 * @return the feature step for the final rewritten statement
 * @throws PEException propagated from planning and rewriting
 */
@Override
protected FeatureStep buildSteps(PlannerContext pc,
List<CorrelatedSubquery> subs, SelectStatement origQuery,
SelectStatement currentCopy) throws PEException {
// befores: no outer column on a non-temp table; afters: at least one such column
ListSet<WhereClauseCorrelatedSubquery> befores = new ListSet<WhereClauseCorrelatedSubquery>();
ListSet<WhereClauseCorrelatedSubquery> afters = new ListSet<WhereClauseCorrelatedSubquery>();
for(CorrelatedSubquery cs : subs) {
WhereClauseCorrelatedSubquery wc = (WhereClauseCorrelatedSubquery) cs;
boolean anyNonTemp = false;
for(ColumnKey ck : wc.getOuterColumns()) {
if (!ck.getTableKey().getAbstractTable().isTempTable())
anyNonTemp = true;
}
if (anyNonTemp) {
afters.add(wc);
} else {
befores.add(wc);
}
}
// redistribution steps that must be prefixed onto the next planned step
ListSet<FeatureStep> deps = new ListSet<FeatureStep>();
SelectStatement workingCopy = currentCopy;
if (!befores.isEmpty()) {
// for befores we just need to break them out from the parent, yank the temp table
// onto the from clause, slap on the group by on the outer join columns, and plan the queries
// then, we take the result and redist bcast back onto the pg, and rewrite the working copy
// to reference the temp tables
HashMap<WhereClauseCorrelatedSubquery,ProjectingFeatureStep> planned =
new HashMap<WhereClauseCorrelatedSubquery,ProjectingFeatureStep>();
// iterate from the last subquery to the first
for(int i = befores.size() - 1; i > -1; i--) {
WhereClauseCorrelatedSubquery subq = befores.get(i);
subq.removeFromParent(workingCopy);
subq.getSubquery().getStatement().getMapper().getOriginals().add(workingCopy);
workingCopy.getDerivedInfo().getLocalNestedQueries().remove(subq.getSubquery().getStatement());
// we need to group the subq on the join columns
SelectStatement ss = (SelectStatement) subq.getSubquery().getStatement();
ListSet<TableKey> tempTables = new ListSet<TableKey>();
for(Pair<ColumnKey,ColumnKey> p : subq.getJoinColumns()) {
ss.getGroupBysEdge().add(new SortingSpecification(p.getFirst().toInstance(),true));
ss.getProjectionEdge().add(p.getFirst().toInstance());
tempTables.add(p.getSecond().getTableKey());
}
// add the tables of the second join columns to the subquery's from clause
for(TableKey tk : tempTables) {
ss.getTablesEdge().add(new FromTableReference(tk.toInstance()));
ss.getDerivedInfo().addLocalTable(tk);
}
ProjectingFeatureStep innerStep =
(ProjectingFeatureStep) buildPlan(ss,pc,DefaultFeaturePlannerFilter.INSTANCE);
planned.put(subq,innerStep);
}
// we need to choose a group to put the temp tables on - it should be whatever group the lhs is on
for(int i = befores.size() - 1; i > -1; i--) {
WhereClauseCorrelatedSubquery subq = befores.get(i);
ListSet<PEStorageGroup> groups = new ListSet<PEStorageGroup>();
for(ColumnKey ck : subq.getOuterColumns()) {
groups.add(ck.getTableKey().getAbstractTable().getStorageGroup(pc.getContext()));
}
PEStorageGroup aTempGroup = null;
PEStorageGroup persGroup = null;
for(PEStorageGroup pesg : groups) {
if (pesg.isTempGroup())
aTempGroup = pesg;
else
persGroup = pesg;
}
// a non-temp group wins over a temp group when both are present
PEStorageGroup tempTableGroup = (persGroup != null ? persGroup : aTempGroup);
ProjectingFeatureStep innerStep = planned.get(subq);
RedistFeatureStep asTempTable =
innerStep.redist(pc, this,
new TempTableCreateOptions(Model.BROADCAST,tempTableGroup)
.withRowCount(innerStep.getCost().getRowCount()),
null,
null);
workingCopy = subq.pasteTempTable(pc, workingCopy, asTempTable);
deps.add(asTempTable);
}
}
int originalProjectionSize = -1;
ListSet<ColumnKey> allOuterColumns = null;
List<ProjectionMutator> mutators = null;
MutatorState ms = null;
ProjectingFeatureStep outerStep = null;
if (!afters.isEmpty()) {
// first setup
ListSet<ColumnKey> neededOuterColumns = new ListSet<ColumnKey>();
ListSet<ColumnKey> neededContainerColumns = new ListSet<ColumnKey>();
for(int i = afters.size() - 1; i > -1; i--) {
WhereClauseCorrelatedSubquery subq = afters.get(i);
subq.removeFromParent(workingCopy);
subq.getSubquery().getStatement().getMapper().getOriginals().add(workingCopy);
workingCopy.getDerivedInfo().getLocalNestedQueries().remove(subq.getSubquery().getStatement());
// we need the columns from the outer query to be present in its planned version so we can build the join
neededOuterColumns.addAll(subq.getOuterColumns());
// but we also need the columns from the container of the inner query to be present as well
// so we can rebuild the where clause
neededContainerColumns.addAll(subq.getContainerColumns());
}
// we need to add outer columns and any container columns
// remembered so the extra trailing entries can be removed again at the end
originalProjectionSize = workingCopy.getProjectionEdge().size();
// figure out current columns
ListSet<ColumnKey> existing = new ListSet<ColumnKey>();
for(ExpressionNode en : workingCopy.getProjectionEdge()) {
ExpressionNode t = ExpressionUtils.getTarget(en);
if (t instanceof ColumnInstance) {
ColumnInstance ci = (ColumnInstance) t;
existing.add(ci.getColumnKey());
}
}
// only columns not already projected are appended
ListSet<ColumnKey> added = new ListSet<ColumnKey>();
for(ColumnKey ck : neededOuterColumns) {
if (existing.add(ck)) {
added.add(ck);
}
}
for(ColumnKey ck : neededContainerColumns) {
if (existing.add(ck))
added.add(ck);
}
for(ColumnKey ck : added) {
workingCopy.getProjectionEdge().add(ck.toInstance());
}
allOuterColumns = neededOuterColumns;
// now we can run the mutators on the projection
mutators = new ArrayList<ProjectionMutator>();
// first pass is exploding compound expressions involving agg funs
mutators.add(new ExplodeAggCompoundExpressionsProjectionMutator(pc.getContext()));
// second pass is stripping off all agg funs and replacing them with their params
mutators.add(new DelayAggFunProjectionMutator(pc.getContext()));
// third pass is removing duplicates
mutators.add(new CollapsingMutator(pc.getContext()));
ms = new MutatorState(workingCopy);
List<ExpressionNode> copyProj = workingCopy.getProjection();
for(ProjectionMutator pm : mutators) {
copyProj = pm.adapt(ms, copyProj);
}
ms.combine(pc.getContext(),copyProj,false);
if (emitting()) {
emit("preplan rewrite: " + workingCopy.getSQL(pc.getContext(), " "));
}
outerStep = (ProjectingFeatureStep) buildPlan(workingCopy,pc,DefaultFeaturePlannerFilter.INSTANCE);
// the temp tables produced for the befores become children of the outer step
outerStep.prefixChildren(deps);
deps.clear();
RedistFeatureStep tempTableStep =
redistToAggSite(pc, outerStep);
SelectStatement fs = tempTableStep.buildNewSelect(pc);
RedistFeatureStep lookupTableStep =
buildLookupTable(pc,tempTableStep,allOuterColumns,origQuery.getSingleGroup(pc.getContext()));
TempTable lt = lookupTableStep.getTargetTempTable();
// rewrite each subquery against the lookup table, plan it, and join its result into fs
for(CorrelatedSubquery csq : afters) {
if (emitting())
emit("wc subq: " + csq.getSubquery().getStatement().getSQL(pc.getContext()," "));
SelectStatement rewritten = csq.rewriteSubqueryUsingLookupTable(pc.getContext(),lt);
ProjectingFeatureStep childStep =
(ProjectingFeatureStep) buildPlan(rewritten,pc,
new ComplexFeaturePlannerFilter(Collections.<FeaturePlannerIdentifier> emptySet(),
TransformFactory.allTransforms));
childStep.getPreChildren().add(lookupTableStep);
RedistFeatureStep tt = redistToAggSite(pc,childStep);
fs = csq.pasteTempTable(pc, fs, tt);
deps.add(tt);
}
// now we have joined in the correlated subquery - we need to undo what we did in the setup
// so apply the mutators in reverse order, then remove the trailing proj entries down to the original
// proj size
List<ExpressionNode> intermediate = fs.getProjection();
for(int i = mutators.size() - 1; i > -1; i--) {
intermediate = mutators.get(i).apply(this,ms, intermediate, new ApplyOption(i,mutators.size() - 1));
}
fs.setProjection(intermediate);
while(fs.getProjectionEdge().size() > originalProjectionSize) {
fs.getProjectionEdge().remove(fs.getProjectionEdge().size() - 1);
}
fs.normalize(pc.getContext());
workingCopy = fs;
}
// plan whatever the working copy has become and attach the outstanding redistribution steps
FeatureStep out =
buildPlan(workingCopy,pc,
new ComplexFeaturePlannerFilter(Collections.<FeaturePlannerIdentifier> emptySet(), TransformFactory.allTransforms));
out.prefixChildren(deps);
return out;
}
}