package com.tesora.dve.sql.transform.strategy.correlated;
/*
* #%L
* Tesora Inc.
* Database Virtualization Engine
* %%
* Copyright (C) 2011 - 2014 Tesora Inc.
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* #L%
*/
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import com.tesora.dve.common.MultiMap;
import com.tesora.dve.exceptions.PEException;
import com.tesora.dve.sql.expression.ColumnKey;
import com.tesora.dve.sql.expression.ExpressionPath;
import com.tesora.dve.sql.expression.ExpressionUtils;
import com.tesora.dve.sql.expression.TableKey;
import com.tesora.dve.sql.node.expression.CaseExpression;
import com.tesora.dve.sql.node.expression.ColumnInstance;
import com.tesora.dve.sql.node.expression.ExpressionNode;
import com.tesora.dve.sql.node.expression.FunctionCall;
import com.tesora.dve.sql.node.expression.LiteralExpression;
import com.tesora.dve.sql.node.expression.Subquery;
import com.tesora.dve.sql.node.expression.TableInstance;
import com.tesora.dve.sql.node.expression.WhenClause;
import com.tesora.dve.sql.node.structural.FromTableReference;
import com.tesora.dve.sql.node.structural.JoinSpecification;
import com.tesora.dve.sql.node.structural.JoinedTable;
import com.tesora.dve.sql.node.test.EdgeTest;
import com.tesora.dve.sql.node.test.EngineConstant;
import com.tesora.dve.sql.schema.FunctionName;
import com.tesora.dve.sql.schema.PEColumn;
import com.tesora.dve.sql.schema.SchemaContext;
import com.tesora.dve.sql.schema.TempTable;
import com.tesora.dve.sql.statement.dml.DMLStatementUtils;
import com.tesora.dve.sql.statement.dml.ProjectingStatement;
import com.tesora.dve.sql.statement.dml.SelectStatement;
import com.tesora.dve.sql.transform.ColumnInstanceCollector;
import com.tesora.dve.sql.transform.CopyContext;
import com.tesora.dve.sql.transform.CopyVisitor;
import com.tesora.dve.sql.transform.behaviors.ComplexFeaturePlannerFilter;
import com.tesora.dve.sql.transform.behaviors.defaults.DefaultFeaturePlannerFilter;
import com.tesora.dve.sql.transform.behaviors.defaults.DefaultFeatureStepBuilder;
import com.tesora.dve.sql.transform.execution.DMLExplainReason;
import com.tesora.dve.sql.transform.strategy.ApplyOption;
import com.tesora.dve.sql.transform.strategy.ColumnMutator;
import com.tesora.dve.sql.transform.strategy.ExecutionCost;
import com.tesora.dve.sql.transform.strategy.FeaturePlannerIdentifier;
import com.tesora.dve.sql.transform.strategy.MutatorState;
import com.tesora.dve.sql.transform.strategy.OrderByLimitRewriteTransformFactory;
import com.tesora.dve.sql.transform.strategy.PassThroughMutator;
import com.tesora.dve.sql.transform.strategy.PlannerContext;
import com.tesora.dve.sql.transform.strategy.ProjectionMutator;
import com.tesora.dve.sql.transform.strategy.TransformFactory;
import com.tesora.dve.sql.transform.strategy.featureplan.FeatureStep;
import com.tesora.dve.sql.transform.strategy.featureplan.ProjectingFeatureStep;
import com.tesora.dve.sql.transform.strategy.featureplan.RedistFeatureStep;
import com.tesora.dve.sql.util.Functional;
import com.tesora.dve.sql.util.ListSet;
import com.tesora.dve.sql.util.Pair;
/*
* Handles correlated subqueries on the projection. The strategy is:
* [1] Execute the parent query without the correlated subquery, adding the outer correlated columns to the projection if need be.
* [2] Build a lookup table on the outer correlated columns
* [3] Execute the inner query as a join against the lookup table, grouped on the inner join columns
* [4] Redistribute the inner query next to the outer query
* [5] Execute a left join between the outer query (results from step 1) and the inner query (results from step 4)
*/
public class ProjectionCorrelatedSubqueryTransformFactory extends
CorrelatedSubqueryTransformFactory {
/**
 * Identifies the projection as the part of the statement this factory works on.
 *
 * @return always {@link EngineConstant#PROJECTION}
 */
@Override
protected EdgeTest getMatchLocation() {
    final EdgeTest projectionEdge = EngineConstant.PROJECTION;
    return projectionEdge;
}
/**
 * Reports the planner identifier under which this factory is registered.
 *
 * @return always {@link FeaturePlannerIdentifier#PROJ_CORSUB}
 */
@Override
public FeaturePlannerIdentifier getFeaturePlannerID() {
    final FeaturePlannerIdentifier id = FeaturePlannerIdentifier.PROJ_CORSUB;
    return id;
}
/**
 * Locates the projection entry of {@code copy} that contains {@code subq}.
 * Entries are examined in projection order and the first one that is an
 * ancestor of {@code subq} wins.
 *
 * @param copy the statement whose projection is searched
 * @param subq the nested statement to look for
 * @return the containing projection entry paired with its zero-based position
 * @throws PEException if no projection entry is an ancestor of {@code subq}
 */
private static Pair<ExpressionNode, Integer> findOffset(SelectStatement copy, ProjectingStatement subq) throws PEException {
    int position = 0;
    final Iterator<ExpressionNode> entries = copy.getProjectionEdge().iterator();
    while (entries.hasNext()) {
        final ExpressionNode candidate = entries.next();
        if (subq.ifAncestor(Collections.singleton(candidate)) != null) {
            return new Pair<ExpressionNode,Integer>(candidate, position);
        }
        position++;
    }
    throw new PEException("Cannot find subquery within projection");
}
/**
 * Wraps a correlated subquery found in the projection of {@code copy}.
 * The subquery is located first, so a subquery that cannot be found is
 * reported before the column-count check is made.
 *
 * @param sc           the schema context handed through to the wrapper
 * @param subq         the nested statement; its parent is expected to be a {@link Subquery}
 * @param copy         the statement whose projection contains {@code subq}
 * @param outerColumns the outer columns handed through to the wrapper
 * @return a {@code ProjectionCorrelatedSubquery} remembering the containing projection entry and its offset
 * @throws PEException if the subquery is not within the projection, or if it projects more than one column
 */
@Override
protected CorrelatedSubquery buildSubquery(SchemaContext sc, ProjectingStatement subq,
        SelectStatement copy, ListSet<ColumnKey> outerColumns) throws PEException {
    final Pair<ExpressionNode,Integer> location = findOffset(copy, subq);

    final boolean multiColumn = subq.getProjections().get(0).size() > 1;
    if (multiColumn) {
        throw new PEException("Too many columns in correlated subquery");
    }

    final Subquery enclosing = (Subquery) subq.getParent();
    final ExpressionNode containingEntry = location.getFirst();
    final int containingOffset = location.getSecond();
    return new ProjectionCorrelatedSubquery(sc, enclosing, containingEntry, containingOffset, outerColumns);
}
private static class ProjectionCorrelatedSubquery extends CorrelatedSubquery {
private ExpressionNode origProjEntry;
private int origProjOffset;
public ProjectionCorrelatedSubquery(SchemaContext sc, Subquery sq, ExpressionNode projEntry, int projOffset, ListSet<ColumnKey> outerColumns) {
super(sc, sq,EngineConstant.PROJECTION,outerColumns);
origProjEntry = projEntry;
origProjOffset = projOffset;
}
// we need to be able to reset these
void setProjEntry(ExpressionNode en) {
origProjEntry = en;
}
void setOrigProjOffset(int i) {
origProjOffset = i;
}
// ss is the query against the original parent result; we have to fold in
// a query against our temp table (left outer join) and fix up the projection appropriately
@Override
public SelectStatement pasteTempTable(PlannerContext sc,SelectStatement ss, RedistFeatureStep tempTableStep) throws PEException {
SelectStatement tq = tempTableStep.buildNewSelect(sc);
SelectStatement fsq = CopyVisitor.copy(ss);
SelectStatement combined = DMLStatementUtils.compose(sc.getContext(),fsq,tq);
for(int i = 0; i < tq.getProjectionEdge().size(); i++)
combined.getProjectionEdge().remove(combined.getProjectionEdge().size() - 1);
List<ExpressionNode> mappedJoinCondition = new ArrayList<ExpressionNode>();
Map<TableKey, Set<PEColumn>> keyMap = new HashMap<TableKey, Set<PEColumn>>();
// build up the join condition; we will use an loj for this
for(Pair<ColumnKey,ColumnKey> p : joinConditions) {
ColumnKey lhk = combined.getMapper().copyColumnKeyForward(p.getFirst());
ColumnKey rhk = combined.getMapper().copyColumnKeyForward(p.getSecond());
ColumnInstance lci = lhk.toInstance();
ColumnInstance rci = rhk.toInstance();
FunctionCall fc = new FunctionCall(FunctionName.makeEquals(),lci,rci);
fc.setGrouped();
mappedJoinCondition.add(fc);
noteJoinedColumn(keyMap, lci, lhk.getPEColumn());
noteJoinedColumn(keyMap, rci, rhk.getPEColumn());
}
for(TableKey tempTableKey : keyMap.keySet()) {
((TempTable)(tempTableKey.getAbstractTable())).noteJoinedColumns(sc.getContext(), new ArrayList<PEColumn>(keyMap.get(tempTableKey)));
}
ExpressionNode jc = ExpressionUtils.safeBuildAnd(mappedJoinCondition);
FromTableReference lt = combined.getTablesEdge().get(0);
FromTableReference rt = combined.getTablesEdge().get(1);
combined.getTablesEdge().remove(1);
JoinedTable jt = new JoinedTable(rt.getBaseTable(),jc,JoinSpecification.LEFT_OUTER_JOIN);
lt.addJoinedTable(jt);
// so the first column in the temp table select is the actual result column of the subquery -
// we're going to replace that in the orig proj offset, then map the rest forward
ExpressionNode rewritten = rewriteSubquerySpot(sc.getContext(),ExpressionUtils.getTarget(tq.getProjectionEdge().get(0)));
ExpressionNode mappedProj = combined.getMapper().copyForward(rewritten);
List<ExpressionNode> proj = new ArrayList<ExpressionNode>(combined.getProjection());
proj.add(origProjOffset, mappedProj);
combined.setProjection(proj);
return combined;
}
private void noteJoinedColumn(Map<TableKey, Set<PEColumn>> keyMap, ColumnInstance ci, PEColumn peCol) {
TableInstance ti = ci.getTableInstance();
if (ti.getAbstractTable().isTempTable()) {
if (!keyMap.containsKey(ti.getTableKey())) {
keyMap.put(ti.getTableKey(), new LinkedHashSet<PEColumn>());
}
keyMap.get(ti.getTableKey()).add(peCol);
}
}
private ExpressionNode rewriteSubquerySpot(SchemaContext sc, ExpressionNode subqRepl) throws PEException {
ExpressionPath ep = null;
if (exists)
ep = ExpressionPath.build(sq.getParent(), origProjEntry);
else
ep = ExpressionPath.build(sq, origProjEntry);
ExpressionNode targ = ExpressionUtils.getTarget(sq.getStatement().getProjections().get(0).get(0));
ExpressionNode replacement = null;
if (sq.getStatement().getLimit() != null && sq.getStatement().getLimit().hasLimitOne(sc)) {
replacement = subqRepl;
} else if (targ instanceof ColumnInstance) {
if (!exists)
throw new PEException("Likely illegal rewrite - correlated subquery not in exists function but not a function");
FunctionCall isnot = new FunctionCall(FunctionName.makeIsNot(), subqRepl, LiteralExpression.makeNullLiteral());
replacement =
new CaseExpression(null,
LiteralExpression.makeLongLiteral(0),
Collections.singletonList(new WhenClause(isnot,LiteralExpression.makeLongLiteral(1),null)),
null);
replacement.setGrouped();
} else {
FunctionCall fc = (FunctionCall)targ;
FunctionName fn = fc.getFunctionName();
if (ep.size() == 1) {
if (fn.isCount()) {
LiteralExpression otherValue = LiteralExpression.makeLongLiteral(0);
replacement = new FunctionCall(FunctionName.makeIfNull(),subqRepl,otherValue);
} else {
replacement = subqRepl;
}
} else {
replacement = subqRepl;
}
}
ep.update(origProjEntry, replacement);
return origProjEntry;
}
@Override
public void removeFromParent(SelectStatement parent) throws PEException {
parent.getProjectionEdge().remove(getOffset());
}
@Override
public int getOffset() {
return origProjOffset;
}
private static final SimplifyPass[] simplifications =
new SimplifyPass[] {
stripRedundantLimit,
// forwardDeeplyCorrelated
yankDownRestrictions,
removeCorrelation
};
@Override
protected SimplifyPass[] getSimplifications() {
return simplifications;
}
}
/*
 * Projection mutator that treats only selected projection offsets specially:
 * an offset that has subqueries registered gets an exploding column mutator,
 * every other offset is passed through.
 */
private static class ExplodeComplexCorrelatedSubqueriesProjectionMutator extends ProjectionMutator {

    // the subqueries registered per projection offset
    MultiMap<Integer,Subquery> byOffset;
    // handed to every exploding column mutator created by adapt
    CopyContext cc;

    public ExplodeComplexCorrelatedSubqueriesProjectionMutator(
            SchemaContext sc,
            CopyContext cc,
            MultiMap<Integer,Subquery> offsets) {
        super(sc);
        this.cc = cc;
        this.byOffset = offsets;
    }

    // Registers one column mutator per projection entry, then delegates to applyAdapted.
    @Override
    public List<ExpressionNode> adapt(MutatorState ms,
            List<ExpressionNode> proj) {
        for (int offset = 0; offset < proj.size(); offset++) {
            final ColumnMutator mutator = byOffset.containsKey(offset)
                    ? new ExplodeComplexCorrelatedSubqueriesColumnMutator(byOffset.get(offset), cc)
                    : new PassThroughMutator();
            mutator.setBeforeOffset(offset);
            columns.add(mutator);
        }
        return applyAdapted(proj, ms);
    }
}
// Column mutator for a projection entry that contains subqueries. adapt() replaces the
// entry by its parts - every column that is not inside one of the subqueries, followed by
// the subqueries themselves - and remembers the path of each part within the entry.
// apply() uses those paths to put the (updated) parts back into a copy of the entry.
private static class ExplodeComplexCorrelatedSubqueriesColumnMutator extends ColumnMutator {

    List<Subquery> theQueries;
    // the projection entry as seen by adapt
    ExpressionNode original;
    // one path per part emitted by adapt, in emission order
    protected List<ExpressionPath> paths;
    protected CopyContext cc;

    public ExplodeComplexCorrelatedSubqueriesColumnMutator(Collection<Subquery> allSubqs, CopyContext cc) {
        super();
        this.theQueries = Functional.toList(allSubqs);
        this.paths = new ArrayList<ExpressionPath>();
        this.cc = cc;
    }

    @Override
    public List<ExpressionNode> adapt(SchemaContext sc,
            List<ExpressionNode> proj, MutatorState ms) {
        original = getProjectionEntry(proj, getBeforeOffset());
        final ArrayList<ExpressionNode> parts = new ArrayList<ExpressionNode>();
        final ListSet<ColumnInstance> referenced = ColumnInstanceCollector.getColumnInstances(original);
        for (ColumnInstance column : referenced) {
            // columns below one of the subqueries are handled by the nested query
            if (column.ifAncestor(theQueries) == null) {
                paths.add(ExpressionPath.build(column, original));
                parts.add(column);
            }
        }
        // the subqueries follow the outer columns
        for (Subquery nested : theQueries) {
            paths.add(ExpressionPath.build(nested, original));
            parts.add(nested);
        }
        return parts;
    }

    @Override
    public List<ExpressionNode> apply(List<ExpressionNode> proj,
            ApplyOption opts) {
        final ExpressionNode rebuilt = (ExpressionNode) original.copy(cc);
        int index = 0;
        for (ExpressionPath path : paths) {
            // the parts sit next to each other in proj, starting at the after-offset
            final ExpressionNode part = getProjectionEntry(proj, getAfterOffsetBegin() + index);
            path.update(rebuilt, part);
            index++;
        }
        return Collections.singletonList(rebuilt);
    }
}
/*
 * Builds the feature step for a select whose projection contains correlated subqueries.
 *
 * Outline, as visible in this method:
 *  - if two or more subqueries share a projection offset, the projection of copy is
 *    "exploded" via a projection mutator and the offsets of all subqueries are recomputed;
 *  - the subqueries are removed from copy, and outer columns they need that are not
 *    already plain projection entries are appended to the projection;
 *  - copy is planned and redistributed (redistToAggSite), and a lookup table is built
 *    on the outer columns (buildLookupTable);
 *  - each subquery is rewritten against the lookup table, planned, redistributed and
 *    pasted into the select on the parent temp table (pasteTempTable);
 *  - the appended outer columns are dropped again, the explode is undone if it happened,
 *    and a projecting step for the final select is returned with the per-subquery
 *    redistribution steps prefixed as children.
 *
 * subs      - the correlated subqueries; every element is cast to ProjectionCorrelatedSubquery
 * origQuery - the original statement; read for its projection size, single group and database
 * copy      - the working copy; it is modified in place by this method
 */
@Override
protected FeatureStep buildSteps(PlannerContext pc,
List<CorrelatedSubquery> subs, SelectStatement origQuery,
SelectStatement copy) throws PEException {
ListSet<ProjectionCorrelatedSubquery> queries = new ListSet<ProjectionCorrelatedSubquery>();
for(CorrelatedSubquery cs : subs)
queries.add((ProjectionCorrelatedSubquery) cs);
List<ProjectionMutator> mutators = new ArrayList<ProjectionMutator>();
// stays null unless the projection gets exploded below
MutatorState ms = null;
// subqueries grouped by the projection offset they were found at
MultiMap<Integer,Subquery> complex = new MultiMap<Integer,Subquery>();
boolean anyComplex = false;
for(ProjectionCorrelatedSubquery psc : queries) {
// the lookup happens before the put: anyComplex is set as soon as a subquery
// lands on an offset that already holds another one
Collection<Subquery> any = complex.get(psc.getOffset());
if (any != null && any.size() > 0) anyComplex = true;
complex.put(psc.getOffset(), psc.getSubquery());
}
if (anyComplex) {
if (emitting()) {
emit("Before proj explode: " + copy.getSQL(pc.getContext()));
}
// we need to build the mutator and blow out the complex expression
// this also means we will need to rebuild paths on all of the subqs
mutators.add(new ExplodeComplexCorrelatedSubqueriesProjectionMutator(pc.getContext(),copy.getMapper().getCopyContext(),complex));
ms = new MutatorState(copy);
List<ExpressionNode> copyProj = copy.getProjection();
for(ProjectionMutator pm : mutators) {
copyProj = pm.adapt(ms, copyProj);
}
ms.combine(pc.getContext(),copyProj,false);
if (emitting()) {
emit("After proj explode: " + copy.getSQL(pc.getContext()));
}
// ok, copy has been modified. update the offsets.
// NOTE(review): the map is keyed by offset, so two subqueries resolving to the same
// offset would overwrite each other here; presumably the explode gives every
// subquery its own projection entry - TODO confirm
TreeMap<Integer,ProjectionCorrelatedSubquery> byOffset = new TreeMap<Integer,ProjectionCorrelatedSubquery>();
for(ProjectionCorrelatedSubquery subq : queries) {
Pair<ExpressionNode, Integer> found = findOffset(copy,subq.getSubquery().getStatement());
subq.setOrigProjOffset(found.getSecond());
subq.setProjEntry(found.getFirst());
byOffset.put(found.getSecond(), subq);
}
// finally, we need to reorder them in the list to make the rest of this work correctly
// (the TreeMap hands the subqueries back in ascending offset order)
queries.clear();
queries.addAll(byOffset.values());
}
// now traverse the queries in reverse order to remove the subqueries; we're also going
// to build the dependency info - we only care about the outer columns that are needed
// note that corsub impls modify as needed - assume that copy is ready for planning at the end of this.
// NOTE(review): entries are removed by offset, so going last-to-first only keeps the
// remaining offsets valid if queries is in ascending offset order; that is enforced above
// only when anyComplex is set - presumably subs already arrive in that order, TODO confirm
List<ExpressionNode> removed = new ArrayList<ExpressionNode>();
ListSet<ColumnKey> neededOuterColumns = new ListSet<ColumnKey>();
for(int i = queries.size() - 1; i > -1; i--) {
CorrelatedSubquery sq = queries.get(i);
// we need to fix the mapper for the nested query for what we need to do later
sq.getSubquery().getStatement().getMapper().getOriginals().add(copy);
// remember the projection entry before it is taken out of copy
removed.add(copy.getProjectionEdge().get(sq.getOffset()));
sq.removeFromParent(copy);
copy.getDerivedInfo().getLocalNestedQueries().remove(sq.getSubquery().getStatement());
neededOuterColumns.addAll(sq.getOuterColumns());
}
OrderByLimitRewriteTransformFactory.maybeCleanupGroupBys(removed, copy);
// now figure out which of the outer join needed columns are already on the projection
// allOuterColumns keeps the complete set; neededOuterColumns is reduced to the columns
// that are not yet present as a plain column in the projection
ListSet<ColumnKey> allOuterColumns = new ListSet<ColumnKey>();
allOuterColumns.addAll(neededOuterColumns);
for(ExpressionNode en : copy.getProjection()) {
ExpressionNode targ = ExpressionUtils.getTarget(en);
if (targ instanceof ColumnInstance) {
ColumnInstance ci = (ColumnInstance) targ;
neededOuterColumns.remove(ci.getColumnKey());
}
}
// append the still-missing outer columns to the end of the projection
for(ColumnKey ck : neededOuterColumns) {
copy.getProjectionEdge().add(ck.toInstance());
}
// normalize only when the projection no longer has the size of the original one
if (copy.getProjectionEdge().size() != origQuery.getProjectionEdge().size())
copy.normalize(pc.getContext());
if (emitting())
emit("out " + copy.getSQL(pc.getContext(), " "));
// plan the parent query without the subqueries and redistribute its result
ProjectingFeatureStep childStep =
(ProjectingFeatureStep) buildPlan(copy,pc,DefaultFeaturePlannerFilter.INSTANCE);
RedistFeatureStep onAggSiteStep = redistToAggSite(pc,childStep);
TempTable tt = onAggSiteStep.getTargetTempTable();
// fs starts out as a select on the parent temp table; each subquery is pasted into it below
SelectStatement fs = tt.buildSelect(pc.getContext());
RedistFeatureStep lookupTableStep = buildLookupTable(pc,onAggSiteStep,allOuterColumns,origQuery.getSingleGroup(pc.getContext()));
TempTable lt = lookupTableStep.getTargetTempTable();
// the per-subquery redistribution steps; they become children of the returned step
ListSet<FeatureStep> deps = new ListSet<FeatureStep>();
for(CorrelatedSubquery csq : queries) {
if (emitting())
emit("proj subq: " + csq.getSubquery().getStatement().getSQL(pc.getContext(), " "));
SelectStatement rewritten = csq.rewriteSubqueryUsingLookupTable(pc.getContext(),lt);
ProjectingFeatureStep subqStep = (ProjectingFeatureStep) buildPlan(rewritten,pc,
new ComplexFeaturePlannerFilter(Collections.<FeaturePlannerIdentifier> emptySet(),
TransformFactory.allTransforms));
// the rewritten subquery reads the lookup table, so that step is registered as a pre-child
subqStep.getPreChildren().add(lookupTableStep);
RedistFeatureStep redistToAgg =
redistToAggSite(pc,subqStep);
fs = csq.pasteTempTable(pc, fs, redistToAgg);
deps.add(redistToAgg);
}
// drop as many trailing projection entries as outer columns were appended to copy above
for(int i = 0; i < neededOuterColumns.size(); i++) {
fs.getProjectionEdge().remove(fs.getProjectionEdge().size() - 1);
}
// if I have mutators - then I need to unexplode the projection.
if (ms != null) {
List<ExpressionNode> intermediate = fs.getProjection();
// mutators are applied in the reverse of the order used for adapt
for(int i = mutators.size() - 1; i > -1; i--) {
intermediate = mutators.get(i).apply(this,ms, intermediate, new ApplyOption(i,mutators.size() - 1));
}
fs.setProjection(intermediate);
}
fs.normalize(pc.getContext());
// final step: the select over the parent temp table joined with the subquery temp tables
ProjectingFeatureStep returnStep =
DefaultFeatureStepBuilder.INSTANCE.buildProjectingStep(
pc,
this,
fs,
new ExecutionCost(true,true,null,-1),
pc.getTempGroupManager().getGroup(true),
origQuery.getDatabase(pc.getContext()),
EngineConstant.BROADEST_DISTRIBUTION_VECTOR.getValue(fs,pc.getContext()),
null,
DMLExplainReason.PROJECTION_CORRELATED_SUBQUERY.makeRecord());
returnStep.prefixChildren(deps);
return returnStep;
}
}