/*
* Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
* license agreements. See the NOTICE file distributed with this work for
* additional information regarding copyright ownership. Crate licenses
* this file to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* However, if you have executed another commercial license agreement
* with Crate these terms will supersede the license and you may use the
* software solely pursuant to the terms of the relevant commercial agreement.
*/
package io.crate.planner.consumer;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import io.crate.analyze.*;
import io.crate.analyze.relations.AnalyzedRelation;
import io.crate.analyze.relations.JoinPair;
import io.crate.analyze.relations.QueriedDocTable;
import io.crate.analyze.relations.QueriedRelation;
import io.crate.analyze.symbol.*;
import io.crate.collections.Lists2;
import io.crate.metadata.Functions;
import io.crate.metadata.TableIdent;
import io.crate.operation.projectors.TopN;
import io.crate.planner.*;
import io.crate.planner.distribution.DistributionInfo;
import io.crate.planner.node.dql.MergePhase;
import io.crate.planner.node.dql.join.JoinType;
import io.crate.planner.node.dql.join.NestedLoop;
import io.crate.planner.node.dql.join.NestedLoopPhase;
import io.crate.planner.projection.Projection;
import io.crate.planner.projection.builder.InputColumns;
import io.crate.planner.projection.builder.ProjectionBuilder;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.logging.Loggers;
import java.util.*;
class NestedLoopConsumer implements Consumer {
private final static Logger LOGGER = Loggers.getLogger(NestedLoopConsumer.class);
private final Visitor visitor;
/**
 * Creates the consumer together with the relation visitor it delegates all planning to.
 *
 * @param clusterService handed to the visitor
 * @param functions      handed to the visitor
 * @param tableStats     handed to the visitor
 */
NestedLoopConsumer(ClusterService clusterService, Functions functions, TableStats tableStats) {
    this.visitor = new Visitor(clusterService, functions, tableStats);
}
/**
 * Hands the relation to the internal visitor and returns whatever plan it produces.
 *
 * @param rootRelation the relation to plan
 * @param context      the consumer context passed through to the visitor
 * @return the result of {@code visitor.process(rootRelation, context)}
 */
@Override
public Plan consume(AnalyzedRelation rootRelation, ConsumerContext context) {
    final Plan plan = visitor.process(rootRelation, context);
    return plan;
}
private static class Visitor extends RelationPlanningVisitor {
private final ClusterService clusterService;
private final Functions functions;
private final TableStats tableStats;
/**
 * Stores the collaborators used while planning a join.
 *
 * @param clusterService used to look up the local node id
 * @param functions      kept as a field of this visitor
 * @param tableStats     used to compare the document counts of the joined tables
 */
public Visitor(ClusterService clusterService, Functions functions, TableStats tableStats) {
    this.tableStats = tableStats;
    this.functions = functions;
    this.clusterService = clusterService;
}
/**
 * Plans a {@link TwoTableJoin} as a {@link NestedLoop}.
 * <p>
 * Rough sequence: adjust limit/offset of the sub-relations, plan both sub-relations,
 * decide whether the nested loop is executed distributed, set up merge phases and
 * distribution info for both sides, then build the filter and topN/eval projections
 * and the {@link NestedLoopPhase}.
 * <p>
 * Side effects visible in this method: the orderBy of {@code statement.querySpec()} may be
 * replaced by the remaining orderBy, limit/offset of both sub-relation query specs may be
 * cleared, and the fetch mode and required page size of {@code context} are modified.
 *
 * @param statement the join to plan
 * @param context   the consumer context used for planning the sub-relations
 * @return the nested loop plan
 * @throws UnsupportedOperationException if either side is a {@link QueriedSelectRelation}
 */
@Override
public Plan visitTwoTableJoin(TwoTableJoin statement, ConsumerContext context) {
QuerySpec querySpec = statement.querySpec();
QueriedRelation left = statement.left();
QueriedRelation right = statement.right();
if (left instanceof QueriedSelectRelation || right instanceof QueriedSelectRelation) {
throw new UnsupportedOperationException("JOIN with sub queries is not supported");
}
// Outputs of the nested loop: fields of the left relation followed by those of the right one.
// This is computed before any left/right swapping further below.
List<Symbol> nlOutputs = Lists2.concat(left.fields(), right.fields());
// for nested loops we are fine to remove pushed down orders
// Keep the original orderBy around; it is still used for the distributed case and for outer joins.
OrderBy orderByBeforeSplit = querySpec.orderBy().orElse(null);
if (statement.remainingOrderBy().isPresent()) {
querySpec.orderBy(statement.remainingOrderBy().get());
}
JoinPair joinPair = statement.joinPair();
JoinType joinType = joinPair.joinType();
Symbol joinCondition = joinPair.condition();
WhereClause where = querySpec.where();
/*
* isDistributed/filterNeeded doesn't consider the joinCondition.
* This means joins with implicit syntax result in a different plan than joins using explicit syntax.
* This was unintentional but we'll keep it that way (for now) as a distributed plan can be significantly slower
* (depending on the number of rows that are filtered)
* and we don't want to introduce a performance regression.
*
* We may at some point add some kind of session-settings to override this behaviour or otherwise
* come up with a better heuristic.
*/
// A where clause that is just a literal does not require a filter projection.
boolean filterNeeded = where.hasQuery() && !(where.query() instanceof Literal);
boolean hasDocTables = left instanceof QueriedDocTable || right instanceof QueriedDocTable;
// Distributed execution is only considered for non-outer joins on doc tables that need a filter.
boolean isDistributed = hasDocTables && filterNeeded && !joinType.isOuter();
Limits limits = context.plannerContext().getLimits(querySpec);
// If rows are filtered, joined by a condition or re-ordered after the join,
// limit/offset are cleared on both sub-relations.
if (filterNeeded || joinCondition != null || statement.remainingOrderBy().isPresent()) {
left.querySpec().limit(Optional.empty());
right.querySpec().limit(Optional.empty());
left.querySpec().offset(Optional.empty());
right.querySpec().offset(Optional.empty());
}
// Without filter and join condition but with a limit, the sub plans are planned
// with a required page size of limit + offset.
if (!filterNeeded && joinCondition == null && querySpec.limit().isPresent()) {
context.requiredPageSize(limits.limitAndOffset());
}
context.setFetchMode(FetchMode.NEVER);
Plan leftPlan = context.plannerContext().planSubRelation(left, context);
Plan rightPlan = context.plannerContext().planSubRelation(right, context);
// Reset the page size requirement again after both sub plans have been created.
context.requiredPageSize(null);
ResultDescription leftResultDesc = leftPlan.resultDescription();
ResultDescription rightResultDesc = rightPlan.resultDescription();
// Distributed execution additionally requires that both sub plans report node ids.
isDistributed = isDistributed &&
(!leftResultDesc.nodeIds().isEmpty() && !rightResultDesc.nodeIds().isEmpty());
boolean broadcastLeftTable = false;
if (isDistributed) {
// If the left table has fewer docs than the right one, temporarily swap both sides so that
// the code below (which broadcasts the "right" side) broadcasts the original left side.
broadcastLeftTable = isLeftSmallerThanRight(left, right);
if (broadcastLeftTable) {
Plan tmpPlan = leftPlan;
leftPlan = rightPlan;
rightPlan = tmpPlan;
QueriedRelation tmpRelation = left;
left = right;
right = tmpRelation;
joinType = joinType.invert();
leftResultDesc = leftPlan.resultDescription();
rightResultDesc = rightPlan.resultDescription();
}
}
// Default: the nested loop is executed on the local node only.
Collection<String> nlExecutionNodes = ImmutableSet.of(clusterService.localNode().getId());
MergePhase leftMerge = null;
MergePhase rightMerge = null;
if (isDistributed) {
// Distributed: the nested loop runs on the nodes of the (possibly swapped) left plan.
leftPlan.setDistributionInfo(DistributionInfo.DEFAULT_SAME_NODE);
nlExecutionNodes = leftResultDesc.nodeIds();
} else {
// Non-distributed: a merge phase for the left side is only created if the left plan
// does not run on exactly the nested loop execution nodes.
if (isMergePhaseNeeded(nlExecutionNodes, leftResultDesc.nodeIds(), false)) {
leftMerge = new MergePhase(
context.plannerContext().jobId(),
context.plannerContext().nextExecutionPhaseId(),
"nl-merge",
leftResultDesc.nodeIds().size(),
nlExecutionNodes,
leftResultDesc.streamOutputs(),
Collections.emptyList(),
DistributionInfo.DEFAULT_SAME_NODE,
PositionalOrderBy.of(left.querySpec().orderBy().orElse(null), left.querySpec().outputs())
);
}
}
if (nlExecutionNodes.size() == 1
&& nlExecutionNodes.equals(rightResultDesc.nodeIds())) {
// If the nested loop and the right plan are executed on the same single node, the merge phase
// is omitted. This is the case if the left and the right table have only one shard each and
// both shards are on the same node.
rightPlan.setDistributionInfo(DistributionInfo.DEFAULT_SAME_NODE);
} else {
// Otherwise the right side is broadcast to the nested loop execution nodes.
if (isMergePhaseNeeded(nlExecutionNodes, rightResultDesc.nodeIds(), isDistributed)) {
rightMerge = new MergePhase(
context.plannerContext().jobId(),
context.plannerContext().nextExecutionPhaseId(),
"nl-merge",
rightResultDesc.nodeIds().size(),
nlExecutionNodes,
rightResultDesc.streamOutputs(),
Collections.emptyList(),
DistributionInfo.DEFAULT_SAME_NODE,
PositionalOrderBy.of(right.querySpec().orderBy().orElse(null), right.querySpec().outputs())
);
}
rightPlan.setDistributionInfo(DistributionInfo.DEFAULT_BROADCAST);
}
if (broadcastLeftTable) {
// Swap the plans back into their original order; the merge phase that was created for the
// temporary "right" side now belongs to the left side.
// NOTE(review): `left`, `right` and `joinType` are NOT swapped back here, so the output sizes
// passed to NestedLoopPhase below are taken from the swapped relations while nlOutputs uses
// the original order - confirm this is intended.
// NOTE(review): leftResultDesc/rightResultDesc are reassigned here but not read afterwards
// in this method.
Plan tmpPlan = leftPlan;
leftPlan = rightPlan;
rightPlan = tmpPlan;
leftMerge = rightMerge;
rightMerge = null;
leftResultDesc = leftPlan.resultDescription();
rightResultDesc = rightPlan.resultDescription();
}
List<Projection> projections = new ArrayList<>();
if (filterNeeded) {
projections.add(ProjectionBuilder.filterProjection(nlOutputs, where));
}
if (joinCondition != null) {
// Rewrite the join condition against the nested loop outputs; the assertions below expect
// that no column symbols are left afterwards.
joinCondition = InputColumns.create(joinCondition, nlOutputs);
assert joinCondition instanceof Function : "Only function symbols are valid join conditions";
assert !SymbolVisitors.any(Symbols.IS_COLUMN, joinCondition)
: "Processed joinCondition must not contain column symbols.\njoinCondition="
+ joinCondition + " nlOutputs=" + nlOutputs;
}
// In the distributed case the order by symbols of the original orderBy that are not part of
// the query outputs are appended, as PositionalOrderBy below is built on top of postNLOutputs.
List<Symbol> postNLOutputs = Lists.newArrayList(querySpec.outputs());
if (orderByBeforeSplit != null && isDistributed) {
for (Symbol symbol : orderByBeforeSplit.orderBySymbols()) {
if (postNLOutputs.indexOf(symbol) == -1) {
postNLOutputs.add(symbol);
}
}
}
// Use the remaining orderBy if there is one; for outer joins fall back to the original orderBy.
OrderBy orderBy = statement.remainingOrderBy().orElse(null);
if (orderBy == null && joinType.isOuter()) {
orderBy = orderByBeforeSplit;
}
// Distributed: the projection uses limit + offset with an offset of 0, the final limit and offset
// are passed to the NestedLoop plan below. Non-distributed: final limit and offset are applied here.
int limit = isDistributed ? limits.limitAndOffset() : limits.finalLimit();
Projection topN = ProjectionBuilder.topNOrEval(
nlOutputs,
orderBy,
isDistributed ? 0 : limits.offset(),
limit,
postNLOutputs
);
projections.add(topN);
NestedLoopPhase nl = new NestedLoopPhase(
context.plannerContext().jobId(),
context.plannerContext().nextExecutionPhaseId(),
isDistributed ? "distributed-nested-loop" : "nested-loop",
projections,
leftMerge,
rightMerge,
nlExecutionNodes,
joinType,
joinCondition,
left.querySpec().outputs().size(),
right.querySpec().outputs().size()
);
// postNLOutputs includes orderBy only symbols, these need to be stripped in the handlerMerge
int postMergeNumOutput = querySpec.outputs().size();
if (isDistributed) {
return new NestedLoop(
nl,
leftPlan,
rightPlan,
limits.finalLimit(),
limits.offset(),
limit,
postMergeNumOutput,
PositionalOrderBy.of(orderByBeforeSplit, postNLOutputs)
);
} else {
return new NestedLoop(nl, leftPlan, rightPlan, TopN.NO_LIMIT, 0, limit, postMergeNumOutput, null);
}
}
/**
 * Compares the sizes of two relations by delegating to the {@link TableIdent} based overload.
 * The comparison is only attempted when both relations are {@link QueriedTableRelation}s.
 *
 * @param qrLeft  the left relation
 * @param qrRight the right relation
 * @return the result of the ident based comparison, or {@code false} if either relation is
 *         not a {@link QueriedTableRelation}
 */
private boolean isLeftSmallerThanRight(QueriedRelation qrLeft, QueriedRelation qrRight) {
    if (!(qrLeft instanceof QueriedTableRelation) || !(qrRight instanceof QueriedTableRelation)) {
        // sizes can only be compared for relations that expose a table ident
        return false;
    }
    TableIdent identOfLeft = ((QueriedTableRelation) qrLeft).tableRelation().tableInfo().ident();
    TableIdent identOfRight = ((QueriedTableRelation) qrRight).tableRelation().tableInfo().ident();
    return isLeftSmallerThanRight(identOfLeft, identOfRight);
}
/**
 * Checks whether the left table has strictly fewer documents than the right table,
 * based on the numbers reported by {@code tableStats}.
 * <p>
 * When the left table is smaller, a debug message with both document counts is logged.
 * If both tables report the same number of documents the result is {@code false}.
 *
 * @param leftIdent  ident of the left table
 * @param rightIdent ident of the right table
 * @return {@code true} if {@code tableStats} reports fewer docs for the left table
 */
private boolean isLeftSmallerThanRight(TableIdent leftIdent, TableIdent rightIdent) {
    long leftNumDocs = tableStats.numDocs(leftIdent);
    long rightNumDocs = tableStats.numDocs(rightIdent);
    boolean leftIsSmaller = leftNumDocs < rightNumDocs;
    if (leftIsSmaller) {
        // The original message had an unbalanced parenthesis: "(left has {}."
        LOGGER.debug("Right table is larger with {} docs (left has {}). Will change left plan to broadcast its result",
            rightNumDocs, leftNumDocs);
    }
    return leftIsSmaller;
}
/**
 * Decides whether a merge phase is required to receive the results of an upstream phase.
 * <p>
 * No merge phase is needed only in the non-distributed case when the upstream phase is
 * executed on exactly the same nodes as the nested loop; the RowReceiver of the nested
 * loop can then be accessed directly.
 *
 * @param executionNodes              nodes the nested loop is executed on
 * @param upstreamPhaseExecutionNodes nodes the upstream phase is executed on
 * @param isDistributed               whether the nested loop is executed distributed
 * @return {@code false} if not distributed and both node collections are equal, {@code true} otherwise
 */
private static boolean isMergePhaseNeeded(Collection<String> executionNodes,
                                          Collection<String> upstreamPhaseExecutionNodes,
                                          boolean isDistributed) {
    return isDistributed || !upstreamPhaseExecutionNodes.equals(executionNodes);
}
}
}