/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.sql.planner.optimizations; import com.facebook.presto.Session; import com.facebook.presto.metadata.Metadata; import com.facebook.presto.spi.GroupingProperty; import com.facebook.presto.spi.LocalProperty; import com.facebook.presto.spi.SortingProperty; import com.facebook.presto.spi.type.Type; import com.facebook.presto.sql.parser.SqlParser; import com.facebook.presto.sql.planner.Partitioning; import com.facebook.presto.sql.planner.PartitioningScheme; import com.facebook.presto.sql.planner.PlanNodeIdAllocator; import com.facebook.presto.sql.planner.Symbol; import com.facebook.presto.sql.planner.SymbolAllocator; import com.facebook.presto.sql.planner.optimizations.StreamPropertyDerivations.StreamProperties; import com.facebook.presto.sql.planner.plan.AggregationNode; import com.facebook.presto.sql.planner.plan.DistinctLimitNode; import com.facebook.presto.sql.planner.plan.EnforceSingleRowNode; import com.facebook.presto.sql.planner.plan.ExchangeNode; import com.facebook.presto.sql.planner.plan.ExplainAnalyzeNode; import com.facebook.presto.sql.planner.plan.IndexJoinNode; import com.facebook.presto.sql.planner.plan.JoinNode; import com.facebook.presto.sql.planner.plan.LimitNode; import com.facebook.presto.sql.planner.plan.MarkDistinctNode; import com.facebook.presto.sql.planner.plan.OutputNode; import com.facebook.presto.sql.planner.plan.PlanNode; import com.facebook.presto.sql.planner.plan.PlanVisitor; 
import com.facebook.presto.sql.planner.plan.RowNumberNode; import com.facebook.presto.sql.planner.plan.SemiJoinNode; import com.facebook.presto.sql.planner.plan.SortNode; import com.facebook.presto.sql.planner.plan.TableFinishNode; import com.facebook.presto.sql.planner.plan.TableWriterNode; import com.facebook.presto.sql.planner.plan.TopNNode; import com.facebook.presto.sql.planner.plan.TopNRowNumberNode; import com.facebook.presto.sql.planner.plan.UnionNode; import com.facebook.presto.sql.planner.plan.WindowNode; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import static com.facebook.presto.SystemSessionProperties.getTaskConcurrency; import static com.facebook.presto.SystemSessionProperties.getTaskWriterCount; import static com.facebook.presto.sql.planner.SystemPartitioningHandle.FIXED_ARBITRARY_DISTRIBUTION; import static com.facebook.presto.sql.planner.SystemPartitioningHandle.FIXED_HASH_DISTRIBUTION; import static com.facebook.presto.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION; import static com.facebook.presto.sql.planner.optimizations.StreamPreferredProperties.any; import static com.facebook.presto.sql.planner.optimizations.StreamPreferredProperties.defaultParallelism; import static com.facebook.presto.sql.planner.optimizations.StreamPreferredProperties.exactlyPartitionedOn; import static com.facebook.presto.sql.planner.optimizations.StreamPreferredProperties.fixedParallelism; import static com.facebook.presto.sql.planner.optimizations.StreamPreferredProperties.singleStream; import static com.facebook.presto.sql.planner.optimizations.StreamPropertyDerivations.StreamProperties.StreamDistribution.SINGLE; import static 
com.facebook.presto.sql.planner.plan.ChildReplacer.replaceChildren;
import static com.facebook.presto.sql.planner.plan.ExchangeNode.Scope.LOCAL;
import static com.facebook.presto.sql.planner.plan.ExchangeNode.Type.GATHER;
import static com.facebook.presto.sql.planner.plan.ExchangeNode.Type.REPARTITION;
import static com.facebook.presto.sql.planner.plan.ExchangeNode.gatheringExchange;
import static com.facebook.presto.sql.planner.plan.ExchangeNode.partitionedExchange;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;

/**
 * Plan optimizer that walks a plan and inserts {@code LOCAL}-scope exchanges
 * wherever the stream properties produced by a child do not satisfy the
 * stream properties its parent requires.
 *
 * <p>Each plan node is visited with the stream properties its parent would
 * <em>prefer</em>; after a child has been planned, the properties the parent
 * <em>requires</em> are enforced by adding a gathering or repartitioning
 * local exchange when the child's derived properties do not satisfy them.
 */
public class AddLocalExchanges
        implements PlanOptimizer
{
    private final Metadata metadata;
    private final SqlParser parser;

    /**
     * @param metadata metadata passed through to stream property derivation; must not be null
     * @param parser SQL parser passed through to stream property derivation; must not be null
     */
    public AddLocalExchanges(Metadata metadata, SqlParser parser)
    {
        this.metadata = requireNonNull(metadata, "metadata is null");
        this.parser = requireNonNull(parser, "parser is null");
    }

    /**
     * Rewrites {@code plan}, starting with no stream preference at the root.
     *
     * <p>Note: the {@code types} argument is not read here; the rewriter takes
     * its symbol types from {@code symbolAllocator} instead.
     *
     * @return the root of the rewritten plan
     */
    @Override
    public PlanNode optimize(PlanNode plan, Session session, Map<Symbol, Type> types, SymbolAllocator symbolAllocator, PlanNodeIdAllocator idAllocator)
    {
        PlanWithProperties result = plan.accept(new Rewriter(symbolAllocator, idAllocator, session), any());
        return result.getNode();
    }

    /**
     * Visitor that plans each node under its parent's preferred stream properties
     * and returns the (possibly rewritten) node together with the stream
     * properties derived for it. The visitor context is the parent's preference.
     */
    private class Rewriter
            extends PlanVisitor<StreamPreferredProperties, PlanWithProperties>
    {
        private final PlanNodeIdAllocator idAllocator;
        private final Session session;
        private final Map<Symbol, Type> types;

        public Rewriter(SymbolAllocator symbolAllocator, PlanNodeIdAllocator idAllocator, Session session)
        {
            // validate eagerly, consistent with the other constructors in this file
            requireNonNull(symbolAllocator, "symbolAllocator is null");
            this.types = ImmutableMap.copyOf(symbolAllocator.getTypes());
            this.idAllocator = requireNonNull(idAllocator, "idAllocator is null");
            this.session = requireNonNull(session, "session is null");
        }

        /**
         * Default handling for nodes without a dedicated visit method: nothing
         * specific is required of the children beyond default parallelism, and
         * the parent's preferences are passed through as the children's preference.
         */
        @Override
        protected PlanWithProperties visitPlan(PlanNode node, StreamPreferredProperties parentPreferences)
        {
            return planAndEnforceChildren(
                    node,
                    parentPreferences.withoutPreference().withDefaultParallelism(session),
                    parentPreferences.withDefaultParallelism(session));
        }

        /**
         * Output places no distribution requirement on its child but marks it
         * as order sensitive; the parent preferences are not consulted.
         */
        @Override
        public PlanWithProperties visitOutput(OutputNode node, StreamPreferredProperties parentPreferences)
        {
            return planAndEnforceChildren(
                    node,
                    any().withOrderSensitivity(),
                    any().withOrderSensitivity());
        }

        @Override
        public PlanWithProperties visitExplainAnalyze(ExplainAnalyzeNode node, StreamPreferredProperties parentPreferences)
        {
            // Although explain analyze discards all output, we want to maintain the behavior
            // of a normal output node, so declare the node to be order sensitive
            return planAndEnforceChildren(
                    node,
                    singleStream().withOrderSensitivity(),
                    singleStream().withOrderSensitivity());
        }

        //
        // Nodes that always require a single stream
        //

        @Override
        public PlanWithProperties visitSort(SortNode node, StreamPreferredProperties parentPreferences)
        {
            // sort requires that all data be in one stream
            // this node changes the input organization completely, so we do not pass through parent preferences
            return planAndEnforceChildren(node, singleStream(), defaultParallelism(session));
        }

        @Override
        public PlanWithProperties visitTableFinish(TableFinishNode node, StreamPreferredProperties parentPreferences)
        {
            // table commit requires that all data be in one stream
            // this node changes the input organization completely, so we do not pass through parent preferences
            return planAndEnforceChildren(node, singleStream(), defaultParallelism(session));
        }

        @Override
        public PlanWithProperties visitTopN(TopNNode node, StreamPreferredProperties parentPreferences)
        {
            if (node.isPartial()) {
                // a partial topN is handled like a generic node (same arguments as visitPlan)
                return planAndEnforceChildren(
                        node,
                        parentPreferences.withoutPreference().withDefaultParallelism(session),
                        parentPreferences.withDefaultParallelism(session));
            }

            // final topN requires that all data be in one stream
            // also, a final changes the input organization completely, so we do not pass through parent preferences
            return planAndEnforceChildren(
                    node,
                    singleStream(),
                    defaultParallelism(session));
        }

        @Override
        public PlanWithProperties visitLimit(LimitNode node, StreamPreferredProperties parentPreferences)
        {
            if (node.isPartial()) {
                // a partial limit is handled like a generic node (same arguments as visitPlan)
                return planAndEnforceChildren(
                        node,
                        parentPreferences.withoutPreference().withDefaultParallelism(session),
                        parentPreferences.withDefaultParallelism(session));
            }

            // final limit requires that all data be in one stream
            // also, a final changes the input organization completely, so we do not pass through parent preferences
            return planAndEnforceChildren(
                    node,
                    singleStream(),
                    defaultParallelism(session));
        }

        @Override
        public PlanWithProperties visitDistinctLimit(DistinctLimitNode node, StreamPreferredProperties parentPreferences)
        {
            StreamPreferredProperties requiredProperties;
            StreamPreferredProperties preferredProperties;
            if (node.isPartial()) {
                // a partial distinct limit is handled like a generic node (same arguments as visitPlan)
                requiredProperties = parentPreferences.withoutPreference().withDefaultParallelism(session);
                preferredProperties = parentPreferences.withDefaultParallelism(session);
            }
            else {
                // final limit requires that all data be in one stream
                // a final changes the input organization completely, so we do not pass through parent preferences
                requiredProperties = singleStream();
                preferredProperties = defaultParallelism(session);
            }

            return planAndEnforceChildren(node, requiredProperties, preferredProperties);
        }

        /**
         * Requires a single input stream; the parent preferences are not passed through.
         */
        @Override
        public PlanWithProperties visitEnforceSingleRow(EnforceSingleRowNode node, StreamPreferredProperties parentPreferences)
        {
            return planAndEnforceChildren(node, singleStream(), defaultParallelism(session));
        }

        //
        // Nodes that require parallel streams to be partitioned
        //

        /**
         * Aggregations with an empty grouping set are gathered to a single stream;
         * otherwise the input is required to be partitioned on the symbols common
         * to every grouping set.
         *
         * @throws IllegalStateException if the aggregation step is not {@code SINGLE}
         */
        @Override
        public PlanWithProperties visitAggregation(AggregationNode node, StreamPreferredProperties parentPreferences)
        {
            checkState(node.getStep() == AggregationNode.Step.SINGLE, "step of aggregation is expected to be SINGLE, but it is %s", node.getStep());

            // aggregations would benefit from the finals being hash partitioned on groupId, however, we need to gather because the final HashAggregationOperator
            // needs to know whether input was received at the query level.
            if (node.getGroupingSets().stream().anyMatch(List::isEmpty)) {
                return planAndEnforceChildren(node, singleStream(), defaultParallelism(session));
            }

            // partition on the intersection of all grouping sets
            Set<Symbol> partitioningRequirement = new HashSet<>(node.getGroupingSets().get(0));
            for (int i = 1; i < node.getGroupingSets().size(); i++) {
                partitioningRequirement.retainAll(node.getGroupingSets().get(i));
            }

            // the required and the preferred child properties are the same expression, so build it once
            StreamPreferredProperties childProperties = parentPreferences.withDefaultParallelism(session)
                    .withPartitioning(partitioningRequirement);

            return planAndEnforceChildren(node, childProperties, childProperties);
        }

        /**
         * Requires the window input to be partitioned on the window's partition-by
         * symbols, then rebuilds the window node recording which partitioning
         * symbols the child already provides grouped and how many leading
         * order-by symbols it already provides sorted.
         */
        @Override
        public PlanWithProperties visitWindow(WindowNode node, StreamPreferredProperties parentPreferences)
        {
            StreamPreferredProperties childRequirements = parentPreferences
                    .constrainTo(node.getSource().getOutputSymbols())
                    .withDefaultParallelism(session)
                    .withPartitioning(node.getPartitionBy());

            PlanWithProperties child = planAndEnforce(node.getSource(), childRequirements, childRequirements);

            // desired local properties: grouping on the partition-by symbols (when present),
            // followed by one sorting property per order-by symbol
            List<LocalProperty<Symbol>> desiredProperties = new ArrayList<>();
            if (!node.getPartitionBy().isEmpty()) {
                desiredProperties.add(new GroupingProperty<>(node.getPartitionBy()));
            }
            for (Symbol symbol : node.getOrderBy()) {
                desiredProperties.add(new SortingProperty<>(symbol, node.getOrderings().get(symbol)));
            }
            // one match result per desired property, consumed in the same order below;
            // this code treats an absent result as "nothing left to enforce for that property"
            Iterator<Optional<LocalProperty<Symbol>>> matchIterator = LocalProperties.match(child.getProperties().getLocalProperties(), desiredProperties).iterator();

            Set<Symbol> prePartitionedInputs = ImmutableSet.of();
            if (!node.getPartitionBy().isEmpty()) {
                // first result corresponds to the grouping property added above
                Optional<LocalProperty<Symbol>> groupingRequirement = matchIterator.next();
                Set<Symbol> unPartitionedInputs = groupingRequirement.map(LocalProperty::getColumns).orElse(ImmutableSet.of());
                prePartitionedInputs = node.getPartitionBy().stream()
                        .filter(symbol -> !unPartitionedInputs.contains(symbol))
                        .collect(toImmutableSet());
            }

            // the sorted prefix is only counted when every partition-by symbol is pre-partitioned
            int preSortedOrderPrefix = 0;
            if (prePartitionedInputs.equals(ImmutableSet.copyOf(node.getPartitionBy()))) {
                while (matchIterator.hasNext() && !matchIterator.next().isPresent()) {
                    preSortedOrderPrefix++;
                }
            }

            WindowNode result = new WindowNode(
                    node.getId(),
                    child.getNode(),
                    node.getSpecification(),
                    node.getWindowFunctions(),
                    node.getHashSymbol(),
                    prePartitionedInputs,
                    preSortedOrderPrefix);

            return deriveProperties(result, child.getProperties());
        }

        @Override
        public PlanWithProperties visitMarkDistinct(MarkDistinctNode node, StreamPreferredProperties parentPreferences)
        {
            // mark distinct requires that all data be partitioned
            StreamPreferredProperties requiredProperties = parentPreferences.withDefaultParallelism(session).withPartitioning(node.getDistinctSymbols());
            return planAndEnforceChildren(node, requiredProperties, requiredProperties);
        }

        @Override
        public PlanWithProperties visitRowNumber(RowNumberNode node, StreamPreferredProperties parentPreferences)
        {
            // row number requires that all data be partitioned
            StreamPreferredProperties requiredProperties = parentPreferences.withDefaultParallelism(session).withPartitioning(node.getPartitionBy());
            return planAndEnforceChildren(node, requiredProperties, requiredProperties);
        }

        @Override
        public PlanWithProperties visitTopNRowNumber(TopNRowNumberNode node, StreamPreferredProperties parentPreferences)
        {
            StreamPreferredProperties requiredProperties = parentPreferences.withDefaultParallelism(session);

            // final topN row number requires that all data be partitioned
            if (!node.isPartial()) {
                requiredProperties = requiredProperties.withPartitioning(node.getPartitionBy());
            }

            return planAndEnforceChildren(node, requiredProperties, requiredProperties);
        }

        //
        // Table Writer
        //

        /**
         * With a task writer count above one the writer input is required to have
         * fixed parallelism; otherwise it is required to be a single stream.
         */
        @Override
        public PlanWithProperties visitTableWriter(TableWriterNode node, StreamPreferredProperties parentPreferences)
        {
            StreamPreferredProperties requiredProperties;
            StreamPreferredProperties preferredProperties;
            if (getTaskWriterCount(session) > 1) {
                requiredProperties = fixedParallelism();
                preferredProperties = fixedParallelism();
            }
            else {
                requiredProperties = singleStream();
                preferredProperties = defaultParallelism(session);
            }
            return planAndEnforceChildren(node, requiredProperties, preferredProperties);
        }

        //
        // Exchanges
        //

        /**
         * @throws IllegalArgumentException if the plan already contains a local exchange
         */
        @Override
        public PlanWithProperties visitExchange(ExchangeNode node, StreamPreferredProperties parentPreferences)
        {
            checkArgument(node.getScope() != LOCAL, "AddLocalExchanges can not process a plan containing a local exchange");
            // this node changes the input organization completely, so we do not pass through parent preferences
            return planAndEnforceChildren(node, any(), defaultParallelism(session));
        }

        /**
         * Replaces the union with a local exchange over the union's (rewritten)
         * sources. The exchange kind follows the parent preference: a gather when
         * a single stream is preferred, a hash repartition when partitioning
         * columns are preferred, and an arbitrary repartition otherwise.
         */
        @Override
        public PlanWithProperties visitUnion(UnionNode node, StreamPreferredProperties preferredProperties)
        {
            // Union is replaced with an exchange which does not retain streaming properties from the children
            List<PlanWithProperties> sourcesWithProperties = node.getSources().stream()
                    .map(source -> source.accept(this, defaultParallelism(session)))
                    .collect(toImmutableList());

            List<PlanNode> sources = sourcesWithProperties.stream()
                    .map(PlanWithProperties::getNode)
                    .collect(toImmutableList());

            List<StreamProperties> inputProperties = sourcesWithProperties.stream()
                    .map(PlanWithProperties::getProperties)
                    .collect(toImmutableList());

            // per-source input layout, in source order
            List<List<Symbol>> inputLayouts = new ArrayList<>(sources.size());
            for (int i = 0; i < sources.size(); i++) {
                inputLayouts.add(node.sourceOutputLayout(i));
            }

            if (preferredProperties.isSingleStreamPreferred()) {
                ExchangeNode exchangeNode = new ExchangeNode(
                        idAllocator.getNextId(),
                        GATHER,
                        LOCAL,
                        new PartitioningScheme(Partitioning.create(SINGLE_DISTRIBUTION, ImmutableList.of()), node.getOutputSymbols()),
                        sources,
                        inputLayouts);
                return deriveProperties(exchangeNode, inputProperties);
            }

            Optional<List<Symbol>> preferredPartitionColumns = preferredProperties.getPartitioningColumns();
            if (preferredPartitionColumns.isPresent()) {
                ExchangeNode exchangeNode = new ExchangeNode(
                        idAllocator.getNextId(),
                        REPARTITION,
                        LOCAL,
                        new PartitioningScheme(
                                Partitioning.create(FIXED_HASH_DISTRIBUTION, preferredPartitionColumns.get()),
                                node.getOutputSymbols(),
                                Optional.empty()),
                        sources,
                        inputLayouts);
                return deriveProperties(exchangeNode, inputProperties);
            }

            // multiple streams preferred
            ExchangeNode exchangeNode = new ExchangeNode(
                    idAllocator.getNextId(),
                    REPARTITION,
                    LOCAL,
                    new PartitioningScheme(Partitioning.create(FIXED_ARBITRARY_DISTRIBUTION, ImmutableList.of()), node.getOutputSymbols()),
                    sources,
                    inputLayouts);
            return deriveProperties(exchangeNode, inputProperties);
        }

        //
        // Joins
        //

        /**
         * Plans the probe (left) side with the parent preferences constrained to
         * its outputs, and the build (right) side either partitioned exactly on
         * the build-side join symbols (task concurrency above one) or as a single stream.
         */
        @Override
        public PlanWithProperties visitJoin(JoinNode node, StreamPreferredProperties parentPreferences)
        {
            PlanWithProperties probe = planAndEnforce(
                    node.getLeft(),
                    defaultParallelism(session),
                    parentPreferences.constrainTo(node.getLeft().getOutputSymbols()).withDefaultParallelism(session));

            // this build consumes the input completely, so we do not pass through parent preferences
            List<Symbol> buildHashSymbols = Lists.transform(node.getCriteria(), JoinNode.EquiJoinClause::getRight);
            StreamPreferredProperties buildPreference;
            if (getTaskConcurrency(session) > 1) {
                buildPreference = exactlyPartitionedOn(buildHashSymbols);
            }
            else {
                buildPreference = singleStream();
            }
            PlanWithProperties build = planAndEnforce(node.getRight(), buildPreference, buildPreference);

            return rebaseAndDeriveProperties(node, ImmutableList.of(probe, build));
        }

        /**
         * Plans the source with the parent preferences constrained to its outputs
         * and forces the filtering source into a single stream.
         */
        @Override
        public PlanWithProperties visitSemiJoin(SemiJoinNode node, StreamPreferredProperties parentPreferences)
        {
            PlanWithProperties source = planAndEnforce(
                    node.getSource(),
                    defaultParallelism(session),
                    parentPreferences.constrainTo(node.getSource().getOutputSymbols()).withDefaultParallelism(session));

            // this filter source consumes the input completely, so we do not pass through parent preferences
            PlanWithProperties filteringSource = planAndEnforce(node.getFilteringSource(), singleStream(), singleStream());

            return rebaseAndDeriveProperties(node, ImmutableList.of(source, filteringSource));
        }

        /**
         * Plans only the probe side; the index source is left untouched and is
         * merely verified to produce a single stream.
         *
         * @throws IllegalArgumentException if the index source is not single-stream
         */
        @Override
        public PlanWithProperties visitIndexJoin(IndexJoinNode node, StreamPreferredProperties parentPreferences)
        {
            PlanWithProperties probe = planAndEnforce(
                    node.getProbeSource(),
                    defaultParallelism(session),
                    parentPreferences.constrainTo(node.getProbeSource().getOutputSymbols()).withDefaultParallelism(session));

            // index source does not support local parallel and must produce a single stream
            StreamProperties indexStreamProperties = derivePropertiesRecursively(node.getIndexSource());
            checkArgument(indexStreamProperties.getDistribution() == SINGLE, "index source must be single stream");
            PlanWithProperties index = new PlanWithProperties(node.getIndexSource(), indexStreamProperties);

            return rebaseAndDeriveProperties(node, ImmutableList.of(probe, index));
        }

        //
        // Helpers
        //

        /**
         * Plans every source of {@code node} under {@code preferredProperties},
         * enforces {@code requiredProperties} on each, and returns {@code node}
         * rebased onto the resulting children.
         */
        private PlanWithProperties planAndEnforceChildren(PlanNode node, StreamPreferredProperties requiredProperties, StreamPreferredProperties preferredProperties)
        {
            // plan and enforce each child, but strip any requirement not in terms of symbols produced from the child
            // Note: this assumes the child uses the same symbols as the parent
            List<PlanWithProperties> children = node.getSources().stream()
                    .map(source -> planAndEnforce(
                            source,
                            requiredProperties.constrainTo(source.getOutputSymbols()),
                            preferredProperties.constrainTo(source.getOutputSymbols())))
                    .collect(toImmutableList());

            return rebaseAndDeriveProperties(node, children);
        }

        /**
         * Plans {@code node} using {@code preferredProperties} and then enforces
         * {@code requiredProperties} on the result.
         *
         * @throws IllegalArgumentException if either property set names partitioning
         *         columns that are not among the node's output symbols
         */
        private PlanWithProperties planAndEnforce(PlanNode node, StreamPreferredProperties requiredProperties, StreamPreferredProperties preferredProperties)
        {
            // verify properties are in terms of symbols produced by the node
            List<Symbol> outputSymbols = node.getOutputSymbols();
            Optional<List<Symbol>> requiredColumns = requiredProperties.getPartitioningColumns();
            checkArgument(
                    requiredColumns.map(outputSymbols::containsAll).orElse(true),
                    "required partitioning columns %s are not all produced by the node (outputs: %s)",
                    requiredColumns,
                    outputSymbols);
            Optional<List<Symbol>> preferredColumns = preferredProperties.getPartitioningColumns();
            checkArgument(
                    preferredColumns.map(outputSymbols::containsAll).orElse(true),
                    "preferred partitioning columns %s are not all produced by the node (outputs: %s)",
                    preferredColumns,
                    outputSymbols);

            // plan the node using the preferred properties
            PlanWithProperties result = node.accept(this, preferredProperties);

            // enforce the required properties
            result = enforce(result, requiredProperties);

            return result;
        }

        /**
         * Returns {@code planWithProperties} unchanged when it already satisfies
         * {@code requiredProperties}; otherwise wraps it in the local exchange
         * that matches the requirement (gather, arbitrary repartition, or
         * repartition on the required columns).
         */
        private PlanWithProperties enforce(PlanWithProperties planWithProperties, StreamPreferredProperties requiredProperties)
        {
            if (requiredProperties.isSatisfiedBy(planWithProperties.getProperties())) {
                return planWithProperties;
            }

            if (requiredProperties.isSingleStreamPreferred()) {
                ExchangeNode exchangeNode = gatheringExchange(idAllocator.getNextId(), LOCAL, planWithProperties.getNode());
                return deriveProperties(exchangeNode, planWithProperties.getProperties());
            }

            Optional<List<Symbol>> requiredPartitionColumns = requiredProperties.getPartitioningColumns();
            if (!requiredPartitionColumns.isPresent()) {
                // unpartitioned parallel streams required
                ExchangeNode exchangeNode = partitionedExchange(
                        idAllocator.getNextId(),
                        LOCAL,
                        planWithProperties.getNode(),
                        new PartitioningScheme(Partitioning.create(FIXED_ARBITRARY_DISTRIBUTION, ImmutableList.of()), planWithProperties.getNode().getOutputSymbols()));

                return deriveProperties(exchangeNode, planWithProperties.getProperties());
            }

            if (requiredProperties.isParallelPreferred()) {
                // partitioned parallel streams required
                ExchangeNode exchangeNode = partitionedExchange(
                        idAllocator.getNextId(),
                        LOCAL,
                        planWithProperties.getNode(),
                        requiredPartitionColumns.get(),
                        Optional.empty());
                return deriveProperties(exchangeNode, planWithProperties.getProperties());
            }

            // no explicit parallel requirement, so gather to a single stream
            ExchangeNode exchangeNode = gatheringExchange(
                    idAllocator.getNextId(),
                    LOCAL,
                    planWithProperties.getNode());
            return deriveProperties(exchangeNode, planWithProperties.getProperties());
        }

        /**
         * Replaces the children of {@code node} with the planned children and
         * derives the stream properties of the resulting node from theirs.
         */
        private PlanWithProperties rebaseAndDeriveProperties(PlanNode node, List<PlanWithProperties> children)
        {
            PlanNode result = replaceChildren(
                    node,
                    children.stream()
                            .map(PlanWithProperties::getNode)
                            .collect(toList()));

            List<StreamProperties> inputProperties = children.stream()
                    .map(PlanWithProperties::getProperties)
                    .collect(toImmutableList());

            return deriveProperties(result, inputProperties);
        }

        /** Pairs {@code result} with the stream properties derived from a single input. */
        private PlanWithProperties deriveProperties(PlanNode result, StreamProperties inputProperties)
        {
            return new PlanWithProperties(result, StreamPropertyDerivations.deriveProperties(result, inputProperties, metadata, session, types, parser));
        }

        /** Pairs {@code result} with the stream properties derived from all of its inputs. */
        private PlanWithProperties deriveProperties(PlanNode result, List<StreamProperties> inputProperties)
        {
            return new PlanWithProperties(result, StreamPropertyDerivations.deriveProperties(result, inputProperties, metadata, session, types, parser));
        }

        /**
         * Derives stream properties for {@code node} bottom-up without rewriting
         * it: the properties of every source are derived first and then fed into
         * the derivation for {@code node} itself.
         */
        private StreamProperties derivePropertiesRecursively(PlanNode node)
        {
            List<StreamProperties> inputProperties = node.getSources().stream()
                    .map(this::derivePropertiesRecursively)
                    .collect(toImmutableList());
            return StreamPropertyDerivations.deriveProperties(node, inputProperties, metadata, session, types, parser);
        }
    }

    /**
     * Immutable pair of a plan node and the stream properties derived for it.
     */
    private static class PlanWithProperties
    {
        private final PlanNode node;
        private final StreamProperties properties;

        public PlanWithProperties(PlanNode node, StreamProperties properties)
        {
            this.node = requireNonNull(node, "node is null");
            this.properties = requireNonNull(properties, "StreamProperties is null");
        }

        public PlanNode getNode()
        {
            return node;
        }

        public StreamProperties getProperties()
        {
            return properties;
        }
    }
}