/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.sql.planner; import com.facebook.presto.Session; import com.facebook.presto.metadata.Metadata; import com.facebook.presto.metadata.TableLayout; import com.facebook.presto.metadata.TableLayout.NodePartitioning; import com.facebook.presto.spi.connector.ConnectorPartitioningHandle; import com.facebook.presto.spi.type.Type; import com.facebook.presto.sql.planner.plan.ExchangeNode; import com.facebook.presto.sql.planner.plan.ExplainAnalyzeNode; import com.facebook.presto.sql.planner.plan.IndexJoinNode; import com.facebook.presto.sql.planner.plan.JoinNode; import com.facebook.presto.sql.planner.plan.MetadataDeleteNode; import com.facebook.presto.sql.planner.plan.OutputNode; import com.facebook.presto.sql.planner.plan.PlanFragmentId; import com.facebook.presto.sql.planner.plan.PlanNode; import com.facebook.presto.sql.planner.plan.PlanNodeId; import com.facebook.presto.sql.planner.plan.PlanVisitor; import com.facebook.presto.sql.planner.plan.RemoteSourceNode; import com.facebook.presto.sql.planner.plan.SemiJoinNode; import com.facebook.presto.sql.planner.plan.SimplePlanRewriter; import com.facebook.presto.sql.planner.plan.TableFinishNode; import com.facebook.presto.sql.planner.plan.TableScanNode; import com.facebook.presto.sql.planner.plan.ValuesNode; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Maps; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.function.Consumer; import static com.facebook.presto.sql.planner.SystemPartitioningHandle.COORDINATOR_DISTRIBUTION; import static com.facebook.presto.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION; import static com.facebook.presto.sql.planner.SystemPartitioningHandle.SOURCE_DISTRIBUTION; import static com.facebook.presto.sql.planner.plan.ExchangeNode.Scope.REMOTE; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; import static com.google.common.base.Predicates.in; import static com.google.common.collect.ImmutableList.toImmutableList; import static java.lang.String.format; import static java.util.Objects.requireNonNull; /** * Splits a logical plan into fragments that can be shipped and executed on distributed nodes */ public class PlanFragmenter { private PlanFragmenter() { } public static SubPlan createSubPlans(Session session, Metadata metadata, Plan plan) { Fragmenter fragmenter = new Fragmenter(session, metadata, plan.getTypes()); FragmentProperties properties = new FragmentProperties(new PartitioningScheme(Partitioning.create(SINGLE_DISTRIBUTION, ImmutableList.of()), plan.getRoot().getOutputSymbols())) .setSingleNodeDistribution(); PlanNode root = SimplePlanRewriter.rewriteWith(fragmenter, plan.getRoot(), properties); SubPlan result = fragmenter.buildRootFragment(root, properties); checkState(result.getFragment().getPartitioning().isSingleNode(), "Root of PlanFragment is not single node"); result.sanityCheck(); return result; } private static class Fragmenter extends SimplePlanRewriter<FragmentProperties> { private static final int ROOT_FRAGMENT_ID = 0; private final Session session; private final Metadata metadata; private final Map<Symbol, Type> types; private int nextFragmentId = ROOT_FRAGMENT_ID + 1; public Fragmenter(Session session, Metadata metadata, Map<Symbol, Type> types) { this.session = requireNonNull(session, "session is null"); this.metadata = requireNonNull(metadata, "metadata is null"); this.types = ImmutableMap.copyOf(requireNonNull(types, "types is null")); } public SubPlan buildRootFragment(PlanNode root, FragmentProperties properties) { return buildFragment(root, properties, new PlanFragmentId(String.valueOf(ROOT_FRAGMENT_ID))); } private PlanFragmentId nextFragmentId() { return new PlanFragmentId(String.valueOf(nextFragmentId++)); } private SubPlan buildFragment(PlanNode root, FragmentProperties properties, PlanFragmentId fragmentId) { Set<Symbol> dependencies = SymbolExtractor.extract(root); List<PlanNodeId> schedulingOrder = new SchedulingOrderVisitor().getSchedulingOrder(root); boolean equals = properties.getPartitionedSources().equals(ImmutableSet.copyOf(schedulingOrder)); checkArgument(equals, "Expected scheduling order (%s) to contain an entry for all partitioned sources (%s)", schedulingOrder, properties.getPartitionedSources()); PlanFragment fragment = new PlanFragment( fragmentId, root, Maps.filterKeys(types, in(dependencies)), properties.getPartitioningHandle(), schedulingOrder, properties.getPartitioningScheme()); return new SubPlan(fragment, properties.getChildren()); } @Override public PlanNode visitOutput(OutputNode node, RewriteContext<FragmentProperties> context) { context.get().setSingleNodeDistribution(); // TODO: add support for distributed output return context.defaultRewrite(node, context.get()); } @Override public PlanNode visitExplainAnalyze(ExplainAnalyzeNode node, RewriteContext<FragmentProperties> context) { context.get().setCoordinatorOnlyDistribution(); return context.defaultRewrite(node, context.get()); } @Override public PlanNode visitTableFinish(TableFinishNode node, RewriteContext<FragmentProperties> context) { context.get().setCoordinatorOnlyDistribution(); return context.defaultRewrite(node, context.get()); } @Override public PlanNode visitMetadataDelete(MetadataDeleteNode node, RewriteContext<FragmentProperties> context) { context.get().setCoordinatorOnlyDistribution(); return context.defaultRewrite(node, context.get()); } @Override public PlanNode visitTableScan(TableScanNode node, RewriteContext<FragmentProperties> context) { PartitioningHandle partitioning = node.getLayout() .map(layout -> metadata.getLayout(session, layout)) .flatMap(TableLayout::getNodePartitioning) .map(NodePartitioning::getPartitioningHandle) .orElse(SOURCE_DISTRIBUTION); context.get().addSourceDistribution(node.getId(), partitioning); return context.defaultRewrite(node, context.get()); } @Override public PlanNode visitValues(ValuesNode node, RewriteContext<FragmentProperties> context) { context.get().setSingleNodeDistribution(); return context.defaultRewrite(node, context.get()); } @Override public PlanNode visitExchange(ExchangeNode exchange, RewriteContext<FragmentProperties> context) { if (exchange.getScope() != REMOTE) { return context.defaultRewrite(exchange, context.get()); } PartitioningScheme partitioningScheme = exchange.getPartitioningScheme(); if (exchange.getType() == ExchangeNode.Type.GATHER) { context.get().setSingleNodeDistribution(); } else if (exchange.getType() == ExchangeNode.Type.REPARTITION) { context.get().setDistribution(partitioningScheme.getPartitioning().getHandle()); } ImmutableList.Builder<SubPlan> builder = ImmutableList.builder(); for (int sourceIndex = 0; sourceIndex < exchange.getSources().size(); sourceIndex++) { FragmentProperties childProperties = new FragmentProperties(partitioningScheme.translateOutputLayout(exchange.getInputs().get(sourceIndex))); builder.add(buildSubPlan(exchange.getSources().get(sourceIndex), childProperties, context)); } List<SubPlan> children = builder.build(); context.get().addChildren(children); List<PlanFragmentId> childrenIds = children.stream() .map(SubPlan::getFragment) .map(PlanFragment::getId) .collect(toImmutableList()); return new RemoteSourceNode(exchange.getId(), childrenIds, exchange.getOutputSymbols()); } private SubPlan buildSubPlan(PlanNode node, FragmentProperties properties, RewriteContext<FragmentProperties> context) { PlanFragmentId planFragmentId = nextFragmentId(); PlanNode child = context.rewrite(node, properties); return buildFragment(child, properties, planFragmentId); } } private static class FragmentProperties { private final List<SubPlan> children = new ArrayList<>(); private final PartitioningScheme partitioningScheme; private Optional<PartitioningHandle> partitioningHandle = Optional.empty(); private final Set<PlanNodeId> partitionedSources = new HashSet<>(); public FragmentProperties(PartitioningScheme partitioningScheme) { this.partitioningScheme = partitioningScheme; } public List<SubPlan> getChildren() { return children; } public FragmentProperties setSingleNodeDistribution() { if (partitioningHandle.isPresent() && partitioningHandle.get().isSingleNode()) { // already single node distribution return this; } checkState(!partitioningHandle.isPresent(), "Cannot overwrite partitioning with %s (currently set to %s)", SINGLE_DISTRIBUTION, partitioningHandle); partitioningHandle = Optional.of(SINGLE_DISTRIBUTION); return this; } public FragmentProperties setDistribution(PartitioningHandle distribution) { if (partitioningHandle.isPresent()) { chooseDistribution(distribution); return this; } partitioningHandle = Optional.of(distribution); return this; } private void chooseDistribution(PartitioningHandle distribution) { checkState(partitioningHandle.isPresent(), "No partitioning to choose from"); if (partitioningHandle.get().equals(distribution) || partitioningHandle.get().isSingleNode() || isCompatibleSystemPartitioning(distribution)) { return; } if (partitioningHandle.get().equals(SOURCE_DISTRIBUTION)) { partitioningHandle = Optional.of(distribution); return; } throw new IllegalStateException(format( "Cannot set distribution to %s. Already set to %s", distribution, partitioningHandle)); } private boolean isCompatibleSystemPartitioning(PartitioningHandle distribution) { ConnectorPartitioningHandle currentHandle = partitioningHandle.get().getConnectorHandle(); ConnectorPartitioningHandle distributionHandle = distribution.getConnectorHandle(); if ((currentHandle instanceof SystemPartitioningHandle) && (distributionHandle instanceof SystemPartitioningHandle)) { return ((SystemPartitioningHandle) currentHandle).getPartitioning() == ((SystemPartitioningHandle) distributionHandle).getPartitioning(); } return false; } public FragmentProperties setCoordinatorOnlyDistribution() { if (partitioningHandle.isPresent() && partitioningHandle.get().isCoordinatorOnly()) { // already single node distribution return this; } // only system SINGLE can be upgraded to COORDINATOR_ONLY checkState(!partitioningHandle.isPresent() || partitioningHandle.get().equals(SINGLE_DISTRIBUTION), "Cannot overwrite partitioning with %s (currently set to %s)", COORDINATOR_DISTRIBUTION, partitioningHandle); partitioningHandle = Optional.of(COORDINATOR_DISTRIBUTION); return this; } public FragmentProperties addSourceDistribution(PlanNodeId source, PartitioningHandle distribution) { requireNonNull(source, "source is null"); requireNonNull(distribution, "distribution is null"); partitionedSources.add(source); if (partitioningHandle.isPresent()) { PartitioningHandle currentPartitioning = partitioningHandle.get(); if (!currentPartitioning.equals(distribution)) { // If already system SINGLE or COORDINATOR_ONLY, leave it as is (this is for single-node execution) checkState( currentPartitioning.equals(SINGLE_DISTRIBUTION) || currentPartitioning.equals(COORDINATOR_DISTRIBUTION), "Cannot overwrite distribution with %s (currently set to %s)", distribution, currentPartitioning); return this; } } partitioningHandle = Optional.of(distribution); return this; } public FragmentProperties addChildren(List<SubPlan> children) { this.children.addAll(children); return this; } public PartitioningScheme getPartitioningScheme() { return partitioningScheme; } public PartitioningHandle getPartitioningHandle() { return partitioningHandle.get(); } public Set<PlanNodeId> getPartitionedSources() { return partitionedSources; } } private static class SchedulingOrderVisitor extends PlanVisitor<Consumer<PlanNodeId>, Void> { public List<PlanNodeId> getSchedulingOrder(PlanNode node) { ImmutableList.Builder<PlanNodeId> schedulingOrder = ImmutableList.builder(); node.accept(this, schedulingOrder::add); return schedulingOrder.build(); } @Override protected Void visitPlan(PlanNode node, Consumer<PlanNodeId> schedulingOrder) { for (PlanNode source : node.getSources()) { source.accept(this, schedulingOrder); } return null; } @Override public Void visitJoin(JoinNode node, Consumer<PlanNodeId> schedulingOrder) { node.getRight().accept(this, schedulingOrder); node.getLeft().accept(this, schedulingOrder); return null; } @Override public Void visitSemiJoin(SemiJoinNode node, Consumer<PlanNodeId> schedulingOrder) { node.getFilteringSource().accept(this, schedulingOrder); node.getSource().accept(this, schedulingOrder); return null; } @Override public Void visitIndexJoin(IndexJoinNode node, Consumer<PlanNodeId> schedulingOrder) { node.getIndexSource().accept(this, schedulingOrder); node.getProbeSource().accept(this, schedulingOrder); return null; } @Override public Void visitTableScan(TableScanNode node, Consumer<PlanNodeId> schedulingOrder) { schedulingOrder.accept(node.getId()); return null; } } }