/**
 * Copyright 2011-2017 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.compiler.flow.plan;

import java.lang.annotation.Annotation;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.asakusafw.compiler.common.Precondition;
import com.asakusafw.compiler.flow.FlowCompilerOptions;
import com.asakusafw.compiler.flow.FlowCompilerOptions.GenericOptionValue;
import com.asakusafw.compiler.flow.FlowGraphRewriter;
import com.asakusafw.compiler.flow.FlowGraphRewriter.RewriteException;
import com.asakusafw.compiler.flow.debugging.Debug;
import com.asakusafw.compiler.flow.join.operator.SideDataBranch;
import com.asakusafw.compiler.flow.join.operator.SideDataCheck;
import com.asakusafw.utils.collections.Lists;
import com.asakusafw.utils.collections.Maps;
import com.asakusafw.utils.graph.Graph;
import com.asakusafw.utils.graph.Graphs;
import com.asakusafw.vocabulary.attribute.ViewInfo;
import com.asakusafw.vocabulary.flow.graph.Connectivity;
import com.asakusafw.vocabulary.flow.graph.FlowBoundary;
import com.asakusafw.vocabulary.flow.graph.FlowElement;
import com.asakusafw.vocabulary.flow.graph.FlowElementDescription;
import com.asakusafw.vocabulary.flow.graph.FlowElementInput;
import com.asakusafw.vocabulary.flow.graph.FlowElementKind;
import com.asakusafw.vocabulary.flow.graph.FlowElementOutput;
import com.asakusafw.vocabulary.flow.graph.FlowGraph;
import com.asakusafw.vocabulary.flow.graph.FlowIn;
import com.asakusafw.vocabulary.flow.graph.FlowOut;
import com.asakusafw.vocabulary.flow.graph.FlowPartDescription;
import com.asakusafw.vocabulary.flow.graph.Inline;
import com.asakusafw.vocabulary.flow.graph.OperatorDescription;
import com.asakusafw.vocabulary.flow.graph.PortConnection;
import com.asakusafw.vocabulary.operator.Branch;
import com.asakusafw.vocabulary.operator.Logging;
import com.asakusafw.vocabulary.operator.Project;
import com.asakusafw.vocabulary.operator.Restructure;
import com.asakusafw.vocabulary.operator.Split;
import com.asakusafw.vocabulary.operator.Trace;

/**
 * Creates an execution plan from flow graphs.
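 * <p>
 * A minimal usage sketch (illustrative only; the rewriters, options, and {@code flowGraph} below
 * stand in for whatever the surrounding compiler environment actually provides):
 * </p>
 * <pre>{@code
 * List<? extends FlowGraphRewriter> rewriters = ...; // rewriters supplied by the compiler environment
 * FlowCompilerOptions options = ...;                 // current compiler options
 * StagePlanner planner = new StagePlanner(rewriters, options);
 * StageGraph stages = planner.plan(flowGraph);
 * if (stages == null) {
 *     // planning failed: the reasons are available as diagnostics
 *     for (StagePlanner.Diagnostic diagnostic : planner.getDiagnostics()) {
 *         System.err.println(diagnostic);
 *     }
 * }
 * }</pre>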
 */
public class StagePlanner {

    static final String KEY_COMPRESS_FLOW_BLOCK_GROUP = "compressFlowBlockGroup"; //$NON-NLS-1$

    static final GenericOptionValue DEFAULT_COMPRESS_FLOW_BLOCK_GROUP = GenericOptionValue.ENABLED;

    static final Comparator<FlowGraphRewriter> REWRITER_COMPARATOR = new Comparator<FlowGraphRewriter>() {
        @Override
        public int compare(FlowGraphRewriter o1, FlowGraphRewriter o2) {
            int phaseDiff = o1.getPhase().compareTo(o2.getPhase());
            if (phaseDiff != 0) {
                return phaseDiff;
            }
            return o1.getClass().getName().compareTo(o2.getClass().getName());
        }
    };

    static final Logger LOG = LoggerFactory.getLogger(StagePlanner.class);

    private final List<? extends FlowGraphRewriter> rewriters;

    private final FlowCompilerOptions options;

    private final List<StagePlanner.Diagnostic> diagnostics = new ArrayList<>();

    private int blockSequence = 1;

    /**
     * Creates a new instance.
     * @param rewriters the flow graph rewriters to be applied
     * @param options the current compiler options
     * @throws IllegalArgumentException if the parameters are {@code null}
     */
    public StagePlanner(List<? extends FlowGraphRewriter> rewriters, FlowCompilerOptions options) {
        Precondition.checkMustNotBeNull(rewriters, "rewriters"); //$NON-NLS-1$
        Precondition.checkMustNotBeNull(options, "options"); //$NON-NLS-1$
        this.rewriters = sortRewriters(rewriters);
        this.options = options;
    }

    private List<? extends FlowGraphRewriter> sortRewriters(List<? extends FlowGraphRewriter> rw) {
        // sort rewriters so that rewriting is deterministic
        List<FlowGraphRewriter> results = new ArrayList<>(rw);
        Collections.sort(results, REWRITER_COMPARATOR);
        return results;
    }

    /**
     * Analyzes the flow graph and returns the corresponding stage graph.
     * @param graph the target flow graph
     * @return the analyzed result, or {@code null} if the flow graph is not valid
     * @throws IllegalArgumentException if the parameter is {@code null}
     * @see #getDiagnostics()
     */
    public StageGraph plan(FlowGraph graph) {
        Precondition.checkMustNotBeNull(graph, "graph"); //$NON-NLS-1$
        if (validate(graph) == false) {
            return null;
        }
        LOG.debug("creating logical plan: {}", graph); //$NON-NLS-1$
        LOG.debug("compressFlowPart: {}", options.isCompressFlowPart()); //$NON-NLS-1$
        LOG.debug("compressConcurrentStage: {}", options.isCompressConcurrentStage()); //$NON-NLS-1$
        FlowGraph copy = FlowGraphUtil.deepCopy(graph);
        if (rewrite(copy) == false) {
            return null;
        }
        normalizeFlowGraph(copy);
        StageGraph result = buildStageGraph(copy);
        return result;
    }

    private boolean rewrite(FlowGraph graph) {
        assert graph != null;
        LOG.debug("rewriting flow graph: {}", graph); //$NON-NLS-1$
        boolean modified = false;
        for (FlowGraphRewriter rewriter : rewriters) {
            try {
                modified |= rewriter.rewrite(graph);
            } catch (RewriteException e) {
                LOG.warn(MessageFormat.format(
                        Messages.getString("StagePlanner.warnFailedToRewrite"), //$NON-NLS-1$
                        rewriter.getClass().getName(),
                        e.getMessage()), e);
                error(
                        graph,
                        Collections.emptyList(),
                        Messages.getString("StagePlanner.errorFailedToRewrite"), //$NON-NLS-1$
                        e.getMessage());
                return false;
            }
        }
        if (modified && validate(graph) == false) {
            return false;
        }
        return true;
    }

    private void unifyGlobalSideEffects(FlowGraph graph) {
        assert graph != null;
        LOG.debug("processing operators w/ global side-effects: {}", graph); //$NON-NLS-1$
        for (FlowElement element : FlowGraphUtil.collectElements(graph)) {
            if (FlowGraphUtil.hasGlobalSideEffect(element)) {
                LOG.debug("inserting checkpoint before \"volatile\" operator: {}", element); //$NON-NLS-1$
                for (FlowElementOutput output : element.getOutputPorts()) {
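                    // each output of the side-effecting operator gets an explicit checkpoint
                    // (a stage boundary), so its results are materialized before downstream
                    // stages consume them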
                    FlowGraphUtil.insertCheckpoint(output);
                }
            }
        }
    }

    /**
     * Returns the diagnostic information collected while executing {@link #plan(FlowGraph)}.
     * @return the diagnostic information
     */
    public List<StagePlanner.Diagnostic> getDiagnostics() {
        return diagnostics;
    }

    /**
     * Creates a stage graph from a {@link #normalizeFlowGraph(FlowGraph) normalized flow graph}.
     * @param graph the target flow graph
     * @return the created stage graph
     */
    StageGraph buildStageGraph(FlowGraph graph) {
        assert graph != null;
        LOG.debug("building stage graph: {}", graph); //$NON-NLS-1$
        FlowBlock input = buildInputBlock(graph);
        FlowBlock output = buildOutputBlock(graph);
        List<FlowBlock> computation = buildComputationBlocks(graph);
        connectFlowBlocks(input, output, computation);
        detachFlowBlocks(input, output, computation);
        trimFlowBlocks(computation);
        List<StageBlock> stageBlocks = buildStageBlocks(computation);
        compressStageBlocks(stageBlocks);
        sortStageBlocks(stageBlocks);
        return new StageGraph(input, output, stageBlocks);
    }

    private void compressStageBlocks(List<StageBlock> blocks) {
        assert blocks != null;
        boolean changed;
        LOG.debug("compressing stage blocks"); //$NON-NLS-1$
        do {
            changed = false;
            Iterator<StageBlock> iter = blocks.iterator();
            while (iter.hasNext()) {
                StageBlock block = iter.next();
                changed |= block.compaction();
                if (block.isEmpty()) {
                    LOG.debug("removing empty stage block: {}", block); //$NON-NLS-1$
                    iter.remove();
                    changed = true;
                }
            }
        } while (changed);
    }

    private void sortStageBlocks(List<StageBlock> stageBlocks) {
        assert stageBlocks != null;
        LOG.debug("sorting stages in stage graph"); //$NON-NLS-1$
        Map<FlowBlock, StageBlock> membership = new HashMap<>();
        for (StageBlock stage : stageBlocks) {
            for (FlowBlock flow : stage.getMapBlocks()) {
                membership.put(flow, stage);
            }
            for (FlowBlock flow : stage.getReduceBlocks()) {
                membership.put(flow, stage);
            }
        }
        Graph<StageBlock> graph = Graphs.newInstance();
        for (Map.Entry<FlowBlock, StageBlock> entry : membership.entrySet()) {
            FlowBlock flow = entry.getKey();
            StageBlock stage = entry.getValue();
            graph.addNode(stage);
            for (FlowBlock.Output output : flow.getBlockOutputs()) {
                for (FlowBlock.Connection conn : output.getConnections()) {
                    FlowBlock succFlow = conn.getDownstream().getOwner();
                    StageBlock succ = membership.get(succFlow);
                    if (succ == null || succ == stage) {
                        continue;
                    }
                    graph.addEdge(succ, stage);
                }
            }
        }
        List<StageBlock> ordered = Graphs.sortPostOrder(graph);
        int stageNumber = 1;
        for (StageBlock stage : ordered) {
            stage.setStageNumber(stageNumber);
            stageNumber++;
        }
        Collections.sort(stageBlocks, (o1, o2) -> {
            int n1 = o1.getStageNumber();
            int n2 = o2.getStageNumber();
            if (n1 == n2) {
                return 0;
            } else if (n1 < n2) {
                return -1;
            } else {
                return +1;
            }
        });
    }

    private List<StageBlock> buildStageBlocks(List<FlowBlock> blocks) {
        assert blocks != null;
        LOG.debug("building stage blocks: {}", blocks); //$NON-NLS-1$
        List<StageBlock> results = new ArrayList<>();
        List<FlowBlockGroup> flowBlockGroups = collectFlowBlockGroups(blocks);
        compressFlowBlockGroups(flowBlockGroups);
        for (FlowBlockGroup group : flowBlockGroups) {
            if (group.reducer) {
                Set<FlowBlock> predecessors = getPredecessors(group.members);
                assert predecessors.isEmpty() == false;
                StageBlock stage = new StageBlock(predecessors, group.members);
                results.add(stage);
                LOG.debug("stage {}: map={}, reduce={}", new Object[] { //$NON-NLS-1$
                        stage,
                        predecessors,
                        group.members,
                });
            } else {
                StageBlock stage = new StageBlock(group.members, Collections.emptySet());
                results.add(stage);
                LOG.debug("stage {}: map={}, reduce=N/A", stage,
                        group.members); //$NON-NLS-1$
            }
        }
        return results;
    }

    private void compressFlowBlockGroups(List<FlowBlockGroup> flowBlockGroups) {
        assert flowBlockGroups != null;
        GenericOptionValue active = options.getGenericExtraAttribute(
                KEY_COMPRESS_FLOW_BLOCK_GROUP,
                DEFAULT_COMPRESS_FLOW_BLOCK_GROUP);
        if (active == GenericOptionValue.DISABLED) {
            return;
        }
        LOG.debug("Compressing flow blocks"); //$NON-NLS-1$

        // merge blocks
        List<FlowBlock> blocks = new ArrayList<>();
        Map<FlowBlock.Input, Set<FlowBlock.Input>> inputMapping = new HashMap<>();
        Map<FlowBlock.Output, Set<FlowBlock.Output>> outputMapping = new HashMap<>();
        for (FlowBlockGroup group : flowBlockGroups) {
            if (group.reducer) {
                Set<FlowBlock> predecessors = getPredecessors(group.members);
                if (predecessors.size() >= 2) {
                    LOG.debug("Compressing flow blocks: {}", predecessors); //$NON-NLS-1$
                    FlowBlock mergedPreds = FlowBlock.fromBlocks(predecessors, inputMapping, outputMapping);
                    group.predeceaseBlocks.clear();
                    group.predeceaseBlocks.add(mergedPreds);
                    blocks.add(mergedPreds);
                }
            }
            if (group.members.size() >= 2) {
                LOG.debug("Compressing flow blocks: {}", group.members); //$NON-NLS-1$
                FlowBlock mergedBlocks = FlowBlock.fromBlocks(group.members, inputMapping, outputMapping);
                group.members.clear();
                group.members.add(mergedBlocks);
                blocks.add(mergedBlocks);
            }
        }

        // reconnect
        for (Map.Entry<FlowBlock.Input, Set<FlowBlock.Input>> entry : inputMapping.entrySet()) {
            FlowBlock.Input origin = entry.getKey();
            for (FlowBlock.Connection conn : Lists.from(origin.getConnections())) {
                FlowBlock.Output opposite = conn.getUpstream();
                Collection<FlowBlock.Output> resolvedOpposites;
                if (outputMapping.containsKey(opposite)) {
                    resolvedOpposites = outputMapping.get(opposite);
                } else {
                    resolvedOpposites = Collections.singleton(opposite);
                }
                conn.disconnect();
                for (FlowBlock.Input mapped : entry.getValue()) {
                    for (FlowBlock.Output resolved : resolvedOpposites) {
                        FlowBlock.connect(resolved, mapped);
                    }
                }
            }
        }
        for (Map.Entry<FlowBlock.Output, Set<FlowBlock.Output>> entry : outputMapping.entrySet()) {
            FlowBlock.Output origin = entry.getKey();
            for (FlowBlock.Connection conn : Lists.from(origin.getConnections())) {
                FlowBlock.Input opposite = conn.getDownstream();
                Collection<FlowBlock.Input> resolvedOpposites;
                if (inputMapping.containsKey(opposite)) {
                    resolvedOpposites = inputMapping.get(opposite);
                } else {
                    resolvedOpposites = Collections.singleton(opposite);
                }
                conn.disconnect();
                for (FlowBlock.Output mapped : entry.getValue()) {
                    for (FlowBlock.Input resolved : resolvedOpposites) {
                        FlowBlock.connect(mapped, resolved);
                    }
                }
            }
        }

        // optimize
        detachFlowBlocks(blocks);
        unifyFlowBlocks(blocks);
        trimFlowBlocks(blocks);
    }

    private List<FlowBlockGroup> collectFlowBlockGroups(List<FlowBlock> blocks) {
        assert blocks != null;
        LOG.debug("collecting concurrent stages"); //$NON-NLS-1$
        LinkedList<FlowBlockGroup> groups = new LinkedList<>();
        for (FlowBlock block : blocks) {
            // ignores map blocks with following reduce blocks
            if (block.isReduceBlock() == false && block.isSucceedingReduceBlock()) {
                continue;
            }
            groups.add(new FlowBlockGroup(block));
        }

        if (options.isCompressConcurrentStage() == false) {
            LOG.debug("compressing concurrent stages is disabled"); //$NON-NLS-1$
            return Lists.from(groups);
        }
        LOG.debug("compressing concurrent stages"); //$NON-NLS-1$
        computeCriticalPaths(groups);

        // merges blocks
        List<FlowBlockGroup> results = new ArrayList<>();
        while (groups.isEmpty() == false) {
            FlowBlockGroup first = groups.removeFirst();
            Iterator<FlowBlockGroup> rest = groups.iterator();
            while (rest.hasNext()) {
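                // try to fold each remaining group into the current one; combine() only accepts
                // groups of the same kind (map/reduce) with the same critical-path distance,
                // so only genuinely concurrent stages are merged here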
                FlowBlockGroup next = rest.next();
                if (first.combine(next)) {
                    LOG.debug("merging flow block: {}, {}", first.founder, next.founder); //$NON-NLS-1$
                    rest.remove();
                }
            }
            results.add(first);
        }
        return results;
    }

    private void computeCriticalPaths(List<FlowBlockGroup> groups) {
        assert groups != null;
        Map<FlowBlock, FlowBlockGroup> mapping = new HashMap<>();
        LinkedList<FlowBlockGroup> work = new LinkedList<>();
        for (FlowBlockGroup group : groups) {
            work.add(group);
            mapping.put(group.founder, group);
        }
        PROPAGATION: while (work.isEmpty() == false) {
            int maxDistance = 0;
            FlowBlockGroup first = work.removeFirst();
            for (FlowBlock predecessor : first.predeceaseBlocks) {
                FlowBlockGroup predGroup = mapping.get(predecessor);
                if (predGroup.distance == -1) {
                    work.addLast(first);
                    continue PROPAGATION;
                } else {
                    maxDistance = Math.max(maxDistance, predGroup.distance);
                }
            }
            first.distance = maxDistance + 1;
        }
    }

    private Set<FlowBlock> getPredecessors(Set<FlowBlock> blocks) {
        assert blocks != null;
        Set<FlowBlock> results = new HashSet<>();
        for (FlowBlock block : blocks) {
            for (FlowBlock.Input port : block.getBlockInputs()) {
                for (FlowBlock.Connection conn : port.getConnections()) {
                    FlowBlock pred = conn.getUpstream().getOwner();
                    results.add(pred);
                }
            }
        }
        return results;
    }

    /**
     * Creates an input block which only contains flow inputs.
     * @param graph the target flow graph
     * @return the created block
     */
    private FlowBlock buildInputBlock(FlowGraph graph) {
        assert graph != null;
        List<FlowElementOutput> outputs = new ArrayList<>();
        Set<FlowElement> elements = new HashSet<>();
        for (FlowIn<?> node : graph.getFlowInputs()) {
            outputs.add(node.toOutputPort());
            elements.add(node.getFlowElement());
        }
        return FlowBlock.fromPorts(
                nextBlockSequenceNumber(),
                graph,
                Collections.emptyList(),
                outputs,
                elements);
    }

    /**
     * Creates an output block which only contains flow outputs.
     * @param graph the target flow graph
     * @return the created block
     */
    private FlowBlock buildOutputBlock(FlowGraph graph) {
        assert graph != null;
        List<FlowElementInput> inputs = new ArrayList<>();
        Set<FlowElement> elements = new HashSet<>();
        for (FlowOut<?> node : graph.getFlowOutputs()) {
            inputs.add(node.toInputPort());
            elements.add(node.getFlowElement());
        }
        return FlowBlock.fromPorts(
                nextBlockSequenceNumber(),
                graph,
                inputs,
                Collections.emptyList(),
                elements);
    }

    /**
     * Creates flow blocks from {@link #normalizeFlowGraph(FlowGraph) the normalized flow graph}.
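     * Roughly speaking, this produces three kinds of blocks: reduce blocks starting at shuffle
     * boundaries ({@code shuffle -> stage}), map blocks that feed a shuffle ({@code stage -> shuffle}),
     * and map blocks between plain stage boundaries ({@code stage -> stage}).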
     * @param graph the target flow graph
     * @return the created blocks
     */
    private List<FlowBlock> buildComputationBlocks(FlowGraph graph) {
        assert graph != null;
        LOG.debug("computing flow blocks: {}", graph); //$NON-NLS-1$

        // shuffle bound -> next stage bounds
        Collection<FlowPath> shuffleSuccessors = new HashSet<>();

        // shuffle bound <- previous stage bounds
        Collection<FlowPath> shufflePredecessors = new HashSet<>();

        // stage bound -> any next (shuffle/stage) bounds
        Map<FlowElement, FlowPath> stageSuccessors = new HashMap<>();

        // stage bound <- any previous (shuffle/stage) bounds
        Map<FlowElement, FlowPath> stagePredecessors = new HashMap<>();

        for (FlowElement boundary : FlowGraphUtil.collectBoundaries(graph)) {
            boolean shuffle = FlowGraphUtil.isShuffleBoundary(boundary);
            boolean success = FlowGraphUtil.hasSuccessors(boundary);
            boolean predecease = FlowGraphUtil.hasPredecessors(boundary);
            if (shuffle) {
                assert success;
                assert predecease;
                shuffleSuccessors.add(FlowGraphUtil.getSucceedBoundaryPath(boundary));
                shufflePredecessors.add(FlowGraphUtil.getPredeceaseBoundaryPath(boundary));
            } else {
                if (success) {
                    stageSuccessors.put(boundary, FlowGraphUtil.getSucceedBoundaryPath(boundary));
                }
                if (predecease) {
                    stagePredecessors.put(boundary, FlowGraphUtil.getPredeceaseBoundaryPath(boundary));
                }
            }
        }

        List<FlowBlock> results = new ArrayList<>();
        results.addAll(collectShuffleToStage(graph, shuffleSuccessors));
        results.addAll(collectStageToShuffle(graph, shufflePredecessors, stageSuccessors));
        results.addAll(collectStageToStage(graph, stageSuccessors, stagePredecessors));
        return results;
    }

    private List<FlowBlock> collectStageToStage(
            FlowGraph graph,
            Map<FlowElement, FlowPath> stageSuccessors,
            Map<FlowElement, FlowPath> stagePredecessors) {
        assert graph != null;
        assert stageSuccessors != null;
        assert stagePredecessors != null;
        LOG.debug("computing map blocks (w/o succeeding reducers): {}", graph); //$NON-NLS-1$

        // creates map blocks from (stage -> stage) path per their input
        List<FlowBlock> results = new ArrayList<>();
        Collection<FlowPath> ss = stageSuccessors.values();
        for (FlowPath stageForward : ss) {
            List<FlowPath> stageBackwards = new ArrayList<>();
            for (FlowElement arrival : stageForward.getArrivals()) {
                if (FlowGraphUtil.isShuffleBoundary(arrival) == false) {
                    FlowPath stageBackward = stagePredecessors.get(arrival);
                    assert stageBackward != null;
                    stageBackwards.add(stageBackward);
                }
            }
            if (stageBackwards.isEmpty()) {
                continue;
            }
            FlowPath backward = FlowGraphUtil.union(stageBackwards);
            FlowPath path = stageForward.transposeIntersect(backward);
            FlowBlock block = path.createBlock(
                    graph,
                    nextBlockSequenceNumber(),
                    false,
                    false);
            results.add(block);
            LOG.debug("add map block (stage -> stage): {} -> {}", //$NON-NLS-1$
                    block.getBlockInputs(), block.getBlockOutputs());
        }
        return results;
    }

    private List<FlowBlock> collectStageToShuffle(
            FlowGraph graph,
            Collection<FlowPath> shufflePredecessors,
            Map<FlowElement, FlowPath> stageSuccessors) {
        assert graph != null;
        assert shufflePredecessors != null;
        assert stageSuccessors != null;
        LOG.debug("computing map blocks (w/ succeeding reducers): {}", graph); //$NON-NLS-1$

        // creates map blocks from (stage -> shuffle) path per their input
        List<FlowBlock> results = new ArrayList<>();
        for (FlowPath shuffleBackward : shufflePredecessors) {
            Set<FlowElement> arrivals = shuffleBackward.getArrivals();
            for (FlowElement stageStart : arrivals) {
                assert FlowGraphUtil.isShuffleBoundary(stageStart) == false;
                FlowPath stageForward = stageSuccessors.get(stageStart);
                assert stageForward != null;
                FlowPath path = stageForward.transposeIntersect(shuffleBackward);
                FlowBlock block = path.createBlock(
                        graph,
                        nextBlockSequenceNumber(),
                        false,
                        false);
                results.add(block);
                LOG.debug("add map block (stage -> shuffle): {} -> {}", //$NON-NLS-1$
                        block.getBlockInputs(), block.getBlockOutputs());
            }
        }
        return results;
    }

    private List<FlowBlock> collectShuffleToStage(FlowGraph graph, Collection<FlowPath> shuffleSuccessors) {
        assert graph != null;
        assert shuffleSuccessors != null;
        LOG.debug("computing reduce blocks (w/ succeeding reducers): {}", graph); //$NON-NLS-1$

        // creates reduce blocks from [shuffle -> stage) path
        List<FlowBlock> results = new ArrayList<>();
        for (FlowPath path : shuffleSuccessors) {
            FlowBlock block = path.createBlock(
                    graph,
                    nextBlockSequenceNumber(),
                    true,
                    false);
            results.add(block);
            LOG.debug("add reduce block (shuffle -> stage): {} -> {}", //$NON-NLS-1$
                    block.getBlockInputs(), block.getBlockOutputs());
        }
        return results;
    }

    private int nextBlockSequenceNumber() {
        return blockSequence++;
    }

    private void connectFlowBlocks(
            FlowBlock inputBlock,
            FlowBlock outputBlock,
            List<FlowBlock> computationBlocks) {
        assert inputBlock != null;
        assert outputBlock != null;
        assert computationBlocks != null;
        LOG.debug("connecting flow blocks"); //$NON-NLS-1$
        List<FlowBlock> blocks = new ArrayList<>();
        blocks.add(inputBlock);
        blocks.add(outputBlock);
        blocks.addAll(computationBlocks);

        Map<PortConnection, Set<FlowBlock.Input>> mapping = new HashMap<>();
        for (FlowBlock block : blocks) {
            for (FlowBlock.Input input : block.getBlockInputs()) {
                for (PortConnection conn : input.getOriginalConnections()) {
                    Maps.addToSet(mapping, conn, input);
                }
            }
        }
        for (FlowBlock block : blocks) {
            for (FlowBlock.Output output : block.getBlockOutputs()) {
                for (PortConnection conn : output.getOriginalConnections()) {
                    Set<PortConnection> next = FlowGraphUtil.getSucceedingConnections(conn, mapping.keySet());
                    for (PortConnection successor : next) {
                        Set<FlowBlock.Input> connected = mapping.get(successor);
                        for (FlowBlock.Input opposite : connected) {
                            FlowBlock.connect(output, opposite);
                        }
                    }
                }
            }
        }
    }

    private void detachFlowBlocks(FlowBlock input, FlowBlock output, List<FlowBlock> computation) {
        assert input != null;
        assert output != null;
        assert computation != null;
        input.detach();
        output.detach();
        detachFlowBlocks(computation);
    }

    private void detachFlowBlocks(List<FlowBlock> blocks) {
        assert blocks != null;
        for (FlowBlock block : blocks) {
            block.detach();
        }
    }

    private void unifyFlowBlocks(List<FlowBlock> blocks) {
        assert blocks != null;
        for (FlowBlock block : blocks) {
            block.unify();
        }
    }

    private void trimFlowBlocks(List<FlowBlock> blocks) {
        assert blocks != null;
        boolean changed;
        LOG.debug("removing dead operators"); //$NON-NLS-1$
        do {
            changed = false;
            Iterator<FlowBlock> iter = blocks.iterator();
            while (iter.hasNext()) {
                FlowBlock block = iter.next();
                changed |= block.compaction();
                if (block.isEmpty()) {
                    LOG.debug("removing empty block: {}", block); //$NON-NLS-1$
                    iter.remove();
                    changed = true;
                }
            }
        } while (changed);
    }

    /**
     * Normalizes the target flow graph.
     * <ul>
     * <li> flatten flow-parts </li>
     * <li> remove pseudo-pseudo chains (e.g. nested confluent) </li>
     * <li> insert stage boundaries into ({@code shuffle -> shuffle}) paths </li>
     * <li> insert identity operators into ({@code boundary -> boundary}) paths </li>
     * <li> split identity operators </li>
     * <li> reduce redundant identity operators </li>
     * </ul>
     * @param graph the target flow graph
     * @throws IllegalArgumentException if the parameter is {@code null}
     */
    void normalizeFlowGraph(FlowGraph graph) {
        assert graph != null;
        LOG.debug("normalizing operator graph: {}", graph); //$NON-NLS-1$
        inlineFlowParts(graph);
        pushDownPseudoChain(graph);
        unifyGlobalSideEffects(graph);
        insertCheckpoints(graph);
        insertIdentities(graph);
        splitIdentities(graph);
        reduceIdentities(graph);
    }

    private void inlineFlowParts(FlowGraph graph) {
        assert graph != null;
        for (FlowElement element : FlowGraphUtil.collectFlowParts(graph)) {
            // first, inlines nested flow parts
            FlowPartDescription desc = (FlowPartDescription) element.getDescription();
            inlineFlowParts(desc.getFlowGraph());

            Inline inlineConfig = element.getAttribute(Inline.class);
            if (inlineConfig == null || inlineConfig == Inline.DEFAULT) {
                inlineConfig = options.isCompressFlowPart()
                        ? Inline.FORCE_AGGREGATE
                        : Inline.KEEP_SEGREGATED;
            }
            if (inlineConfig == Inline.FORCE_AGGREGATE) {
                LOG.debug("compressing flow-part: {}", element.getDescription().getName()); //$NON-NLS-1$
                FlowGraphUtil.inlineFlowPart(element);
            } else {
                FlowGraphUtil.inlineFlowPart(element, FlowBoundary.STAGE);
            }
        }
        assert FlowGraphUtil.collectFlowParts(graph).isEmpty() : FlowGraphUtil.collectFlowParts(graph);
    }

    /**
     * Removes {@code PSEUD -> PSEUD} chains.
     * @param graph the target flow graph (flattened)
     */
    void pushDownPseudoChain(FlowGraph graph) {
        for (FlowElement element : FlowGraphUtil.collectElements(graph)) {
            if (element.getDescription().getKind() != FlowElementKind.PSEUD
                    || element.getOutputPorts().isEmpty()) {
                continue;
            }
            assert element.getOutputPorts().size() == 1;
            for (FlowElementOutput upstream : element.getOutputPorts()) {
                for (PortConnection conn : new ArrayList<>(upstream.getConnected())) {
                    FlowElement successor = conn.getDownstream().getOwner();
                    if (successor.getDescription().getKind() == FlowElementKind.PSEUD
                            && FlowGraphUtil.isBoundary(successor) == false) {
                        for (FlowElementOutput sUpstream : successor.getOutputPorts()) {
                            for (PortConnection sConn : sUpstream.getConnected()) {
                                PortConnection.connect(upstream, sConn.getDownstream());
                            }
                        }
                        conn.disconnect();
                        if (FlowGraphUtil.hasPredecessors(successor) == false) {
                            FlowGraphUtil.disconnect(successor);
                        }
                    }
                }
            }
        }
    }

    /**
     * Inserts stage boundaries into {@code shuffle -> shuffle} paths.
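     * Lightweight operators (pseudo elements and simple operators such as {@code Branch},
     * {@code Project}, or {@code Logging}) may first be pushed through the boundary, so the
     * checkpoint is placed after them (see {@code isPushDownTarget}).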
     * @param graph the target flow graph
     */
    void insertCheckpoints(FlowGraph graph) {
        assert graph != null;
        LOG.debug("inserting checkpoints on stage bounds: {}", graph); //$NON-NLS-1$
        for (FlowElement element : FlowGraphUtil.collectBoundaries(graph)) {
            insertCheckpoints(element);
        }
    }

    private void insertCheckpoints(FlowElement element) {
        assert element != null;
        if (FlowGraphUtil.isShuffleBoundary(element) == false) {
            return;
        }
        for (FlowElementOutput output : element.getOutputPorts()) {
            insertCheckpointsWithPushDown(output);
        }
    }

    private void insertCheckpointsWithPushDown(FlowElementOutput start) {
        assert start != null;
        LinkedList<FlowElementOutput> work = new LinkedList<>();
        work.add(start);
        while (work.isEmpty() == false) {
            FlowElementOutput output = work.removeFirst();
            if (isSuccessShuffleBoundary(output) == false) {
                continue;
            }
            Set<PortConnection> connections = output.getConnected();
            if (connections.size() != 1) {
                LOG.debug("Inserts checkpoint after {}", output); //$NON-NLS-1$
                FlowGraphUtil.insertCheckpoint(output);
                continue;
            }
            FlowElementInput input = connections.iterator().next().getDownstream();
            FlowElement successor = input.getOwner();
            if (isPushDownTarget(successor) == false) {
                LOG.debug("Inserts checkpoint after {}", output); //$NON-NLS-1$
                FlowGraphUtil.insertCheckpoint(output);
                continue;
            }
            LOG.debug("Pushdown operator {}", successor); //$NON-NLS-1$
            work.addAll(successor.getOutputPorts());
        }
    }

    private boolean isSuccessShuffleBoundary(FlowElementOutput output) {
        assert output != null;
        Collection<FlowElement> successors = FlowGraphUtil.getSucceedingBoundaries(output);
        for (FlowElement successor : successors) {
            assert FlowGraphUtil.isBoundary(successor);
            if (FlowGraphUtil.isShuffleBoundary(successor) == false) {
                continue;
            }
            return true;
        }
        return false;
    }

    private boolean isPushDownTarget(FlowElement element) {
        assert element != null;
        if (element.getInputPorts().size() != 1) {
            return false;
        }
        FlowElementInput input = element.getInputPorts().get(0);
        if (input.getConnected().size() != 1) {
            return false;
        }
        if (FlowGraphUtil.isBoundary(element)) {
            return false;
        }
        FlowElementDescription desc = element.getDescription();
        if (desc.getKind() == FlowElementKind.PSEUD) {
            return true;
        } else if (desc.getKind() == FlowElementKind.OPERATOR) {
            OperatorDescription op = (OperatorDescription) desc;
            Class<? extends Annotation> kind = op.getDeclaration().getAnnotationType();
            if (kind == Branch.class
                    || kind == Split.class
                    || kind == Project.class
                    || kind == Restructure.class
                    || kind == SideDataCheck.class
                    || kind == SideDataBranch.class
                    || kind == Logging.class
                    || kind == Trace.class
                    || kind == Debug.class) {
                return true;
            }
        }
        return false;
    }

    /**
     * Inserts identity operators into paths which have no body functions.
     * @param graph the target flow graph
     */
    void insertIdentities(FlowGraph graph) {
        assert graph != null;
        for (FlowElement element : FlowGraphUtil.collectBoundaries(graph)) {
            insertIdentities(element);
        }
    }

    private void insertIdentities(FlowElement element) {
        assert element != null;
        if (FlowGraphUtil.isStageBoundary(element) == false) {
            return;
        }
        for (FlowElementOutput output : element.getOutputPorts()) {
            for (FlowElementInput opposite : output.getOpposites()) {
                FlowElement successor = opposite.getOwner();
                if (FlowGraphUtil.isBoundary(successor)) {
                    FlowGraphUtil.insertIdentity(output);
                }
            }
        }
    }

    /**
     * Normalizes identity operators so that each has exactly one upstream and one downstream operator.
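     * Splitting is repeated until a fixed point is reached, so that {@link #reduceIdentities(FlowGraph)}
     * can assume every identity operator has exactly one predecessor and one successor.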
     * @param graph the target flow graph
     */
    void splitIdentities(FlowGraph graph) {
        assert graph != null;
        LOG.debug("normalizing identity operators: {}", graph); //$NON-NLS-1$
        boolean changed;
        do {
            changed = false;
            for (FlowElement element : FlowGraphUtil.collectElements(graph)) {
                if (FlowGraphUtil.isIdentity(element)) {
                    changed |= FlowGraphUtil.splitIdentity(element);
                }
            }
        } while (changed);
    }

    /**
     * Removes redundant identity operators.
     * {@link #splitIdentities(FlowGraph)} must have been applied to the target flow graph beforehand.
     * @param graph the target flow graph
     */
    void reduceIdentities(FlowGraph graph) {
        assert graph != null;
        LOG.debug("reducing identity operators: {}", graph); //$NON-NLS-1$
        boolean changed;
        do {
            changed = false;
            for (FlowElement element : FlowGraphUtil.collectElements(graph)) {
                if (FlowGraphUtil.isIdentity(element) == false) {
                    continue;
                }
                Set<FlowElement> preds = FlowGraphUtil.getPredecessors(element);
                Set<FlowElement> succs = FlowGraphUtil.getSuccessors(element);
                assert preds.size() == 1 && succs.size() == 1 : "all identities must be split"; //$NON-NLS-1$
                FlowElement pred = preds.iterator().next();
                FlowElement succ = succs.iterator().next();
                if (FlowGraphUtil.isStageBoundary(pred) && FlowGraphUtil.isBoundary(succ)) {
                    continue;
                }
                LOG.debug("removing redundant identity operator: {}", element); //$NON-NLS-1$
                changed = true;
                FlowGraphUtil.skip(element);
            }
        } while (changed);
    }

    /**
     * Validates that the target flow graph is well-formed:
     * each mandatory element port must be connected to an opposite port, and the flow graph must be acyclic.
     * @param graph the target flow graph
     * @return {@code true} if the target flow graph is well-formed, otherwise {@code false}
     */
    boolean validate(FlowGraph graph) {
        assert graph != null;
        LOG.debug("validating flow graph: {}", graph); //$NON-NLS-1$
        Graph<FlowElement> elements = FlowGraphUtil.toElementGraph(graph);

        boolean valid = true;
        valid &= validateElements(graph, elements);
        valid &= validateConnection(graph, elements);
        valid &= validateAcyclic(graph, elements);
        for (FlowElement element : FlowGraphUtil.collectFlowParts(graph)) {
            FlowPartDescription description = (FlowPartDescription) element.getDescription();
            valid &= validate(description.getFlowGraph());
        }
        return valid;
    }

    private boolean validateElements(FlowGraph graph, Graph<FlowElement> elements) {
        boolean sawError = false;
        for (FlowElement element : elements.getNodeSet()) {
            for (FlowElementInput port : element.getInputPorts()) {
                if (port.getAttribute(ViewInfo.class) != null) {
                    error(
                            graph,
                            Collections.singletonList(element),
                            "View {1} (in {0}) is not supported in this platform",
                            element.getDescription(),
                            port.getDescription().getName());
                    sawError = true;
                }
            }
        }
        return sawError == false;
    }

    private boolean validateConnection(FlowGraph graph, Graph<FlowElement> elements) {
        assert graph != null;
        assert elements != null;
        LOG.debug("validating operator connections: {}", graph); //$NON-NLS-1$
        boolean sawError = false;
        for (FlowElement element : elements.getNodeSet()) {
            Connectivity connectivity = element.getAttribute(Connectivity.class);
            if (connectivity == null) {
                connectivity = Connectivity.getDefault();
            }
            for (FlowElementInput port : element.getInputPorts()) {
                if (port.getConnected().isEmpty() == false) {
                    continue;
                }
                error(
                        graph,
                        Collections.singletonList(element),
                        Messages.getString("StagePlanner.errorOrphanedInput"), //$NON-NLS-1$
                        element.getDescription(),
                        port.getDescription().getName());
                sawError = true;
            }
            for (FlowElementOutput port : element.getOutputPorts()) {
                if (port.getConnected().isEmpty() == false) {
                    continue;
                }
                if (connectivity == Connectivity.MANDATORY) {
                    error(
                            graph,
                            Collections.singletonList(element),
                            Messages.getString("StagePlanner.errorOrphanedOutput"), //$NON-NLS-1$
                            element.getDescription(),
                            port.getDescription().getName());
                    sawError = true;
                } else {
                    LOG.debug("inserting implicit \"stop\" operator: {}.{}", //$NON-NLS-1$
                            element.getDescription().getName(),
                            port.getDescription().getName());
                    FlowGraphUtil.stop(port);
                }
            }
        }
        return sawError == false;
    }

    private boolean validateAcyclic(FlowGraph graph, Graph<FlowElement> elements) {
        assert graph != null;
        assert elements != null;
        LOG.debug("validating cyclicity: {}", graph); //$NON-NLS-1$
        Set<Set<FlowElement>> circuits = Graphs.findCircuit(elements);
        for (Set<FlowElement> cyclic : circuits) {
            List<FlowElement> context = Lists.from(cyclic);
            List<String> names = new ArrayList<>();
            for (FlowElement elem : context) {
                names.add(elem.getDescription().getName());
            }
            error(
                    graph,
                    context,
                    Messages.getString("StagePlanner.errorCyclicGraph"), //$NON-NLS-1$
                    names);
        }
        return circuits.isEmpty();
    }

    private void error(
            FlowGraph graph,
            List<FlowElement> context,
            String message,
            Object... messageArguments) {
        assert graph != null;
        assert context != null;
        assert message != null;
        assert messageArguments != null;
        String text;
        if (messageArguments.length == 0) {
            text = message;
        } else {
            text = MessageFormat.format(message, messageArguments);
        }
        diagnostics.add(new Diagnostic(graph, context, text));
    }

    /**
     * Diagnostic information of {@link StagePlanner}.
     */
    public static class Diagnostic {

        /**
         * The target flow graph.
         */
        public final FlowGraph graph;

        /**
         * The target flow elements.
         */
        public final List<FlowElement> context;

        /**
         * The diagnostic message.
         */
        public final String message;

        /**
         * Creates a new instance.
         * @param graph the target flow graph
         * @param context the target flow elements
         * @param message the diagnostic message
         * @throws IllegalArgumentException if the parameters are {@code null}
         */
        public Diagnostic(FlowGraph graph, List<FlowElement> context, String message) {
            Precondition.checkMustNotBeNull(graph, "graph"); //$NON-NLS-1$
            Precondition.checkMustNotBeNull(context, "context"); //$NON-NLS-1$
            Precondition.checkMustNotBeNull(message, "message"); //$NON-NLS-1$
            this.graph = graph;
            this.context = Collections.unmodifiableList(context);
            this.message = message;
        }

        @Override
        public String toString() {
            return MessageFormat.format(
                    "{0} (at {1})", //$NON-NLS-1$
                    message,
                    graph.getDescription().getName());
        }
    }

    private static class FlowBlockGroup {

        final FlowBlock founder;

        /**
         * The upstream outputs for this group.
         */
        @SuppressWarnings("unused")
        private final Set<FlowBlock.Output> groupSource;

        /**
         * The direct upstream blocks.
         * These must be either map blocks without succeeding reduce blocks, or reduce blocks.
         */
        final Set<FlowBlock> predeceaseBlocks;

        final Set<FlowBlock> members;

        final boolean reducer;

        int distance = -1;

        FlowBlockGroup(FlowBlock flowBlock) {
            assert flowBlock != null;
            this.founder = flowBlock;
            this.members = new HashSet<>();
            this.members.add(flowBlock);
            this.reducer = flowBlock.isReduceBlock();
            this.groupSource = collectStageSource(flowBlock);
            this.predeceaseBlocks = collectPredeceaseBlocks(flowBlock);
        }

        private Set<FlowBlock.Output> collectStageSource(FlowBlock flowBlock) {
            assert flowBlock != null;
            if (flowBlock.isReduceBlock()) {
                Set<FlowBlock.Output> results = new HashSet<>();
                for (FlowBlock predecessor : getPredeceaseBlocks(flowBlock)) {
                    assert predecessor.isReduceBlock() == false;
                    results.addAll(collectBlockSource(predecessor));
                }
                return results;
            } else {
                return collectBlockSource(flowBlock);
            }
        }

        private Set<FlowBlock> getPredeceaseBlocks(FlowBlock flowBlock) {
            assert flowBlock != null;
            Set<FlowBlock> results = new HashSet<>();
            for (FlowBlock.Input input : flowBlock.getBlockInputs()) {
                for (FlowBlock.Connection conn : input.getConnections()) {
                    FlowBlock pred = conn.getUpstream().getOwner();
                    results.add(pred);
                }
            }
            return results;
        }

        private Set<FlowBlock.Output> collectBlockSource(FlowBlock flowBlock) {
            assert flowBlock != null;
            Set<FlowBlock.Output> results = new HashSet<>();
            for (FlowBlock.Input input : flowBlock.getBlockInputs()) {
                for (FlowBlock.Connection conn : input.getConnections()) {
                    results.add(conn.getUpstream());
                }
            }
            return results;
        }

        private Set<FlowBlock> collectPredeceaseBlocks(FlowBlock flowBlock) {
            assert flowBlock != null;
            Set<FlowBlock> results = new HashSet<>();
            LinkedList<FlowBlock> work = new LinkedList<>();
            work.addLast(flowBlock);
            while (work.isEmpty() == false) {
                FlowBlock first = work.removeFirst();
                Set<FlowBlock> preds = getPredeceaseBlocks(first);
                for (FlowBlock block : preds) {
                    // ignores input blocks
                    if (block.getBlockInputs().isEmpty()) {
                        continue;
                    }
                    if (block.isReduceBlock() || block.isSucceedingReduceBlock() == false) {
                        results.add(block);
                    } else {
                        work.addLast(block);
                    }
                }
            }
            return results;
        }

        boolean combine(FlowBlockGroup other) {
            assert other != null;
            // merges only the same block type (map/reduce)
            if (this.reducer != other.reducer) {
                return false;
            }
            // merges only groups that have the same distance from the head
            if (this.distance == -1 || this.distance != other.distance) {
                return false;
            }
            this.members.addAll(other.members);
            return true;
        }
    }
}