/* * Copyright © 2016 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.etl.common; import co.cask.cdap.etl.planner.Dag; import co.cask.cdap.etl.planner.StageInfo; import co.cask.cdap.etl.proto.Connection; import com.google.common.base.Joiner; import com.google.common.base.Predicate; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterators; import com.google.common.collect.Sets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; /** * Keeps track of the plugin ids for the source, transforms, and sink of a pipeline phase. */ public class PipelinePhase implements Iterable<StageInfo> { // plugin type -> stage info private final Map<String, Set<StageInfo>> stages; private final Dag dag; private PipelinePhase(Map<String, Set<StageInfo>> stages, Dag dag) { this.stages = ImmutableMap.copyOf(stages); this.dag = dag; } /** * Get an unmodifiable set of stages that use the specified plugin type. * * @param pluginType the plugin type * @return unmodifiable set of stages that use the specified plugin type */ public Set<StageInfo> getStagesOfType(String pluginType) { Set<StageInfo> stageInfos = stages.get(pluginType); return Collections.unmodifiableSet(stageInfos == null ? new HashSet<StageInfo>() : stageInfos); } public Set<String> getStageOutputs(String stage) { Set<String> outputs = dag.getNodeOutputs(stage); return Collections.unmodifiableSet(outputs == null ? new HashSet<String>() : outputs); } public Set<String> getPluginTypes() { return stages.keySet(); } public Set<String> getSources() { return dag.getSources(); } public Set<String> getSinks() { return dag.getSinks(); } /** * Get a subset of the pipeline phase, starting from the sources and going to the specified nodes that will * be the new sinks of the pipeline subset. * * @param newSinks the new sinks to go to * @return subset of the pipeline, starting from current sources and going to the new sinks */ public PipelinePhase subsetTo(Set<String> newSinks) { return getSubset(dag.subsetFrom(dag.getSources(), newSinks)); } /** * Get a subset of the pipeline phase, starting from the specified new sources and going to the current sinks. * * @param newSources the new sources to start from * @return subset of the pipeline, starting from specified new sources and going to the current sinks */ public PipelinePhase subsetFrom(Set<String> newSources) { return getSubset(dag.subsetFrom(newSources)); } private PipelinePhase getSubset(final Dag subsetDag) { Map<String, Set<StageInfo>> subsetStages = new HashMap<>(); for (Map.Entry<String, Set<StageInfo>> stagesEntry : stages.entrySet()) { final Set<StageInfo> stagesOfType = Sets.filter(stagesEntry.getValue(), new Predicate<StageInfo>() { @Override public boolean apply(StageInfo stageInfo) { return subsetDag.getNodes().contains(stageInfo.getName()); } }); if (!stagesOfType.isEmpty()) { subsetStages.put(stagesEntry.getKey(), stagesOfType); } } return new PipelinePhase(subsetStages, subsetDag); } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } PipelinePhase that = (PipelinePhase) o; return Objects.equals(stages, that.stages) && Objects.equals(dag, that.dag); } @Override public int hashCode() { return Objects.hash(stages, dag); } @Override public String toString() { return "PipelinePhase{" + "stages=" + stages + ", dag=" + dag + '}'; } /** * Get a builder used to create a pipeline phase. * * @param supportedPluginTypes types of plugins supported in the phase * @return builder used to create a pipeline phase */ public static Builder builder(Set<String> supportedPluginTypes) { return new Builder(supportedPluginTypes); } @Override public Iterator<StageInfo> iterator() { List<Iterator<StageInfo>> iterators = new ArrayList<>(stages.size()); for (Map.Entry<String, Set<StageInfo>> stagesEntry : stages.entrySet()) { iterators.add(stagesEntry.getValue().iterator()); } return Iterators.concat(iterators.iterator()); } /** * Builder to create a {@link PipelinePhase}. */ public static class Builder { private final Set<String> supportedPluginTypes; private final Map<String, Set<StageInfo>> stages; private final Set<co.cask.cdap.etl.proto.Connection> connections; public Builder(Set<String> supportedPluginTypes) { this.supportedPluginTypes = supportedPluginTypes; this.stages = new HashMap<>(); this.connections = new HashSet<>(); } public Builder addStage(String pluginType, StageInfo stageInfo) { return addStages(pluginType, ImmutableSet.of(stageInfo)); } public Builder addStages(String pluginType, Collection<StageInfo> stages) { if (!supportedPluginTypes.contains(pluginType)) { throw new IllegalArgumentException( String.format("%s is an unsupported plugin type. Plugin type must be one of %s.", pluginType, Joiner.on(',').join(supportedPluginTypes))); } Set<StageInfo> existingStages = this.stages.get(pluginType); if (existingStages == null) { existingStages = new HashSet<>(); this.stages.put(pluginType, existingStages); } existingStages.addAll(stages); return this; } public Builder addConnection(String from, String to) { return addConnections(from, ImmutableSet.of(to)); } public Builder addConnections(String from, Collection<String> to) { for (String toStage : to) { connections.add(new Connection(from, toStage)); } return this; } public Builder addConnections(Map<String, Set<String>> connections) { for (Map.Entry<String, Set<String>> entry : connections.entrySet()) { addConnections(entry.getKey(), entry.getValue()); } return this; } public PipelinePhase build() { return new PipelinePhase(stages, new Dag(connections)); } } }