/** * Copyright 2011-2017 Asakusa Framework Team. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.asakusafw.compiler.flow.jobflow; import java.text.MessageFormat; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.hadoop.mapreduce.OutputFormat; import com.asakusafw.compiler.common.Precondition; import com.asakusafw.compiler.flow.Compilable; import com.asakusafw.compiler.flow.ExternalIoDescriptionProcessor; import com.asakusafw.compiler.flow.ExternalIoDescriptionProcessor.SourceInfo; import com.asakusafw.compiler.flow.Location; import com.asakusafw.compiler.flow.plan.FlowBlock; import com.asakusafw.compiler.flow.plan.StageGraph; import com.asakusafw.compiler.flow.stage.StageModel; import com.asakusafw.runtime.stage.input.TemporaryInputFormat; import com.asakusafw.runtime.stage.output.TemporaryOutputFormat; import com.asakusafw.utils.collections.Sets; import com.asakusafw.utils.graph.Graph; import com.asakusafw.utils.graph.Graphs; import com.asakusafw.utils.java.model.syntax.Name; import com.asakusafw.vocabulary.flow.graph.FlowElementOutput; import com.asakusafw.vocabulary.flow.graph.InputDescription; import com.asakusafw.vocabulary.flow.graph.OutputDescription; /** * Structural information of jobflows. */ public class JobflowModel extends Compilable.Trait<CompiledJobflow> { private final StageGraph stageGraph; private final String batchId; private final String flowId; private final List<Import> imports; private final List<Export> exports; private final List<Stage> stages; /** * Creates a new instance. * @param stageGraph the original stage graph * @param batchId the batch ID * @param flowId the flow ID * @param imports the import stages * @param exports the export stages * @param stages the MapReduce stages * @throws IllegalArgumentException if the parameters are {@code null} */ public JobflowModel( StageGraph stageGraph, String batchId, String flowId, List<Import> imports, List<Export> exports, List<Stage> stages) { Precondition.checkMustNotBeNull(stageGraph, "stageGraph"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(batchId, "batchId"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(flowId, "flowId"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(imports, "imports"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(exports, "exports"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(stages, "stages"); //$NON-NLS-1$ this.stageGraph = stageGraph; this.batchId = batchId; this.flowId = flowId; this.imports = imports; this.exports = exports; this.stages = stages; } /** * Returns the original stage graph. * @return the original stage graph */ public StageGraph getStageGraph() { return stageGraph; } /** * Returns the batch ID. * @return the batch ID */ public String getBatchId() { return batchId; } /** * Returns the flow ID. * @return the flow ID */ public String getFlowId() { return flowId; } /** * Returns information of the import stages. * @return the import stages */ public List<Import> getImports() { return imports; } /** * Returns information of the export stages. * @return the export stages */ public List<Export> getExports() { return exports; } /** * Returns information of the MapReduce stages. * @return the MapReduce stages */ public List<Stage> getStages() { return stages; } /** * Returns the dependency graph of MapReduce stages. * @return the dependency graph */ public Graph<Stage> getDependencyGraph() { Map<Delivery, Stage> deliveries = new HashMap<>(); for (Stage stage : stages) { for (Delivery delivery : stage.getDeliveries()) { deliveries.put(delivery, stage); } } Graph<Stage> graph = Graphs.newInstance(); for (Stage stage : stages) { graph.addNode(stage); for (Process process : stage.getProcesses()) { for (Source source : process.getResolvedSources()) { Stage dependence = deliveries.get(source); if (dependence == null) { // the head stage continue; } graph.addEdge(stage, dependence); } } } return graph; } /** * Structural information of MapReduce stages. */ public static class Stage extends Compilable.Trait<CompiledStage> { private final StageModel model; private final List<Process> processes; private final List<Delivery> deliveries; private final Reduce reduceOrNull; private final Set<SideData> sideData; /** * Creates a new instance. * @param model the stage model * @param processes the processes (Map actions) in this stage * @param deliveries the deliveries of this stage * @param reduceOrNull the reducer action (nullable) * @param sideData the side-data for this stage * @throws IllegalArgumentException if some parameters are {@code null} */ public Stage( StageModel model, List<Process> processes, List<Delivery> deliveries, Reduce reduceOrNull, Set<SideData> sideData) { Precondition.checkMustNotBeNull(model, "model"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(processes, "processes"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(deliveries, "deliveries"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(sideData, "sideData"); //$NON-NLS-1$ this.model = model; this.processes = processes; this.deliveries = deliveries; this.reduceOrNull = reduceOrNull; this.sideData = sideData; } /** * Returns the stage number. * @return the stage number */ public int getNumber() { return model.getStageBlock().getStageNumber(); } /** * Returns the model of this stage. * @return the stage model */ public StageModel getModel() { return model; } /** * Returns information of the Map actions in this stage. * @return the Map actions */ public List<Process> getProcesses() { return processes; } /** * Returns information of the stage deliveries. * @return the stage deliveries */ public List<Delivery> getDeliveries() { return deliveries; } /** * Returns information of the Reduce action. * @return the Reduce action, or {@code null} if this stage does not contain Reduce actions */ public Reduce getReduceOrNull() { return reduceOrNull; } /** * Returns information of the side-data list for this stage. * @return the side-data list */ public Set<SideData> getSideData() { return sideData; } @Override public String toString() { return MessageFormat.format( "Stage({0})", //$NON-NLS-1$ String.valueOf(getNumber())); } } /** * Represents a set of configurations for Reduce actions. */ public static class Reduce { private final Name reducerTypeName; private final Name combinerTypeNameOrNull; private final Name keyTypeName; private final Name valueTypeName; private final Name groupingComparatorTypeName; private final Name sortComparatorTypeName; private final Name partitionerTypeName; /** * Creates a new instance. * @param reducerTypeName the qualified class name of the reducer * @param combinerTypeNameOrNull the qualified class name of the reducer (nullable) * @param keyTypeName the qualified class name of the shuffle key * @param valueTypeName the qualified class name of the shuffle value * @param groupingComparatorTypeName the qualified class name of the grouping comparator * @param sortComparatorTypeName the qualified class name of the sort comparator * @param partitionerTypeName the qualified class name of the partitioner * @throws IllegalArgumentException if the parameters are {@code null} */ public Reduce( Name reducerTypeName, Name combinerTypeNameOrNull, Name keyTypeName, Name valueTypeName, Name groupingComparatorTypeName, Name sortComparatorTypeName, Name partitionerTypeName) { Precondition.checkMustNotBeNull(reducerTypeName, "reducerTypeName"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(keyTypeName, "keyTypeName"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(valueTypeName, "valueTypeName"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(groupingComparatorTypeName, "groupingComparatorTypeName"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(sortComparatorTypeName, "sortComparatorTypeName"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(partitionerTypeName, "partitionerTypeName"); //$NON-NLS-1$ this.reducerTypeName = reducerTypeName; this.combinerTypeNameOrNull = combinerTypeNameOrNull; this.keyTypeName = keyTypeName; this.valueTypeName = valueTypeName; this.groupingComparatorTypeName = groupingComparatorTypeName; this.sortComparatorTypeName = sortComparatorTypeName; this.partitionerTypeName = partitionerTypeName; } /** * Returns the qualified name of the combiner class. * @return the qualified class name, or {@code null} if combiner is not available */ public Name getCombinerTypeNameOrNull() { return combinerTypeNameOrNull; } /** * Returns the qualified name of the reducer class. * @return the qualified class name */ public Name getReducerTypeName() { return reducerTypeName; } /** * Returns the qualified name of the shuffle key class. * @return the qualified class name */ public Name getKeyTypeName() { return keyTypeName; } /** * Returns the qualified name of the shuffle value class. * @return the qualified class name */ public Name getValueTypeName() { return valueTypeName; } /** * Returns the qualified name of the grouping comparator class. * @return the qualified class name */ public Name getGroupingComparatorTypeName() { return groupingComparatorTypeName; } /** * Returns the qualified name of the sort comparator class. * @return the qualified class name */ public Name getSortComparatorTypeName() { return sortComparatorTypeName; } /** * Returns the qualified name of the partitioner class. * @return the qualified class name */ public Name getPartitionerTypeName() { return partitionerTypeName; } } /** * An abstract super class which provides output data-sets. */ public abstract static class Source { private final Set<FlowBlock.Output> outputs; /** * Creates a new instance. * @param outputs set of the corresponded output ports * @throws IllegalArgumentException if the parameter is {@code null} */ protected Source(Set<FlowBlock.Output> outputs) { Precondition.checkMustNotBeNull(outputs, "outputs"); //$NON-NLS-1$ this.outputs = outputs; } /** * Returns input information. * @return input information */ public abstract SourceInfo getInputInfo(); /** * Returns set of the corresponded output ports. * @return the corresponded output ports */ public Set<FlowBlock.Output> getOutputs() { return outputs; } } /** * An abstract super class which accepts data-sets from {@link Source}. */ public abstract static class Target { private final List<FlowBlock.Input> inputs; private Set<Source> sources; /** * Creates a new instance. * @param inputs set of the corresponded input ports * @throws IllegalArgumentException if the parameter is {@code null} */ public Target(List<FlowBlock.Input> inputs) { Precondition.checkMustNotBeNull(inputs, "inputs"); //$NON-NLS-1$ if (inputs.isEmpty()) { throw new IllegalArgumentException("inputs must not be empty"); //$NON-NLS-1$ } this.inputs = inputs; } /** * Sets opposite {@link Source} objects that provides data-sets for this target. * @param opposites the upstream sources * @throws IllegalArgumentException if the parameter is {@code null} */ public void resolveSources(Collection<? extends Source> opposites) { Precondition.checkMustNotBeNull(opposites, "opposites"); //$NON-NLS-1$ this.sources = Sets.from(opposites); } /** * Returns the opposite {@link Source} objects. * @return the upstream sources * @throws IllegalStateException if they have been not set yet * @see #resolveSources(Collection) */ public Set<Source> getResolvedSources() { if (sources == null) { throw new IllegalStateException(); } return sources; } /** * Returns the locations where the upstream data-sets will be stored. * @return the upstream data-set locations * @throws IllegalStateException if the opposite {@link Source} objects are not set * @see #resolveSources(Collection) */ public Set<Location> getResolvedLocations() { Set<Location> results = new HashSet<>(); for (Source source : getResolvedSources()) { results.addAll(source.getInputInfo().getLocations()); } return results; } /** * Returns the corresponded input ports. * @return the corresponded input ports */ public List<FlowBlock.Input> getInputs() { return inputs; } /** * Returns the data type. * @return the data type */ public java.lang.reflect.Type getDataType() { if (inputs.isEmpty()) { return void.class; } return inputs.get(0).getElementPort().getDescription().getDataType(); } } /** * An abstract super interface for external I/O models. */ public interface Processible { /** * Returns the processor for processing this external I/O operations. * @return the related processor */ ExternalIoDescriptionProcessor getProcessor(); } /** * Represents a set of configurations for Map actions. */ public static class Process extends Target { private final Name mapperTypeName; /** * Creates a new instance. * @param inputs the corresponded input ports * @param mapperTypeName the qualified class name of the mapper * @throws IllegalArgumentException if the parameters are {@code null} */ public Process(List<FlowBlock.Input> inputs, Name mapperTypeName) { super(inputs); Precondition.checkMustNotBeNull(mapperTypeName, "mapperTypeName"); //$NON-NLS-1$ this.mapperTypeName = mapperTypeName; } /** * Returns the qualified name of the mapper class. * @return the qualified class name */ public Name getMapperTypeName() { return mapperTypeName; } @Override public String toString() { return MessageFormat.format( "Process(inputs={0}, mapper={1})", //$NON-NLS-1$ getInputs(), getMapperTypeName()); } } /** * Represents deliveries from some actions. */ public static class Delivery extends Source { private final Set<Location> locations; /** * Creates a new instance. * @param outputs the corresponded output ports * @param locations the target output locations * @throws IllegalArgumentException if the parameters are {@code null} */ public Delivery(Set<FlowBlock.Output> outputs, Set<Location> locations) { super(outputs); Precondition.checkMustNotBeNull(locations, "locations"); //$NON-NLS-1$ this.locations = locations; } /** * Returns the data type. * @return the data type */ public java.lang.reflect.Type getDataType() { FlowBlock.Output first = getOutputs().iterator().next(); FlowElementOutput port = first.getElementPort(); return port.getDescription().getDataType(); } @Override public SourceInfo getInputInfo() { return new SourceInfo(locations, TemporaryInputFormat.class); } /** * Returns the Hadoop {@link OutputFormat} class for storing this delivery. * @return the Hadoop {@link OutputFormat} class */ @SuppressWarnings("rawtypes") public Class<? extends OutputFormat> getOutputFormatType() { return TemporaryOutputFormat.class; } @Override public String toString() { return MessageFormat.format( "Delivery(output={0}, locations={1})", //$NON-NLS-1$ getOutputs(), getInputInfo().getLocations()); } } /** * Represents import actions that obtains data-sets from external inputs. */ public static class Import extends Source implements Processible { private final InputDescription description; private final ExternalIoDescriptionProcessor processor; /** * Creates a new instance. * @param description the input description * @param processor the external I/O processor for processing this * @throws IllegalArgumentException if the parameters are {@code null} */ public Import(InputDescription description, ExternalIoDescriptionProcessor processor) { super(Collections.emptySet()); Precondition.checkMustNotBeNull(description, "description"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(processor, "processor"); //$NON-NLS-1$ this.description = description; this.processor = processor; } /** * Creates a new instance. * @param output the target output port * @param description the input description * @param processor the external I/O processor for processing this * @throws IllegalArgumentException if the parameters are {@code null} */ public Import( FlowBlock.Output output, InputDescription description, ExternalIoDescriptionProcessor processor) { super(Collections.singleton(output)); Precondition.checkMustNotBeNull(description, "description"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(processor, "processor"); //$NON-NLS-1$ this.description = description; this.processor = processor; } /** * Returns the ID of this action. * @return the action ID */ public String getId() { return description.getName(); } @Override public SourceInfo getInputInfo() { return processor.getInputInfo(description); } /** * Returns a Hadoop {@link OutputFormat} class which generates the output data-set from this importer. * @return the Hadoop {@link OutputFormat} class */ @SuppressWarnings("rawtypes") public Class<? extends OutputFormat> getOutputFormatType() { return TemporaryOutputFormat.class; } /** * Returns the input description. * @return the input description */ public InputDescription getDescription() { return description; } @Override public ExternalIoDescriptionProcessor getProcessor() { return processor; } @Override public String toString() { return MessageFormat.format( "Import(output={0}, locations={1}, description={2})", //$NON-NLS-1$ getOutputs(), getInputInfo().getLocations(), getDescription()); } } /** * Represents export actions that write data-sets into external outputs. */ public static class Export extends Target implements Processible { private final OutputDescription description; private final ExternalIoDescriptionProcessor processor; /** * Creates a new instance. * @param inputs the source input port * @param description the output description * @param processor the external I/O processor for processing this * @throws IllegalArgumentException if the parameters are {@code null} */ public Export( List<FlowBlock.Input> inputs, OutputDescription description, ExternalIoDescriptionProcessor processor) { super(inputs); Precondition.checkMustNotBeNull(description, "description"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(processor, "processor"); //$NON-NLS-1$ this.description = description; this.processor = processor; } /** * Returns the ID of this action. * @return the action ID */ public String getId() { return description.getName(); } /** * Returns the output description. * @return the output description */ public OutputDescription getDescription() { return description; } @Override public ExternalIoDescriptionProcessor getProcessor() { return processor; } @Override public String toString() { return MessageFormat.format( "Export(inputs={0}, description={1})", //$NON-NLS-1$ getInputs(), getDescription()); } } /** * Information of side-data. */ public static class SideData { private final Set<Location> clusterPaths; private final String localName; /** * Creates a new instance. * @param clusterPaths the remote path of the target data * @param localName the unique local name * @throws IllegalArgumentException if parameters are {@code null} */ public SideData(Set<Location> clusterPaths, String localName) { Precondition.checkMustNotBeNull(clusterPaths, "clusterPath"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(localName, "localName"); //$NON-NLS-1$ this.clusterPaths = clusterPaths; this.localName = localName; } /** * Returns the remote path of the target data. * @return the remote path of the target data */ public Set<Location> getClusterPaths() { return clusterPaths; } /** * Returns the unique local name. * @return the unique local name */ public String getLocalName() { return localName; } @Override public String toString() { return MessageFormat.format( "SideData(path={0}, name={1})", //$NON-NLS-1$ getClusterPaths(), getLocalName()); } } }