/** * Copyright 2011-2017 Asakusa Framework Team. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.asakusafw.compiler.flow.jobflow; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.asakusafw.compiler.common.Precondition; import com.asakusafw.compiler.flow.ExternalIoCommandProvider; import com.asakusafw.compiler.flow.ExternalIoDescriptionProcessor; import com.asakusafw.compiler.flow.ExternalIoDescriptionProcessor.Input; import com.asakusafw.compiler.flow.ExternalIoDescriptionProcessor.IoContext; import com.asakusafw.compiler.flow.ExternalIoDescriptionProcessor.Output; import com.asakusafw.compiler.flow.ExternalIoDescriptionProcessor.SourceInfo; import com.asakusafw.compiler.flow.FlowCompilingEnvironment; import com.asakusafw.compiler.flow.jobflow.JobflowModel.Delivery; import com.asakusafw.compiler.flow.jobflow.JobflowModel.Export; import com.asakusafw.compiler.flow.jobflow.JobflowModel.Import; import com.asakusafw.compiler.flow.jobflow.JobflowModel.Process; import com.asakusafw.compiler.flow.jobflow.JobflowModel.Processible; import com.asakusafw.compiler.flow.jobflow.JobflowModel.Reduce; import com.asakusafw.compiler.flow.jobflow.JobflowModel.SideData; import com.asakusafw.compiler.flow.jobflow.JobflowModel.Source; import com.asakusafw.compiler.flow.jobflow.JobflowModel.Stage; import com.asakusafw.compiler.flow.plan.StageGraph; import com.asakusafw.compiler.flow.stage.StageModel; import com.asakusafw.utils.collections.Maps; import com.asakusafw.utils.graph.Graph; import com.asakusafw.utils.graph.Graphs; /** * Compiles jobflow described in the flow DSL. * @since 0.1.0 * @version 0.2.6 */ public class JobflowCompiler { static final Logger LOG = LoggerFactory.getLogger(JobflowCompiler.class); @SuppressWarnings("unused") private final FlowCompilingEnvironment environment; private final JobflowAnalyzer analyzer; private final StageClientEmitter stageClientEmitter; private final CleanupStageClientEmitter cleanupStageClientEmitter; /** * Creates a new instance. * @param environment the current environment * @throws IllegalArgumentException if the parameter is {@code null} */ public JobflowCompiler(FlowCompilingEnvironment environment) { Precondition.checkMustNotBeNull(environment, "environment"); //$NON-NLS-1$ this.environment = environment; this.analyzer = new JobflowAnalyzer(environment); this.stageClientEmitter = new StageClientEmitter(environment); this.cleanupStageClientEmitter = new CleanupStageClientEmitter(environment); } /** * Compiles the target stage graph. * @param graph the target stage graph * @param stageModels the stage models * @return the corresponded jobflow model object * @throws IOException if failed to compile * @throws IllegalArgumentException if the parameters are {@code null} */ public JobflowModel compile( StageGraph graph, Collection<StageModel> stageModels) throws IOException { Precondition.checkMustNotBeNull(graph, "graph"); //$NON-NLS-1$ Precondition.checkMustNotBeNull(stageModels, "stageModels"); //$NON-NLS-1$ LOG.debug("analyzing jobflow: {}", graph.getInput().getSource().getDescription().getName()); //$NON-NLS-1$ JobflowModel jobflow = analyze(graph, stageModels); compileClients(jobflow); CompiledJobflow compiled = emit(jobflow); jobflow.setCompiled(compiled); reportSummary(jobflow); return jobflow; } private JobflowModel analyze( StageGraph graph, Collection<StageModel> stageModels) throws IOException { assert graph != null; assert stageModels != null; JobflowModel jobflow = analyzer.analyze(graph, stageModels); if (analyzer.hasError()) { analyzer.clearError(); throw new IOException(Messages.getString("JobflowCompiler.errorFailedToAnalyze")); //$NON-NLS-1$ } return jobflow; } private CompiledJobflow emit(JobflowModel model) throws IOException { Precondition.checkMustNotBeNull(model, "model"); //$NON-NLS-1$ LOG.debug("generating external I/O tasks: {}.{}", model.getBatchId(), model.getFlowId()); //$NON-NLS-1$ Map<ExternalIoDescriptionProcessor, List<Import>> imports = group(model.getImports()); Map<ExternalIoDescriptionProcessor, List<Export>> exports = group(model.getExports()); fillEmptyList(imports, exports.keySet()); fillEmptyList(exports, imports.keySet()); List<ExternalIoCommandProvider> commands = new ArrayList<>(); List<ExternalIoStage> prologues = new ArrayList<>(); List<ExternalIoStage> epilogues = new ArrayList<>(); for (Map.Entry<ExternalIoDescriptionProcessor, List<Import>> entry : imports.entrySet()) { ExternalIoDescriptionProcessor proc = entry.getKey(); List<Import> importGroup = entry.getValue(); List<Export> exportGroup = exports.get(proc); assert exportGroup != null; assert importGroup.isEmpty() == false || exportGroup.isEmpty() == false; IoContext context = createEmitContext(proc, importGroup, exportGroup); LOG.debug("generating external I/O descriptions: {}", proc.getClass().getName()); //$NON-NLS-1$ proc.emitPackage(context); LOG.debug("generating prologue stages: {}", proc.getClass().getName()); //$NON-NLS-1$ prologues.addAll(proc.emitPrologue(context)); LOG.debug("generating epilogue stages: {}", proc.getClass().getName()); //$NON-NLS-1$ epilogues.addAll(proc.emitEpilogue(context)); commands.add(proc.createCommandProvider(context)); } return new CompiledJobflow(commands, prologues, epilogues); } private IoContext createEmitContext( ExternalIoDescriptionProcessor processor, List<Import> importGroup, List<Export> exportGroup) { assert processor != null; assert importGroup != null; assert exportGroup != null; List<Input> inputs = new ArrayList<>(); for (Import model : importGroup) { inputs.add(new Input(model.getDescription(), model.getOutputFormatType())); } List<Output> outputs = new ArrayList<>(); for (Export model : exportGroup) { List<SourceInfo> sources = new ArrayList<>(); for (Source source : model.getResolvedSources()) { sources.add(source.getInputInfo()); } outputs.add(new Output(model.getDescription(), sources)); } IoContext context = new IoContext(inputs, outputs); return context; } private void reportSummary(JobflowModel jobflow) { if (LOG.isDebugEnabled()) { LOG.debug("Compilation Report: {} - {}", jobflow.getBatchId(), jobflow.getFlowId()); //$NON-NLS-1$ LOG.debug("Imports: {}", jobflow.getImports().size()); //$NON-NLS-1$ LOG.debug("Exports: {}", jobflow.getExports().size()); //$NON-NLS-1$ LOG.debug("Stages : {}", jobflow.getStages().size()); //$NON-NLS-1$ LOG.debug("Details:"); //$NON-NLS-1$ for (Import stage : jobflow.getImports()) { LOG.debug("===="); //$NON-NLS-1$ LOG.debug("Import: {}", stage.getId()); //$NON-NLS-1$ LOG.debug("Description: {}", //$NON-NLS-1$ stage.getDescription().getImporterDescription().getClass().getName()); LOG.debug("Target: {}", stage.getInputInfo().getLocations()); //$NON-NLS-1$ LOG.debug("Format: {}", stage.getInputInfo().getFormat().getName()); //$NON-NLS-1$ } for (CompiledStage stage : jobflow.getCompiled().getPrologueStages()) { LOG.debug("===="); //$NON-NLS-1$ LOG.debug("Prologue: {}", stage.getStageId()); //$NON-NLS-1$ LOG.debug("Client: {}", stage.getQualifiedName().toNameString()); //$NON-NLS-1$ } Graph<Stage> graph = jobflow.getDependencyGraph(); Graph<Stage> tgraph = Graphs.transpose(graph); for (Stage stage : jobflow.getStages()) { LOG.debug("===="); //$NON-NLS-1$ LOG.debug("Stage: {}", stage.getCompiled().getStageId()); //$NON-NLS-1$ LOG.debug("Client: {}", stage.getCompiled().getQualifiedName().toNameString()); //$NON-NLS-1$ for (Process unit : stage.getProcesses()) { LOG.debug("Input: {} ({})", unit.getResolvedLocations(), unit.getDataType()); //$NON-NLS-1$ } for (Delivery unit : stage.getDeliveries()) { LOG.debug("Output: {} ({})", unit.getInputInfo().getLocations(), unit.getDataType()); //$NON-NLS-1$ } Reduce reducer = stage.getReduceOrNull(); if (reducer != null) { LOG.debug("ShuffleKey: {}", reducer.getKeyTypeName().toNameString()); //$NON-NLS-1$ LOG.debug("ShuffleValue: {}", reducer.getValueTypeName().toNameString()); //$NON-NLS-1$ LOG.debug("Partitioner: {}", reducer.getPartitionerTypeName().toNameString()); //$NON-NLS-1$ LOG.debug("Grouping: {}", reducer.getGroupingComparatorTypeName().toNameString()); //$NON-NLS-1$ LOG.debug("Sort: {}", reducer.getSortComparatorTypeName().toNameString()); //$NON-NLS-1$ LOG.debug("Combiner: {}", reducer.getCombinerTypeNameOrNull() == null //$NON-NLS-1$ ? "N/A" : reducer.getCombinerTypeNameOrNull().toNameString()); //$NON-NLS-1$ LOG.debug("Reducer: {}", reducer.getReducerTypeName().toNameString()); //$NON-NLS-1$ } for (SideData data : stage.getSideData()) { LOG.debug("SideData: {} ({})", data.getLocalName(), data.getClusterPaths()); //$NON-NLS-1$ } LOG.debug("Upstreams: {}", getStageIds(graph.getConnected(stage))); //$NON-NLS-1$ LOG.debug("Downstreams: {}", getStageIds(tgraph.getConnected(stage))); //$NON-NLS-1$ } for (CompiledStage stage : jobflow.getCompiled().getEpilogueStages()) { LOG.debug("===="); //$NON-NLS-1$ LOG.debug("Epilogue: {}", stage.getStageId()); //$NON-NLS-1$ LOG.debug("Client: {}", stage.getQualifiedName().toNameString()); //$NON-NLS-1$ } for (Export stage : jobflow.getExports()) { LOG.debug("===="); //$NON-NLS-1$ LOG.debug("Export: {}", stage.getId()); //$NON-NLS-1$ LOG.debug("Description: {}", //$NON-NLS-1$ stage.getDescription().getExporterDescription().getClass().getName()); LOG.debug("Source: {}", stage.getResolvedLocations()); //$NON-NLS-1$ } LOG.debug("===="); //$NON-NLS-1$ } } private List<String> getStageIds(Collection<Stage> stages) { assert stages != null; List<String> results = new ArrayList<>(); for (Stage stage : stages) { results.add(stage.getCompiled().getStageId()); } Collections.sort(results); return results; } private <K, V> void fillEmptyList(Map<K, List<V>> map, Set<K> samples) { assert map != null; assert samples != null; for (K sample : samples) { if (map.containsKey(sample) == false) { map.put(sample, Collections.emptyList()); } } } private <T extends Processible> Map<ExternalIoDescriptionProcessor, List<T>> group(List<T> targets) { assert targets != null; Map<ExternalIoDescriptionProcessor, List<T>> results = new HashMap<>(); for (T processible : targets) { ExternalIoDescriptionProcessor proc = processible.getProcessor(); Maps.addToList(results, proc, processible); } return results; } private void compileClients(JobflowModel jobflow) throws IOException { assert jobflow != null; for (Stage stage : jobflow.getStages()) { CompiledStage client = stageClientEmitter.emit(stage); stage.setCompiled(client); } cleanupStageClientEmitter.emit(); } }