/**
* Copyright 2011-2017 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.compiler.flow.jobflow;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.asakusafw.compiler.common.Precondition;
import com.asakusafw.compiler.flow.ExternalIoDescriptionProcessor;
import com.asakusafw.compiler.flow.FlowCompilingEnvironment;
import com.asakusafw.compiler.flow.Location;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Delivery;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Export;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Import;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Process;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Reduce;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.SideData;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Source;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Stage;
import com.asakusafw.compiler.flow.jobflow.JobflowModel.Target;
import com.asakusafw.compiler.flow.plan.FlowBlock;
import com.asakusafw.compiler.flow.plan.StageGraph;
import com.asakusafw.compiler.flow.stage.CompiledReduce;
import com.asakusafw.compiler.flow.stage.CompiledShuffle;
import com.asakusafw.compiler.flow.stage.StageModel;
import com.asakusafw.utils.collections.Lists;
import com.asakusafw.vocabulary.flow.graph.FlowElement;
import com.asakusafw.vocabulary.flow.graph.FlowElementDescription;
import com.asakusafw.vocabulary.flow.graph.FlowElementKind;
import com.asakusafw.vocabulary.flow.graph.InputDescription;
import com.asakusafw.vocabulary.flow.graph.OutputDescription;
/**
 * Analyzes the structure of jobflows.
 * <p>
 * An analysis collects the jobflow inputs, outputs and stages from a stage graph and its stage models,
 * and then connects each export and each stage process to the sources it reads from.
 * Problems found on the way are reported to the compiling environment and remembered by this object
 * until {@link #clearError()} is invoked.
 * </p>
 */
public class JobflowAnalyzer {

    static final Logger LOG = LoggerFactory.getLogger(JobflowAnalyzer.class);

    private final FlowCompilingEnvironment environment;

    private boolean sawError;

    /**
     * Creates a new instance.
     * @param environment the current environment
     * @throws IllegalArgumentException if the parameter is {@code null}
     */
    public JobflowAnalyzer(FlowCompilingEnvironment environment) {
        Precondition.checkMustNotBeNull(environment, "environment"); //$NON-NLS-1$
        this.environment = environment;
    }

    /**
     * Returns whether this analysis result contains any erroneous information or not.
     * @return {@code true} if this contains any erroneous information, otherwise {@code false}
     */
    public boolean hasError() {
        return sawError;
    }

    /**
     * Resets the current errors.
     * @see #hasError()
     */
    public void clearError() {
        sawError = false;
    }

    /**
     * Analyzes the target stage graph and returns the corresponding jobflow model object.
     * <p>
     * The error flag is not reset by this method, so an error left over from an earlier analysis
     * also makes this method return {@code null}.
     * </p>
     * @param graph the target jobflow object
     * @param stageModels the stage model objects
     * @return the analyzed jobflow model, or {@code null} if the target jobflow is not valid
     * @throws IllegalArgumentException if the parameters are {@code null}
     * @see #hasError()
     */
    public JobflowModel analyze(StageGraph graph, Collection<StageModel> stageModels) {
        Precondition.checkMustNotBeNull(graph, "graph"); //$NON-NLS-1$
        Precondition.checkMustNotBeNull(stageModels, "stageModels"); //$NON-NLS-1$
        LOG.debug("analyzing stage structure: {}", //$NON-NLS-1$
                graph.getInput().getSource().getDescription().getName());

        // all three collections are always analyzed, so that every detectable error is reported at once
        List<Import> jobflowInputs = analyzeImports(graph, stageModels);
        List<Export> jobflowOutputs = analyzeExports(graph, stageModels);
        List<Stage> jobflowStages = analyzeStages(stageModels);
        if (!hasError()) {
            resolve(jobflowInputs, jobflowOutputs, jobflowStages);
            if (!hasError()) {
                return new JobflowModel(
                        graph,
                        environment.getBatchId(),
                        environment.getFlowId(),
                        jobflowInputs,
                        jobflowOutputs,
                        jobflowStages);
            }
        }
        return null;
    }

    /**
     * Collects the imports of the jobflow: first the inputs connected to the graph input block,
     * then the side-data inputs of the stages which did not appear among them.
     * @param graph the target stage graph
     * @param stageModels the stage models which may declare side-data inputs
     * @return the imports whose processors were found
     */
    private List<Import> analyzeImports(StageGraph graph, Collection<StageModel> stageModels) {
        assert graph != null;
        assert stageModels != null;
        LOG.debug("analyzing jobflow inputs: {}", graph.getInput()); //$NON-NLS-1$
        List<Import> found = new ArrayList<>();
        Set<InputDescription> connected = new HashSet<>();
        for (FlowBlock.Output port : graph.getInput().getBlockOutputs()) {
            FlowElementDescription raw = port.getElementPort().getOwner().getDescription();
            if (raw.getKind() != FlowElementKind.INPUT) {
                error(Messages.getString("JobflowAnalyzer.errorInvalidInput"), raw); //$NON-NLS-1$
                continue;
            }
            InputDescription description = (InputDescription) raw;
            // remembered even if the processor is missing, to keep it out of the side-data pass below
            connected.add(description);
            ExternalIoDescriptionProcessor importer = findImporter(description);
            if (importer != null) {
                Import input = new Import(port, description, importer);
                LOG.debug("found jobflow input: {}", input); //$NON-NLS-1$
                found.add(input);
            }
        }

        Set<InputDescription> sideOnly = new HashSet<>();
        stageModels.forEach(stage -> sideOnly.addAll(stage.getSideDataInputs()));
        sideOnly.removeAll(connected);
        for (InputDescription description : sideOnly) {
            ExternalIoDescriptionProcessor importer = findImporter(description);
            if (importer != null) {
                Import input = new Import(description, importer);
                LOG.debug("found side-data: {}", input); //$NON-NLS-1$
                found.add(input);
            }
        }
        return found;
    }

    /**
     * Collects the exports of the jobflow from the inputs of the graph output block.
     * @param graph the target stage graph
     * @param stageModels the stage models (only checked for {@code null})
     * @return the exports whose processors were found
     */
    private List<Export> analyzeExports(StageGraph graph, Collection<StageModel> stageModels) {
        assert graph != null;
        assert stageModels != null;
        LOG.debug("analyzing jobflow outputs: {}", graph.getOutput()); //$NON-NLS-1$
        List<Export> found = new ArrayList<>();
        for (FlowBlock.Input port : graph.getOutput().getBlockInputs()) {
            FlowElementDescription raw = port.getElementPort().getOwner().getDescription();
            if (raw.getKind() != FlowElementKind.OUTPUT) {
                error(Messages.getString("JobflowAnalyzer.errorInvalidOutput"), raw); //$NON-NLS-1$
                continue;
            }
            OutputDescription description = (OutputDescription) raw;
            ExternalIoDescriptionProcessor exporter = environment.getExternals().findProcessor(description);
            if (exporter == null) {
                error(Messages.getString("JobflowAnalyzer.errorMissingExporterProcessor"), raw); //$NON-NLS-1$
                continue;
            }
            Export output = new Export(Collections.singletonList(port), description, exporter);
            found.add(output);
            LOG.debug("found jobflow output: {}", output); //$NON-NLS-1$
        }
        return found;
    }

    /**
     * Analyzes every stage model, in ascending order of stage number.
     * @param stageModels the stage models
     * @return the analyzed stages
     */
    private List<Stage> analyzeStages(Collection<StageModel> stageModels) {
        assert stageModels != null;
        List<Stage> analyzed = new ArrayList<>();
        sort(stageModels).forEach(model -> analyzed.add(analyzeStage(model)));
        return analyzed;
    }

    /**
     * Builds a stage from its processes, deliveries, side-data and reduce information.
     * @param model the stage model
     * @return the analyzed stage
     */
    private Stage analyzeStage(StageModel model) {
        assert model != null;
        LOG.debug("analyzing jobflow stage: {}", model); //$NON-NLS-1$
        List<Process> mapProcesses = analyzeProcesses(model);
        List<Delivery> stageOutputs = analyzeDeliveries(model);
        Set<SideData> stageSideData = analyzeSideData(model);
        Reduce reducePart = analyzeReduce(model);
        Stage analyzed = new Stage(model, mapProcesses, stageOutputs, reducePart, stageSideData);
        LOG.debug("found jobflow stage: {}", model); //$NON-NLS-1$
        return analyzed;
    }

    /**
     * Extracts the reduce information of the stage.
     * @param model the stage model
     * @return the reduce information, or {@code null} if the stage has no shuffle model
     */
    private Reduce analyzeReduce(StageModel model) {
        if (model.getShuffleModel() == null) {
            assert model.getReduceUnits().isEmpty();
            return null;
        }
        assert !model.getReduceUnits().isEmpty();
        // only the first reduce unit is consulted for the reducer and combiner types
        CompiledReduce compiledReduce = model.getReduceUnits().get(0).getCompiled();
        CompiledShuffle compiledShuffle = model.getShuffleModel().getCompiled();
        return new Reduce(
                compiledReduce.getReducerType().getQualifiedName(),
                compiledReduce.getCombinerTypeOrNull() == null
                        ? null
                        : compiledReduce.getCombinerTypeOrNull().getQualifiedName(),
                compiledShuffle.getKeyTypeName(),
                compiledShuffle.getValueTypeName(),
                compiledShuffle.getGroupComparatorTypeName(),
                compiledShuffle.getSortComparatorTypeName(),
                compiledShuffle.getPartitionerTypeName());
    }

    /**
     * Creates one delivery per stage result, located under the stage location by the sink name.
     * @param model the stage model
     * @return the deliveries of the stage
     */
    private List<Delivery> analyzeDeliveries(StageModel model) {
        assert model != null;
        Location stageBase = environment.getStageLocation(model.getStageBlock().getStageNumber());
        List<Delivery> found = new ArrayList<>();
        for (StageModel.Sink sink : model.getStageResults()) {
            Location prefix = stageBase.append(sink.getName()).asPrefix();
            found.add(new Delivery(sink.getOutputs(), Collections.singleton(prefix)));
        }
        return found;
    }

    /**
     * Creates one process per map unit of the stage.
     * @param model the stage model
     * @return the processes of the stage
     */
    private List<Process> analyzeProcesses(StageModel model) {
        List<Process> found = new ArrayList<>();
        for (StageModel.MapUnit mapUnit : model.getMapUnits()) {
            found.add(new Process(mapUnit.getInputs(), mapUnit.getCompiled().getQualifiedName()));
        }
        return found;
    }

    /**
     * Collects the side-data of the stage, using the input locations given by each importer processor.
     * @param model the stage model
     * @return the side-data whose processors were found
     */
    private Set<SideData> analyzeSideData(StageModel model) {
        assert model != null;
        Set<SideData> found = new HashSet<>();
        for (InputDescription input : model.getSideDataInputs()) {
            ExternalIoDescriptionProcessor importer = findImporter(input);
            if (importer != null) {
                Set<Location> locations = importer.getInputInfo(input).getLocations();
                found.add(new SideData(locations, input.getName()));
            }
        }
        return found;
    }

    /**
     * Looks up the external I/O processor for the input, reporting an error if there is none.
     * @param description the target input description
     * @return the processor, or {@code null} if it was not found (an error has been reported)
     */
    private ExternalIoDescriptionProcessor findImporter(InputDescription description) {
        ExternalIoDescriptionProcessor importer = environment.getExternals().findProcessor(description);
        if (importer == null) {
            error(Messages.getString("JobflowAnalyzer.errorMissingImporterProcessor"), description); //$NON-NLS-1$
        }
        return importer;
    }

    /**
     * Returns a copy of the stage models ordered by ascending stage number.
     * @param stageModels the stage models
     * @return the sorted copy
     */
    private List<StageModel> sort(Collection<StageModel> stageModels) {
        List<StageModel> ordered = Lists.from(stageModels);
        ordered.sort((left, right) -> Integer.compare(
                left.getStageBlock().getStageNumber(),
                right.getStageBlock().getStageNumber()));
        return ordered;
    }

    /**
     * Connects every export and every stage process to its upstream sources.
     * @param imports the jobflow imports
     * @param exports the jobflow exports
     * @param stages the jobflow stages
     */
    private void resolve(List<Import> imports, List<Export> exports, List<Stage> stages) {
        assert imports != null;
        assert exports != null;
        assert stages != null;
        Map<FlowBlock.Output, Source> sources = createOutputMap(imports, stages);
        exports.forEach(export -> resolveTarget(export, sources));
        for (Stage stage : stages) {
            for (Target process : stage.getProcesses()) {
                resolveTarget(process, sources);
            }
        }
    }

    /**
     * Resolves the sources of the target by following the connections of its inputs.
     * @param target the target to be resolved
     * @param sources the known sources, keyed by their block outputs
     */
    private void resolveTarget(Target target, Map<FlowBlock.Output, Source> sources) {
        assert target != null;
        assert sources != null;
        Set<Source> upstreams = new HashSet<>();
        for (FlowBlock.Input input : target.getInputs()) {
            for (FlowBlock.Connection connection : input.getConnections()) {
                Source origin = sources.get(connection.getUpstream());
                // checked only when assertions are enabled
                assert origin != null;
                upstreams.add(origin);
            }
        }
        target.resolveSources(upstreams);
    }

    /**
     * Builds the mapping from block outputs to the sources which provide them.
     * Imports are registered first, then the deliveries of each stage.
     * @param imports the jobflow imports
     * @param stages the jobflow stages
     * @return the created mapping
     */
    private Map<FlowBlock.Output, Source> createOutputMap(
            List<Import> imports,
            List<Stage> stages) {
        assert imports != null;
        assert stages != null;
        Map<FlowBlock.Output, Source> table = new HashMap<>();
        imports.forEach(each -> registerOutputs(table, each));
        for (Stage stage : stages) {
            for (Source delivery : stage.getDeliveries()) {
                registerOutputs(table, delivery);
            }
        }
        return table;
    }

    /**
     * Maps every output of the source to the source itself, replacing any earlier entry.
     * @param table the mapping to be updated
     * @param source the source to be registered
     */
    private static void registerOutputs(Map<FlowBlock.Output, Source> table, Source source) {
        for (FlowBlock.Output output : source.getOutputs()) {
            table.put(output, source);
        }
    }

    /**
     * Reports an error to the environment, and then marks this analyzer as erroneous.
     * @param format the message format
     * @param args the message arguments
     */
    private void error(String format, Object...args) {
        environment.error(format, args);
        sawError = true;
    }
}