/**
* Copyright 2011-2017 Asakusa Framework Team.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.asakusafw.compiler.yaess;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.asakusafw.compiler.batch.AbstractWorkflowProcessor;
import com.asakusafw.compiler.batch.WorkDescriptionProcessor;
import com.asakusafw.compiler.batch.Workflow;
import com.asakusafw.compiler.batch.WorkflowProcessor;
import com.asakusafw.compiler.batch.processor.JobFlowWorkDescriptionProcessor;
import com.asakusafw.compiler.common.Precondition;
import com.asakusafw.compiler.flow.ExternalIoCommandProvider;
import com.asakusafw.compiler.flow.ExternalIoCommandProvider.Command;
import com.asakusafw.compiler.flow.ExternalIoCommandProvider.CommandContext;
import com.asakusafw.compiler.flow.jobflow.CompiledStage;
import com.asakusafw.compiler.flow.jobflow.JobflowModel;
import com.asakusafw.utils.graph.Graph;
import com.asakusafw.vocabulary.batch.JobFlowWorkDescription;
import com.asakusafw.yaess.core.BatchScript;
import com.asakusafw.yaess.core.CommandScript;
import com.asakusafw.yaess.core.ExecutionPhase;
import com.asakusafw.yaess.core.ExecutionScript;
import com.asakusafw.yaess.core.FlowScript;
import com.asakusafw.yaess.core.HadoopScript;
/**
* An implementation of {@link WorkflowProcessor} for YAESS.
* @since 0.2.3
*/
public class YaessWorkflowProcessor extends AbstractWorkflowProcessor {
static final Logger LOG = LoggerFactory.getLogger(YaessWorkflowProcessor.class);
/**
* The output path.
*/
public static final String PATH = "etc/yaess-script.properties"; //$NON-NLS-1$
/**
* Computes and returns the path to the YAESS script output.
*
* @param outputDir
* compilation output path
* @return the JSON output path
* @throws IllegalArgumentException
* if some parameters were {@code null}
*/
public static File getScriptOutput(File outputDir) {
Precondition.checkMustNotBeNull(outputDir, "outputDir"); //$NON-NLS-1$
return new File(outputDir, PATH);
}
@Override
public Collection<Class<? extends WorkDescriptionProcessor<?>>> getDescriptionProcessors() {
List<Class<? extends WorkDescriptionProcessor<?>>> results = new ArrayList<>();
results.add(JobFlowWorkDescriptionProcessor.class);
return results;
}
@Override
public void process(Workflow workflow) throws IOException {
LOG.debug("Anayzing Workflow Structure for YAESS"); //$NON-NLS-1$
List<FlowScript> scripts = processJobflowList(workflow);
LOG.debug("Building YAESS Batch Script"); //$NON-NLS-1$
Properties properties = new Properties();
properties.setProperty(BatchScript.KEY_ID, getBatchId());
properties.setProperty(BatchScript.KEY_VERSION, BatchScript.VERSION);
properties.setProperty(BatchScript.KEY_VERIFICATION_CODE, getEnvironment().getBuildId());
for (FlowScript script : scripts) {
LOG.trace("Building YAESS Flow Script: {}", script.getId()); //$NON-NLS-1$
script.storeTo(properties);
}
LOG.debug("Exporting YAESS Batch Script"); //$NON-NLS-1$
try (OutputStream output = getEnvironment().openResource(PATH)) {
properties.store(output, MessageFormat.format(
"YAESS Batch Script for \"{0}\", version {1}", //$NON-NLS-1$
getBatchId(),
BatchScript.VERSION));
}
LOG.debug("Exported YAESS Batch Script"); //$NON-NLS-1$
}
private List<FlowScript> processJobflowList(Workflow workflow) {
assert workflow != null;
List<FlowScript> jobflows = new ArrayList<>();
for (Graph.Vertex<Workflow.Unit> vertex : sortJobflow(workflow.getGraph())) {
FlowScript jobflow = processJobflow(vertex.getNode(), vertex.getConnected());
jobflows.add(jobflow);
}
return jobflows;
}
private List<Graph.Vertex<JobflowModel.Stage>> sortStage(Iterable<Graph.Vertex<JobflowModel.Stage>> vertices) {
assert vertices != null;
List<Graph.Vertex<JobflowModel.Stage>> results = new ArrayList<>();
for (Graph.Vertex<JobflowModel.Stage> vertex : vertices) {
results.add(vertex);
}
Collections.sort(results, (o1, o2) -> Integer.compare(o1.getNode().getNumber(), o2.getNode().getNumber()));
return results;
}
private List<Graph.Vertex<Workflow.Unit>> sortJobflow(Iterable<Graph.Vertex<Workflow.Unit>> vertices) {
assert vertices != null;
List<Graph.Vertex<Workflow.Unit>> results = new ArrayList<>();
for (Graph.Vertex<Workflow.Unit> vertex : vertices) {
results.add(vertex);
}
Collections.sort(results, (o1, o2) ->
o1.getNode().getDescription().getName().compareTo(o2.getNode().getDescription().getName()));
return results;
}
private FlowScript processJobflow(Workflow.Unit unit, Set<Workflow.Unit> blockers) {
assert unit != null;
assert blockers != null;
JobflowModel model = toJobflowModel(unit);
CommandContext context = new CommandContext(
ExecutionScript.PLACEHOLDER_HOME + '/',
ExecutionScript.PLACEHOLDER_EXECUTION_ID,
ExecutionScript.PLACEHOLDER_ARGUMENTS);
Map<ExecutionPhase, List<ExecutionScript>> scripts = new HashMap<>();
scripts.put(ExecutionPhase.INITIALIZE, processInitializers(model, context));
scripts.put(ExecutionPhase.IMPORT, processImporters(model, context));
scripts.put(ExecutionPhase.PROLOGUE, processPrologues(model, context));
scripts.put(ExecutionPhase.MAIN, processMain(model, context));
scripts.put(ExecutionPhase.EPILOGUE, processEpilogues(model, context));
scripts.put(ExecutionPhase.EXPORT, processExporters(model, context));
scripts.put(ExecutionPhase.FINALIZE, processFinalizers(model, context));
return new FlowScript(
model.getFlowId(), toUnitNames(blockers),
scripts, EnumSet.allOf(ExecutionScript.Kind.class));
}
private List<ExecutionScript> processInitializers(JobflowModel model, CommandContext context) {
assert model != null;
assert context != null;
List<ExecutionScript> results = new ArrayList<>();
for (ExternalIoCommandProvider provider : model.getCompiled().getCommandProviders()) {
List<Command> commands = provider.getInitializeCommand(context);
List<ExecutionScript> scripts = processCommands(provider, commands);
results.addAll(scripts);
}
return results;
}
private List<ExecutionScript> processImporters(JobflowModel model, CommandContext context) {
assert model != null;
assert context != null;
List<ExecutionScript> results = new ArrayList<>();
for (ExternalIoCommandProvider provider : model.getCompiled().getCommandProviders()) {
List<ExecutionScript> scripts = processCommands(provider, provider.getImportCommand(context));
results.addAll(scripts);
}
return results;
}
private List<ExecutionScript> processExporters(JobflowModel model, CommandContext context) {
assert model != null;
assert context != null;
List<ExecutionScript> results = new ArrayList<>();
for (ExternalIoCommandProvider provider : model.getCompiled().getCommandProviders()) {
List<ExecutionScript> scripts = processCommands(provider, provider.getExportCommand(context));
results.addAll(scripts);
}
return results;
}
private List<ExecutionScript> processFinalizers(JobflowModel model, CommandContext context) {
assert model != null;
assert context != null;
List<ExecutionScript> results = new ArrayList<>();
for (ExternalIoCommandProvider provider : model.getCompiled().getCommandProviders()) {
List<ExecutionScript> scripts = processCommands(provider, provider.getFinalizeCommand(context));
results.addAll(scripts);
}
return results;
}
private List<ExecutionScript> processCommands(ExternalIoCommandProvider provider, List<Command> commands) {
assert provider != null;
assert commands != null;
List<ExecutionScript> scripts = new ArrayList<>();
for (Command command : commands) {
String profile = command.getProfileName();
scripts.add(new CommandScript(
command.getId(),
Collections.emptySet(),
profile == null ? CommandScript.DEFAULT_PROFILE_NAME : profile,
command.getModuleName(),
command.getCommandTokens(),
command.getEnvironment()));
}
return scripts;
}
private List<ExecutionScript> processPrologues(JobflowModel model, CommandContext context) {
assert model != null;
assert context != null;
return processStages(model.getCompiled().getPrologueStages());
}
private List<ExecutionScript> processEpilogues(JobflowModel model, CommandContext context) {
assert model != null;
assert context != null;
return processStages(model.getCompiled().getEpilogueStages());
}
private List<ExecutionScript> processMain(JobflowModel model, CommandContext context) {
assert model != null;
assert context != null;
List<ExecutionScript> results = new ArrayList<>();
for (Graph.Vertex<JobflowModel.Stage> stage : sortStage(model.getDependencyGraph())) {
results.add(processStage(stage.getNode().getCompiled(), stage.getConnected()));
}
return results;
}
private List<ExecutionScript> processStages(List<CompiledStage> stages) {
assert stages != null;
List<ExecutionScript> results = new ArrayList<>();
for (CompiledStage stage : stages) {
results.add(processStage(stage, Collections.emptySet()));
}
return results;
}
private ExecutionScript processStage(CompiledStage stage, Set<JobflowModel.Stage> blockers) {
assert stage != null;
assert blockers != null;
String stageId = stage.getStageId();
Set<String> blockerIds = toStageNames(blockers);
String className = stage.getQualifiedName().toNameString();
Map<String, String> props = Collections.emptyMap();
Map<String, String> envs = Collections.emptyMap();
return new HadoopScript(stageId, blockerIds, className, props, envs);
}
private JobflowModel toJobflowModel(Workflow.Unit unit) {
assert unit != null;
assert unit.getDescription() instanceof JobFlowWorkDescription;
return (JobflowModel) unit.getProcessed();
}
private Set<String> toUnitNames(Set<Workflow.Unit> blockers) {
assert blockers != null;
Set<String> names = new HashSet<>();
for (Workflow.Unit unit : blockers) {
names.add(unit.getDescription().getName());
}
return names;
}
private Set<String> toStageNames(Set<JobflowModel.Stage> blockers) {
assert blockers != null;
Set<String> names = new HashSet<>();
for (JobflowModel.Stage stage : blockers) {
names.add(stage.getCompiled().getStageId());
}
return names;
}
private String getBatchId() {
return getEnvironment().getConfiguration().getBatchId();
}
}