//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.core.jobs;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.collection.CollectionReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.Yaml;
import uk.gov.dstl.baleen.core.pipelines.BaleenPipeline;
import uk.gov.dstl.baleen.core.pipelines.PipelineBuilder;
import uk.gov.dstl.baleen.core.pipelines.orderers.IPipelineOrderer;
import uk.gov.dstl.baleen.core.pipelines.orderers.NoOpOrderer;
import uk.gov.dstl.baleen.core.utils.BaleenDefaults;
import uk.gov.dstl.baleen.core.utils.YamlConfiguration;
/**
 * Converts a Baleen YAML job configuration into a {@link BaleenPipeline} (a {@link BaleenJob})
 * that can be executed by Baleen.
 *
 * <p>The implementation is broadly similar to {@link PipelineBuilder}, except that different
 * default packages are assumed and the format is expected to be as follows. Note that this
 * changed in Baleen 2.4, and the <em>job</em> object required prior to this is no longer
 * accepted.
 *
 * <pre>
 * shape:
 *   color: red
 *   size: large
 *
 * schedule:
 *   class: Repeat
 *   count: 5
 * tasks:
 * - class: DummyTask
 * - class: DummyTaskWithParams
 *   param: value
 * </pre>
 *
 * <p>The <em>schedule</em> entry may also be given as a plain string, in which case it is used as
 * the schedule class with no further parameters. If <em>schedule</em> is absent or has no value,
 * {@link BaleenDefaults#DEFAULT_SCHEDULER} is used.
 *
 * <p>The job pipeline will always run the tasks in the order specified; any <em>orderer</em>
 * given in the configuration is overwritten with {@link NoOpOrderer}.
 */
public class JobBuilder extends PipelineBuilder {
    private static final Logger LOGGER = LoggerFactory.getLogger(JobBuilder.class);

    private static final String KEY_CLASS = "class";
    private static final String KEY_ORDERER = "orderer";
    private static final String KEY_SCHEDULE = "schedule";
    private static final String KEY_TASKS = "tasks";

    /**
     * Construct a JobBuilder from the name and YAML
     *
     * @param name
     *            Pipeline name
     * @param yaml
     *            Pipeline YAML
     */
    public JobBuilder(String name, String yaml) {
        super(name, yaml);
    }

    /**
     * Parses the job YAML and populates the configuration fields inherited from
     * {@link PipelineBuilder}: the global configuration, the collection reader configuration
     * (taken from <em>schedule</em>), the annotator configuration (taken from <em>tasks</em>) and
     * an always-empty consumer configuration.
     *
     * <p>An empty YAML document is treated as an empty configuration, a missing or valueless
     * <em>schedule</em> falls back to the default scheduler, and a missing <em>tasks</em> entry
     * yields an empty task list.
     *
     * @throws ClassCastException
     *             if the YAML root is not a mapping, if <em>schedule</em> is neither a string
     *             nor a mapping, or if <em>tasks</em> is not a list
     */
    @SuppressWarnings("unchecked")
    @Override
    protected void readConfiguration() {
        LOGGER.debug("Reading configuration");

        // NOTE(review): a default-constructed Yaml performs an unrestricted load, which on older
        // SnakeYAML versions can instantiate arbitrary types named in YAML tags. Confirm that job
        // YAML only ever comes from trusted sources, or switch to a safe constructor.
        Yaml y = new Yaml();
        String cleanYaml = YamlConfiguration.cleanTabs(yaml);

        globalConfig = (Map<String, Object>) y.load(cleanYaml);
        if (globalConfig == null) {
            // SnakeYAML returns null for an empty document; carry on with an empty configuration
            // rather than failing with a NullPointerException on the first put below.
            globalConfig = new HashMap<>();
        }

        // Overwrite any specified orderer - jobs are always run sequentially
        globalConfig.put(KEY_ORDERER, NoOpOrderer.class.getName());

        // remove() returns null both when the key is absent and when it is present without a
        // value ("schedule:"); both cases fall back to the default scheduler.
        Object schedule = globalConfig.remove(KEY_SCHEDULE);
        if (schedule == null) {
            collectionReaderConfig = classOnlyConfig(BaleenDefaults.DEFAULT_SCHEDULER);
        } else if (schedule instanceof String) {
            collectionReaderConfig = classOnlyConfig(schedule);
        } else {
            collectionReaderConfig = (Map<String, Object>) schedule;
        }

        // A job without tasks gets an empty list, mirroring consumersConfig, so that the
        // annotator configuration is never left null.
        Object tasks = globalConfig.remove(KEY_TASKS);
        if (tasks == null) {
            annotatorsConfig = Collections.emptyList();
        } else {
            annotatorsConfig = (List<Object>) tasks;
        }

        // Jobs have no consumers
        consumersConfig = Collections.emptyList();

        globalConfig.put(PIPELINE_NAME, name);
    }

    /**
     * Builds a mutable configuration map containing only a <em>class</em> entry.
     *
     * @param className
     *            value to store under the <em>class</em> key
     * @return a new map holding the single entry
     */
    private static Map<String, Object> classOnlyConfig(Object className) {
        Map<String, Object> config = new HashMap<>();
        config.put(KEY_CLASS, className);
        return config;
    }

    /**
     * Creates the {@link BaleenJob} for this builder. Only the name, YAML, collection reader
     * (the schedule) and annotators (the tasks) are passed on; the orderer and consumers are not
     * used.
     */
    @Override
    protected BaleenPipeline toPipeline(String name, String yaml, IPipelineOrderer orderer,
            CollectionReader collectionReader, List<AnalysisEngine> annotators,
            List<AnalysisEngine> consumers) {
        return new BaleenJob(name, yaml, collectionReader, annotators);
    }

    /**
     * @return the default package for schedules, which take the place of collection readers in a
     *         job
     */
    @Override
    protected String getDefaultReaderPackage() {
        return BaleenDefaults.DEFAULT_SCHEDULE_PACKAGE;
    }

    /**
     * @return the default package for tasks, which take the place of annotators in a job
     */
    @Override
    protected String getDefaultAnnotatorPackage() {
        return BaleenDefaults.DEFAULT_TASK_PACKAGE;
    }
}