/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.etl.batch;

import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.metrics.Metrics;
import co.cask.cdap.api.workflow.AbstractWorkflow;
import co.cask.cdap.api.workflow.WorkflowContext;
import co.cask.cdap.etl.api.LookupProvider;
import co.cask.cdap.etl.api.batch.BatchActionContext;
import co.cask.cdap.etl.api.batch.PostAction;
import co.cask.cdap.etl.batch.mapreduce.ETLMapReduce;
import co.cask.cdap.etl.batch.spark.ETLSpark;
import co.cask.cdap.etl.common.DatasetContextLookupProvider;
import co.cask.cdap.etl.proto.Engine;
import co.cask.cdap.internal.io.SchemaTypeAdapter;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

/**
 * Workflow that runs a batch ETL pipeline: it schedules the underlying driver program
 * (MapReduce or Spark, depending on the configured {@link Engine}) and executes any
 * configured {@link PostAction} plugins after the run completes.
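 *
 * <p>A minimal sketch of how an application might register this workflow; the enclosing
 * application class, its config type, and the spec-building helper are illustrative
 * assumptions, not part of this class:</p>
 *
 * <pre>{@code
 * public class MyETLApp extends AbstractApplication<MyConfig> {
 *   @Override
 *   public void configure() {
 *     BatchPipelineSpec spec = buildPipelineSpec(getConfig()); // hypothetical helper
 *     addWorkflow(new ETLWorkflow(spec, Engine.MAPREDUCE));
 *   }
 * }
 * }</pre>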
*/
public class ETLWorkflow extends AbstractWorkflow {
  public static final String NAME = "ETLWorkflow";
  public static final String DESCRIPTION = "Workflow for ETL Batch MapReduce Driver";
  private static final Logger LOG = LoggerFactory.getLogger(ETLWorkflow.class);
  private static final Gson GSON = new GsonBuilder()
    .registerTypeAdapter(Schema.class, new SchemaTypeAdapter()).create();
  // Key under which the serialized pipeline spec is stored in the workflow properties.
  private static final String PIPELINE_SPEC_KEY = "pipeline.spec";

  private final Engine engine;
  private final BatchPipelineSpec spec;
  private Map<String, PostAction> postActions;

  // Injected by the CDAP runtime.
  @SuppressWarnings("unused")
  private Metrics workflowMetrics;
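
  /**
   * @param spec the spec of the pipeline this workflow will run
   * @param engine the execution engine to run the pipeline with
   */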
  public ETLWorkflow(BatchPipelineSpec spec, Engine engine) {
    this.engine = engine;
    this.spec = spec;
  }
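
  /**
   * Registers the driver program for the configured engine and stores the serialized
   * pipeline spec in the workflow properties so it can be recovered in {@link #initialize}.
   */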
  @Override
  protected void configure() {
    setName(NAME);
    setDescription(DESCRIPTION);
    // Register the driver program that matches the chosen execution engine; MAPREDUCE and
    // SPARK are the two supported engines. The Spark program is registered by its class
    // simple name, the MapReduce program by its NAME constant.
    switch (engine) {
      case MAPREDUCE:
        addMapReduce(ETLMapReduce.NAME);
        break;
      case SPARK:
        addSpark(ETLSpark.class.getSimpleName());
        break;
    }
    // Serialize the spec into the workflow properties: configure() runs at deployment time,
    // so instance state is not otherwise available when the workflow actually runs.
    Map<String, String> properties = new HashMap<>();
    properties.put(PIPELINE_SPEC_KEY, GSON.toJson(spec));
    setProperties(properties);
  }
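
  /**
   * Instantiates the post-run action plugins declared in the pipeline spec, keyed by name
   * and kept in declaration order.
   */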
  @Override
  public void initialize(WorkflowContext context) throws Exception {
    super.initialize(context);
    // LinkedHashMap preserves the order in which the ending actions were declared.
    postActions = new LinkedHashMap<>();
    BatchPipelineSpec batchPipelineSpec =
      GSON.fromJson(context.getWorkflowSpecification().getProperty(PIPELINE_SPEC_KEY), BatchPipelineSpec.class);
    for (ActionSpec actionSpec : batchPipelineSpec.getEndingActions()) {
      postActions.put(actionSpec.getName(), (PostAction) context.newPluginInstance(actionSpec.getName()));
    }
  }
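
  /**
   * Runs each post-run action after the pipeline finishes, whether it succeeded or failed.
   * A failure in one action is logged and does not prevent the remaining actions from running.
   */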
  @Override
  public void destroy() {
    WorkflowContext workflowContext = getContext();
    LookupProvider lookupProvider = new DatasetContextLookupProvider(workflowContext);
    Map<String, String> runtimeArgs = workflowContext.getRuntimeArguments();
    long logicalStartTime = workflowContext.getLogicalStartTime();
    for (Map.Entry<String, PostAction> endingActionEntry : postActions.entrySet()) {
      String name = endingActionEntry.getKey();
      PostAction action = endingActionEntry.getValue();
      BatchActionContext context = new WorkflowBackedActionContext(workflowContext, workflowMetrics, lookupProvider,
                                                                   name, logicalStartTime, runtimeArgs);
      try {
        action.run(context);
      } catch (Throwable t) {
        // Deliberately swallow the error so that one failing post-run action does not
        // stop the remaining actions from running.
        LOG.error("Error while running ending action {}.", name, t);
      }
    }
  }
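
  /*
   * For reference, the actions run above implement the PostAction plugin contract. A minimal
   * sketch of such a plugin (the class name and the notify helper are illustrative
   * assumptions, not part of this module):
   *
   *   public class NotifyAction extends PostAction {
   *     @Override
   *     public void run(BatchActionContext context) throws Exception {
   *       // BatchActionContext exposes whether the pipeline run succeeded.
   *       if (context.isSuccessful()) {
   *         notify("pipeline succeeded"); // hypothetical helper
   *       }
   *     }
   *   }
   */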
}