/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.etl.batch;

import co.cask.cdap.api.metrics.Metrics;
import co.cask.cdap.etl.api.StageLifecycle;
import co.cask.cdap.etl.api.StageMetrics;
import co.cask.cdap.etl.api.Transformation;
import co.cask.cdap.etl.api.batch.BatchRuntimeContext;
import co.cask.cdap.etl.common.DefaultStageMetrics;
import co.cask.cdap.etl.common.PipelinePhase;
import co.cask.cdap.etl.common.TrackedTransform;
import co.cask.cdap.etl.common.TransformDetail;
import co.cask.cdap.etl.common.TransformExecutor;
import co.cask.cdap.etl.planner.StageInfo;

import java.util.HashMap;
import java.util.Map;

/**
 * Helps create {@link TransformExecutor TransformExecutors}.
 *
 * <p>Subclasses supply the {@link BatchRuntimeContext} used to initialize each stage's plugin via
 * {@link #createRuntimeContext(String)}.
 *
 * @param <T> the type of input for the created transform executors
 */
public abstract class TransformExecutorFactory<T> {
  /** Used to create the plugin instance for each stage. */
  protected final PipelinePluginInstantiator pluginInstantiator;
  /** Metrics handle passed to the per-stage {@link DefaultStageMetrics}. */
  protected final Metrics metrics;

  /**
   * Creates a factory that instantiates plugins with the given instantiator and reports
   * per-stage metrics through the given metrics object.
   *
   * @param pluginInstantiator used to create a plugin instance for each stage
   * @param metrics the metrics object wrapped into per-stage metrics
   */
  public TransformExecutorFactory(PipelinePluginInstantiator pluginInstantiator, Metrics metrics) {
    this.pluginInstantiator = pluginInstantiator;
    this.metrics = metrics;
  }

  /**
   * Creates the runtime context that is passed to the plugin of the given stage when it is
   * initialized.
   *
   * @param stageName the name of the stage the context is for
   * @return the runtime context for the stage
   */
  protected abstract BatchRuntimeContext createRuntimeContext(String stageName);

  /**
   * Creates the tracked transformation for a stage by initializing the stage's plugin and
   * wrapping it together with {@link DefaultStageMetrics} for that stage.
   *
   * @param pluginType the plugin type of the stage; not used by this default implementation
   * @param stageName the name of the stage
   * @return the initialized plugin wrapped in a {@link TrackedTransform}
   * @throws Exception if the plugin could not be instantiated or initialized
   */
  protected TrackedTransform getTransformation(String pluginType, String stageName) throws Exception {
    return new TrackedTransform(getInitializedTransformation(stageName),
                                new DefaultStageMetrics(metrics, stageName));
  }

  /**
   * Create a transform executor for the specified pipeline. Will instantiate and initialize all sources,
   * transforms, and sinks in the pipeline.
   *
   * @param pipeline the pipeline to create a transform executor for
   * @return executor for the pipeline
   * @throws InstantiationException if there was an error instantiating a plugin
   * @throws Exception if there was an error initializing a plugin
   */
  public TransformExecutor<T> create(PipelinePhase pipeline) throws Exception {
    // Keyed by stage name; each entry pairs the stage's transformation with its output stages.
    Map<String, TransformDetail> transformations = new HashMap<>();
    for (String pluginType : pipeline.getPluginTypes()) {
      for (StageInfo stageInfo : pipeline.getStagesOfType(pluginType)) {
        String stageName = stageInfo.getName();
        transformations.put(stageName,
                            new TransformDetail(getTransformation(pluginType, stageName),
                                                pipeline.getStageOutputs(stageName)));
      }
    }
    return new TransformExecutor<>(transformations, pipeline.getSources());
  }

  /**
   * Instantiates and initializes the plugin for the stage.
   *
   * @param stageName the stage name.
   * @param <P> the plugin type, which must be both a {@link Transformation} and a
   *            {@link StageLifecycle}; named {@code P} so that it does not shadow the
   *            class-level type parameter {@code T}
   * @return the initialized Transformation
   * @throws InstantiationException if the plugin for the stage could not be instantiated
   * @throws Exception if there was a problem initializing the plugin
   */
  protected <P extends Transformation & StageLifecycle<BatchRuntimeContext>>
  Transformation getInitializedTransformation(String stageName) throws Exception {
    BatchRuntimeContext runtimeContext = createRuntimeContext(stageName);
    P plugin = pluginInstantiator.newPluginInstance(stageName);
    plugin.initialize(runtimeContext);
    return plugin;
  }

  /**
   * Wraps a transformation in a {@link TrackedTransform} built with
   * {@link TrackedTransform#RECORDS_IN} and {@code null} as its last two constructor arguments.
   *
   * <p>NOTE(review): those two arguments are presumably the input and output metric names, with
   * {@code null} meaning no output metric is recorded; confirm against {@link TrackedTransform}.
   *
   * @param transform the transformation to wrap
   * @param stageMetrics the metrics object handed to the tracked transform
   * @param <IN> the input type of the transformation
   * @param <OUT> the output type of the transformation
   * @return the tracked wrapper around the transformation
   */
  protected static <IN, OUT> TrackedTransform<IN, OUT> getTrackedGroupStep(Transformation<IN, OUT> transform,
                                                                           StageMetrics stageMetrics) {
    return new TrackedTransform<>(transform, stageMetrics, TrackedTransform.RECORDS_IN, null);
  }

  /**
   * Wraps a transformation in a {@link TrackedTransform} built with {@code "aggregator.groups"}
   * and {@link TrackedTransform#RECORDS_OUT} as its last two constructor arguments.
   *
   * @param transform the transformation to wrap
   * @param stageMetrics the metrics object handed to the tracked transform
   * @param <IN> the input type of the transformation
   * @param <OUT> the output type of the transformation
   * @return the tracked wrapper around the transformation
   */
  protected static <IN, OUT> TrackedTransform<IN, OUT> getTrackedAggregateStep(Transformation<IN, OUT> transform,
                                                                               StageMetrics stageMetrics) {
    // 'aggregator.groups' is the number of groups output by the aggregator
    return new TrackedTransform<>(transform, stageMetrics, "aggregator.groups", TrackedTransform.RECORDS_OUT);
  }
}