/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.runtime;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

import com.google.common.base.Enums;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;

import lombok.extern.slf4j.Slf4j;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.WorkUnitState;
import gobblin.converter.Converter;
import gobblin.fork.ForkOperator;
import gobblin.instrumented.Instrumented;
import gobblin.instrumented.converter.InstrumentedConverterDecorator;
import gobblin.instrumented.fork.InstrumentedForkOperatorDecorator;
import gobblin.publisher.TaskPublisher;
import gobblin.publisher.TaskPublisherBuilderFactory;
import gobblin.qualitychecker.row.RowLevelPolicyChecker;
import gobblin.qualitychecker.row.RowLevelPolicyCheckerBuilderFactory;
import gobblin.qualitychecker.task.TaskLevelPolicyCheckResults;
import gobblin.qualitychecker.task.TaskLevelPolicyChecker;
import gobblin.qualitychecker.task.TaskLevelPolicyCheckerBuilderFactory;
import gobblin.runtime.util.TaskMetrics;
import gobblin.source.Source;
import gobblin.source.extractor.Extractor;
import gobblin.source.workunit.WorkUnit;
import gobblin.util.limiter.DefaultLimiterFactory;
import gobblin.util.limiter.Limiter;
import gobblin.util.limiter.NonRefillableLimiter;
import gobblin.util.ForkOperatorUtils;
import gobblin.writer.DataWriterBuilder;
import gobblin.writer.Destination;
import gobblin.writer.WatermarkStorage;
import gobblin.writer.WriterOutputFormat;


/**
 * A class containing all necessary information to construct and run a {@link Task}.
 *
 * <p>
 *   Most getters build a fresh object on every call from the class names and settings held in
 *   the wrapped {@link TaskState}; nothing other than the raw source {@link Extractor} is cached.
 * </p>
 *
 * @author Yinan Li
 */
@Slf4j
public class TaskContext {

  private final TaskState taskState;
  private final TaskMetrics taskMetrics;

  // Undecorated extractor produced by the most recent getExtractor() call; null until then.
  private Extractor rawSourceExtractor;

  /**
   * Create a context for the given work unit and record the name of the task's metric context
   * in the task state under {@link Instrumented#METRIC_CONTEXT_NAME_KEY}.
   *
   * @param workUnitState the {@link WorkUnitState} the {@link TaskState} is built from
   */
  public TaskContext(WorkUnitState workUnitState) {
    this.taskState = new TaskState(workUnitState);
    this.taskMetrics = TaskMetrics.get(this.taskState);
    this.taskState.setProp(Instrumented.METRIC_CONTEXT_NAME_KEY, this.taskMetrics.getName());
  }

  /**
   * Get a {@link TaskState} instance for the task.
   *
   * @return a {@link TaskState} instance
   */
  public TaskState getTaskState() {
    return this.taskState;
  }

  /**
   * Get a {@link TaskMetrics} instance for the task.
   *
   * @return a {@link TaskMetrics} instance
   */
  public TaskMetrics getTaskMetrics() {
    return this.taskMetrics;
  }

  /**
   * Get a {@link Source} instance used to get a list of {@link WorkUnit}s.
   *
   * <p>
   *   A new instance of the class named by {@link ConfigurationKeys#SOURCE_CLASS_KEY} is created
   *   on every call.
   * </p>
   *
   * @return a newly created {@link Source}; never <em>null</em>
   * @throws RuntimeException if the configured class cannot be found or instantiated
   */
  public Source getSource() {
    return instantiate(this.taskState.getProp(ConfigurationKeys.SOURCE_CLASS_KEY), Source.class);
  }

  /**
   * Get a {@link Extractor} instance.
   *
   * <p>
   *   The extractor obtained from a new {@link Source} is remembered and can later be retrieved
   *   through {@link #getRawSourceExtractor()}. When
   *   {@link ConfigurationKeys#EXTRACT_LIMIT_ENABLED_KEY} is set, the returned extractor is that
   *   raw extractor wrapped in a {@link LimitingExtractorDecorator}.
   * </p>
   *
   * @return a {@link Extractor} instance
   * @throws IllegalArgumentException if limiting is enabled but the configured {@link Limiter}
   *         is not a {@link NonRefillableLimiter}
   * @throws RuntimeException if the {@link Source} fails with an {@link IOException}
   */
  public Extractor getExtractor() {
    try {
      this.rawSourceExtractor = getSource().getExtractor(this.taskState);

      boolean throttlingEnabled = this.taskState.getPropAsBoolean(ConfigurationKeys.EXTRACT_LIMIT_ENABLED_KEY,
          ConfigurationKeys.DEFAULT_EXTRACT_LIMIT_ENABLED);
      if (!throttlingEnabled) {
        return this.rawSourceExtractor;
      }

      Limiter limiter = DefaultLimiterFactory.newLimiter(this.taskState);
      if (!(limiter instanceof NonRefillableLimiter)) {
        throw new IllegalArgumentException("The Limiter used with an Extractor should be an instance of "
            + NonRefillableLimiter.class.getSimpleName());
      }
      return new LimitingExtractorDecorator<>(this.rawSourceExtractor, limiter, this.taskState);
    } catch (IOException ioe) {
      throw new RuntimeException(ioe);
    }
  }

  /**
   * Get the undecorated {@link Extractor} created by the most recent call to {@link #getExtractor()}.
   *
   * @return the raw source {@link Extractor}, or <em>null</em> if {@link #getExtractor()} has not
   *         obtained one yet
   */
  public Extractor getRawSourceExtractor() {
    return this.rawSourceExtractor;
  }

  /**
   * Get the interval for status reporting.
   *
   * @return interval for status reporting
   */
  public long getStatusReportingInterval() {
    return this.taskState.getPropAsLong(ConfigurationKeys.TASK_STATUS_REPORT_INTERVAL_IN_MS_KEY,
        ConfigurationKeys.DEFAULT_TASK_STATUS_REPORT_INTERVAL_IN_MS);
  }

  /**
   * Get the writer {@link Destination.DestinationType}.
   *
   * <p>
   *   Falls back to {@link Destination.DestinationType#HDFS} when the branch has no configured value.
   * </p>
   *
   * @param branches number of forked branches
   * @param index branch index
   * @return writer {@link Destination.DestinationType}
   * @throws IllegalArgumentException if the configured value does not name a
   *         {@link Destination.DestinationType} constant
   */
  public Destination.DestinationType getDestinationType(int branches, int index) {
    String destinationTypeKey =
        ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY, branches, index);
    return Destination.DestinationType
        .valueOf(this.taskState.getProp(destinationTypeKey, Destination.DestinationType.HDFS.name()));
  }

  /**
   * Get the output format of the writer of type {@link WriterOutputFormat}.
   *
   * <p>
   *   The configured value is matched against the enum constant names case-insensitively. A missing
   *   or unrecognized value yields {@link WriterOutputFormat#OTHER}.
   * </p>
   *
   * @param branches number of forked branches
   * @param index branch index
   * @return output format of the writer
   */
  public WriterOutputFormat getWriterOutputFormat(int branches, int index) {
    String writerOutputFormatValue = this.taskState.getProp(
        ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY, branches, index),
        WriterOutputFormat.OTHER.name());
    log.debug("Found writer output format value = {}", writerOutputFormatValue);

    // Locale.ROOT keeps the case mapping independent of the JVM default locale, so a configured
    // value cannot fail to match an enum name merely because of locale-specific casing rules.
    WriterOutputFormat wof =
        Enums.getIfPresent(WriterOutputFormat.class, writerOutputFormatValue.toUpperCase(Locale.ROOT))
            .or(WriterOutputFormat.OTHER);
    log.debug("Returning writer output format = {}", wof);
    return wof;
  }

  /**
   * Get the list of pre-fork {@link Converter}s.
   *
   * @return list (possibly empty) of {@link Converter}s
   */
  public List<Converter<?, ?, ?, ?>> getConverters() {
    return getConverters(-1, this.taskState);
  }

  /**
   * Get the list of post-fork {@link Converter}s for a given branch.
   *
   * <p>
   *   The converter class names are read from this context's own {@link TaskState}, while each
   *   converter is initialized with <code>forkTaskState</code>. For a non-negative index the
   *   branch id is also written to <code>forkTaskState</code> under
   *   {@link ConfigurationKeys#FORK_BRANCH_ID_KEY}.
   * </p>
   *
   * @param index branch index
   * @param forkTaskState a {@link TaskState} instance specific to the fork identified by the branch index
   * @return list (possibly empty) of {@link Converter}s
   * @throws RuntimeException if a configured converter class cannot be found or instantiated
   */
  @SuppressWarnings("unchecked")
  public List<Converter<?, ?, ?, ?>> getConverters(int index, TaskState forkTaskState) {
    String converterClassKey =
        ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.CONVERTER_CLASSES_KEY, index);

    if (!this.taskState.contains(converterClassKey)) {
      return Collections.emptyList();
    }

    if (index >= 0) {
      forkTaskState.setProp(ConfigurationKeys.FORK_BRANCH_ID_KEY, index);
    }

    List<Converter<?, ?, ?, ?>> converters = Lists.newArrayList();
    for (String converterClass : Splitter.on(",").omitEmptyStrings().trimResults()
        .split(this.taskState.getProp(converterClassKey))) {
      Converter<?, ?, ?, ?> converter = instantiate(converterClass, Converter.class);
      InstrumentedConverterDecorator instrumentedConverter = new InstrumentedConverterDecorator<>(converter);
      instrumentedConverter.init(forkTaskState);
      converters.add(instrumentedConverter);
    }

    return converters;
  }

  /**
   * Get the {@link ForkOperator} to be applied to converted input schema and data record.
   *
   * <p>
   *   When {@link ConfigurationKeys#FORK_OPERATOR_CLASS_KEY} is not set, the class named by
   *   {@link ConfigurationKeys#DEFAULT_FORK_OPERATOR_CLASS} is used.
   * </p>
   *
   * @return the {@link ForkOperator} to be used, wrapped in an
   *         {@link InstrumentedForkOperatorDecorator}; never <code>null</code>
   * @throws RuntimeException if the fork operator class cannot be found or instantiated
   */
  @SuppressWarnings("unchecked")
  public ForkOperator getForkOperator() {
    ForkOperator fork = instantiate(this.taskState.getProp(ConfigurationKeys.FORK_OPERATOR_CLASS_KEY,
        ConfigurationKeys.DEFAULT_FORK_OPERATOR_CLASS), ForkOperator.class);
    return new InstrumentedForkOperatorDecorator<>(fork);
  }

  /**
   * Get a pre-fork {@link RowLevelPolicyChecker} for executing row-level
   * {@link gobblin.qualitychecker.row.RowLevelPolicy}.
   *
   * @return a {@link RowLevelPolicyChecker}
   * @throws Exception if the checker cannot be built
   */
  public RowLevelPolicyChecker getRowLevelPolicyChecker() throws Exception {
    return getRowLevelPolicyChecker(-1);
  }

  /**
   * Get a post-fork {@link RowLevelPolicyChecker} for executing row-level
   * {@link gobblin.qualitychecker.row.RowLevelPolicy} in the given branch.
   *
   * @param index branch index
   * @return a {@link RowLevelPolicyChecker}
   * @throws Exception if the checker cannot be built
   */
  public RowLevelPolicyChecker getRowLevelPolicyChecker(int index) throws Exception {
    return RowLevelPolicyCheckerBuilderFactory.newPolicyCheckerBuilder(this.taskState, index).build();
  }

  /**
   * Get a post-fork {@link TaskLevelPolicyChecker} for executing task-level
   * {@link gobblin.qualitychecker.task.TaskLevelPolicy} in the given branch.
   *
   * @param taskState {@link TaskState} of a {@link Task}
   * @param index branch index
   * @return a {@link TaskLevelPolicyChecker}
   * @throws Exception if the checker cannot be built
   */
  public TaskLevelPolicyChecker getTaskLevelPolicyChecker(TaskState taskState, int index) throws Exception {
    return TaskLevelPolicyCheckerBuilderFactory.newPolicyCheckerBuilder(taskState, index).build();
  }

  /**
   * Get a post-fork {@link TaskPublisher} for publishing data in the given branch.
   *
   * @param taskState {@link TaskState} of a {@link Task}
   * @param results Task-level policy checking results
   * @return a {@link TaskPublisher}
   * @throws Exception if the publisher cannot be built
   */
  public TaskPublisher getTaskPublisher(TaskState taskState, TaskLevelPolicyCheckResults results) throws Exception {
    return TaskPublisherBuilderFactory.newTaskPublisherBuilder(taskState, results).build();
  }

  /**
   * Get a {@link DataWriterBuilder} for building a {@link gobblin.writer.DataWriter}.
   *
   * <p>
   *   When no builder class is configured for the branch, the class named by
   *   {@link ConfigurationKeys#DEFAULT_WRITER_BUILDER_CLASS} is used.
   * </p>
   *
   * @param branches number of forked branches
   * @param index branch index
   * @return a {@link DataWriterBuilder}
   * @throws RuntimeException if the builder class cannot be found or instantiated
   */
  public DataWriterBuilder getDataWriterBuilder(int branches, int index) {
    String writerBuilderPropertyName =
        ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_BUILDER_CLASS, branches, index);
    log.debug("Using property {} to get a writer builder for branches:{}, index:{}", writerBuilderPropertyName,
        branches, index);

    String dataWriterBuilderClassName = this.taskState.getProp(writerBuilderPropertyName, null);
    if (dataWriterBuilderClassName == null) {
      dataWriterBuilderClassName = ConfigurationKeys.DEFAULT_WRITER_BUILDER_CLASS;
      log.info("No configured writer builder found, using {} as the default builder", dataWriterBuilderClassName);
    } else {
      log.info("Found configured writer builder as {}", dataWriterBuilderClassName);
    }

    return instantiate(dataWriterBuilderClassName, DataWriterBuilder.class);
  }

  /**
   * Get a {@link WatermarkStorage} for the task.
   *
   * @return a new {@link StateStoreBasedWatermarkStorage} constructed from this context's {@link TaskState}
   */
  public WatermarkStorage getWatermarkStorage() {
    return new StateStoreBasedWatermarkStorage(this.taskState);
  }

  /**
   * Load the named class, create an instance through its no-argument constructor and cast the
   * instance to the expected type.
   *
   * @param className fully qualified name of the class to instantiate
   * @param expectedType type the new instance is cast to
   * @return the new instance
   * @throws RuntimeException wrapping the {@link ClassNotFoundException},
   *         {@link InstantiationException} or {@link IllegalAccessException} raised while loading
   *         or instantiating the class
   */
  private static <T> T instantiate(String className, Class<T> expectedType) {
    try {
      return expectedType.cast(Class.forName(className).newInstance());
    } catch (ReflectiveOperationException roe) {
      // Common supertype of the three checked exceptions the reflective calls above declare.
      throw new RuntimeException(roe);
    }
  }
}