/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.runtime; import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.MDC; import com.google.common.base.CaseFormat; import com.google.common.base.Function; import com.google.common.base.Optional; import com.google.common.base.Preconditions; import com.google.common.base.Predicate; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.eventbus.EventBus; import com.google.common.io.Closer; import com.typesafe.config.ConfigFactory; import gobblin.source.Source; import gobblin.source.WorkUnitStreamSource; import gobblin.source.workunit.BasicWorkUnitStream; import gobblin.source.workunit.WorkUnitStream; import gobblin.broker.gobblin_scopes.GobblinScopeTypes; import gobblin.broker.SharedResourcesBrokerFactory; import gobblin.broker.gobblin_scopes.GobblinScopeTypes; import gobblin.broker.iface.SharedResourcesBroker; import gobblin.commit.CommitSequence; import gobblin.commit.CommitSequenceStore; import gobblin.commit.DeliverySemantics; import gobblin.configuration.ConfigurationKeys; import gobblin.configuration.WorkUnitState; import gobblin.converter.initializer.ConverterInitializerFactory; import gobblin.metrics.GobblinMetrics; import gobblin.metrics.GobblinMetricsRegistry; import gobblin.metrics.MetricContext; import gobblin.metrics.Tag; import gobblin.metrics.event.EventSubmitter; import gobblin.metrics.event.JobEvent; import gobblin.metrics.event.TimingEvent; import gobblin.runtime.listeners.CloseableJobListener; import gobblin.runtime.listeners.JobExecutionEventSubmitterListener; import gobblin.runtime.listeners.JobListener; import gobblin.runtime.listeners.JobListeners; import gobblin.runtime.locks.JobLock; import gobblin.runtime.locks.JobLockEventListener; import gobblin.runtime.locks.JobLockException; import gobblin.runtime.locks.LegacyJobLockFactoryManager; import gobblin.runtime.util.JobMetrics; import gobblin.source.extractor.JobCommitPolicy; import gobblin.source.workunit.MultiWorkUnit; import gobblin.source.workunit.WorkUnit; import gobblin.util.ClusterNameTags; import gobblin.util.ExecutorsUtils; import gobblin.util.Id; import gobblin.util.JobLauncherUtils; import gobblin.util.ParallelRunner; import gobblin.writer.initializer.WriterInitializerFactory; import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; /** * An abstract implementation of {@link JobLauncher} that handles common tasks for launching and running a job. * * @author Yinan Li */ public abstract class AbstractJobLauncher implements JobLauncher { static final Logger LOG = LoggerFactory.getLogger(AbstractJobLauncher.class); public static final String TASK_STATE_STORE_TABLE_SUFFIX = ".tst"; public static final String JOB_STATE_FILE_NAME = "job.state"; public static final String WORK_UNIT_FILE_EXTENSION = ".wu"; public static final String MULTI_WORK_UNIT_FILE_EXTENSION = ".mwu"; // Job configuration properties protected final Properties jobProps; // This contains all job context information protected final JobContext jobContext; // This (optional) JobLock is used to prevent the next scheduled run // of the job from starting if the current run has not finished yet protected Optional<JobLock> jobLockOptional = Optional.absent(); // A conditional variable for which the condition is satisfied if a cancellation is requested protected final Object cancellationRequest = new Object(); // A flag indicating whether a cancellation has been requested or not protected volatile boolean cancellationRequested = false; // A conditional variable for which the condition is satisfied if the cancellation is executed protected final Object cancellationExecution = new Object(); // A flag indicating whether a cancellation has been executed or not protected volatile boolean cancellationExecuted = false; // A single-thread executor for executing job cancellation protected final ExecutorService cancellationExecutor; // An MetricContext to track runtime metrics only if metrics are enabled. protected final Optional<MetricContext> runtimeMetricContext; // An EventBuilder with basic metadata. protected final EventSubmitter eventSubmitter; // This is for dispatching events related to job launching and execution to registered subscribers protected final EventBus eventBus = new EventBus(AbstractJobLauncher.class.getSimpleName()); // A list of JobListeners that will be injected into the user provided JobListener private final List<JobListener> mandatoryJobListeners = Lists.newArrayList(); public AbstractJobLauncher(Properties jobProps, List<? extends Tag<?>> metadataTags) throws Exception { this(jobProps, metadataTags, null); } public AbstractJobLauncher(Properties jobProps, List<? extends Tag<?>> metadataTags, @Nullable SharedResourcesBroker<GobblinScopeTypes> instanceBroker) throws Exception { Preconditions.checkArgument(jobProps.containsKey(ConfigurationKeys.JOB_NAME_KEY), "A job must have a job name specified by job.name"); // Add clusterIdentifier tag so that it is added to any new TaskState created List<Tag<?>> clusterNameTags = Lists.newArrayList(); clusterNameTags.addAll(Tag.fromMap(ClusterNameTags.getClusterNameTags())); GobblinMetrics.addCustomTagsToProperties(jobProps, clusterNameTags); // Make a copy for both the system and job configuration properties this.jobProps = new Properties(); this.jobProps.putAll(jobProps); if (!tryLockJob(this.jobProps)) { throw new JobException(String.format("Previous instance of job %s is still running, skipping this scheduled run", this.jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY))); } try { if (instanceBroker == null) { instanceBroker = createDefaultInstanceBroker(jobProps); } this.jobContext = new JobContext(this.jobProps, LOG, instanceBroker); this.eventBus.register(this.jobContext); this.cancellationExecutor = Executors.newSingleThreadExecutor( ExecutorsUtils.newThreadFactory(Optional.of(LOG), Optional.of("CancellationExecutor"))); this.runtimeMetricContext = this.jobContext.getJobMetricsOptional().transform(new Function<JobMetrics, MetricContext>() { @Override public MetricContext apply(JobMetrics input) { return input.getMetricContext(); } }); this.eventSubmitter = buildEventSubmitter(metadataTags); // Add all custom tags to the JobState so that tags are added to any new TaskState created GobblinMetrics.addCustomTagToState(this.jobContext.getJobState(), metadataTags); JobExecutionEventSubmitter jobExecutionEventSubmitter = new JobExecutionEventSubmitter(this.eventSubmitter); this.mandatoryJobListeners.add(new JobExecutionEventSubmitterListener(jobExecutionEventSubmitter)); } catch (Exception e) { unlockJob(); throw e; } } private static SharedResourcesBroker<GobblinScopeTypes> createDefaultInstanceBroker(Properties jobProps) { LOG.warn("Creating a job specific {}. Objects will only be shared at the job level.", SharedResourcesBroker.class.getSimpleName()); return SharedResourcesBrokerFactory.createDefaultTopLevelBroker(ConfigFactory.parseProperties(jobProps), GobblinScopeTypes.GLOBAL.defaultScopeInstance()); } /** * The JobContext of the particular job. * * @return {@link JobContext} of the job */ JobContext getJobContext() { return this.jobContext; } /** * A default implementation of {@link JobLauncher#cancelJob(JobListener)}. * * <p> * This implementation relies on two conditional variables: one for the condition that a cancellation * is requested, and the other for the condition that the cancellation is executed. Upon entrance, the * method notifies the cancellation executor started by {@link #startCancellationExecutor()} on the * first conditional variable to indicate that a cancellation has been requested so the executor is * unblocked. Then it waits on the second conditional variable for the cancellation to be executed. * </p> * * <p> * The actual execution of the cancellation is handled by the cancellation executor started by the * method {@link #startCancellationExecutor()} that uses the {@link #executeCancellation()} method * to execute the cancellation. * </p> * * {@inheritDoc JobLauncher#cancelJob(JobListener)} */ @Override public void cancelJob(JobListener jobListener) throws JobException { synchronized (this.cancellationRequest) { if (this.cancellationRequested) { // Return immediately if a cancellation has already been requested return; } this.cancellationRequested = true; // Notify the cancellation executor that a cancellation has been requested this.cancellationRequest.notify(); } synchronized (this.cancellationExecution) { try { while (!this.cancellationExecuted) { // Wait for the cancellation to be executed this.cancellationExecution.wait(); } try { LOG.info("Current job state is: " + this.jobContext.getJobState().getState()); if (this.jobContext.getJobState().getState() != JobState.RunningState.COMMITTED && ( this.jobContext.getJobCommitPolicy() == JobCommitPolicy.COMMIT_SUCCESSFUL_TASKS || this.jobContext.getJobCommitPolicy() == JobCommitPolicy.COMMIT_ON_PARTIAL_SUCCESS)) { this.jobContext.finalizeJobStateBeforeCommit(); this.jobContext.commit(true); } this.jobContext.close(); } catch (IOException ioe) { LOG.error("Could not close job context.", ioe); } notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_CANCEL, new JobListenerAction() { @Override public void apply(JobListener jobListener, JobContext jobContext) throws Exception { jobListener.onJobCancellation(jobContext); } }); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } } } /** * This predicate checks if a work unit should be skipped. If yes, then it will removed * from the list of workUnits and it's state will be saved. */ @RequiredArgsConstructor private static class SkippedWorkUnitsFilter implements Predicate<WorkUnit> { private final JobState jobState; @Override public boolean apply(WorkUnit workUnit) { if (workUnit instanceof MultiWorkUnit) { Preconditions.checkArgument(!workUnit.contains(ConfigurationKeys.WORK_UNIT_SKIP_KEY), "Error: MultiWorkUnit cannot be skipped"); for (WorkUnit wu : ((MultiWorkUnit) workUnit).getWorkUnits()) { Preconditions.checkArgument(!wu.contains(ConfigurationKeys.WORK_UNIT_SKIP_KEY), "Error: MultiWorkUnit cannot contain skipped WorkUnit"); } } if (workUnit.getPropAsBoolean(ConfigurationKeys.WORK_UNIT_SKIP_KEY, false)) { WorkUnitState workUnitState = new WorkUnitState(workUnit, this.jobState); workUnitState.setWorkingState(WorkUnitState.WorkingState.SKIPPED); this.jobState.addSkippedTaskState(new TaskState(workUnitState)); return false; } return true; } } @Override public void launchJob(JobListener jobListener) throws JobException { String jobId = this.jobContext.getJobId(); final JobState jobState = this.jobContext.getJobState(); try { MDC.put(ConfigurationKeys.JOB_NAME_KEY, this.jobContext.getJobName()); MDC.put(ConfigurationKeys.JOB_KEY_KEY, this.jobContext.getJobKey()); TimingEvent launchJobTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.FULL_JOB_EXECUTION); try (Closer closer = Closer.create()) { notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_PREPARE, new JobListenerAction() { @Override public void apply(JobListener jobListener, JobContext jobContext) throws Exception { jobListener.onJobPrepare(jobContext); } }); if (this.jobContext.getSemantics() == DeliverySemantics.EXACTLY_ONCE) { // If exactly-once is used, commit sequences of the previous run must be successfully compelted // before this run can make progress. executeUnfinishedCommitSequences(jobState.getJobName()); } TimingEvent workUnitsCreationTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.WORK_UNITS_CREATION); Source<?, ?> source = this.jobContext.getSource(); WorkUnitStream workUnitStream; if (source instanceof WorkUnitStreamSource) { workUnitStream = ((WorkUnitStreamSource) source).getWorkunitStream(jobState); } else { workUnitStream = new BasicWorkUnitStream.Builder(source.getWorkunits(jobState)).build(); } workUnitsCreationTimer.stop(); // The absence means there is something wrong getting the work units if (workUnitStream == null || workUnitStream.getWorkUnits() == null) { this.eventSubmitter.submit(JobEvent.WORK_UNITS_MISSING); jobState.setState(JobState.RunningState.FAILED); throw new JobException("Failed to get work units for job " + jobId); } // No work unit to run if (!workUnitStream.getWorkUnits().hasNext()) { this.eventSubmitter.submit(JobEvent.WORK_UNITS_EMPTY); LOG.warn("No work units have been created for job " + jobId); jobState.setState(JobState.RunningState.COMMITTED); notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_COMPLETE, new JobListenerAction() { @Override public void apply(JobListener jobListener, JobContext jobContext) throws Exception { jobListener.onJobCompletion(jobContext); } }); return; } //Initialize writer and converter(s) closer.register(WriterInitializerFactory.newInstace(jobState, workUnitStream)).initialize(); closer.register(ConverterInitializerFactory.newInstance(jobState, workUnitStream)).initialize(); TimingEvent stagingDataCleanTimer = this.eventSubmitter.getTimingEvent(TimingEvent.RunJobTimings.MR_STAGING_DATA_CLEAN); // Cleanup left-over staging data possibly from the previous run. This is particularly // important if the current batch of WorkUnits include failed WorkUnits from the previous // run which may still have left-over staging data not cleaned up yet. cleanLeftoverStagingData(workUnitStream, jobState); stagingDataCleanTimer.stop(); long startTime = System.currentTimeMillis(); jobState.setStartTime(startTime); jobState.setState(JobState.RunningState.RUNNING); try { LOG.info("Starting job " + jobId); notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_START, new JobListenerAction() { @Override public void apply(JobListener jobListener, JobContext jobContext) throws Exception { jobListener.onJobStart(jobContext); } }); TimingEvent workUnitsPreparationTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.WORK_UNITS_PREPARATION); // Add task ids workUnitStream = prepareWorkUnits(workUnitStream, jobState); // Remove skipped workUnits from the list of work units to execute. workUnitStream = workUnitStream.filter(new SkippedWorkUnitsFilter(jobState)); // Add surviving tasks to jobState workUnitStream = workUnitStream.transform(new MultiWorkUnitForEach() { @Override public void forWorkUnit(WorkUnit workUnit) { jobState.incrementTaskCount(); jobState.addTaskState(new TaskState(new WorkUnitState(workUnit, jobState))); } }); workUnitsPreparationTimer.stop(); // Write job execution info to the job history store before the job starts to run this.jobContext.storeJobExecutionInfo(); TimingEvent jobRunTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.JOB_RUN); // Start the job and wait for it to finish runWorkUnitStream(workUnitStream); jobRunTimer.stop(); this.eventSubmitter .submit(CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, "JOB_" + jobState.getState())); // Check and set final job jobPropsState upon job completion if (jobState.getState() == JobState.RunningState.CANCELLED) { LOG.info(String.format("Job %s has been cancelled, aborting now", jobId)); return; } TimingEvent jobCommitTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.JOB_COMMIT); this.jobContext.finalizeJobStateBeforeCommit(); this.jobContext.commit(); postProcessJobState(jobState); jobCommitTimer.stop(); } finally { long endTime = System.currentTimeMillis(); jobState.setEndTime(endTime); jobState.setDuration(endTime - jobState.getStartTime()); } } catch (Throwable t) { jobState.setState(JobState.RunningState.FAILED); String errMsg = "Failed to launch and run job " + jobId; LOG.error(errMsg + ": " + t, t); } finally { try { TimingEvent jobCleanupTimer = this.eventSubmitter.getTimingEvent(TimingEvent.LauncherTimings.JOB_CLEANUP); cleanupStagingData(jobState); jobCleanupTimer.stop(); // Write job execution info to the job history store upon job termination this.jobContext.storeJobExecutionInfo(); } finally { launchJobTimer.stop(); } } for (JobState.DatasetState datasetState : this.jobContext.getDatasetStatesByUrns().values()) { // Set the overall job state to FAILED if the job failed to process any dataset if (datasetState.getState() == JobState.RunningState.FAILED) { jobState.setState(JobState.RunningState.FAILED); break; } } notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_COMPLETE, new JobListenerAction() { @Override public void apply(JobListener jobListener, JobContext jobContext) throws Exception { jobListener.onJobCompletion(jobContext); } }); if (jobState.getState() == JobState.RunningState.FAILED) { notifyListeners(this.jobContext, jobListener, TimingEvent.LauncherTimings.JOB_FAILED, new JobListenerAction() { @Override public void apply(JobListener jobListener, JobContext jobContext) throws Exception { jobListener.onJobFailure(jobContext); } }); throw new JobException(String.format("Job %s failed", jobId)); } } finally { // Stop metrics reporting if (this.jobContext.getJobMetricsOptional().isPresent()) { JobMetrics.remove(jobState); } MDC.remove(ConfigurationKeys.JOB_NAME_KEY); MDC.remove(ConfigurationKeys.JOB_KEY_KEY); } } private void executeUnfinishedCommitSequences(String jobName) throws IOException { Preconditions.checkState(this.jobContext.getCommitSequenceStore().isPresent()); CommitSequenceStore commitSequenceStore = this.jobContext.getCommitSequenceStore().get(); for (String datasetUrn : commitSequenceStore.get(jobName)) { Optional<CommitSequence> commitSequence = commitSequenceStore.get(jobName, datasetUrn); if (commitSequence.isPresent()) { commitSequence.get().execute(); } commitSequenceStore.delete(jobName, datasetUrn); } } /** * Subclasses can override this method to do whatever processing on the {@link TaskState}s, * e.g., aggregate task-level metrics into job-level metrics. * * @deprecated Use {@link #postProcessJobState(JobState) */ @Deprecated protected void postProcessTaskStates(@SuppressWarnings("unused") List<TaskState> taskStates) { // Do nothing } /** * Subclasses can override this method to do whatever processing on the {@link JobState} and its * associated {@link TaskState}s, e.g., aggregate task-level metrics into job-level metrics. */ protected void postProcessJobState(JobState jobState) { postProcessTaskStates(jobState.getTaskStates()); } @Override public void close() throws IOException { try { this.cancellationExecutor.shutdownNow(); try { this.jobContext.getSource().shutdown(this.jobContext.getJobState()); } finally { if (GobblinMetrics.isEnabled(this.jobProps)) { GobblinMetricsRegistry.getInstance().remove(this.jobContext.getJobId()); } } } finally { unlockJob(); } } /** * Run the given job. * * <p> * The contract between {@link AbstractJobLauncher#launchJob(JobListener)} and this method is this method * is responsible for for setting {@link JobState.RunningState} properly and upon returning from this method * (either normally or due to exceptions) whatever {@link JobState.RunningState} is set in this method is * used to determine if the job has finished. * </p> * * @param workUnits List of {@link WorkUnit}s of the job */ protected abstract void runWorkUnits(List<WorkUnit> workUnits) throws Exception; /** * Run the given job. * * <p> * The contract between {@link AbstractJobLauncher#launchJob(JobListener)} and this method is this method * is responsible for for setting {@link JobState.RunningState} properly and upon returning from this method * (either normally or due to exceptions) whatever {@link JobState.RunningState} is set in this method is * used to determine if the job has finished. * </p> * * @param workUnitStream stream of {@link WorkUnit}s of the job */ protected void runWorkUnitStream(WorkUnitStream workUnitStream) throws Exception { runWorkUnits(materializeWorkUnitList(workUnitStream)); } /** * Materialize a {@link WorkUnitStream} into an in-memory list. Note that infinite work unit streams cannot be materialized. */ private List<WorkUnit> materializeWorkUnitList(WorkUnitStream workUnitStream) { if (!workUnitStream.isFiniteStream()) { throw new UnsupportedOperationException("Cannot materialize an infinite work unit stream."); } return Lists.newArrayList(workUnitStream.getWorkUnits()); } /** * Get a {@link JobLock} to be used for the job. * * @param properties the job properties * @param jobLockEventListener the listener for lock events. * @return {@link JobLock} to be used for the job * @throws JobLockException throw when the {@link JobLock} fails to initialize */ protected JobLock getJobLock(Properties properties, JobLockEventListener jobLockEventListener) throws JobLockException { return LegacyJobLockFactoryManager.getJobLock(properties, jobLockEventListener); } /** * Execute the job cancellation. */ protected abstract void executeCancellation(); /** * Start the scheduled executor for executing job cancellation. * * <p> * The executor, upon started, waits on the condition variable indicating a cancellation is requested, * i.e., it waits for a cancellation request to arrive. If a cancellation is requested, the executor * is unblocked and calls {@link #executeCancellation()} to execute the cancellation. Upon completion * of the cancellation execution, the executor notifies the caller that requested the cancellation on * the conditional variable indicating the cancellation has been executed so the caller is unblocked. * Upon successful execution of the cancellation, it sets the job state to * {@link JobState.RunningState#CANCELLED}. * </p> */ protected void startCancellationExecutor() { this.cancellationExecutor.execute(new Runnable() { @Override public void run() { synchronized (AbstractJobLauncher.this.cancellationRequest) { try { while (!AbstractJobLauncher.this.cancellationRequested) { // Wait for a cancellation request to arrive AbstractJobLauncher.this.cancellationRequest.wait(); } LOG.info("Cancellation has been requested for job " + AbstractJobLauncher.this.jobContext.getJobId()); executeCancellation(); LOG.info("Cancellation has been executed for job " + AbstractJobLauncher.this.jobContext.getJobId()); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } } synchronized (AbstractJobLauncher.this.cancellationExecution) { AbstractJobLauncher.this.cancellationExecuted = true; AbstractJobLauncher.this.jobContext.getJobState().setState(JobState.RunningState.CANCELLED); // Notify that the cancellation has been executed AbstractJobLauncher.this.cancellationExecution.notifyAll(); } } }); } /** * Prepare the flattened {@link WorkUnit}s for execution by populating the job and task IDs. */ private WorkUnitStream prepareWorkUnits(WorkUnitStream workUnits, JobState jobState) { return workUnits.transform(new WorkUnitPreparator(this.jobContext.getJobId())); } private static abstract class MultiWorkUnitForEach implements Function<WorkUnit, WorkUnit> { @Nullable @Override public WorkUnit apply(WorkUnit input) { if (input instanceof MultiWorkUnit) { for (WorkUnit wu : ((MultiWorkUnit) input).getWorkUnits()) { forWorkUnit(wu); } } else { forWorkUnit(input); } return input; } protected abstract void forWorkUnit(WorkUnit workUnit); } @RequiredArgsConstructor private static class WorkUnitPreparator extends MultiWorkUnitForEach { private int taskIdSequence = 0; private final String jobId; @Override protected void forWorkUnit(WorkUnit workUnit) { workUnit.setProp(ConfigurationKeys.JOB_ID_KEY, this.jobId); String taskId = JobLauncherUtils.newTaskId(this.jobId, this.taskIdSequence++); workUnit.setId(taskId); workUnit.setProp(ConfigurationKeys.TASK_ID_KEY, taskId); workUnit.setProp(ConfigurationKeys.TASK_KEY_KEY, Long.toString(Id.Task.parse(taskId).getSequence())); } } /** * Try acquiring the job lock and return whether the lock is successfully locked. * * @param properties the job properties */ private boolean tryLockJob(Properties properties) { try { if (Boolean.valueOf(properties.getProperty(ConfigurationKeys.JOB_LOCK_ENABLED_KEY, Boolean.TRUE.toString()))) { this.jobLockOptional = Optional.of(getJobLock(properties, new JobLockEventListener() { @Override public void onLost() { executeCancellation(); } })); } return !this.jobLockOptional.isPresent() || this.jobLockOptional.get().tryLock(); } catch (JobLockException ioe) { LOG.error(String.format("Failed to acquire job lock for job %s: %s", this.jobContext.getJobId(), ioe), ioe); return false; } } /** * Unlock a completed or failed job. */ private void unlockJob() { if (this.jobLockOptional.isPresent()) { try { // Unlock so the next run of the same job can proceed this.jobLockOptional.get().unlock(); } catch (JobLockException ioe) { LOG.error(String.format("Failed to unlock for job %s: %s", this.jobContext.getJobId(), ioe), ioe); } finally { try { this.jobLockOptional.get().close(); } catch (IOException e) { LOG.error(String.format("Failed to close job lock for job %s: %s", this.jobContext.getJobId(), e), e); } finally { this.jobLockOptional = Optional.absent(); } } } } /** * Combines the specified {@link JobListener} with the {@link #mandatoryJobListeners} for this job. Uses * {@link JobListeners#parallelJobListener(List)} to create a {@link CloseableJobListener} that will execute all * the {@link JobListener}s in parallel. */ private CloseableJobListener getParallelCombinedJobListener(JobState jobState, JobListener jobListener) { List<JobListener> jobListeners = Lists.newArrayList(this.mandatoryJobListeners); jobListeners.add(jobListener); Set<String> jobListenerClassNames = jobState.getPropAsSet(ConfigurationKeys.JOB_LISTENERS_KEY, StringUtils.EMPTY); for (String jobListenerClassName : jobListenerClassNames) { try { @SuppressWarnings("unchecked") Class<? extends JobListener> jobListenerClass = (Class<? extends JobListener>) Class.forName(jobListenerClassName); jobListeners.add(jobListenerClass.newInstance()); } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) { LOG.warn(String.format("JobListener could not be created due to %s", jobListenerClassName), e); } } return JobListeners.parallelJobListener(jobListeners); } /** * Takes a {@link List} of {@link Tag}s and returns a new {@link List} with the original {@link Tag}s as well as any * additional {@link Tag}s returned by {@link ClusterNameTags#getClusterNameTags()}. * * @see ClusterNameTags */ private static List<Tag<?>> addClusterNameTags(List<? extends Tag<?>> tags) { return ImmutableList.<Tag<?>>builder().addAll(tags).addAll(Tag.fromMap(ClusterNameTags.getClusterNameTags())) .build(); } /** * Build the {@link EventSubmitter} for this class. */ private EventSubmitter buildEventSubmitter(List<? extends Tag<?>> tags) { return new EventSubmitter.Builder(this.runtimeMetricContext, "gobblin.runtime") .addMetadata(Tag.toMap(Tag.tagValuesToString(tags))).build(); } /** * Cleanup the left-over staging data possibly from the previous run of the job that may have failed * and not cleaned up its staging data. * * Property {@link ConfigurationKeys#CLEANUP_STAGING_DATA_PER_TASK} controls whether to cleanup * staging data per task, or to cleanup entire job's staging data at once. * * Staging data will not be cleaned if the job has unfinished {@link CommitSequence}s. */ private void cleanLeftoverStagingData(WorkUnitStream workUnits, JobState jobState) throws JobException { if (jobState.getPropAsBoolean(ConfigurationKeys.CLEANUP_STAGING_DATA_BY_INITIALIZER, false)) { //Clean up will be done by initializer. return; } try { if (!canCleanStagingData(jobState)) { LOG.error("Job " + jobState.getJobName() + " has unfinished commit sequences. Will not clean up staging data."); return; } } catch (IOException e) { throw new JobException("Failed to check unfinished commit sequences", e); } try { if (this.jobContext.shouldCleanupStagingDataPerTask()) { if (workUnits.isSafeToMaterialize()) { Closer closer = Closer.create(); Map<String, ParallelRunner> parallelRunners = Maps.newHashMap(); try { for (WorkUnit workUnit : JobLauncherUtils.flattenWorkUnits(workUnits.getMaterializedWorkUnitCollection())) { JobLauncherUtils.cleanTaskStagingData(new WorkUnitState(workUnit, jobState), LOG, closer, parallelRunners); } } catch (Throwable t) { throw closer.rethrow(t); } finally { closer.close(); } } else { throw new RuntimeException("Work unit streams do not support cleaning staging data per task."); } } else { JobLauncherUtils.cleanJobStagingData(jobState, LOG); } } catch (Throwable t) { // Catch Throwable instead of just IOException to make sure failure of this won't affect the current run LOG.error("Failed to clean leftover staging data", t); } } /** * Cleanup the job's task staging data. This is not doing anything in case job succeeds * and data is successfully committed because the staging data has already been moved * to the job output directory. But in case the job fails and data is not committed, * we want the staging data to be cleaned up. * * Property {@link ConfigurationKeys#CLEANUP_STAGING_DATA_PER_TASK} controls whether to cleanup * staging data per task, or to cleanup entire job's staging data at once. * * Staging data will not be cleaned if the job has unfinished {@link CommitSequence}s. */ private void cleanupStagingData(JobState jobState) throws JobException { if (jobState.getPropAsBoolean(ConfigurationKeys.CLEANUP_STAGING_DATA_BY_INITIALIZER, false)) { //Clean up will be done by initializer. return; } try { if (!canCleanStagingData(jobState)) { LOG.error("Job " + jobState.getJobName() + " has unfinished commit sequences. Will not clean up staging data."); return; } } catch (IOException e) { throw new JobException("Failed to check unfinished commit sequences", e); } if (this.jobContext.shouldCleanupStagingDataPerTask()) { cleanupStagingDataPerTask(jobState); } else { cleanupStagingDataForEntireJob(jobState); } } /** * Staging data cannot be cleaned if exactly once semantics is used, and the job has unfinished * commit sequences. */ private boolean canCleanStagingData(JobState jobState) throws IOException { return this.jobContext.getSemantics() != DeliverySemantics.EXACTLY_ONCE || !this.jobContext.getCommitSequenceStore() .get().exists(jobState.getJobName()); } private static void cleanupStagingDataPerTask(JobState jobState) { Closer closer = Closer.create(); Map<String, ParallelRunner> parallelRunners = Maps.newHashMap(); try { for (TaskState taskState : jobState.getTaskStates()) { try { JobLauncherUtils.cleanTaskStagingData(taskState, LOG, closer, parallelRunners); } catch (IOException e) { LOG.error(String.format("Failed to clean staging data for task %s: %s", taskState.getTaskId(), e), e); } } } finally { try { closer.close(); } catch (IOException e) { LOG.error("Failed to clean staging data", e); } } } private static void cleanupStagingDataForEntireJob(JobState jobState) { try { JobLauncherUtils.cleanJobStagingData(jobState, LOG); } catch (IOException e) { LOG.error("Failed to clean staging data for job " + jobState.getJobId(), e); } } private void notifyListeners(JobContext jobContext, JobListener jobListener, String timerEventName, JobListenerAction action) throws JobException { TimingEvent timer = this.eventSubmitter.getTimingEvent(timerEventName); try (CloseableJobListener parallelJobListener = getParallelCombinedJobListener(this.jobContext.getJobState(), jobListener)) { action.apply(parallelJobListener, jobContext); } catch (Exception e) { throw new JobException("Failed to execute all JobListeners", e); } finally { timer.stop(); } } private interface JobListenerAction { void apply(JobListener jobListener, JobContext jobContext) throws Exception; } }