/* * Copyright (C) 2014-2016 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the * License at http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. */ package gobblin.runtime.fork; import java.io.IOException; import java.util.concurrent.TimeUnit; import gobblin.runtime.BoundedBlockingRecordQueue; import gobblin.runtime.ExecutionModel; import gobblin.runtime.Task; import gobblin.runtime.TaskContext; import gobblin.runtime.TaskExecutor; import gobblin.runtime.TaskState; import lombok.extern.slf4j.Slf4j; import com.google.common.base.Optional; import com.google.common.base.Throwables; import gobblin.configuration.ConfigurationKeys; import gobblin.converter.DataConversionException; /** * A class representing a forked branch of operations of a {@link Task} flow. The {@link Fork}s of a * {@link Task} are executed in a thread pool managed by the {@link TaskExecutor}, which is different * from the thread pool used to execute {@link Task}s. * * <p> * Each {@link Fork} consists of the following steps: * <ul> * <li>Getting the next record off the record queue.</li> * <li>Converting the record and doing row-level quality checking if applicable.</li> * <li>Writing the record out if it passes the quality checking.</li> * <li>Cleaning up and exiting once all the records have been processed.</li> * </ul> * </p> * * @author Yinan Li */ @Slf4j @SuppressWarnings("unchecked") public class AsynchronousFork extends Fork { private final BoundedBlockingRecordQueue<Object> recordQueue; public AsynchronousFork(TaskContext taskContext, Object schema, int branches, int index, ExecutionModel executionModel) throws Exception { super(taskContext, schema, branches, index, executionModel); TaskState taskState = taskContext.getTaskState(); this.recordQueue = BoundedBlockingRecordQueue.newBuilder() .hasCapacity(taskState.getPropAsInt( ConfigurationKeys.FORK_RECORD_QUEUE_CAPACITY_KEY, ConfigurationKeys.DEFAULT_FORK_RECORD_QUEUE_CAPACITY)) .useTimeout(taskState.getPropAsLong( ConfigurationKeys.FORK_RECORD_QUEUE_TIMEOUT_KEY, ConfigurationKeys.DEFAULT_FORK_RECORD_QUEUE_TIMEOUT)) .useTimeoutTimeUnit(TimeUnit.valueOf(taskState.getProp( ConfigurationKeys.FORK_RECORD_QUEUE_TIMEOUT_UNIT_KEY, ConfigurationKeys.DEFAULT_FORK_RECORD_QUEUE_TIMEOUT_UNIT))) .collectStats() .build(); } @Override public Optional<BoundedBlockingRecordQueue<Object>.QueueStats> queueStats() { return this.recordQueue.stats(); } @Override protected void processRecords() throws IOException, DataConversionException { while (processRecord()) { } } @Override protected boolean putRecordImpl(Object record) throws InterruptedException { return this.recordQueue.put(record); } boolean processRecord() throws IOException, DataConversionException { try { Object record = this.recordQueue.get(); if (record == null || record == Fork.SHUTDOWN_RECORD) { // The parent task has already done pulling records so no new record means this fork is done if (this.isParentTaskDone()) { return false; } } else { this.processRecord(record); } } catch (InterruptedException ie) { log.warn("Interrupted while trying to get a record off the queue", ie); Throwables.propagate(ie); } return true; } }