/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.source.extractor.extract;
import java.util.List;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.SourceState;
import gobblin.configuration.WorkUnitState;
import gobblin.source.Source;
import gobblin.source.extractor.JobCommitPolicy;
import gobblin.source.extractor.WorkUnitRetryPolicy;
import gobblin.source.workunit.ExtractFactory;
import gobblin.source.workunit.WorkUnit;
import gobblin.source.workunit.Extract;
import gobblin.source.workunit.Extract.TableType;
/**
 * A base implementation of {@link gobblin.source.Source} that provides default behavior.
 *
 * <p>
 *   Besides leaving the {@link Source} contract to subclasses, this class offers helpers for
 *   finding the previous {@link WorkUnit}s that are subject to retries and for creating
 *   {@link Extract}s through a shared {@link ExtractFactory}.
 * </p>
 *
 * @author Yinan Li
 */
public abstract class AbstractSource<S, D> implements Source<S, D> {

  // Factory backing createExtract(). It is constructed with the pattern "yyyyMMddHHmmss";
  // NOTE(review): presumably the timestamp format used when generating extract IDs -- confirm
  // against ExtractFactory.
  private final ExtractFactory extractFactory = new ExtractFactory("yyyyMMddHHmmss");

  /**
   * Get a list of {@link WorkUnitState}s of previous {@link WorkUnit}s subject to retries.
   *
   * <p>
   *   We use two keys for configuring work unit retries. The first one
   *   ({@link ConfigurationKeys#WORK_UNIT_RETRY_ENABLED_KEY}, defaulting to {@code true} here)
   *   specifies whether work unit retries are enabled or not. This is for individual jobs
   *   or a group of jobs that follow the same rule for work unit retries.
   *   The second, more advanced one ({@link ConfigurationKeys#WORK_UNIT_RETRY_POLICY_KEY})
   *   specifies a retry policy and takes precedence over the first one when present.
   *   It is particularly useful as a global policy for a group of jobs that have different
   *   job commit policies and want work unit retries only for a specific job commit policy.
   *   The first one is probably sufficient for most jobs that only need a way to
   *   enable/disable work unit retries. The second one gives users more flexibility.
   * </p>
   *
   * <p>
   *   Only previous work unit states whose working state is not
   *   {@link WorkUnitState.WorkingState#COMMITTED} are returned. If
   *   {@link ConfigurationKeys#OVERWRITE_CONFIGS_IN_STATESTORE} is enabled, each returned state is a
   *   copy of the previous state overridden with the properties of the given {@code state};
   *   otherwise the previous state object itself is returned.
   * </p>
   *
   * @param state Source state
   * @return list of {@link WorkUnitState}s of previous {@link WorkUnit}s subject to retries;
   *         an empty immutable list if there are no previous work unit states or if retries do not
   *         apply under the configured policies
   */
  protected List<WorkUnitState> getPreviousWorkUnitStatesForRetry(SourceState state) {
    if (Iterables.isEmpty(state.getPreviousWorkUnitStates())) {
      return ImmutableList.of();
    }

    WorkUnitRetryPolicy workUnitRetryPolicy = resolveWorkUnitRetryPolicy(state);
    if (workUnitRetryPolicy == WorkUnitRetryPolicy.NEVER) {
      return ImmutableList.of();
    }

    // Decide whether retries apply at all *before* collecting (and possibly copying) the previous
    // work unit states, so that no work is wasted when the policies do not match.
    if (workUnitRetryPolicy != WorkUnitRetryPolicy.ALWAYS
        && !matchesJobCommitPolicy(workUnitRetryPolicy, state)) {
      // Return an empty list if job commit policy and work unit retry policy do not match
      return ImmutableList.of();
    }

    // This setting does not change while iterating, so read it once rather than per work unit.
    boolean overwriteConfigsInStateStore = state.getPropAsBoolean(
        ConfigurationKeys.OVERWRITE_CONFIGS_IN_STATESTORE,
        ConfigurationKeys.DEFAULT_OVERWRITE_CONFIGS_IN_STATESTORE);

    // Get previous work units that were not successfully committed (subject to retries)
    List<WorkUnitState> previousWorkUnitStates = Lists.newArrayList();
    for (WorkUnitState workUnitState : state.getPreviousWorkUnitStates()) {
      if (workUnitState.getWorkingState() == WorkUnitState.WorkingState.COMMITTED) {
        continue;
      }
      previousWorkUnitStates.add(
          overwriteConfigsInStateStore ? copyWithOverrides(workUnitState, state) : workUnitState);
    }
    return previousWorkUnitStates;
  }

  /**
   * Determine the effective {@link WorkUnitRetryPolicy} from the given source state.
   */
  private static WorkUnitRetryPolicy resolveWorkUnitRetryPolicy(SourceState state) {
    if (state.contains(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY)) {
      // Use the given work unit retry policy if specified
      return WorkUnitRetryPolicy.forName(state.getProp(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY));
    }
    // Otherwise set the retry policy based on whether work unit retry is enabled (default: enabled)
    boolean retryFailedWorkUnits = state.getPropAsBoolean(ConfigurationKeys.WORK_UNIT_RETRY_ENABLED_KEY, true);
    return retryFailedWorkUnits ? WorkUnitRetryPolicy.ALWAYS : WorkUnitRetryPolicy.NEVER;
  }

  /**
   * Check whether a commit-policy-specific retry policy matches the job commit policy configured
   * in the given source state.
   */
  private static boolean matchesJobCommitPolicy(WorkUnitRetryPolicy workUnitRetryPolicy, SourceState state) {
    JobCommitPolicy jobCommitPolicy = JobCommitPolicy.forName(
        state.getProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, ConfigurationKeys.DEFAULT_JOB_COMMIT_POLICY));

    boolean partialSuccessMatch = workUnitRetryPolicy == WorkUnitRetryPolicy.ON_COMMIT_ON_PARTIAL_SUCCESS
        && jobCommitPolicy == JobCommitPolicy.COMMIT_ON_PARTIAL_SUCCESS;
    boolean fullSuccessMatch = workUnitRetryPolicy == WorkUnitRetryPolicy.ON_COMMIT_ON_FULL_SUCCESS
        && jobCommitPolicy == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS;
    return partialSuccessMatch || fullSuccessMatch;
  }

  /**
   * Create a copy of a previous work unit state with the properties of the source state applied on top.
   */
  private static WorkUnitState copyWithOverrides(WorkUnitState workUnitState, SourceState state) {
    // We need to make a copy here since getPreviousWorkUnitStates returns ImmutableWorkUnitStates
    // for which addAll is not supported
    WorkUnitState workUnitStateCopy = new WorkUnitState(workUnitState.getWorkunit(), state);
    workUnitStateCopy.addAll(workUnitState);
    workUnitStateCopy.overrideWith(state);
    return workUnitStateCopy;
  }

  /**
   * Get a list of previous {@link WorkUnit}s subject to retries.
   *
   * <p>
   *   This method uses {@link AbstractSource#getPreviousWorkUnitStatesForRetry(SourceState)} and
   *   returns a copy of the {@link WorkUnit} of each returned {@link WorkUnitState}.
   * </p>
   *
   * @param state Source state
   * @return list of previous {@link WorkUnit}s subject to retries
   */
  protected List<WorkUnit> getPreviousWorkUnitsForRetry(SourceState state) {
    List<WorkUnit> workUnits = Lists.newArrayList();
    for (WorkUnitState workUnitState : getPreviousWorkUnitStatesForRetry(state)) {
      // Make a copy here as getWorkunit() below returns an ImmutableWorkUnit
      workUnits.add(WorkUnit.copyOf(workUnitState.getWorkunit()));
    }
    return workUnits;
  }

  /**
   * Create an {@link Extract} for the given table by delegating to
   * {@link ExtractFactory#getUniqueExtract} on this source's {@link ExtractFactory}.
   *
   * @param type the {@link TableType} of the extract
   * @param namespace namespace of the extract
   * @param table table name of the extract
   * @return the {@link Extract} returned by the factory
   */
  public Extract createExtract(TableType type, String namespace, String table) {
    return this.extractFactory.getUniqueExtract(type, namespace, table);
  }
}