/*
* Copyright © 2015-2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.api.dataset.lib.partitioned;
import co.cask.cdap.api.dataset.lib.PartitionDetail;
import co.cask.cdap.api.dataset.lib.PartitionKey;
import co.cask.cdap.api.dataset.lib.PartitionedFileSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
* A {@link PartitionConsumer} that supports multiple instances consuming the same set of partitions by using a
* working set of partitions, and keeping track of their progress state during processing of those partitions.
*/
/**
 * A {@link PartitionConsumer} that supports multiple instances consuming the same set of partitions by using a
 * working set of partitions, and keeping track of their progress state during processing of those partitions.
 */
public class ConcurrentPartitionConsumer extends AbstractPartitionConsumer {
  private static final Logger LOG = LoggerFactory.getLogger(ConcurrentPartitionConsumer.class);

  public ConcurrentPartitionConsumer(PartitionedFileSet partitionedFileSet, StatePersistor statePersistor) {
    super(partitionedFileSet, statePersistor);
  }

  public ConcurrentPartitionConsumer(PartitionedFileSet partitionedFileSet, StatePersistor statePersistor,
                                     ConsumerConfiguration configuration) {
    super(partitionedFileSet, statePersistor, configuration);
  }

  @Override
  public PartitionConsumerResult doConsume(ConsumerWorkingSet workingSet, PartitionAcceptor acceptor) {
    // expire any timed-out IN_PROGRESS partitions first, so they become available (or discarded) for this pass
    doExpiry(workingSet);
    workingSet.populate(getPartitionedFileSet(), getConfiguration());
    List<PartitionDetail> toConsume = selectPartitions(acceptor, workingSet.getPartitions());
    return new PartitionConsumerResult(toConsume, removeDiscardedPartitions(workingSet));
  }

  /**
   * Walks the working set and marks AVAILABLE partitions as taken, until the acceptor signals STOP.
   *
   * @param acceptor decides per partition whether to ACCEPT, SKIP, or STOP
   * @param partitions the current working set's partitions
   * @return the list of partitions that were accepted (and marked IN_PROGRESS)
   */
  private List<PartitionDetail> selectPartitions(PartitionAcceptor acceptor,
                                                 List<? extends ConsumablePartition> partitions) {
    long now = System.currentTimeMillis();
    List<PartitionDetail> toConsume = new ArrayList<>();
    for (ConsumablePartition consumablePartition : partitions) {
      if (ProcessState.AVAILABLE != consumablePartition.getProcessState()) {
        continue;
      }
      PartitionDetail partition = getPartitionedFileSet().getPartition(consumablePartition.getPartitionKey());
      if (partition == null) {
        // partition no longer exists in the PartitionedFileSet
        continue;
      }
      PartitionAcceptor.Return accept = acceptor.accept(partition);
      switch (accept) {
        case ACCEPT:
          // mark IN_PROGRESS and stamp with the take time, so expiry can later detect a timed-out consumer
          consumablePartition.take();
          consumablePartition.setTimestamp(now);
          toConsume.add(partition);
          continue;
        case SKIP:
          continue;
        case STOP:
          return toConsume;
      }
    }
    return toConsume;
  }

  @Override
  public void doFinish(ConsumerWorkingSet workingSet, List<? extends PartitionKey> partitionKeys, boolean succeeded) {
    doExpiry(workingSet);
    if (succeeded) {
      commit(workingSet, partitionKeys);
    } else {
      abort(workingSet, partitionKeys);
    }
  }

  /**
   * Removes the given partition keys from the working set, as they have been successfully processed.
   *
   * @throws IllegalStateException if any of the partitions is not currently IN_PROGRESS
   */
  protected void commit(ConsumerWorkingSet workingSet, List<? extends PartitionKey> partitionKeys) {
    for (PartitionKey key : partitionKeys) {
      ConsumablePartition consumablePartition = workingSet.lookup(key);
      assertInProgress(consumablePartition);
      workingSet.remove(key);
    }
  }

  /**
   * Resets the process state of the given partition keys, as they were not successfully processed, or discards the
   * partition if it has already been attempted the configured number of attempts.
   *
   * @throws IllegalStateException if any of the partitions is not currently IN_PROGRESS
   */
  protected void abort(ConsumerWorkingSet workingSet, List<? extends PartitionKey> partitionKeys) {
    List<PartitionKey> discardedPartitions = new ArrayList<>();
    for (PartitionKey key : partitionKeys) {
      ConsumablePartition consumablePartition = workingSet.lookup(key);
      assertInProgress(consumablePartition);
      // either reset its processState, or remove it from the workingSet, depending on how many tries it already has
      if (consumablePartition.getNumFailures() < getConfiguration().getMaxRetries()) {
        consumablePartition.retry();
      } else {
        discardedPartitions.add(key);
        // reuse the already-looked-up partition instead of a second workingSet.lookup(key)
        consumablePartition.discard();
      }
    }
    if (!discardedPartitions.isEmpty()) {
      LOG.warn("Discarded keys due to being retried {} times: {}",
               getConfiguration().getMaxRetries(), discardedPartitions);
    }
  }

  /**
   * Ensures that the caller doesn't try to commit/abort a partition that isn't in progress.
   *
   * @throws IllegalStateException if the given partition is NOT in progress
   */
  protected void assertInProgress(ConsumablePartition consumablePartition) {
    if (!(consumablePartition.getProcessState() == ProcessState.IN_PROGRESS)) {
      throw new IllegalStateException(String.format("Partition not in progress: %s",
                                                    consumablePartition.getPartitionKey()));
    }
  }

  /**
   * Removes the list of partitions that have failed processing the configured number of times from the working set and
   * returns them.
   */
  protected List<PartitionDetail> removeDiscardedPartitions(ConsumerWorkingSet workingSet) {
    List<PartitionDetail> failedPartitions = new ArrayList<>();
    Iterator<ConsumablePartition> iter = workingSet.getPartitions().iterator();
    while (iter.hasNext()) {
      ConsumablePartition partition = iter.next();
      if (partition.getProcessState() == ProcessState.DISCARDED) {
        PartitionDetail detail = getPartitionedFileSet().getPartition(partition.getPartitionKey());
        // the partition may have been deleted from the PartitionedFileSet since it was discarded;
        // guard against adding null to the returned list
        if (detail != null) {
          failedPartitions.add(detail);
        }
        iter.remove();
      }
    }
    return failedPartitions;
  }

  /**
   * @return a timestamp which determines partition expiry. Partitions with a timestamp smaller (older) than this value
   * are considered 'expired'.
   */
  protected long getExpiryBorder() {
    long now = System.currentTimeMillis();
    long expirationTimeoutMillis = TimeUnit.SECONDS.toMillis(getConfiguration().getTimeout());
    return now - expirationTimeoutMillis;
  }

  /**
   * Goes through all partitions. If any IN_PROGRESS partition is older than the configured timeout, reset its state
   * to AVAILABLE, unless it has already been retried the configured number of times, in which case it is discarded.
   */
  protected void doExpiry(ConsumerWorkingSet workingSet) {
    long expiryTime = getExpiryBorder();
    List<PartitionKey> expiredPartitions = new ArrayList<>();
    List<PartitionKey> discardedPartitions = new ArrayList<>();
    for (ConsumablePartition partition : workingSet.getPartitions()) {
      if (partition.getProcessState() == ProcessState.IN_PROGRESS && partition.getTimestamp() < expiryTime) {
        // either reset its processState, or remove it from the workingSet, depending on how many tries it already has
        if (partition.getNumFailures() < getConfiguration().getMaxRetries()) {
          partition.retry();
        } else {
          partition.discard();
          // BUGFIX: record the discarded key; previously this list was never populated,
          // so the "Discarded keys" warning below could never fire
          discardedPartitions.add(partition.getPartitionKey());
        }
        expiredPartitions.add(partition.getPartitionKey());
      }
    }
    if (!expiredPartitions.isEmpty()) {
      LOG.warn("Expiring in progress partitions: {}", expiredPartitions);
      if (!discardedPartitions.isEmpty()) {
        LOG.warn("Discarded keys due to being retried {} times: {}",
                 getConfiguration().getMaxRetries(), discardedPartitions);
      }
    }
  }
}