/*
 * Copyright © 2015-2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.api.dataset.lib.partitioned;

import co.cask.cdap.api.annotation.Beta;
import co.cask.cdap.api.dataset.lib.Partition;
import co.cask.cdap.api.dataset.lib.PartitionKey;
import co.cask.cdap.api.dataset.lib.PartitionedFileSet;

import java.util.List;

/**
 * A consumer that incrementally picks up the new or not-yet-processed {@link Partition}s of a
 * {@link PartitionedFileSet}.
 *
 * <p>Several partition consumers may consume different partitions of the same PartitionedFileSet. To support
 * that, the {@code consumePartitions} methods must each be invoked in their own, short transaction, before the
 * partitions are processed. Doing so lets other concurrent consumers see that the partitions have been marked
 * as IN_PROGRESS.
 */
@Beta
public interface PartitionConsumer {

  /**
   * Consumes partitions that have not yet been processed.
   *
   * @return a {@link PartitionConsumerResult} containing partitions that have not yet been processed
   */
  PartitionConsumerResult consumePartitions();

  /**
   * Consumes partitions that have not yet been processed, up to the given limit.
   *
   * @param limit upper limit on the number of partitions to consume
   * @return a {@link PartitionConsumerResult} containing partitions that have not yet been processed
   */
  PartitionConsumerResult consumePartitions(int limit);

  /**
   * Consumes partitions that have not yet been processed, as selected by the given acceptor.
   *
   * @param acceptor defines which and how many partitions to consume
   * @return a {@link PartitionConsumerResult} containing partitions that have not yet been processed
   */
  PartitionConsumerResult consumePartitions(PartitionAcceptor acceptor);

  /**
   * Reports the outcome of processing for the given partitions.
   *
   * <p>This method must be called on any partitions returned by one of the {@code consumePartitions} methods.
   * Partitions for which a program fails to call this method will be 'expired' after a timeout defined on the
   * configured {@link ConsumerConfiguration}.
   *
   * @param partitions list of partitions to mark as either succeeded or failed processing
   * @param succeeded whether or not processing of the specified partitions was successful
   */
  void onFinish(List<? extends Partition> partitions, boolean succeeded);

  /**
   * Same as {@link #onFinish(List, boolean)}, except that the partitions are identified by their
   * {@link PartitionKey}s rather than passed as {@link Partition}s.
   *
   * @param partitionKeys list of partition keys to mark as either succeeded or failed processing
   * @param succeeded whether or not processing of the specified partitions was successful
   */
  void onFinishWithKeys(List<? extends PartitionKey> partitionKeys, boolean succeeded);
}