/*
* Copyright © 2015-2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.api.dataset.lib.partitioned;
import co.cask.cdap.api.Predicate;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.lib.PartitionConsumerState;
import co.cask.cdap.api.dataset.lib.PartitionDetail;
import co.cask.cdap.api.dataset.lib.PartitionKey;
import co.cask.cdap.api.dataset.lib.PartitionedFileSet;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
/**
 * Keeps track of a list of partitions that are either available for consuming or are currently
 * being consumed, along with the {@link PartitionConsumerState} marking how far into the
 * underlying {@link PartitionedFileSet} partitions have been fetched so far.
 */
public class ConsumerWorkingSet {

  // Serialization format version; written as the first byte of toBytes() output and
  // validated by fromBytes().
  private static final int VERSION = 0;

  private final List<ConsumablePartition> partitions;
  // Advanced by populate() each time new partitions are fetched from the PartitionedFileSet.
  private PartitionConsumerState partitionConsumerState;

  /**
   * Constructs an empty working set.
   */
  public ConsumerWorkingSet() {
    this(PartitionConsumerState.FROM_BEGINNING,
         new ArrayList<ConsumablePartition>());
  }

  /**
   * Constructs a working set using the given PartitionConsumerState and list of
   * ConsumablePartitions.
   */
  private ConsumerWorkingSet(PartitionConsumerState partitionConsumerState,
                             List<ConsumablePartition> partitions) {
    this.partitionConsumerState = partitionConsumerState;
    this.partitions = partitions;
  }

  /**
   * @return the list of partitions of this working set. Note that this is the internal, mutable
   *         list backing this working set; modifications to it are reflected in this object.
   */
  public List<ConsumablePartition> getPartitions() {
    return partitions;
  }

  /**
   * Adds a new partition to the working set.
   *
   * @param partitionKey the key of the partition to add
   */
  public void addPartition(PartitionKey partitionKey) {
    partitions.add(new DefaultConsumablePartition(partitionKey));
  }

  /**
   * @return the ConsumablePartition with the given PartitionKey, from the working set, after
   *         removing it from the partitions list
   * @throws IllegalArgumentException if no partition with the given key is in the working set
   */
  public ConsumablePartition remove(PartitionKey partitionKey) {
    for (int i = 0; i < partitions.size(); i++) {
      if (partitionKey.equals(partitions.get(i).getPartitionKey())) {
        return partitions.remove(i);
      }
    }
    throw new IllegalArgumentException("PartitionKey not found: " + partitionKey);
  }

  /**
   * @return the ConsumablePartition with the given PartitionKey, from the working set
   * @throws IllegalArgumentException if no partition with the given key is in the working set
   */
  public ConsumablePartition lookup(PartitionKey partitionKey) {
    for (int i = 0; i < partitions.size(); i++) {
      if (partitionKey.equals(partitions.get(i).getPartitionKey())) {
        return partitions.get(i);
      }
    }
    throw new IllegalArgumentException("PartitionKey not found: " + partitionKey);
  }

  /**
   * Populates the ConsumerWorkingSet by fetching partitions from the given PartitionedFileSet,
   * up to the configured maximum working set size, and advances the internal
   * PartitionConsumerState past the fetched partitions.
   *
   * @param partitionedFileSet the PartitionedFileSet to fetch partitions from
   * @param configuration the ConsumerConfiguration which defines parameters for consuming
   */
  public void populate(PartitionedFileSet partitionedFileSet, ConsumerConfiguration configuration) {
    int numToPopulate = configuration.getMaxWorkingSetSize() - partitions.size();
    Predicate<PartitionDetail> predicate = configuration.getPartitionPredicate();
    co.cask.cdap.api.dataset.lib.PartitionConsumerResult result =
        partitionedFileSet.consumePartitions(partitionConsumerState, numToPopulate, predicate);
    // Named to avoid shadowing the 'partitions' field of this class.
    List<PartitionDetail> fetchedPartitions = result.getPartitions();
    for (PartitionDetail fetchedPartition : fetchedPartitions) {
      addPartition(fetchedPartition.getPartitionKey());
    }
    partitionConsumerState = result.getPartitionConsumerState();
  }

  /**
   * Deserializes a ConsumerWorkingSet from a byte array previously produced by {@link #toBytes()}.
   *
   * Wire format: [1 byte version][int partition count]
   * ([int length][DefaultConsumablePartition bytes])* [int length][PartitionConsumerState bytes].
   *
   * @param bytes the serialized form of a ConsumerWorkingSet
   * @return the deserialized ConsumerWorkingSet
   * @throws IllegalArgumentException if the serialization format version is unsupported
   */
  public static ConsumerWorkingSet fromBytes(byte[] bytes) {
    ByteBuffer bb = ByteBuffer.wrap(bytes);
    byte serializationFormatVersion = bb.get();
    if (serializationFormatVersion != VERSION) {
      throw new IllegalArgumentException("Unsupported serialization format: "
                                           + serializationFormatVersion);
    }
    int numPartitions = bb.getInt();
    List<ConsumablePartition> partitions = new ArrayList<>(numPartitions);
    for (int i = 0; i < numPartitions; i++) {
      int consumablePartitionBytesLength = bb.getInt();
      byte[] consumablePartitionBytes = new byte[consumablePartitionBytesLength];
      bb.get(consumablePartitionBytes, 0, consumablePartitionBytesLength);
      partitions.add(DefaultConsumablePartition.fromBytes(consumablePartitionBytes));
    }
    int sizeOfMarker = bb.getInt();
    byte[] markerBytes = new byte[sizeOfMarker];
    bb.get(markerBytes);
    return new ConsumerWorkingSet(PartitionConsumerState.fromBytes(markerBytes), partitions);
  }

  /**
   * Serializes this ConsumerWorkingSet into a byte array readable by {@link #fromBytes(byte[])}.
   *
   * @return the serialized form of this ConsumerWorkingSet
   */
  public byte[] toBytes() {
    // First, compute the exact buffer size: 1 byte for the serialization format version...
    int numBytes = 1;
    // ...an int for the partition count...
    numBytes += Bytes.SIZEOF_INT;
    // ...a length-prefixed byte array per partition...
    for (ConsumablePartition partition : partitions) {
      byte[] partitionBytes = ((DefaultConsumablePartition) partition).toBytes();
      numBytes += Bytes.SIZEOF_INT;
      numBytes += partitionBytes.length;
    }
    // ...and a length-prefixed byte array for the consumer state marker.
    byte[] markerBytes = partitionConsumerState.toBytes();
    numBytes += Bytes.SIZEOF_INT;
    numBytes += markerBytes.length;

    ByteBuffer bb = ByteBuffer.allocate(numBytes);
    bb.put((byte) VERSION);
    bb.putInt(partitions.size());
    for (ConsumablePartition partition : partitions) {
      byte[] partitionBytes = ((DefaultConsumablePartition) partition).toBytes();
      bb.putInt(partitionBytes.length);
      bb.put(partitionBytes);
    }
    bb.putInt(markerBytes.length);
    bb.put(markerBytes);
    return bb.array();
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    ConsumerWorkingSet that = (ConsumerWorkingSet) o;
    return partitions.equals(that.partitions)
      && partitionConsumerState.equals(that.partitionConsumerState);
  }

  @Override
  public int hashCode() {
    int result = partitions.hashCode();
    result = 31 * result + partitionConsumerState.hashCode();
    return result;
  }
}