/*
* Copyright © 2014-2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data2.transaction.queue;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.common.queue.QueueName;
import co.cask.cdap.data2.queue.ConsumerConfig;
import co.cask.cdap.data2.queue.DequeueStrategy;
import co.cask.cdap.data2.queue.QueueEntry;
import co.cask.tephra.Transaction;
import com.google.common.base.Charsets;
import com.google.common.base.Objects;
import com.google.common.hash.Hashing;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import org.apache.hadoop.hbase.KeyValue;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
/**
* Holds logic of how queue entry row is constructed
*/
/**
 * Holds the logic of how a queue entry row is constructed and interpreted.
 *
 * <p>A queue entry row key has the layout:
 * {@code <prefix bytes><1 MD5 byte><flowlet>/<output><8-byte writePointer><4-byte counter>}.
 * The single MD5 byte salts the row space (spreading queues across regions) while keeping all
 * entries of one queue contiguous. Within the single column family, each row carries the entry
 * payload in the data column, the serialized hash keys in the meta column, and one state column
 * per consumer group, prefixed with {@link #STATE_COLUMN_PREFIX}.
 */
public class QueueEntryRow {
  /** Column family that holds all queue entry columns. */
  public static final byte[] COLUMN_FAMILY = new byte[] {'q'};
  /** Column qualifier for the queue entry payload. */
  public static final byte[] DATA_COLUMN = new byte[] {'d'};
  /** Column qualifier for the entry metadata (serialized hash keys). */
  public static final byte[] META_COLUMN = new byte[] {'m'};
  /**
   * Prefix of per-consumer-group state column qualifiers. A state value is encoded as
   * {@code <8-byte writePointer><4-byte instanceId><1-byte ConsumerEntryState>}.
   */
  public static final byte[] STATE_COLUMN_PREFIX = new byte[] {'s'};

  /**
   * Returns a byte array representing the prefix of a queue. The prefix is formed by the first
   * byte of the MD5 of the queue name followed by the queue name.
   *
   * @param queueName name of the queue
   * @return the row prefix; empty for streams, which are uniquely identified by table name instead
   */
  public static byte[] getQueueRowPrefix(QueueName queueName) {
    if (queueName.isStream()) {
      // NOTE: stream is uniquely identified by table name
      return Bytes.EMPTY_BYTE_ARRAY;
    }
    // A queue is identified within its flow by "<flowlet>/<output>".
    String flowlet = queueName.getFourthComponent();
    String output = queueName.getSimpleName();
    byte[] idWithinFlow = (flowlet + "/" + output).getBytes(Charsets.US_ASCII);
    return getQueueRowPrefix(idWithinFlow);
  }

  /**
   * Simple method to return a queue entry row key. This method is not for performance usage
   * (e.g. in enqueue), since it allocates intermediate arrays.
   *
   * @param queueName name of the queue
   * @param writePoint transaction write pointer used by the enqueue
   * @param count counter of the entry within the enqueue batch
   * @return the full row key {@code prefix + writePoint + count}
   */
  public static byte[] getQueueEntryRowKey(QueueName queueName, long writePoint, int count) {
    return Bytes.add(QueueEntryRow.getQueueRowPrefix(queueName), Bytes.toBytes(writePoint), Bytes.toBytes(count));
  }

  /**
   * Returns a byte array representing the prefix of a queue. The prefix is formed by the first
   * byte of the MD5 of the queue id followed by the queue id itself.
   */
  private static byte[] getQueueRowPrefix(byte[] queueIdWithinFlow) {
    byte[] bytes = new byte[queueIdWithinFlow.length + 1];
    // Write only the first MD5 byte; it serves purely as a salt.
    Hashing.md5().hashBytes(queueIdWithinFlow).writeBytesTo(bytes, 0, 1);
    System.arraycopy(queueIdWithinFlow, 0, bytes, 1, queueIdWithinFlow.length);
    return bytes;
  }

  /**
   * Determines whether a column represents the state of a consumer.
   */
  public static boolean isStateColumn(byte[] columnName) {
    return Bytes.startsWith(columnName, QueueEntryRow.STATE_COLUMN_PREFIX);
  }

  /**
   * @param stateValue value of the state column
   * @return write pointer of the latest change of the state value (first 8 bytes)
   */
  public static long getStateWritePointer(byte[] stateValue) {
    return Bytes.toLong(stateValue, 0, Longs.BYTES);
  }

  /**
   * @param stateValue value of the state column
   * @return consumer instance id that wrote the state (4 bytes after the write pointer)
   */
  public static int getStateInstanceId(byte[] stateValue) {
    return Bytes.toInt(stateValue, Longs.BYTES, Ints.BYTES);
  }

  /**
   * @param stateValue value of the state column
   * @return consumer entry state (final byte of the state value)
   */
  public static ConsumerEntryState getState(byte[] stateValue) {
    return ConsumerEntryState.fromState(stateValue[Longs.BYTES + Ints.BYTES]);
  }

  /**
   * Extracts the queue name from the given row, which must be a queue entry row.
   *
   * @param namespaceId namespace of the flow
   * @param appName application name of the flow
   * @param flowName name of the flow
   * @param prefixBytes number of leading prefix bytes before the MD5 salt byte
   * @param rowBuffer buffer containing the row key
   * @param rowOffset offset of the row key within the buffer
   * @param rowLength length of the row key
   * @return the {@link QueueName} encoded in the row
   */
  public static QueueName getQueueName(String namespaceId, String appName, String flowName, int prefixBytes,
                                       byte[] rowBuffer, int rowOffset, int rowLength) {
    // Entry key is always (prefix bytes + 1 MD5 byte + queueName + longWritePointer + intCounter)
    int queueNameEnd = rowOffset + rowLength - Bytes.SIZEOF_LONG - Bytes.SIZEOF_INT;
    // <flowlet>/<output>
    byte[] idWithinFlow = Arrays.copyOfRange(rowBuffer,
                                             rowOffset + prefixBytes + 1,
                                             queueNameEnd);
    String idWithinFlowAsString = new String(idWithinFlow, Charsets.US_ASCII);
    // Split back into <flowlet> and <output>, the inverse of getQueueRowPrefix(QueueName).
    String[] parts = idWithinFlowAsString.split("/");
    return QueueName.fromFlowlet(namespaceId, appName, flowName, parts[0], parts[1]);
  }

  /**
   * Returns true if the given row is a queue entry of the given queue, based on the queue row prefix.
   */
  public static boolean isQueueEntry(byte[] queueRowPrefix, int prefixBytes,
                                     byte[] rowBuffer, int rowOffset, int rowLength) {
    // Entry key is always (prefix bytes + 1 MD5 byte + queueName + longWritePointer + intCounter)
    return isPrefix(rowBuffer,
                    rowOffset + prefixBytes + 1,
                    rowLength - prefixBytes - 1,
                    queueRowPrefix);
  }

  /**
   * Returns {@code true} if the given {@link KeyValue} is a state column in a queue entry row.
   */
  public static boolean isStateColumn(KeyValue keyValue) {
    return columnHasPrefix(keyValue, STATE_COLUMN_PREFIX);
  }

  /**
   * Returns {@code true} if the given {@code byte[]} is a state column qualifier in a queue entry row.
   */
  public static boolean isStateColumn(byte[] qualifierBuffer, int qualifierOffset) {
    return columnHasPrefix(qualifierBuffer, qualifierOffset, STATE_COLUMN_PREFIX);
  }

  /**
   * Returns {@code true} if the given {@link KeyValue} is a meta column in a queue entry row.
   */
  public static boolean isMetaColumn(KeyValue keyValue) {
    return columnHasPrefix(keyValue, META_COLUMN);
  }

  /**
   * Returns {@code true} if the given {@code byte[]} is a meta column qualifier in a queue entry row.
   */
  public static boolean isMetaColumn(byte[] qualifierBuffer, int qualifierOffset) {
    return columnHasPrefix(qualifierBuffer, qualifierOffset, META_COLUMN);
  }

  /**
   * Returns {@code true} if the given {@link KeyValue} is a data column in a queue entry row.
   */
  public static boolean isDataColumn(KeyValue keyValue) {
    return columnHasPrefix(keyValue, DATA_COLUMN);
  }

  /**
   * Returns {@code true} if the given {@code byte[]} is a data column qualifier in a queue entry row.
   */
  public static boolean isDataColumn(byte[] qualifierBuffer, int qualifierOffset) {
    return columnHasPrefix(qualifierBuffer, qualifierOffset, DATA_COLUMN);
  }

  private static boolean columnHasPrefix(KeyValue keyValue, byte[] prefix) {
    return columnHasPrefix(keyValue.getBuffer(), keyValue.getQualifierOffset(), prefix);
  }

  private static boolean columnHasPrefix(byte[] qualifierBuffer, int qualifierOffset, byte[] prefix) {
    // Only comparing prefix bytes, so we use the prefix length for both sides.
    return Bytes.equals(prefix, 0, prefix.length, qualifierBuffer, qualifierOffset, prefix.length);
  }

  /**
   * Returns {@code true} if the first {@code prefix.length} bytes of {@code bytes}, starting at
   * {@code off}, equal {@code prefix}. Returns {@code false} when fewer than
   * {@code prefix.length} bytes are available.
   */
  private static boolean isPrefix(byte[] bytes, int off, int len, byte[] prefix) {
    int prefixLen = prefix.length;
    if (len < prefixLen) {
      return false;
    }
    int i = 0;
    while (i < prefixLen) {
      if (bytes[off++] != prefix[i++]) {
        return false;
      }
    }
    return true;
  }

  // Consuming logic

  /**
   * Defines if a queue entry can be consumed.
   */
  public enum CanConsume {
    YES,
    NO,
    NO_INCLUDING_ALL_OLDER
  }

  /**
   * Looks at a specific queue entry and determines if a consumer with the given consumer config and
   * current transaction can consume this entry. The answer can be
   * "yes" ({@link co.cask.cdap.data2.transaction.queue.QueueEntryRow.CanConsume#YES}),
   * "no" ({@link co.cask.cdap.data2.transaction.queue.QueueEntryRow.CanConsume#NO}),
   * or "no" with a hint that the given consumer cannot consume any of the entries prior to this one
   * ({@link co.cask.cdap.data2.transaction.queue.QueueEntryRow.CanConsume#NO_INCLUDING_ALL_OLDER}).
   * The latter one allows for some optimizations when doing scans of entries to be
   * consumed.
   *
   * @param consumerConfig config of the consumer
   * @param transaction current tx
   * @param enqueueWritePointer write pointer used by enqueue of this entry
   * @param counter counter of this entry
   * @param metaValue value of meta column of this entry
   * @param stateValue value of state column of this entry
   * @return one {@link co.cask.cdap.data2.transaction.queue.QueueEntryRow.CanConsume} as per description above.
   */
  public static CanConsume canConsume(ConsumerConfig consumerConfig, Transaction transaction,
                                      long enqueueWritePointer, int counter,
                                      byte[] metaValue, byte[] stateValue) {
    DequeueStrategy dequeueStrategy = consumerConfig.getDequeueStrategy();
    if (stateValue != null) {
      // If the state is written by the current transaction, ignore it, as it's processing
      long stateWritePointer = QueueEntryRow.getStateWritePointer(stateValue);
      if (stateWritePointer == transaction.getWritePointer()) {
        return CanConsume.NO;
      }
      // If the state was updated by a different consumer instance that is still active, ignore this entry.
      // The assumption is, the corresponding instance is either processing (claimed)
      // or going to process it (due to rollback/restart).
      // This only applies to FIFO, as for hash and rr, repartition needs to happen if group size change.
      int stateInstanceId = QueueEntryRow.getStateInstanceId(stateValue);
      if (dequeueStrategy == DequeueStrategy.FIFO
          && stateInstanceId < consumerConfig.getGroupSize()
          && stateInstanceId != consumerConfig.getInstanceId()) {
        return CanConsume.NO;
      }
      // If state is PROCESSED and committed, ignore it:
      ConsumerEntryState state = QueueEntryRow.getState(stateValue);
      if (state == ConsumerEntryState.PROCESSED && transaction.isVisible(stateWritePointer)) {
        // If the entry's enqueue write pointer is smaller than smallest in progress tx, then everything before it
        // must be processed, too (it is not possible that an enqueue before this is still in progress). So it is
        // safe to move the start row after this entry.
        // Note: here we ignore the long-running transactions, because we know they don't interact with queues.
        if (enqueueWritePointer < transaction.getFirstShortInProgress()) {
          return CanConsume.NO_INCLUDING_ALL_OLDER;
        }
        return CanConsume.NO;
      }
    }
    // Always try to process (claim) if using FIFO. The resolution will be done by atomically setting state to CLAIMED
    int instanceId = consumerConfig.getInstanceId();
    if (dequeueStrategy == DequeueStrategy.ROUND_ROBIN) {
      instanceId = getRoundRobinConsumerInstance(enqueueWritePointer, counter, consumerConfig.getGroupSize());
    } else if (dequeueStrategy == DequeueStrategy.HASH) {
      try {
        Map<String, Integer> hashKeys = QueueEntry.deserializeHashKeys(metaValue);
        instanceId = getHashConsumerInstance(hashKeys, consumerConfig.getHashKey(), consumerConfig.getGroupSize());
      } catch (IOException e) {
        // SHOULD NEVER happen: the meta column was serialized by QueueEntry on enqueue
        throw new RuntimeException(e);
      }
    }
    return consumerConfig.getInstanceId() == instanceId ? CanConsume.YES : CanConsume.NO;
  }

  /**
   * Returns the consumer instance id for consuming an entry enqueued with the given write pointer and counter.
   *
   * @param writePointer write pointer used by the enqueue of the entry
   * @param counter counter of the entry
   * @param groupSize number of instances in the consumer group
   * @return instance id in {@code [0, groupSize)}
   */
  public static int getRoundRobinConsumerInstance(long writePointer, int counter, int groupSize) {
    // abs is safe here: the remainder's magnitude is strictly less than groupSize,
    // so it can never be Integer.MIN_VALUE.
    return Math.abs(Objects.hashCode(writePointer, counter) % groupSize);
  }

  /**
   * Returns the consumer instance id responsible for an entry, based on the entry's hash keys.
   *
   * @param hashes hash keys carried by the entry
   * @param key the hash key the consumer group is configured to partition on
   * @param groupSize number of instances in the consumer group
   * @return instance id in {@code [0, groupSize)}; entries missing the configured key go to instance 0
   */
  public static int getHashConsumerInstance(Map<String, Integer> hashes, String key, int groupSize) {
    Integer value = hashes.get(key);
    // Take the modulo BEFORE Math.abs: Math.abs(Integer.MIN_VALUE) is still negative, so
    // abs-then-mod would yield a negative instance id (the entry would never be consumed).
    // For every other value, abs(v % g) == abs(v) % g, so the distribution is unchanged.
    return value == null ? 0 : Math.abs(value % groupSize);
  }

  /**
   * Gets the stop row for scan up to the read pointer of a transaction. Stop row is queueName + (readPointer + 1).
   */
  public static byte[] getStopRowForTransaction(byte[] queueRowPrefix, Transaction transaction) {
    return Bytes.add(queueRowPrefix, Bytes.toBytes(transaction.getReadPointer() + 1));
  }

  /**
   * For a queue entry consumer state, serialized to a byte array, returns whether it is processed and committed.
   *
   * @param stateBytes serialized state value (writePointer + instanceId + state byte)
   * @param tx transaction used to judge visibility of the state write
   */
  public static boolean isCommittedProcessed(byte[] stateBytes, Transaction tx) {
    long writePointer = getStateWritePointer(stateBytes);
    if (!tx.isVisible(writePointer)) {
      return false;
    }
    // Compare the raw state byte rather than decoding via ConsumerEntryState.fromState,
    // which would throw on an unknown state byte.
    byte state = stateBytes[Longs.BYTES + Ints.BYTES];
    return state == ConsumerEntryState.PROCESSED.getState();
  }

  /**
   * Gets the enqueue write pointer for a row.
   *
   * @param rowKey the queue entry row key
   * @param queueRowPrefixLength length of the queue row prefix (including the MD5 salt byte)
   * @return the 8-byte write pointer that follows the prefix
   */
  public static long getWritePointer(byte[] rowKey, int queueRowPrefixLength) {
    // Row key is queue_name + writePointer + counter
    return Bytes.toLong(rowKey, queueRowPrefixLength, Longs.BYTES);
  }
}