/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.api.dataset.lib;
import co.cask.cdap.api.annotation.Property;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.DatasetSpecification;
import co.cask.cdap.api.dataset.table.Put;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Table;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
* An abstract class for time series Datasets.
*/
abstract class TimeseriesDataset extends AbstractDataset {

  public static final String ATTR_TIME_INTERVAL_TO_STORE_PER_ROW = "timeIntervalToStorePerRow";

  /**
   * See {@link TimeseriesTable} javadoc for description.
   */
  public static final long DEFAULT_TIME_INTERVAL_PER_ROW = TimeUnit.HOURS.toMillis(1);

  // This is a hard limit on the number of rows to read per read. This is safety-check, not intended to rely on in user
  // code. We need this check in current implementation and this may change when we have readHigherOrEq() mentioned
  // above.
  // That means that max time range to be scanned is
  //   rowPartitionIntervalSize * MAX_ROWS_TO_SCAN_PER_READ
  // For 1 min intervals this is ~ 70 days, for 1 hour intervals this is ~11.5 years

  /**
   * Limit on the number of rows to scan per read.
   */
  public static final int MAX_ROWS_TO_SCAN_PER_READ = 100000;

  protected final Table table;

  // Size (in the same unit as entry timestamps, typically ms) of the time interval whose entries
  // share a single row. See {@link TimeseriesTable} for guidance on choosing this value.
  @Property
  long rowPartitionIntervalSize;

  /**
   * Base constructor that only sets the name of the data set.
   *
   * @param spec dataset specification; the {@link #ATTR_TIME_INTERVAL_TO_STORE_PER_ROW} property is read
   *             from it, falling back to {@link #DEFAULT_TIME_INTERVAL_PER_ROW} when absent
   * @param table the underlying table that stores the time series data
   */
  TimeseriesDataset(DatasetSpecification spec, Table table) {
    super(spec.getName(), table);
    this.rowPartitionIntervalSize = spec.getLongProperty(ATTR_TIME_INTERVAL_TO_STORE_PER_ROW,
                                                         DEFAULT_TIME_INTERVAL_PER_ROW);
    this.table = table;
  }

  /**
   * Writes constructed value. This implementation overrides the existing value.
   * This method can be overridden to apply update logic relevant to the subclass (e.g. increment counter).
   *
   * @param row row key to write to
   * @param columnName column name to write to
   * @param value value passed with {@link Entry} into
   */
  void write(byte[] row, byte[] columnName, byte[] value) {
    Put put = new Put(row, columnName, value);
    table.put(put);
  }

  /**
   * Writes an entry, encoding the timestamp into the row key (partitioned by
   * {@link #rowPartitionIntervalSize}) and the timestamp plus sorted tags into the column name.
   *
   * @param key user-provided entry key
   * @param value value to store
   * @param timestamp timestamp of the entry
   * @param tags optional tags associated with the entry; the caller's array is not modified
   */
  void write(byte[] key, byte[] value, long timestamp, byte[]... tags) {
    // Note: no need to validate entry as long as its fullness enforced by its constructor
    // Please see the class javadoc for details on the stored data format.
    byte[] row = createRow(key, timestamp, rowPartitionIntervalSize);
    // Note: we could move sorting code to Entry, but we didn't as we use same ctor when reading and we
    // don't need to sort during reading (they are already sorted asc according to storage format).
    byte[][] sortedTags = tags.clone();
    sortTags(sortedTags);
    byte[] columnName = createColumnName(timestamp, sortedTags);
    write(row, columnName, value);
  }

  /**
   * Atomically increments the stored counter cell addressed by the given key, timestamp and tags.
   *
   * @param counter counter key
   * @param amount amount to add
   * @param timestamp timestamp of the entry whose cell is incremented
   * @param tags optional tags identifying the cell; the caller's array is not modified
   * @return the counter value after the increment
   */
  long internalIncrement(byte[] counter, long amount, long timestamp, byte[]... tags) {
    byte[][] sortedTags = tags.clone();
    sortTags(sortedTags);
    byte[] columnName = createColumnName(timestamp, sortedTags);
    byte[] rowName = createRow(counter, timestamp, rowPartitionIntervalSize);
    return table.incrementAndGet(rowName, columnName, amount);
  }

  // Caps the number of rows fetched by a single read at MAX_ROWS_TO_SCAN_PER_READ.
  private int applyLimitOnRowsToRead(long timeIntervalsCount) {
    return (int) Math.min(timeIntervalsCount, MAX_ROWS_TO_SCAN_PER_READ);
  }

  /**
   * Returns the value that will be used as the actual row key.
   * It has the following format:
   * {@code <key>[<timestamp>/<rowPartitionIntervalSize>]}.
   *
   * @param key a user-provided entry key value
   * @param timestamp is 8-byte encoded long which defines interval timestamp stamp
   * @param rowPartitionIntervalSize the size of time interval for partitioning data into rows. Used for performance
   *                                 optimization. Please refer to {@link TimeseriesTable} for more details including
   *                                 how to choose this value.
   * @return a composite value used as the row key
   */
  static byte[] createRow(byte[] key, long timestamp, long rowPartitionIntervalSize) {
    return Bytes.add(key, Bytes.toBytes(getRowKeyTimestampPart(timestamp, rowPartitionIntervalSize)));
  }

  // Index of the time interval the timestamp falls into; appended to the key to form the row key.
  private static long getRowKeyTimestampPart(final long timestamp, final long rowPartitionIntervalSize) {
    return timestamp / rowPartitionIntervalSize;
  }

  // Sorts tags lexicographically (ascending), matching the order used in stored column names.
  private static void sortTags(byte[][] tags) {
    Arrays.sort(tags, Bytes.BYTES_COMPARATOR);
  }

  /**
   * Returns the value that will be used as the actual column name.
   * Column name has the following format: {@code <timestamp><tags>}. Sorting of tags is needed for
   * efficient filtering based on provided tags during reading
   *
   * @param timestamp is 8-byte encoded long: user-provided entry timestamp.
   * @param tags is an encoded user-provided entry tags list. It is formatted as:
   *             {@code [<tag_length><tag_value>]*}, where tag length is the 4-byte encoded int length of the tag
   *             and tags are sorted in ascending order
   * @return the encoded column name
   */
  static byte[] createColumnName(long timestamp, byte[][] tags) {
    // hint: possible perf improvement: we can calculate the columnLength ahead of time and avoid creating many array
    // objects
    // hint: possible perf improvement: we can actually store just the diff from the timestamp encoded in the row key
    // and by doing that reduce the footprint of every stored entry
    // hint: consider different column name format: we may want to know "sooner" how many there are tags to make other
    // parts of the code run faster and avoid creating too many array objects. This may be easily doable as column
    // name is immutable.
    byte[] columnName = createColumnNameFirstPart(timestamp);
    for (byte[] tag : tags) {
      // hint: possible perf improvement: use compressed int (see Bytes.intToByte()) or at least Bytes.toBytes(short)
      // which should be well enough
      columnName = Bytes.add(columnName, Bytes.toBytes(tag.length), tag);
    }
    return columnName;
  }

  // The fixed 8-byte timestamp prefix of every column name.
  private static byte[] createColumnNameFirstPart(final long timestamp) {
    return Bytes.toBytes(timestamp);
  }

  /**
   * Returns the number of row partitions (time intervals) covered by the given inclusive time range.
   */
  static long getTimeIntervalsCount(final long startTime, final long endTime,
                                    final long rowPartitionIntervalSize) {
    return (getRowKeyTimestampPart(endTime, rowPartitionIntervalSize) -
      getRowKeyTimestampPart(startTime, rowPartitionIntervalSize) + 1);
  }

  /**
   * Returns the row key of the interval that is {@code intervalIndex} partitions after the one
   * containing {@code timeRangeStart}.
   */
  static byte[] getRowOfKthInterval(final byte[] key,
                                    final long timeRangeStart,
                                    // zero-based
                                    final int intervalIndex,
                                    final long rowPartitionIntervalSize) {
    return createRow(key, timeRangeStart + intervalIndex * rowPartitionIntervalSize, rowPartitionIntervalSize);
  }

  // True when the column name carries encoded tags after the 8-byte timestamp prefix.
  static boolean hasTags(final byte[] columnName) {
    // if columnName only has timestamp, then there's no tags encoded into column name
    return (columnName.length > Bytes.SIZEOF_LONG);
  }

  // Extracts the entry timestamp from the first 8 bytes of the column name.
  static long parseTimeStamp(final byte[] columnName) {
    return Bytes.toLong(columnName, 0);
  }

  /**
   * Reads entries for a given time range and returns an {@code Iterator<Entry>}. This method is intended to be
   * used by subclasses to define their own public <code>read</code> method.
   * NOTE: A limit is placed on the max number of time intervals to be scanned during a read, as defined by
   * {@link #MAX_ROWS_TO_SCAN_PER_READ}.
   *
   * @param key name of the entry to read
   * @param startTime defines start of the time range to read, inclusive
   * @param endTime defines end of the time range to read, inclusive
   * @param tags defines a set of tags that MUST present in every returned entry.
   *             NOTE: using tags returns entries containing all tags that were provided during writing
   * @return an iterator over entries that satisfy provided conditions
   * @throws IllegalArgumentException if {@code startTime > endTime}
   */
  final Iterator<Entry> readInternal(byte[] key, long startTime, long endTime, byte[]... tags) {
    // validating params
    if (startTime > endTime) {
      throw new IllegalArgumentException("Provided time range condition is incorrect: startTime > endTime");
    }
    return new EntryScanner(key, startTime, endTime, tags);
  }

  /**
   * Create Entry. Checking if filter tags are contained in columnName and parsing tags in one pass.
   *
   * @param key key of the entries to read
   * @param value value of the entries
   * @param columnName columnName of the entries integrated timestamp and tags
   * @param tags the tags to filter entries
   * @return an Entry by parsing tags from columnName, if the columnName contains sortedTags. Otherwise, return
   *         <code>null</code> (also returned when the column name is malformed)
   */
  private Entry createEntry(final byte[] key, final byte[] value, final byte[] columnName, final byte[][] tags) {
    // columnName doesn't contain tags.
    if (!hasTags(columnName)) {
      if (tags == null || tags.length == 0) {
        return new Entry(key, value, parseTimeStamp(columnName));
      }
      return null;
    }
    // columnName contains tags.
    byte[][] sortedTags = null;
    if (tags != null) {
      sortedTags = tags.clone();
      sortTags(sortedTags);
    }
    // Since we know that tags are sorted we can test match in one pass (like in merge sort)
    int curPos = Bytes.SIZEOF_LONG;
    int curTagToCheck = 0;
    List<byte[]> parsedTags = new ArrayList<>();
    // each encoded tag needs at least its 4-byte length prefix to remain
    while (curPos + Bytes.SIZEOF_INT <= columnName.length) {
      int tagLength = Bytes.toInt(columnName, curPos);
      curPos += Bytes.SIZEOF_INT;
      int tagStartPos = curPos;
      curPos += tagLength;
      // Guard against a malformed/corrupt column name: the declared tag length must be non-negative and
      // must fit within the remaining bytes. (Comparing tagLength against the whole columnName.length, as
      // done previously, could let the copy below read past the end of the array.)
      if (tagLength < 0 || curPos > columnName.length) {
        return null;
      }
      // parse tag from columnName
      parsedTags.add(Arrays.copyOfRange(columnName, tagStartPos, tagStartPos + tagLength));
      // we need to parse all tags in columnName if no sortedTags is passed. And we need parse the remaining tags
      // in the columnName, after sortedTags are matched.
      if (sortedTags == null || sortedTags.length == 0 || curTagToCheck == sortedTags.length) {
        continue;
      }
      // check tags encoded in columnName against sortedTags.
      // tag is encoded in columnName array from tagStartPos and in length of tagLength.
      int tagsMatch = Bytes.compareTo(columnName, tagStartPos, tagLength,
                                      sortedTags[curTagToCheck], 0, sortedTags[curTagToCheck].length);
      if (tagsMatch == 0) {
        // Tags match, advancing to the next tag to be checked.
        curTagToCheck++;
      } else if (tagsMatch > 0) {
        // Tags do NOT match and fetched tag is bigger than the one we are matching against. Since tags encoded in
        // sorted order this means we will not find this tag we are matching against.
        return null;
      }
      // tagsMatch < 0 means we can advance and check against next tag encoded into the column
    }
    if (sortedTags != null && curTagToCheck < sortedTags.length) {
      // this means we didn't find all required tags in the entry data
      return null;
    }
    return new Entry(key, value, parseTimeStamp(columnName), parsedTags.toArray(new byte[parsedTags.size()][]));
  }

  /**
   * An iterator over entries.
   */
  public final class EntryScanner extends AbstractCloseableIterator<Entry> {
    private final byte[] key;
    private final long startTime;
    private final byte[][] tags;
    // the number of rows to fetch
    private final long timeIntervalsCount;
    private final byte[] startColumnName;
    private final byte[] endColumnName;
    // track the number of rows scanned through
    private int rowScanned;
    // use an internal iterator to avoid leaking AbstractIterator methods to outside.
    private Iterator<Map.Entry<byte[], byte[]>> internalIterator;

    /**
     * Construct an EntryScanner. Should only be called by TimeseriesTable.
     *
     * @param key key of the entries to read
     * @param startTime defines start of the time range to read, inclusive
     * @param endTime defines end of the time range to read, inclusive
     * @param tags defines a set of tags that MUST present in every returned entry.
     *             NOTE: using tags returns entries containing all tags that were provided during writing
     */
    EntryScanner(byte[] key, long startTime, long endTime, byte[][] tags) {
      this.key = key;
      this.startTime = startTime;
      this.tags = tags;
      // calculating time intervals (i.e. rows, as one row = one time interval) to fetch.
      long timeIntervals = getTimeIntervalsCount(startTime, endTime, rowPartitionIntervalSize);
      timeIntervalsCount = applyLimitOnRowsToRead(timeIntervals);
      // Note: do NOT use tags when calculating start/stop column keys due to the column name format.
      startColumnName = createColumnNameFirstPart(startTime);
      endColumnName = createColumnNameFirstPart(endTime + 1);
      internalIterator = null;
    }

    @Override
    protected Entry computeNext() {
      // Iterate (rather than recurse) past entries rejected by the tag filter: a long run of
      // non-matching entries would otherwise grow the call stack with one frame per rejection.
      while (true) {
        // advance to the next non-empty row once the current row's iterator is exhausted
        while ((internalIterator == null || !internalIterator.hasNext()) && rowScanned < timeIntervalsCount) {
          byte[] row = getRowOfKthInterval(key, startTime, rowScanned, rowPartitionIntervalSize);
          internalIterator = createIterator(row);
          rowScanned++;
        }
        if (internalIterator == null || !internalIterator.hasNext()) {
          // all rows within the (limited) time range have been scanned
          return endOfData();
        }
        Map.Entry<byte[], byte[]> entry = internalIterator.next();
        Entry result = createEntry(key, entry.getValue(), entry.getKey(), tags);
        if (result != null) {
          return result;
        }
        // entry didn't match the requested tags (or was malformed) - keep scanning
      }
    }

    // Fetches one row and returns an iterator over its columns, or null when the row is empty.
    private Iterator<Map.Entry<byte[], byte[]>> createIterator(byte[] row) {
      Row currentRow = table.get(row,
                                 // we only need to set left bound on the first row: others cannot have records
                                 // with the timestamp less than startTime
                                 (rowScanned == 0) ? startColumnName : null,
                                 // we only need to set right bound on the last row: others cannot have records
                                 // with the timestamp greater than endTime
                                 (rowScanned == timeIntervalsCount - 1) ? endColumnName : null,
                                 // read all
                                 -1);
      if (!currentRow.isEmpty()) {
        return currentRow.getColumns().entrySet().iterator();
      }
      return null;
    }

    @Override
    public void close() {
      // no op for now since the internal scanner is created from Row, which is a local byte[]
    }
  }

  /**
   * Time series DataSet entry. Immutable value holder for a single stored data point.
   */
  static class Entry {
    private final byte[] key;
    private final byte[] value;
    private final long timestamp;
    private final byte[][] tags;

    /**
     * Creates instance of the time series entry.
     *
     * @param key key of the entry. E.g. "metric1"
     * @param value value to store
     * @param timestamp timestamp of the entry
     * @param tags optional list of tags associated with the entry
     */
    public Entry(final byte[] key, final byte[] value, final long timestamp, final byte[]... tags) {
      this.key = key;
      this.value = value;
      this.timestamp = timestamp;
      this.tags = tags;
    }

    /**
     * Returns the key of the entry.
     * @return the key of the entry
     */
    public byte[] getKey() {
      return key;
    }

    /**
     * Returns the count value of the entry.
     * @return the count value of the entry
     */
    public byte[] getValue() {
      return value;
    }

    /**
     * Returns the timestamp of the entry.
     * @return the timestamp of the entry
     */
    public long getTimestamp() {
      return timestamp;
    }

    /**
     * Returns the tags associated with the entry.
     * NOTE: returns the internal array (no defensive copy), matching existing caller expectations.
     * @return the tags associated with the entry
     */
    public byte[][] getTags() {
      return tags;
    }
  }
}