// This file is part of OpenTSDB.
// Copyright (C) 2010-2015 The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version. This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details. You should have received a copy
// of the GNU Lesser General Public License along with this program. If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.query;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import net.opentsdb.core.Const;
import net.opentsdb.core.Internal;
import net.opentsdb.core.RowKey;
import net.opentsdb.core.TSDB;
import net.opentsdb.uid.UniqueId;
import org.hbase.async.Bytes;
import org.hbase.async.FilterList;
import org.hbase.async.FuzzyRowFilter;
import org.hbase.async.KeyRegexpFilter;
import org.hbase.async.Bytes.ByteMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.hbase.async.ScanFilter;
import org.hbase.async.Scanner;
/**
* A simple class with utility methods for executing queries against the storage
* layer.
* @since 2.2
*/
public class QueryUtil {
private static final Logger LOG = LoggerFactory.getLogger(QueryUtil.class);
/**
 * Builds the row key regular expression used when scanning the data table so
 * that time series the user did not ask for are filtered out. This is a
 * convenience overload: it delegates to the five argument variant with
 * explicit tags disabled and without a fuzzy filter key or mask. At least one
 * of the parameters must be set and have values.
 * NOTE: The group bys are handed to the five argument variant, which sorts
 * the list in place.
 * @param group_bys An optional list of tag keys that we want to group on. May
 * be null.
 * @param row_key_literals An optional list of key value pairs to filter on.
 * May be null.
 * @return A regular expression string to pass to the storage layer.
 */
public static String getRowKeyUIDRegex(final List<byte[]> group_bys,
    final ByteMap<byte[][]> row_key_literals) {
  // Plain regex only: no explicit tag matching and nothing to fill in for a
  // fuzzy row filter.
  final boolean explicit_tags = false;
  final byte[] no_fuzzy_key = null;
  final byte[] no_fuzzy_mask = null;
  final String regex = getRowKeyUIDRegex(group_bys, row_key_literals,
      explicit_tags, no_fuzzy_key, no_fuzzy_mask);
  return regex;
}
/**
 * Crafts a regular expression for scanning over data table rows and filtering
 * time series that the user doesn't want. Also fills in an optional fuzzy
 * mask and key as it builds the regex if configured to do so.
 * <p>
 * The expression itself is assembled from {@code row_key_literals} only; the
 * group bys are sorted in place and used to size the internal buffer. For
 * each literal entry:
 * <ul>
 * <li>a {@code null} value array matches the tag key with any tag value;</li>
 * <li>an empty value array wraps the tag key in a negative look ahead so rows
 * carrying that key are rejected;</li>
 * <li>a non-empty value array matches the tag key followed by any one of the
 * listed value IDs. {@code null} IDs in the array are skipped, and if every
 * ID is {@code null} the entry is treated like the "any value" case instead
 * of emitting a malformed group.</li>
 * </ul>
 * NOTE: This method will sort the group bys.
 * @param group_bys An optional list of tag keys that we want to group on. May
 * be null.
 * @param row_key_literals An optional list of key value pairs to filter on.
 * May be null.
 * @param explicit_tags Whether or not explicit tags are enabled so that the
 * regex only picks out series with the specified tags
 * @param fuzzy_key An optional fuzzy filter row key. Only written to when
 * {@code explicit_tags} is true and {@code fuzzy_mask} is not null, in which
 * case it must not be null and must be large enough to hold the row key
 * prefix plus one tag pair per literal.
 * @param fuzzy_mask An optional fuzzy filter mask, sized like the fuzzy key.
 * @return A regular expression string to pass to the storage layer.
 * @since 2.3
 */
public static String getRowKeyUIDRegex(
    final List<byte[]> group_bys,
    final ByteMap<byte[][]> row_key_literals,
    final boolean explicit_tags,
    final byte[] fuzzy_key,
    final byte[] fuzzy_mask) {
  if (group_bys != null) {
    Collections.sort(group_bys, Bytes.MEMCMP);
  }
  final int prefix_width = Const.SALT_WIDTH() + TSDB.metrics_width() +
      Const.TIMESTAMP_BYTES;
  final short name_width = TSDB.tagk_width();
  final short value_width = TSDB.tagv_width();
  final short tagsize = (short) (name_width + value_width);
  // Generate a regexp for our tags. Say we have 2 tags: { 0 0 1 0 0 2 }
  // and { 4 5 6 9 8 7 }, the regexp will be:
  // "^.{7}(?:.{6})*\\Q\000\000\001\000\000\002\\E(?:.{6})*\\Q\004\005\006\011\010\007\\E(?:.{6})*$"
  final StringBuilder buf = new StringBuilder(
      15 // "^.{N}" + "(?:.{M})*" + "$"
      + ((13 + tagsize) // "(?:.{M})*\\Q" + tagsize bytes + "\\E"
      * ((row_key_literals == null ? 0 : row_key_literals.size()) +
          (group_bys == null ? 0 : group_bys.size() * 3))));
  // In order to avoid re-allocations, reserve a bit more w/ groups ^^^

  // Alright, let's build this regexp. From the beginning...
  buf.append("(?s)" // Ensure we use the DOTALL flag.
      + "^.{")
      // ... start by skipping the salt, metric ID and timestamp.
      .append(prefix_width)
      .append("}");

  final Iterator<Entry<byte[], byte[][]>> it = row_key_literals == null ?
      new ByteMap<byte[][]>().iterator() : row_key_literals.iterator();

  int fuzzy_offset = Const.SALT_WIDTH() + TSDB.metrics_width();
  if (fuzzy_mask != null) {
    // make sure to skip the timestamp when scanning
    while (fuzzy_offset < prefix_width) {
      fuzzy_mask[fuzzy_offset++] = 1;
    }
  }

  while (it.hasNext()) {
    final Entry<byte[], byte[][]> entry = it.next();
    final byte[] tag_key = entry.getKey();
    final byte[][] value_ids = entry.getValue();
    // TODO - This look ahead may be expensive. We need to get some data around
    // whether it's faster for HBase to scan with a look ahead or simply pass
    // the rows back to the TSD for filtering.
    final boolean not_key = value_ids != null && value_ids.length == 0;

    // Skip any number of tags.
    if (!explicit_tags) {
      buf.append("(?:.{").append(tagsize).append("})*");
    } else if (fuzzy_mask != null) {
      // TODO - see if we can figure out how to improve the fuzzy filter by
      // setting explicit tag values whenever we can. In testing there was
      // a conflict between the row key regex and fuzzy filter that prevented
      // results from returning properly.
      // Pin the tag key bytes in the fuzzy key and flag every tag value byte
      // in the mask.
      System.arraycopy(tag_key, 0, fuzzy_key, fuzzy_offset, name_width);
      fuzzy_offset += name_width;
      for (int i = 0; i < value_width; i++) {
        fuzzy_mask[fuzzy_offset++] = 1;
      }
    }

    if (not_key) {
      // start the lookahead as we have a key we explicitly do not want in the
      // results
      buf.append("(?!");
    }
    buf.append("\\Q");
    addId(buf, tag_key, true);

    if (value_ids != null && value_ids.length > 0) { // Add a group_by.
      // We want specific IDs. List them: /(AAA|BBB|CCC|..)/
      final int group_start = buf.length();
      buf.append("(?:");
      boolean has_value = false;
      for (final byte[] value_id : value_ids) {
        if (value_id == null) {
          continue;
        }
        if (has_value) {
          buf.append('|');
        }
        buf.append("\\Q");
        addId(buf, value_id, true);
        has_value = true;
      }
      if (has_value) {
        buf.append(')');
      } else {
        // Every ID was null: drop the dangling "(?:" (blindly replacing the
        // last character would have produced "(?)") and accept any value.
        buf.setLength(group_start);
        buf.append(".{").append(value_width).append('}');
      }
    } else {
      buf.append(".{").append(value_width).append('}'); // Any value ID.
    }

    if (not_key) {
      // be sure to close off the look ahead
      buf.append(")");
    }
  }
  // Skip any number of tags before the end.
  if (!explicit_tags) {
    buf.append("(?:.{").append(tagsize).append("})*");
  }
  buf.append("$");
  return buf.toString();
}
/**
 * Sets a filter or filter list on the scanner based on whether or not the
 * query had tags it needed to match. Does nothing when both the group bys and
 * the row key literals are null or empty.
 * <p>
 * Without the fuzzy filter only a row key regular expression filter is set.
 * With explicit tags and the fuzzy filter enabled, the scanner's start and
 * stop keys are also replaced with keys that include the tag keys, and the
 * scanner gets a filter list holding the fuzzy row filter followed by the
 * regular expression filter.
 * @param scanner The scanner to modify.
 * @param group_bys An optional list of tag keys that we want to group on. May
 * be null.
 * @param row_key_literals An optional list of key value pairs to filter on.
 * May be null.
 * @param explicit_tags Whether or not explicit tags are enabled so that the
 * regex only picks out series with the specified tags
 * @param enable_fuzzy_filter Whether or not a fuzzy filter should be used
 * in combination with the explicit tags param. If explicit tags is disabled
 * then this param is ignored.
 * @param end_time The end of the query time so the fuzzy filter knows when
 * to stop scanning.
 */
public static void setDataTableScanFilter(
    final Scanner scanner,
    final List<byte[]> group_bys,
    final ByteMap<byte[][]> row_key_literals,
    final boolean explicit_tags,
    final boolean enable_fuzzy_filter,
    final int end_time) {
  // no-op
  if ((group_bys == null || group_bys.isEmpty())
      && (row_key_literals == null || row_key_literals.isEmpty())) {
    return;
  }

  final int prefix_width = Const.SALT_WIDTH() + TSDB.metrics_width() +
      Const.TIMESTAMP_BYTES;
  final short name_width = TSDB.tagk_width();
  final short value_width = TSDB.tagv_width();
  final boolean use_fuzzy_filter = explicit_tags && enable_fuzzy_filter;

  final byte[] fuzzy_key;
  final byte[] fuzzy_mask;
  if (use_fuzzy_filter) {
    // The literals may legitimately be null when only group bys were given,
    // so don't dereference them blindly.
    final int literal_count =
        row_key_literals == null ? 0 : row_key_literals.size();
    final int fuzzy_length =
        prefix_width + (literal_count * (name_width + value_width));
    fuzzy_key = new byte[fuzzy_length];
    fuzzy_mask = new byte[fuzzy_length];
    // Seed the fuzzy key with the key the scanner currently points at
    // (presumably the salt/metric/timestamp start row - confirm with callers).
    final byte[] current_key = scanner.getCurrentKey();
    System.arraycopy(current_key, 0, fuzzy_key, 0, current_key.length);
  } else {
    fuzzy_key = fuzzy_mask = null;
  }

  // Builds the regex and, when requested, fills in the fuzzy key and mask.
  final String regex = getRowKeyUIDRegex(group_bys, row_key_literals,
      explicit_tags, fuzzy_key, fuzzy_mask);
  final KeyRegexpFilter regex_filter = new KeyRegexpFilter(
      regex, Const.ASCII_CHARSET);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Regex for scanner: " + scanner + ": " +
        byteRegexToString(regex));
  }

  if (!use_fuzzy_filter) {
    scanner.setFilter(regex_filter);
    return;
  }

  scanner.setStartKey(fuzzy_key);
  final byte[] stop_key = Arrays.copyOf(fuzzy_key, fuzzy_key.length);
  Internal.setBaseTime(stop_key, end_time);
  // max out the tag values, stepping over each tag key
  int idx = prefix_width + name_width;
  while (idx < stop_key.length) {
    for (int i = 0; i < value_width; i++) {
      stop_key[idx++] = (byte) 0xFF;
    }
    idx += name_width;
  }
  scanner.setStopKey(stop_key);

  final List<ScanFilter> filters = new ArrayList<ScanFilter>(2);
  filters.add(
      new FuzzyRowFilter(
          new FuzzyRowFilter.FuzzyFilterPair(fuzzy_key, fuzzy_mask)));
  filters.add(regex_filter);
  scanner.setFilter(new FilterList(filters));
}
/**
 * Creates a regular expression with a list of or'd TSUIDs to compare
 * against the rows in storage. The metric portion of each TSUID is dropped
 * and only the tagk/tagv pairs are matched; the salt, metric and timestamp
 * at the head of the row key are skipped by the expression.
 * NOTE: This method sorts the given list in place.
 * @param tsuids The list of TSUIDs to scan for, as hex encoded strings. Must
 * contain at least one entry.
 * @return A regular expression string to pass to the storage layer.
 * @throws IllegalArgumentException if the list is empty, as no valid
 * expression can be built from it
 */
public static String getRowKeyTSUIDRegex(final List<String> tsuids) {
  if (tsuids.isEmpty()) {
    // Without this guard the closing step below would overwrite the opening
    // parenthesis and hand a malformed expression to the storage layer.
    throw new IllegalArgumentException(
        "Unable to build a row key regex from an empty TSUID list");
  }
  Collections.sort(tsuids);

  // first, convert the tags to byte arrays and count up the total length
  // so we can allocate the string builder
  final short metric_width = TSDB.metrics_width();
  int tags_length = 0;
  final ArrayList<byte[]> uids = new ArrayList<byte[]>(tsuids.size());
  for (final String tsuid : tsuids) {
    // two hex characters per metric byte
    final String tags = tsuid.substring(metric_width * 2);
    final byte[] tag_bytes = UniqueId.stringToUid(tags);
    tags_length += tag_bytes.length;
    uids.add(tag_bytes);
  }

  // Generate a regexp for our tags based on any metric and timestamp (since
  // those are handled by the row start/stop) and the list of TSUID tagk/v
  // pairs. The generated regex will look like: ^.{7}(tags|tags|tags)$
  // where each "tags" is similar to \\Q\000\000\001\000\000\002\\E
  final StringBuilder buf = new StringBuilder(
      13 // "(?s)^.{N}(" + ")$"
      + (tsuids.size() * 11) // "\\Q" + "\\E|"
      + tags_length); // total # of bytes in tsuids tagk/v pairs

  // Alright, let's build this regexp. From the beginning...
  buf.append("(?s)" // Ensure we use the DOTALL flag.
      + "^.{")
      // ... start by skipping the salt, metric ID and timestamp.
      .append(Const.SALT_WIDTH() + metric_width + Const.TIMESTAMP_BYTES)
      .append("}(");

  boolean first = true;
  for (final byte[] tags : uids) {
    if (!first) {
      buf.append('|');
    }
    first = false;
    // quote the bytes
    buf.append("\\Q");
    addId(buf, tags, true);
  }
  // close the alternation and anchor to the end of the row key
  buf.append(")$");
  return buf.toString();
}
/**
 * Compiles an HBase scanner against the main data table. The start and stop
 * row keys are laid out as the optional salt, followed by the metric UID,
 * followed by the start (respectively stop) timestamp.
 * @param tsdb The TSDB with a configured HBaseClient
 * @param salt_bucket An optional salt bucket ID for salting the start/stop
 * keys. Only used when salting is enabled.
 * @param metric The metric to scan for
 * @param start The start time stamp in seconds
 * @param stop The stop timestamp in seconds
 * @param table The table name to scan over
 * @param family The table family to scan over
 * @return A scanner ready for processing.
 */
public static Scanner getMetricScanner(final TSDB tsdb, final int salt_bucket,
    final byte[] metric, final int start, final int stop,
    final byte[] table, final byte[] family) {
  final int salt_width = Const.SALT_WIDTH();
  final short metric_width = TSDB.metrics_width();
  final int timestamp_offset = salt_width + metric_width;
  final int key_length = timestamp_offset + Const.TIMESTAMP_BYTES;

  // Assemble the start row first: [salt][metric][start time]
  final byte[] start_row = new byte[key_length];
  if (salt_width > 0) {
    final byte[] salt = RowKey.getSaltBytes(salt_bucket);
    System.arraycopy(salt, 0, start_row, 0, salt_width);
  }
  System.arraycopy(metric, 0, start_row, salt_width, metric_width);

  // The end row shares the salt and metric and only differs in its timestamp.
  final byte[] end_row = Arrays.copyOf(start_row, key_length);
  Bytes.setInt(start_row, start, timestamp_offset);
  Bytes.setInt(end_row, stop, timestamp_offset);

  final Scanner scanner = tsdb.getClient().newScanner(table);
  scanner.setMaxNumRows(tsdb.getConfig().scanner_maxNumRows());
  scanner.setStartKey(start_row);
  scanner.setStopKey(end_row);
  scanner.setFamily(family);
  return scanner;
}
/**
 * Appends the given UID to the given regular expression buffer, one character
 * per byte (each byte is treated as an unsigned value). The caller is
 * expected to have already opened a quoted section with "\\Q"; if the UID
 * itself contains the byte sequence for "\\E", which would end the quoted
 * section early, a literal "\\E" is emitted and quoting is re-opened so the
 * resulting expression still matches the exact bytes.
 * @param buf The String buffer to modify
 * @param id The UID to add
 * @param close Whether or not to append "\\E" to the end
 */
public static void addId(final StringBuilder buf, final byte[] id,
    final boolean close) {
  boolean backslash = false;
  for (final byte b : id) {
    buf.append((char) (b & 0xFF));
    if (b == 'E' && backslash) { // If we saw a `\' and now we have a `E'.
      // So we just terminated the quoted section because we just added \E
      // to `buf'. So let's put a literal \E now and start quoting again.
      buf.append("\\\\E\\Q");
      // The backslash has been consumed by the escape. Without this reset an
      // `E' directly following the pair would be escaped a second time and
      // the expression would no longer match the UID.
      backslash = false;
    } else {
      backslash = b == '\\';
    }
  }
  if (close) {
    buf.append("\\E");
  }
}
/**
 * Little helper to make a row key regular expression printable by converting
 * the raw UID bytes that follow each quote opener into a readable array such
 * as {@code [0, 0, 1]}. A UID is read as a tag key when the character three
 * positions before it is {@code '*'} and as a tag value otherwise. All other
 * characters are copied as they are, except for the single character right
 * after each converted UID, which is skipped.
 * <p>
 * Input that is too short to hold a full UID after a {@code 'Q'} is copied
 * through unchanged instead of failing, so this can safely be used for
 * logging.
 * @param regexp The regex string to convert for the debug log
 * @return The regex with the UID bytes rendered as arrays
 */
public static String byteRegexToString(final String regexp) {
  final int length = regexp.length();
  final StringBuilder buf = new StringBuilder(length);
  for (int i = 0; i < length; i++) {
    if (i > 0 && regexp.charAt(i - 1) == 'Q') {
      // Guard the look behind: a 'Q' within the first two characters has
      // nothing three positions before the UID.
      final boolean is_tagk = i >= 3 && regexp.charAt(i - 3) == '*';
      final int width = is_tagk ? TSDB.tagk_width() : TSDB.tagv_width();
      if (i + width <= length) {
        final byte[] uid = new byte[width];
        for (int x = 0; x < width; x++) {
          uid[x] = (byte) regexp.charAt(i + x);
        }
        buf.append(Arrays.toString(uid));
        // Jump over the UID; the loop increment then skips one more
        // character.
        i += width;
        continue;
      }
      // Not enough characters left for a whole UID: fall through and copy.
    }
    buf.append(regexp.charAt(i));
  }
  return buf.toString();
}
}