// This file is part of OpenTSDB.
// Copyright (C) 2010-2012 The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version. This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details. You should have received a copy
// of the GNU Lesser General Public License along with this program. If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.core;

import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.hbase.async.Bytes;

import net.opentsdb.uid.NoSuchUniqueId;
import net.opentsdb.uid.NoSuchUniqueName;

/** Helper functions to deal with tags. */
public final class Tags {

  private static final Logger LOG = LoggerFactory.getLogger(Tags.class);

  private Tags() {
    // Can't create instances of this utility class.
  }

  /**
   * Optimized version of {@code String#split} that doesn't use regexps.
   * <p>
   * The string is scanned once to count the separators and once more to cut
   * the substrings, so the running time is linear in the length of the
   * string.  Empty substrings are preserved: an empty input yields an array
   * containing one empty string, and two adjacent separators yield an empty
   * string between them.
   * @param s The string to split.
   * @param c The separator to use to split the string.
   * @return A non-null, non-empty array.
   */
  public static String[] splitString(final String s, final char c) {
    final char[] chars = s.toCharArray();
    // There is always one more substring than there are separators.
    int num_substrings = 1;
    for (final char x : chars) {
      if (x == c) {
        num_substrings++;
      }
    }
    final String[] result = new String[num_substrings];
    final int len = chars.length;
    int start = 0;  // starting index in chars of the current substring.
    int pos = 0;    // current index in chars.
    int i = 0;      // number of the current substring.
    for (; pos < len; pos++) {
      if (chars[pos] == c) {
        result[i++] = new String(chars, start, pos - start);
        start = pos + 1;
      }
    }
    // Whatever follows the last separator (possibly nothing) is the final
    // substring.
    result[i] = new String(chars, start, pos - start);
    return result;
  }

  /**
   * Parses a tag into a HashMap.
   * <p>
   * If the map already contains the same tag name with the same value, the
   * map is left untouched.
   * @param tags The HashMap into which to store the tag.
   * @param tag A String of the form "tag=value".
   * @throws IllegalArgumentException if the tag is malformed.
   * @throws IllegalArgumentException if the tag was already in tags with a
   * different value.
   */
  public static void parse(final HashMap<String, String> tags,
                           final String tag) {
    final String[] kv = splitString(tag, '=');
    if (kv.length != 2 || kv[0].length() <= 0 || kv[1].length() <= 0) {
      throw new IllegalArgumentException("invalid tag: " + tag);
    }
    final String existing = tags.get(kv[0]);
    if (kv[1].equals(existing)) {
      return;  // Exact same name=value pair already present.
    }
    if (existing != null) {
      throw new IllegalArgumentException("duplicate tag: " + tag
                                         + ", tags=" + tags);
    }
    tags.put(kv[0], kv[1]);
  }

  /**
   * Parses the metric and tags out of the given string.
   * @param metric A string of the form "metric" or "metric{tag=value,...}".
   * @param tags The map to populate with the tags parsed out of the first
   * argument.
   * @return The name of the metric.
   * @throws IllegalArgumentException if the metric is malformed.  When the
   * failure comes from one of the tags, the exception thrown by
   * {@link #parse} is attached as the cause.
   */
  public static String parseWithMetric(final String metric,
                                       final HashMap<String, String> tags) {
    final int curly = metric.indexOf('{');
    if (curly < 0) {
      return metric;
    }
    final int len = metric.length();
    if (metric.charAt(len - 1) != '}') {  // "foo{"
      throw new IllegalArgumentException("Missing '}' at the end of: "
                                         + metric);
    } else if (curly == len - 2) {  // "foo{}"
      return metric.substring(0, len - 2);
    }
    // substring the tags out of "foo{a=b,...,x=y}" and parse them.
    for (final String tag : splitString(metric.substring(curly + 1, len - 1),
                                        ',')) {
      try {
        parse(tags, tag);
      } catch (IllegalArgumentException e) {
        // Keep the original exception as the cause so its stack trace isn't
        // lost when the message is enriched with the offending tag.
        throw new IllegalArgumentException("When parsing tag '" + tag
                                           + "': " + e.getMessage(), e);
      }
    }
    // Return the "foo" part of "foo{a=b,...,x=y}"
    return metric.substring(0, curly);
  }

  /**
   * Parses an integer value as a long from the given character sequence.
   * <p>
   * This is equivalent to {@link Long#parseLong(String)} except it's up to
   * 100% faster on {@link String} and always works in O(1) space even with
   * {@link StringBuilder} buffers (where it's 2x to 5x faster).
   * @param s The character sequence containing the integer value to parse.
   * @return The value parsed.
   * @throws NumberFormatException if the value is malformed or overflows.
   */
  public static long parseLong(final CharSequence s) {
    final int n = s.length();  // Will NPE if necessary.
    if (n == 0) {
      throw new NumberFormatException("Empty string");
    }
    char c = s.charAt(0);  // Current character.
    int i = 1;  // index in `s'.
    if (c < '0' && (c == '+' || c == '-')) {  // Only 1 test in common case.
      if (n == 1) {
        throw new NumberFormatException("Just a sign, no value: " + s);
      } else if (n > 20) {  // "+9223372036854775807" or "-9223372036854775808"
        throw new NumberFormatException("Value too long: " + s);
      }
      c = s.charAt(1);
      i = 2;  // Skip over the sign.
    } else if (n > 19) {  // "9223372036854775807"
      throw new NumberFormatException("Value too long: " + s);
    }
    long v = 0;  // The result (negated to easily handle MIN_VALUE).
    do {
      if ('0' <= c && c <= '9') {
        v -= c - '0';
      } else {
        throw new NumberFormatException("Invalid character '" + c
                                        + "' in " + s);
      }
      if (i == n) {
        break;
      }
      v *= 10;
      c = s.charAt(i++);
    } while (true);
    // The accumulator only ever goes down from 0, so a strictly positive
    // value here means the arithmetic wrapped around.
    if (v > 0) {
      throw new NumberFormatException("Overflow in " + s);
    } else if (s.charAt(0) == '-') {
      return v;  // Value is already negative, return unchanged.
    } else if (v == Long.MIN_VALUE) {
      // -MIN_VALUE isn't representable as a positive long.
      throw new NumberFormatException("Overflow in " + s);
    } else {
      return -v;  // Positive value, need to fix the sign.
    }
  }

  /**
   * Extracts the value of the given tag name from the given row key.
   * @param tsdb The TSDB instance to use for UniqueId lookups.
   * @param row The row key in which to search the tag name.
   * @param name The name of the tag to search in the row key.
   * @return The value associated with the given tag name, or null if this tag
   * isn't present in this row key.
   * @throws IllegalArgumentException if the tag name fails
   * {@link #validateString}.
   * @throws NoSuchUniqueName as declared; presumably raised by the tag name
   * lookup when the name has no ID assigned.
   */
  static String getValue(final TSDB tsdb, final byte[] row,
                         final String name) throws NoSuchUniqueName {
    validateString("tag name", name);
    final byte[] id = tsdb.tag_names.getId(name);
    final byte[] value_id = getValueId(tsdb, row, id);
    if (value_id == null) {
      return null;
    }
    // This shouldn't throw a NoSuchUniqueId.
    try {
      return tsdb.tag_values.getName(value_id);
    } catch (NoSuchUniqueId e) {
      LOG.error("Internal error, NoSuchUniqueId unexpected here!", e);
      throw e;
    }
  }

  /**
   * Extracts the value ID of the given tag name ID from the given row key.
   * <p>
   * The tags are scanned linearly, starting right after the metric ID and
   * timestamp, in steps of one (name ID, value ID) pair.
   * @param tsdb The TSDB instance to use for UniqueId lookups.
   * @param row The row key in which to search the tag name ID.
   * @param tag_id The ID of the tag name to search in the row key.
   * @return The value ID associated with the given tag ID, or null if this
   * tag ID isn't present in this row key.
   */
  static byte[] getValueId(final TSDB tsdb, final byte[] row,
                           final byte[] tag_id) {
    final short name_width = tsdb.tag_names.width();
    final short value_width = tsdb.tag_values.width();
    // TODO(tsuna): Can do a binary search.
    for (short pos = (short) (tsdb.metrics.width() + Const.TIMESTAMP_BYTES);
         pos < row.length;
         pos += name_width + value_width) {
      if (rowContains(row, pos, tag_id)) {
        pos += name_width;  // Skip the name ID to land on the value ID.
        return Arrays.copyOfRange(row, pos, pos + value_width);
      }
    }
    return null;
  }

  /**
   * Checks whether or not the row key contains the given byte array at the
   * given offset.
   * @param row The row key in which to search.
   * @param offset The offset in {@code row} at which to start searching.
   * @param bytes The bytes to search at the given offset.
   * @return true if {@code bytes} are present in {@code row} at
   * {@code offset}, false otherwise.
   */
  private static boolean rowContains(final byte[] row,
                                     short offset, final byte[] bytes) {
    // Compare from the last byte down to the first.
    for (int pos = bytes.length - 1; pos >= 0; pos--) {
      if (row[offset + pos] != bytes[pos]) {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns the tags stored in the given row key.
   * @param tsdb The TSDB instance to use for Unique ID lookups.
   * @param row The row key from which to extract the tags.
   * @return A map of tag names (keys), tag values (values).
   * @throws NoSuchUniqueId if the row key contained an invalid ID (unlikely).
   */
  static Map<String, String> getTags(final TSDB tsdb,
                                     final byte[] row) throws NoSuchUniqueId {
    final short name_width = tsdb.tag_names.width();
    final short value_width = tsdb.tag_values.width();
    final short tag_bytes = (short) (name_width + value_width);
    // Scratch buffers reused for every tag to avoid reallocating.
    final byte[] tmp_name = new byte[name_width];
    final byte[] tmp_value = new byte[value_width];
    final short metric_ts_bytes = (short) (tsdb.metrics.width()
                                           + Const.TIMESTAMP_BYTES);
    final HashMap<String, String> result
      = new HashMap<String, String>((row.length - metric_ts_bytes) / tag_bytes);
    for (short pos = metric_ts_bytes; pos < row.length; pos += tag_bytes) {
      System.arraycopy(row, pos, tmp_name, 0, name_width);
      final String name = tsdb.tag_names.getName(tmp_name);
      System.arraycopy(row, pos + name_width, tmp_value, 0, value_width);
      final String value = tsdb.tag_values.getName(tmp_value);
      result.put(name, value);
    }
    return result;
  }

  /**
   * Ensures that a given string is a valid metric name or tag name/value.
   * <p>
   * A valid string is non-null, non-empty and made only of ASCII letters,
   * ASCII digits and the characters {@code -}, {@code _}, {@code .} and
   * {@code /}.
   * @param what A human readable description of what's being validated.
   * @param s The string to validate.
   * @throws IllegalArgumentException if the string isn't valid.
   */
  static void validateString(final String what, final String s) {
    if (s == null) {
      throw new IllegalArgumentException("Invalid " + what + ": null");
    }
    final int n = s.length();
    if (n == 0) {
      // Without this check the loop below never runs and an empty name
      // would be reported as valid.
      throw new IllegalArgumentException("Invalid " + what
                                         + ": empty string");
    }
    for (int i = 0; i < n; i++) {
      final char c = s.charAt(i);
      if (!(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
          || ('0' <= c && c <= '9') || c == '-' || c == '_' || c == '.'
          || c == '/')) {
        throw new IllegalArgumentException("Invalid " + what
            + " (\"" + s + "\"): illegal character: " + c);
      }
    }
  }

  /**
   * Resolves all the tags (name=value) into sorted byte arrays.
   * This function is the opposite of {@link #resolveIds}.
   * @param tsdb The TSDB to use for UniqueId lookups.
   * @param tags The tags to resolve.
   * @return a sorted list in which each element is a tag name ID immediately
   * followed by the corresponding tag value ID.
   * @throws NoSuchUniqueName if one of the elements in the map contained an
   * unknown tag name or tag value.
   */
  public static ArrayList<byte[]> resolveAll(final TSDB tsdb,
                                             final Map<String, String> tags)
    throws NoSuchUniqueName {
    return resolveAllInternal(tsdb, tags, false);
  }

  /**
   * Resolves (and creates, if necessary) all the tags (name=value) into
   * sorted byte arrays.
   * @param tsdb The TSDB to use for UniqueId lookups.
   * @param tags The tags to resolve.  If a new tag name or tag value is
   * seen, it will be assigned an ID.
   * @return a sorted list in which each element is a tag name ID immediately
   * followed by the corresponding tag value ID.
   */
  static ArrayList<byte[]> resolveOrCreateAll(final TSDB tsdb,
                                              final Map<String, String> tags) {
    return resolveAllInternal(tsdb, tags, true);
  }

  /**
   * Shared implementation of {@link #resolveAll} and
   * {@link #resolveOrCreateAll}.
   * @param tsdb The TSDB to use for UniqueId lookups.
   * @param tags The tags to resolve.
   * @param create If true, IDs are obtained with {@code getOrCreateId},
   * otherwise with {@code getId}.
   * @return a list, sorted with {@code Bytes.MEMCMP}, in which each element
   * is a tag name ID immediately followed by the corresponding tag value ID.
   * @throws NoSuchUniqueName as declared; presumably raised by {@code getId}
   * for an unknown name or value when {@code create} is false.
   */
  private static
  ArrayList<byte[]> resolveAllInternal(final TSDB tsdb,
                                       final Map<String, String> tags,
                                       final boolean create)
    throws NoSuchUniqueName {
    final ArrayList<byte[]> tag_ids = new ArrayList<byte[]>(tags.size());
    for (final Map.Entry<String, String> entry : tags.entrySet()) {
      final byte[] tag_id = (create
                             ? tsdb.tag_names.getOrCreateId(entry.getKey())
                             : tsdb.tag_names.getId(entry.getKey()));
      final byte[] value_id = (create
                               ? tsdb.tag_values.getOrCreateId(entry.getValue())
                               : tsdb.tag_values.getId(entry.getValue()));
      // Concatenate the name ID and the value ID into a single array.
      final byte[] thistag = new byte[tag_id.length + value_id.length];
      System.arraycopy(tag_id, 0, thistag, 0, tag_id.length);
      System.arraycopy(value_id, 0, thistag, tag_id.length, value_id.length);
      tag_ids.add(thistag);
    }
    // Now sort the tags.
    Collections.sort(tag_ids, Bytes.MEMCMP);
    return tag_ids;
  }

  /**
   * Resolves all the tag IDs (name followed by value) into a map.
   * This function is the opposite of {@link #resolveAll}.
   * @param tsdb The TSDB to use for UniqueId lookups.
   * @param tags The tag IDs to resolve.
   * @return A map mapping tag names to tag values.
   * @throws NoSuchUniqueId if one of the elements in the array contained an
   * invalid ID.
   * @throws IllegalArgumentException if one of the elements in the array had
   * the wrong number of bytes.
   */
  static HashMap<String, String> resolveIds(final TSDB tsdb,
                                            final ArrayList<byte[]> tags)
    throws NoSuchUniqueId {
    final short name_width = tsdb.tag_names.width();
    final short value_width = tsdb.tag_values.width();
    final short tag_bytes = (short) (name_width + value_width);
    // Scratch buffers reused for every tag to avoid reallocating.
    final byte[] tmp_name = new byte[name_width];
    final byte[] tmp_value = new byte[value_width];
    final HashMap<String, String> result
      = new HashMap<String, String>(tags.size());
    for (final byte[] tag : tags) {
      if (tag.length != tag_bytes) {
        throw new IllegalArgumentException("invalid length: " + tag.length
            + " (expected " + tag_bytes + "): " + Arrays.toString(tag));
      }
      System.arraycopy(tag, 0, tmp_name, 0, name_width);
      final String name = tsdb.tag_names.getName(tmp_name);
      System.arraycopy(tag, name_width, tmp_value, 0, value_width);
      final String value = tsdb.tag_values.getName(tmp_value);
      result.put(name, value);
    }
    return result;
  }

  /**
   * Returns true if the given string looks like an integer.
   * <p>
   * This function doesn't do any checking on the string other than looking
   * for some characters that are generally found in floating point values
   * such as '.' or 'e'.
   * @param value The string to inspect.  Must not be null.
   * @return false if the string contains '.', 'e' or 'E', true otherwise
   * (including for the empty string).
   * @since 1.1
   */
  public static boolean looksLikeInteger(final String value) {
    final int n = value.length();
    for (int i = 0; i < n; i++) {
      final char c = value.charAt(i);
      if (c == '.' || c == 'e' || c == 'E') {
        return false;
      }
    }
    return true;
  }

}