package com.rackspacecloud.blueflood.io; import com.codahale.metrics.Histogram; import com.codahale.metrics.Meter; import com.codahale.metrics.Timer; import com.rackspacecloud.blueflood.service.Configuration; import com.rackspacecloud.blueflood.service.ElasticIOConfig; import com.rackspacecloud.blueflood.utils.GlobPattern; import com.rackspacecloud.blueflood.utils.Metrics; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Client; import org.elasticsearch.common.lang3.StringUtils; import org.elasticsearch.index.query.*; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.bucket.terms.Terms; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import static com.rackspacecloud.blueflood.types.Locator.METRIC_TOKEN_SEPARATOR_REGEX; import static java.util.stream.Collectors.toSet; import static org.elasticsearch.index.query.QueryBuilders.*; public abstract class AbstractElasticIO implements DiscoveryIO { protected Client client; // todo: these should be instances per client. protected final Timer searchTimer = Metrics.timer(getClass(), "Search Duration"); protected final Timer esMetricNamesQueryTimer = Metrics.timer(getClass(), "ES Metric Names Query Duration"); protected final Timer writeTimer = Metrics.timer(getClass(), "Write Duration"); protected final Histogram batchHistogram = Metrics.histogram(getClass(), "Batch Sizes"); protected Meter classCastExceptionMeter = Metrics.meter(getClass(), "Failed Cast to IMetric"); protected Histogram queryBatchHistogram = Metrics.histogram(getClass(), "Query Batch Size"); public static String METRICS_TOKENS_AGGREGATE = "metric_tokens"; public static String ELASTICSEARCH_INDEX_NAME_WRITE = Configuration.getInstance().getStringProperty(ElasticIOConfig.ELASTICSEARCH_INDEX_NAME_WRITE); public static String ELASTICSEARCH_INDEX_NAME_READ = Configuration.getInstance().getStringProperty(ElasticIOConfig.ELASTICSEARCH_INDEX_NAME_READ); public static int MAX_RESULT_LIMIT = 100000; //grabs chars until the next "." which is basically a token protected static final String REGEX_TO_GRAB_SINGLE_TOKEN = "[^.]*"; public List<SearchResult> search(String tenant, String query) throws Exception { return search(tenant, Arrays.asList(query)); } public List<SearchResult> search(String tenant, List<String> queries) throws Exception { String[] indexes = getIndexesToSearch(); return searchESByIndexes(tenant, queries, indexes); } private List<SearchResult> searchESByIndexes(String tenant, List<String> queries, String[] indexes) { List<SearchResult> results = new ArrayList<SearchResult>(); Timer.Context multiSearchCtx = searchTimer.time(); SearchResponse response; try { queryBatchHistogram.update(queries.size()); BoolQueryBuilder bqb = boolQuery(); QueryBuilder qb; for (String query : queries) { GlobPattern pattern = new GlobPattern(query); if (!pattern.hasWildcard()) { qb = termQuery(ESFieldLabel.metric_name.name(), query); } else { qb = regexpQuery(ESFieldLabel.metric_name.name(), pattern.compiled().toString()); } bqb.should(boolQuery() .must(termQuery(ESFieldLabel.tenantId.toString(), tenant)) .must(qb) ); } response = client.prepareSearch(indexes) .setRouting(tenant) .setSize(MAX_RESULT_LIMIT) .setVersion(true) .setQuery(bqb) .execute() .actionGet(); } finally { multiSearchCtx.stop(); } for (SearchHit hit : response.getHits().getHits()) { SearchResult result = convertHitToMetricDiscoveryResult(hit); results.add(result); } return dedupResults(results); } /** * This method returns a list of {@link MetricName}'s matching the given glob query. * * for metrics: foo.bar.xxx, * foo.bar.baz.qux, * * for query=foo.bar.*, returns the below list of metric names * * new MetricName("foo.bar.xxx", true) <- From metric foo.bar.xxx * new MetricName("foo.bar.baz", false) <- From metric foo.bar.baz.qux * * @param tenant * @param query is glob representation of hierarchical levels of token. Ex: foo.bar.* * @return * @throws Exception */ public List<MetricName> getMetricNames(final String tenant, final String query) throws Exception { Timer.Context esMetricNamesQueryTimerCtx = esMetricNamesQueryTimer.time(); SearchResponse response; try { response = getMetricNamesFromES(tenant, regexToGrabCurrentAndNextLevel(query)); } finally { esMetricNamesQueryTimerCtx.stop(); } // For example, if query = foo.bar.*, base level is 3 which is equal to the number of tokens in the query. int baseLevel = getTotalTokens(query); MetricIndexData metricIndexData = buildMetricIndexData(response, baseLevel); List<MetricName> metricNames = new ArrayList<>(); //Metric Names matching query which have next level metricNames.addAll(metricIndexData.getMetricNamesWithNextLevel() .stream() .map(x -> new MetricName(x, false)) .collect(toSet())); //complete metric names matching query metricNames.addAll(metricIndexData.getCompleteMetricNamesAtBaseLevel() .stream() .map(x -> new MetricName(x, true)) .collect(toSet())); return metricNames; } private int getTotalTokens(String query) { if (StringUtils.isEmpty(query)) return 0; return query.split(METRIC_TOKEN_SEPARATOR_REGEX).length; } /** * Performs terms aggregation by metric_name which returns doc_count by * metric_name index that matches the given regex. * * Sample request body: * * { * "size": 0, * "query": { * "bool" : { * "must" : [ { * "term" : { * "tenantId" : "ratanasv" * } * }, { * "regexp" : { * "metric_name" : { * "value" : "<regex>" * } * } * } ] * } * }, * "aggs": { * "metric_name_tokens": { * "terms": { * "field" : "metric_name", * "include": "<regex>", * "execution_hint": "map", * "size": 0 * } * } * } * } * * The two regex expressions used in the query above would be same, one to filter * at query level and another to filter the aggregation buckets. * * Execution hint of "map" works by using field values directly instead of ordinals * in order to aggregate data per-bucket * * @param tenant * @param regexMetricName * @return */ private SearchResponse getMetricNamesFromES(final String tenant, final String regexMetricName) { AggregationBuilder aggregationBuilder = AggregationBuilders.terms(METRICS_TOKENS_AGGREGATE) .field(ESFieldLabel.metric_name.name()) .include(regexMetricName) .executionHint("map") .size(0); TermQueryBuilder tenantIdQuery = QueryBuilders.termQuery(ESFieldLabel.tenantId.toString(), tenant); RegexpQueryBuilder metricNameQuery = QueryBuilders.regexpQuery(ESFieldLabel.metric_name.name(), regexMetricName); return client.prepareSearch(new String[] {ELASTICSEARCH_INDEX_NAME_READ}) .setRouting(tenant) .setSize(0) .setVersion(true) .setQuery(QueryBuilders.boolQuery().must(tenantIdQuery).must(metricNameQuery)) .addAggregation(aggregationBuilder) .execute() .actionGet(); } private MetricIndexData buildMetricIndexData(final SearchResponse response, final int baseLevel) { MetricIndexData metricIndexData = new MetricIndexData(baseLevel); Terms aggregateTerms = response.getAggregations().get(METRICS_TOKENS_AGGREGATE); for (Terms.Bucket bucket: aggregateTerms.getBuckets()) { metricIndexData.add(bucket.getKey(), bucket.getDocCount()); } return metricIndexData; } /** * Returns regex which could grab metric names from current level to the next level * for a given query. * * (Some exceptions when query has only one level due to the nature of underlying data) * * for metrics : foo.bar.baz, * foo.bar.baz.qux, * * for query=foo.bar.*, the regex which this method returns will capture the following metric token paths. * * "foo.bar.baz" <- current level * "foo.bar.baz.qux" <- next level * * @param query * @return */ protected String regexToGrabCurrentAndNextLevel(final String query) { if (StringUtils.isEmpty(query)) { throw new IllegalArgumentException("Query(glob) string cannot be null/empty"); } String queryRegex = getRegex(query); int totalQueryTokens = getTotalTokens(query); if (totalQueryTokens == 1) { // get metric names which matches the given query and have a next level, // Ex: For metric foo.bar.baz.qux, if query=*, we should get foo.bar. We are not // grabbing 0 level as it will give back bar, baz, qux because of the way data is structured. String baseRegex = convertRegexToCaptureUptoNextToken(queryRegex); return baseRegex + METRIC_TOKEN_SEPARATOR_REGEX + REGEX_TO_GRAB_SINGLE_TOKEN; } else { String[] queryRegexParts = queryRegex.split("\\\\."); String queryRegexUptoPrevLevel = StringUtils.join(queryRegexParts, METRIC_TOKEN_SEPARATOR_REGEX, 0, totalQueryTokens - 1); String baseRegex = convertRegexToCaptureUptoNextToken(queryRegexUptoPrevLevel); String queryRegexLastLevel = queryRegexParts[totalQueryTokens - 1]; String lastTokenRegex = convertRegexToCaptureUptoNextToken(queryRegexLastLevel); // Ex: For metric foo.bar.baz.qux.xxx, if query=foo.bar.b*, get foo.bar.baz, foo.bar.baz.qux // In this case baseRegex = "foo.bar", lastTokenRegex = "b[^.]*"' and the final // regex is foo\.bar\.b[^.]*(\.[^.]*){0,1} return baseRegex + METRIC_TOKEN_SEPARATOR_REGEX + lastTokenRegex + "(" + METRIC_TOKEN_SEPARATOR_REGEX + REGEX_TO_GRAB_SINGLE_TOKEN + ")" + "{0,1}"; } } private String convertRegexToCaptureUptoNextToken(String queryRegex) { return queryRegex.replaceAll("\\.\\*", REGEX_TO_GRAB_SINGLE_TOKEN); } private String getRegex(String glob) { GlobPattern pattern = new GlobPattern(glob); return pattern.compiled().toString(); } protected abstract String[] getIndexesToSearch(); protected abstract List<SearchResult> dedupResults(List<SearchResult> results); protected abstract SearchResult convertHitToMetricDiscoveryResult(SearchHit hit); }