/*
* Copyright (c) 2015 Spotify AB.
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.spotify.heroic.metadata.elasticsearch;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.hash.HashCode;
import com.spotify.heroic.common.DateRange;
import com.spotify.heroic.common.Groups;
import com.spotify.heroic.common.OptionalLimit;
import com.spotify.heroic.common.RequestTimer;
import com.spotify.heroic.common.Series;
import com.spotify.heroic.elasticsearch.AbstractElasticsearchMetadataBackend;
import com.spotify.heroic.elasticsearch.BackendType;
import com.spotify.heroic.elasticsearch.Connection;
import com.spotify.heroic.elasticsearch.RateLimitedCache;
import com.spotify.heroic.filter.AndFilter;
import com.spotify.heroic.filter.FalseFilter;
import com.spotify.heroic.filter.Filter;
import com.spotify.heroic.filter.FilterModifier;
import com.spotify.heroic.filter.HasTagFilter;
import com.spotify.heroic.filter.MatchKeyFilter;
import com.spotify.heroic.filter.MatchTagFilter;
import com.spotify.heroic.filter.NotFilter;
import com.spotify.heroic.filter.OrFilter;
import com.spotify.heroic.filter.RegexFilter;
import com.spotify.heroic.filter.StartsWithFilter;
import com.spotify.heroic.filter.TrueFilter;
import com.spotify.heroic.lifecycle.LifeCycleRegistry;
import com.spotify.heroic.lifecycle.LifeCycles;
import com.spotify.heroic.metadata.CountSeries;
import com.spotify.heroic.metadata.DeleteSeries;
import com.spotify.heroic.metadata.FindKeys;
import com.spotify.heroic.metadata.FindSeries;
import com.spotify.heroic.metadata.FindSeriesIds;
import com.spotify.heroic.metadata.FindTags;
import com.spotify.heroic.metadata.MetadataBackend;
import com.spotify.heroic.metadata.WriteMetadata;
import com.spotify.heroic.statistics.MetadataBackendReporter;
import eu.toolchain.async.AsyncFramework;
import eu.toolchain.async.AsyncFuture;
import eu.toolchain.async.LazyTransform;
import eu.toolchain.async.Managed;
import eu.toolchain.async.ManagedAction;
import eu.toolchain.async.Transform;
import lombok.Data;
import lombok.RequiredArgsConstructor;
import lombok.ToString;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.elasticsearch.action.count.CountRequestBuilder;
import org.elasticsearch.action.deletebyquery.DeleteByQueryRequestBuilder;
import org.elasticsearch.action.index.IndexRequest.OpType;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.BoolFilterBuilder;
import org.elasticsearch.index.query.FilterBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermFilterBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregation;
import org.elasticsearch.search.aggregations.bucket.filter.FilterAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.nested.Nested;
import org.elasticsearch.search.aggregations.bucket.nested.NestedBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
import javax.inject.Inject;
import javax.inject.Named;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.function.Consumer;
import java.util.function.Function;
import static org.elasticsearch.index.query.FilterBuilders.andFilter;
import static org.elasticsearch.index.query.FilterBuilders.boolFilter;
import static org.elasticsearch.index.query.FilterBuilders.matchAllFilter;
import static org.elasticsearch.index.query.FilterBuilders.nestedFilter;
import static org.elasticsearch.index.query.FilterBuilders.notFilter;
import static org.elasticsearch.index.query.FilterBuilders.orFilter;
import static org.elasticsearch.index.query.FilterBuilders.prefixFilter;
import static org.elasticsearch.index.query.FilterBuilders.regexpFilter;
import static org.elasticsearch.index.query.FilterBuilders.termFilter;
/**
 * Metadata backend that writes and queries series metadata through Elasticsearch, using
 * documents of type {@code metadata}.
 *
 * <p>Every operation runs against the managed {@link Connection} held by this instance.
 */
@ElasticsearchScope
@ToString(of = {"connection"})
public class MetadataBackendV1 extends AbstractElasticsearchMetadataBackend
implements MetadataBackend, LifeCycles {
// Scroll keep-alive (5000 ms) applied to the scan searches built in entries().
private static final TimeValue SCROLL_TIME = TimeValue.timeValueMillis(5000);
// Value handed to limit.asMaxInteger(...) when sizing the scan searches built in entries().
private static final int SCROLL_SIZE = 1000;
// Returned as-is from groups().
private final Groups groups;
// Notified when a write is dropped because the write cache refused it.
private final MetadataBackendReporter reporter;
// Used to collect the per-index and per-tag futures into a single result.
private final AsyncFramework async;
// Managed Elasticsearch connection that all operations borrow through doto().
private final Managed<Connection> connection;
// Consulted once per (index, series hash code) pair before each write.
private final RateLimitedCache<Pair<String, HashCode>> writeCache;
// Used by FindTagsTransformer to strip the criteria for one tag from a filter.
private final FilterModifier modifier;
// When true, start() runs configure() after the connection has started.
private final boolean configure;
/**
 * Creates the backend from its injected collaborators.
 *
 * @param groups groups reported by {@link #groups()}
 * @param reporter receives a report whenever a write is dropped by the write cache
 * @param async framework used to collect futures; also handed to the superclass
 * @param connection managed Elasticsearch connection used by every operation
 * @param writeCache cache consulted per (index, series hash code) before writing
 * @param modifier used to remove the criteria for a single tag from a filter
 * @param configure whether {@link #configure()} should run once the connection has started
 */
@Inject
public MetadataBackendV1(
    Groups groups,
    MetadataBackendReporter reporter,
    AsyncFramework async,
    Managed<Connection> connection,
    RateLimitedCache<Pair<String, HashCode>> writeCache,
    FilterModifier modifier,
    @Named("configure") boolean configure
) {
    super(async, ElasticsearchUtils.TYPE_METADATA);

    // Connection handling.
    this.async = async;
    this.connection = connection;
    this.configure = configure;

    // Request handling.
    this.groups = groups;
    this.modifier = modifier;
    this.reporter = reporter;
    this.writeCache = writeCache;
}
/**
 * Registers this backend's start and stop hooks with the given lifecycle registry.
 *
 * @param registry registry to register the hooks with
 */
@Override
public void register(LifeCycleRegistry registry) {
    registry.start(() -> this.start());
    registry.stop(() -> this.stop());
}
/**
 * @return the managed Elasticsearch connection this backend was constructed with
 */
@Override
protected Managed<Connection> connection() {
    return this.connection;
}
/**
 * Converts a filter statement into an Elasticsearch filter using the shared filter context.
 *
 * @param filter filter statement to convert
 * @return the corresponding Elasticsearch filter
 */
@Override
protected FilterBuilder filter(Filter filter) {
    final FilterBuilder converted = CTX.filter(filter);
    return converted;
}
/**
 * Builds a series from the source document of a search hit.
 *
 * @param hit search hit whose source holds the series key and tags
 * @return the series described by the hit's source
 */
@Override
protected Series toSeries(SearchHit hit) {
    final Map<String, Object> document = hit.getSource();
    return ElasticsearchUtils.toSeries(document);
}
/**
 * Extracts the document id of a search hit.
 *
 * @param source search hit to read the id from
 * @return the id of the hit
 */
private String toId(SearchHit source) {
    final String id = source.getId();
    return id;
}
/**
 * Asks the borrowed connection to configure itself.
 *
 * @return a future that resolves once the connection's configure call has completed
 */
@Override
public AsyncFuture<Void> configure() {
    return connection.doto(borrowed -> borrowed.configure());
}
/**
 * @return the groups this backend was constructed with
 */
@Override
public Groups groups() {
    return this.groups;
}
// Shared filter context created with an empty path prefix, so its field paths are the bare
// document fields ("key", "tags", "tags.key.raw", "tags.value.raw").
private static final ElasticsearchUtils.FilterContext CTX = ElasticsearchUtils.context();
/**
 * Finds tags matching the request in two steps: first the tag keys are looked up, then the
 * values of every key are resolved by a {@link FindTagsTransformer}.
 *
 * @param request filter, range and limit of the lookup
 * @return a future holding the combined tags
 */
@Override
public AsyncFuture<FindTags> findTags(final FindTags.Request request) {
    return doto(conn -> {
        // Factory for a fresh search request per tag key.
        final Callable<SearchRequestBuilder> newSearch =
            () -> conn.search(ElasticsearchUtils.TYPE_METADATA);

        final FindTagKeys.Request keysRequest = new FindTagKeys.Request(
            request.getFilter(), request.getRange(), request.getLimit());

        final AsyncFuture<FindTagKeys> tagKeys = findTagKeys(keysRequest);

        return tagKeys.lazyTransform(
            new FindTagsTransformer(request.getFilter(), newSearch, CTX));
    });
}
/**
 * Indexes the metadata document of a series into every write index of the connection.
 *
 * <p>The write cache is consulted per (index, series hash code). When it refuses, the drop is
 * reported and that index is skipped. Documents are indexed under the series hash as id, with
 * op type {@code CREATE}.
 *
 * @param request request carrying the series to write
 * @return a future holding the reduced result of all per-index writes
 */
@Override
public AsyncFuture<WriteMetadata> write(final WriteMetadata.Request request) {
    return doto(conn -> {
        final Series series = request.getSeries();
        final String documentId = series.hash();

        final List<AsyncFuture<WriteMetadata>> pending = new ArrayList<>();

        for (final String index : conn.writeIndices()) {
            final boolean permitted = writeCache.acquire(Pair.of(index, series.getHashCode()));

            if (permitted) {
                final XContentBuilder document = XContentFactory.jsonBuilder().startObject();
                ElasticsearchUtils.buildMetadataDoc(document, series);
                document.endObject();

                final IndexRequestBuilder indexRequest =
                    conn.index(index, ElasticsearchUtils.TYPE_METADATA);
                indexRequest.setId(documentId);
                indexRequest.setSource(document);
                indexRequest.setOpType(OpType.CREATE);

                // The timer is started right before the request is dispatched.
                final RequestTimer<WriteMetadata> timer = WriteMetadata.timer();
                pending.add(
                    bind(indexRequest.execute()).directTransform(ignored -> timer.end()));
            } else {
                reporter.reportWriteDroppedByRateLimit();
            }
        }

        return async.collect(pending, WriteMetadata.reduce());
    });
}
/**
 * Counts the metadata documents matching the request filter.
 *
 * <p>When the request limit has an integer value it is applied as the count request's
 * terminate-after setting. The second argument of {@code CountSeries.of} is always
 * {@code false}.
 *
 * @param request filter and limit of the count
 * @return a future holding the count
 */
@Override
public AsyncFuture<CountSeries> countSeries(final CountSeries.Request request) {
    return doto(conn -> {
        final OptionalLimit limit = request.getLimit();
        final FilterBuilder converted = CTX.filter(request.getFilter());

        final CountRequestBuilder countRequest = conn.count(ElasticsearchUtils.TYPE_METADATA);
        limit.asInteger().ifPresent(max -> countRequest.setTerminateAfter(max));
        countRequest.setQuery(
            QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), converted));

        return bind(countRequest.execute()).directTransform(
            counted -> CountSeries.of(counted.getCount(), false));
    });
}
/**
 * Finds the series matching the request, converting every search hit into a series.
 *
 * @param request filter, limit and range of the lookup
 * @return a future holding the series found and whether the result was limited
 */
@Override
public AsyncFuture<FindSeries> findSeries(final FindSeries.Request request) {
    final Transform<LimitedSet<Series>, FindSeries> toResult =
        found -> FindSeries.of(found.getSet(), found.isLimited());

    // The search request needs no extra tweaking for full series lookups.
    final Consumer<SearchRequestBuilder> untouched = search -> {
    };

    return entries(request.getFilter(), request.getLimit(), request.getRange(), this::toSeries,
        toResult, untouched);
}
/**
 * Finds the ids of the series matching the request. Only hit ids are used, so fetching the
 * document source is disabled on the search request.
 *
 * @param request filter, limit and range of the lookup
 * @return a future holding the ids found and whether the result was limited
 */
@Override
public AsyncFuture<FindSeriesIds> findSeriesIds(final FindSeriesIds.Request request) {
    final Transform<LimitedSet<String>, FindSeriesIds> toResult =
        found -> FindSeriesIds.of(found.getSet(), found.isLimited());

    final Consumer<SearchRequestBuilder> idsOnly = search -> search.setFetchSource(false);

    return entries(request.getFilter(), request.getLimit(), request.getRange(), this::toId,
        toResult, idsOnly);
}
/**
 * Deletes every metadata document matching the request filter using a delete-by-query.
 *
 * @param request request carrying the filter selecting what to delete
 * @return a future holding an empty delete result
 */
@Override
public AsyncFuture<DeleteSeries> deleteSeries(final DeleteSeries.Request request) {
    return doto(conn -> {
        final FilterBuilder converted = CTX.filter(request.getFilter());

        final DeleteByQueryRequestBuilder deletion =
            conn.deleteByQuery(ElasticsearchUtils.TYPE_METADATA);
        deletion.setQuery(
            QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), converted));

        return bind(deletion.execute()).directTransform(ignored -> DeleteSeries.of());
    });
}
/**
 * Looks up the distinct tag keys of all documents matching the request filter.
 *
 * <p>Runs a count-type search with a {@code terms} aggregation on the tag key field, wrapped in
 * a {@code nested} aggregation over the tags path.
 *
 * @param filter request whose filter selects the documents to inspect
 * @return a future holding the keys found and their number
 */
private AsyncFuture<FindTagKeys> findTagKeys(final FindTagKeys.Request filter) {
    return doto(conn -> {
        final FilterBuilder converted = CTX.filter(filter.getFilter());

        final SearchRequestBuilder search =
            conn.search(ElasticsearchUtils.TYPE_METADATA).setSearchType("count");
        search.setQuery(QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), converted));

        search.addAggregation(AggregationBuilders
            .nested("nested")
            .path(CTX.tags())
            .subAggregation(AggregationBuilders.terms("terms").field(CTX.tagsKey()).size(0)));

        return bind(search.execute()).directTransform(response -> {
            // Unwrap in the same order the aggregations were nested above.
            final Nested nestedTags = response.getAggregations().get("nested");
            final Terms keyTerms = nestedTags.getAggregations().get("terms");

            final Set<String> keys = new HashSet<>();
            keyTerms.getBuckets().forEach(bucket -> keys.add(bucket.getKey()));

            return new FindTagKeys(keys, keys.size());
        });
    });
}
/**
 * Finds the distinct series keys of all documents matching the request filter.
 *
 * <p>Runs a count-type search with a {@code terms} aggregation on the series key field and
 * collects the bucket keys into a set.
 *
 * @param request request whose filter selects the documents to inspect
 * @return a future holding the keys, the number of buckets returned and the number of buckets
 * whose key had already been seen
 */
@Override
public AsyncFuture<FindKeys> findKeys(final FindKeys.Request request) {
    return doto(c -> {
        final FilterBuilder f = CTX.filter(request.getFilter());

        final SearchRequestBuilder builder = c
            .search(ElasticsearchUtils.TYPE_METADATA)
            .setSearchType("count");

        builder.setQuery(QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), f));

        {
            final AggregationBuilder<?> terms =
                AggregationBuilders.terms("terms").field(CTX.seriesKey()).size(0);
            builder.addAggregation(terms);
        }

        return bind(builder.execute()).directTransform(response -> {
            final Terms terms = (Terms) response.getAggregations().get("terms");

            final Set<String> keys = new HashSet<>();

            final int size = terms.getBuckets().size();
            int duplicates = 0;

            for (final Terms.Bucket bucket : terms.getBuckets()) {
                // Set.add returns false when the key is already present; only those
                // buckets are duplicates. Counting on 'true' would count every new key.
                if (!keys.add(bucket.getKey())) {
                    duplicates += 1;
                }
            }

            return FindKeys.of(keys, size, duplicates);
        });
    });
}
/**
 * @return whether the managed connection reports itself as ready
 */
@Override
public boolean isReady() {
    final boolean ready = connection.isReady();
    return ready;
}
/**
 * Starts the managed connection and, when the {@code configure} flag is set, runs
 * {@link #configure()} once the connection has started.
 *
 * @return a future for the start (and optional configure) sequence
 */
private AsyncFuture<Void> start() {
    final AsyncFuture<Void> started = connection.start();

    if (configure) {
        return started.lazyTransform(ignored -> configure());
    }

    return started;
}
/**
 * Runs a scan/scroll search for the given filter and converts the scrolled hits.
 *
 * @param filter filter selecting the documents to scan
 * @param limit limit used both to size the search and when scrolling the entries
 * @param range not read by this method
 * @param converter converts every search hit into an entry
 * @param collector turns the scrolled set of entries into the final result
 * @param modifier hook that may adjust the search request before it is executed
 * @return a future holding the collected result
 */
private <T, O> AsyncFuture<O> entries(
    final Filter filter, final OptionalLimit limit, final DateRange range,
    final Function<SearchHit, T> converter, final Transform<LimitedSet<T>, O> collector,
    final Consumer<SearchRequestBuilder> modifier
) {
    return doto(conn -> {
        final FilterBuilder converted = CTX.filter(filter);

        final SearchRequestBuilder scan = conn.search(ElasticsearchUtils.TYPE_METADATA);
        scan.setScroll(SCROLL_TIME);
        scan.setSearchType(SearchType.SCAN);
        scan.setSize(limit.asMaxInteger(SCROLL_SIZE));
        scan.setQuery(QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), converted));

        // Give the caller the last word on the request before it is scrolled.
        modifier.accept(scan);

        return scrollEntries(conn, scan, limit, converter).directTransform(collector);
    });
}
/**
 * Stops the managed connection.
 *
 * @return the future returned by the connection's stop call
 */
private AsyncFuture<Void> stop() {
    final AsyncFuture<Void> stopped = connection.stop();
    return stopped;
}
/**
 * Runs an action against the managed connection.
 *
 * @param action action to run with the borrowed connection
 * @return the future produced by the managed connection for that action
 */
private <T> AsyncFuture<T> doto(ManagedAction<Connection, T> action) {
    final AsyncFuture<T> outcome = connection.doto(action);
    return outcome;
}
/**
 * Constants and helpers describing how a series is laid out in documents of type
 * {@code metadata}, plus the translation of {@link Filter} statements into Elasticsearch
 * filters.
 */
private static final class ElasticsearchUtils {
    public static final String TYPE_METADATA = "metadata";

    /**
     * Fields for type "metadata".
     */
    public static final String METADATA_KEY = "key";
    public static final String METADATA_TAGS = "tags";

    /**
     * Common fields, but nested in different ways depending on document type.
     *
     * @see FilterContext
     */
    public static final String KEY = "key";
    public static final String TAGS = "tags";
    public static final String TAGS_KEY = "key";
    public static final String TAGS_KEY_RAW = "key.raw";
    public static final String TAGS_VALUE = "value";
    public static final String TAGS_VALUE_RAW = "value.raw";

    /**
     * Builds a series from a document source, reading its {@code key} and {@code tags} fields.
     *
     * @param source document source as a map
     * @return the series with the document's key and tags
     */
    @SuppressWarnings("unchecked")
    public static Series toSeries(Map<String, Object> source) {
        final String key = (String) source.get("key");
        // Unchecked: the tags field is expected to be a list of key/value objects, which is
        // the shape buildMetadataDoc writes.
        final SortedMap<String, String> tags =
            toTags((List<Map<String, String>>) source.get("tags"));
        return Series.of(key, tags);
    }

    /**
     * Converts a list of {@code {key, value}} entries into a sorted tag map. Entries missing
     * either the key or the value are skipped.
     *
     * @param source tag entries as stored in the document
     * @return the tags, sorted by key
     */
    public static SortedMap<String, String> toTags(final List<Map<String, String>> source) {
        final SortedMap<String, String> tags = new TreeMap<>();

        for (final Map<String, String> entry : source) {
            final String key = entry.get("key");
            final String value = entry.get("value");

            if (value != null && key != null) {
                tags.put(key, value);
            }
        }

        return tags;
    }

    /**
     * Writes the fields of a metadata document for the given series: the series key and an
     * array with one {@code {key, value}} object per tag. The caller is responsible for
     * starting and ending the enclosing object.
     *
     * @param b builder to write the fields to
     * @param series series to describe
     * @throws IOException if the builder fails to write
     */
    public static void buildMetadataDoc(final XContentBuilder b, Series series)
        throws IOException {
        b.field(METADATA_KEY, series.getKey());

        b.startArray(METADATA_TAGS);

        // The array is always written, it just stays empty when the series has no tags.
        if (series.getTags() != null && !series.getTags().isEmpty()) {
            for (final Map.Entry<String, String> entry : series.getTags().entrySet()) {
                b.startObject();
                b.field(TAGS_KEY, entry.getKey());
                b.field(TAGS_VALUE, entry.getValue());
                b.endObject();
            }
        }

        b.endArray();
    }

    /**
     * Creates a filter context whose field paths are prefixed with the given path elements.
     *
     * @param path prefix elements, may be empty
     * @return a new filter context
     */
    public static FilterContext context(String... path) {
        return new FilterContext(path);
    }

    /**
     * Holds the dot-joined field paths for a given prefix and converts {@link Filter}
     * statements into Elasticsearch filters on those fields.
     */
    public static final class FilterContext {
        private final String seriesKey;
        private final String tags;
        private final String tagsKey;
        private final String tagsValue;

        private FilterContext(String... path) {
            this(ImmutableList.<String>builder().add(path).build());
        }

        private FilterContext(List<String> path) {
            this.seriesKey = path(path, KEY);
            this.tags = path(path, TAGS);
            // Tag key and value are addressed through their "raw" sub-fields.
            this.tagsKey = path(path, TAGS, TAGS_KEY_RAW);
            this.tagsValue = path(path, TAGS, TAGS_VALUE_RAW);
        }

        /** Joins the prefix and a single trailing element with dots. */
        private String path(List<String> path, String tail) {
            return StringUtils.join(ImmutableList.builder().addAll(path).add(tail).build(),
                '.');
        }

        /** Joins the prefix and two trailing elements with dots. */
        private String path(List<String> path, String tailN, String tail) {
            return StringUtils.join(
                ImmutableList.builder().addAll(path).add(tailN).add(tail).build(), '.');
        }

        public String seriesKey() {
            return seriesKey;
        }

        public String tags() {
            return tags;
        }

        public String tagsKey() {
            return tagsKey;
        }

        public String tagsValue() {
            return tagsValue;
        }

        /**
         * Converts a filter statement into the equivalent Elasticsearch filter.
         *
         * @param filter statement to convert
         * @return the Elasticsearch filter
         * @throws IllegalArgumentException if the statement is of an unsupported kind
         */
        public FilterBuilder filter(final Filter filter) {
            return filter.visit(new Filter.Visitor<FilterBuilder>() {
                @Override
                public FilterBuilder visitTrue(final TrueFilter t) {
                    return matchAllFilter();
                }

                @Override
                public FilterBuilder visitFalse(final FalseFilter f) {
                    return notFilter(matchAllFilter());
                }

                @Override
                public FilterBuilder visitAnd(final AndFilter and) {
                    final List<FilterBuilder> filters = new ArrayList<>(and.terms().size());

                    for (final Filter stmt : and.terms()) {
                        filters.add(filter(stmt));
                    }

                    return andFilter(filters.toArray(new FilterBuilder[0]));
                }

                @Override
                public FilterBuilder visitOr(final OrFilter or) {
                    final List<FilterBuilder> filters = new ArrayList<>(or.terms().size());

                    for (final Filter stmt : or.terms()) {
                        filters.add(filter(stmt));
                    }

                    // A disjunction matches when ANY term matches, so the terms must be
                    // combined with orFilter; andFilter would require all of them.
                    return orFilter(filters.toArray(new FilterBuilder[0]));
                }

                @Override
                public FilterBuilder visitNot(final NotFilter not) {
                    return notFilter(filter(not.getFilter()));
                }

                @Override
                public FilterBuilder visitMatchTag(final MatchTagFilter matchTag) {
                    // Key and value must match within the same nested tag object.
                    final BoolFilterBuilder nested = boolFilter();
                    nested.must(termFilter(tagsKey, matchTag.getTag()));
                    nested.must(termFilter(tagsValue, matchTag.getValue()));
                    return nestedFilter(tags, nested);
                }

                @Override
                public FilterBuilder visitStartsWith(final StartsWithFilter startsWith) {
                    final BoolFilterBuilder nested = boolFilter();
                    nested.must(termFilter(tagsKey, startsWith.getTag()));
                    nested.must(prefixFilter(tagsValue, startsWith.getValue()));
                    return nestedFilter(tags, nested);
                }

                @Override
                public FilterBuilder visitRegex(final RegexFilter regex) {
                    final BoolFilterBuilder nested = boolFilter();
                    nested.must(termFilter(tagsKey, regex.getTag()));
                    nested.must(regexpFilter(tagsValue, regex.getValue()));
                    return nestedFilter(tags, nested);
                }

                @Override
                public FilterBuilder visitHasTag(final HasTagFilter hasTag) {
                    final TermFilterBuilder nested = termFilter(tagsKey, hasTag.getTag());
                    return nestedFilter(tags, nested);
                }

                @Override
                public FilterBuilder visitMatchKey(final MatchKeyFilter matchKey) {
                    return termFilter(seriesKey, matchKey.getValue());
                }

                @Override
                public FilterBuilder defaultAction(final Filter filter) {
                    throw new IllegalArgumentException(
                        "Unsupported filter statement: " + filter);
                }
            });
        }
    }
}
/**
 * Finds all values of a single tag key among the documents matching the given filter.
 *
 * <p>Runs a count-type search with a {@code terms} aggregation on the tag value field, inside a
 * {@code filter} aggregation restricted to the given key, inside a {@code nested} aggregation
 * over the tags path.
 *
 * @param setup factory producing a fresh search request
 * @param ctx filter context supplying the field paths
 * @param filter Elasticsearch filter selecting the documents to inspect
 * @param key tag key whose values are collected
 * @return a future holding a single-entry map from the key to its values
 * @throws Exception if the search request cannot be set up
 */
public AsyncFuture<FindTags> findtags(
    final Callable<SearchRequestBuilder> setup, final ElasticsearchUtils.FilterContext ctx,
    final FilterBuilder filter, final String key
) throws Exception {
    final SearchRequestBuilder search = setup.call().setSearchType("count").setSize(0);
    search.setQuery(QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), filter));

    final TermsBuilder valueTerms =
        AggregationBuilders.terms("terms").field(ctx.tagsValue()).size(0);
    final FilterAggregationBuilder onlyThisKey = AggregationBuilders
        .filter("filter")
        .filter(termFilter(ctx.tagsKey(), key))
        .subAggregation(valueTerms);
    final NestedBuilder overTags =
        AggregationBuilders.nested("nested").path(ctx.tags()).subAggregation(onlyThisKey);
    search.addAggregation(overTags);

    return bind(search.execute()).directTransform(response -> {
        /* IMPORTANT: the aggregations have to be unwrapped with the correct type, in the
         * same order as they were nested above: nested -> filter -> terms. */
        final Nested nestedTags = response.getAggregations().get("nested");
        final SingleBucketAggregation filtered = nestedTags.getAggregations().get("filter");
        final Terms valueBuckets = filtered.getAggregations().get("terms");

        final Set<String> values = new HashSet<>();
        valueBuckets.getBuckets().forEach(bucket -> values.add(bucket.getKey()));

        final Map<String, Set<String>> byKey = new HashMap<>();
        byKey.put(key, values);

        return FindTags.of(byKey, byKey.size());
    });
}
/**
 * Turns the tag keys found by a key lookup into the full tag result, by running one value
 * lookup per key and reducing the outcomes.
 */
@RequiredArgsConstructor
private class FindTagsTransformer implements LazyTransform<FindTagKeys, FindTags> {
    private final Filter filter;
    private final Callable<SearchRequestBuilder> setup;
    private final ElasticsearchUtils.FilterContext ctx;

    /**
     * Starts one value lookup per tag key and collects them into a single result.
     *
     * @param result tag keys to resolve
     * @return a future holding the reduced tags of all keys
     * @throws Exception if a lookup cannot be set up
     */
    @Override
    public AsyncFuture<FindTags> transform(FindTagKeys result) throws Exception {
        final List<AsyncFuture<FindTags>> lookups = new ArrayList<>();

        for (final String tag : result.getKeys()) {
            // Find a single set of tags, excluding any criteria for this specific tag.
            final Filter withoutTag = modifier.removeTag(this.filter, tag);
            final FilterBuilder converted = ctx.filter(withoutTag);
            lookups.add(findtags(setup, ctx, converted, tag));
        }

        return async.collect(lookups, FindTags.reduce());
    }
}
/**
 * Result of the tag-key lookup done by findTagKeys: the distinct tag keys found and their
 * number.
 */
@Data
static class FindTagKeys {
// Distinct tag keys collected from the aggregation buckets.
private final Set<String> keys;
// Number of keys; findTagKeys passes keys.size() here.
private final int size;
/**
 * Parameters of a tag-key lookup. findTagKeys in this file only reads the filter; range and
 * limit are carried along but not consulted there.
 */
@Data
public static class Request {
private final Filter filter;
private final DateRange range;
private final OptionalLimit limit;
}
}
/**
 * Describes this backend type: the mapping for the {@code metadata} document type, loaded
 * from the {@code v1/metadata.json} resource, an empty second argument, and this class.
 *
 * @return the backend type descriptor for {@link MetadataBackendV1}
 */
public static BackendType backendType() {
    final Map<String, Map<String, Object>> mappingsByType = new HashMap<>();

    final Map<String, Object> metadataMapping =
        ElasticsearchMetadataUtils.loadJsonResource("v1/metadata.json");
    mappingsByType.put("metadata", metadataMapping);

    return new BackendType(mappingsByType, ImmutableMap.of(), MetadataBackendV1.class);
}
}