/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.github.fhuss.storm.elasticsearch.state;
import backtype.storm.task.IMetricsContext;
import backtype.storm.topology.FailedException;
import backtype.storm.topology.ReportedFailedException;
import backtype.storm.tuple.Values;
import com.github.fhuss.storm.elasticsearch.ClientFactory;
import com.github.fhuss.storm.elasticsearch.handler.BulkResponseHandler;
import com.google.common.base.Objects;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.get.MultiGetItemResponse;
import org.elasticsearch.action.get.MultiGetRequestBuilder;
import org.elasticsearch.action.get.MultiGetResponse;
import org.elasticsearch.client.Client;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import storm.trident.state.OpaqueValue;
import storm.trident.state.State;
import storm.trident.state.StateFactory;
import storm.trident.state.StateType;
import storm.trident.state.TransactionalValue;
import storm.trident.state.map.CachedMap;
import storm.trident.state.map.IBackingMap;
import storm.trident.state.map.MapState;
import storm.trident.state.map.NonTransactionalMap;
import storm.trident.state.map.OpaqueMap;
import storm.trident.state.map.SnapshottableMap;
import storm.trident.state.map.TransactionalMap;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import static com.github.fhuss.storm.elasticsearch.state.ValueSerializer.*;
/**
* This class implements Trident State on top of Elasticsearch.
* It follows the trident-memcached library (https://github.com/nathanmarz/trident-memcached) as a template.
*
* @author fhussonnois
* @param <T> OpaqueValue, TransactionalValue or any other non transactional type
*/
public class ESIndexMapState<T> implements IBackingMap<T> {
private static final Logger LOGGER = LoggerFactory.getLogger(ESIndexMapState.class);
/**
 * View over the topology configuration exposing the settings used by this state.
 *
 * <p>The factories in this file build an instance from a raw {@code Map}, so at runtime the
 * stored values are not guaranteed to be {@code String}s (a cache size may have been
 * configured as a number, the error flag as a boolean). The accessors therefore read the
 * raw value as an {@code Object} and convert it, instead of relying on the declared
 * {@code String} value type, which would fail with a {@link ClassCastException}.
 */
public static class Options extends HashMap<String, String> {
    private static final int DEFAULT_CACHE_SIZE = 1000;
    private static final String DEFAULT_GLOBAL_KEY = "GLOBAL$KEY";

    /** Configuration key: when "true", failures are raised as reported failures. */
    public static final String REPORT_ERROR = "trident.elasticsearch.state.report.error";
    /** Configuration key: size passed to the cached map wrapping the backing map. */
    public static final String CACHE_SIZE = "trident.elasticsearch.state.cache.size";
    /** Configuration key: value used to build the snapshot key of the state. */
    public static final String GLOBAL_KEY = "trident.elasticsearch.state.global.key";

    /**
     * Copies every entry of the given configuration.
     *
     * @param conf the configuration to copy; must not be {@code null}
     */
    public Options(Map<String, String> conf) {
        super(conf);
    }

    /**
     * @return {@code true} when {@link #REPORT_ERROR} is set to {@code Boolean.TRUE} or to a
     *         value whose string form equals "true" (ignoring case); {@code false} otherwise,
     *         including when the key is absent
     */
    public boolean reportError() {
        Object value = rawValue(REPORT_ERROR);
        if (value instanceof Boolean) {
            return (Boolean) value;
        }
        return value != null && Boolean.parseBoolean(value.toString());
    }

    /**
     * @return the configured {@link #CACHE_SIZE}, or 1000 when the key is absent
     * @throws NumberFormatException if the value is neither a number nor a parsable integer
     */
    public int getCachedMapSize() {
        Object value = rawValue(CACHE_SIZE);
        if (value == null) {
            return DEFAULT_CACHE_SIZE;
        }
        if (value instanceof Number) {
            return ((Number) value).intValue();
        }
        return Integer.parseInt(value.toString());
    }

    /**
     * @return the configured {@link #GLOBAL_KEY}, or "GLOBAL$KEY" when the key is absent
     */
    public String getGlobalKey() {
        Object value = rawValue(GLOBAL_KEY);
        return value != null ? value.toString() : DEFAULT_GLOBAL_KEY;
    }

    /**
     * Reads an entry without the implicit cast to {@code String} that a typed
     * {@code get} would apply at the call site.
     */
    private Object rawValue(String key) {
        return ((Map<?, ?>) this).get(key);
    }
}
/**
 * Creates a state factory of type {@link StateType#OPAQUE}.
 *
 * @param client factory handed to the returned state factory
 * @param type   class used to build the {@code OpaqueValueSerializer}
 * @param <T>    type wrapped by the stored {@link OpaqueValue}s
 * @return a new {@link OpaqueFactory}
 */
public static <T> Factory<OpaqueValue<T>> opaque(ClientFactory client, Class<T> type) {
    ValueSerializer<OpaqueValue<T>> opaqueSerializer = new OpaqueValueSerializer<>(type);
    return new OpaqueFactory<>(client, StateType.OPAQUE, opaqueSerializer);
}
/**
 * Creates a state factory of type {@link StateType#TRANSACTIONAL}.
 *
 * @param client factory handed to the returned state factory
 * @param type   class used to build the {@code TransactionalValueSerializer}
 * @param <T>    type wrapped by the stored {@link TransactionalValue}s
 * @return a new {@link TransactionalFactory}
 */
public static <T> Factory<TransactionalValue<T>> transactional(ClientFactory client, Class<T> type) {
    ValueSerializer<TransactionalValue<T>> transactionalSerializer = new TransactionalValueSerializer<>(type);
    return new TransactionalFactory<>(client, StateType.TRANSACTIONAL, transactionalSerializer);
}
/**
 * Creates a state factory of type {@link StateType#NON_TRANSACTIONAL}.
 *
 * @param client factory handed to the returned state factory
 * @param type   class used to build the {@code NonTransactionalValueSerializer}
 * @param <T>    type of the stored values
 * @return a new {@link NonTransactionalFactory}
 */
public static <T> Factory<T> nonTransactional(ClientFactory client, Class<T> type) {
    ValueSerializer<T> plainSerializer = new NonTransactionalValueSerializer<>(type);
    return new NonTransactionalFactory<>(client, StateType.NON_TRANSACTIONAL, plainSerializer);
}
/**
 * Common base of the state factories declared in this file. It only stores the three
 * collaborators its subclasses read when building a state.
 *
 * @param <T> type handled by the value serializer
 */
public abstract static class Factory<T> implements StateFactory {
    /** Serializer given to the created map state. */
    protected ValueSerializer<T> serializer;
    /** Factory used by subclasses to obtain the Elasticsearch client. */
    protected ClientFactory clientFactory;
    /** State type this factory was created for. */
    protected StateType stateType;

    /**
     * @param clientFactory factory used to obtain the Elasticsearch client
     * @param stateType     state type this factory was created for
     * @param serializer    serializer given to the created map state
     */
    public Factory(ClientFactory clientFactory, StateType stateType, ValueSerializer<T> serializer) {
        this.serializer = serializer;
        this.stateType = stateType;
        this.clientFactory = clientFactory;
    }
}
/**
 * State factory producing an opaque map state stored in Elasticsearch.
 *
 * @param <T> type wrapped by the stored {@link OpaqueValue}s
 */
public static class OpaqueFactory<T> extends Factory<OpaqueValue<T>> {

    public OpaqueFactory(ClientFactory clientFactory, StateType stateType, ValueSerializer<OpaqueValue<T>> serializer) {
        super(clientFactory, stateType, serializer);
    }

    /**
     * Builds the state as a chain: Elasticsearch backing map, then cached map, then
     * opaque map, then snapshottable map keyed by the configured global key.
     */
    @Override
    public State makeState(Map conf, IMetricsContext iMetricsContext, int i, int i2) {
        Options options = new Options(conf);

        Client esClient = clientFactory.makeClient(conf);
        BulkResponseHandler responseHandler = new BulkResponseHandler.LoggerResponseHandler();
        ESIndexMapState<OpaqueValue<T>> backingMap =
                new ESIndexMapState<>(esClient, serializer, responseHandler, options.reportError());

        CachedMap cachedMap = new CachedMap(backingMap, options.getCachedMapSize());
        MapState opaqueMap = OpaqueMap.build(cachedMap);

        Values snapshotKey = new Values(options.getGlobalKey());
        return new SnapshottableMap<OpaqueValue<T>>(opaqueMap, snapshotKey);
    }
}
/**
 * State factory producing a transactional map state stored in Elasticsearch.
 *
 * @param <T> type wrapped by the stored {@link TransactionalValue}s
 */
public static class TransactionalFactory<T> extends Factory<TransactionalValue<T>> {

    public TransactionalFactory(ClientFactory clientFactory, StateType stateType, ValueSerializer<TransactionalValue<T>> serializer) {
        super(clientFactory, stateType, serializer);
    }

    /**
     * Builds the state as a chain: Elasticsearch backing map, then cached map, then
     * transactional map, then snapshottable map keyed by the configured global key.
     */
    @Override
    public State makeState(Map conf, IMetricsContext iMetricsContext, int i, int i2) {
        Options options = new Options(conf);

        Client esClient = clientFactory.makeClient(conf);
        BulkResponseHandler responseHandler = new BulkResponseHandler.LoggerResponseHandler();
        ESIndexMapState<TransactionalValue<T>> backingMap =
                new ESIndexMapState<>(esClient, serializer, responseHandler, options.reportError());

        CachedMap cachedMap = new CachedMap(backingMap, options.getCachedMapSize());
        MapState<T> transactionalMap = TransactionalMap.build(cachedMap);

        return new SnapshottableMap<>(transactionalMap, new Values(options.getGlobalKey()));
    }
}
/**
 * State factory producing a non-transactional map state stored in Elasticsearch.
 *
 * @param <T> type of the stored values
 */
public static class NonTransactionalFactory<T> extends Factory<T> {

    public NonTransactionalFactory(ClientFactory clientFactory, StateType stateType, ValueSerializer<T> serializer) {
        super(clientFactory, stateType, serializer);
    }

    /**
     * Builds the state as a chain: Elasticsearch backing map, then cached map, then
     * non-transactional map, then snapshottable map keyed by the configured global key.
     */
    @Override
    public State makeState(Map conf, IMetricsContext iMetricsContext, int i, int i2) {
        Options options = new Options(conf);

        Client esClient = clientFactory.makeClient(conf);
        BulkResponseHandler responseHandler = new BulkResponseHandler.LoggerResponseHandler();
        ESIndexMapState<T> backingMap =
                new ESIndexMapState<>(esClient, serializer, responseHandler, options.reportError());

        CachedMap<T> cachedMap = new CachedMap<>(backingMap, options.getCachedMapSize());
        MapState<T> plainMap = NonTransactionalMap.build(cachedMap);

        Values snapshotKey = new Values(options.getGlobalKey());
        return new SnapshottableMap<>(plainMap, snapshotKey);
    }
}
// Collaborators are assigned once in the constructor and never reassigned, hence final.

/** Receives the response of every bulk request executed by multiPut. */
private final BulkResponseHandler bulkResponseHandler;
/** Converts values to and from the bytes stored as document source. */
private final ValueSerializer<T> serializer;
/** Client used to build the multi-get, index and bulk requests. */
private final Client client;
/** Selects ReportedFailedException (true) or FailedException (false) on Elasticsearch errors. */
private final boolean reportError;

/**
 * Creates a backing map that stores each value as an Elasticsearch document.
 *
 * @param client              client used to build the multi-get, index and bulk requests
 * @param serializer          converts values to and from the document source bytes
 * @param bulkResponseHandler receives the response of each executed bulk request
 * @param reportError         when {@code true}, Elasticsearch errors are rethrown as
 *                            {@link ReportedFailedException}, otherwise as {@link FailedException}
 */
public ESIndexMapState(Client client, ValueSerializer<T> serializer, BulkResponseHandler bulkResponseHandler, boolean reportError) {
    this.client = client;
    this.serializer = serializer;
    this.bulkResponseHandler = bulkResponseHandler;
    this.reportError = reportError;
}
/**
 * Fetches the documents identified by {@code keys} with one multi-get request.
 *
 * <p>Every key is first converted with {@code GroupByKey.fromKeysList}. The result holds one
 * entry per item of the multi-get response, in response order; an entry is {@code null}
 * when the item carries no response, its source is empty, or its source cannot be
 * deserialized. No request is sent when {@code keys} is empty.
 *
 * @param keys one key list per document to read
 * @return the values read, or an empty list when {@code keys} is empty
 * @throws FailedException if the request fails with an {@link ElasticsearchException}
 *         (a {@link ReportedFailedException} when error reporting is enabled)
 */
@Override
public List<T> multiGet(List<List<Object>> keys) {
    List<T> values = new ArrayList<>(keys.size());

    List<GroupByKey> documentKeys = new ArrayList<>(keys.size());
    for (List<Object> rawKey : keys) {
        documentKeys.add(GroupByKey.fromKeysList(rawKey));
    }
    if (documentKeys.isEmpty()) {
        return values;
    }

    MultiGetRequestBuilder multiGet = client.prepareMultiGet();
    for (GroupByKey documentKey : documentKeys) {
        multiGet.add(documentKey.index, documentKey.type, documentKey.id);
    }

    MultiGetResponse found;
    try {
        found = multiGet.execute().actionGet();
    } catch (ElasticsearchException e) {
        String error = "Failed to read data into elasticsearch";
        if (reportError) {
            throw new ReportedFailedException(error, e);
        }
        throw new FailedException(error, e);
    }

    for (MultiGetItemResponse item : found.getResponses()) {
        values.add(readValue(item.getResponse()));
    }
    return values;
}

/**
 * Deserializes the source of one fetched document.
 *
 * @return the value, or {@code null} when there is no response, the source is empty, or
 *         deserialization fails (the failure is logged)
 */
private T readValue(GetResponse document) {
    if (document == null || document.isSourceEmpty()) {
        return null;
    }
    try {
        return serializer.deserialize(document.getSourceAsBytes());
    } catch (IOException e) {
        LOGGER.error("error while trying to deserialize data from json", e);
        return null;
    }
}
/**
 * Indexes every value under its key with a single bulk request.
 *
 * <p>{@code keys} and {@code values} are read pairwise by position. A value that cannot be
 * serialized is logged and left out of the request. When nothing was queued (empty input,
 * or every value failed to serialize) no request is sent: Elasticsearch rejects a bulk
 * request that contains no actions, which would otherwise fail the batch.
 *
 * @param keys   one key list per value, at the same position
 * @param values values to store
 * @throws FailedException if the bulk request fails with an {@link ElasticsearchException}
 *         (a {@link ReportedFailedException} when error reporting is enabled)
 */
@Override
public void multiPut(List<List<Object>> keys, List<T> values) {
    BulkRequestBuilder bulkRequestBuilder = client.prepareBulk();
    int queued = 0;
    ListIterator<T> listIterator = values.listIterator();
    while (listIterator.hasNext()) {
        // nextIndex() must be read before next() so key and value share the same position.
        GroupByKey groupBy = GroupByKey.fromKeysList(keys.get(listIterator.nextIndex()));
        T value = listIterator.next();
        try {
            byte[] source = serializer.serialize(value);
            bulkRequestBuilder.add(client.prepareIndex(groupBy.index, groupBy.type, groupBy.id).setSource(source));
            queued++;
        } catch (IOException e) {
            LOGGER.error("Oops data loss - error while trying to serialize data to json", e);
        }
    }

    if (queued == 0) {
        // Nothing to write; executing an empty bulk request would only raise a validation error.
        return;
    }

    try {
        bulkResponseHandler.handle(bulkRequestBuilder.execute().actionGet());
    } catch (ElasticsearchException e) {
        // Log the cause as well, so the failure is diagnosable from the worker log alone.
        LOGGER.error("error while executing bulk request to elasticsearch", e);
        String error = "Failed to store data into elasticsearch";
        throw (reportError) ? new ReportedFailedException(error, e) : new FailedException(error, e);
    }
}
private static class GroupByKey {
public final String index;
public final String type;
public final String id;
public GroupByKey(String index, String type, String id) {
this.index = index;
this.type = type;
this.id = id;
}
public static GroupByKey fromKeysList(List<Object> keys) {
if( keys == null || keys.size() < 3) {
throw new RuntimeException("Keys not supported " + keys);
}
return new GroupByKey(keys.get(0).toString(), keys.get(1).toString(), keys.get(2).toString());
}
public String toString( ) {
return Objects.toStringHelper(this)
.add("index", index)
.add("type", type)
.add("id", id).toString();
}
}
}