//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.history.elasticsearch;
import java.io.IOException;
import java.util.Collections;
import java.util.concurrent.LinkedBlockingDeque;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ExternalResource;
import org.elasticsearch.action.get.GetAction;
import org.elasticsearch.action.get.GetRequestBuilder;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexAction;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.gov.dstl.baleen.core.history.HistoryEvent;
import uk.gov.dstl.baleen.core.history.memory.AbstractCachingBaleenHistory;
import uk.gov.dstl.baleen.exceptions.BaleenException;
import uk.gov.dstl.baleen.history.utils.HistoryModule;
import uk.gov.dstl.baleen.resources.SharedElasticsearchResource;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
* A history implementation which backs off to Elasticsearch.
*
* The implementation uses Elasticsearch as a data store. Events are collected
* in memory and then ON CLOSE the data is persisted to ES. If the document is
* requested again (and is not in the local cache) then the history is queried
* from Elasticsearch.
*
* Thus the in memory and the ES history are not necessarily in sync (though this
* should make no difference in Baleen's use case).
*
* You must ensure that close is called to persist the data to ES.
*
* Use the history.esIndex and history.esType to configure where the data is
* stored in Elasticsearch.
*
*
* @baleen.javadoc
*/
public class ElasticsearchHistory extends AbstractCachingBaleenHistory<ElasticsearchDocumentHistory> {
  private static final Logger LOGGER = LoggerFactory.getLogger(ElasticsearchHistory.class);

  /**
   * Connection to Elasticsearch
   *
   * @baleen.resource uk.gov.dstl.baleen.resources.SharedElasticsearchResource
   */
  public static final String KEY_ELASTICSEARCH = "elasticsearch";

  @ExternalResource(key = KEY_ELASTICSEARCH, mandatory = true)
  private SharedElasticsearchResource elasticsearch;

  /**
   * The Elasticsearch index to write history to
   *
   * @baleen.config history
   */
  public static final String PARAM_INDEX = "history.esIndex";

  @ConfigurationParameter(name = PARAM_INDEX, defaultValue = "history")
  private String esIndex;

  /**
   * The type to use when writing history to Elasticsearch
   *
   * @baleen.config event
   */
  public static final String PARAM_TYPE = "history.esType";

  @ConfigurationParameter(name = PARAM_TYPE, defaultValue = "event")
  private String esType;

  /** JSON mapper used to (de)serialise history documents; created in {@link #initialize()}. */
  private ObjectMapper mapper;

  /**
   * New instance, used by UimaFit.
   */
  public ElasticsearchHistory() {
    // Do nothing
  }

  /**
   * New instance with the Elasticsearch resource supplied directly rather
   * than injected, eg for testing.
   *
   * @param elasticsearch
   *            the Elasticsearch resource
   */
  public ElasticsearchHistory(SharedElasticsearchResource elasticsearch) {
    this.elasticsearch = elasticsearch;
  }

  /**
   * Initialises the parent and then builds the JSON mapper, which ignores
   * unknown properties on read and has the {@link HistoryModule} registered.
   *
   * @throws BaleenException
   *             declared by the parent initialisation
   */
  @Override
  protected void initialize() throws BaleenException {
    super.initialize();

    mapper = new ObjectMapper();
    mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
    mapper.registerModule(new HistoryModule());

    // It might be worth setting the mapping explicitly here, but that will
    // depend on the specific events (which might be extended outside core).
    // So currently we trust ES to do the right thing.
  }

  /**
   * Persists the cached history of the document to Elasticsearch (under the
   * configured index and type, using the document id as the ES id) and then
   * delegates to the parent's closeHistory.
   *
   * If the document is not in the cache a warning is logged and nothing else
   * happens. If the history cannot be serialised to JSON a warning is logged,
   * nothing is written, and the parent's closeHistory is still called.
   * Runtime exceptions thrown by the Elasticsearch client are not caught
   * here, so in that case the parent's closeHistory is not reached.
   *
   * @param documentId
   *            the id of the document whose history is being closed
   */
  @Override
  public void closeHistory(String documentId) {
    ElasticsearchDocumentHistory dh = getCachedHistoryIfPresent(documentId);
    if (dh == null) {
      LOGGER.warn("Attempt to close a document {} which is not in cache, thus can't be persisted", documentId);
      return;
    }

    try {
      byte[] source = mapper.writeValueAsBytes(new ESHistory(documentId, dh.getAllHistory()));

      // The index is always the configured esIndex; the document id is only
      // ever used as the Elasticsearch document id.
      new IndexRequestBuilder(elasticsearch.getClient(), IndexAction.INSTANCE)
          .setIndex(esIndex)
          .setType(esType)
          .setId(documentId)
          .setSource(source)
          .get();
    } catch (JsonProcessingException e) {
      LOGGER.warn("Unable to convert history to source, so can't be persisted {}", documentId, e);
    }

    super.closeHistory(documentId);
  }

  /**
   * Creates an empty history for the given document.
   *
   * @param documentId
   *            the id of the document
   * @return a new, empty document history bound to this history
   */
  @Override
  protected ElasticsearchDocumentHistory createNewDocumentHistory(String documentId) {
    return new ElasticsearchDocumentHistory(this, documentId);
  }

  /**
   * Loads a previously persisted history for the document from Elasticsearch.
   *
   * @param documentId
   *            the id of the document (also the Elasticsearch document id)
   * @return the stored history; an empty history if the stored source yields
   *         no history object or no events; or null if Elasticsearch holds no
   *         (non-empty) document, in which case the parent implementation is
   *         left to create a new history
   * @throws BaleenException
   *             if the stored source cannot be read or deserialised
   */
  @Override
  protected ElasticsearchDocumentHistory loadExistingDocumentHistory(String documentId) throws BaleenException {
    try {
      GetResponse response = new GetRequestBuilder(elasticsearch.getClient(), GetAction.INSTANCE)
          .setId(documentId)
          .setIndex(esIndex)
          .setType(esType)
          .get();

      if (!response.isExists() || response.isSourceEmpty()) {
        // If we don't have any data, then let parent implementation create a new history
        return null;
      }

      ESHistory esh = mapper.readValue(response.getSourceAsBytes(), ESHistory.class);

      // Start from an empty deque and only copy events when there are some to
      // copy: a stored document lacking an events field would otherwise make
      // the copy fail with a NullPointerException.
      LinkedBlockingDeque<HistoryEvent> events = new LinkedBlockingDeque<>();
      if (esh != null && esh.getEvents() != null) {
        events.addAll(esh.getEvents());
      }

      return new ElasticsearchDocumentHistory(this, documentId, events);
    } catch (IOException e) {
      throw new BaleenException(e);
    }
  }
}