//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.core.history.memory;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.gov.dstl.baleen.core.history.AbstractBaleenHistory;
import uk.gov.dstl.baleen.core.history.DocumentHistory;
import uk.gov.dstl.baleen.core.utils.ConfigUtils;
import uk.gov.dstl.baleen.exceptions.BaleenException;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
/** A base class for implementing histories which hold data in memory.
*
* It is extremely important that closeHistory() is called (either here or on
* {@link DocumentHistory}) in order to free resources at the end of processing.
*
* By default the history is only kept for 24 hours (set history.timeout, in
* seconds, in the global config to control this). After that period the document
* history will return blank. Still, pipelines should not rely on this behaviour
* as it will consume excessive memory unnecessarily.
*
* Thus history.size should be set to the maximum number of documents to be processed
* concurrently (typically 1 but it depends on threading in annotators and batching)
* and history.timeout should be set to (a little) longer than the maximum processing
* time of a single document.
*
* Implementors should override the create and load functions. The difference being
* that load can throw an exception and/or return null (at which point
* create will be called to instantiate a fresh document history).
*
* Implementations should ensure that super.initialise/close/destroy are called to
* ensure the cache is maintained.
*
*
* @baleen.javadoc
*/
public abstract class AbstractCachingBaleenHistory<H extends DocumentHistory> extends AbstractBaleenHistory {
    private static final Logger LOGGER = LoggerFactory.getLogger(AbstractCachingBaleenHistory.class);

    /**
     * The amount of time, in seconds, that a history is kept in memory for after it
     * was last accessed.
     *
     * @baleen.config 86400
     */
    public static final String PARAM_TIMEOUT = "history.timeout";
    @ConfigurationParameter(name = PARAM_TIMEOUT, defaultValue = "86400")
    private String cacheTimeoutString;

    //Parse the cacheTimeout config parameter into this variable to avoid issues with parameter types
    private long cacheTimeout;

    /**
     * The number of documents that can be kept in memory at the same time.
     *
     * @baleen.config 16
     */
    public static final String PARAM_SIZE = "history.size";
    @ConfigurationParameter(name = PARAM_SIZE, defaultValue = "16")
    private long cacheSize;

    // Null until initialize() has run and again after destroy(). Declared volatile
    // because getCachedHistoryIfPresent() reads it without holding the monitor that
    // getHistory(), closeHistory() and destroy() use.
    private volatile LoadingCache<String, H> cachedHistories;

    /**
     * Builds the cache of document histories from the configured timeout and size.
     *
     * Entries expire {@link #PARAM_TIMEOUT} seconds after their last access and the
     * cache holds at most {@link #PARAM_SIZE} entries. A missing entry is populated
     * by first trying {@link #loadExistingDocumentHistory(String)} and, if that
     * throws or returns null, by {@link #createNewDocumentHistory(String)}.
     *
     * @throws BaleenException if the superclass initialisation fails
     */
    @Override
    protected void initialize() throws BaleenException {
        super.initialize();

        // NOTE(review): presumably stringToLong falls back to the supplied default
        // (86400) when the configured string cannot be parsed - confirm in ConfigUtils.
        cacheTimeout = ConfigUtils.stringToLong(cacheTimeoutString, 86400L);

        cachedHistories = CacheBuilder.newBuilder()
            .expireAfterAccess(cacheTimeout, TimeUnit.SECONDS)
            .maximumSize(cacheSize)
            .build(new CacheLoader<String, H>() {
                @Override
                public H load(String documentId) throws Exception {
                    H dh = null;
                    try {
                        dh = loadExistingDocumentHistory(documentId);
                    } catch (Exception e) {
                        // Deliberately best-effort: a failed load must not stop processing
                        LOGGER.error("Loading caused error, so using a fresh history. This will likely overwrite the original history", e);
                    }

                    if (dh == null) {
                        dh = createNewDocumentHistory(documentId);
                    }

                    return dh;
                }
            });
    }

    /**
     * Get the history for a document, loading or creating it if it is not cached.
     *
     * Must only be called between {@link #initialize()} and {@link #destroy()};
     * outside of that window there is no cache to serve the request from.
     *
     * @param documentId the document owning the history
     * @return the cached, loaded or newly created history
     */
    @Override
    public synchronized DocumentHistory getHistory(String documentId) {
        try {
            return cachedHistories.get(documentId);
        } catch (ExecutionException e) {
            LOGGER.error("Cache threw exception, this should not happen!", e);
            // Fake the same result we'd expect from the cache
            H dh = createNewDocumentHistory(documentId);
            cachedHistories.put(documentId, dh);
            return dh;
        }
    }

    /** Create a new history for the document id.
     * @param documentId the document owning the history
     * @return non-null history
     */
    protected abstract H createNewDocumentHistory(String documentId);

    /** Load a history for the document id.
     * @param documentId the document to load
     * @return null if the history doesn't exist, otherwise a valid history
     * @throws BaleenException if the history could not be loaded; the caller then
     *         falls back to creating a new history
     */
    protected abstract H loadExistingDocumentHistory(String documentId) throws BaleenException;

    /** Get the history for the document id only if it is currently cached.
     *
     * Never triggers a load or a create.
     *
     * @param documentId the document owning the history
     * @return the cached history, or null if none is cached (including when the
     *         cache has not been initialised or has already been destroyed)
     */
    protected H getCachedHistoryIfPresent(String documentId) {
        // Read the field once so a concurrent destroy() cannot null it between the
        // check and the lookup.
        LoadingCache<String, H> cache = cachedHistories;
        return cache == null ? null : cache.getIfPresent(documentId);
    }

    /**
     * Remove the history for a document from the cache.
     *
     * Does nothing (beyond logging) if the cache has not been initialised or has
     * already been destroyed, since there is then nothing left to remove.
     *
     * @param documentId the document whose history should be discarded
     */
    @Override
    public synchronized void closeHistory(String documentId) {
        LOGGER.info("Deleting history for document {}", documentId);
        if (cachedHistories != null) {
            cachedHistories.invalidate(documentId);
        }
    }

    /**
     * Discard every cached history and release the cache.
     *
     * Safe to call more than once; later calls find no cache and do nothing further
     * here.
     */
    @Override
    public void destroy() {
        super.destroy();

        // Take the same monitor as getHistory()/closeHistory() so the cache cannot
        // disappear part-way through one of those calls.
        synchronized (this) {
            if (cachedHistories != null) {
                cachedHistories.invalidateAll();
                cachedHistories = null;
            }
        }
    }
}