package com.thinkbiganalytics.nifi.provenance.cache;
/*-
* #%L
* thinkbig-nifi-provenance-repo
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.thinkbiganalytics.nifi.provenance.model.FeedFlowFile;
import org.mapdb.DB;
import org.mapdb.DBMaker;
import org.mapdb.HTreeMap;
import org.mapdb.Serializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import java.util.Collection;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;
import javax.annotation.PostConstruct;
/**
 * Persists any running flow files to disk when NiFi shuts down so the processing feed status can be restored
 * when NiFi comes back up.
 *
 * <p>Two MapDB databases are opened against the same file location: one is opened with "delete after open" and
 * serves the entries that were on disk at startup, the other receives the flow files written by
 * {@link #persistFlowFiles()}. If MapDB cannot be initialized, plain in-memory maps are used instead and nothing
 * is written to disk.
 */
public class FeedFlowFileMapDbCache implements FeedFlowFileCacheListener {

    private static final Logger log = LoggerFactory.getLogger(FeedFlowFileMapDbCache.class);

    /**
     * The MapDB database holding the entries that were loaded from disk at startup.
     */
    private DB inMemoryDb;

    /**
     * The cache loaded from disk and put into memory.
     */
    private ConcurrentMap<String, FeedFlowFile> memFeedFlowFileCache;

    /**
     * The persistent MapDB database.
     */
    private DB persistentDb;

    /**
     * The map whose contents will be persisted to disk.
     */
    private ConcurrentMap<String, FeedFlowFile> persistentFlowFileCache;

    @Autowired
    private FeedFlowFileGuavaCache cache;

    /**
     * Amount of time (in {@link #expireAfterUnit}) after creation at which loaded entries expire.
     */
    private final int expireAfterNumber = 3;

    /**
     * Unit for {@link #expireAfterNumber}.
     */
    private final TimeUnit expireAfterUnit = TimeUnit.DAYS;

    /**
     * Opens the MapDB databases at the supplied location.
     *
     * <p>Any failure while opening either database is logged and both caches fall back to empty
     * {@link ConcurrentHashMap} instances, so the object is always usable after construction.
     *
     * @param fileLocation the path of the MapDB file
     */
    public FeedFlowFileMapDbCache(String fileLocation) {
        log.info("Initialize FeedFlowFileMapDbCache cache at: {}, keeping running flowfiles for {} days", fileLocation, expireAfterNumber);
        try {
            // Delete the file after it is loaded/opened.
            inMemoryDb = DBMaker.fileDB(fileLocation).fileMmapEnable()
                .fileMmapEnableIfSupported() // Only enable mmap on supported platforms
                .fileMmapPreclearDisable()   // Make mmap file faster
                .cleanerHackEnable()
                .checksumHeaderBypass()
                .fileDeleteAfterOpen()
                .closeOnJvmShutdown().make();

            memFeedFlowFileCache =
                (HTreeMap<String, FeedFlowFile>) inMemoryDb.hashMap("feedFlowFile").keySerializer(Serializer.STRING).valueSerializer(Serializer.JAVA).expireAfterCreate(expireAfterNumber,
                                                                                                                                                                   expireAfterUnit)
                    .createOrOpen();

            // Create a new db that will be used when persisting the data to disk.
            persistentDb = DBMaker.fileDB(fileLocation).fileMmapEnable()
                .fileMmapEnableIfSupported() // Only enable mmap on supported platforms
                .fileMmapPreclearDisable()   // Make mmap file faster
                .cleanerHackEnable()
                .checksumHeaderBypass()
                .closeOnJvmShutdown().make();

            persistentFlowFileCache =
                (HTreeMap<String, FeedFlowFile>) persistentDb.hashMap("feedFlowFile").keySerializer(Serializer.STRING).valueSerializer(Serializer.JAVA)
                    .createOrOpen();

            log.info("Successfully created FeedFlowFileMapDbCache cache at: {}, with starting size of: {} ", fileLocation, memFeedFlowFileCache.size());
        } catch (Exception e) {
            log.error("Error creating mapdb cache. {}. If NiFi goes down with flows in progress Kylo will not be able to connect the running flows on restart to their Kylo job executions",
                      e.getMessage(), e);
            // Fall back to plain in-memory maps so the rest of the class keeps working without MapDB.
            memFeedFlowFileCache = new ConcurrentHashMap<>();
            persistentFlowFileCache = new ConcurrentHashMap<>();
        }
    }

    /**
     * Registers this instance as a listener on the {@link FeedFlowFileGuavaCache} once injection has completed.
     */
    @PostConstruct
    private void init() {
        cache.subscribe(this);
    }

    /**
     * When a flow file is invalidated in the {@link FeedFlowFileGuavaCache} it is also removed from the cache that
     * was loaded from disk, provided the flow file originally came from there.
     *
     * @param flowFile the flow file that was invalidated
     */
    public void onInvalidate(FeedFlowFile flowFile) {
        if (!flowFile.isBuiltFromMapDb()) {
            // Only flow files that were restored from disk have entries to clean up.
            return;
        }
        log.debug("Removing completed flowfile {} from mapDbCache ", flowFile.getId());
        memFeedFlowFileCache.remove(flowFile.getId());
        // Remove any other references to this feed flow file.
        if (flowFile.getChildFlowFiles() != null) {
            flowFile.getChildFlowFiles().forEach(flowFileId -> memFeedFlowFileCache.remove(flowFileId));
        }
    }

    /**
     * Loads the persisted cache back into the {@link FeedFlowFileGuavaCache}.
     *
     * <p>Each flow file is flagged as built from MapDB and registered under its own id as well as under the id of
     * each of its active child flow files.
     *
     * @return the number of entries in the cache that was loaded from disk
     */
    public int loadGuavaCache() {
        memFeedFlowFileCache.values().forEach(feedFlowFile -> {
            feedFlowFile.setBuiltFromMapDb(true);
            cache.add(feedFlowFile.getId(), feedFlowFile);
            if (feedFlowFile.getActiveChildFlowFiles() != null) {
                feedFlowFile.getActiveChildFlowFiles().forEach(feedFlowFileId -> cache.add(feedFlowFileId, feedFlowFile));
            }
        });
        return memFeedFlowFileCache.values().size();
    }

    /**
     * Returns the size of the cache that was loaded from disk.
     *
     * @return the number of entries in the loaded cache
     */
    public Integer size() {
        return memFeedFlowFileCache.size();
    }

    /**
     * Returns the flow files held in the cache that was loaded from disk.
     *
     * @return the values view of the loaded cache
     */
    public Collection<FeedFlowFile> getCache() {
        return memFeedFlowFileCache.values();
    }

    /**
     * Persists the contents of the {@link FeedFlowFileGuavaCache} to disk.
     *
     * <p>When at least one flow file was written, both MapDB databases are committed and closed.
     *
     * @return the number of flow files handed to the persistent cache, or 0 when the Guava cache returned no
     * collection
     */
    public int persistFlowFiles() {
        Collection<FeedFlowFile> flowFiles = cache.getFlowFiles();
        // Check for null before the collection is used; the original check came after it had been dereferenced.
        if (flowFiles == null) {
            log.info("No flow files available to persist to disk via MapDB.");
            return 0;
        }

        log.info("About to persist {} flow files to disk via MapDB ", flowFiles.size());
        flowFiles.forEach(this::cacheFlowFile);
        log.info("Successfully persisted {} flow files to disk via MapDB. Persisted Map Size is: {} entries ", flowFiles.size(), persistentFlowFileCache.size());

        if (!flowFiles.isEmpty()) {
            // The databases are null when MapDB initialization failed in the constructor.
            if (inMemoryDb != null) {
                inMemoryDb.commit();
                inMemoryDb.close();
            }
            if (persistentDb != null) {
                persistentDb.commit();
                persistentDb.close();
                log.info("Successfully closed the flow file MapDB cache file.");
            }
        }
        return flowFiles.size();
    }

    /**
     * Adds a single flow file to the map that is persisted to disk, keyed by the flow file id.
     *
     * @param flowFile the flow file to store
     */
    public void cacheFlowFile(FeedFlowFile flowFile) {
        persistentFlowFileCache.put(flowFile.getId(), flowFile);
    }
}