package com.thinkbiganalytics.feedmgr.nifi.cache;
/*-
* #%L
* thinkbig-feed-manager-controller
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.stream.Collectors;
import javax.annotation.PostConstruct;
import javax.inject.Inject;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.web.api.dto.ConnectionDTO;
import org.apache.nifi.web.api.dto.ControllerServiceDTO;
import org.apache.nifi.web.api.dto.ProcessGroupDTO;
import org.apache.nifi.web.api.dto.ProcessorDTO;
import org.apache.nifi.web.api.dto.ReportingTaskDTO;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.util.concurrent.Uninterruptibles;
import com.thinkbiganalytics.DateTimeUtil;
import com.thinkbiganalytics.feedmgr.nifi.NifiConnectionListener;
import com.thinkbiganalytics.feedmgr.nifi.NifiConnectionService;
import com.thinkbiganalytics.feedmgr.nifi.PropertyExpressionResolver;
import com.thinkbiganalytics.feedmgr.rest.model.FeedMetadata;
import com.thinkbiganalytics.feedmgr.rest.model.RegisteredTemplate;
import com.thinkbiganalytics.feedmgr.service.MetadataService;
import com.thinkbiganalytics.metadata.api.MetadataAccess;
import com.thinkbiganalytics.metadata.api.PostMetadataConfigAction;
import com.thinkbiganalytics.metadata.api.app.KyloVersionProvider;
import com.thinkbiganalytics.metadata.api.feed.FeedProvider;
import com.thinkbiganalytics.metadata.rest.model.nifi.NiFiFlowCacheConnectionData;
import com.thinkbiganalytics.metadata.rest.model.nifi.NiFiFlowCacheSync;
import com.thinkbiganalytics.metadata.rest.model.nifi.NifiFlowCacheSnapshot;
import com.thinkbiganalytics.nifi.provenance.NiFiProvenanceConstants;
import com.thinkbiganalytics.nifi.rest.client.LegacyNifiRestClient;
import com.thinkbiganalytics.nifi.rest.client.NifiClientRuntimeException;
import com.thinkbiganalytics.nifi.rest.model.flow.NiFiFlowConnectionConverter;
import com.thinkbiganalytics.nifi.rest.model.flow.NifiFlowConnection;
import com.thinkbiganalytics.nifi.rest.model.flow.NifiFlowProcessGroup;
import com.thinkbiganalytics.nifi.rest.model.flow.NifiFlowProcessor;
import com.thinkbiganalytics.nifi.rest.support.NifiConnectionUtil;
import com.thinkbiganalytics.nifi.rest.support.NifiProcessUtil;
/**
* Cache processor definitions in a flow for use by the KyloProvenanceReportingTask
*
* Each Processor has an internal {@code flowId} generated when Kylo walks the flow. This internal id is used to associate the Feed flow as a template with the Feed flow created when the feed is
* saved/updated.
*
* @see com.thinkbiganalytics.nifi.rest.visitor.NifiConnectionOrderVisitor
*/
public class NifiFlowCache implements NifiConnectionListener, PostMetadataConfigAction, NiFiProvenanceConstants {
private static final Logger log = LoggerFactory.getLogger(NifiFlowCache.class);
public static final String ITEM_LAST_MODIFIED_KEY = "NIFI_FLOW_CACHE";
/**
 * Client used to read the feed flows and to create the reporting task/controller service in NiFi
 */
@Inject
LegacyNifiRestClient nifiRestClient;
/**
 * Used to look up the registered templates when the cache is rebuilt
 */
@Inject
MetadataService metadataService;
@Inject
FeedProvider feedProvider;
/**
 * Wraps the metadata reads performed while rebuilding the cache
 */
@Inject
MetadataAccess metadataAccess;
/**
 * Supplies the static configuration properties injected into a newly created metadata controller service
 */
@Inject
PropertyExpressionResolver propertyExpressionResolver;
/**
 * This cache subscribes to this service to be notified when NiFi connects/disconnects
 */
@Inject
private NifiConnectionService nifiConnectionService;
/**
 * Used to verify Kylo is up to date before the cache is initialized
 */
@Inject
private KyloVersionProvider kyloVersionProvider;
/**
 * Exchanges cache updates with other Kylo instances when Kylo is clustered
 */
@Inject
private NifiFlowCacheClusterManager nifiFlowCacheClusterManager;
/**
 * Map of the feed name to the name of the template registered for that feed
 */
private Map<String, String> feedNameToTemplateNameMap = new ConcurrentHashMap<>();
/**
 * Map of the feed name to its processors grouped by their {@code flowId}
 */
private Map<String, Map<String, List<NifiFlowProcessor>>> feedFlowIdProcessorMap = new ConcurrentHashMap<>();
/**
 * Map of the feed name to its processors grouped by their processor id
 */
private Map<String, Map<String, List<NifiFlowProcessor>>> feedProcessorIdProcessorMap = new ConcurrentHashMap<>();
/**
 * Map of the processor id to the processor
 */
private Map<String, NifiFlowProcessor> processorIdMap = new ConcurrentHashMap<>();
/**
 * Flag to mark if the cache is loaded or not. This is used to determine if the cache is ready to be used
 */
private boolean loaded = false;
/**
 * Flag to indicate we are connected to NiFi
 */
private boolean nifiConnected = false;
/**
 * Flag to indicate Modeshape is available
 */
private boolean modeShapeAvailable = false;
/**
 * Map of the processor id to the id of the process group it was registered with
 */
private Map<String, String> processorIdToFeedProcessGroupId = new ConcurrentHashMap<>();
/**
 * Map of the processor id to the category.feed name
 */
private Map<String, String> processorIdToFeedNameMap = new ConcurrentHashMap<>();
/**
 * Map of the processor id to the processor name
 */
private Map<String, String> processorIdToProcessorName = new ConcurrentHashMap<>();
/**
 * Map of the connection id to the cached connection data (id, name, source and destination)
 */
private Map<String, NiFiFlowCacheConnectionData> connectionIdToConnectionMap = new ConcurrentHashMap<>();
/**
 * Map of the connection id to the connection name
 */
private Map<String, String> connectionIdCacheNameMap = new ConcurrentHashMap<>();
/**
 * Set of the category.feed names for those that are just streaming feeds
 */
private Set<String> streamingFeeds = new HashSet<>();
/**
 * Set of the category.feed names
 */
private Set<String> allFeeds = new HashSet<>();
private Map<String, Long> feedLastUpated = new ConcurrentHashMap<>();
/**
 * Map of the sync id to cache.
 * This is the cache of the items out there that others have built and will check/update themselves based upon the base maps in the object
 */
private Map<String, NiFiFlowCacheSync> syncMap = new ConcurrentHashMap<>();
/**
 * Map with the sync id and the last time that item was sync'd with the system.
 * This is used to expire the stale, unused caches
 */
private Map<String, DateTime> lastSyncTimeMap = new ConcurrentHashMap<>();
/**
 * The last time the base cache was modified; compared against each sync to determine if it needs updates
 */
private DateTime lastUpdated = null;
/**
 * Registers this cache as a NiFi connection listener and starts the timer that expires unused sync caches.
 */
@PostConstruct
private void init() {
    // listen for NiFi connect/disconnect events so the cache can be (re)built when NiFi is reachable
    nifiConnectionService.subscribeConnectionListener(this);
    // start the background check that removes sync caches nobody is using
    initExpireTimerThread();
}
/**
 * Metadata is available.
 * When Kylo is up to date, flag Modeshape as available and attempt to initialize the cache.
 */
@Override
public void run() {
    if (!kyloVersionProvider.isUpToDate()) {
        // nothing to do until Kylo is running the latest version
        return;
    }
    this.modeShapeAvailable = true;
    checkAndInitializeCache();
}
/**
 * NiFi has made a connection.
 * Records the connected state and attempts to initialize the cache.
 */
@Override
public void onNiFiConnected() {
    nifiConnected = true;
    checkAndInitializeCache();
}
/**
 * NiFi has disconnected.
 * Records the disconnected state and marks the cache as not loaded.
 */
@Override
public void onNiFiDisconnected() {
    nifiConnected = false;
    // clearing the loaded flag forces the cache to be initialized again once NiFi is available
    loaded = false;
}
/**
 * Attempt to initialize the cache once both modeshape and NiFi are ready and the cache has not been loaded yet
 */
private void checkAndInitializeCache() {
    if (!modeShapeAvailable || !nifiConnected || loaded) {
        // either a dependency is not ready yet, or the cache is already built
        return;
    }
    rebuildCacheWithRetry();
}
/**
 * Reset the cache identified by the given sync id and re-sync it against the latest data in the base cache
 *
 * @param syncId a cache id
 * @return the latest cache, or {@link NiFiFlowCacheSync#UNAVAILABLE} when the cache cannot be used
 */
public NiFiFlowCacheSync refreshAll(String syncId) {
    NiFiFlowCacheSync target = getSync(syncId);
    if (target.isUnavailable()) {
        return NiFiFlowCacheSync.UNAVAILABLE;
    }
    // drop what this sync already knows so that everything is reported as an update
    target.reset();
    return syncAndReturnUpdates(target, false);
}
/**
 * Indicates if the cache has been loaded and can be used
 *
 * @return {@code true} if the cache is populated, {@code false} if the cache is not populated
 */
public boolean isAvailable() {
    return this.loaded;
}
/**
 * Indicates if Kylo is clustered.
 * If kylo is clustered it needs to do an additional check to ensure the flow cache is synchronized across all kylo instances
 *
 * @return {@code true} if kylo is clustered, {@code false} if not
 */
public boolean isKyloClustered() {
    return this.nifiFlowCacheClusterManager.isClustered();
}
/**
 * Sync the cache with the given id and return only the records that were updated since its last sync
 *
 * @param syncId a cache id
 * @return updates that have been applied to the cache, or the unavailable sync if the cache is not ready
 */
public NiFiFlowCacheSync syncAndReturnUpdates(String syncId) {
    NiFiFlowCacheSync current = getSync(syncId);
    return current.isUnavailable() ? current : syncAndReturnUpdates(current);
}
/**
 * Look up the cache for a given cache id.
 * A new cache is created and registered when the id is not known yet.
 *
 * @param syncId a cache id
 * @return the data in the cache for a given cache id
 */
public NiFiFlowCacheSync getCache(String syncId) {
    return getSync(syncId);
}
/**
 * Preview any new updates that will be applied to a given cache without modifying that cache
 *
 * @param syncId a cache id
 * @return any new updates that will be applied to a given cache, or the unavailable sync if the cache is not ready
 */
public NiFiFlowCacheSync previewUpdates(String syncId) {
    // a cache created only for a preview is not registered in the sync map
    NiFiFlowCacheSync current = getSync(syncId, true);
    return current.isUnavailable() ? current : previewUpdates(current);
}
/**
 * Rebuild the base cache that others will update from.
 * The cache is flagged as not loaded while the rebuild is running and is flagged as loaded only when the rebuild completes without an exception.
 */
public synchronized void rebuildAll() {
    loaded = false;
    try {
        ensureNiFiKyloReportingTask();
    } catch (Exception e) {
        // a missing reporting task should not stop the cache from being built
        log.error("Exception while trying to ensure KyloReportingTask {}", e.getMessage(), e);
    }
    // fetch the flows before clearing so a failure talking to NiFi leaves the existing data in place
    List<NifiFlowProcessGroup> feedFlows = nifiRestClient.getFeedFlows();
    clearAll();
    List<RegisteredTemplate> registeredTemplates = metadataAccess.read(() -> metadataService.getRegisteredTemplates(), MetadataAccess.SERVICE);
    //populate the template mappings and feeds to determine if the feed uses a streaming or batch template
    Map<String, RegisteredTemplate> templateByFeedName = new HashMap<>();
    for (RegisteredTemplate registeredTemplate : registeredTemplates) {
        populateTemplateMappingCache(registeredTemplate, templateByFeedName);
    }
    for (NifiFlowProcessGroup flow : feedFlows) {
        String feedName = flow.getFeedName();
        RegisteredTemplate feedTemplate = templateByFeedName.get(feedName);
        if (feedTemplate == null) {
            //this is possibly a reusable template.
            //update the processorid and connection name maps
            // NOTE(review): the feed name is passed where updateProcessorIdMaps expects a process group id -- confirm this is intended
            updateProcessorIdMaps(feedName, flow.getProcessorMap().values());
            this.connectionIdToConnectionMap.putAll(toConnectionIdMap(flow.getConnectionIdMap().values()));
        } else {
            updateFlow(feedName, feedTemplate.isStream(), flow);
        }
    }
    lastUpdated = DateTime.now();
    loaded = true;
}
/**
 * Rebuilds the cache.
 * If an exception occurs during the rebuild it will attempt to retry to build it up to 10 times before aborting.
 * The thread waits 5 seconds between attempts; it does not wait after the final attempt since no further attempt follows.
 * When every attempt fails, the last error is logged along with the url to manually rebuild the cache.
 */
public void rebuildCacheWithRetry() {
    final int retries = 10;
    final int waitTime = 5;
    Exception lastError = null;
    for (int count = 1; count <= retries; ++count) {
        try {
            log.info("Attempting to build the NiFiFlowCache");
            rebuildAll();
            if (loaded) {
                log.info("Successfully built the NiFiFlowCache");
                break;
            }
        } catch (final Exception e) {
            lastError = e;
            int remaining = retries - count;
            if (remaining > 0) {
                log.error("Error attempting to build cache. The system will attempt to retry {} more times. Next attempt to rebuild in {} seconds. The error was: {}. ", remaining, waitTime,
                          e.getMessage());
                Uninterruptibles.sleepUninterruptibly(waitTime, TimeUnit.SECONDS);
            } else {
                // last attempt: do not announce or wait for a retry that will never happen
                log.error("Error attempting to build cache. No more retries will be attempted. The error was: {}. ", e.getMessage());
            }
        }
    }
    if (!loaded) {
        log.error(
            "Unable to build the NiFi Flow Cache! You will need to manually rebuild the cache using the following url: http://KYLO_HOST:PORT/proxy/v1/metadata/nifi-provenance/nifi-flow-cache/reset-cache ",
            lastError);
    }
}
/**
 * Compute the updates for the supplied sync in preview mode
 *
 * @param sync the cache to preview
 * @return the updates that would be applied to the cache
 */
private NiFiFlowCacheSync previewUpdates(NiFiFlowCacheSync sync) {
    final boolean preview = true;
    return syncAndReturnUpdates(sync, preview);
}
/**
 * Compute the updates for the supplied sync and apply them to it (non preview mode)
 *
 * @param sync the cache to sync
 * @return the updates applied to the cache
 */
private NiFiFlowCacheSync syncAndReturnUpdates(NiFiFlowCacheSync sync) {
    final boolean preview = false;
    return syncAndReturnUpdates(sync, preview);
}
/**
 * Find, or create and register, the cache for the supplied id
 *
 * @param syncId a cache id
 * @return the cache for the id, or the unavailable sync if the base cache is not loaded
 */
private NiFiFlowCacheSync getSync(String syncId) {
    final boolean forPreview = false;
    return getSync(syncId, forPreview);
}
/**
 * Find the cache registered for the supplied id, creating a new one when the id is {@code null} or unknown.
 * The lookup is a single {@code get} so an entry expired concurrently by {@link #checkAndExpireUnusedCache()} results in a new cache rather than a {@code null} return.
 *
 * @param syncId     a cache id; when blank the new cache keeps the id it was constructed with
 * @param forPreview {@code true} to skip registering a newly created cache in the sync map
 * @return the existing or new cache, or {@link NiFiFlowCacheSync#UNAVAILABLE} when the base cache is not loaded
 */
private NiFiFlowCacheSync getSync(String syncId, boolean forPreview) {
    if (!isAvailable()) {
        return NiFiFlowCacheSync.UNAVAILABLE;
    }
    // the concurrent sync map rejects null keys, so only look up non-null ids
    NiFiFlowCacheSync existing = syncId != null ? syncMap.get(syncId) : null;
    if (existing != null) {
        return existing;
    }
    NiFiFlowCacheSync created = new NiFiFlowCacheSync();
    if (StringUtils.isNotBlank(syncId)) {
        created.setSyncId(syncId);
    }
    if (!forPreview) {
        syncMap.put(created.getSyncId(), created);
    }
    return created;
}
/**
 * if Kylo is clustered it needs to sync any updates from the other Kylo instances before proceeding.
 * Each pending update is applied to this cache without notifying the cluster again; a template is applied at most once per call.
 */
private void applyClusterUpdates() {
    List<NifiFlowCacheClusterUpdateMessage> pending = nifiFlowCacheClusterManager.findUpdates();
    // template names already handled during this pass
    Set<String> appliedTemplates = new HashSet<>();
    boolean hasPending = !pending.isEmpty();
    if (hasPending) {
        log.info("Kylo Cluster Update: Detected changes. About to apply {} updates ", pending.size());
    }
    for (NifiFlowCacheClusterUpdateMessage message : pending) {
        switch (message.getType()) {
            case FEED: {
                NifiFlowCacheFeedUpdate feedChange = nifiFlowCacheClusterManager.getFeedUpdate(message.getMessage());
                log.info("Kylo Cluster Update: Applying Feed Change update for {}", feedChange.getFeedName());
                updateFlow(feedChange);
                break;
            }
            case CONNECTION: {
                Collection<ConnectionDTO> changedConnections = nifiFlowCacheClusterManager.getConnectionsUpdate(message.getMessage());
                log.info("Kylo Cluster Update: Applying Connection list update");
                updateConnectionMap(changedConnections, false);
                break;
            }
            case PROCESSOR: {
                Collection<ProcessorDTO> changedProcessors = nifiFlowCacheClusterManager.getProcessorsUpdate(message.getMessage());
                log.info("Kylo Cluster Update: Applying Processor list update");
                updateProcessorIdNames(changedProcessors, false);
                break;
            }
            case TEMPLATE: {
                if (appliedTemplates.contains(message.getMessage())) {
                    // this template was already refreshed by an earlier message
                    break;
                }
                RegisteredTemplate changedTemplate = nifiFlowCacheClusterManager.getTemplate(message.getMessage());
                log.info("Kylo Cluster Update: Applying Template update for {} ", changedTemplate.getTemplateName());
                updateRegisteredTemplate(changedTemplate, false);
                appliedTemplates.add(message.getMessage());
                break;
            }
            default:
                break;
        }
    }
    if (hasPending) {
        nifiFlowCacheClusterManager.appliedUpdates(pending);
        lastUpdated = DateTime.now();
        log.info("Kylo Cluster Update: NiFi Flow File Cache is in sync. All {} updates have been applied to the cache. ", pending.size());
    }
}
/**
 * Compare the supplied sync against the base cache and return what changed.
 * Unless previewing, the time of this sync is recorded so the cache is not expired. Cluster updates are applied first when Kylo is clustered.
 *
 * @param sync    the cache to compare
 * @param preview {@code true} to compute the updates without recording the sync time or modifying the supplied sync
 * @return the updates, or an empty sync when the supplied sync is already up to date
 */
private NiFiFlowCacheSync syncAndReturnUpdates(NiFiFlowCacheSync sync, boolean preview) {
    if (!preview) {
        lastSyncTimeMap.put(sync.getSyncId(), DateTime.now());
    }
    if (isKyloClustered()) {
        applyClusterUpdates();
    }
    if (!sync.needsUpdate(lastUpdated)) {
        return NiFiFlowCacheSync.EMPTY(sync.getSyncId());
    }
    // take immutable copies of the base maps so the snapshot is not affected by later cache updates
    NifiFlowCacheSnapshot currentState = new NifiFlowCacheSnapshot.Builder()
        .withProcessorIdToFeedNameMap(ImmutableMap.copyOf(processorIdToFeedNameMap))
        .withProcessorIdToFeedProcessGroupId(ImmutableMap.copyOf(processorIdToFeedProcessGroupId))
        .withProcessorIdToProcessorName(ImmutableMap.copyOf(processorIdToProcessorName))
        .withStreamingFeeds(ImmutableSet.copyOf(streamingFeeds))
        .withFeeds(ImmutableSet.copyOf(allFeeds))
        .withConnections(ImmutableMap.copyOf(connectionIdToConnectionMap))
        .withSnapshotDate(lastUpdated)
        .build();
    return syncAndReturnUpdates(sync, currentState, preview);
}
/**
 * Build a sync holding only the entries of the latest snapshot that changed since the supplied sync was last updated.
 * Unless previewing, the supplied sync is pointed at the latest snapshot and its last sync date is advanced.
 *
 * @param sync    the cache to compare
 * @param latest  the latest snapshot of the base cache
 * @param preview {@code true} to leave the supplied sync untouched
 * @return a new sync with the updates, or an empty sync when there is nothing newer to report
 */
private NiFiFlowCacheSync syncAndReturnUpdates(NiFiFlowCacheSync sync, NifiFlowCacheSnapshot latest, boolean preview) {
    if (latest == null || !sync.needsUpdate(latest.getSnapshotDate())) {
        return NiFiFlowCacheSync.EMPTY(sync.getSyncId());
    }
    NifiFlowCacheSnapshot.Builder changes = new NifiFlowCacheSnapshot.Builder();
    changes.withProcessorIdToFeedNameMap(sync.getProcessorIdToFeedNameMapUpdatedSinceLastSync(latest.getProcessorIdToFeedNameMap()))
        .withProcessorIdToFeedProcessGroupId(sync.getProcessorIdToProcessGroupIdUpdatedSinceLastSync(latest.getProcessorIdToFeedProcessGroupId()))
        .withProcessorIdToProcessorName(sync.getProcessorIdToProcessorNameUpdatedSinceLastSync(latest.getProcessorIdToProcessorName()))
        // streaming feeds are always sent in full rather than as a difference
        .withStreamingFeeds(latest.getAllStreamingFeeds())
        .withConnections(sync.getConnectionIdToConnectionUpdatedSinceLastSync(latest.getConnectionIdToConnectionName(), latest.getConnectionIdToConnection()))
        .withFeeds(sync.getFeedsUpdatedSinceLastSync(latest.getAllFeeds()));
    NifiFlowCacheSnapshot changedEntries = changes.build();
    if (!preview) {
        //reset the pointers on this sync to be the latest
        sync.setSnapshot(latest);
        sync.setLastSync(latest.getSnapshotDate());
    }
    NiFiFlowCacheSync result = new NiFiFlowCacheSync(sync.getSyncId(), changedEntries);
    result.setUpdated(true);
    if (!preview) {
        result.setLastSync(latest.getSnapshotDate());
    }
    return result;
}
/**
 * Clears the current cache.
 * Every processor, connection, feed and template map populated by this class is emptied so a rebuild does not keep entries for flows that no longer exist.
 */
private void clearAll() {
    processorIdToFeedProcessGroupId.clear();
    processorIdToFeedNameMap.clear();
    processorIdToProcessorName.clear();
    processorIdMap.clear();
    feedFlowIdProcessorMap.clear();
    feedProcessorIdProcessorMap.clear();
    connectionIdToConnectionMap.clear();
    connectionIdCacheNameMap.clear();
    streamingFeeds.clear();
    allFeeds.clear();
    feedNameToTemplateNameMap.clear();
}
/**
 * Record, for every feed using the supplied template, the template name and whether the feed is streaming
 *
 * @param template         the registered template
 * @param feedTemplatesMap an optional map that, when not {@code null}, receives the feed name to template mapping
 */
private void populateTemplateMappingCache(RegisteredTemplate template, Map<String, RegisteredTemplate> feedTemplatesMap) {
    for (String feedName : template.getFeedNames()) {
        if (feedTemplatesMap != null) {
            feedTemplatesMap.put(feedName, template);
        }
        feedNameToTemplateNameMap.put(feedName, template.getTemplateName());
        // keep the streaming marker in line with the template's current setting
        boolean streaming = template.isStream();
        if (streaming) {
            streamingFeeds.add(feedName);
        } else {
            streamingFeeds.remove(feedName);
        }
    }
}
/**
 * Ensure that there is a configured reporting task.
 * When NiFi has no reporting task of the Kylo provenance type this will:
 * <ol>
 * <li>find the metadata controller service, or create it and enable it with the static configuration properties</li>
 * <li>enable the controller service if it exists but is disabled</li>
 * <li>create the reporting task, wire it to the controller service and set it to run every 5 seconds</li>
 * </ol>
 */
private void ensureNiFiKyloReportingTask() {
    String reportingTaskName = StringUtils.substringAfterLast(NiFiKyloProvenanceEventReportingTaskType, ".");
    if (nifiRestClient.getNiFiRestClient().reportingTasks().findFirstByType(NiFiKyloProvenanceEventReportingTaskType).isPresent()) {
        // the reporting task already exists
        return;
    }
    log.info("Attempting to create the {} in NiFi ", reportingTaskName);
    //create it
    //1 ensure the controller service exists and is wired correctly
    Optional<ControllerServiceDTO> controllerService = nifiRestClient.getNiFiRestClient().reportingTasks().findFirstControllerServiceByType(NiFiMetadataControllerServiceType);
    ControllerServiceDTO metadataControllerService;
    if (controllerService.isPresent()) {
        metadataControllerService = controllerService.get();
    } else {
        log.info("Attempting to create the Controller Service: {} with the name {} in NiFi ", NiFiMetadataControllerServiceType, NiFiMetadataServiceName);
        //create it and enable it
        //first create it
        ControllerServiceDTO controllerServiceDTO = new ControllerServiceDTO();
        controllerServiceDTO.setType(NiFiMetadataControllerServiceType);
        controllerServiceDTO.setName(NiFiMetadataServiceName);
        metadataControllerService = nifiRestClient.getNiFiRestClient().reportingTasks().createReportingTaskControllerService(controllerServiceDTO);
        //find the properties to inject
        Map<String, Object> configProperties = propertyExpressionResolver.getStaticConfigProperties();
        Map<String, String> stringConfigProperties = new HashMap<>();
        if (configProperties != null) {
            //transform the object map to the String map
            // a plain loop is used because Collectors.toMap throws a NullPointerException for null values
            for (Map.Entry<String, Object> property : configProperties.entrySet()) {
                Object value = property.getValue();
                stringConfigProperties.put(property.getKey(), value != null ? value.toString() : null);
            }
        }
        metadataControllerService = nifiRestClient.enableControllerServiceAndSetProperties(metadataControllerService.getId(), stringConfigProperties);
    }
    if (metadataControllerService == null) {
        return;
    }
    try {
        if (NifiProcessUtil.SERVICE_STATE.DISABLED.name().equalsIgnoreCase(metadataControllerService.getState())) {
            log.info("Reporting Task Controller Service {} exists, ensuring it is enabled.", NiFiMetadataServiceName);
            //enable it....
            metadataControllerService = nifiRestClient.enableControllerServiceAndSetProperties(metadataControllerService.getId(), null);
        }
    } catch (NifiClientRuntimeException e) {
        //best effort: record the failure and attempt to move on to create the task
        log.warn("Unable to enable the Reporting Task Controller Service {}. Attempting to create the Reporting Task anyway.", NiFiMetadataServiceName, e);
    }
    log.info("Creating the Reporting Task {} ", reportingTaskName);
    ReportingTaskDTO reportingTaskDTO = new ReportingTaskDTO();
    reportingTaskDTO.setType(NiFiKyloProvenanceEventReportingTaskType);
    reportingTaskDTO = nifiRestClient.getNiFiRestClient().reportingTasks().createReportingTask(reportingTaskDTO);
    //now set the properties
    ReportingTaskDTO updatedReportingTask = new ReportingTaskDTO();
    updatedReportingTask.setType(NiFiKyloProvenanceEventReportingTaskType);
    updatedReportingTask.setId(reportingTaskDTO.getId());
    updatedReportingTask.setName(reportingTaskName);
    updatedReportingTask.setProperties(new HashMap<>(1));
    updatedReportingTask.getProperties().put("Metadata Service", metadataControllerService.getId());
    updatedReportingTask.setSchedulingStrategy("TIMER_DRIVEN");
    updatedReportingTask.setSchedulingPeriod("5 secs");
    updatedReportingTask.setComments("Reporting task that will query the provenance repository and send the events and summary statistics over to Kylo via a JMS queue");
    updatedReportingTask.setState(NifiProcessUtil.PROCESS_STATE.RUNNING.name());
    //update it
    reportingTaskDTO = nifiRestClient.getNiFiRestClient().reportingTasks().update(updatedReportingTask);
    if (reportingTaskDTO != null) {
        log.info("Successfully created the Reporting Task {} ", reportingTaskName);
    } else {
        log.error("Error creating the Reporting Task {}. You will need to go into NiFi to resolve. ", reportingTaskName);
    }
}
/**
 * Called after someone updates/Registers a template in the UI using the template stepper.
 * This is used to update the feed marker for streaming/batch feeds
 *
 * @param template             the template that was registered or updated
 * @param notifyClusterMembers {@code true} to mark the cache as updated and, when clustered, tell the other Kylo instances about the change
 */
public synchronized void updateRegisteredTemplate(RegisteredTemplate template, boolean notifyClusterMembers) {
    populateTemplateMappingCache(template, null);
    // find every feed mapped to this template (the name is compared ignoring case)
    List<String> affectedFeeds = feedNameToTemplateNameMap.entrySet().stream()
        .filter(mapping -> mapping.getValue().equalsIgnoreCase(template.getTemplateName()))
        .map(Map.Entry::getKey)
        .collect(Collectors.toList());
    boolean streaming = template.isStream();
    if (streaming) {
        streamingFeeds.addAll(affectedFeeds);
    } else {
        streamingFeeds.removeAll(affectedFeeds);
    }
    if (!notifyClusterMembers) {
        return;
    }
    //mark the persistent table that this was updated
    if (nifiFlowCacheClusterManager.isClustered()) {
        nifiFlowCacheClusterManager.updateTemplate(template.getTemplateName());
    }
    lastUpdated = DateTime.now();
}
/**
 * Refresh the cached processor names and connections when a reusable template is updated
 *
 * @param templateName    the name of the template
 * @param processGroupDTO the process group that stores the flow of the reusable template
 */
public void updateCacheForReusableTemplate(String templateName, ProcessGroupDTO processGroupDTO) {
    updateProcessorIdNames(templateName, NifiProcessUtil.getProcessors(processGroupDTO));
    updateConnectionMap(templateName, NifiConnectionUtil.getAllConnections(processGroupDTO));
    // mark the base cache as changed so syncs pick up the new entries
    lastUpdated = DateTime.now();
}
/**
 * Add processors to the cache and notify the cluster members of the change
 *
 * @param templateName a template name (not used by this method)
 * @param processors   processors to add to the cache
 */
public void updateProcessorIdNames(String templateName, Collection<ProcessorDTO> processors) {
    final boolean notifyClusterMembers = true;
    updateProcessorIdNames(processors, notifyClusterMembers);
}
/**
 * Add the id to name mapping of the supplied processors to the cache
 *
 * @param processors           processors to add to the cache
 * @param notifyClusterMembers {@code true} to mark the cache as updated and, when clustered, publish the processors to the other Kylo instances
 */
private void updateProcessorIdNames(Collection<ProcessorDTO> processors, boolean notifyClusterMembers) {
    // collect the names first, then merge them into the shared map in one call
    Map<String, String> namesById = new HashMap<>();
    for (ProcessorDTO processor : processors) {
        namesById.put(processor.getId(), processor.getName());
    }
    this.processorIdToProcessorName.putAll(namesById);
    if (!notifyClusterMembers) {
        return;
    }
    if (nifiFlowCacheClusterManager.isClustered()) {
        nifiFlowCacheClusterManager.updateProcessors(processors);
    }
    lastUpdated = DateTime.now();
}
/**
 * Add connections to the cache and notify the cluster members of the change
 *
 * @param templateName a template name (not used by this method)
 * @param connections  connections to add to the cache
 */
public void updateConnectionMap(String templateName, Collection<ConnectionDTO> connections) {
    final boolean notifyClusterMembers = true;
    updateConnectionMap(connections, notifyClusterMembers);
}
/**
 * Convert the supplied NiFi connections and add them to the cache.
 * A {@code null} collection, and connections the converter returns {@code null} for, are ignored.
 *
 * @param connections          connections to add to the cache
 * @param notifyClusterMembers {@code true} to mark the cache as updated and, when clustered, publish the connections to the other Kylo instances
 */
private void updateConnectionMap(Collection<ConnectionDTO> connections, boolean notifyClusterMembers) {
    Map<String, NifiFlowConnection> converted = new HashMap<>();
    if (connections != null) {
        for (ConnectionDTO dto : connections) {
            NifiFlowConnection flowConnection = NiFiFlowConnectionConverter.toNiFiFlowConnection(dto);
            if (flowConnection == null) {
                continue;
            }
            converted.put(flowConnection.getConnectionIdentifier(), flowConnection);
        }
    }
    this.connectionIdToConnectionMap.putAll(toConnectionIdMap(converted.values()));
    if (!notifyClusterMembers) {
        return;
    }
    if (nifiFlowCacheClusterManager.isClustered()) {
        nifiFlowCacheClusterManager.updateConnections(connections);
    }
    lastUpdated = DateTime.now();
}
/**
 * Update the cache with the flow of a feed and notify the cluster members.
 * Used by CreateFeed builder
 *
 * @param feed             a feed
 * @param feedProcessGroup the process group created with this feed
 */
public void updateFlow(FeedMetadata feed, NifiFlowProcessGroup feedProcessGroup) {
    String categoryAndFeedName = feed.getCategoryAndFeedName();
    boolean streaming = feed.getRegisteredTemplate().isStream();
    String processGroupId = feedProcessGroup.getId();
    Collection<NifiFlowProcessor> flowProcessors = feedProcessGroup.getProcessorMap().values();
    Collection<NifiFlowConnection> flowConnections = feedProcessGroup.getConnectionIdMap().values();
    this.updateFlow(categoryAndFeedName, streaming, processGroupId, flowProcessors, flowConnections, true);
}
/**
 * Update the cache with the flow of a feed and notify the cluster members
 *
 * @param feedName         the name of the feed
 * @param isStream         {@code true} if its a streaming feed, {@code false} if its a batch feed
 * @param feedProcessGroup the process group created with this feed
 */
public void updateFlow(String feedName, boolean isStream, NifiFlowProcessGroup feedProcessGroup) {
    String processGroupId = feedProcessGroup.getId();
    Collection<NifiFlowProcessor> flowProcessors = feedProcessGroup.getProcessorMap().values();
    Collection<NifiFlowConnection> flowConnections = feedProcessGroup.getConnectionIdMap().values();
    this.updateFlow(feedName, isStream, processGroupId, flowProcessors, flowConnections, true);
}
/**
 * Update for clustered kylo.
 * Applies a feed change received from another Kylo instance; the cluster members are not notified again.
 *
 * @param flowCacheFeedUpdate the feed change to apply
 */
public void updateFlow(NifiFlowCacheFeedUpdate flowCacheFeedUpdate) {
    String feedName = flowCacheFeedUpdate.getFeedName();
    boolean streaming = flowCacheFeedUpdate.isStream();
    String processGroupId = flowCacheFeedUpdate.getFeedProcessGroupId();
    Collection<NifiFlowProcessor> flowProcessors = flowCacheFeedUpdate.getProcessors();
    Collection<NifiFlowConnection> flowConnections = flowCacheFeedUpdate.getConnections();
    updateFlow(feedName, streaming, processGroupId, flowProcessors, flowConnections, false);
}
/**
 * Update every cache map with the processors and connections of a feed flow.
 *
 * @param feedName             the category.feed name
 * @param isStream             {@code true} if its a streaming feed, {@code false} if its a batch feed
 * @param feedProcessGroupId   the id of the process group holding the feed flow
 * @param processors           the processors in the flow
 * @param connections          the connections in the flow; may be {@code null}, in which case no connection data is cached
 * @param notifyClusterMembers {@code true} to mark the cache as updated and, when clustered, publish the change to the other Kylo instances.
 *                             This only happens once the cache is loaded, not during a full rebuild
 */
private void updateFlow(String feedName, boolean isStream, String feedProcessGroupId, Collection<NifiFlowProcessor> processors, Collection<NifiFlowConnection> connections,
                        boolean notifyClusterMembers) {
    feedFlowIdProcessorMap.put(feedName, toFlowIdProcessorMap(processors));
    feedProcessorIdProcessorMap.put(feedName, toProcessorIdProcessorMap(processors));
    // registers processor id -> process group id and processor id -> processor name
    updateProcessorIdMaps(feedProcessGroupId, processors);
    if (connections != null) {
        connectionIdToConnectionMap.putAll(toConnectionIdMap(connections));
        for (NifiFlowConnection connection : connections) {
            String connectionId = connection.getConnectionIdentifier();
            String connectionName = connection.getName();
            // the name map is a concurrent map that rejects nulls, so unnamed connections are skipped
            if (connectionId != null && connectionName != null) {
                connectionIdCacheNameMap.put(connectionId, connectionName);
            }
        }
    }
    processorIdMap.putAll(toProcessorIdMap(processors));
    processorIdToFeedNameMap.putAll(toProcessorIdFeedNameMap(processors, feedName));
    if (isStream) {
        streamingFeeds.add(feedName);
    }
    allFeeds.add(feedName);
    //notify others of the cache update only if we are not doing a full refresh
    if (loaded && notifyClusterMembers) {
        if (nifiFlowCacheClusterManager.isClustered()) {
            nifiFlowCacheClusterManager.updateFeed(feedName, isStream, feedProcessGroupId, processors, connections);
        }
        lastUpdated = DateTime.now();
    }
}
/**
 * Register each processor against the supplied process group id and record its name
 *
 * @param processGroupId the id to associate with every processor
 * @param processors     the processors to register
 */
private void updateProcessorIdMaps(String processGroupId, Collection<NifiFlowProcessor> processors) {
    // collect into temporary maps first, then merge into the shared maps
    Map<String, String> groupIdByProcessorId = new HashMap<>();
    Map<String, String> nameByProcessorId = new HashMap<>();
    for (NifiFlowProcessor processor : processors) {
        groupIdByProcessorId.put(processor.getId(), processGroupId);
        nameByProcessorId.put(processor.getId(), processor.getName());
    }
    this.processorIdToFeedProcessGroupId.putAll(groupIdByProcessorId);
    this.processorIdToProcessorName.putAll(nameByProcessorId);
}
/**
 * Convert the connections to a map of the connection id to the cached connection data (id, name, source id and destination id)
 *
 * @param connections the connections to convert
 * @return a new map keyed by the connection id
 */
private Map<String, NiFiFlowCacheConnectionData> toConnectionIdMap(Collection<NifiFlowConnection> connections) {
    Map<String, NiFiFlowCacheConnectionData> dataById = new HashMap<>();
    for (NifiFlowConnection connection : connections) {
        NiFiFlowCacheConnectionData data =
            new NiFiFlowCacheConnectionData(connection.getConnectionIdentifier(), connection.getName(), connection.getSourceIdentifier(), connection.getDestinationIdentifier());
        dataById.put(connection.getConnectionIdentifier(), data);
    }
    return dataById;
}
/**
 * Index the processors by their id
 *
 * @param processors the processors to index
 * @return a map of the processor id to the processor
 */
private Map<String, NifiFlowProcessor> toProcessorIdMap(Collection<NifiFlowProcessor> processors) {
    return processors.stream()
        .collect(Collectors.toMap(processor -> processor.getId(), processor -> processor));
}
/**
 * Map the id of every processor to the supplied feed name
 *
 * @param processors the processors in the feed
 * @param feedName   the feed name to associate with each processor
 * @return a map of the processor id to the feed name
 */
private Map<String, String> toProcessorIdFeedNameMap(Collection<NifiFlowProcessor> processors, String feedName) {
    return processors.stream()
        .collect(Collectors.toMap(processor -> processor.getId(), processor -> feedName));
}
/**
 * Group the processors by their {@code flowId}, ignoring the processors without one
 *
 * @param processors the processors to group; may be {@code null} or empty
 * @return the processors grouped by flow id, or an empty map when there are no processors
 */
private Map<String, List<NifiFlowProcessor>> toFlowIdProcessorMap(Collection<NifiFlowProcessor> processors) {
    if (processors == null || processors.isEmpty()) {
        return Collections.emptyMap();
    }
    return processors.stream()
        .filter(processor -> processor.getFlowId() != null)
        .collect(Collectors.groupingBy(NifiFlowProcessor::getFlowId));
}
/**
 * Group the processors by their processor id
 *
 * @param processors the processors to group; may be {@code null} or empty
 * @return the processors grouped by id, or a new empty map when there are no processors
 */
private Map<String, List<NifiFlowProcessor>> toProcessorIdProcessorMap(Collection<NifiFlowProcessor> processors) {
    if (processors == null || processors.isEmpty()) {
        return new HashMap<>();
    }
    return processors.stream()
        .collect(Collectors.groupingBy(NifiFlowProcessor::getId));
}
/**
 * Summarize the sync caches currently registered with this cache
 *
 * @return a summary built from the map of sync ids to caches
 */
public CacheSummary cacheSummary() {
    return CacheSummary.build(this.syncMap);
}
/**
 * Start the background timer that checks for, and expires, unused sync caches every 30 seconds.
 * The timer runs on a daemon thread so this executor, which is never shut down, cannot keep the JVM from exiting.
 */
private void initExpireTimerThread() {
    long timer = 30; // run every 30 sec to check and expire
    ScheduledExecutorService service = Executors.newSingleThreadScheduledExecutor(runnable -> {
        Thread thread = new Thread(runnable, "nifi-flow-cache-expire");
        thread.setDaemon(true);
        return thread;
    });
    service.scheduleAtFixedRate(this::checkAndExpireUnusedCache, timer, timer, TimeUnit.SECONDS);
}
/**
 * Expire any cache entries that havent been touched in 60 minutes.
 * Any exception raised while expiring is logged and not rethrown.
 */
public void checkAndExpireUnusedCache() {
    int minutes = 60;
    try {
        long expireAfter = minutes * 60L * 1000L; //60 min
        Set<String> expiredIds = new HashSet<>();
        //find cache items that havent been synced in allotted time
        for (Map.Entry<String, DateTime> lastSync : lastSyncTimeMap.entrySet()) {
            long idleMillis = DateTime.now().getMillis() - lastSync.getValue().getMillis();
            if (idleMillis <= expireAfter) {
                continue;
            }
            String syncId = lastSync.getKey();
            syncMap.remove(syncId);
            expiredIds.add(syncId);
            log.info("Expiring Cache {}. This cache has not been used in over {} minutes", syncId, minutes);
        }
        // forget the sync times of the caches that were just removed
        for (String syncId : expiredIds) {
            lastSyncTimeMap.remove(syncId);
        }
    } catch (Exception e) {
        log.error("Error attempting to invalidate flow cache for items not touched in {} or more minutes", minutes, e);
    }
}
public static class CacheSummary {
private Map<String, Integer> summary = new HashMap<>();
private Integer cachedSyncIds;
public CacheSummary() {
}
private CacheSummary(Map<String, Integer> cacheIds) {
this.summary = cacheIds;
this.cachedSyncIds = cacheIds.keySet().size();
}
public static CacheSummary build(Map<String, NiFiFlowCacheSync> syncMap) {
Map<String, Integer>
cacheIds =
syncMap.entrySet().stream().collect(Collectors.toMap(stringNiFiFlowCacheSyncEntry -> stringNiFiFlowCacheSyncEntry.getKey(),
stringNiFiFlowCacheSyncEntry1 -> stringNiFiFlowCacheSyncEntry1.getValue().getSnapshot().getProcessorIdToFeedNameMap().size()));
return new CacheSummary(cacheIds);
}
public Map<String, Integer> getSummary() {
return summary;
}
public void setSummary(Map<String, Integer> summary) {
this.summary = summary;
}
public Integer getCachedSyncIds() {
return cachedSyncIds;
}
public void setCachedSyncIds(Integer cachedSyncIds) {
this.cachedSyncIds = cachedSyncIds;
}
}
}