package com.thinkbiganalytics.nifi.provenance.cache;

/*-
 * #%L
 * thinkbig-nifi-provenance-repo
 * %%
 * Copyright (C) 2017 ThinkBig Analytics
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.thinkbiganalytics.nifi.provenance.FeedFlowFileNotFoundException;
import com.thinkbiganalytics.nifi.provenance.KyloProcessorFlowType;
import com.thinkbiganalytics.nifi.provenance.ProvenanceFeedLookup;
import com.thinkbiganalytics.nifi.provenance.model.FeedFlowFile;
import com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO;
import com.thinkbiganalytics.nifi.provenance.model.util.ProvenanceEventUtil;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;

import java.util.concurrent.atomic.AtomicLong;

/**
 * Utility to build the FlowFile graph from an incoming Provenance Event and cache the FlowFile Graph.
 *
 * <p>Each event is associated with a {@link FeedFlowFile} held in the {@link FeedFlowFileGuavaCache}.
 * Parent and child flow file ids referenced by the event are registered in the cache against that same
 * {@link FeedFlowFile} so later events on those flow files resolve to it.</p>
 */
public class FeedFlowFileCacheUtil {

    private static final Logger log = LoggerFactory.getLogger(FeedFlowFileCacheUtil.class);

    @Autowired
    ProvenanceFeedLookup provenanceFeedLookup;

    @Autowired
    FeedFlowFileGuavaCache flowFileGuavaCache;

    // internal counters for general stats
    AtomicLong eventCounter = new AtomicLong(0L);

    public FeedFlowFileCacheUtil() {

    }

    /**
     * Log the number of events that have been run through {@link #cacheAndBuildFlowFileGraph(ProvenanceEventRecordDTO)}
     * without an exception being thrown.
     */
    public void logStats() {
        log.info("Processed {} events. ", eventCounter.get());
    }

    /**
     * Create the FlowFile graph and cache the FlowFile with event into the GuavaCache for processing.
     *
     * <p>Steps, in order:</p>
     * <ol>
     *   <li>If the event is a first event with no parents, mark it as the start of a job.</li>
     *   <li>Resolve the feed flow file for the event, either from the cache entry for the event's flow file id
     *       or, failing that, from the last parent flow file id found in the cache.</li>
     *   <li>Register every child flow file id of the event against the resolved feed flow file.</li>
     *   <li>Populate feed/processor information on the event and add the event to the feed flow file.</li>
     *   <li>Mark the event as the end of the job when it is an ending event and the feed is complete.</li>
     * </ol>
     *
     * @param event the provenance event to attach to the flow file graph; it is mutated by this call
     * @throws FeedFlowFileNotFoundException if no feed flow file can be resolved for the event from the cache
     */
    public void cacheAndBuildFlowFileGraph(ProvenanceEventRecordDTO event) {
        //An event is the very first in the flow if it is a CREATE or RECEIVE event and if there are no Parent flow files
        //This indicates the start of a Job.
        if (ProvenanceEventUtil.isFirstEvent(event) && !hasParents(event)) {
            markStartOfJob(event);
        }

        FeedFlowFile feedFlowFile = null;
        if (flowFileGuavaCache.isCached(event.getFlowFileUuid())) {
            feedFlowFile = flowFileGuavaCache.getEntry(event.getFlowFileUuid());
            event.setFeedFlowFile(feedFlowFile);
        }

        //Build the graph of parent/child flow files
        FeedFlowFile parentFlowFile = linkToParents(event);

        //fall back to the parent's feed flow file when the event's own flow file id was not cached
        if (feedFlowFile == null) {
            feedFlowFile = parentFlowFile;
        }

        if (feedFlowFile == null) {
            log.error("Unable to find feed flow file in cache!!!! for {} ", event);
            throw new FeedFlowFileNotFoundException("Unable to find Feed Flow File for event " + event.getEventId() + ", Processor: " + event.getComponentId());
        }
        event.setFeedFlowFile(feedFlowFile);

        linkChildren(event, feedFlowFile);

        event.setComponentName(provenanceFeedLookup.getProcessorName(event.getComponentId()));

        //assign the feed info for quick lookup on the flow file?
        boolean assignedFeedInfo = provenanceFeedLookup.assignFeedInformationToFlowFile(feedFlowFile);
        if (!assignedFeedInfo) {
            log.error("Unable to assign Feed Info to flow file {}, root: {}, for event {} ({}) processorId: {} ", feedFlowFile.getId(), feedFlowFile, event.getComponentName(),
                      event.getEventId(), event.getComponentId());
        } else {
            event.setFeedName(feedFlowFile.getFeedName());
            event.setFeedProcessGroupId(feedFlowFile.getFeedProcessGroupId());
            //looked up again after the feed information was assigned
            event.setComponentName(provenanceFeedLookup.getProcessorName(event.getComponentId()));
        }

        event.setStream(provenanceFeedLookup.isStream(event));
        event.setJobFlowFileId(feedFlowFile.getId());

        feedFlowFile.addEvent(event);

        KyloProcessorFlowType flowType = provenanceFeedLookup.setProcessorFlowType(event);

        feedFlowFile.checkIfEventStartsTheFlowFile(event);
        feedFlowFile.checkAndMarkComplete(event);

        if (KyloProcessorFlowType.FAILURE.equals(flowType) && event.getFeedFlowFile() != null) {
            event.getFeedFlowFile().incrementFailedEvents();
        }

        if (event.isEndingFlowFileEvent() && feedFlowFile.isFeedComplete()) {
            event.setIsEndOfJob(true);
            event.setIsFinalJobEvent(true);
            log.info("Ending the Job for Feed {} and flowfile: {}.  Event: {} ", event.getFeedName(), event.getFlowFileUuid(), event);
        }

        eventCounter.incrementAndGet();
    }

    /**
     * Flag the event as the start of a job and make sure a root feed flow file exists in the cache for it.
     * An existing cache entry for the event's flow file id is reused; otherwise a new one is created and cached.
     *
     * @param event the starting event
     */
    private void markStartOfJob(ProvenanceEventRecordDTO event) {
        //we only need to store references to the root feed flow file.
        FeedFlowFile rootFlowFile;
        if (flowFileGuavaCache.isCached(event.getFlowFileUuid())) {
            rootFlowFile = flowFileGuavaCache.getEntry(event.getFlowFileUuid());
        } else {
            rootFlowFile = new FeedFlowFile(event.getFlowFileUuid());
            flowFileGuavaCache.add(event.getFlowFileUuid(), rootFlowFile);
        }
        rootFlowFile.setFirstEvent(event);
        event.setFeedFlowFile(rootFlowFile);
        event.setIsStartOfJob(true);
    }

    /**
     * Point the event's flow file id at the feed flow file of each parent that is present in the cache.
     *
     * @param event the event whose parent flow file ids should be linked
     * @return the feed flow file of the last parent found in the cache, or {@code null} if the event has no parents
     * or none of them are cached
     */
    private FeedFlowFile linkToParents(ProvenanceEventRecordDTO event) {
        FeedFlowFile parentFlowFile = null;
        if (!hasParents(event)) {
            return parentFlowFile;
        }
        for (String parent : event.getParentUuids()) {
            if (!flowFileGuavaCache.isCached(parent)) {
                //best effort: a missing parent is not fatal here; the caller fails only if nothing at all resolves
                log.debug("Unable to find parent flow file {} in cache for event {} (flow file {})", parent, event.getEventId(), event.getFlowFileUuid());
                continue;
            }
            //set this flowfileid pointing to the parent
            parentFlowFile = flowFileGuavaCache.getEntry(parent);
            flowFileGuavaCache.add(event.getFlowFileUuid(), parentFlowFile);

            //if the parent == the id of the flowfile in the cache it means this is a starting flow that relates to another starting feed flow
            //likely the flow files got merged and are linked.
            //track this relationship
            if (parentFlowFile.getId().equals(parent) && event.isStartOfJob()) {
                //relate them
                parentFlowFile.addChildFlowFile(event.getFeedFlowFile().getId());
            }
            //assign the event flow as a child to the feed flow
            if (!event.getFlowFileUuid().equals(parentFlowFile.getId())) {
                parentFlowFile.assignFlowFileToParent(event.getFlowFileUuid(), parentFlowFile.getId());
            }
        }
        return parentFlowFile;
    }

    /**
     * Register every child flow file id of the event in the cache against the supplied feed flow file and
     * record the parent relationship and start time on that feed flow file.
     *
     * @param event        the event that produced the children
     * @param feedFlowFile the feed flow file resolved for the event
     */
    private void linkChildren(ProvenanceEventRecordDTO event, FeedFlowFile feedFlowFile) {
        if (!hasChildren(event)) {
            return;
        }
        for (String child : event.getChildUuids()) {
            flowFileGuavaCache.add(child, feedFlowFile);
            //set the child activity
            feedFlowFile.assignFlowFileToParent(child, event.getFlowFileUuid());
            feedFlowFile.assignChildFlowFileStartTime(child, event.getEventTime().getMillis());
            feedFlowFile.addChildFlowFile(child);
        }
    }

    /**
     * @param event the event to inspect
     * @return {@code true} if the event references at least one parent flow file id
     */
    private static boolean hasParents(ProvenanceEventRecordDTO event) {
        return event.getParentUuids() != null && !event.getParentUuids().isEmpty();
    }

    /**
     * @param event the event to inspect
     * @return {@code true} if the event references at least one child flow file id
     */
    private static boolean hasChildren(ProvenanceEventRecordDTO event) {
        return event.getChildUuids() != null && !event.getChildUuids().isEmpty();
    }
}