/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.controller.repository;

import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.commons.io.IOUtils;
import org.apache.nifi.connectable.Connectable;
import org.apache.nifi.connectable.Connection;
import org.apache.nifi.controller.ProcessorNode;
import org.apache.nifi.controller.queue.FlowFileQueue;
import org.apache.nifi.controller.queue.QueueSize;
import org.apache.nifi.controller.repository.claim.ContentClaim;
import org.apache.nifi.controller.repository.claim.ContentClaimWriteCache;
import org.apache.nifi.controller.repository.claim.ResourceClaim;
import org.apache.nifi.controller.repository.io.DisableOnCloseInputStream;
import org.apache.nifi.controller.repository.io.DisableOnCloseOutputStream;
import org.apache.nifi.controller.repository.io.FlowFileAccessInputStream;
import org.apache.nifi.controller.repository.io.FlowFileAccessOutputStream;
import org.apache.nifi.controller.repository.io.LimitedInputStream;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.processor.FlowFileFilter;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.FlowFileAccessException;
import org.apache.nifi.processor.exception.FlowFileHandlingException;
import org.apache.nifi.processor.exception.MissingFlowFileException;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.processor.io.StreamCallback;
import org.apache.nifi.provenance.ProvenanceEventBuilder;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.ProvenanceEventRepository;
import org.apache.nifi.provenance.ProvenanceEventType;
import org.apache.nifi.provenance.ProvenanceReporter;
import org.apache.nifi.provenance.StandardProvenanceEventRecord;
import org.apache.nifi.stream.io.ByteCountingInputStream;
import org.apache.nifi.stream.io.ByteCountingOutputStream;
import org.apache.nifi.stream.io.StreamUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * <p>
 * Provides a ProcessSession that ensures all accesses, changes and transfers
 * occur in an atomic manner for all FlowFiles including their contents and
 * attributes
 * </p>
 * <p>
 * NOT THREAD SAFE
 * </p>
 */
public final class StandardProcessSession implements ProcessSession, ProvenanceEventEnricher {

    private static final AtomicLong idGenerator = new AtomicLong(0L);
    private static final AtomicLong enqueuedIndex = new AtomicLong(0L);

    // determines how many things must be transferred, removed, modified in order to avoid logging the FlowFile ID's on commit/rollback
    public static final int VERBOSE_LOG_THRESHOLD = 10;
    public static final String DEFAULT_FLOWFILE_PATH = "./";

    private static final Logger LOG = LoggerFactory.getLogger(StandardProcessSession.class);
    private static final Logger claimLog = LoggerFactory.getLogger(StandardProcessSession.class.getSimpleName() + ".claims");
    private static final int MAX_ROLLBACK_FLOWFILES_TO_LOG = 5;

    private final Map<FlowFileRecord, StandardRepositoryRecord> records = new HashMap<>();
    private final Map<String, StandardFlowFileEvent> connectionCounts = new HashMap<>();
    private final Map<FlowFileQueue, Set<FlowFileRecord>> unacknowledgedFlowFiles = new HashMap<>();
    private final Map<String, Long> counters = new HashMap<>();
    private final Map<ContentClaim, ByteCountingOutputStream> appendableStreams = new HashMap<>();
    private final ProcessContext context;
    private final Map<FlowFile, Integer> readRecursionSet = new HashMap<>(); // used to track what is currently being operated on to prevent logic failures if recursive calls occur
    private final Set<FlowFile> writeRecursionSet = new HashSet<>();
    private final Map<FlowFile, Path> deleteOnCommit = new HashMap<>();
    private final long sessionId;
    private final String connectableDescription;

    private final Set<String> removedFlowFiles = new HashSet<>();
    private final Set<String> createdFlowFiles = new HashSet<>();

    private final StandardProvenanceReporter provenanceReporter;

    private int removedCount = 0; // number of flowfiles removed in this session
    private long removedBytes = 0L; // size of all flowfiles removed in this session
    private long bytesRead = 0L;
    private long bytesWritten = 0L;
    private int flowFilesIn = 0, flowFilesOut = 0;
    private long contentSizeIn = 0L, contentSizeOut = 0L;

    private ContentClaim currentReadClaim = null;
    private ByteCountingInputStream currentReadClaimStream = null;
    private long processingStartTime;

    // List of InputStreams that have been opened by calls to {@link #read(FlowFile)} and not yet closed
    private final Map<FlowFile, InputStream> openInputStreams = new HashMap<>();
    // List of OutputStreams that have been opened by calls to {@link #write(FlowFile)} and not yet closed
    private final Map<FlowFile, OutputStream> openOutputStreams = new HashMap<>();
    // maps a FlowFile to all Provenance Events that were generated for that FlowFile.
    // we do this so that if we generate a Fork event, for example, and then remove the event in the same
    // Session, we will not send that event to the Provenance Repository
    private final Map<FlowFile, List<ProvenanceEventRecord>> generatedProvenanceEvents = new HashMap<>();

    // when Forks are generated for a single parent, we add the Fork event to this map, with the Key being the parent
    // so that we are able to aggregate many into a single Fork Event.
    private final Map<FlowFile, ProvenanceEventBuilder> forkEventBuilders = new HashMap<>();

    private Checkpoint checkpoint = new Checkpoint();
    private final ContentClaimWriteCache claimCache;

    public StandardProcessSession(final ProcessContext context) {
        this.context = context;

        final Connectable connectable = context.getConnectable();
        final String componentType;

        String description = connectable.toString();
        switch (connectable.getConnectableType()) {
            case PROCESSOR:
                final ProcessorNode procNode = (ProcessorNode) connectable;
                componentType = procNode.getComponentType();
                description = procNode.getProcessor().toString();
                break;
            case INPUT_PORT:
                componentType = "Input Port";
                break;
            case OUTPUT_PORT:
                componentType = "Output Port";
                break;
            case REMOTE_INPUT_PORT:
                componentType = ProvenanceEventRecord.REMOTE_INPUT_PORT_TYPE;
                break;
            case REMOTE_OUTPUT_PORT:
                componentType = ProvenanceEventRecord.REMOTE_OUTPUT_PORT_TYPE;
                break;
            case FUNNEL:
                componentType = "Funnel";
                break;
            default:
                throw new AssertionError("Connectable type is " + connectable.getConnectableType());
        }

        this.provenanceReporter = new StandardProvenanceReporter(this, connectable.getIdentifier(), componentType, context.getProvenanceRepository(), this);
        this.sessionId = idGenerator.getAndIncrement();
        this.connectableDescription = description;
        this.claimCache = new ContentClaimWriteCache(context.getContentRepository());

        LOG.trace("Session {} created for {}", this, connectableDescription);
        processingStartTime = System.nanoTime();
    }
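    // Illustrative usage sketch (assumes a typical processor's onTrigger; REL_SUCCESS and the attribute
    // name are hypothetical, not part of this class): a processor drives a session like the one built above as
    //
    //     FlowFile flowFile = session.get();
    //     if (flowFile == null) {
    //         return;
    //     }
    //     flowFile = session.putAttribute(flowFile, "processed", "true");
    //     session.transfer(flowFile, REL_SUCCESS);
    //
    // with the framework then invoking commit() on success, or rollback() if the processor throws.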
    private void closeStreams(final Map<FlowFile, ? extends Closeable> streamMap) {
        final Map<FlowFile, ? extends Closeable> openStreamCopy = new HashMap<>(streamMap); // avoid ConcurrentModificationException by creating a copy of the Map
        for (final Map.Entry<FlowFile, ? extends Closeable> entry : openStreamCopy.entrySet()) {
            final FlowFile flowFile = entry.getKey();
            final Closeable openStream = entry.getValue();

            LOG.warn("{} closing {} for {} because the session was committed without the stream being closed.", this, openStream, flowFile);

            try {
                openStream.close();
            } catch (final Exception e) {
                LOG.warn("{} Attempted to close {} for {} due to session commit but close failed", this, openStream, this.connectableDescription);
                LOG.warn("", e);
            }
        }
    }

    public void checkpoint() {
        resetWriteClaims(false);

        closeStreams(openInputStreams);
        closeStreams(openOutputStreams);

        if (!readRecursionSet.isEmpty()) {
            throw new IllegalStateException();
        }
        if (!writeRecursionSet.isEmpty()) {
            throw new IllegalStateException();
        }

        if (this.checkpoint == null) {
            this.checkpoint = new Checkpoint();
        }

        if (records.isEmpty()) {
            LOG.trace("{} checkpointed, but no events were performed by this ProcessSession", this);
            return;
        }

        // any drop event that is the result of an auto-terminate should happen at the very end, so we keep the
        // records in a separate List so that they can be persisted to the Provenance Repo after all of the
        // Processor-reported events.
        List<ProvenanceEventRecord> autoTerminatedEvents = null;

        // validate that all records have a transfer relationship for them and if so determine the destination node and clone as necessary
        final Map<FlowFileRecord, StandardRepositoryRecord> toAdd = new HashMap<>();
        for (final StandardRepositoryRecord record : records.values()) {
            if (record.isMarkedForDelete()) {
                continue;
            }

            final Relationship relationship = record.getTransferRelationship();
            if (relationship == null) {
                rollback();
                throw new FlowFileHandlingException(record.getCurrent() + " transfer relationship not specified");
            }

            final List<Connection> destinations = new ArrayList<>(context.getConnections(relationship));
            if (destinations.isEmpty() && !context.getConnectable().isAutoTerminated(relationship)) {
                if (relationship != Relationship.SELF) {
                    rollback();
                    throw new FlowFileHandlingException(relationship + " does not have any destinations for " + context.getConnectable());
                }
            }

            if (destinations.isEmpty() && relationship == Relationship.SELF) {
                record.setDestination(record.getOriginalQueue());
            } else if (destinations.isEmpty()) {
                record.markForDelete();

                if (autoTerminatedEvents == null) {
                    autoTerminatedEvents = new ArrayList<>();
                }

                final ProvenanceEventRecord dropEvent;
                try {
                    dropEvent = provenanceReporter.generateDropEvent(record.getCurrent(), "Auto-Terminated by " + relationship.getName() + " Relationship");
                    autoTerminatedEvents.add(dropEvent);
                } catch (final Exception e) {
                    LOG.warn("Unable to generate Provenance Event for {} on behalf of {} due to {}", record.getCurrent(), connectableDescription, e);
                    if (LOG.isDebugEnabled()) {
                        LOG.warn("", e);
                    }
                }
            } else {
                final Connection finalDestination = destinations.remove(destinations.size() - 1); // remove last element
                record.setDestination(finalDestination.getFlowFileQueue());
                incrementConnectionInputCounts(finalDestination, record);

                for (final Connection destination : destinations) { // iterate over remaining destinations and "clone" as needed
                    incrementConnectionInputCounts(destination, record);
                    final FlowFileRecord currRec = record.getCurrent();
                    final StandardFlowFileRecord.Builder builder = new StandardFlowFileRecord.Builder().fromFlowFile(currRec);
                    builder.id(context.getNextFlowFileSequence());

                    final String newUuid = UUID.randomUUID().toString();
                    builder.addAttribute(CoreAttributes.UUID.key(), newUuid);

                    final FlowFileRecord clone = builder.build();
                    final StandardRepositoryRecord newRecord = new StandardRepositoryRecord(destination.getFlowFileQueue());
                    provenanceReporter.clone(currRec, clone, false);

                    final ContentClaim claim = clone.getContentClaim();
                    if (claim != null) {
                        context.getContentRepository().incrementClaimaintCount(claim);
                    }
                    newRecord.setWorking(clone, Collections.<String, String> emptyMap());

                    newRecord.setDestination(destination.getFlowFileQueue());
                    newRecord.setTransferRelationship(record.getTransferRelationship());
                    // put the mapping into toAdd because adding to records now will cause a ConcurrentModificationException
                    toAdd.put(clone, newRecord);
                }
            }
        }

        records.putAll(toAdd);
        toAdd.clear();

        checkpoint.checkpoint(this, autoTerminatedEvents);
        resetState();
    }

    @Override
    public void commit() {
        checkpoint();
        commit(this.checkpoint);
        this.checkpoint = null;
    }

    @SuppressWarnings({"unchecked", "rawtypes"})
    private void commit(final Checkpoint checkpoint) {
        try {
            final long commitStartNanos = System.nanoTime();

            resetReadClaim();
            try {
                claimCache.flush();
            } finally {
                claimCache.reset();
            }

            final long updateProvenanceStart = System.nanoTime();
            updateProvenanceRepo(checkpoint);
            final long claimRemovalStart = System.nanoTime();
            final long updateProvenanceNanos = claimRemovalStart - updateProvenanceStart;

            /**
             * Figure out which content claims can be released. At this point,
             * we will decrement the Claimant Count for the claims via the
             * Content Repository. We do not actually destroy the content
             * because otherwise, we could remove the Original Claim and
             * crash/restart before the FlowFileRepository is updated. This will
             * result in the FlowFile being restored such that the content claim
             * points to the Original Claim -- which has already been removed!
             */
            for (final Map.Entry<FlowFileRecord, StandardRepositoryRecord> entry : checkpoint.records.entrySet()) {
                final FlowFile flowFile = entry.getKey();
                final StandardRepositoryRecord record = entry.getValue();

                if (record.isMarkedForDelete()) {
                    // if the working claim is not the same as the original claim, we can immediately destroy the working claim
                    // because it was created in this session and is to be deleted. We don't need to wait for the FlowFile Repo to sync.
                    decrementClaimCount(record.getWorkingClaim());

                    if (record.getOriginalClaim() != null && !record.getOriginalClaim().equals(record.getWorkingClaim())) {
                        // if working & original claim are same, don't remove twice; we only want to remove the original
                        // if it's different from the working. Otherwise, we remove two claimant counts. This causes
                        // an issue if we only updated the FlowFile attributes.
                        decrementClaimCount(record.getOriginalClaim());
                    }

                    final long flowFileLife = System.currentTimeMillis() - flowFile.getEntryDate();
                    final Connectable connectable = context.getConnectable();
                    final Object terminator = connectable instanceof ProcessorNode ? ((ProcessorNode) connectable).getProcessor() : connectable;
                    LOG.info("{} terminated by {}; life of FlowFile = {} ms", new Object[] {flowFile, terminator, flowFileLife});
                } else if (record.isWorking() && record.getWorkingClaim() != record.getOriginalClaim()) {
                    // records which have been updated - remove original if exists
                    decrementClaimCount(record.getOriginalClaim());
                }
            }
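            // Worked example of the claimant-count arithmetic above (counts are assumed): if a FlowFile was
            // cloned once, its content claim carries a claimant count of 2. Deleting the FlowFile here
            // decrements the count to 1, so the content survives for the clone and is actually destroyed
            // only once the last claimant is released -- and only after the FlowFile Repository is updated.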
            final long claimRemovalFinishNanos = System.nanoTime();
            final long claimRemovalNanos = claimRemovalFinishNanos - claimRemovalStart;

            // Update the FlowFile Repository
            try {
                final Collection<StandardRepositoryRecord> repoRecords = checkpoint.records.values();
                context.getFlowFileRepository().updateRepository((Collection) repoRecords);
            } catch (final IOException ioe) {
                // if we fail to commit the session, we need to roll back
                // the checkpoints as well because none of the checkpoints
                // were ever committed.
                rollback(false, true);
                throw new ProcessException("FlowFile Repository failed to update", ioe);
            }

            final long flowFileRepoUpdateFinishNanos = System.nanoTime();
            final long flowFileRepoUpdateNanos = flowFileRepoUpdateFinishNanos - claimRemovalFinishNanos;

            updateEventRepository(checkpoint);

            final long updateEventRepositoryFinishNanos = System.nanoTime();
            final long updateEventRepositoryNanos = updateEventRepositoryFinishNanos - flowFileRepoUpdateFinishNanos;

            // transfer the flowfiles to the connections' queues.
            final Map<FlowFileQueue, Collection<FlowFileRecord>> recordMap = new HashMap<>();
            for (final StandardRepositoryRecord record : checkpoint.records.values()) {
                if (record.isMarkedForAbort() || record.isMarkedForDelete()) {
                    continue; // these don't need to be transferred
                }
                // record.getCurrent() will return null if this record was created in this session --
                // in this case, we just ignore it, and it will be cleaned up by clearing the records map.
                if (record.getCurrent() != null) {
                    Collection<FlowFileRecord> collection = recordMap.get(record.getDestination());
                    if (collection == null) {
                        collection = new ArrayList<>();
                        recordMap.put(record.getDestination(), collection);
                    }
                    collection.add(record.getCurrent());
                }
            }

            for (final Map.Entry<FlowFileQueue, Collection<FlowFileRecord>> entry : recordMap.entrySet()) {
                entry.getKey().putAll(entry.getValue());
            }

            final long enqueueFlowFileFinishNanos = System.nanoTime();
            final long enqueueFlowFileNanos = enqueueFlowFileFinishNanos - updateEventRepositoryFinishNanos;

            // Delete any files from disk that need to be removed.
            for (final Path path : checkpoint.deleteOnCommit.values()) {
                try {
                    Files.deleteIfExists(path);
                } catch (final IOException e) {
                    throw new FlowFileAccessException("Unable to delete " + path.toFile().getAbsolutePath(), e);
                }
            }
            checkpoint.deleteOnCommit.clear();

            if (LOG.isInfoEnabled()) {
                final String sessionSummary = summarizeEvents(checkpoint);
                if (!sessionSummary.isEmpty()) {
                    LOG.info("{} for {}, committed the following events: {}", new Object[] {this, connectableDescription, sessionSummary});
                }
            }

            for (final Map.Entry<String, Long> entry : checkpoint.counters.entrySet()) {
                adjustCounter(entry.getKey(), entry.getValue(), true);
            }

            acknowledgeRecords();
            resetState();

            if (LOG.isDebugEnabled()) {
                final StringBuilder timingInfo = new StringBuilder();
                timingInfo.append("Session commit for ").append(this).append(" [").append(connectableDescription).append("]").append(" took ");

                final long commitNanos = System.nanoTime() - commitStartNanos;
                formatNanos(commitNanos, timingInfo);
                timingInfo.append("; FlowFile Repository Update took ");
                formatNanos(flowFileRepoUpdateNanos, timingInfo);
                timingInfo.append("; Claim Removal took ");
                formatNanos(claimRemovalNanos, timingInfo);
                timingInfo.append("; FlowFile Event Update took ");
                formatNanos(updateEventRepositoryNanos, timingInfo);
                timingInfo.append("; Enqueuing FlowFiles took ");
                formatNanos(enqueueFlowFileNanos, timingInfo);
                timingInfo.append("; Updating Provenance Event Repository took ");
                formatNanos(updateProvenanceNanos, timingInfo);

                LOG.debug(timingInfo.toString());
            }
        } catch (final Exception e) {
            try {
                // if we fail to commit the session, we need to roll back
                // the checkpoints as well because none of the checkpoints
                // were ever committed.
                rollback(false, true);
            } catch (final Exception e1) {
                e.addSuppressed(e1);
            }

            if (e instanceof RuntimeException) {
                throw (RuntimeException) e;
            } else {
                throw new ProcessException(e);
            }
        }
    }

    private void updateEventRepository(final Checkpoint checkpoint) {
        int flowFilesReceived = 0;
        int flowFilesSent = 0;
        long bytesReceived = 0L;
        long bytesSent = 0L;

        for (final ProvenanceEventRecord event : checkpoint.reportedEvents) {
            if (isSpuriousForkEvent(event, checkpoint.removedFlowFiles)) {
                continue;
            }

            switch (event.getEventType()) {
                case SEND:
                    flowFilesSent++;
                    bytesSent += event.getFileSize();
                    break;
                case RECEIVE:
                case FETCH:
                    flowFilesReceived++;
                    bytesReceived += event.getFileSize();
                    break;
                default:
                    break;
            }
        }

        try {
            // update event repository
            final Connectable connectable = context.getConnectable();
            final StandardFlowFileEvent flowFileEvent = new StandardFlowFileEvent(connectable.getIdentifier());
            flowFileEvent.setBytesRead(checkpoint.bytesRead);
            flowFileEvent.setBytesWritten(checkpoint.bytesWritten);
            flowFileEvent.setContentSizeIn(checkpoint.contentSizeIn);
            flowFileEvent.setContentSizeOut(checkpoint.contentSizeOut);
            flowFileEvent.setContentSizeRemoved(checkpoint.removedBytes);
            flowFileEvent.setFlowFilesIn(checkpoint.flowFilesIn);
            flowFileEvent.setFlowFilesOut(checkpoint.flowFilesOut);
            flowFileEvent.setFlowFilesRemoved(checkpoint.removedCount);
            flowFileEvent.setFlowFilesReceived(flowFilesReceived);
            flowFileEvent.setBytesReceived(bytesReceived);
            flowFileEvent.setFlowFilesSent(flowFilesSent);
            flowFileEvent.setBytesSent(bytesSent);

            long lineageMillis = 0L;
            for (final Map.Entry<FlowFileRecord, StandardRepositoryRecord> entry : checkpoint.records.entrySet()) {
                final FlowFile flowFile = entry.getKey();
                final long lineageDuration = System.currentTimeMillis() - flowFile.getLineageStartDate();
                lineageMillis += lineageDuration;
            }
            flowFileEvent.setAggregateLineageMillis(lineageMillis);

            context.getFlowFileEventRepository().updateRepository(flowFileEvent);

            for (final FlowFileEvent connectionEvent : checkpoint.connectionCounts.values()) {
                context.getFlowFileEventRepository().updateRepository(connectionEvent);
            }
        } catch (final IOException ioe) {
            LOG.error("FlowFile Event Repository failed to update", ioe);
        }
    }

    private void addEventType(final Map<String, Set<ProvenanceEventType>> map, final String id, final ProvenanceEventType eventType) {
        Set<ProvenanceEventType> eventTypes = map.get(id);
        if (eventTypes == null) {
            eventTypes = new HashSet<>();
            map.put(id, eventTypes);
        }
        eventTypes.add(eventType);
    }

    private void updateProvenanceRepo(final Checkpoint checkpoint) {
        // Update Provenance Repository
        final ProvenanceEventRepository provenanceRepo = context.getProvenanceRepository();

        // We need to de-dupe the events that we've created and those reported to the provenance reporter,
        // in case the Processor developer submitted the same events to the reporter. So we use a LinkedHashSet
        // for this, so that we are able to ensure that the events are submitted in the proper order.
        final Set<ProvenanceEventRecord> recordsToSubmit = new LinkedHashSet<>();
        final Map<String, Set<ProvenanceEventType>> eventTypesPerFlowFileId = new HashMap<>();

        final Set<ProvenanceEventRecord> processorGenerated = checkpoint.reportedEvents;
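        // Illustrative effect of the LinkedHashSet (assumed scenario): if a processor reports the same event
        // twice via the ProvenanceReporter, only one copy survives, while insertion order -- FORK events
        // first, then processor-reported events, then other framework-generated ones -- is preserved.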
        // We first want to submit FORK events because if the Processor is going to create events against
        // a FlowFile, that FlowFile needs to be shown to be created first.
        // However, if the Processor has generated a FORK event, we don't want to use the Framework-created one --
        // we prefer to use the event generated by the Processor. We can determine this by checking if the Set of events generated
        // by the Processor contains any of the FORK events that we generated
        for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : checkpoint.forkEventBuilders.entrySet()) {
            final ProvenanceEventBuilder builder = entry.getValue();
            final FlowFile flowFile = entry.getKey();

            updateEventContentClaims(builder, flowFile, checkpoint.records.get(flowFile));
            final ProvenanceEventRecord event = builder.build();

            if (!event.getChildUuids().isEmpty() && !isSpuriousForkEvent(event, checkpoint.removedFlowFiles)) {
                // If framework generated the event, add it to the 'recordsToSubmit' Set.
                if (!processorGenerated.contains(event)) {
                    recordsToSubmit.add(event);
                }

                // Register the FORK event for each child and each parent.
                for (final String childUuid : event.getChildUuids()) {
                    addEventType(eventTypesPerFlowFileId, childUuid, event.getEventType());
                }
                for (final String parentUuid : event.getParentUuids()) {
                    addEventType(eventTypesPerFlowFileId, parentUuid, event.getEventType());
                }
            }
        }

        // Now add any Processor-reported events.
        for (final ProvenanceEventRecord event : processorGenerated) {
            if (isSpuriousForkEvent(event, checkpoint.removedFlowFiles)) {
                continue;
            }

            // Check if the event indicates that the FlowFile was routed to the same
            // connection from which it was pulled (and only this connection). If so, discard the event.
            if (isSpuriousRouteEvent(event, checkpoint.records)) {
                continue;
            }

            recordsToSubmit.add(event);
            addEventType(eventTypesPerFlowFileId, event.getFlowFileUuid(), event.getEventType());
        }

        // Finally, add any other events that we may have generated.
        for (final List<ProvenanceEventRecord> eventList : checkpoint.generatedProvenanceEvents.values()) {
            for (final ProvenanceEventRecord event : eventList) {
                if (isSpuriousForkEvent(event, checkpoint.removedFlowFiles)) {
                    continue;
                }

                recordsToSubmit.add(event);
                addEventType(eventTypesPerFlowFileId, event.getFlowFileUuid(), event.getEventType());
            }
        }
        // Check if content or attributes changed. If so, register the appropriate events.
        for (final StandardRepositoryRecord repoRecord : checkpoint.records.values()) {
            final ContentClaim original = repoRecord.getOriginalClaim();
            final ContentClaim current = repoRecord.getCurrentClaim();

            boolean contentChanged = false;
            if (original == null && current != null) {
                contentChanged = true;
            }
            if (original != null && current == null) {
                contentChanged = true;
            }
            if (original != null && current != null && !original.equals(current)) {
                contentChanged = true;
            }

            final FlowFileRecord curFlowFile = repoRecord.getCurrent();
            final String flowFileId = curFlowFile.getAttribute(CoreAttributes.UUID.key());
            boolean eventAdded = false;

            if (checkpoint.removedFlowFiles.contains(flowFileId)) {
                continue;
            }

            final boolean newFlowFile = repoRecord.getOriginal() == null;
            if (contentChanged && !newFlowFile) {
                recordsToSubmit.add(provenanceReporter.build(curFlowFile, ProvenanceEventType.CONTENT_MODIFIED).build());
                addEventType(eventTypesPerFlowFileId, flowFileId, ProvenanceEventType.CONTENT_MODIFIED);
                eventAdded = true;
            }

            if (checkpoint.createdFlowFiles.contains(flowFileId)) {
                final Set<ProvenanceEventType> registeredTypes = eventTypesPerFlowFileId.get(flowFileId);
                boolean creationEventRegistered = false;
                if (registeredTypes != null) {
                    if (registeredTypes.contains(ProvenanceEventType.CREATE)
                            || registeredTypes.contains(ProvenanceEventType.FORK)
                            || registeredTypes.contains(ProvenanceEventType.JOIN)
                            || registeredTypes.contains(ProvenanceEventType.RECEIVE)
                            || registeredTypes.contains(ProvenanceEventType.FETCH)) {
                        creationEventRegistered = true;
                    }
                }

                if (!creationEventRegistered) {
                    recordsToSubmit.add(provenanceReporter.build(curFlowFile, ProvenanceEventType.CREATE).build());
                    eventAdded = true;
                }
            }

            if (!eventAdded && !repoRecord.getUpdatedAttributes().isEmpty()) {
                // We generate an ATTRIBUTES_MODIFIED event only if no other event has been
                // created for the FlowFile. We do this because all events contain both the
                // newest and the original attributes, so generating an ATTRIBUTES_MODIFIED
                // event is redundant if another already exists.
                if (!eventTypesPerFlowFileId.containsKey(flowFileId)) {
                    recordsToSubmit.add(provenanceReporter.build(curFlowFile, ProvenanceEventType.ATTRIBUTES_MODIFIED).build());
                    addEventType(eventTypesPerFlowFileId, flowFileId, ProvenanceEventType.ATTRIBUTES_MODIFIED);
                }
            }
        }
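        // The Iterable constructed below is lazy: each call to next() enriches exactly one event on demand,
        // so a commit with, say, 10,000 events (count is illustrative) never materializes a second
        // 10,000-element List of enriched copies before handing them to the Provenance Repository.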
        // We want to submit the 'recordsToSubmit' collection, followed by the auto-terminated events to the Provenance Repository.
        // We want to do this with a single call to ProvenanceEventRepository#registerEvents because it may be much more efficient
        // to do so.
        // However, we want to modify the events in 'recordsToSubmit' to obtain the data from the most recent version of the FlowFiles
        // (except for SEND events); see note below as to why this is.
        // Therefore, we create an Iterable that can iterate over each of these events, modifying them as needed, and returning them
        // in the appropriate order. This prevents an unnecessary step of creating an intermediate List and adding all of those values
        // to the List.
        // This is done in a similar vein to how Java 8's streams work, iterating over the events and returning a processed version
        // one-at-a-time as opposed to iterating over the entire Collection and putting the results in another Collection. However,
        // we don't want to change the Framework to require Java 8 at this time, because it's not yet as prevalent as we would desire.
        final Map<String, FlowFileRecord> flowFileRecordMap = new HashMap<>();
        for (final StandardRepositoryRecord repoRecord : checkpoint.records.values()) {
            final FlowFileRecord flowFile = repoRecord.getCurrent();
            flowFileRecordMap.put(flowFile.getAttribute(CoreAttributes.UUID.key()), flowFile);
        }

        final List<ProvenanceEventRecord> autoTermEvents = checkpoint.autoTerminatedEvents;
        final Iterable<ProvenanceEventRecord> iterable = new Iterable<ProvenanceEventRecord>() {
            final Iterator<ProvenanceEventRecord> recordsToSubmitIterator = recordsToSubmit.iterator();
            final Iterator<ProvenanceEventRecord> autoTermIterator = autoTermEvents == null ? null : autoTermEvents.iterator();

            @Override
            public Iterator<ProvenanceEventRecord> iterator() {
                return new Iterator<ProvenanceEventRecord>() {
                    @Override
                    public boolean hasNext() {
                        return recordsToSubmitIterator.hasNext() || autoTermIterator != null && autoTermIterator.hasNext();
                    }

                    @Override
                    public ProvenanceEventRecord next() {
                        if (recordsToSubmitIterator.hasNext()) {
                            final ProvenanceEventRecord rawEvent = recordsToSubmitIterator.next();

                            // Update the Provenance Event Record with all of the info that we know about the event.
                            // For SEND events, we do not want to update the FlowFile info on the Event, because the event should
                            // reflect the FlowFile as it was sent to the remote system. However, for other events, we want to use
                            // the representation of the FlowFile as it is committed, as this is the only way in which it really
                            // exists in our system -- all other representations are volatile representations that have not been
                            // exposed.
                            return enrich(rawEvent, flowFileRecordMap, checkpoint.records, rawEvent.getEventType() != ProvenanceEventType.SEND);
                        } else if (autoTermIterator != null && autoTermIterator.hasNext()) {
                            return enrich(autoTermIterator.next(), flowFileRecordMap, checkpoint.records, true);
                        }

                        throw new NoSuchElementException();
                    }

                    @Override
                    public void remove() {
                        throw new UnsupportedOperationException();
                    }
                };
            }
        };

        provenanceRepo.registerEvents(iterable);
    }

    private void updateEventContentClaims(final ProvenanceEventBuilder builder, final FlowFile flowFile, final StandardRepositoryRecord repoRecord) {
        final ContentClaim originalClaim = repoRecord.getOriginalClaim();
        if (originalClaim == null) {
            builder.setCurrentContentClaim(null, null, null, null, 0L);
        } else {
            final ResourceClaim resourceClaim = originalClaim.getResourceClaim();
            builder.setCurrentContentClaim(
                resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(),
                repoRecord.getOriginal().getContentClaimOffset() + originalClaim.getOffset(), repoRecord.getOriginal().getSize());
            builder.setPreviousContentClaim(
                resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(),
                repoRecord.getOriginal().getContentClaimOffset() + originalClaim.getOffset(), repoRecord.getOriginal().getSize());
        }
    }

    @Override
    public StandardProvenanceEventRecord enrich(final ProvenanceEventRecord rawEvent, final FlowFile flowFile) {
        final StandardRepositoryRecord repoRecord = records.get(flowFile);
        if (repoRecord == null) {
            throw new FlowFileHandlingException(flowFile + " is not known in this session (" + toString() + ")");
        }

        final StandardProvenanceEventRecord.Builder recordBuilder = new StandardProvenanceEventRecord.Builder().fromEvent(rawEvent);
        if (repoRecord.getCurrent() != null && repoRecord.getCurrentClaim() != null) {
            final ContentClaim currentClaim = repoRecord.getCurrentClaim();
            final long currentOffset = repoRecord.getCurrentClaimOffset();
            final long size = flowFile.getSize();

            final ResourceClaim resourceClaim = currentClaim.getResourceClaim();
            recordBuilder.setCurrentContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(), currentOffset + currentClaim.getOffset(), size);
        }

        if (repoRecord.getOriginal() != null && repoRecord.getOriginalClaim() != null) {
            final ContentClaim originalClaim = repoRecord.getOriginalClaim();
            final long originalOffset = repoRecord.getOriginal().getContentClaimOffset();
            final long originalSize = repoRecord.getOriginal().getSize();

            final ResourceClaim resourceClaim = originalClaim.getResourceClaim();
            recordBuilder.setPreviousContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(), originalOffset + originalClaim.getOffset(), originalSize);
        }

        final FlowFileQueue originalQueue = repoRecord.getOriginalQueue();
        if (originalQueue != null) {
            recordBuilder.setSourceQueueIdentifier(originalQueue.getIdentifier());
        }

        recordBuilder.setAttributes(repoRecord.getOriginalAttributes(), repoRecord.getUpdatedAttributes());
        return recordBuilder.build();
    }

    private StandardProvenanceEventRecord enrich(
        final ProvenanceEventRecord rawEvent, final Map<String, FlowFileRecord> flowFileRecordMap,
        final Map<FlowFileRecord, StandardRepositoryRecord> records, final boolean updateAttributes) {
        final StandardProvenanceEventRecord.Builder recordBuilder = new StandardProvenanceEventRecord.Builder().fromEvent(rawEvent);

        final FlowFileRecord eventFlowFile = flowFileRecordMap.get(rawEvent.getFlowFileUuid());
        if (eventFlowFile != null) {
            final StandardRepositoryRecord repoRecord = records.get(eventFlowFile);

            if (repoRecord.getCurrent() != null && repoRecord.getCurrentClaim() != null) {
                final ContentClaim currentClaim = repoRecord.getCurrentClaim();
                final long currentOffset = repoRecord.getCurrentClaimOffset();
                final long size = eventFlowFile.getSize();

                final ResourceClaim resourceClaim = currentClaim.getResourceClaim();
                recordBuilder.setCurrentContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(), currentOffset + currentClaim.getOffset(), size);
            }

            if (repoRecord.getOriginal() != null && repoRecord.getOriginalClaim() != null) {
                final ContentClaim originalClaim = repoRecord.getOriginalClaim();
                final long originalOffset = repoRecord.getOriginal().getContentClaimOffset();
                final long originalSize = repoRecord.getOriginal().getSize();

                final ResourceClaim resourceClaim = originalClaim.getResourceClaim();
                recordBuilder.setPreviousContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(), originalOffset + originalClaim.getOffset(), originalSize);
            }

            final FlowFileQueue originalQueue = repoRecord.getOriginalQueue();
            if (originalQueue != null) {
                recordBuilder.setSourceQueueIdentifier(originalQueue.getIdentifier());
            }
        }

        if (updateAttributes) {
            final FlowFileRecord flowFileRecord = flowFileRecordMap.get(rawEvent.getFlowFileUuid());
            if (flowFileRecord != null) {
                final StandardRepositoryRecord record = records.get(flowFileRecord);
                if (record != null) {
                    recordBuilder.setAttributes(record.getOriginalAttributes(), record.getUpdatedAttributes());
                }
            }
        }

        return recordBuilder.build();
    }
    /**
     * Checks if the given event is a spurious FORK, meaning that the FORK has a
     * single child and that child was removed in this session. This happens
     * when a Processor calls #create(FlowFile) and then removes the created
     * FlowFile.
     *
     * @param event event
     * @return true if spurious fork
     */
    private boolean isSpuriousForkEvent(final ProvenanceEventRecord event, final Set<String> removedFlowFiles) {
        if (event.getEventType() == ProvenanceEventType.FORK) {
            final List<String> childUuids = event.getChildUuids();
            if (childUuids != null && childUuids.size() == 1 && removedFlowFiles.contains(childUuids.get(0))) {
                return true;
            }
        }

        return false;
    }

    /**
     * Checks if the given event is a spurious ROUTE, meaning that the ROUTE
     * indicates that a FlowFile was routed to a relationship with only 1
     * connection and that Connection is the Connection from which the FlowFile
     * was pulled. I.e., the FlowFile was really routed nowhere.
     *
     * @param event event
     * @param records records
     * @return true if spurious route
     */
    private boolean isSpuriousRouteEvent(final ProvenanceEventRecord event, final Map<FlowFileRecord, StandardRepositoryRecord> records) {
        if (event.getEventType() == ProvenanceEventType.ROUTE) {
            final String relationshipName = event.getRelationship();
            final Relationship relationship = new Relationship.Builder().name(relationshipName).build();
            final Collection<Connection> connectionsForRelationship = this.context.getConnections(relationship);

            // If the number of connections for this relationship is not 1, then we can't ignore this ROUTE event,
            // as it may be cloning the FlowFile and adding to multiple connections.
            if (connectionsForRelationship.size() == 1) {
                for (final Map.Entry<FlowFileRecord, StandardRepositoryRecord> entry : records.entrySet()) {
                    final FlowFileRecord flowFileRecord = entry.getKey();
                    if (event.getFlowFileUuid().equals(flowFileRecord.getAttribute(CoreAttributes.UUID.key()))) {
                        final StandardRepositoryRecord repoRecord = entry.getValue();
                        if (repoRecord.getOriginalQueue() == null) {
                            return false;
                        }

                        final String originalQueueId = repoRecord.getOriginalQueue().getIdentifier();
                        final Connection destinationConnection = connectionsForRelationship.iterator().next();
                        final String destinationQueueId = destinationConnection.getFlowFileQueue().getIdentifier();
                        return originalQueueId.equals(destinationQueueId);
                    }
                }
            }
        }

        return false;
    }
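    // Concrete example of a spurious ROUTE (topology is assumed): a FlowFile is pulled from connection A,
    // and the relationship it is routed to has A as its only connection. The FlowFile ends up exactly where
    // it started, so the ROUTE event above carries no lineage information and is discarded.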
    @Override
    public void rollback() {
        rollback(false);
    }

    @Override
    public void rollback(final boolean penalize) {
        rollback(penalize, false);
    }

    private void rollback(final boolean penalize, final boolean rollbackCheckpoint) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("{} session rollback called, FlowFile records are {} {}", this, loggableFlowfileInfo(), new Throwable("Stack Trace on rollback"));
        }

        deleteOnCommit.clear();

        closeStreams(openInputStreams);
        closeStreams(openOutputStreams);

        try {
            claimCache.reset();
        } catch (IOException e1) {
            LOG.warn("{} Attempted to close Output Stream for {} due to session rollback but close failed", this, this.connectableDescription, e1);
        }

        final Set<StandardRepositoryRecord> recordsToHandle = new HashSet<>();
        recordsToHandle.addAll(records.values());
        if (rollbackCheckpoint) {
            final Checkpoint existingCheckpoint = this.checkpoint;
            this.checkpoint = null;
            if (existingCheckpoint != null && existingCheckpoint.records != null) {
                recordsToHandle.addAll(existingCheckpoint.records.values());
            }
        }

        resetWriteClaims();
        resetReadClaim();

        if (recordsToHandle.isEmpty()) {
            LOG.trace("{} was rolled back, but no events were performed by this ProcessSession", this);
            acknowledgeRecords();
            resetState();
            return;
        }

        for (final StandardRepositoryRecord record : recordsToHandle) {
            // remove the working claims if they are different than the originals.
            removeTemporaryClaim(record);
        }

        final Set<RepositoryRecord> abortedRecords = new HashSet<>();
        final Set<StandardRepositoryRecord> transferRecords = new HashSet<>();
        for (final StandardRepositoryRecord record : recordsToHandle) {
            if (record.isMarkedForAbort()) {
                decrementClaimCount(record.getWorkingClaim());
                if (record.getCurrentClaim() != null && !record.getCurrentClaim().equals(record.getWorkingClaim())) {
                    // if working & original claim are same, don't remove twice; we only want to remove the original
                    // if it's different from the working. Otherwise, we remove two claimant counts. This causes
                    // an issue if we only updated the flowfile attributes.
                    decrementClaimCount(record.getCurrentClaim());
                }

                abortedRecords.add(record);
            } else {
                transferRecords.add(record);
            }
        }

        // Put the FlowFiles that are not marked for abort back to their original queues
        for (final StandardRepositoryRecord record : transferRecords) {
            if (record.getOriginal() != null) {
                final FlowFileQueue originalQueue = record.getOriginalQueue();
                if (originalQueue != null) {
                    if (penalize) {
                        final long expirationEpochMillis = System.currentTimeMillis() + context.getConnectable().getPenalizationPeriod(TimeUnit.MILLISECONDS);
                        final FlowFileRecord newFile = new StandardFlowFileRecord.Builder().fromFlowFile(record.getOriginal()).penaltyExpirationTime(expirationEpochMillis).build();
                        originalQueue.put(newFile);
                    } else {
                        originalQueue.put(record.getOriginal());
                    }
                }
            }
        }

        if (!abortedRecords.isEmpty()) {
            try {
                context.getFlowFileRepository().updateRepository(abortedRecords);
            } catch (final IOException ioe) {
                LOG.error("Unable to update FlowFile repository for aborted records due to {}", ioe.toString());
                if (LOG.isDebugEnabled()) {
                    LOG.error("", ioe);
                }
            }
        }
        // If we have transient claims that need to be cleaned up, do so.
        final List<ContentClaim> transientClaims = recordsToHandle.stream()
            .flatMap(record -> record.getTransientClaims().stream())
            .collect(Collectors.toList());

        if (!transientClaims.isEmpty()) {
            final RepositoryRecord repoRecord = new TransientClaimRepositoryRecord(transientClaims);
            try {
                context.getFlowFileRepository().updateRepository(Collections.singletonList(repoRecord));
            } catch (final IOException ioe) {
                LOG.error("Unable to update FlowFile repository to cleanup transient claims due to {}", ioe.toString());
                if (LOG.isDebugEnabled()) {
                    LOG.error("", ioe);
                }
            }
        }

        final Connectable connectable = context.getConnectable();
        final StandardFlowFileEvent flowFileEvent = new StandardFlowFileEvent(connectable.getIdentifier());
        flowFileEvent.setBytesRead(bytesRead);
        flowFileEvent.setBytesWritten(bytesWritten);

        // update event repository
        try {
            context.getFlowFileEventRepository().updateRepository(flowFileEvent);
        } catch (final Exception e) {
            LOG.error("Failed to update FlowFileEvent Repository due to " + e);
            if (LOG.isDebugEnabled()) {
                LOG.error("", e);
            }
        }

        acknowledgeRecords();
        resetState();
    }

    private String loggableFlowfileInfo() {
        final StringBuilder details = new StringBuilder(1024).append("[");
        final int initLen = details.length();

        int filesListed = 0;
        for (Map.Entry<FlowFileRecord, StandardRepositoryRecord> entry : records.entrySet()) {
            if (filesListed >= MAX_ROLLBACK_FLOWFILES_TO_LOG) {
                break;
            }
            filesListed++;

            final FlowFileRecord entryKey = entry.getKey();
            final StandardRepositoryRecord entryValue = entry.getValue();
            if (details.length() > initLen) {
                details.append(", ");
            }
            if (entryValue.getOriginalQueue() != null && entryValue.getOriginalQueue().getIdentifier() != null) {
                details.append("queue=")
                    .append(entryValue.getOriginalQueue().getIdentifier())
                    .append("/");
            }
            details.append("filename=")
                .append(entryKey.getAttribute(CoreAttributes.FILENAME.key()))
                .append("/uuid=")
                .append(entryKey.getAttribute(CoreAttributes.UUID.key()));
        }
        if (records.entrySet().size() > MAX_ROLLBACK_FLOWFILES_TO_LOG) {
            if (details.length() > initLen) {
                details.append(", ");
            }
            details.append(records.entrySet().size() - MAX_ROLLBACK_FLOWFILES_TO_LOG)
                .append(" additional Flowfiles not listed");
        } else if (filesListed == 0) {
            details.append("none");
        }

        details.append("]");
        return details.toString();
    }

    private void decrementClaimCount(final ContentClaim claim) {
        if (claim == null) {
            return;
        }

        context.getContentRepository().decrementClaimantCount(claim);
    }

    /**
     * Destroys a ContentClaim that was being written to but is no longer needed
     *
     * @param claim claim to destroy
     */
    private void destroyContent(final ContentClaim claim) {
        if (claim == null) {
            return;
        }

        final int decrementedClaimCount = context.getContentRepository().decrementClaimantCount(claim);
        if (decrementedClaimCount <= 0) {
            resetWriteClaims(); // Have to ensure that we are not currently writing to the claim before we can destroy it.
            context.getContentRepository().remove(claim);
        }
    }

    private void resetState() {
        records.clear();
        readRecursionSet.clear();
        writeRecursionSet.clear();
        contentSizeIn = 0L;
        contentSizeOut = 0L;
        flowFilesIn = 0;
        flowFilesOut = 0;
        removedCount = 0;
        removedBytes = 0L;
        bytesRead = 0L;
        bytesWritten = 0L;
        connectionCounts.clear();
        createdFlowFiles.clear();
        removedFlowFiles.clear();
        counters.clear();

        generatedProvenanceEvents.clear();
        forkEventBuilders.clear();
        provenanceReporter.clear();

        processingStartTime = System.nanoTime();
    }

    private void acknowledgeRecords() {
        for (final Map.Entry<FlowFileQueue, Set<FlowFileRecord>> entry : unacknowledgedFlowFiles.entrySet()) {
            entry.getKey().acknowledge(entry.getValue());
        }
        unacknowledgedFlowFiles.clear();
    }

    @Override
    public void migrate(final ProcessSession newOwner, final Collection<FlowFile> flowFiles) {
        if (Objects.requireNonNull(newOwner) == this) {
            throw new IllegalArgumentException("Cannot migrate FlowFiles from a Process Session to itself");
        }

        if (flowFiles == null || flowFiles.isEmpty()) {
            throw new IllegalArgumentException("Must supply at least one FlowFile to migrate");
        }

        if (!(newOwner instanceof StandardProcessSession)) {
            throw new IllegalArgumentException("Cannot migrate from a StandardProcessSession to a " + newOwner.getClass());
        }

        migrate((StandardProcessSession) newOwner, flowFiles);
    }
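    // Illustrative caller-side use of migrate (assumed code, not part of this class): a processor that
    // accumulates work across onTrigger invocations might hand a FlowFile to a longer-lived session with
    //
    //     session.migrate(batchSession, Collections.singleton(flowFile));
    //
    // after which only 'batchSession' may transfer, commit, or roll back that FlowFile.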
    private void migrate(final StandardProcessSession newOwner, final Collection<FlowFile> flowFiles) {
        // We don't call validateRecordState() here because we want to allow migration of FlowFiles that have already been marked as removed or transferred, etc.
        for (final FlowFile flowFile : flowFiles) {
            if (openInputStreams.containsKey(flowFile)) {
                throw new IllegalStateException(flowFile + " cannot be migrated to a new Process Session because this session currently "
                    + "has an open InputStream for the FlowFile, created by calling ProcessSession.read(FlowFile)");
            }

            if (openOutputStreams.containsKey(flowFile)) {
                throw new IllegalStateException(flowFile + " cannot be migrated to a new Process Session because this session currently "
                    + "has an open OutputStream for the FlowFile, created by calling ProcessSession.write(FlowFile)");
            }

            if (readRecursionSet.containsKey(flowFile)) {
                throw new IllegalStateException(flowFile + " already in use for an active callback or InputStream created by ProcessSession.read(FlowFile) has not been closed");
            }
            if (writeRecursionSet.contains(flowFile)) {
                throw new IllegalStateException(flowFile + " already in use for an active callback or OutputStream created by ProcessSession.write(FlowFile) has not been closed");
            }

            final StandardRepositoryRecord record = records.get(flowFile);
            if (record == null) {
                throw new FlowFileHandlingException(flowFile + " is not known in this session (" + toString() + ")");
            }
            if (record.getCurrent() != flowFile) {
                throw new FlowFileHandlingException(flowFile + " is not the most recent version of this FlowFile within this session (" + toString() + ")");
            }
        }

        // If we have a FORK event for one of the given FlowFiles, then all children must also be migrated. Otherwise, we
        // could have a case where we have FlowFile A transferred and eventually exiting the flow and later the 'newOwner'
        // ProcessSession is committed, claiming to have created FlowFiles from the parent, which is no longer even in
        // the flow. This would be very confusing when looking at the provenance for the FlowFile, so it is best to avoid this.
        final Set<String> flowFileIds = flowFiles.stream()
            .map(ff -> ff.getAttribute(CoreAttributes.UUID.key()))
            .collect(Collectors.toSet());

        for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : forkEventBuilders.entrySet()) {
            final FlowFile eventFlowFile = entry.getKey();
            if (flowFiles.contains(eventFlowFile)) {
                final ProvenanceEventBuilder eventBuilder = entry.getValue();
                for (final String childId : eventBuilder.getChildFlowFileIds()) {
                    if (!flowFileIds.contains(childId)) {
                        throw new IllegalStateException("Cannot migrate " + eventFlowFile + " to a new session because it was forked to create "
                            + eventBuilder.getChildFlowFileIds().size() + " children and not all children are being migrated. If any FlowFile is forked, "
                            + "all of its children must also be migrated at the same time as the forked FlowFile");
                    }
                }
            }
        }

        // If we have a FORK event where a FlowFile is a child of the FORK event, we want to create a FORK
        // event builder for the new owner of the FlowFile and remove the child from our fork event builder.
        for (final Map.Entry<FlowFile, ProvenanceEventBuilder> entry : forkEventBuilders.entrySet()) {
            final FlowFile eventFlowFile = entry.getKey();
            final ProvenanceEventBuilder eventBuilder = entry.getValue();

            final Set<String> childrenIds = new HashSet<>(eventBuilder.getChildFlowFileIds());

            ProvenanceEventBuilder copy = null;
            for (final FlowFile flowFile : flowFiles) {
                final String flowFileId = flowFile.getAttribute(CoreAttributes.UUID.key());
                if (childrenIds.contains(flowFileId)) {
                    eventBuilder.removeChildFlowFile(flowFile);

                    if (copy == null) {
                        copy = eventBuilder.copy();
                        copy.getChildFlowFileIds().clear();
                    }
                    copy.addChildFlowFile(flowFileId);
                }
            }

            if (copy != null) {
                newOwner.forkEventBuilders.put(eventFlowFile, copy);
            }
        }
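        // Example of the split performed above (scenario is assumed): if parent P remains in this session but
        // its forked children C1 and C2 are both migrated, P's builder loses C1 and C2, and 'newOwner'
        // receives a copy of the builder that lists exactly those two children.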
        newOwner.processingStartTime = Math.min(newOwner.processingStartTime, processingStartTime);

        for (final FlowFile flowFile : flowFiles) {
            final FlowFileRecord flowFileRecord = (FlowFileRecord) flowFile;

            final StandardRepositoryRecord repoRecord = this.records.remove(flowFile);
            newOwner.records.put(flowFileRecord, repoRecord);

            // Adjust the counts for Connections for each FlowFile that was pulled from a Connection.
            // We do not have to worry about accounting for 'input counts' on connections because those
            // are incremented only during a checkpoint, and anything that's been checkpointed has
            // also been committed above.
            final FlowFileQueue inputQueue = repoRecord.getOriginalQueue();
            if (inputQueue != null) {
                final String connectionId = inputQueue.getIdentifier();
                incrementConnectionOutputCounts(connectionId, -1, -repoRecord.getOriginal().getSize());
                newOwner.incrementConnectionOutputCounts(connectionId, 1, repoRecord.getOriginal().getSize());

                unacknowledgedFlowFiles.get(inputQueue).remove(flowFile);
                newOwner.unacknowledgedFlowFiles.computeIfAbsent(inputQueue, queue -> new HashSet<>()).add(flowFileRecord);

                flowFilesIn--;
                contentSizeIn -= flowFile.getSize();

                newOwner.flowFilesIn++;
                newOwner.contentSizeIn += flowFile.getSize();
            }

            final String flowFileId = flowFile.getAttribute(CoreAttributes.UUID.key());
            if (removedFlowFiles.remove(flowFileId)) {
                newOwner.removedFlowFiles.add(flowFileId);
                newOwner.removedCount++;
                newOwner.removedBytes += flowFile.getSize();

                removedCount--;
                removedBytes -= flowFile.getSize();
            }

            if (createdFlowFiles.remove(flowFileId)) {
                newOwner.createdFlowFiles.add(flowFileId);
            }

            if (repoRecord.getTransferRelationship() != null) {
                flowFilesOut--;
                contentSizeOut -= flowFile.getSize();

                newOwner.flowFilesOut++;
                newOwner.contentSizeOut += flowFile.getSize();
            }

            final List<ProvenanceEventRecord> events = generatedProvenanceEvents.remove(flowFile);
            if (events != null) {
                newOwner.generatedProvenanceEvents.put(flowFile, events);
            }

            final ContentClaim currentClaim = repoRecord.getCurrentClaim();
            if (currentClaim != null) {
                final ByteCountingOutputStream appendableStream = appendableStreams.remove(currentClaim);
                if (appendableStream != null) {
                    newOwner.appendableStreams.put(currentClaim, appendableStream);
                }
            }

            final Path toDelete = deleteOnCommit.remove(flowFile);
            if (toDelete != null) {
                newOwner.deleteOnCommit.put(flowFile, toDelete);
            }
        }

        provenanceReporter.migrate(newOwner.provenanceReporter, flowFileIds);
    }

    private String summarizeEvents(final Checkpoint checkpoint) {
        final Map<Relationship, Set<String>> transferMap = new HashMap<>(); // relationship to flowfile ID's
        final Set<String> modifiedFlowFileIds = new HashSet<>();
        int largestTransferSetSize = 0;

        for (final Map.Entry<FlowFileRecord, StandardRepositoryRecord> entry : checkpoint.records.entrySet()) {
            final FlowFile flowFile = entry.getKey();
            final StandardRepositoryRecord record = entry.getValue();

            final Relationship relationship = record.getTransferRelationship();
            if (Relationship.SELF.equals(relationship)) {
                continue;
            }

            Set<String> transferIds = transferMap.get(relationship);
            if (transferIds == null) {
                transferIds = new HashSet<>();
                transferMap.put(relationship, transferIds);
            }
            transferIds.add(flowFile.getAttribute(CoreAttributes.UUID.key()));
            largestTransferSetSize = Math.max(largestTransferSetSize, transferIds.size());

            final ContentClaim workingClaim = record.getWorkingClaim();
            if (workingClaim != null && workingClaim != record.getOriginalClaim() && record.getTransferRelationship() != null) {
                modifiedFlowFileIds.add(flowFile.getAttribute(CoreAttributes.UUID.key()));
            }
        }

        final int numRemoved = checkpoint.removedFlowFiles.size();
        final int numModified = modifiedFlowFileIds.size();
        final int numCreated = checkpoint.createdFlowFiles.size();

        final StringBuilder sb = new StringBuilder(512);
        if (!LOG.isDebugEnabled() && (largestTransferSetSize > VERBOSE_LOG_THRESHOLD
            || numModified > VERBOSE_LOG_THRESHOLD || numCreated > VERBOSE_LOG_THRESHOLD || numRemoved > VERBOSE_LOG_THRESHOLD)) {
            if (numCreated > 0) {
                sb.append("created ").append(numCreated).append(" FlowFiles, ");
            }
            if (numModified > 0) {
                sb.append("modified ").append(modifiedFlowFileIds.size()).append(" FlowFiles, ");
            }
").append(modifiedFlowFileIds.size()).append(" FlowFiles, "); } if (numRemoved > 0) { sb.append("removed ").append(numRemoved).append(" FlowFiles, "); } for (final Map.Entry<Relationship, Set<String>> entry : transferMap.entrySet()) { if (entry.getKey() != null) { sb.append("Transferred ").append(entry.getValue().size()).append(" FlowFiles"); final Relationship relationship = entry.getKey(); if (relationship != Relationship.ANONYMOUS) { sb.append(" to '").append(relationship.getName()).append("', "); } } } } else { if (numCreated > 0) { sb.append("created FlowFiles ").append(checkpoint.createdFlowFiles).append(", "); } if (numModified > 0) { sb.append("modified FlowFiles ").append(modifiedFlowFileIds).append(", "); } if (numRemoved > 0) { sb.append("removed FlowFiles ").append(checkpoint.removedFlowFiles).append(", "); } for (final Map.Entry<Relationship, Set<String>> entry : transferMap.entrySet()) { if (entry.getKey() != null) { sb.append("Transferred FlowFiles ").append(entry.getValue()); final Relationship relationship = entry.getKey(); if (relationship != Relationship.ANONYMOUS) { sb.append(" to '").append(relationship.getName()).append("', "); } } } } if (sb.length() > 2 && sb.subSequence(sb.length() - 2, sb.length()).equals(", ")) { sb.delete(sb.length() - 2, sb.length()); } // don't add processing time if we did nothing, because we don't log the summary anyway if (sb.length() > 0) { final long processingNanos = checkpoint.processingTime; sb.append(", Processing Time = "); formatNanos(processingNanos, sb); } return sb.toString(); } private void formatNanos(final long nanos, final StringBuilder sb) { final long seconds = nanos > 1000000000L ? nanos / 1000000000L : 0L; long millis = nanos > 1000000L ? nanos / 1000000L : 0L; ; final long nanosLeft = nanos % 1000000L; if (seconds > 0) { sb.append(seconds).append(" seconds"); } if (millis > 0) { if (seconds > 0) { sb.append(", "); millis -= seconds * 1000L; } sb.append(millis).append(" millis"); } if (seconds == 0 && millis == 0) { sb.append(nanosLeft).append(" nanos"); } sb.append(" (").append(nanos).append(" nanos)"); } private void incrementConnectionInputCounts(final Connection connection, final RepositoryRecord record) { incrementConnectionInputCounts(connection.getIdentifier(), 1, record.getCurrent().getSize()); } private void incrementConnectionInputCounts(final String connectionId, final int flowFileCount, final long bytes) { final StandardFlowFileEvent connectionEvent = connectionCounts.computeIfAbsent(connectionId, id -> new StandardFlowFileEvent(id)); connectionEvent.setContentSizeIn(connectionEvent.getContentSizeIn() + bytes); connectionEvent.setFlowFilesIn(connectionEvent.getFlowFilesIn() + flowFileCount); } private void incrementConnectionOutputCounts(final Connection connection, final FlowFileRecord record) { incrementConnectionOutputCounts(connection.getIdentifier(), 1, record.getSize()); } private void incrementConnectionOutputCounts(final String connectionId, final int flowFileCount, final long bytes) { final StandardFlowFileEvent connectionEvent = connectionCounts.computeIfAbsent(connectionId, id -> new StandardFlowFileEvent(id)); connectionEvent.setContentSizeOut(connectionEvent.getContentSizeOut() + bytes); connectionEvent.setFlowFilesOut(connectionEvent.getFlowFilesOut() + flowFileCount); } private void registerDequeuedRecord(final FlowFileRecord flowFile, final Connection connection) { final StandardRepositoryRecord record = new StandardRepositoryRecord(connection.getFlowFileQueue(), flowFile); 
    private void incrementConnectionInputCounts(final Connection connection, final RepositoryRecord record) {
        incrementConnectionInputCounts(connection.getIdentifier(), 1, record.getCurrent().getSize());
    }

    private void incrementConnectionInputCounts(final String connectionId, final int flowFileCount, final long bytes) {
        final StandardFlowFileEvent connectionEvent = connectionCounts.computeIfAbsent(connectionId, id -> new StandardFlowFileEvent(id));
        connectionEvent.setContentSizeIn(connectionEvent.getContentSizeIn() + bytes);
        connectionEvent.setFlowFilesIn(connectionEvent.getFlowFilesIn() + flowFileCount);
    }

    private void incrementConnectionOutputCounts(final Connection connection, final FlowFileRecord record) {
        incrementConnectionOutputCounts(connection.getIdentifier(), 1, record.getSize());
    }

    private void incrementConnectionOutputCounts(final String connectionId, final int flowFileCount, final long bytes) {
        final StandardFlowFileEvent connectionEvent = connectionCounts.computeIfAbsent(connectionId, id -> new StandardFlowFileEvent(id));
        connectionEvent.setContentSizeOut(connectionEvent.getContentSizeOut() + bytes);
        connectionEvent.setFlowFilesOut(connectionEvent.getFlowFilesOut() + flowFileCount);
    }

    private void registerDequeuedRecord(final FlowFileRecord flowFile, final Connection connection) {
        final StandardRepositoryRecord record = new StandardRepositoryRecord(connection.getFlowFileQueue(), flowFile);
        records.put(flowFile, record);
        flowFilesIn++;
        contentSizeIn += flowFile.getSize();

        Set<FlowFileRecord> set = unacknowledgedFlowFiles.get(connection.getFlowFileQueue());
        if (set == null) {
            set = new HashSet<>();
            unacknowledgedFlowFiles.put(connection.getFlowFileQueue(), set);
        }
        set.add(flowFile);

        incrementConnectionOutputCounts(connection, flowFile);
    }

    @Override
    public void adjustCounter(final String name, final long delta, final boolean immediate) {
        if (immediate) {
            context.adjustCounter(name, delta);
            return;
        }

        adjustCounter(name, delta, counters);
    }

    private void adjustCounter(final String name, final long delta, final Map<String, Long> map) {
        Long curVal = map.get(name);
        if (curVal == null) {
            curVal = Long.valueOf(0L);
        }

        final long newValue = curVal.longValue() + delta;
        map.put(name, Long.valueOf(newValue));
    }

    @Override
    public FlowFile get() {
        final List<Connection> connections = context.getPollableConnections();
        final int numConnections = connections.size();
        for (int numAttempts = 0; numAttempts < numConnections; numAttempts++) {
            final Connection conn = connections.get(context.getNextIncomingConnectionIndex() % numConnections);
            final Set<FlowFileRecord> expired = new HashSet<>();
            final FlowFileRecord flowFile = conn.poll(expired);
            removeExpired(expired, conn);

            if (flowFile != null) {
                registerDequeuedRecord(flowFile, conn);
                return flowFile;
            }
        }

        return null;
    }

    @Override
    public List<FlowFile> get(final int maxResults) {
        if (maxResults < 0) {
            throw new IllegalArgumentException();
        }
        if (maxResults == 0) {
            return Collections.emptyList();
        }

        // get batch of flow files in a round-robin manner
        final List<Connection> connections = context.getPollableConnections();
        if (connections.isEmpty()) {
            return Collections.emptyList();
        }
        final Connection connection = connections.get(context.getNextIncomingConnectionIndex() % connections.size());

        return get(connection, new ConnectionPoller() {
            @Override
            public List<FlowFileRecord> poll(final Connection connection, final Set<FlowFileRecord> expiredRecords) {
                return connection.poll(new FlowFileFilter() {
                    int polled = 0;

                    @Override
                    public FlowFileFilterResult filter(final FlowFile flowFile) {
                        if (++polled < maxResults) {
                            return FlowFileFilterResult.ACCEPT_AND_CONTINUE;
                        } else {
                            return FlowFileFilterResult.ACCEPT_AND_TERMINATE;
                        }
                    }
                }, expiredRecords);
            }
        }, false);
    }
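    // Worked example for the filter above (maxResults is assumed to be 2): the first FlowFile increments
    // 'polled' to 1 (< 2, so ACCEPT_AND_CONTINUE) and the second increments it to 2 (ACCEPT_AND_TERMINATE),
    // so exactly two FlowFiles are accepted before polling stops.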
i = 0; i < connections.size(); i++) { final int connectionIndex = (startIndex + i) % connections.size(); final Connection conn = connections.get(connectionIndex); final Set<FlowFileRecord> expired = new HashSet<>(); final List<FlowFileRecord> newlySelected = poller.poll(conn, expired); removeExpired(expired, conn); if (newlySelected.isEmpty() && expired.isEmpty()) { continue; } for (final FlowFileRecord flowFile : newlySelected) { registerDequeuedRecord(flowFile, conn); } return new ArrayList<FlowFile>(newlySelected); } return new ArrayList<>(); } finally { if (lockAllQueues) { for (final Connection connection : connections) { connection.unlock(); } } } } @Override public QueueSize getQueueSize() { int flowFileCount = 0; long byteCount = 0L; for (final Connection conn : context.getPollableConnections()) { final QueueSize queueSize = conn.getFlowFileQueue().size(); flowFileCount += queueSize.getObjectCount(); byteCount += queueSize.getByteCount(); } return new QueueSize(flowFileCount, byteCount); } @Override public FlowFile create() { final Map<String, String> attrs = new HashMap<>(); attrs.put(CoreAttributes.FILENAME.key(), String.valueOf(System.nanoTime())); attrs.put(CoreAttributes.PATH.key(), DEFAULT_FLOWFILE_PATH); attrs.put(CoreAttributes.UUID.key(), UUID.randomUUID().toString()); final FlowFileRecord fFile = new StandardFlowFileRecord.Builder().id(context.getNextFlowFileSequence()) .addAttributes(attrs) .build(); final StandardRepositoryRecord record = new StandardRepositoryRecord(null); record.setWorking(fFile, attrs); records.put(fFile, record); createdFlowFiles.add(fFile.getAttribute(CoreAttributes.UUID.key())); return fFile; } @Override public FlowFile clone(final FlowFile example) { return clone(example, 0L, example.getSize()); } @Override public FlowFile clone(FlowFile example, final long offset, final long size) { example = validateRecordState(example); final StandardRepositoryRecord exampleRepoRecord = records.get(example); final FlowFileRecord currRec = exampleRepoRecord.getCurrent(); final ContentClaim claim = exampleRepoRecord.getCurrentClaim(); if (offset + size > example.getSize()) { throw new FlowFileHandlingException("Specified offset of " + offset + " and size " + size + " exceeds size of " + example.toString()); } final StandardFlowFileRecord.Builder builder = new StandardFlowFileRecord.Builder().fromFlowFile(currRec); builder.id(context.getNextFlowFileSequence()); builder.contentClaimOffset(currRec.getContentClaimOffset() + offset); builder.size(size); final String newUuid = UUID.randomUUID().toString(); builder.addAttribute(CoreAttributes.UUID.key(), newUuid); final FlowFileRecord clone = builder.build(); if (claim != null) { context.getContentRepository().incrementClaimaintCount(claim); } final StandardRepositoryRecord record = new StandardRepositoryRecord(null); record.setWorking(clone, clone.getAttributes()); records.put(clone, record); if (offset == 0L && size == example.getSize()) { provenanceReporter.clone(example, clone); } else { registerForkEvent(example, clone); } return clone; } private void registerForkEvent(final FlowFile parent, final FlowFile child) { ProvenanceEventBuilder eventBuilder = forkEventBuilders.get(parent); if (eventBuilder == null) { eventBuilder = context.getProvenanceRepository().eventBuilder(); eventBuilder.setEventType(ProvenanceEventType.FORK); eventBuilder.setFlowFileEntryDate(parent.getEntryDate()); eventBuilder.setLineageStartDate(parent.getLineageStartDate()); 
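/*
 * At this point the builder has captured the parent's timing; the parent's UUID, the component
 * information, and the parent linkage are attached just below, and the builder is cached per
 * parent so that every child subsequently forked from the same parent joins one FORK event.
 * For illustration (a sketch, not part of this class), two calls such as
 *
 *   final FlowFile childA = session.create(parent);
 *   final FlowFile childB = session.create(parent);
 *
 * reuse the cached builder and ultimately emit a single FORK event listing both children.
 */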
eventBuilder.setFlowFileUUID(parent.getAttribute(CoreAttributes.UUID.key())); eventBuilder.setComponentId(context.getConnectable().getIdentifier()); final Connectable connectable = context.getConnectable(); final String processorType = connectable.getComponentType(); eventBuilder.setComponentType(processorType); eventBuilder.addParentFlowFile(parent); updateEventContentClaims(eventBuilder, parent, records.get(parent)); forkEventBuilders.put(parent, eventBuilder); } eventBuilder.addChildFlowFile(child); } private void registerJoinEvent(final FlowFile child, final Collection<FlowFile> parents) { final ProvenanceEventRecord eventRecord = provenanceReporter.generateJoinEvent(parents, child); List<ProvenanceEventRecord> existingRecords = generatedProvenanceEvents.get(child); if (existingRecords == null) { existingRecords = new ArrayList<>(); generatedProvenanceEvents.put(child, existingRecords); } existingRecords.add(eventRecord); } @Override public FlowFile penalize(FlowFile flowFile) { flowFile = validateRecordState(flowFile); final StandardRepositoryRecord record = records.get(flowFile); final long expirationEpochMillis = System.currentTimeMillis() + context.getConnectable().getPenalizationPeriod(TimeUnit.MILLISECONDS); final FlowFileRecord newFile = new StandardFlowFileRecord.Builder().fromFlowFile(record.getCurrent()).penaltyExpirationTime(expirationEpochMillis).build(); record.setWorking(newFile); return newFile; } @Override public FlowFile putAttribute(FlowFile flowFile, final String key, final String value) { flowFile = validateRecordState(flowFile); if (CoreAttributes.UUID.key().equals(key)) { return flowFile; } final StandardRepositoryRecord record = records.get(flowFile); final FlowFileRecord newFile = new StandardFlowFileRecord.Builder().fromFlowFile(record.getCurrent()).addAttribute(key, value).build(); record.setWorking(newFile, key, value); return newFile; } @Override public FlowFile putAllAttributes(FlowFile flowFile, final Map<String, String> attributes) { flowFile = validateRecordState(flowFile); final StandardRepositoryRecord record = records.get(flowFile); final Map<String, String> updatedAttributes; if (attributes.containsKey(CoreAttributes.UUID.key())) { updatedAttributes = new HashMap<>(attributes); updatedAttributes.remove(CoreAttributes.UUID.key()); } else { updatedAttributes = attributes; } final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder().fromFlowFile(record.getCurrent()).addAttributes(updatedAttributes); final FlowFileRecord newFile = ffBuilder.build(); record.setWorking(newFile, updatedAttributes); return newFile; } @Override public FlowFile removeAttribute(FlowFile flowFile, final String key) { flowFile = validateRecordState(flowFile); if (CoreAttributes.UUID.key().equals(key)) { return flowFile; } final StandardRepositoryRecord record = records.get(flowFile); final FlowFileRecord newFile = new StandardFlowFileRecord.Builder().fromFlowFile(record.getCurrent()).removeAttributes(key).build(); record.setWorking(newFile, key, null); return newFile; } @Override public FlowFile removeAllAttributes(FlowFile flowFile, final Set<String> keys) { flowFile = validateRecordState(flowFile); if (keys == null) { return flowFile; } final StandardRepositoryRecord record = records.get(flowFile); final FlowFileRecord newFile = new StandardFlowFileRecord.Builder().fromFlowFile(record.getCurrent()).removeAttributes(keys).build(); final Map<String, String> updatedAttrs = new HashMap<>(); for (final String key : keys) { if 
(CoreAttributes.UUID.key().equals(key)) { continue; } updatedAttrs.put(key, null); } record.setWorking(newFile, updatedAttrs); return newFile; } @Override public FlowFile removeAllAttributes(FlowFile flowFile, final Pattern keyPattern) { flowFile = validateRecordState(flowFile); final StandardRepositoryRecord record = records.get(flowFile); final FlowFileRecord newFile = new StandardFlowFileRecord.Builder().fromFlowFile(record.getCurrent()).removeAttributes(keyPattern).build(); if (keyPattern == null) { record.setWorking(newFile); } else { final Map<String, String> curAttrs = record.getCurrent().getAttributes(); final Map<String, String> removed = new HashMap<>(); for (final String key : curAttrs.keySet()) { if (CoreAttributes.UUID.key().equals(key)) { continue; } if (keyPattern.matcher(key).matches()) { removed.put(key, null); } } record.setWorking(newFile, removed); } return newFile; } private void updateLastQueuedDate(final StandardRepositoryRecord record) { final FlowFileRecord newFile = new StandardFlowFileRecord.Builder().fromFlowFile(record.getCurrent()) .lastQueued(System.currentTimeMillis(), enqueuedIndex.getAndIncrement()).build(); record.setWorking(newFile); } @Override public void transfer(FlowFile flowFile, final Relationship relationship) { flowFile = validateRecordState(flowFile); final int numDestinations = context.getConnections(relationship).size(); final int multiplier = Math.max(1, numDestinations); boolean autoTerminated = false; boolean selfRelationship = false; if (numDestinations == 0 && context.getConnectable().isAutoTerminated(relationship)) { // auto terminated. autoTerminated = true; } else if (numDestinations == 0 && relationship == Relationship.SELF) { selfRelationship = true; } else if (numDestinations == 0) { // the relationship specified is not known in this session/context throw new IllegalArgumentException("Relationship '" + relationship.getName() + "' is not known"); } final StandardRepositoryRecord record = records.get(flowFile); record.setTransferRelationship(relationship); updateLastQueuedDate(record); if (autoTerminated) { removedCount += multiplier; removedBytes += flowFile.getSize(); } else if (!selfRelationship) { flowFilesOut += multiplier; contentSizeOut += flowFile.getSize() * multiplier; } } @Override public void transfer(FlowFile flowFile) { flowFile = validateRecordState(flowFile); final StandardRepositoryRecord record = records.get(flowFile); if (record.getOriginalQueue() == null) { throw new IllegalArgumentException("Cannot transfer FlowFiles that are created in this Session back to self"); } record.setTransferRelationship(Relationship.SELF); updateLastQueuedDate(record); } @Override public void transfer(final Collection<FlowFile> flowFiles) { for (final FlowFile flowFile : flowFiles) { transfer(flowFile); } } @Override public void transfer(Collection<FlowFile> flowFiles, final Relationship relationship) { flowFiles = validateRecordState(flowFiles); boolean autoTerminated = false; boolean selfRelationship = false; final int numDestinations = context.getConnections(relationship).size(); if (numDestinations == 0 && context.getConnectable().isAutoTerminated(relationship)) { // auto terminated. 
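/*
 * The fan-out "multiplier" used below: when a Relationship feeds multiple Connections, the
 * framework clones the FlowFile to each destination at commit time, so session statistics count
 * the FlowFile once per destination. A worked example (illustrative numbers only): transferring
 * 3 FlowFiles of 10 bytes each to a Relationship with 2 downstream Connections yields
 * flowFilesOut += 6 and contentSizeOut += 60.
 */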
autoTerminated = true;
} else if (numDestinations == 0 && relationship == Relationship.SELF) {
    selfRelationship = true;
} else if (numDestinations == 0) {
    // the relationship specified is not known in this session/context
    throw new IllegalArgumentException("Relationship '" + relationship.getName() + "' is not known");
}

final int multiplier = Math.max(1, numDestinations);

long contentSize = 0L;
for (final FlowFile flowFile : flowFiles) {
    final StandardRepositoryRecord record = records.get(flowFile);
    record.setTransferRelationship(relationship);
    updateLastQueuedDate(record);

    // accumulate the raw content size once per FlowFile; the fan-out multiplier is applied
    // exactly once below, mirroring the single-FlowFile transfer(FlowFile, Relationship) above
    contentSize += flowFile.getSize();
}

if (autoTerminated) {
    removedCount += multiplier * flowFiles.size();
    removedBytes += contentSize;
} else if (!selfRelationship) {
    flowFilesOut += multiplier * flowFiles.size();
    contentSizeOut += multiplier * contentSize;
}
}

@Override
public void remove(FlowFile flowFile) {
    flowFile = validateRecordState(flowFile);
    final StandardRepositoryRecord record = records.get(flowFile);
    record.markForDelete();
    removedFlowFiles.add(flowFile.getAttribute(CoreAttributes.UUID.key()));

    // if original connection is null, the FlowFile was created in this session, so we
    // do not want to count it toward the removed count.
    if (record.getOriginalQueue() == null) {
        // if we've generated any Fork events, remove them because the FlowFile was created
        // and then removed in this session.
        generatedProvenanceEvents.remove(flowFile);
        removeForkEvents(flowFile);
    } else {
        removedCount++;
        removedBytes += flowFile.getSize();
        provenanceReporter.drop(flowFile, flowFile.getAttribute(CoreAttributes.DISCARD_REASON.key()));
    }
}

@Override
public void remove(Collection<FlowFile> flowFiles) {
    flowFiles = validateRecordState(flowFiles);
    for (final FlowFile flowFile : flowFiles) {
        final StandardRepositoryRecord record = records.get(flowFile);
        record.markForDelete();
        removedFlowFiles.add(flowFile.getAttribute(CoreAttributes.UUID.key()));

        // if original connection is null, the FlowFile was created in this session, so we
        // do not want to count it toward the removed count.
if (record.getOriginalQueue() == null) { generatedProvenanceEvents.remove(flowFile); removeForkEvents(flowFile); } else { removedCount++; removedBytes += flowFile.getSize(); provenanceReporter.drop(flowFile, flowFile.getAttribute(CoreAttributes.DISCARD_REASON.key())); } } } private void removeForkEvents(final FlowFile flowFile) { for (final ProvenanceEventBuilder builder : forkEventBuilders.values()) { final ProvenanceEventRecord event = builder.build(); if (event.getEventType() == ProvenanceEventType.FORK) { builder.removeChildFlowFile(flowFile); } } } public void expireFlowFiles() { final Set<FlowFileRecord> expired = new HashSet<>(); final FlowFileFilter filter = new FlowFileFilter() { @Override public FlowFileFilterResult filter(final FlowFile flowFile) { return FlowFileFilterResult.REJECT_AND_CONTINUE; } }; for (final Connection conn : context.getConnectable().getIncomingConnections()) { do { expired.clear(); conn.getFlowFileQueue().poll(filter, expired); removeExpired(expired, conn); } while (!expired.isEmpty()); } } private void removeExpired(final Set<FlowFileRecord> flowFiles, final Connection connection) { if (flowFiles.isEmpty()) { return; } LOG.info("{} {} FlowFiles have expired and will be removed", new Object[] {this, flowFiles.size()}); final List<RepositoryRecord> expiredRecords = new ArrayList<>(flowFiles.size()); final Connectable connectable = context.getConnectable(); final String processorType = connectable.getComponentType(); final StandardProvenanceReporter expiredReporter = new StandardProvenanceReporter(this, connectable.getIdentifier(), processorType, context.getProvenanceRepository(), this); final Map<String, FlowFileRecord> recordIdMap = new HashMap<>(); for (final FlowFileRecord flowFile : flowFiles) { recordIdMap.put(flowFile.getAttribute(CoreAttributes.UUID.key()), flowFile); final StandardRepositoryRecord record = new StandardRepositoryRecord(connection.getFlowFileQueue(), flowFile); record.markForDelete(); expiredRecords.add(record); expiredReporter.expire(flowFile, "Expiration Threshold = " + connection.getFlowFileQueue().getFlowFileExpiration()); decrementClaimCount(flowFile.getContentClaim()); final long flowFileLife = System.currentTimeMillis() - flowFile.getEntryDate(); final Object terminator = connectable instanceof ProcessorNode ? 
((ProcessorNode) connectable).getProcessor() : connectable;
LOG.info("{} terminated by {} due to FlowFile expiration; life of FlowFile = {} ms", new Object[] {flowFile, terminator, flowFileLife});
}

try {
    final Iterable<ProvenanceEventRecord> iterable = new Iterable<ProvenanceEventRecord>() {
        @Override
        public Iterator<ProvenanceEventRecord> iterator() {
            final Iterator<ProvenanceEventRecord> expiredEventIterator = expiredReporter.getEvents().iterator();
            final Iterator<ProvenanceEventRecord> enrichingIterator = new Iterator<ProvenanceEventRecord>() {
                @Override
                public boolean hasNext() {
                    return expiredEventIterator.hasNext();
                }

                @Override
                public ProvenanceEventRecord next() {
                    final ProvenanceEventRecord event = expiredEventIterator.next();
                    final StandardProvenanceEventRecord.Builder enriched = new StandardProvenanceEventRecord.Builder().fromEvent(event);
                    final FlowFileRecord record = recordIdMap.get(event.getFlowFileUuid());
                    if (record == null) {
                        return null;
                    }

                    final ContentClaim claim = record.getContentClaim();
                    if (claim != null) {
                        final ResourceClaim resourceClaim = claim.getResourceClaim();
                        enriched.setCurrentContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(),
                            record.getContentClaimOffset() + claim.getOffset(), record.getSize());
                        enriched.setPreviousContentClaim(resourceClaim.getContainer(), resourceClaim.getSection(), resourceClaim.getId(),
                            record.getContentClaimOffset() + claim.getOffset(), record.getSize());
                    }

                    enriched.setAttributes(record.getAttributes(), Collections.<String, String> emptyMap());
                    return enriched.build();
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };

            return enrichingIterator;
        }
    };

    context.getProvenanceRepository().registerEvents(iterable);
    context.getFlowFileRepository().updateRepository(expiredRecords);
} catch (final IOException e) {
    // pass the Throwable as the final argument (with no placeholder for it) so that SLF4J
    // logs the full stack trace instead of consuming the exception as a placeholder value
    LOG.error("Failed to update FlowFile Repository to record expired records", e);
}
}

private InputStream getInputStream(final FlowFile flowFile, final ContentClaim claim, final long offset, final boolean allowCachingOfStream) throws ContentNotFoundException {
    // If there's no content, don't bother going to the Content Repository because it is generally expensive and we know
    // that there is no actual content.
    if (flowFile.getSize() == 0L) {
        return new ByteArrayInputStream(new byte[0]);
    }

    try {
        // If the recursion set is empty, we can use the same input stream that we already have open. However, if
        // the recursion set is NOT empty, we can't do this because we may be reading the input of FlowFile 1 while in the
        // callback for reading FlowFile 1, and if we used the same stream we'd be destroying the ability to read from FlowFile 1.
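/*
 * A worked example of the stream-reuse optimization below (illustrative offsets only): if the
 * previously cached read of claim C has consumed 40 bytes and the next FlowFile's content begins
 * at offset 100 of the same claim, the cached stream is advanced 60 bytes rather than re-opened.
 * The DisableOnCloseInputStream wrapper lets each caller "close" its view without closing the
 * cached underlying stream.
 */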
if (allowCachingOfStream && readRecursionSet.isEmpty() && writeRecursionSet.isEmpty()) {
    if (currentReadClaim == claim) {
        if (currentReadClaimStream != null && currentReadClaimStream.getBytesConsumed() <= offset) {
            final long bytesToSkip = offset - currentReadClaimStream.getBytesConsumed();
            if (bytesToSkip > 0) {
                StreamUtils.skip(currentReadClaimStream, bytesToSkip);
            }

            return new DisableOnCloseInputStream(currentReadClaimStream);
        }
    }

    claimCache.flush(claim);
    final InputStream rawInStream = context.getContentRepository().read(claim);

    if (currentReadClaimStream != null) {
        currentReadClaimStream.close();
    }

    currentReadClaim = claim;
    currentReadClaimStream = new ByteCountingInputStream(rawInStream);
    StreamUtils.skip(currentReadClaimStream, offset);

    // Use a non-closeable stream because we want to keep it open after the callback has finished so that we can
    // reuse the same InputStream for the next FlowFile
    return new DisableOnCloseInputStream(currentReadClaimStream);
} else {
    claimCache.flush(claim);
    final InputStream rawInStream = context.getContentRepository().read(claim);
    try {
        StreamUtils.skip(rawInStream, offset);
    } catch (final IOException ioe) {
        IOUtils.closeQuietly(rawInStream);
        throw ioe;
    }
    return rawInStream;
}
} catch (final ContentNotFoundException cnfe) {
    throw cnfe;
} catch (final EOFException eof) {
    throw new ContentNotFoundException(claim, eof);
} catch (final IOException ioe) {
    throw new FlowFileAccessException("Failed to read content of " + flowFile, ioe);
}
}

@Override
public void read(final FlowFile source, final InputStreamCallback reader) {
    read(source, false, reader);
}

@Override
public void read(FlowFile source, boolean allowSessionStreamManagement, InputStreamCallback reader) {
    source = validateRecordState(source, true);
    final StandardRepositoryRecord record = records.get(source);

    try {
        ensureNotAppending(record.getCurrentClaim());
        claimCache.flush(record.getCurrentClaim());
    } catch (final IOException e) {
        throw new FlowFileAccessException("Failed to access ContentClaim for " + source.toString(), e);
    }

    try (final InputStream rawIn = getInputStream(source, record.getCurrentClaim(), record.getCurrentClaimOffset(), true);
        final InputStream limitedIn = new LimitedInputStream(rawIn, source.getSize());
        final InputStream disableOnCloseIn = new DisableOnCloseInputStream(limitedIn);
        final ByteCountingInputStream countingStream = new ByteCountingInputStream(disableOnCloseIn, this.bytesRead)) {

        // We want to differentiate between IOExceptions thrown by the repository and IOExceptions thrown from
        // Processor code. As a result, we have the FlowFileAccessInputStream, which catches IOException from the repository
        // and translates it into either FlowFileAccessException or ContentNotFoundException. We keep track of any
        // ContentNotFoundException because if it is thrown, the Processor code may catch it and do something else with it,
        // but in reality, if it is thrown, we want to know about it and handle it, even if the Processor code catches it.
        final FlowFileAccessInputStream ffais = new FlowFileAccessInputStream(countingStream, source, record.getCurrentClaim());
        boolean cnfeThrown = false;

        try {
            incrementReadCount(source);
            reader.process(ffais);

            // Allow processors to close the stream after reading, in order to avoid holding too many
            // open files, or to perform smarter session-level stream management.
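/*
 * A minimal sketch of the two read styles this flag enables (assumes a processor's onTrigger
 * and a pre-allocated byte[] "buffer", both hypothetical here):
 *
 *   session.read(flowFile, in -> StreamUtils.fillBuffer(in, buffer));       // claim stream closed afterward
 *   session.read(flowFile, true, in -> StreamUtils.fillBuffer(in, buffer)); // session keeps the claim stream cached
 */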
if (this.currentReadClaimStream != null && !allowSessionStreamManagement) {
    currentReadClaimStream.close();
    currentReadClaimStream = null;
}
} catch (final ContentNotFoundException cnfe) {
    cnfeThrown = true;
    throw cnfe;
} finally {
    decrementReadCount(source);
    bytesRead += countingStream.getBytesRead();

    // if cnfeThrown is true, we don't need to re-throw the Exception; it will propagate.
    if (!cnfeThrown && ffais.getContentNotFoundException() != null) {
        throw ffais.getContentNotFoundException();
    }
}
} catch (final ContentNotFoundException nfe) {
    handleContentNotFound(nfe, record);
} catch (final IOException ex) {
    throw new ProcessException("IOException thrown from " + connectableDescription + ": " + ex.toString(), ex);
}
}

@Override
public InputStream read(FlowFile source) {
    source = validateRecordState(source, true);
    final StandardRepositoryRecord record = records.get(source);

    try {
        ensureNotAppending(record.getCurrentClaim());
    } catch (final IOException e) {
        throw new FlowFileAccessException("Failed to access ContentClaim for " + source.toString(), e);
    }

    final InputStream rawIn = getInputStream(source, record.getCurrentClaim(), record.getCurrentClaimOffset(), false);
    final InputStream limitedIn = new LimitedInputStream(rawIn, source.getSize());
    final ByteCountingInputStream countingStream = new ByteCountingInputStream(limitedIn);
    final FlowFileAccessInputStream ffais = new FlowFileAccessInputStream(countingStream, source, record.getCurrentClaim());

    final FlowFile sourceFlowFile = source;
    final InputStream errorHandlingStream = new InputStream() {
        private boolean closed = false;

        @Override
        public int read() throws IOException {
            try {
                return ffais.read();
            } catch (final ContentNotFoundException cnfe) {
                handleContentNotFound(cnfe, record);
                close();
                throw cnfe;
            } catch (final FlowFileAccessException ffae) {
                LOG.error("Failed to read content from " + sourceFlowFile + "; rolling back session", ffae);
                rollback(true);
                close();
                throw ffae;
            }
        }

        @Override
        public int read(final byte[] b) throws IOException {
            return read(b, 0, b.length);
        }

        @Override
        public int read(final byte[] b, final int off, final int len) throws IOException {
            try {
                return ffais.read(b, off, len);
            } catch (final ContentNotFoundException cnfe) {
                handleContentNotFound(cnfe, record);
                close();
                throw cnfe;
            } catch (final FlowFileAccessException ffae) {
                LOG.error("Failed to read content from " + sourceFlowFile + "; rolling back session", ffae);
                rollback(true);
                close();
                throw ffae;
            }
        }

        @Override
        public void close() throws IOException {
            decrementReadCount(sourceFlowFile);

            if (!closed) {
                StandardProcessSession.this.bytesRead += countingStream.getBytesRead();
                closed = true;
            }

            ffais.close();
            openInputStreams.remove(sourceFlowFile);
        }

        @Override
        public int available() throws IOException {
            return ffais.available();
        }

        @Override
        public long skip(long n) throws IOException {
            return ffais.skip(n);
        }

        @Override
        public boolean markSupported() {
            return ffais.markSupported();
        }

        @Override
        public synchronized void mark(int readlimit) {
            ffais.mark(readlimit);
        }

        @Override
        public synchronized void reset() throws IOException {
            ffais.reset();
        }

        @Override
        public String toString() {
            return "ErrorHandlingInputStream[FlowFile=" + sourceFlowFile + "]";
        }
    };

    incrementReadCount(sourceFlowFile);
    openInputStreams.put(sourceFlowFile, errorHandlingStream);
    return errorHandlingStream;
}

private void incrementReadCount(final FlowFile flowFile) {
    readRecursionSet.compute(flowFile, (ff, count) -> count == null ?
1 : count + 1); } private void decrementReadCount(final FlowFile flowFile) { final Integer count = readRecursionSet.get(flowFile); if (count == null) { return; } final int updatedCount = count - 1; if (updatedCount == 0) { readRecursionSet.remove(flowFile); } else { readRecursionSet.put(flowFile, updatedCount); } } @Override public FlowFile merge(final Collection<FlowFile> sources, final FlowFile destination) { return merge(sources, destination, null, null, null); } @Override public FlowFile merge(Collection<FlowFile> sources, FlowFile destination, final byte[] header, final byte[] footer, final byte[] demarcator) { sources = validateRecordState(sources); destination = validateRecordState(destination); if (sources.contains(destination)) { throw new IllegalArgumentException("Destination cannot be within sources"); } final Collection<StandardRepositoryRecord> sourceRecords = new ArrayList<>(); for (final FlowFile source : sources) { final StandardRepositoryRecord record = records.get(source); sourceRecords.add(record); try { ensureNotAppending(record.getCurrentClaim()); claimCache.flush(record.getCurrentClaim()); } catch (final IOException e) { throw new FlowFileAccessException("Unable to read from source " + source + " due to " + e.toString(), e); } } final StandardRepositoryRecord destinationRecord = records.get(destination); final ContentRepository contentRepo = context.getContentRepository(); final ContentClaim newClaim; try { newClaim = contentRepo.create(context.getConnectable().isLossTolerant()); claimLog.debug("Creating ContentClaim {} for 'merge' for {}", newClaim, destinationRecord.getCurrent()); } catch (final IOException e) { throw new FlowFileAccessException("Unable to create ContentClaim due to " + e.toString(), e); } long readCount = 0L; long writtenCount = 0L; try { try (final OutputStream rawOut = contentRepo.write(newClaim); final OutputStream out = new BufferedOutputStream(rawOut)) { if (header != null && header.length > 0) { out.write(header); writtenCount += header.length; } int objectIndex = 0; final boolean useDemarcator = demarcator != null && demarcator.length > 0; final int numSources = sources.size(); for (final FlowFile source : sources) { final StandardRepositoryRecord sourceRecord = records.get(source); final long copied = contentRepo.exportTo(sourceRecord.getCurrentClaim(), out, sourceRecord.getCurrentClaimOffset(), source.getSize()); writtenCount += copied; readCount += copied; // don't add demarcator after the last claim if (useDemarcator && ++objectIndex < numSources) { out.write(demarcator); writtenCount += demarcator.length; } } if (footer != null && footer.length > 0) { out.write(footer); writtenCount += footer.length; } } finally { bytesWritten += writtenCount; bytesRead += readCount; } } catch (final ContentNotFoundException nfe) { destroyContent(newClaim); handleContentNotFound(nfe, destinationRecord); handleContentNotFound(nfe, sourceRecords); } catch (final IOException ioe) { destroyContent(newClaim); throw new FlowFileAccessException("Failed to merge " + sources.size() + " into " + destination + " due to " + ioe.toString(), ioe); } catch (final Throwable t) { destroyContent(newClaim); throw t; } removeTemporaryClaim(destinationRecord); final FlowFileRecord newFile = new StandardFlowFileRecord.Builder().fromFlowFile(destinationRecord.getCurrent()).contentClaim(newClaim).contentClaimOffset(0L).size(writtenCount).build(); destinationRecord.setWorking(newFile); records.put(newFile, destinationRecord); return newFile; } private void 
ensureNotAppending(final ContentClaim claim) throws IOException { if (claim == null) { return; } final ByteCountingOutputStream outStream = appendableStreams.remove(claim); if (outStream == null) { return; } outStream.flush(); outStream.close(); } @Override public OutputStream write(FlowFile source) { source = validateRecordState(source); final StandardRepositoryRecord record = records.get(source); ContentClaim newClaim = null; try { newClaim = claimCache.getContentClaim(); claimLog.debug("Creating ContentClaim {} for 'write' for {}", newClaim, source); ensureNotAppending(newClaim); final OutputStream rawStream = claimCache.write(newClaim); final OutputStream disableOnClose = new DisableOnCloseOutputStream(rawStream); final ByteCountingOutputStream countingOut = new ByteCountingOutputStream(disableOnClose); final FlowFile sourceFlowFile = source; final ContentClaim updatedClaim = newClaim; final OutputStream errorHandlingOutputStream = new OutputStream() { private boolean closed = false; @Override public void write(final int b) throws IOException { try { countingOut.write(b); } catch (final IOException ioe) { LOG.error("Failed to write content to " + sourceFlowFile + "; rolling back session", ioe); rollback(true); close(); throw new FlowFileAccessException("Failed to write to Content Repository for " + sourceFlowFile, ioe); } } @Override public void write(final byte[] b) throws IOException { try { countingOut.write(b); } catch (final IOException ioe) { LOG.error("Failed to write content to " + sourceFlowFile + "; rolling back session", ioe); rollback(true); close(); throw new FlowFileAccessException("Failed to write to Content Repository for " + sourceFlowFile, ioe); } } @Override public void write(final byte[] b, final int off, final int len) throws IOException { try { countingOut.write(b, off, len); } catch (final IOException ioe) { LOG.error("Failed to write content to " + sourceFlowFile + "; rolling back session", ioe); rollback(true); close(); throw new FlowFileAccessException("Failed to write to Content Repository for " + sourceFlowFile, ioe); } } @Override public void flush() throws IOException { try { countingOut.flush(); } catch (final IOException ioe) { LOG.error("Failed to write content to " + sourceFlowFile + "; rolling back session", ioe); rollback(true); close(); throw new FlowFileAccessException("Failed to write to Content Repository for " + sourceFlowFile, ioe); } } @Override public void close() throws IOException { if (closed) { return; } closed = true; writeRecursionSet.remove(sourceFlowFile); final long bytesWritten = countingOut.getBytesWritten(); StandardProcessSession.this.bytesWritten += bytesWritten; openOutputStreams.remove(sourceFlowFile); flush(); removeTemporaryClaim(record); final FlowFileRecord newFile = new StandardFlowFileRecord.Builder() .fromFlowFile(record.getCurrent()) .contentClaim(updatedClaim) .contentClaimOffset(Math.max(0, updatedClaim.getLength() - bytesWritten)) .size(bytesWritten) .build(); record.setWorking(newFile); } }; writeRecursionSet.add(source); openOutputStreams.put(source, errorHandlingOutputStream); return errorHandlingOutputStream; } catch (final ContentNotFoundException nfe) { resetWriteClaims(); // need to reset write claim before we can remove the claim destroyContent(newClaim); handleContentNotFound(nfe, record); throw nfe; } catch (final FlowFileAccessException ffae) { resetWriteClaims(); // need to reset write claim before we can remove the claim destroyContent(newClaim); throw ffae; } catch (final IOException ioe) { 
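/*
 * Cleanup ordering in these catch blocks matters: resetWriteClaims() flushes and closes any
 * streams still appending to claims before destroyContent(newClaim) is called, so the claim is
 * never destroyed while an open stream may still reference it (see the inline comments below).
 */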
resetWriteClaims(); // need to reset write claim before we can remove the claim destroyContent(newClaim); throw new ProcessException("IOException thrown from " + connectableDescription + ": " + ioe.toString(), ioe); } catch (final Throwable t) { resetWriteClaims(); // need to reset write claim before we can remove the claim destroyContent(newClaim); throw t; } } @Override public FlowFile write(FlowFile source, final OutputStreamCallback writer) { source = validateRecordState(source); final StandardRepositoryRecord record = records.get(source); long writtenToFlowFile = 0L; ContentClaim newClaim = null; try { newClaim = claimCache.getContentClaim(); claimLog.debug("Creating ContentClaim {} for 'write' for {}", newClaim, source); ensureNotAppending(newClaim); try (final OutputStream stream = claimCache.write(newClaim); final OutputStream disableOnClose = new DisableOnCloseOutputStream(stream); final ByteCountingOutputStream countingOut = new ByteCountingOutputStream(disableOnClose)) { try { writeRecursionSet.add(source); writer.process(new FlowFileAccessOutputStream(countingOut, source)); } finally { writtenToFlowFile = countingOut.getBytesWritten(); bytesWritten += countingOut.getBytesWritten(); } } finally { writeRecursionSet.remove(source); } } catch (final ContentNotFoundException nfe) { resetWriteClaims(); // need to reset write claim before we can remove the claim destroyContent(newClaim); handleContentNotFound(nfe, record); } catch (final FlowFileAccessException ffae) { resetWriteClaims(); // need to reset write claim before we can remove the claim destroyContent(newClaim); throw ffae; } catch (final IOException ioe) { resetWriteClaims(); // need to reset write claim before we can remove the claim destroyContent(newClaim); throw new ProcessException("IOException thrown from " + connectableDescription + ": " + ioe.toString(), ioe); } catch (final Throwable t) { resetWriteClaims(); // need to reset write claim before we can remove the claim destroyContent(newClaim); throw t; } removeTemporaryClaim(record); final FlowFileRecord newFile = new StandardFlowFileRecord.Builder() .fromFlowFile(record.getCurrent()) .contentClaim(newClaim) .contentClaimOffset(Math.max(0, newClaim.getLength() - writtenToFlowFile)) .size(writtenToFlowFile) .build(); record.setWorking(newFile); return newFile; } @Override public FlowFile append(FlowFile source, final OutputStreamCallback writer) { source = validateRecordState(source); final StandardRepositoryRecord record = records.get(source); long newSize = 0L; // Get the current Content Claim from the record and see if we already have // an OutputStream that we can append to. 
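/*
 * Typical usage of append() (a minimal sketch; "lineBytes" is a hypothetical payload). Repeated
 * appends within one session reuse the OutputStream cached in appendableStreams, so only the
 * first call pays the cost of copying the existing content into a new claim:
 *
 *   flowFile = session.append(flowFile, new OutputStreamCallback() {
 *       @Override
 *       public void process(final OutputStream out) throws IOException {
 *           out.write(lineBytes);
 *       }
 *   });
 */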
final ContentClaim oldClaim = record.getCurrentClaim();
ByteCountingOutputStream outStream = appendableStreams.get(oldClaim);
long originalByteWrittenCount = 0;

ContentClaim newClaim = null;
try {
    if (outStream == null) {
        claimCache.flush(oldClaim);

        try (final InputStream oldClaimIn = context.getContentRepository().read(oldClaim)) {
            newClaim = context.getContentRepository().create(context.getConnectable().isLossTolerant());
            claimLog.debug("Creating ContentClaim {} for 'append' for {}", newClaim, source);

            final OutputStream rawOutStream = context.getContentRepository().write(newClaim);
            final OutputStream bufferedOutStream = new BufferedOutputStream(rawOutStream);
            outStream = new ByteCountingOutputStream(bufferedOutStream);
            originalByteWrittenCount = 0;

            appendableStreams.put(newClaim, outStream);

            // We need to copy all of the data from the old claim to the new claim
            StreamUtils.copy(oldClaimIn, outStream);

            // wrap our OutputStream so that the processor cannot close it
            try (final OutputStream disableOnClose = new DisableOnCloseOutputStream(outStream)) {
                writeRecursionSet.add(source);
                writer.process(new FlowFileAccessOutputStream(disableOnClose, source));
            } finally {
                writeRecursionSet.remove(source);
            }
        }
    } else {
        newClaim = oldClaim;
        originalByteWrittenCount = outStream.getBytesWritten();

        // wrap our OutputStream so that the processor cannot close it
        try (final OutputStream disableOnClose = new DisableOnCloseOutputStream(outStream);
            final OutputStream flowFileAccessOutStream = new FlowFileAccessOutputStream(disableOnClose, source)) {
            writeRecursionSet.add(source);
            writer.process(flowFileAccessOutStream);
        } finally {
            writeRecursionSet.remove(source);
        }
    }

    // update the newSize to reflect the number of bytes written
    newSize = outStream.getBytesWritten();
} catch (final ContentNotFoundException nfe) {
    resetWriteClaims(); // need to reset write claim before we can remove the claim

    // If the content claim changed, then we should destroy the new one. We do this
    // because the new content claim will never get set as the 'working claim' for the FlowFile
    // record since we will throw an Exception. As a result, we need to ensure that we have
    // appropriately decremented the claimant count and can destroy the content if it is no
    // longer in use. However, it is critical that we do this ONLY if the content claim has
    // changed. Otherwise, the FlowFile already has a reference to this Content Claim and
    // whenever the FlowFile is removed, the claim count will be decremented; if we decremented
    // it here also, we would be decrementing the claimant count twice!
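/*
 * Concretely: on a first append, the FlowFile still references old claim A while the data was
 * copied into new claim B; a failure here must destroy B, since B will never become the working
 * claim. On a subsequent append, however, newClaim == oldClaim, and destroying it would remove
 * content that the FlowFile still references, whose claimant count is decremented again when
 * the FlowFile itself is removed; hence the newClaim != oldClaim guard below.
 */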
if (newClaim != oldClaim) { destroyContent(newClaim); } handleContentNotFound(nfe, record); } catch (final IOException ioe) { resetWriteClaims(); // need to reset write claim before we can remove the claim // See above explanation for why this is done only if newClaim != oldClaim if (newClaim != oldClaim) { destroyContent(newClaim); } throw new ProcessException("IOException thrown from " + connectableDescription + ": " + ioe.toString(), ioe); } catch (final Throwable t) { resetWriteClaims(); // need to reset write claim before we can remove the claim // See above explanation for why this is done only if newClaim != oldClaim if (newClaim != oldClaim) { destroyContent(newClaim); } throw t; } finally { if (outStream != null) { final long bytesWrittenThisIteration = outStream.getBytesWritten() - originalByteWrittenCount; bytesWritten += bytesWrittenThisIteration; } } // If the record already has a working claim, and this is the first time that we are appending to the FlowFile, // destroy the current working claim because it is a temporary claim that // is no longer going to be used, as we are about to set a new working claim. This would happen, for instance, if // the FlowFile was written to, via #write() and then append() was called. if (newClaim != oldClaim) { removeTemporaryClaim(record); } final FlowFileRecord newFile = new StandardFlowFileRecord.Builder().fromFlowFile(record.getCurrent()).contentClaim(newClaim).contentClaimOffset(0).size(newSize).build(); record.setWorking(newFile); return newFile; } /** * Checks if the ContentClaim associated with this record should be removed, * since the record is about to be updated to point to a new content claim. * If so, removes the working claim. * * This happens if & only if the content of this FlowFile has been modified * since it was last committed to the FlowFile repository, because this * indicates that the content is no longer needed and should be cleaned up. * * @param record record */ private void removeTemporaryClaim(final StandardRepositoryRecord record) { final boolean contentModified = record.getWorkingClaim() != null && record.getWorkingClaim() != record.getOriginalClaim(); // If the working claim is not the same as the original claim, we have modified the content of // the FlowFile, and we need to remove the newly created content (the working claim). However, if // they are the same, we cannot just remove the claim because record.getWorkingClaim() will return // the original claim if the record is "working" but the content has not been modified // (e.g., in the case of attributes only were updated) // // In other words: // If we modify the attributes of a FlowFile, and then we call record.getWorkingClaim(), this will // return the same claim as record.getOriginalClaim(). So we cannot just remove the working claim because // that may decrement the original claim (because the 2 claims are the same), and that's NOT what we want to do // because we will do that later, in the session.commit() and that would result in decrementing the count for // the original claim twice. if (contentModified) { // In this case, it's ok to decrement the claimant count for the content because we know that the working claim is going to be // updated and the given working claim is referenced only by FlowFiles in this session (because it's the Working Claim). 
// Therefore, we need to decrement the claimant count, and since the Working Claim is being changed, that means that
// the Working Claim is a transient claim (the content need not be persisted because no FlowFile refers to it). We cannot simply
// remove the content because there may be other FlowFiles that reference the same Resource Claim. Marking the Content Claim as
// transient, though, will result in the FlowFile Repository cleaning up as appropriate.
context.getContentRepository().decrementClaimantCount(record.getWorkingClaim());
record.addTransientClaim(record.getWorkingClaim());
}
}

private void resetWriteClaims() {
    resetWriteClaims(true);
}

private void resetWriteClaims(final boolean suppressExceptions) {
    for (final ByteCountingOutputStream out : appendableStreams.values()) {
        try {
            try {
                out.flush();
            } finally {
                out.close();
            }
        } catch (final IOException e) {
            if (!suppressExceptions) {
                // propagate the underlying IOException as the cause so that it is not silently lost
                throw new FlowFileAccessException("Unable to flush the output of FlowFile to the Content Repository", e);
            }
        }
    }
    appendableStreams.clear();
}

private void resetReadClaim() {
    try {
        if (currentReadClaimStream != null) {
            currentReadClaimStream.close();
        }
    } catch (final Exception e) {
        // ignore; the cached stream is being abandoned and a failure to close it is not actionable here
    }
    currentReadClaimStream = null;
    currentReadClaim = null;
}

@Override
public FlowFile write(FlowFile source, final StreamCallback writer) {
    source = validateRecordState(source);
    final StandardRepositoryRecord record = records.get(source);
    final ContentClaim currClaim = record.getCurrentClaim();

    long writtenToFlowFile = 0L;
    ContentClaim newClaim = null;
    try {
        newClaim = claimCache.getContentClaim();
        claimLog.debug("Creating ContentClaim {} for 'write' for {}", newClaim, source);
        ensureNotAppending(newClaim);

        if (currClaim != null) {
            claimCache.flush(currClaim.getResourceClaim());
        }

        try (final InputStream is = getInputStream(source, currClaim, record.getCurrentClaimOffset(), true);
            final InputStream limitedIn = new LimitedInputStream(is, source.getSize());
            final InputStream disableOnCloseIn = new DisableOnCloseInputStream(limitedIn);
            final ByteCountingInputStream countingIn = new ByteCountingInputStream(disableOnCloseIn, bytesRead);
            final OutputStream os = claimCache.write(newClaim);
            final OutputStream disableOnCloseOut = new DisableOnCloseOutputStream(os);
            final ByteCountingOutputStream countingOut = new ByteCountingOutputStream(disableOnCloseOut)) {

            writeRecursionSet.add(source);

            // We want to differentiate between IOExceptions thrown by the repository and IOExceptions thrown from
            // Processor code. As a result, we have the FlowFileAccessInputStream, which catches IOException from the repository
            // and translates it into either FlowFileAccessException or ContentNotFoundException. We keep track of any
            // ContentNotFoundException because if it is thrown, the Processor code may catch it and do something else with it,
            // but in reality, if it is thrown, we want to know about it and handle it, even if the Processor code catches it.
            final FlowFileAccessInputStream ffais = new FlowFileAccessInputStream(countingIn, source, currClaim);
            boolean cnfeThrown = false;

            try {
                writer.process(ffais, new FlowFileAccessOutputStream(countingOut, source));
            } catch (final ContentNotFoundException cnfe) {
                cnfeThrown = true;
                throw cnfe;
            } finally {
                writtenToFlowFile = countingOut.getBytesWritten();
                this.bytesWritten += writtenToFlowFile;
                this.bytesRead += countingIn.getBytesRead();
                writeRecursionSet.remove(source);

                // if cnfeThrown is true, we don't need to re-throw the Exception; it will propagate.
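/*
 * Example of why this re-check is needed: a StreamCallback may catch and swallow the
 * ContentNotFoundException raised while reading its input stream. The FlowFileAccessInputStream
 * remembers that the content was missing, so the session can still surface the failure here
 * even though the callback returned normally.
 */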
if (!cnfeThrown && ffais.getContentNotFoundException() != null) { throw ffais.getContentNotFoundException(); } } } } catch (final ContentNotFoundException nfe) { destroyContent(newClaim); handleContentNotFound(nfe, record); } catch (final IOException ioe) { destroyContent(newClaim); throw new ProcessException("IOException thrown from " + connectableDescription + ": " + ioe.toString(), ioe); } catch (final FlowFileAccessException ffae) { destroyContent(newClaim); throw ffae; } catch (final Throwable t) { destroyContent(newClaim); throw t; } removeTemporaryClaim(record); final FlowFileRecord newFile = new StandardFlowFileRecord.Builder() .fromFlowFile(record.getCurrent()) .contentClaim(newClaim) .contentClaimOffset(Math.max(0L, newClaim.getLength() - writtenToFlowFile)) .size(writtenToFlowFile) .build(); record.setWorking(newFile); return newFile; } @Override public FlowFile importFrom(final Path source, final boolean keepSourceFile, FlowFile destination) { destination = validateRecordState(destination); // TODO: find a better solution. With Windows 7 and Java 7 (very early update, at least), Files.isWritable(source.getParent()) returns false, even when it should be true. if (!keepSourceFile && !Files.isWritable(source.getParent()) && !source.getParent().toFile().canWrite()) { // If we do NOT want to keep the file, ensure that we can delete it, or else error. throw new FlowFileAccessException("Cannot write to path " + source.getParent().toFile().getAbsolutePath() + " so cannot delete file; will not import."); } final StandardRepositoryRecord record = records.get(destination); final ContentClaim newClaim; final long claimOffset; try { newClaim = context.getContentRepository().create(context.getConnectable().isLossTolerant()); claimLog.debug("Creating ContentClaim {} for 'importFrom' for {}", newClaim, destination); } catch (final IOException e) { throw new FlowFileAccessException("Unable to create ContentClaim due to " + e.toString(), e); } claimOffset = 0L; long newSize = 0L; try { newSize = context.getContentRepository().importFrom(source, newClaim); bytesWritten += newSize; bytesRead += newSize; } catch (final Throwable t) { destroyContent(newClaim); throw new FlowFileAccessException("Failed to import data from " + source + " for " + destination + " due to " + t.toString(), t); } removeTemporaryClaim(record); final FlowFileRecord newFile = new StandardFlowFileRecord.Builder() .fromFlowFile(record.getCurrent()) .contentClaim(newClaim) .contentClaimOffset(claimOffset) .size(newSize) .addAttribute(CoreAttributes.FILENAME.key(), source.toFile().getName()) .build(); record.setWorking(newFile, CoreAttributes.FILENAME.key(), source.toFile().getName()); if (!keepSourceFile) { deleteOnCommit.put(newFile, source); } return newFile; } @Override public FlowFile importFrom(final InputStream source, FlowFile destination) { destination = validateRecordState(destination); final StandardRepositoryRecord record = records.get(destination); ContentClaim newClaim = null; final long claimOffset = 0L; final long newSize; try { try { newClaim = context.getContentRepository().create(context.getConnectable().isLossTolerant()); claimLog.debug("Creating ContentClaim {} for 'importFrom' for {}", newClaim, destination); newSize = context.getContentRepository().importFrom(source, newClaim); bytesWritten += newSize; } catch (final IOException e) { throw new FlowFileAccessException("Unable to create ContentClaim due to " + e.toString(), e); } } catch (final Throwable t) { if (newClaim != null) { destroyContent(newClaim); 
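// Note: this claim was created within this method and never became the FlowFile's working
// claim, so destroying it outright cannot interfere with claimant counts held by the record.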
}
throw new FlowFileAccessException("Failed to import data from " + source + " for " + destination + " due to " + t.toString(), t);
}

removeTemporaryClaim(record);
final FlowFileRecord newFile = new StandardFlowFileRecord.Builder()
    .fromFlowFile(record.getCurrent())
    .contentClaim(newClaim)
    .contentClaimOffset(claimOffset)
    .size(newSize)
    .build();
record.setWorking(newFile);
return newFile;
}

@Override
public void exportTo(FlowFile source, final Path destination, final boolean append) {
    source = validateRecordState(source);
    final StandardRepositoryRecord record = records.get(source);
    try {
        ensureNotAppending(record.getCurrentClaim());
        claimCache.flush(record.getCurrentClaim());

        final long copyCount = context.getContentRepository().exportTo(record.getCurrentClaim(), destination, append, record.getCurrentClaimOffset(), source.getSize());
        bytesRead += copyCount;
        bytesWritten += copyCount;
    } catch (final ContentNotFoundException nfe) {
        handleContentNotFound(nfe, record);
    } catch (final Throwable t) {
        throw new FlowFileAccessException("Failed to export " + source + " to " + destination + " due to " + t.toString(), t);
    }
}

@Override
public void exportTo(FlowFile source, final OutputStream destination) {
    source = validateRecordState(source);
    final StandardRepositoryRecord record = records.get(source);

    if (record.getCurrentClaim() == null) {
        return;
    }

    try {
        ensureNotAppending(record.getCurrentClaim());
        claimCache.flush(record.getCurrentClaim());
    } catch (final IOException e) {
        throw new FlowFileAccessException("Failed to access ContentClaim for " + source.toString(), e);
    }

    try (final InputStream rawIn = getInputStream(source, record.getCurrentClaim(), record.getCurrentClaimOffset(), true);
        final InputStream limitedIn = new LimitedInputStream(rawIn, source.getSize());
        final InputStream disableOnCloseIn = new DisableOnCloseInputStream(limitedIn);
        final ByteCountingInputStream countingStream = new ByteCountingInputStream(disableOnCloseIn, this.bytesRead)) {

        // We want to differentiate between IOExceptions thrown by the repository and IOExceptions thrown from
        // Processor code. As a result, we have the FlowFileAccessInputStream, which catches IOException from the repository
        // and translates it into either FlowFileAccessException or ContentNotFoundException. We keep track of any
        // ContentNotFoundException because if it is thrown, the Processor code may catch it and do something else with it,
        // but in reality, if it is thrown, we want to know about it and handle it, even if the Processor code catches it.
        final FlowFileAccessInputStream ffais = new FlowFileAccessInputStream(countingStream, source, record.getCurrentClaim());
        boolean cnfeThrown = false;

        try {
            incrementReadCount(source);
            StreamUtils.copy(ffais, destination, source.getSize());
        } catch (final ContentNotFoundException cnfe) {
            cnfeThrown = true;
            throw cnfe;
        } finally {
            decrementReadCount(source);
            IOUtils.closeQuietly(ffais);

            // if cnfeThrown is true, we don't need to re-throw the Exception; it will propagate.
if (!cnfeThrown && ffais.getContentNotFoundException() != null) { throw ffais.getContentNotFoundException(); } } } catch (final ContentNotFoundException nfe) { handleContentNotFound(nfe, record); } catch (final IOException ex) { throw new ProcessException("IOException thrown from " + connectableDescription + ": " + ex.toString(), ex); } } private void handleContentNotFound(final ContentNotFoundException nfe, final Collection<StandardRepositoryRecord> suspectRecords) { for (final StandardRepositoryRecord record : suspectRecords) { handleContentNotFound(nfe, record); } } private void handleContentNotFound(final ContentNotFoundException nfe, final StandardRepositoryRecord suspectRecord) { final ContentClaim registeredClaim = suspectRecord.getOriginalClaim(); final ContentClaim transientClaim = suspectRecord.getWorkingClaim(); final ContentClaim missingClaim = nfe.getMissingClaim(); final ProvenanceEventRecord dropEvent = provenanceReporter.drop(suspectRecord.getCurrent(), nfe.getMessage() == null ? "Content Not Found" : nfe.getMessage()); if (dropEvent != null) { context.getProvenanceRepository().registerEvent(dropEvent); } if (missingClaim == registeredClaim) { suspectRecord.markForAbort(); rollback(); throw new MissingFlowFileException("Unable to find content for FlowFile", nfe); } if (missingClaim == transientClaim) { rollback(); throw new MissingFlowFileException("Unable to find content for FlowFile", nfe); } } private FlowFile validateRecordState(final FlowFile flowFile) { return validateRecordState(flowFile, false); } private FlowFile validateRecordState(final FlowFile flowFile, final boolean allowRecursiveRead) { if (!allowRecursiveRead && readRecursionSet.containsKey(flowFile)) { throw new IllegalStateException(flowFile + " already in use for an active callback or an InputStream created by ProcessSession.read(FlowFile) has not been closed"); } if (writeRecursionSet.contains(flowFile)) { throw new IllegalStateException(flowFile + " already in use for an active callback or an OutputStream created by ProcessSession.write(FlowFile) has not been closed"); } final StandardRepositoryRecord record = records.get(flowFile); if (record == null) { rollback(); throw new FlowFileHandlingException(flowFile + " is not known in this session (" + toString() + ")"); } if (record.getTransferRelationship() != null) { rollback(); throw new FlowFileHandlingException(flowFile + " is already marked for transfer"); } if (record.isMarkedForDelete()) { rollback(); throw new FlowFileHandlingException(flowFile + " has already been marked for removal"); } return record.getCurrent(); } private List<FlowFile> validateRecordState(final Collection<FlowFile> flowFiles) { final List<FlowFile> current = new ArrayList<>(flowFiles.size()); for (final FlowFile flowFile : flowFiles) { current.add(validateRecordState(flowFile)); } return current; } /** * Checks if a FlowFile is known in this session. * * @param flowFile the FlowFile to check * @return <code>true</code> if the FlowFile is known in this session, * <code>false</code> otherwise. 
*/ boolean isFlowFileKnown(final FlowFile flowFile) { return records.containsKey(flowFile); } @Override public FlowFile create(final FlowFile parent) { final Map<String, String> newAttributes = new HashMap<>(3); newAttributes.put(CoreAttributes.FILENAME.key(), String.valueOf(System.nanoTime())); newAttributes.put(CoreAttributes.PATH.key(), DEFAULT_FLOWFILE_PATH); newAttributes.put(CoreAttributes.UUID.key(), UUID.randomUUID().toString()); final StandardFlowFileRecord.Builder fFileBuilder = new StandardFlowFileRecord.Builder().id(context.getNextFlowFileSequence()); // copy all attributes from parent except for the "special" attributes. Copying the special attributes // can cause problems -- especially the ALTERNATE_IDENTIFIER, because copying can cause Provenance Events // to be incorrectly created. for (final Map.Entry<String, String> entry : parent.getAttributes().entrySet()) { final String key = entry.getKey(); final String value = entry.getValue(); if (CoreAttributes.ALTERNATE_IDENTIFIER.key().equals(key) || CoreAttributes.DISCARD_REASON.key().equals(key) || CoreAttributes.UUID.key().equals(key)) { continue; } newAttributes.put(key, value); } fFileBuilder.lineageStart(parent.getLineageStartDate(), parent.getLineageStartIndex()); fFileBuilder.addAttributes(newAttributes); final FlowFileRecord fFile = fFileBuilder.build(); final StandardRepositoryRecord record = new StandardRepositoryRecord(null); record.setWorking(fFile, newAttributes); records.put(fFile, record); createdFlowFiles.add(fFile.getAttribute(CoreAttributes.UUID.key())); registerForkEvent(parent, fFile); return fFile; } @Override public FlowFile create(final Collection<FlowFile> parents) { final Map<String, String> newAttributes = intersectAttributes(parents); newAttributes.remove(CoreAttributes.UUID.key()); newAttributes.remove(CoreAttributes.ALTERNATE_IDENTIFIER.key()); newAttributes.remove(CoreAttributes.DISCARD_REASON.key()); // When creating a new FlowFile from multiple parents, we need to add all of the Lineage Identifiers // and use the earliest lineage start date long lineageStartDate = 0L; for (final FlowFile parent : parents) { final long parentLineageStartDate = parent.getLineageStartDate(); if (lineageStartDate == 0L || parentLineageStartDate < lineageStartDate) { lineageStartDate = parentLineageStartDate; } } // find the smallest lineage start index that has the same lineage start date as the one we've chosen. long lineageStartIndex = 0L; for (final FlowFile parent : parents) { if (parent.getLineageStartDate() == lineageStartDate && parent.getLineageStartIndex() < lineageStartIndex) { lineageStartIndex = parent.getLineageStartIndex(); } } newAttributes.put(CoreAttributes.FILENAME.key(), String.valueOf(System.nanoTime())); newAttributes.put(CoreAttributes.PATH.key(), DEFAULT_FLOWFILE_PATH); newAttributes.put(CoreAttributes.UUID.key(), UUID.randomUUID().toString()); final FlowFileRecord fFile = new StandardFlowFileRecord.Builder().id(context.getNextFlowFileSequence()) .addAttributes(newAttributes) .lineageStart(lineageStartDate, lineageStartIndex) .build(); final StandardRepositoryRecord record = new StandardRepositoryRecord(null); record.setWorking(fFile, newAttributes); records.put(fFile, record); createdFlowFiles.add(fFile.getAttribute(CoreAttributes.UUID.key())); registerJoinEvent(fFile, parents); return fFile; } /** * Returns the attributes that are common to every FlowFile given. The key * and value must match exactly. 
*
* @param flowFileList a list of FlowFiles
*
* @return the common attributes
*/
private static Map<String, String> intersectAttributes(final Collection<FlowFile> flowFileList) {
    final Map<String, String> result = new HashMap<>();
    // trivial cases
    if (flowFileList == null || flowFileList.isEmpty()) {
        return result;
    } else if (flowFileList.size() == 1) {
        // a single FlowFile trivially shares all of its own attributes; return them directly
        // rather than falling through to the general intersection loop below
        result.putAll(flowFileList.iterator().next().getAttributes());
        return result;
    }

    /*
     * Start with the first attribute map and only put an entry to the
     * resultant map if it is common to every map.
     */
    final Map<String, String> firstMap = flowFileList.iterator().next().getAttributes();

    outer: for (final Map.Entry<String, String> mapEntry : firstMap.entrySet()) {
        final String key = mapEntry.getKey();
        final String value = mapEntry.getValue();
        for (final FlowFile flowFile : flowFileList) {
            final Map<String, String> currMap = flowFile.getAttributes();
            final String curVal = currMap.get(key);
            if (curVal == null || !curVal.equals(value)) {
                continue outer;
            }
        }
        result.put(key, value);
    }

    return result;
}

@Override
protected void finalize() throws Throwable {
    rollback();
    super.finalize();
}

@Override
public ProvenanceReporter getProvenanceReporter() {
    return provenanceReporter;
}

@Override
public String toString() {
    return "StandardProcessSession[id=" + sessionId + "]";
}

/**
 * Callback interface used to poll a Connection for FlowFiles, so that the
 * polling strategy can be supplied to the session in a functional style.
 */
private interface ConnectionPoller {

    List<FlowFileRecord> poll(Connection connection, Set<FlowFileRecord> expiredRecords);
}

private static class Checkpoint {

    private long processingTime = 0L;

    private final Map<FlowFile, List<ProvenanceEventRecord>> generatedProvenanceEvents = new HashMap<>();
    private final Map<FlowFile, ProvenanceEventBuilder> forkEventBuilders = new HashMap<>();
    private final List<ProvenanceEventRecord> autoTerminatedEvents = new ArrayList<>();
    private final Set<ProvenanceEventRecord> reportedEvents = new LinkedHashSet<>();

    private final Map<FlowFileRecord, StandardRepositoryRecord> records = new HashMap<>();
    private final Map<String, StandardFlowFileEvent> connectionCounts = new HashMap<>();
    private final Map<FlowFileQueue, Set<FlowFileRecord>> unacknowledgedFlowFiles = new HashMap<>();
    private final Map<String, Long> counters = new HashMap<>();

    private final Map<FlowFile, Path> deleteOnCommit = new HashMap<>();
    private final Set<String> removedFlowFiles = new HashSet<>();
    private final Set<String> createdFlowFiles = new HashSet<>();

    private int removedCount = 0; // number of flowfiles removed in this session
    private long removedBytes = 0L; // size of all flowfiles removed in this session
    private long bytesRead = 0L;
    private long bytesWritten = 0L;
    private int flowFilesIn = 0, flowFilesOut = 0;
    private long contentSizeIn = 0L, contentSizeOut = 0L;

    private void checkpoint(final StandardProcessSession session, final List<ProvenanceEventRecord> autoTerminatedEvents) {
        this.processingTime += System.nanoTime() - session.processingStartTime;

        this.generatedProvenanceEvents.putAll(session.generatedProvenanceEvents);
        this.forkEventBuilders.putAll(session.forkEventBuilders);
        if (autoTerminatedEvents != null) {
            this.autoTerminatedEvents.addAll(autoTerminatedEvents);
        }
        this.reportedEvents.addAll(session.provenanceReporter.getEvents());

        this.records.putAll(session.records);
        this.connectionCounts.putAll(session.connectionCounts);
        this.unacknowledgedFlowFiles.putAll(session.unacknowledgedFlowFiles);
        this.counters.putAll(session.counters);
this.deleteOnCommit.putAll(session.deleteOnCommit); this.removedFlowFiles.addAll(session.removedFlowFiles); this.createdFlowFiles.addAll(session.createdFlowFiles); this.removedCount += session.removedCount; this.removedBytes += session.removedBytes; this.bytesRead += session.bytesRead; this.bytesWritten += session.bytesWritten; this.flowFilesIn += session.flowFilesIn; this.flowFilesOut += session.flowFilesOut; this.contentSizeIn += session.contentSizeIn; this.contentSizeOut += session.contentSizeOut; } } }
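/*
 * End-to-end illustration of the session lifecycle implemented above (a sketch, not part of this
 * class; REL_SUCCESS is a hypothetical Relationship, and the surrounding framework supplies the
 * session and performs the commit or rollback):
 *
 *   FlowFile flowFile = session.get();                   // round-robin poll; expired records dropped
 *   if (flowFile == null) {
 *       return;
 *   }
 *   flowFile = session.putAttribute(flowFile, "k", "v"); // copy-on-write attribute update
 *   flowFile = session.write(flowFile, new StreamCallback() {
 *       @Override
 *       public void process(final InputStream in, final OutputStream out) throws IOException {
 *           StreamUtils.copy(in, out);                   // in-place content transform goes here
 *       }
 *   });
 *   session.transfer(flowFile, REL_SUCCESS);             // counted once per downstream connection
 */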