/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.controller;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.UUID;
import java.util.regex.Pattern;
import org.apache.nifi.controller.queue.FlowFileQueue;
import org.apache.nifi.controller.repository.FlowFileRecord;
import org.apache.nifi.controller.repository.FlowFileRepository;
import org.apache.nifi.controller.repository.FlowFileSwapManager;
import org.apache.nifi.controller.repository.SwapContents;
import org.apache.nifi.controller.repository.SwapManagerInitializationContext;
import org.apache.nifi.controller.repository.SwapSummary;
import org.apache.nifi.controller.repository.claim.ResourceClaimManager;
import org.apache.nifi.controller.swap.SchemaSwapDeserializer;
import org.apache.nifi.controller.swap.SchemaSwapSerializer;
import org.apache.nifi.controller.swap.SimpleSwapDeserializer;
import org.apache.nifi.controller.swap.SwapDeserializer;
import org.apache.nifi.controller.swap.SwapSerializer;
import org.apache.nifi.events.EventReporter;
import org.apache.nifi.reporting.Severity;
import org.apache.nifi.stream.io.StreamUtils;
import org.apache.nifi.util.NiFiProperties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * <p>
 * An implementation of the {@link FlowFileSwapManager} that swaps FlowFiles
 * to/from local disk.
 * </p>
 *
 * <p>
 * Swap files are stored in the {@code swap} directory beneath the FlowFile Repository path. A swap file is first
 * written under a temporary {@code .swap.part} name and renamed to its final {@code .swap} name only after it has
 * been written completely, so a {@code .part} file found on disk is always an incomplete swap file.
 * </p>
 */
public class FileSystemSwapManager implements FlowFileSwapManager {

    public static final int MINIMUM_SWAP_COUNT = 10000;
    private static final Pattern SWAP_FILE_PATTERN = Pattern.compile("\\d+-.+\\.swap");
    private static final Pattern TEMP_SWAP_FILE_PATTERN = Pattern.compile("\\d+-.+\\.swap\\.part");

    public static final int SWAP_ENCODING_VERSION = 10;
    public static final String EVENT_CATEGORY = "Swap FlowFiles";
    private static final Logger logger = LoggerFactory.getLogger(FileSystemSwapManager.class);

    private final File storageDirectory;

    // effectively final: assigned once, in initialize(...)
    private FlowFileRepository flowFileRepository;
    private EventReporter eventReporter;
    private ResourceClaimManager claimManager;

    // Written at the very start of every swap file produced by swapOut(...). Files that do not start with these
    // bytes are treated as having been written in the older format, which had no header.
    private static final byte[] MAGIC_HEADER = {'S', 'W', 'A', 'P'};

    // Accepts both completed swap files (*.swap) and temporary, partially written ones (*.swap.part).
    private static final FilenameFilter SWAP_FILE_FILTER = new FilenameFilter() {
        @Override
        public boolean accept(final File dir, final String name) {
            return SWAP_FILE_PATTERN.matcher(name).matches() || TEMP_SWAP_FILE_PATTERN.matcher(name).matches();
        }
    };

    /**
     * Default no args constructor for service loading only. The storage directory is left {@code null}.
     */
    public FileSystemSwapManager() {
        storageDirectory = null;
    }

    /**
     * Creates a swap manager that stores swap files in the {@code swap} directory beneath the configured
     * FlowFile Repository path, creating that directory if it does not yet exist.
     *
     * @param nifiProperties the properties from which the FlowFile Repository path is obtained
     * @throws RuntimeException if the swap storage directory does not exist and cannot be created
     */
    public FileSystemSwapManager(final NiFiProperties nifiProperties) {
        final Path flowFileRepoPath = nifiProperties.getFlowFileRepositoryPath();

        this.storageDirectory = flowFileRepoPath.resolve("swap").toFile();
        if (!storageDirectory.exists() && !storageDirectory.mkdirs()) {
            throw new RuntimeException("Cannot create Swap Storage directory " + storageDirectory.getAbsolutePath());
        }
    }

    /**
     * Captures the Resource Claim Manager, Event Reporter and FlowFile Repository from the given context.
     *
     * @param initializationContext the context that supplies the collaborators used by this swap manager
     */
    @Override
    public synchronized void initialize(final SwapManagerInitializationContext initializationContext) {
        this.claimManager = initializationContext.getResourceClaimManager();
        this.eventReporter = initializationContext.getEventReporter();
        this.flowFileRepository = initializationContext.getFlowFileRepository();
    }

    /**
     * Serializes the given FlowFiles to a new swap file named
     * {@code <timestamp>-<queue identifier>-<random uuid>.swap} in the storage directory.
     *
     * @param toSwap the FlowFiles to write out
     * @param flowFileQueue the queue that the FlowFiles belong to
     * @return the absolute path of the swap file, or {@code null} if {@code toSwap} is {@code null} or empty
     * @throws IOException if the swap file cannot be written; the temporary file is deleted on a best-effort basis
     */
    @Override
    public String swapOut(final List<FlowFileRecord> toSwap, final FlowFileQueue flowFileQueue) throws IOException {
        if (toSwap == null || toSwap.isEmpty()) {
            return null;
        }

        final File swapFile = new File(storageDirectory, System.currentTimeMillis() + "-" + flowFileQueue.getIdentifier() + "-" + UUID.randomUUID().toString() + ".swap");
        final File swapTempFile = new File(swapFile.getParentFile(), swapFile.getName() + ".part");
        final String swapLocation = swapFile.getAbsolutePath();

        final SwapSerializer serializer = new SchemaSwapSerializer();
        try (final FileOutputStream fos = new FileOutputStream(swapTempFile);
            final OutputStream out = new BufferedOutputStream(fos)) {
            out.write(MAGIC_HEADER);
            final DataOutputStream dos = new DataOutputStream(out);
            dos.writeUTF(serializer.getSerializationName());

            serializer.serializeFlowFiles(toSwap, flowFileQueue, swapLocation, out);

            // Bytes still held in the BufferedOutputStream are not covered by FileDescriptor.sync(), so push
            // everything down to the file descriptor before forcing it to disk.
            out.flush();
            fos.getFD().sync();
        } catch (final IOException ioe) {
            // we failed to write out the entire swap file. Delete the temporary file, if we can.
            swapTempFile.delete();
            throw ioe;
        }

        if (swapTempFile.renameTo(swapFile)) {
            flowFileRepository.swapFlowFilesOut(toSwap, flowFileQueue, swapLocation);
        } else {
            // NOTE(review): the swap location is still returned below even though no file exists at that path
            // after a failed rename; confirm that callers cope with this.
            error("Failed to swap out FlowFiles from " + flowFileQueue + " due to: Unable to rename swap file from " + swapTempFile + " to " + swapFile);
        }

        return swapLocation;
    }

    /**
     * Reads the FlowFiles stored at the given swap location, reports them to the FlowFile Repository as swapped in
     * and then deletes the swap file. A failure to delete the file is reported as a warning only.
     *
     * @param swapLocation the path of the swap file
     * @param flowFileQueue the queue that the FlowFiles are being swapped into
     * @return the contents of the swap file
     * @throws IOException if the swap file does not exist or cannot be read
     */
    @Override
    public SwapContents swapIn(final String swapLocation, final FlowFileQueue flowFileQueue) throws IOException {
        final File swapFile = new File(swapLocation);
        final SwapContents swapContents = peek(swapLocation, flowFileQueue);
        flowFileRepository.swapFlowFilesIn(swapFile.getAbsolutePath(), swapContents.getFlowFiles(), flowFileQueue);

        if (!swapFile.delete()) {
            warn("Swapped in FlowFiles from file " + swapFile.getAbsolutePath() + " but failed to delete the file; this file should be cleaned up manually");
        }

        return swapContents;
    }

    /**
     * Reads the FlowFiles stored at the given swap location without deleting the swap file and without
     * notifying the FlowFile Repository.
     *
     * @param swapLocation the path of the swap file
     * @param flowFileQueue the queue that the FlowFiles belong to
     * @return the contents of the swap file
     * @throws FileNotFoundException if no file exists at the swap location
     * @throws IOException if the swap file cannot be read or no suitable deserializer exists for it
     */
    @Override
    public SwapContents peek(final String swapLocation, final FlowFileQueue flowFileQueue) throws IOException {
        final File swapFile = new File(swapLocation);
        if (!swapFile.exists()) {
            throw new FileNotFoundException("Failed to swap in FlowFiles from external storage location " + swapLocation + " into FlowFile Queue because the file could not be found");
        }

        try (final InputStream fis = new FileInputStream(swapFile);
            final InputStream bis = new BufferedInputStream(fis);
            final DataInputStream in = new DataInputStream(bis)) {

            final SwapDeserializer deserializer = createSwapDeserializer(in);
            return deserializer.deserializeFlowFiles(in, swapLocation, flowFileQueue, claimManager);
        }
    }

    /**
     * Deletes every swap file and temporary swap file found in the storage directory. Files that cannot be
     * deleted are reported as warnings.
     */
    @Override
    public void purge() {
        final File[] swapFiles = storageDirectory.listFiles(SWAP_FILE_FILTER);
        if (swapFiles == null) {
            // File.listFiles returns null when the directory does not exist or cannot be read; nothing to purge.
            return;
        }

        for (final File file : swapFiles) {
            if (!file.delete()) {
                warn("Failed to delete Swap File " + file + " when purging FlowFile Swap Manager");
            }
        }
    }

    /**
     * Finds the swap files in the storage directory that belong to the given queue. Temporary {@code .part}
     * files encountered along the way are deleted, because they were never written completely.
     *
     * @param flowFileQueue the queue whose swap files should be located
     * @return the absolute paths of the queue's swap files, ordered by the timestamp in their filenames
     * @throws IOException if a swap file that must be inspected cannot be read or has no suitable deserializer
     */
    @Override
    public List<String> recoverSwapLocations(final FlowFileQueue flowFileQueue) throws IOException {
        final File[] swapFiles = storageDirectory.listFiles(SWAP_FILE_FILTER);
        if (swapFiles == null) {
            return Collections.emptyList();
        }

        final List<String> swapLocations = new ArrayList<>();

        for (final File swapFile : swapFiles) {
            // Remove any .part files, as they are partial swap files that did not get written fully.
            if (TEMP_SWAP_FILE_PATTERN.matcher(swapFile.getName()).matches()) {
                if (swapFile.delete()) {
                    logger.info("Removed incomplete/temporary Swap File {}", swapFile);
                } else {
                    warn("Failed to remove incomplete/temporary Swap File " + swapFile + "; this file should be cleaned up manually");
                }

                continue;
            }

            // Split the filename by dashes. The old file naming scheme was "<timestamp>-<random uuid>.swap", which
            // yields exactly 6 segments. The new naming scheme is "<timestamp>-<queue identifier>-<random uuid>.swap",
            // which yields more than 6 segments. In the latter case segments 1 through 5 are taken to be the queue
            // identifier (this assumes the identifier itself consists of five dash-separated parts), so the file
            // can be matched against the given queue without being opened.
            final String[] splits = swapFile.getName().split("-");
            if (splits.length > 6) {
                final String queueIdentifier = splits[1] + "-" + splits[2] + "-" + splits[3] + "-" + splits[4] + "-" + splits[5];
                if (queueIdentifier.equals(flowFileQueue.getIdentifier())) {
                    swapLocations.add(swapFile.getAbsolutePath());
                }

                continue;
            }

            // Read the queue identifier from the swap file to check if the swap file is for this queue
            try (final InputStream fis = new FileInputStream(swapFile);
                final InputStream bufferedIn = new BufferedInputStream(fis);
                final DataInputStream in = new DataInputStream(bufferedIn)) {

                final SwapDeserializer deserializer;
                try {
                    deserializer = createSwapDeserializer(in);
                } catch (final Exception e) {
                    final String errMsg = "Cannot swap FlowFiles in from " + swapFile + " due to " + e;
                    // error(...) logs the problem and tolerates an Event Reporter that has not been set yet.
                    error(errMsg);
                    throw new IOException(errMsg, e);
                }

                // If deserializer is not an instance of Simple Swap Deserializer, then it means that the serializer is new enough that
                // we use the 3-element filename as illustrated above, so this is only necessary for the SimpleSwapDeserializer.
                if (deserializer instanceof SimpleSwapDeserializer) {
                    final String connectionId = in.readUTF();
                    if (connectionId.equals(flowFileQueue.getIdentifier())) {
                        swapLocations.add(swapFile.getAbsolutePath());
                    }
                }
            }
        }

        Collections.sort(swapLocations, new SwapFileComparator());
        return swapLocations;
    }

    /**
     * Reads the summary of the swap file at the given location.
     *
     * @param swapLocation the path of the swap file
     * @return the summary produced by the deserializer that matches the swap file's format
     * @throws IOException if the swap file cannot be opened or read, or no suitable deserializer exists for it
     */
    @Override
    public SwapSummary getSwapSummary(final String swapLocation) throws IOException {
        final File swapFile = new File(swapLocation);

        // read record from disk via the swap file
        try (final InputStream fis = new FileInputStream(swapFile);
            final InputStream bufferedIn = new BufferedInputStream(fis);
            final DataInputStream in = new DataInputStream(bufferedIn)) {

            final SwapDeserializer deserializer = createSwapDeserializer(in);
            return deserializer.getSwapSummary(in, swapLocation, claimManager);
        }
    }

    /**
     * Selects the deserializer for a swap file by inspecting the beginning of the stream. If the stream starts
     * with the magic header, the serialization name that follows is read and matched; otherwise the stream is
     * reset to where it started and the file is treated as having been written in the old, header-less format.
     *
     * @param dis the stream positioned at the start of the swap file; it must support mark/reset
     * @return the deserializer to use for the remainder of the stream
     * @throws IOException if the stream holds fewer bytes than the magic header, or the serialization name is unknown
     */
    private SwapDeserializer createSwapDeserializer(final DataInputStream dis) throws IOException {
        dis.mark(MAGIC_HEADER.length);

        final byte[] magicHeader = new byte[MAGIC_HEADER.length];
        try {
            StreamUtils.fillBuffer(dis, magicHeader);
        } catch (final EOFException eof) {
            throw new IOException("Failed to read swap file because the file contained less than 4 bytes of data", eof);
        }

        if (Arrays.equals(magicHeader, MAGIC_HEADER)) {
            final String serializationName = dis.readUTF();
            if (serializationName.equals(SchemaSwapDeserializer.getSerializationName())) {
                return new SchemaSwapDeserializer();
            }

            throw new IOException("Cannot find a suitable Deserializer for swap file, written with Serialization Name '" + serializationName + "'");
        } else {
            // SimpleSwapDeserializer is old and did not write out a magic header.
            dis.reset();
            return new SimpleSwapDeserializer();
        }
    }

    /**
     * Logs the message at ERROR level and, if an Event Reporter is available, reports it as an ERROR event.
     */
    private void error(final String error) {
        logger.error(error);
        if (eventReporter != null) {
            eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, error);
        }
    }

    /**
     * Logs the message at WARN level and, if an Event Reporter is available, reports it as a WARNING event.
     */
    private void warn(final String warning) {
        logger.warn(warning);
        if (eventReporter != null) {
            eventReporter.reportEvent(Severity.WARNING, EVENT_CATEGORY, warning);
        }
    }

    /**
     * Orders swap file paths by the numeric timestamp that prefixes their filename. Paths without a parseable
     * timestamp sort after all paths that have one and compare as equal to each other. Paths with equal
     * timestamps are ordered by plain String comparison.
     */
    private static class SwapFileComparator implements Comparator<String> {

        @Override
        public int compare(final String o1, final String o2) {
            // Identity check is only a fast path (it also covers both arguments being null); equal but distinct
            // Strings produce the same result of 0 through the comparisons below.
            if (o1 == o2) {
                return 0;
            }

            final Long time1 = getTimestampFromFilename(o1);
            final Long time2 = getTimestampFromFilename(o2);

            if (time1 == null && time2 == null) {
                return 0;
            }
            if (time1 == null) {
                return 1;
            }
            if (time2 == null) {
                return -1;
            }

            final int timeComparisonValue = time1.compareTo(time2);
            if (timeComparisonValue != 0) {
                return timeComparisonValue;
            }

            // Both timestamps are non-null here, which implies that both paths are non-null as well.
            return o1.compareTo(o2);
        }

        /**
         * Extracts the leading timestamp from the filename portion of the given path.
         *
         * @param fullyQualifiedFilename the path of a swap file; may be {@code null}
         * @return the value before the first dash in the filename, or {@code null} if the path is {@code null},
         *         the filename has no dash after its first character, or that prefix is not a valid long
         */
        private static Long getTimestampFromFilename(final String fullyQualifiedFilename) {
            if (fullyQualifiedFilename == null) {
                return null;
            }

            final File file = new File(fullyQualifiedFilename);
            final String filename = file.getName();

            final int idx = filename.indexOf("-");
            if (idx < 1) {
                return null;
            }

            final String millisVal = filename.substring(0, idx);
            try {
                return Long.parseLong(millisVal);
            } catch (final NumberFormatException e) {
                return null;
            }
        }
    }
}