/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nifi.toolkit.repos.flowfile; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.RandomAccessFile; import java.nio.file.Files; import java.nio.file.StandardCopyOption; import java.util.List; import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.nifi.stream.io.LimitingInputStream; import org.apache.nifi.stream.io.StreamUtils; public class RepairCorruptedFileEndings { private static final Pattern PARTITION_FILE_PATTERN = Pattern.compile("partition\\-\\d+"); private static void printUsage() { System.out.println("Whenever a sudden power loss occurs, it is common with some operating systems for files that are being written to "); System.out.println("to contain many NUL characters (hex 0) at the end of the file upon restart. If this happens to the FlowFile repository, "); System.out.println("NiFi will be unable to recover, because it cannot properly read the repository. This utility attempts to read the FlowFile "); System.out.println("Repository and write out a new copy of the repository, where the new copy does not contain the trailing NUL characters so "); System.out.println("NiFi can be restarted by pointing at the new FlowFile Repository."); System.out.println("Typically, this problem can be identified by seeing an error in the NiFi logs at startup, indicating either:"); System.out.println(); System.out.println("Caused by: java.io.IOException: Expected to read a Sentinel Byte of '1' but got a value of '0' instead"); System.out.println(); System.out.println("or:"); System.out.println(); System.out.println("Caused by: java.lang.IllegalArgumentException: No enum constant org.wali.UpdateType."); System.out.println(); System.out.println(); System.out.println("Usage:"); System.out.println("java " + RepairCorruptedFileEndings.class.getCanonicalName() + " <repo input directory> <repo destination directory>"); System.out.println(); System.out.println("<repo input directory>: The existing FlowFile Repository Directory that contains corrupt data"); System.out.println("<repo destination directory>: The directory to write the repaired repository to"); System.out.println(); } public static void main(final String[] args) { if (args.length != 2) { printUsage(); return; } final File inputDir = new File(args[0]); if (!inputDir.exists()) { System.out.println("Input Repository Directory " + inputDir + " does not exist"); return; } final File[] inputFiles = inputDir.listFiles(); if (inputFiles == null) { System.out.println("Could not access files within input Repository Directory " + inputDir); return; } final List<File> partitionDirs = Stream.of(inputFiles) .filter(RepairCorruptedFileEndings::isPartitionDirectory) .collect(Collectors.toList()); if (partitionDirs.isEmpty()) { System.out.println("Found no partitions within input Repository Directory " + inputDir); return; } final File outputDir = new File(args[1]); if (outputDir.exists()) { final File[] children = outputDir.listFiles(); if (children == null) { System.out.println("Cannot access output Repository Directory " + outputDir); return; } if (children.length > 0) { System.out.println("Output Repository Directory " + outputDir + " already exists and has files or sub-directories. " + "The output directory must either not exist or be empty."); return; } } else if (!outputDir.mkdirs()) { System.out.println("Failed to create output Repository Directory " + outputDir); return; } final List<File> nonPartitionDirFiles = Stream.of(inputFiles) .filter(f -> !isPartitionDirectory(f)) .filter(f -> !f.getName().equals("wali.lock")) .collect(Collectors.toList()); for (final File nonPartitionFile : nonPartitionDirFiles) { final File destination = new File(outputDir, nonPartitionFile.getName()); try { copy(nonPartitionFile, destination); } catch (final IOException e) { System.out.println("Failed to copy source file " + nonPartitionFile + " to destination file " + destination); e.printStackTrace(); } } int fullCopies = 0; int partialCopies = 0; for (final File partitionDir : partitionDirs) { final File[] partitionFiles = partitionDir.listFiles(); if (partitionFiles == null) { System.out.println("Could not access children of input sub-directory " + partitionDir); return; } final File outputPartitionDir = new File(outputDir, partitionDir.getName()); if (!outputPartitionDir.mkdirs()) { System.out.println("Failed to created output directory " + outputPartitionDir); return; } for (final File partitionFile : partitionFiles) { final File destinationFile = new File(outputPartitionDir, partitionFile.getName()); // All journal files follow the pattern of: // <journal entry> <TRANSACTION_CONTINUE | TRANSACTION_COMMIT> <journal entry> <TRANSACTION_CONTINUE | TRANSACTION_COMMIT> ... // The TRANSACTION_CONTINUE byte is a 1 while the TRANSACTION_COMMIT byte is a 2. So if we have 0's at the end then we know // that we can simply truncate up until the point where we encounter the first of the of the trailing zeroes. At that point, // we know that we are done. It is possible that the repo will still be 'corrupt' in that only part of a transaction was // written out. However, this is okay because the repo will recover from this on restart. What it does NOT properly recover // from on restart is when the file ends with a bunch of 0's because it believes that the Transaction ID is zero and then // it reads in 0 bytes for the "Update Type" and as a result we get an invalid enum name because it thinks that the name of // the UpdateType is an empty string because it's a string of length 0. final int trailingZeroes; try { trailingZeroes = countTrailingZeroes(partitionFile); } catch (final Exception e) { System.out.println("Failed to read input file " + partitionFile); e.printStackTrace(); return; } if (trailingZeroes > 0) { final long goodLength = partitionFile.length() - trailingZeroes; try { copy(partitionFile, destinationFile, goodLength); partialCopies++; } catch (final Exception e) { System.out.println("Failed to copy " + goodLength + " bytes from " + partitionFile + " to " + destinationFile); e.printStackTrace(); return; } } else { try { copy(partitionFile, destinationFile); } catch (final Exception e) { System.out.println("Failed to copy entire file from " + partitionFile + " to " + destinationFile); e.printStackTrace(); return; } fullCopies++; } } } System.out.println("Successfully copied " + fullCopies + " journal files fully and truncated " + partialCopies + " journal files in output directory"); } private static boolean isPartitionDirectory(final File file) { return PARTITION_FILE_PATTERN.matcher(file.getName()).matches(); } private static void copy(final File input, final File destination) throws IOException { if (input.isFile()) { copyFile(input, destination); return; } else { copyDirectory(input, destination); } } private static void copyDirectory(final File input, final File destination) throws IOException { if (!destination.exists() && !destination.mkdirs()) { System.out.println("Failed to copy input directory " + input + " to destination because destination directory " + destination + " does not exist and could not be created"); return; } final File[] children = input.listFiles(); if (children == null) { System.out.println("Failed to copy input directory " + input + " to destination because could not access files of input directory"); return; } for (final File child : children) { final File destinationChild = new File(destination, child.getName()); copy(child, destinationChild); } } private static void copyFile(final File input, final File destination) throws IOException { if (!input.exists()) { return; } Files.copy(input.toPath(), destination.toPath(), StandardCopyOption.COPY_ATTRIBUTES); } private static void copy(final File input, final File destination, final long length) throws IOException { try (final InputStream fis = new FileInputStream(input); final LimitingInputStream in = new LimitingInputStream(fis, length); final OutputStream fos = new FileOutputStream(destination)) { StreamUtils.copy(in, fos); } } static int countTrailingZeroes(final File partitionFile) throws IOException { final RandomAccessFile raf = new RandomAccessFile(partitionFile, "r"); long startPos = partitionFile.length() - 4096; int count = 0; boolean reachedStartOfFile = false; while (!reachedStartOfFile) { int bufferLength = 4096; if (startPos < 0) { bufferLength = (int) (startPos + 4096); startPos = 0; reachedStartOfFile = true; } raf.seek(startPos); final byte[] buffer = new byte[bufferLength]; final int read = fillBuffer(raf, buffer); for (int i = read - 1; i >= 0; i--) { final byte b = buffer[i]; if (b == 0) { count++; } else { return count; } } startPos -= 4096; } return count; } private static int fillBuffer(final RandomAccessFile source, final byte[] destination) throws IOException { int bytesRead = 0; int len; while (bytesRead < destination.length) { len = source.read(destination, bytesRead, destination.length - bytesRead); if (len < 0) { break; } bytesRead += len; } return bytesRead; } }