/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.provenance.serialization;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;

import org.apache.nifi.provenance.store.EventFileManager;
import org.apache.nifi.provenance.toc.StandardTocReader;
import org.apache.nifi.provenance.toc.StandardTocWriter;
import org.apache.nifi.provenance.toc.TocReader;
import org.apache.nifi.provenance.toc.TocUtil;
import org.apache.nifi.provenance.toc.TocWriter;
import org.apache.nifi.provenance.util.CloseableUtil;
import org.apache.nifi.stream.io.ByteCountingOutputStream;
import org.apache.nifi.stream.io.GZIPOutputStream;
import org.apache.nifi.stream.io.NonCloseableOutputStream;
import org.apache.nifi.stream.io.StreamUtils;
import org.apache.nifi.util.FormatUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * <p>
 * This class is responsible for compressing Event Files as a background task. This is done as a background task instead of being
 * done inline because if compression is performed inline, whenever NiFi is restarted (especially if done so abruptly), it is very
 * possible that the GZIP stream will be corrupt. As a result, we would stand to lose some Provenance Events when NiFi is restarted.
 * In order to avoid that, we write data in an uncompressed format and then compress the data in the background. Once the data has
 * been compressed, this task will then remove the original, uncompressed file. If the file is being read by another thread, this
 * task will wait for the other thread to finish reading the data before deleting the file. This synchronization of the File is handled
 * via the {@link EventFileManager Event File Manager}.
 * </p>
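 *
 * <p>
 * A minimal usage sketch (hypothetical wiring shown for illustration; in NiFi itself the Provenance repository
 * creates and manages this task):
 * </p>
 *
 * <pre>
 * BlockingQueue&lt;File&gt; filesToCompress = new LinkedBlockingQueue&lt;&gt;();
 * EventFileCompressor compressor = new EventFileCompressor(filesToCompress, new EventFileManager());
 *
 * Thread compressionThread = new Thread(compressor, "Compress Provenance Logs");
 * compressionThread.setDaemon(true);
 * compressionThread.start();
 *
 * filesToCompress.offer(rolledOverEventFile); // hand off an uncompressed Event File
 *
 * compressor.shutdown(); // stop the background task when the repository closes
 * </pre>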
 */
public class EventFileCompressor implements Runnable {
    private static final Logger logger = LoggerFactory.getLogger(EventFileCompressor.class);

    private final BlockingQueue<File> filesToCompress;
    private final EventFileManager eventFileManager;
    private volatile boolean shutdown = false;

    public EventFileCompressor(final BlockingQueue<File> filesToCompress, final EventFileManager eventFileManager) {
        this.filesToCompress = filesToCompress;
        this.eventFileManager = eventFileManager;
    }

    public void shutdown() {
        shutdown = true;
    }

    @Override
    public void run() {
        while (!shutdown) {
            File uncompressedEventFile = null;

            try {
                final long start = System.nanoTime();

                // Wait up to 1 second for a file so that the shutdown flag is re-checked regularly.
                uncompressedEventFile = filesToCompress.poll(1, TimeUnit.SECONDS);
                if (uncompressedEventFile == null || shutdown) {
                    continue;
                }

                File outputFile = null;
                long bytesBefore = 0L;
                StandardTocReader tocReader = null;
                File tmpTocFile = null;

                // Hold the read lock while compressing so that the uncompressed file cannot be deleted out from under us.
                eventFileManager.obtainReadLock(uncompressedEventFile);
                try {
                    StandardTocWriter tocWriter = null;

                    final File tocFile = TocUtil.getTocFile(uncompressedEventFile);
                    try {
                        tocReader = new StandardTocReader(tocFile);
                    } catch (final IOException e) {
                        logger.error("Failed to read TOC File {}", tocFile, e);
                        continue;
                    }

                    bytesBefore = uncompressedEventFile.length();

                    try {
                        outputFile = new File(uncompressedEventFile.getParentFile(), uncompressedEventFile.getName() + ".gz");
                        try {
                            // Write the new TOC to a temp file; it is renamed into place only after the
                            // uncompressed file has been successfully deleted below.
                            tmpTocFile = new File(tocFile.getParentFile(), tocFile.getName() + ".tmp");
                            tocWriter = new StandardTocWriter(tmpTocFile, true, false);
                            compress(uncompressedEventFile, tocReader, outputFile, tocWriter);
                            tocWriter.close();
                        } catch (final IOException ioe) {
                            logger.error("Failed to compress {} on rollover", uncompressedEventFile, ioe);
                        }
                    } finally {
                        CloseableUtil.closeQuietly(tocReader, tocWriter);
                    }
                } finally {
                    eventFileManager.releaseReadLock(uncompressedEventFile);
                }

                // Now that compression is complete, take the write lock and attempt to delete the
                // input file and its associated TOC file.
                eventFileManager.obtainWriteLock(uncompressedEventFile);
                try {
                    if (uncompressedEventFile.delete()) {
                        if (tocReader != null) {
                            final File tocFile = tocReader.getFile();
                            if (!tocFile.delete()) {
                                logger.warn("Failed to delete {}; this file should be cleaned up manually", tocFile);
                            }

                            if (tmpTocFile != null) {
                                tmpTocFile.renameTo(tocFile);
                            }
                        }
                    } else {
                        logger.warn("Failed to delete {}; this file should be cleaned up manually", uncompressedEventFile);
                    }
                } finally {
                    eventFileManager.releaseWriteLock(uncompressedEventFile);
                }

                final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
                final long bytesAfter = outputFile.length();
                final double reduction = 100 * (1 - (double) bytesAfter / (double) bytesBefore);
                final String reductionTwoDecimals = String.format("%.2f", reduction);
                logger.debug("Successfully compressed Provenance Event File {} in {} millis from {} to {}, a reduction of {}%",
                    uncompressedEventFile, millis, FormatUtils.formatDataSize(bytesBefore), FormatUtils.formatDataSize(bytesAfter), reductionTwoDecimals);
            } catch (final InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            } catch (final Exception e) {
                logger.error("Failed to compress {}", uncompressedEventFile, e);
            }
        }
    }
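    /**
     * <p>
     * Illustrative sketch only, not part of NiFi's API: shows how the block-oriented output of
     * {@link #compress(File, TocReader, File, TocWriter)} can be consumed. Because each block is written as an
     * independent GZIP member, a reader can seek straight to a block's byte offset from the Table of Contents and
     * begin decompressing at that block, rather than decompressing the whole file from the start. Note that a
     * standard {@code GZIPInputStream} will continue through subsequent concatenated members, so a caller reads
     * only as many events as it needs. Assumes {@code StreamUtils.skip} and {@code TocReader.getBlockOffset}
     * behave as used elsewhere in this class.
     * </p>
     */
    @SuppressWarnings("unused")
    private static InputStream readCompressedBlock(final File compressedFile, final TocReader tocReader, final int blockIndex) throws IOException {
        final long blockOffset = tocReader.getBlockOffset(blockIndex);
        if (blockOffset < 0) {
            throw new IOException("Block " + blockIndex + " does not exist in the Table of Contents");
        }

        final InputStream fis = new FileInputStream(compressedFile);
        try {
            // Position the stream at the start of this block's GZIP member, then decompress from there.
            StreamUtils.skip(fis, blockOffset);
            return new java.util.zip.GZIPInputStream(fis);
        } catch (final IOException e) {
            fis.close();
            throw e;
        }
    }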
    public static void compress(final File input, final TocReader tocReader, final File output, final TocWriter tocWriter) throws IOException {
        try (final InputStream fis = new FileInputStream(input);
            final OutputStream fos = new FileOutputStream(output);
            final ByteCountingOutputStream byteCountingOut = new ByteCountingOutputStream(fos)) {

            int blockIndex = 0;
            while (true) {
                // Determine the min and max byte ranges for the current block.
                final long blockStart = tocReader.getBlockOffset(blockIndex);
                if (blockStart == -1) {
                    break;
                }

                long blockEnd = tocReader.getBlockOffset(blockIndex + 1);
                if (blockEnd < 0) {
                    blockEnd = input.length();
                }

                final long firstEventId = tocReader.getFirstEventIdForBlock(blockIndex);
                final long blockStartOffset = byteCountingOut.getBytesWritten();

                // Compress each block as its own GZIP member so that a reader can seek to a block's offset and
                // decompress it independently of the rest of the file. Compression level 1 favors speed over ratio.
                try (final OutputStream ncos = new NonCloseableOutputStream(byteCountingOut);
                    final OutputStream gzipOut = new GZIPOutputStream(ncos, 1)) {
                    StreamUtils.copy(fis, gzipOut, blockEnd - blockStart);
                }

                // Record where this block begins in the compressed file, along with the ID of its first event.
                tocWriter.addBlockOffset(blockStartOffset, firstEventId);
                blockIndex++;
            }
        }

        // Close the TOC Reader and TOC Writer
        CloseableUtil.closeQuietly(tocReader, tocWriter);
    }
}