/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.common.utils;

import com.linkedin.pinot.common.Utils;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Static helpers for creating and extracting gzip-compressed tar archives.
 *
 * Taken from http://www.thoughtspark.org/node/53
 */
public class TarGzCompressionUtils {
  private static final Logger LOGGER = LoggerFactory.getLogger(TarGzCompressionUtils.class);

  // NOTE: the misspelling is part of the public API and is kept for compatibility.
  public static final String TAR_GZ_FILE_EXTENTION = ".tar.gz";

  /**
   * Creates a tar.gz file at the specified path with the contents of the
   * specified directory. Entries are added without any name prefix.
   *
   * @param directoryPath
   *          The path to the directory to create an archive of
   * @param tarGzPath
   *          The path to the archive to create. The file need not exist, but
   *          its parent must exist and must be a directory. The
   *          ".tar.gz" extension is appended when missing.
   * @return the path of the archive that was written (with the extension)
   * @throws IOException
   *           If anything goes wrong
   */
  public static String createTarGzOfDirectory(String directoryPath, String tarGzPath) throws IOException {
    return createTarGzOfDirectory(directoryPath, tarGzPath, "");
  }

  /**
   * Creates a tar.gz file at the specified path with the contents of the
   * specified directory, prepending <code>entryPrefix</code> to the name of
   * every archive entry.
   *
   * @param directoryPath
   *          The path to the file or directory to archive
   * @param tarGzPath
   *          The path to the archive to create; the ".tar.gz" extension is
   *          appended when missing
   * @param entryPrefix
   *          Prefix for every entry name inside the archive (may be empty)
   * @return the path of the archive that was written (with the extension)
   * @throws IOException
   *           If finishing or closing the archive fails. Failures while
   *           opening or writing the archive are logged and handed to
   *           {@link Utils#rethrowException(Throwable)}.
   */
  public static String createTarGzOfDirectory(String directoryPath, String tarGzPath, String entryPrefix)
      throws IOException {
    if (!tarGzPath.endsWith(TAR_GZ_FILE_EXTENTION)) {
      tarGzPath = tarGzPath + TAR_GZ_FILE_EXTENTION;
    }

    FileOutputStream fOut = null;
    BufferedOutputStream bOut = null;
    GzipCompressorOutputStream gzOut = null;
    TarArchiveOutputStream tOut = null;
    try {
      try {
        fOut = new FileOutputStream(new File(tarGzPath));
        bOut = new BufferedOutputStream(fOut);
        gzOut = new GzipCompressorOutputStream(bOut);
        tOut = new TarArchiveOutputStream(gzOut);
        tOut.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);
        addFileToTarGz(tOut, directoryPath, entryPrefix);
      } catch (IOException e) {
        LOGGER.error("Failed to create tar.gz file for {} at path: {}", directoryPath, tarGzPath, e);
        Utils.rethrowException(e);
      }

      // Finish and close explicitly, outside the catch above, so that a failure
      // to flush the archive still surfaces to the caller as an IOException.
      if (tOut != null) {
        tOut.finish();
        tOut.close();
      }
    } finally {
      // Safety net for the failure paths: release every stream that was opened,
      // without letting a secondary close() failure mask the original exception
      // or prevent the remaining streams from being closed.
      IOUtils.closeQuietly(tOut);
      IOUtils.closeQuietly(gzOut);
      IOUtils.closeQuietly(bOut);
      IOUtils.closeQuietly(fOut);
    }
    return tarGzPath;
  }

  /**
   * Creates a tar.gz file next to the specified directory, named after the
   * directory path (trailing slashes removed) plus the ".tar.gz" extension.
   *
   * @param directoryPath
   *          The path to the directory to create an archive of
   * @return the path of the archive that was written
   * @throws IOException
   *           If anything goes wrong
   */
  public static String createTarGzOfDirectory(String directoryPath) throws IOException {
    String tarGzPath = directoryPath;
    while (tarGzPath.endsWith("/")) {
      tarGzPath = tarGzPath.substring(0, tarGzPath.length() - 1);
    }
    return createTarGzOfDirectory(directoryPath, tarGzPath + TAR_GZ_FILE_EXTENTION);
  }

  /**
   * Creates a tar entry for the path specified with a name built from the base
   * passed in and the file/directory name. If the path is a directory, a
   * recursive call is made such that the full directory is added to the tar.
   * The caller is expected to have configured the long-file mode of
   * <code>tOut</code>.
   *
   * @param tOut
   *          The tar file's output stream
   * @param path
   *          The filesystem path of the file/directory being added
   * @param base
   *          The base prefix for the name of the tar file entry
   *
   * @throws IOException
   *           If anything goes wrong
   */
  private static void addFileToTarGz(TarArchiveOutputStream tOut, String path, String base) throws IOException {
    File f = new File(path);
    String entryName = base + f.getName();
    tOut.putArchiveEntry(new TarArchiveEntry(f, entryName));

    if (f.isFile()) {
      InputStream in = null;
      try {
        in = new FileInputStream(f);
        IOUtils.copy(in, tOut);
      } finally {
        // The source stream must be released even when the copy fails.
        IOUtils.closeQuietly(in);
      }
      tOut.closeArchiveEntry();
      return;
    }

    // Directory entries carry no payload: close the entry, then recurse.
    tOut.closeArchiveEntry();
    File[] children = f.listFiles();
    if (children != null) {
      for (File child : children) {
        addFileToTarGz(tOut, child.getAbsolutePath(), entryName + "/");
      }
    }
  }

  /**
   * Extracts a gzip-compressed tar archive into the given output directory.
   * Every entry is written below <code>outputDir</code> under its entry name.
   * A directory entry whose target already exists causes the existing
   * directory to be deleted and recreated empty.
   *
   * @param inputFile the input .tar.gz file
   * @param outputDir the directory to extract into
   * @return The {@link List} of {@link File}s with the untared content, in
   *         archive order.
   * @throws FileNotFoundException if the input file cannot be opened
   * @throws IOException if reading or writing fails, or if an entry name
   *           resolves to a location outside <code>outputDir</code>
   * @throws ArchiveException if the tar stream cannot be created
   * @throws IllegalStateException if an output directory cannot be created
   */
  public static List<File> unTar(final File inputFile, final File outputDir)
      throws FileNotFoundException, IOException, ArchiveException {
    LOGGER.debug("Untaring {} to dir {}.", inputFile.getAbsolutePath(), outputDir.getAbsolutePath());

    final String outputDirCanonical = outputDir.getCanonicalPath();
    final List<File> untaredFiles = new LinkedList<File>();

    InputStream fileIn = null;
    InputStream is = null;
    TarArchiveInputStream debInputStream = null;
    try {
      // Keep a handle on the raw file stream: the gzip wrapper reads the header
      // in its constructor and may throw, which would otherwise leak the file.
      fileIn = new BufferedInputStream(new FileInputStream(inputFile));
      is = new GzipCompressorInputStream(fileIn);
      debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory().createArchiveInputStream("tar", is);

      TarArchiveEntry entry;
      while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
        final File outputFile = resolveEntryFile(outputDir, outputDirCanonical, entry.getName());

        if (entry.isDirectory()) {
          LOGGER.debug("Attempting to write output directory {}.", outputFile.getAbsolutePath());
          if (outputFile.exists()) {
            LOGGER.error("The directory already there. Deleting - {}", outputFile.getAbsolutePath());
            FileUtils.deleteDirectory(outputFile);
          }
          LOGGER.debug("Attempting to create output directory {}.", outputFile.getAbsolutePath());
          if (!outputFile.mkdirs()) {
            throw new IllegalStateException(
                String.format("Couldn't create directory %s.", outputFile.getAbsolutePath()));
          }
        } else {
          LOGGER.debug("Creating output file {}.", outputFile.getAbsolutePath());
          File directory = outputFile.getParentFile();
          if (directory != null && !directory.exists()) {
            // Result intentionally not checked: if the directory is still
            // missing, opening the output stream below fails with an IOException.
            directory.mkdirs();
          }
          OutputStream outputFileStream = null;
          try {
            outputFileStream = new FileOutputStream(outputFile);
            IOUtils.copy(debInputStream, outputFileStream);
          } finally {
            IOUtils.closeQuietly(outputFileStream);
          }
        }
        untaredFiles.add(outputFile);
      }
    } finally {
      IOUtils.closeQuietly(debInputStream);
      IOUtils.closeQuietly(is);
      IOUtils.closeQuietly(fileIn);
    }
    return untaredFiles;
  }

  /**
   * Maps a tar entry name to its target file and rejects names (for example
   * ones containing "..") that would resolve outside the output directory.
   *
   * @param outputDir the extraction root
   * @param outputDirCanonical canonical path of <code>outputDir</code>
   * @param entryName the entry name as stored in the archive
   * @return the file the entry should be written to
   * @throws IOException if the entry resolves outside <code>outputDir</code>
   */
  private static File resolveEntryFile(File outputDir, String outputDirCanonical, String entryName)
      throws IOException {
    File outputFile = new File(outputDir, entryName);
    String canonicalTarget = outputFile.getCanonicalPath();
    // Compare against "<dir>/" so that a sibling such as "<dir>-evil" is not accepted.
    String dirPrefix = outputDirCanonical.endsWith(File.separator)
        ? outputDirCanonical : outputDirCanonical + File.separator;
    if (!canonicalTarget.equals(outputDirCanonical) && !canonicalTarget.startsWith(dirPrefix)) {
      throw new IOException(
          "Tar entry '" + entryName + "' resolves outside of the output directory " + outputDirCanonical);
    }
    return outputFile;
  }

  /**
   * Reads a single file out of a gzip-compressed tar stream into memory.
   * The first entry whose name <em>contains</em> <code>filename</code> is
   * returned. The supplied stream is always closed before this method
   * returns.
   *
   * @param tarGzInputStream the gzip-compressed tar stream to read from
   * @param filename substring to look for in the entry names
   * @return an in-memory stream over the matching entry's content, or
   *         <code>null</code> when no entry matches
   * @throws FileNotFoundException declared for compatibility
   * @throws IOException if reading the archive fails
   * @throws ArchiveException if the tar stream cannot be created
   */
  public static InputStream unTarOneFile(InputStream tarGzInputStream, final String filename)
      throws FileNotFoundException, IOException, ArchiveException {
    TarArchiveInputStream debInputStream = null;
    InputStream is = null;
    try {
      is = new GzipCompressorInputStream(tarGzInputStream);
      debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory().createArchiveInputStream("tar", is);
      TarArchiveEntry entry;
      while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
        if (entry.getName().contains(filename)) {
          ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
          IOUtils.copy(debInputStream, byteArrayOutputStream);
          return new ByteArrayInputStream(byteArrayOutputStream.toByteArray());
        }
      }
    } finally {
      IOUtils.closeQuietly(debInputStream);
      IOUtils.closeQuietly(is);
      // Also covers the case where the gzip wrapper could not be constructed.
      IOUtils.closeQuietly(tarGzInputStream);
    }
    return null;
  }
}