// TarFileInputStream.java example
//
// Explorer
// h-store-master
/* Copyright (c) 2001-2009, The HSQL Development Group
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the HSQL Development Group nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL HSQL DEVELOPMENT GROUP, HSQLDB.ORG,
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


package org.hsqldb.lib.tar;

import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;

/**
 * Note that this class <b>is not</b> a java.io.FileInputStream,
 * because our goal is to greatly restrict the public methods of
 * FileInputStream, yet we must use public methods of the underlying
 * FileInputStream internally.  Can't accomplish these goals in Java if we
 * subclass.
 * <P>
 * This class is ignorant about Tar header fields, attributes and such.
 * It is concerned with reading and writing blocks of data in conformance with
 * Tar formatting, in a way convenient to those who want to get the header and
 * data blocks.
 * <P>
 * Asymmetric to the Tar file writing side, the bufferBlocks setting here is
 * used only to adjust the read buffer size (for file data reads), so the user
 * can compromise between available memory and performance.  Small buffer sizes
 * will always work, but will incur more reads; on the other hand, a buffer
 * larger than the largest component file is just a waste of memory.
 * <P/>
 * We assume the responsibility to manage the setting because the decision
 * should be based on available RAM more than anything else (therefore, we can't
 * set a good value automatically).
 * <P/>
 * As alluded to above, headers are read in separate reads, regardless of the
 * readBufferBlocks setting.  readBufferBlocks is used for reading
 * <I>file data</I>.
 * <P/>
 * I have purposefully not implemented skip(), because, though I haven't tested
 * it, I believe our readBlock() and readBlocks() methods are at least as fast,
 * since we use the largest read buffer within the limits the user has set.
 */
public class TarFileInputStream implements java.io.Closeable {

    /**
     * Running total of bytes obtained from readStream by readBlocks(int)
     * and readCompressedBlocks(int).
     * "Read" here is pronounced as the past tense of "to read", i.e. the
     * homonym of "red".
     */
    protected long bytesRead = 0;

    /* This is not a "Reader", but the byte "Stream" that we read() from.
     *
     * Would love to use a RandomAccessFile, but RandomAccessFiles do not play
     * nicely with InputStreams or filters, and it just would not work with
     * compressed input. */
    private InputStream readStream;

    /** Destination of every read; sized readBufferBlocks * 512 bytes. */
    protected byte[] readBuffer;

    /** Size of readBuffer, expressed in 512-byte blocks. */
    protected int    readBufferBlocks;

    /** One of the TarFileOutputStream.Compression constants. */
    protected int    compressionType;

    /**
     * Convenience wrapper to use default readBufferBlocks and compressionType.
     *
     * @see #TarFileInputStream(File, int, int)
     */
    public TarFileInputStream(File sourceFile) throws IOException {
        this(sourceFile, TarFileOutputStream.Compression.DEFAULT_COMPRESSION);
    }

    /**
     * Convenience wrapper to use default readBufferBlocks.
     *
     * @see #TarFileInputStream(File, int, int)
     */
    public TarFileInputStream(File sourceFile,
                              int compressionType) throws IOException {
        this(sourceFile, compressionType,
             TarFileOutputStream.Compression.DEFAULT_BLOCKS_PER_RECORD);
    }

    /**
     * @return the size of the read buffer, in 512-byte blocks, as given to
     *         the constructor.
     */
    public int getReadBufferBlocks() {
        return readBufferBlocks;
    }

    /**
     * This class does no validation or enforcement of file naming conventions.
     * If desired, the caller should enforce extensions like "tar" and
     * "tar.gz" (and that they match the specified compression type).
     * <P/>
     * This object will automatically release its I/O resources when you get
     * false back from a readNextHeaderBlock() call.
     * If you abort before then, you must call the close() method like for a
     * normal InputStream.
     * <P/>
     * If construction itself fails after the source file has been opened
     * (e.g. the file is not valid GZIP data), the file handle is released
     * before the exception propagates.
     *
     * @param sourceFile        the tar file to read
     * @param compressionType   one of the TarFileOutputStream.Compression
     *                          constants handled by this constructor
     * @param readBufferBlocks  size of the read buffer in 512-byte blocks
     * @throws FileNotFoundException if sourceFile is not a regular file
     * @throws IOException if sourceFile is not readable or can not be opened
     * @throws IllegalArgumentException if compressionType is not recognized
     * @see #close()
     * @see #readNextHeaderBlock()
     */
    public TarFileInputStream(File sourceFile, int compressionType,
                              int readBufferBlocks) throws IOException {

        if (!sourceFile.isFile()) {
            throw new FileNotFoundException(sourceFile.getAbsolutePath());
        }

        if (!sourceFile.canRead()) {
            throw new IOException(RB.singleton.getString(RB.READ_DENIED,
                    sourceFile.getAbsolutePath()));
        }

        this.readBufferBlocks = readBufferBlocks;
        this.compressionType  = compressionType;
        readBuffer            = new byte[readBufferBlocks * 512];

        switch (compressionType) {

            case TarFileOutputStream.Compression.NO_COMPRESSION :
                readStream = new FileInputStream(sourceFile);
                break;

            case TarFileOutputStream.Compression.GZIP_COMPRESSION : {
                FileInputStream fileStream = new FileInputStream(sourceFile);
                boolean         wrapped    = false;

                try {
                    readStream = new GZIPInputStream(fileStream,
                                                     readBuffer.length);
                    wrapped    = true;
                } finally {

                    // The GZIPInputStream constructor reads the GZIP header
                    // and may throw; without this the file handle would leak
                    // because nobody else holds a reference to fileStream.
                    if (!wrapped) {
                        try {
                            fileStream.close();
                        } catch (IOException ignored) {

                            // Let the original failure propagate instead of
                            // this secondary one.
                        }
                    }
                }

                break;
            }

            default :
                throw new IllegalArgumentException(
                    RB.singleton.getString(
                        RB.COMPRESSION_UNKNOWN, compressionType));
        }
    }

    /**
     * readBlocks(int) is the method that USERS of this class should use to
     * read file data from the tar file.
     * This method reads from the tar file and writes to the readBuffer array.
     * <P>
     * This class and subclasses should read from the underlying readStream
     * <b>ONLY WITH THIS METHOD</b>.
     * That way we can be confident that bytesRead will always be accurate.
     * <P>
     * This method is different from a typical Java byte array read command
     * in that when reading tar files <OL>
     *   <LI>we always know ahead-of-time how many bytes we should read, and
     *   <LI>we always want to read quantities of bytes in multiples of 512.
     * </OL>
     * <P>
     * For uncompressed input the underlying stream is read repeatedly until
     * the requested amount has arrived or end-of-stream is reached, since a
     * single InputStream.read call is allowed to return fewer bytes than
     * requested.  Compressed input is delegated to readCompressedBlocks(int).
     *
     * @param blocks  How many 512 blocks to read.
     * @throws IOException for an I/O error on the underlying InputStream
     * @throws TarMalformatException if no I/O error occurred, but we failed to
     *                               read the exact number of bytes requested.
     */
    public void readBlocks(int blocks)
    throws IOException, TarMalformatException {

        /* int for blocks should support sizes up to about 1T, according to
         * my off-the-cuff calculations */
        if (compressionType
                != TarFileOutputStream.Compression.NO_COMPRESSION) {
            readCompressedBlocks(blocks);

            return;
        }

        int requiredBytes = blocks * 512;
        int bytesSoFar    = 0;

        // do/while so that the stream's own argument checking still applies
        // to the very first call, exactly as a single read would.
        do {
            int i = readStream.read(readBuffer, bytesSoFar,
                                    requiredBytes - bytesSoFar);

            if (i < 0) {

                // End of stream.  Do NOT add the -1 marker to the counters.
                break;
            }

            bytesRead  += i;
            bytesSoFar += i;
        } while (bytesSoFar < requiredBytes);

        if (bytesSoFar != requiredBytes) {
            throw new TarMalformatException(
                RB.singleton.getString(
                    RB.INSUFFICIENT_READ, requiredBytes, bytesSoFar));
        }
    }

    /**
     * Work-around for the problem that compressed InputReaders don't fill
     * the read buffer before returning.
     *
     * Has visibility 'protected' so that subclasses may override with
     * different algorithms, or use different algorithms for different
     * compression stream.
     *
     * @param blocks  How many 512 blocks to read into readBuffer.
     * @throws EOFException if the stream ends before the requested number
     *                      of bytes has been read
     * @throws IOException for an I/O error on the underlying InputStream
     */
    protected void readCompressedBlocks(int blocks) throws IOException {

        int bytesSoFar    = 0;
        int requiredBytes = 512 * blocks;

        // This method works with individual bytes!
        int i;

        while (bytesSoFar < requiredBytes) {
            i = readStream.read(readBuffer, bytesSoFar,
                                requiredBytes - bytesSoFar);

            if (i < 0) {
                throw new EOFException(
                    RB.singleton.getString(
                        RB.DECOMPRESS_RANOUT, bytesSoFar, requiredBytes));
            }

            bytesRead  += i;
            bytesSoFar += i;
        }
    }

    /**
     * readBlock() and readNextHeaderBlock are the methods that USERS of this
     * class should use to read header blocks from the tar file.
     * <P>
     * readBlock() should be used when you know that the current block should
     * contain what you want.
     * E.g. you know that the very first block of a tar file should contain
     * a Tar Entry header block.
     *
     * @throws IOException for an I/O error on the underlying InputStream
     * @throws TarMalformatException if a full 512-byte block could not be read
     * @see #readNextHeaderBlock
     */
    public void readBlock() throws IOException, TarMalformatException {
        readBlocks(1);
    }

    /**
     * readBlock() and readNextHeaderBlock are the methods that USERS of this
     * class should use to read header blocks from the tar file.
     * <P>
     * readNextHeaderBlock continues working through the Tar File from the
     * current point until it finds a block with a non-0 first byte.
     *
     * @return  True if a header block was read and placed at the beginning of
     *          the readBuffer array.  False if EOF was encountered without
     *          finding any blocks with first byte != 0.  If false is returned,
     *          we have automatically closed this TarFileInputStream too.
     * @throws IOException for an I/O error on the underlying InputStream
     * @throws TarMalformatException if a partial block was read
     * @see #readBlock
     */
    public boolean readNextHeaderBlock()
    throws IOException, TarMalformatException {

        // We read a single block at a time because there should only be 2
        // empty blocks between each Tar Entry.
        try {
            while (readStream.available() > 0) {
                readBlock();

                if (readBuffer[0] != 0) {
                    return true;
                }
            }
        } catch (EOFException ignored) {
            /* This is a work-around, and the exception is swallowed on
             * purpose.
             * Sun Java's inputStream.available() is unreliable here.
             * We reach this point when performing a read of a GZip stream when
             * .available == 1, which according to API Spec, should not happen.
             * We treat this condition exactly as if readStream.available is 0,
             * which it should be.
             */
        }

        close();

        return false;
    }

    /**
     * Implements java.io.Closeable by closing the underlying input stream.
     *
     * @throws IOException if the underlying stream fails to close
     * @see java.io.Closeable
     */
    public void close() throws IOException {
        readStream.close();
    }
}