/* * Copyright (c) 2011 Matthew Francis * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package org.xbib.io.compress.bzip2; import java.io.IOException; import java.io.InputStream; /** * <p>An InputStream wrapper that decompresses BZip2 data</p> * * <p>A BZip2 stream consists of one or more blocks of compressed data. This decompressor reads a * whole block at a time, then progressively returns decompressed output.</p> * * <p>On encountering any error decoding the compressed stream, an IOException is thrown, and * further reads will return {@code -1}</p> * * <p><b>Note:</b> Each BZip2 compressed block contains a CRC code which is verified after the block * has been read completely. If verification fails, an exception is thrown on the final read from * the block, <b>potentially after corrupt data has already been returned</b>. The compressed stream * also contains a final CRC code which is verified once the end of the stream has been reached. * <b>This check may fail even if every individual block in the stream passes CRC verification</b>. * If this possibility is of concern, you should read and store the entire decompressed stream * before further processing.</p> * * <p>Instances of this class are not threadsafe.</p> */ public class BZip2InputStream extends InputStream { /** * The stream from which compressed BZip2 data is read and decoded */ private InputStream inputStream; /** * An InputStream wrapper that provides bit-level reads */ private BZip2BitInputStream bitInputStream; /** * If {@code true}, the caller is assumed to have read away the stream's leading "BZ" identifier * bytes */ private final boolean headerless; /** * (@code true} if the end of the compressed stream has been reached, otherwise {@code false} */ private boolean streamComplete = false; /** * The declared block size of the stream (before final run-length decoding). The final block * will usually be smaller, but no block in the stream has to be exactly this large, and an * encoder could in theory choose to mix blocks of any size up to this value. Its function is * therefore as a hint to the decompressor as to how much working space is sufficient to * decompress blocks in a given stream */ private int streamBlockSize; /** * The merged CRC of all blocks decompressed so far */ private int streamCRC = 0; /** * The decompressor for the current block */ private BZip2BlockDecompressor blockDecompressor = null; public BZip2InputStream(final InputStream inputStream) { this(inputStream, false); } public BZip2InputStream(final InputStream inputStream, int bufferSize) { this(inputStream, false); } /** * @param inputStream The InputStream to wrap * @param headerless If {@code true}, the caller is assumed to have read away the stream's * leading "BZ" identifier bytes */ public BZip2InputStream(final InputStream inputStream, final boolean headerless) { if (inputStream == null) { throw new IllegalArgumentException("Null input stream"); } this.inputStream = inputStream; this.bitInputStream = new BZip2BitInputStream(inputStream); this.headerless = headerless; } /* (non-Javadoc) * @see java.io.InputStream#read() */ @Override public int read() throws IOException { int nextByte = -1; if (this.blockDecompressor == null) { initialiseStream(); } else { nextByte = this.blockDecompressor.read(); } if (nextByte == -1) { if (initialiseNextBlock()) { nextByte = this.blockDecompressor.read(); } } return nextByte; } /* (non-Javadoc) * @see java.io.InputStream#read(byte[], int, int) */ @Override public int read(final byte[] destination, final int offset, final int length) throws IOException { int bytesRead = -1; if (this.blockDecompressor == null) { initialiseStream(); } else { bytesRead = this.blockDecompressor.read(destination, offset, length); } if (bytesRead == -1) { if (initialiseNextBlock()) { bytesRead = this.blockDecompressor.read(destination, offset, length); } } return bytesRead; } /* (non-Javadoc) * @see java.io.InputStream#close() */ @Override public void close() throws IOException { if (this.bitInputStream != null) { this.streamComplete = true; this.blockDecompressor = null; this.bitInputStream = null; try { this.inputStream.close(); } finally { this.inputStream = null; } } } /** * Reads the stream header and checks that the data appears to be a valid BZip2 stream * * @throws java.io.IOException if the stream header is not valid */ private void initialiseStream() throws IOException { /* If the stream has been explicitly closed, throw an exception */ if (this.bitInputStream == null) { throw new BZip2Exception("Stream closed"); } /* If we're already at the end of the stream, do nothing */ if (this.streamComplete) { return; } /* Read the stream header */ try { int marker1 = this.headerless ? 0 : this.bitInputStream.readBits(16); int marker2 = this.bitInputStream.readBits(8); int blockSize = (this.bitInputStream.readBits(8) - '0'); if ( (!this.headerless && (marker1 != BZip2Constants.STREAM_START_MARKER_1)) || (marker2 != BZip2Constants.STREAM_START_MARKER_2) || (blockSize < 1) || (blockSize > 9)) { throw new BZip2Exception("Invalid BZip2 header"); } this.streamBlockSize = blockSize * 100000; } catch (IOException e) { // If the stream header was not valid, stop trying to read more data this.streamComplete = true; throw e; } } /** * Prepares a new block for decompression if any remain in the stream. If a previous block has * completed, its CRC is checked and merged into the stream CRC. If the previous block was the * final block in the stream, the stream CRC is validated * * @return {@code true} if a block was successfully initialised, or {@code false} if the end of * file marker was encountered * @throws java.io.IOException if either the block or stream CRC check failed, if the following data is * not a valid block-header or end-of-file marker, or if the following * block could not be decoded */ private boolean initialiseNextBlock() throws IOException { /* If we're already at the end of the stream, do nothing */ if (this.streamComplete) { return false; } /* If a block is complete, check the block CRC and integrate it into the stream CRC */ if (this.blockDecompressor != null) { int blockCRC = this.blockDecompressor.checkCRC(); this.streamCRC = ((this.streamCRC << 1) | (this.streamCRC >>> 31)) ^ blockCRC; } /* Read block-header or end-of-stream marker */ final int marker1 = this.bitInputStream.readBits(24); final int marker2 = this.bitInputStream.readBits(24); if (marker1 == BZip2Constants.BLOCK_HEADER_MARKER_1 && marker2 == BZip2Constants.BLOCK_HEADER_MARKER_2) { // Initialise a new block try { this.blockDecompressor = new BZip2BlockDecompressor(this.bitInputStream, this.streamBlockSize); } catch (IOException e) { // If the block could not be decoded, stop trying to read more data this.streamComplete = true; throw e; } return true; } else if (marker1 == BZip2Constants.STREAM_END_MARKER_1 && marker2 == BZip2Constants.STREAM_END_MARKER_2) { // Read and verify the end-of-stream CRC this.streamComplete = true; final int storedCombinedCRC = this.bitInputStream.readInteger(); if (storedCombinedCRC != this.streamCRC) { throw new BZip2Exception("BZip2 stream CRC error"); } return false; } /* If what was read is not a valid block-header or end-of-stream marker, the stream is broken */ this.streamComplete = true; throw new BZip2Exception("BZip2 stream format error"); } }