/*
* Copyright (c) 2011 Matthew Francis
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.itadaki.bzip2;
import java.io.IOException;
import java.io.InputStream;
/**
* <p>
* An InputStream wrapper that decompresses BZip2 data
* </p>
*
* <p>
* A BZip2 stream consists of one or more blocks of compressed data. This
* decompressor reads a whole block at a time, then progressively returns
* decompressed output.
* </p>
*
* <p>
* On encountering any error decoding the compressed stream, an IOException is
* thrown, and further reads will return {@code -1}
* </p>
*
* <p>
* <b>Note:</b> Each BZip2 compressed block contains a CRC code which is
* verified after the block has been read completely. If verification fails, an
* exception is thrown on the final read from the block, <b>potentially after
* corrupt data has already been returned</b>. The compressed stream also
* contains a final CRC code which is verified once the end of the stream has
* been reached. <b>This check may fail even if every individual block in the
* stream passes CRC verification</b>. If this possibility is of concern, you
* should read and store the entire decompressed stream before further
* processing.
* </p>
*
* <p>
* Instances of this class are not threadsafe.
* </p>
*/
public class BZip2InputStream extends InputStream {

    /**
     * The stream from which compressed BZip2 data is read and decoded. Set to
     * {@code null} once this stream has been closed
     */
    private InputStream inputStream;

    /**
     * An InputStream wrapper that provides bit-level reads. Set to
     * {@code null} once this stream has been closed, which is how a closed
     * stream is detected
     */
    private BZip2BitInputStream bitInputStream;

    /**
     * If {@code true}, the caller is assumed to have read away the stream's
     * leading "BZ" identifier bytes
     */
    private final boolean headerless;

    /**
     * Optional listener notified when a block header or the end-of-stream
     * marker is read; may be {@code null}, in which case no notifications are
     * made
     */
    private final Callback callback;

    /**
     * {@code true} if the end of the compressed stream has been reached (or
     * the stream has failed or been closed), otherwise {@code false}
     */
    private boolean streamComplete = false;

    /**
     * The declared block size of the stream (before final run-length decoding).
     * The final block will usually be smaller, but no block in the stream has
     * to be exactly this large, and an encoder could in theory choose to mix
     * blocks of any size up to this value. Its function is therefore as a hint
     * to the decompressor as to how much working space is sufficient to
     * decompress blocks in a given stream
     */
    private int streamBlockSize;

    /**
     * The merged CRC of all blocks decompressed so far
     */
    private int streamCRC = 0;

    /**
     * The decompressor for the current block, or {@code null} if no block has
     * been started yet
     */
    private BZip2BlockDecompressor blockDecompressor = null;

    /**
     * Reads a single decompressed byte, moving on to the next compressed block
     * when the current one is exhausted.
     *
     * @return the value returned by the block decompressor for the next byte,
     *         or {@code -1} if the end of the stream has been reached
     * @throws IOException
     *             if the stream is closed, or the compressed data is invalid
     * @see java.io.InputStream#read()
     */
    @Override
    public int read() throws IOException {
        int nextByte = -1;

        if (this.blockDecompressor == null) {
            // First read: the stream header must be consumed before any block
            initialiseStream();
        } else {
            nextByte = this.blockDecompressor.read();
        }

        // Either no block has been started yet or the current one is exhausted
        if (nextByte == -1) {
            if (initialiseNextBlock()) {
                nextByte = this.blockDecompressor.read();
            }
        }

        return nextByte;
    }

    /**
     * Reads decompressed bytes into part of an array, moving on to the next
     * compressed block when the current one is exhausted. A single call never
     * returns data from more than one block.
     *
     * @param destination
     *            the buffer into which decompressed data is written
     * @param offset
     *            the index in {@code destination} at which to start writing
     * @param length
     *            the maximum number of bytes to read
     * @return the number of bytes read as reported by the block decompressor,
     *         or {@code -1} if the end of the stream has been reached
     * @throws NullPointerException
     *             if {@code destination} is {@code null}
     * @throws IndexOutOfBoundsException
     *             if {@code offset} or {@code length} is negative, or
     *             {@code length} is greater than
     *             {@code destination.length - offset}
     * @throws IOException
     *             if the stream is closed, or the compressed data is invalid
     * @see java.io.InputStream#read(byte[], int, int)
     */
    @Override
    public int read(final byte[] destination, final int offset, final int length) throws IOException {
        // Validate up front, as required by the InputStream contract, so that
        // bad arguments are rejected before any compressed data is consumed
        if (destination == null) {
            throw new NullPointerException("Null destination buffer");
        }
        if ((offset < 0) || (length < 0) || (length > destination.length - offset)) {
            throw new IndexOutOfBoundsException(
                    "offset " + offset + ", length " + length + ", buffer size " + destination.length);
        }

        int bytesRead = -1;

        if (this.blockDecompressor == null) {
            // First read: the stream header must be consumed before any block
            initialiseStream();
        } else {
            bytesRead = this.blockDecompressor.read(destination, offset, length);
        }

        // Either no block has been started yet or the current one is exhausted
        if (bytesRead == -1) {
            if (initialiseNextBlock()) {
                bytesRead = this.blockDecompressor.read(destination, offset, length);
            }
        }

        return bytesRead;
    }

    /**
     * Closes this stream and the wrapped input stream. Calling this method on
     * an already closed stream has no effect.
     *
     * @throws IOException
     *             if closing the wrapped input stream fails
     * @see java.io.InputStream#close()
     */
    @Override
    public void close() throws IOException {
        if (this.bitInputStream != null) {
            this.streamComplete = true;
            this.blockDecompressor = null;
            this.bitInputStream = null;

            try {
                this.inputStream.close();
            } finally {
                // Drop the reference even if the underlying close fails
                this.inputStream = null;
            }
        }
    }

    /**
     * Reads the stream header and checks that the data appears to be a valid
     * BZip2 stream
     *
     * @throws IOException
     *             if the stream has been closed, or the stream header is not
     *             valid
     */
    private void initialiseStream() throws IOException {
        /* If the stream has been explicitly closed, throw an exception */
        if (this.bitInputStream == null) {
            throw new BZip2Exception("Stream closed");
        }

        /* If we're already at the end of the stream, do nothing */
        if (this.streamComplete) {
            return;
        }

        /* Read the stream header */
        try {
            // In headerless mode the leading 16-bit marker was consumed by the caller
            final int marker1 = this.headerless ? 0 : this.bitInputStream.readBits(16);
            final int marker2 = this.bitInputStream.readBits(8);
            // The block size is stored as an ASCII digit '1'..'9'
            final int blockSize = this.bitInputStream.readBits(8) - '0';

            if ((!this.headerless && (marker1 != BZip2Constants.STREAM_START_MARKER_1))
                    || (marker2 != BZip2Constants.STREAM_START_MARKER_2)
                    || (blockSize < 1)
                    || (blockSize > 9)) {
                throw new BZip2Exception("Invalid BZip2 header");
            }

            this.streamBlockSize = blockSize * 100000;
        } catch (IOException e) {
            // If the stream header was not valid, stop trying to read more data
            this.streamComplete = true;
            throw e;
        }
    }

    /**
     * Prepares a new block for decompression if any remain in the stream. If a
     * previous block has completed, its CRC is checked and merged into the
     * stream CRC. If the previous block was the final block in the stream, the
     * stream CRC is validated.
     *
     * <p>
     * Any IOException raised here marks the stream as complete, so that
     * subsequent reads return {@code -1} instead of attempting to decode from
     * an undefined position in the compressed data.
     * </p>
     *
     * @return {@code true} if a block was successfully initialised, or
     *         {@code false} if the end of file marker was encountered
     * @throws IOException
     *             if either the block or stream CRC check failed, if the
     *             following data is not a valid block-header or end-of-file
     *             marker, or if the following block could not be decoded
     */
    private boolean initialiseNextBlock() throws IOException {
        /* If we're already at the end of the stream, do nothing */
        if (this.streamComplete) {
            return false;
        }

        try {
            /*
             * If a block is complete, check the block CRC and integrate it
             * into the stream CRC
             */
            if (this.blockDecompressor != null) {
                final int blockCRC = this.blockDecompressor.checkCRC();
                // Rotate the running CRC left by one bit, then XOR in the block CRC
                this.streamCRC = ((this.streamCRC << 1) | (this.streamCRC >>> 31)) ^ blockCRC;
            }

            /* Read block-header or end-of-stream marker (48 bits in total) */
            final int marker1 = this.bitInputStream.readBits(24);
            final int marker2 = this.bitInputStream.readBits(24);

            if ((marker1 == BZip2Constants.BLOCK_HEADER_MARKER_1)
                    && (marker2 == BZip2Constants.BLOCK_HEADER_MARKER_2)) {
                if (this.callback != null) {
                    // Report the position of the start of the 48-bit marker just read
                    this.callback.blockStart(this.bitInputStream.totalBitsRead() - 48);
                }

                // Initialise a new block
                this.blockDecompressor = new BZip2BlockDecompressor(this.bitInputStream, this.streamBlockSize);
                return true;
            }

            if ((marker1 == BZip2Constants.STREAM_END_MARKER_1)
                    && (marker2 == BZip2Constants.STREAM_END_MARKER_2)) {
                // Read and verify the end-of-stream CRC
                this.streamComplete = true;
                final int storedCombinedCRC = this.bitInputStream.readInteger();
                if (storedCombinedCRC != this.streamCRC) {
                    throw new BZip2Exception("BZip2 stream CRC error");
                }

                if (this.callback != null) {
                    this.callback.noMoreBlocks(this.bitInputStream.totalBitsRead());
                }
                return false;
            }

            /*
             * If what was read is not a valid block-header or end-of-stream
             * marker, the stream is broken
             */
            throw new BZip2Exception("BZip2 stream format error");
        } catch (IOException e) {
            // Whatever went wrong (CRC failure, truncated input, bad marker,
            // undecodable block), stop trying to read more data
            this.streamComplete = true;
            throw e;
        }
    }

    /**
     * Creates a decompressing stream with no callback.
     *
     * @param inputStream
     *            The InputStream to wrap
     * @param headerless
     *            If {@code true}, the caller is assumed to have read away the
     *            stream's leading "BZ" identifier bytes
     * @throws IllegalArgumentException
     *             if {@code inputStream} is {@code null}
     */
    public BZip2InputStream(final InputStream inputStream, final boolean headerless) {
        this(inputStream, headerless, null);
    }

    /**
     * Creates a decompressing stream that reports block boundaries to a
     * callback.
     *
     * @param inputStream
     *            The InputStream to wrap
     * @param headerless
     *            If {@code true}, the caller is assumed to have read away the
     *            stream's leading "BZ" identifier bytes
     * @param callback
     *            Notified as block headers and the end-of-stream marker are
     *            read, or {@code null} if no notifications are wanted
     * @throws IllegalArgumentException
     *             if {@code inputStream} is {@code null}
     */
    public BZip2InputStream(final InputStream inputStream, final boolean headerless, final Callback callback) {
        if (inputStream == null) {
            throw new IllegalArgumentException("Null input stream");
        }

        this.inputStream = inputStream;
        this.callback = callback;
        this.bitInputStream = new BZip2BitInputStream(inputStream);
        this.headerless = headerless;
    }

    /**
     * Receives notifications about block boundaries as the compressed stream
     * is decoded. Offsets are the bit counts reported by the underlying bit
     * input stream.
     */
    public interface Callback {

        /**
         * Called after a block header marker has been read and before the
         * block itself is decoded.
         *
         * @param blockStartBitOffset
         *            the bit input stream's total bit count at the start of
         *            the 48-bit block header marker
         */
        void blockStart(long blockStartBitOffset);

        /**
         * Called once the end-of-stream marker has been read and the stream
         * CRC has been verified successfully.
         *
         * @param totalBitsRead
         *            the bit input stream's total bit count after the stream
         *            CRC has been read
         */
        void noMoreBlocks(long totalBitsRead);
    }
}