/*
* Copyright (c) 2011 Matthew Francis
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.itadaki.bzip2;
import java.io.IOException;
/*
* Block encoding consists of the following stages:
* 1. Run-Length Encoding[1] - write()
* 2. Burrows Wheeler Transform - close() (through BZip2DivSufSort)
* 3. Write block header - close()
* 4. Move To Front Transform - close() (through BZip2MTFAndRLE2StageEncoder)
* 5. Run-Length Encoding[2] - close() (through BZip2MTFAndRLE2StageEncoder)
* 6. Create and write Huffman tables - close() (through BZip2HuffmanStageEncoder)
* 7. Huffman encode and write data - close() (through BZip2HuffmanStageEncoder)
*/
/**
 * Compresses and writes a single BZip2 block.
 * <p>
 * Bytes are fed in through {@link #write(int)} or
 * {@link #write(byte[], int, int)}, which apply the first Run-Length Encoding
 * stage as the data arrives. Once a write method reports that the block is
 * full (or there is no more input), {@link #close()} performs the remaining
 * stages and writes the compressed block to the output stream.
 */
public class BZip2BlockCompressor {

    /**
     * The stream to which compressed BZip2 data is written
     */
    private final BZip2BitOutputStream bitOutputStream;

    /**
     * CRC builder for the block
     */
    private final CRC32 crc = new CRC32();

    /**
     * The RLE'd block data
     */
    private final byte[] block;

    /**
     * Current length of the data within the {@link #block} array
     */
    private int blockLength = 0;

    /**
     * A limit beyond which new data will not be accepted into the block
     */
    private final int blockLengthLimit;

    /**
     * The values that are present within the RLE'd block data. For each index,
     * {@code true} if that value is present within the data, otherwise
     * {@code false}
     */
    private final boolean[] blockValuesPresent = new boolean[256];

    /**
     * The Burrows Wheeler Transformed block data
     */
    private final int[] bwtBlock;

    /**
     * The current RLE value being accumulated (undefined when
     * {@link #rleLength} is 0). Whenever {@link #rleLength} is non-zero this
     * holds an unsigned byte value in the range 0..255, because
     * {@link #write(int)} masks its input before storing it here
     */
    private int rleCurrentValue = -1;

    /**
     * The repeat count of the current RLE value
     */
    private int rleLength = 0;

    /**
     * Write the Huffman symbol to output byte map
     * <p>
     * The map is written as 16 flags, one per group of 16 consecutive byte
     * values, followed by 16 per-value flags for each group that has at least
     * one value present.
     *
     * @throws IOException
     *             on any I/O error writing the data
     */
    private void writeSymbolMap() throws IOException {
        final BZip2BitOutputStream out = this.bitOutputStream;
        final boolean[] present = this.blockValuesPresent;

        // condensedInUse[i] is true when any of the values i*16 .. i*16+15
        // occurs in the block
        final boolean[] condensedInUse = new boolean[16];
        for (int i = 0; i < 16; i++) {
            for (int j = 0, k = i << 4; j < 16; j++, k++) {
                if (present[k]) {
                    condensedInUse[i] = true;
                }
            }
        }

        for (int i = 0; i < 16; i++) {
            out.writeBoolean(condensedInUse[i]);
        }

        // Only groups flagged as in use get their 16 individual flags written
        for (int i = 0; i < 16; i++) {
            if (condensedInUse[i]) {
                for (int j = 0, k = i << 4; j < 16; j++, k++) {
                    out.writeBoolean(present[k]);
                }
            }
        }
    }

    /**
     * Writes an RLE run to the block array, updating the block CRC and present
     * values array as required
     * <p>
     * Runs of 1 to 3 are written as that many literal copies of the value.
     * Longer runs are written as four copies of the value followed by one byte
     * holding {@code runLength - 4}.
     *
     * @param value
     *            The value to write. Used as an index into the 256 entry
     *            present values array, so it must be in the range 0..255
     * @param runLength
     *            The run length of the value to write. Callers within this
     *            class pass values in the range 1..255
     */
    private void writeRun(final int value, int runLength) {
        final int blockLength = this.blockLength;
        final byte[] block = this.block;

        this.blockValuesPresent[value] = true;
        this.crc.updateCRC(value, runLength);

        final byte byteValue = (byte) value;
        switch (runLength) {
            case 1:
                block[blockLength] = byteValue;
                this.blockLength = blockLength + 1;
                break;
            case 2:
                block[blockLength] = byteValue;
                block[blockLength + 1] = byteValue;
                this.blockLength = blockLength + 2;
                break;
            case 3:
                block[blockLength] = byteValue;
                block[blockLength + 1] = byteValue;
                block[blockLength + 2] = byteValue;
                this.blockLength = blockLength + 3;
                break;
            default:
                // The count byte is itself stored in the block, so it must
                // also be marked as a value that is present
                runLength -= 4;
                this.blockValuesPresent[runLength] = true;
                block[blockLength] = byteValue;
                block[blockLength + 1] = byteValue;
                block[blockLength + 2] = byteValue;
                block[blockLength + 3] = byteValue;
                block[blockLength + 4] = (byte) runLength;
                this.blockLength = blockLength + 5;
                break;
        }
    }

    /**
     * Writes a byte to the block, accumulating to an RLE run where possible
     *
     * @param value
     *            The byte to write. Only the low 8 bits are used, so a
     *            sign-extended byte (-128..-1) and its unsigned equivalent
     *            (128..255) are treated as the same value
     * @return {@code true} if the byte was written, or {@code false} if the
     *         block is already full
     */
    public boolean write(final int value) {
        if (this.blockLength > this.blockLengthLimit) {
            return false;
        }

        // Normalise to an unsigned byte before comparing with the current run.
        // Without this, the same byte arriving once as e.g. 255 and once as -1
        // would be split into two adjacent runs, which can leave four or more
        // equal bytes in the block with no run length byte following them
        final int byteValue = value & 0xff;
        final int rleCurrentValue = this.rleCurrentValue;
        final int rleLength = this.rleLength;

        if (rleLength == 0) {
            this.rleCurrentValue = byteValue;
            this.rleLength = 1;
        } else if (rleCurrentValue != byteValue) {
            // This path commits us to write 6 bytes - one RLE run (5 bytes)
            // plus one extra
            writeRun(rleCurrentValue, rleLength);
            this.rleCurrentValue = byteValue;
            this.rleLength = 1;
        } else {
            if (rleLength == 254) {
                // The run has reached 255, the longest that writeRun() is
                // asked to encode; flush it and start afresh
                writeRun(rleCurrentValue, 255);
                this.rleLength = 0;
            } else {
                this.rleLength = rleLength + 1;
            }
        }
        return true;
    }

    /**
     * Writes an array to the block
     *
     * @param data
     *            The array to write
     * @param offset
     *            The offset within the input data to write from
     * @param length
     *            The number of bytes of input data to write
     * @return The actual number of input bytes written. May be less than the
     *         number requested, or zero if the block is already full
     */
    public int write(final byte[] data, int offset, int length) {
        int written = 0;
        while (length-- > 0) {
            // Pass the byte as an unsigned value, matching write(int)
            if (!write(data[offset++] & 0xff)) {
                break;
            }
            written++;
        }
        return written;
    }

    /**
     * Compresses and writes out the block
     *
     * @throws IOException
     *             on any I/O error writing the data
     */
    public void close() throws IOException {
        // If an RLE run is in progress, write it out
        if (this.rleLength > 0) {
            writeRun(this.rleCurrentValue, this.rleLength);
        }

        // Apply a one byte block wrap required by the BWT implementation
        this.block[this.blockLength] = this.block[0];

        // Perform the Burrows Wheeler Transform
        BZip2DivSufSort divSufSort = new BZip2DivSufSort(this.block, this.bwtBlock, this.blockLength);
        int bwtStartPointer = divSufSort.bwt();

        // Write out the block header
        this.bitOutputStream.writeBits(24, BZip2Constants.BLOCK_HEADER_MARKER_1);
        this.bitOutputStream.writeBits(24, BZip2Constants.BLOCK_HEADER_MARKER_2);
        this.bitOutputStream.writeInteger(this.crc.getCRC());
        // Randomised block flag. We never create randomised blocks
        this.bitOutputStream.writeBoolean(false);
        this.bitOutputStream.writeBits(24, bwtStartPointer);

        // Write out the symbol map
        writeSymbolMap();

        // Perform the Move To Front Transform and Run-Length Encoding[2] stages
        BZip2MTFAndRLE2StageEncoder mtfEncoder = new BZip2MTFAndRLE2StageEncoder(this.bwtBlock, this.blockLength, this.blockValuesPresent);
        mtfEncoder.encode();

        // Perform the Huffman Encoding stage and write out the encoded data
        BZip2HuffmanStageEncoder huffmanEncoder = new BZip2HuffmanStageEncoder(this.bitOutputStream, mtfEncoder.getMtfBlock(), mtfEncoder.getMtfLength(),
                mtfEncoder.getMtfAlphabetSize(), mtfEncoder.getMtfSymbolFrequencies());
        huffmanEncoder.encode();
    }

    /**
     * Determines if the block is empty, i.e. no bytes have been written to it
     *
     * @return {@code true} if no bytes have been written to the block (neither
     *         committed to the block array nor held in a pending RLE run),
     *         otherwise {@code false}
     */
    public boolean isEmpty() {
        return ((this.blockLength == 0) && (this.rleLength == 0));
    }

    /**
     * Gets the CRC of the completed block. Only valid after calling
     * {@link #close()}
     *
     * @return The block's CRC
     */
    public int getCRC() {
        return this.crc.getCRC();
    }

    /**
     * @param bitOutputStream
     *            The BZip2BitOutputStream to which compressed BZip2 data is
     *            written
     * @param blockSize
     *            The declared block size in bytes. Up to this many bytes will
     *            be accepted into the block after Run-Length Encoding is
     *            applied
     */
    public BZip2BlockCompressor(final BZip2BitOutputStream bitOutputStream, final int blockSize) {
        this.bitOutputStream = bitOutputStream;

        // One extra byte is added to allow for the block wrap applied in
        // close()
        this.block = new byte[blockSize + 1];
        this.bwtBlock = new int[blockSize + 1];

        // 5 bytes for one RLE run plus one byte - see write(int)
        this.blockLengthLimit = blockSize - 6;
    }
}