/*
* Copyright (c) 2011 Matthew Francis
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.itadaki.bzip2;
import java.io.IOException;
/**
* A decoder for the BZip2 Huffman coding stage
*/
public class BZip2HuffmanStageDecoder {
/**
* The BZip2BitInputStream from which Huffman codes are read
*/
private final BZip2BitInputStream bitInputStream;
/**
* The Huffman table number to use for each group of 50 symbols
*/
private final byte[] selectors;
/**
* The minimum code length for each Huffman table
*/
private final int[] minimumLengths = new int[BZip2Constants.HUFFMAN_MAXIMUM_TABLES];
/**
* An array of values for each Huffman table that must be subtracted from
* the numerical value of a Huffman code of a given bit length to give its
* canonical code index
*/
private final int[][] codeBases = new int[BZip2Constants.HUFFMAN_MAXIMUM_TABLES][BZip2Constants.HUFFMAN_DECODE_MAXIMUM_CODE_LENGTH + 2];
/**
* An array of values for each Huffman table that gives the highest
* numerical value of a Huffman code of a given bit length
*/
private final int[][] codeLimits = new int[BZip2Constants.HUFFMAN_MAXIMUM_TABLES][BZip2Constants.HUFFMAN_DECODE_MAXIMUM_CODE_LENGTH + 1];
/**
* A mapping for each Huffman table from canonical code index to output
* symbol
*/
private final int[][] codeSymbols = new int[BZip2Constants.HUFFMAN_MAXIMUM_TABLES][BZip2Constants.HUFFMAN_MAXIMUM_ALPHABET_SIZE];
/**
* The Huffman table for the current group
*/
private int currentTable;
/**
* The index of the current group within the selectors array
*/
private int groupIndex = -1;
/**
* The byte position within the current group. A new group is selected every
* 50 decoded bytes
*/
private int groupPosition = -1;
/**
* Constructs Huffman decoding tables from lists of Canonical Huffman code
* lengths
*
* @param alphabetSize
* The total number of codes (uniform for each table)
* @param tableCodeLengths
* The Canonical Huffman code lengths for each table
*/
private void createHuffmanDecodingTables(final int alphabetSize, final byte[][] tableCodeLengths) {
for (int table = 0; table < tableCodeLengths.length; table++) {
final int[] tableBases = this.codeBases[table];
final int[] tableLimits = this.codeLimits[table];
final int[] tableSymbols = this.codeSymbols[table];
final byte[] codeLengths = tableCodeLengths[table];
int minimumLength = BZip2Constants.HUFFMAN_DECODE_MAXIMUM_CODE_LENGTH;
int maximumLength = 0;
// Find the minimum and maximum code length for the table
for (int i = 0; i < alphabetSize; i++) {
maximumLength = Math.max(codeLengths[i], maximumLength);
minimumLength = Math.min(codeLengths[i], minimumLength);
}
this.minimumLengths[table] = minimumLength;
// Calculate the first output symbol for each code length
for (int i = 0; i < alphabetSize; i++) {
tableBases[codeLengths[i] + 1]++;
}
for (int i = 1; i < BZip2Constants.HUFFMAN_DECODE_MAXIMUM_CODE_LENGTH + 2; i++) {
tableBases[i] += tableBases[i - 1];
}
// Calculate the first and last Huffman code for each code length
// (codes at a given
// length are sequential in value)
int code = 0;
for (int i = minimumLength; i <= maximumLength; i++) {
int base = code;
code += tableBases[i + 1] - tableBases[i];
tableBases[i] = base - tableBases[i];
tableLimits[i] = code - 1;
code <<= 1;
}
// Populate the mapping from canonical code index to output symbol
int codeIndex = 0;
for (int bitLength = minimumLength; bitLength <= maximumLength; bitLength++) {
for (int symbol = 0; symbol < alphabetSize; symbol++) {
if (codeLengths[symbol] == bitLength) {
tableSymbols[codeIndex++] = symbol;
}
}
}
}
}
/**
* Decodes and returns the next symbol
*
* @return The decoded symbol
* @throws IOException
* if the end of the input stream is reached while decoding
*/
public int nextSymbol() throws IOException {
final BZip2BitInputStream bitInputStream = this.bitInputStream;
// Move to next group selector if required
if (((++this.groupPosition % BZip2Constants.HUFFMAN_GROUP_RUN_LENGTH) == 0)) {
this.groupIndex++;
if (this.groupIndex == this.selectors.length) {
throw new BZip2Exception("Error decoding BZip2 block");
}
this.currentTable = this.selectors[this.groupIndex] & 0xff;
}
final int currentTable = this.currentTable;
final int[] tableLimits = this.codeLimits[currentTable];
int codeLength = this.minimumLengths[currentTable];
// Starting with the minimum bit length for the table, read additional
// bits one at a time
// until a complete code is recognised
int codeBits = bitInputStream.readBits(codeLength);
for (; codeLength <= BZip2Constants.HUFFMAN_DECODE_MAXIMUM_CODE_LENGTH; codeLength++) {
if (codeBits <= tableLimits[codeLength]) {
// Convert the code to a symbol index and return
return this.codeSymbols[currentTable][codeBits - this.codeBases[currentTable][codeLength]];
}
codeBits = (codeBits << 1) | bitInputStream.readBits(1);
}
// A valid code was not recognised
throw new BZip2Exception("Error decoding BZip2 block");
}
/**
* @param bitInputStream
* The BZip2BitInputStream from which Huffman codes are read
* @param alphabetSize
* The total number of codes (uniform for each table)
* @param tableCodeLengths
* The Canonical Huffman code lengths for each table
* @param selectors
* The Huffman table number to use for each group of 50 symbols
*/
public BZip2HuffmanStageDecoder(final BZip2BitInputStream bitInputStream, final int alphabetSize, final byte[][] tableCodeLengths, final byte[] selectors) {
this.bitInputStream = bitInputStream;
this.selectors = selectors;
this.currentTable = this.selectors[0];
createHuffmanDecodingTables(alphabetSize, tableCodeLengths);
}
}