package org.itadaki.bzip2; /** * An encoder for the BZip2 Move To Front Transform and Run-Length Encoding[2] stages<br> * Although conceptually these two stages are separate, it is computationally efficient to perform * them in one pass. */ public class BZip2MTFAndRLE2StageEncoder { /** * The Burrows-Wheeler transformed block */ private final int[] bwtBlock; /** * Actual length of the data in the {@link bwtBlock} array */ private int bwtLength; /** * At each position, {@code true} if the byte value with that index is present within the block, * otherwise {@code false} */ private final boolean[] bwtValuesInUse; /** * The output of the Move To Front Transform and Run-Length Encoding[2] stages */ private final char[] mtfBlock; /** * The actual number of values contained in the {@link mtfBlock} array */ private int mtfLength; /** * The global frequencies of values within the {@link mtfBlock} array */ private final int[] mtfSymbolFrequencies = new int[BZip2Constants.HUFFMAN_MAXIMUM_ALPHABET_SIZE]; /** * The encoded alphabet size */ private int alphabetSize; /** * Performs the Move To Front transform and Run Length Encoding[1] stages */ public void encode() { final int bwtLength = this.bwtLength; final boolean[] bwtValuesInUse = this.bwtValuesInUse; final int[] bwtBlock = this.bwtBlock; final char[] mtfBlock = this.mtfBlock; final int[] mtfSymbolFrequencies = this.mtfSymbolFrequencies; final byte[] huffmanSymbolMap = new byte[256]; final MoveToFront symbolMTF = new MoveToFront(); int totalUniqueValues = 0; for (int i = 0; i < 256; i++) { if (bwtValuesInUse[i]) { huffmanSymbolMap[i] = (byte) totalUniqueValues++; } } final int endOfBlockSymbol = totalUniqueValues + 1; int mtfIndex = 0; int repeatCount = 0; int totalRunAs = 0; int totalRunBs = 0; for (int i = 0; i < bwtLength; i++) { // Move To Front final int mtfPosition = symbolMTF.valueToFront (huffmanSymbolMap[bwtBlock[i] & 0xff]); // Run Length Encode if (mtfPosition == 0) { repeatCount++; } else { if (repeatCount > 0) { repeatCount--; while (true) { if ((repeatCount & 1) == 0) { mtfBlock[mtfIndex++] = BZip2Constants.HUFFMAN_SYMBOL_RUNA; totalRunAs++; } else { mtfBlock[mtfIndex++] = BZip2Constants.HUFFMAN_SYMBOL_RUNB; totalRunBs++; } if (repeatCount <= 1) { break; } repeatCount = (repeatCount - 2) >>> 1; } repeatCount = 0; } mtfBlock[mtfIndex++] = (char) (mtfPosition + 1); mtfSymbolFrequencies[mtfPosition + 1]++; } } if (repeatCount > 0) { repeatCount--; while (true) { if ((repeatCount & 1) == 0) { mtfBlock[mtfIndex++] = BZip2Constants.HUFFMAN_SYMBOL_RUNA; totalRunAs++; } else { mtfBlock[mtfIndex++] = BZip2Constants.HUFFMAN_SYMBOL_RUNB; totalRunBs++; } if (repeatCount <= 1) { break; } repeatCount = (repeatCount - 2) >>> 1; } } mtfBlock[mtfIndex] = (char) endOfBlockSymbol; mtfSymbolFrequencies[endOfBlockSymbol]++; mtfSymbolFrequencies[BZip2Constants.HUFFMAN_SYMBOL_RUNA] += totalRunAs; mtfSymbolFrequencies[BZip2Constants.HUFFMAN_SYMBOL_RUNB] += totalRunBs; this.mtfLength = mtfIndex + 1; this.alphabetSize = endOfBlockSymbol + 1; } /** * @return The encoded MTF block */ public char[] getMtfBlock() { return this.mtfBlock; } /** * @return The actual length of the MTF block */ public int getMtfLength() { return this.mtfLength; } /** * @return The size of the MTF block's alphabet */ public int getMtfAlphabetSize() { return this.alphabetSize; } /** * @return The frequencies of the MTF block's symbols */ public int[] getMtfSymbolFrequencies() { return this.mtfSymbolFrequencies; } /** * @param bwtBlock The Burrows Wheeler Transformed block data * @param bwtLength The actual length of the BWT data * @param bwtValuesPresent The values that are present within the BWT data. For each index, * {@code true} if that value is present within the data, otherwise {@code false} */ public BZip2MTFAndRLE2StageEncoder (final int[] bwtBlock, final int bwtLength, final boolean[] bwtValuesPresent) { this.bwtBlock = bwtBlock; this.bwtLength = bwtLength; this.bwtValuesInUse = bwtValuesPresent; this.mtfBlock = new char[bwtLength + 1]; } }