/*
* Copyright 2002,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.compress.bzip2;
import java.io.IOException;
import java.io.InputStream;
/*
* This package is based on the work done by Keiron Liddle, Aftex Software
* <keiron@aftexsw.com> to whom the Ant project is very grateful for his
* great code.
*/
/**
* An input stream that decompresses from the BZip2 format (without the file header chars) to be
* read as any other stream.
*
* @author <a href="mailto:keiron@aftexsw.com">Keiron Liddle</a>
*/
public class CBZip2InputStream extends InputStream implements BZip2Constants {
/*
* Decoder states. read() switches on m_currentState to pick the setup*Part* routine that
* prepares the byte for the following call; the *_A and START_BLOCK states do nothing there.
*/
private static final int START_BLOCK_STATE = 1;
private static final int RAND_PART_A_STATE = 2;
private static final int RAND_PART_B_STATE = 3;
private static final int RAND_PART_C_STATE = 4;
private static final int NO_RAND_PART_A_STATE = 5;
private static final int NO_RAND_PART_B_STATE = 6;
private static final int NO_RAND_PART_C_STATE = 7;
/* CRC over the bytes produced for the current block (reset in initBlock, finalised in endBlock). */
private CRC m_crc = new CRC();
/* m_inUse[b] is set by buildInUseTable() for every byte value flagged in the block's mapping table. */
private boolean[] m_inUse = new boolean[256];
/* Dense index -> byte value and byte value -> dense index, both filled by makeMaps(). */
private char[] m_seqToUnseq = new char[256];
private char[] m_unseqToSeq = new char[256];
/* Per-group table selectors: the MTF-coded form as read, and the decoded form (recvDecodingTables). */
private char[] m_selector = new char[MAX_SELECTORS];
private char[] m_selectorMtf = new char[MAX_SELECTORS];
/*
* freq table collected to save a pass over the data during decompression.
*/
private int[] m_unzftab = new int[256];
/* Huffman decode tables, one row per coding table, filled by hbCreateDecodeTables(). */
private int[][] m_limit = new int[N_GROUPS][MAX_ALPHA_SIZE];
private int[][] m_base = new int[N_GROUPS][MAX_ALPHA_SIZE];
private int[][] m_perm = new int[N_GROUPS][MAX_ALPHA_SIZE];
/* Shortest code length of each coding table. */
private int[] m_minLens = new int[N_GROUPS];
/* Once true, read() returns -1. */
private boolean m_streamEnd;
/* The value the next read() call returns. */
private int m_currentChar = -1;
private int m_currentState = START_BLOCK_STATE;
/* De-randomisation state: countdown taken from RAND_NUMS and the index of the next RAND_NUMS entry. */
private int m_rNToGo;
private int m_rTPos;
/* Current position in the m_tt chain walked while emitting a block. */
private int m_tPos;
/*
* Run-length decoding state used by the setup*Part* routines: i2 is compared against m_last to
* detect the end of the block, count is the length of the current run of equal bytes, chPrev and
* ch2 are the previous and current byte, z is a repeat count read from the block and j2 the
* number of those repeats emitted so far.
*/
private int i2;
private int count;
private int chPrev;
private int ch2;
private int j2;
private char z;
/* Value of the "randomised" bit read from the block header in initBlock(). */
private boolean m_blockRandomised;
/*
* always: in the range 0 .. 9. The current block size is 100000 * this number.
*/
private int m_blockSize100k;
/* Bit buffer and the number of not-yet-consumed bits in it (see bsR). */
private int m_bsBuff;
private int m_bsLive;
/* The compressed source; set to null by bsFinishedWithStream(). */
private InputStream m_input;
private int m_computedBlockCRC;
private int m_computedCombinedCRC;
/*
* index of the last char in the block, so the block size == last + 1.
*/
private int m_last;
/* Block bytes produced by getAndMoveToFrontDecode(), before the transform is undone. */
private char[] m_ll8;
/* Number of distinct byte values in use, as counted by makeMaps(). */
private int m_nInUse;
/*
* index in zptr[] of original string after sorting.
*/
private int m_origPtr;
/* CRC values read from the stream, compared against the computed ones. */
private int m_storedBlockCRC;
private int m_storedCombinedCRC;
/* Successor table built by setupBlock(); m_tt[p] is the position that follows p. */
private int[] m_tt;
/**
 * Creates a decompressing stream over {@code input} and decodes its first block.
 *
 * <p>The constructor reads the remainder of the stream header (an {@code 'h'} followed by a
 * block-size digit {@code '1'}..{@code '9'}); per the class description the leading file header
 * characters are expected to have been consumed by the caller. If that header is invalid the
 * underlying stream is closed and this stream behaves as empty: {@link #read()} returns
 * {@code -1}.</p>
 *
 * @param input the compressed source
 */
public CBZip2InputStream(final InputStream input){
    bsSetStream(input);
    initialize();
    if (m_streamEnd) {
        // initialize() rejected the header and released the input stream; reading a block
        // header now would dereference the cleared stream and fail with a NullPointerException.
        return;
    }
    initBlock();
    if (m_streamEnd) {
        // End-of-stream marker or bad block header: no block was decoded, nothing to set up.
        return;
    }
    setupBlock();
}
/**
 * Reports that a block did not start with a recognised magic number.
 *
 * <p>Prints its own diagnostic instead of the generic "CRC Error" text, which was misleading
 * for this condition. Like the other reporters it only prints; it does not throw.</p>
 */
private static void badBlockHeader(){
    System.out.println("Bad block header");
}
/**
 * Reports that a block decoded to more data than the declared block size allows.
 *
 * <p>Prints its own diagnostic instead of the generic "CRC Error" text, which was misleading
 * for this condition. It only prints; it does not throw.</p>
 */
private static void blockOverrun(){
    System.out.println("Block overrun");
}
/*
* Generic error reporter: prints a fixed message to standard output and returns normally.
* NOTE(review): the message says "CRC Error" regardless of which condition triggered the call,
* and the exception that was meant to be thrown here is commented out, so callers continue
* after the report.
*/
private static void cadvise(){
System.out.println("CRC Error");
// throw new CCoruptionError();
}
/**
 * Reports that the compressed input ended, or failed to be read, before decoding was complete.
 *
 * <p>Prints its own diagnostic instead of the generic "CRC Error" text, which was misleading
 * for this condition. It only prints; it does not throw.</p>
 */
private static void compressedStreamEOF(){
    System.out.println("Unexpected end of compressed stream");
}
/*
* Reports a mismatch between a stored and a computed CRC (called from endBlock and complete).
* Delegates to cadvise(), which prints the message and returns; no exception is raised.
*/
private static void crcError(){
cadvise();
}
/**
 * Returns the next decompressed byte.
 *
 * <p>The byte to return has already been prepared by the previous call; this method hands it
 * out and then advances the decoder so the following byte is ready.</p>
 *
 * @return the next byte, or {@code -1} once the end of the stream has been reached
 */
public int read(){
    if (m_streamEnd) {
        return -1;
    }
    // Capture the pending byte before stepping: the step overwrites m_currentChar.
    final int pending = m_currentChar;
    switch (m_currentState) {
        case RAND_PART_B_STATE:
            setupRandPartB();
            break;
        case RAND_PART_C_STATE:
            setupRandPartC();
            break;
        case NO_RAND_PART_B_STATE:
            setupNoRandPartB();
            break;
        case NO_RAND_PART_C_STATE:
            setupNoRandPartC();
            break;
        default:
            // START_BLOCK and the *_A states (and any unknown state) need no step here.
            break;
    }
    return pending;
}
/**
 * Records the block size multiplier and allocates the two per-block work arrays.
 *
 * <p>The argument is not range-checked here; a value of zero records the size but allocates
 * nothing.</p>
 *
 * @param newSize100k block size in units of {@code BASE_BLOCK_SIZE}
 */
private void setDecompressStructureSizes(int newSize100k){
    m_blockSize100k = newSize100k;
    if (newSize100k != 0) {
        final int capacity = BASE_BLOCK_SIZE * newSize100k;
        m_ll8 = new char[capacity];
        m_tt = new int[capacity];
    }
}
/**
 * Builds the successor table {@code m_tt} for the block held in {@code m_ll8} and primes the
 * run-length decoder with the block's first byte.
 */
private void setupBlock(){
    // starts[v] = number of block bytes whose value is below v (running sum of m_unzftab).
    final int[] starts = new int[257];
    for (int value = 0; value < 256; value++) {
        starts[value + 1] = starts[value] + m_unzftab[value];
    }
    for (int pos = 0; pos <= m_last; pos++) {
        final char value = m_ll8[pos];
        m_tt[starts[value]++] = pos;
    }
    m_tPos = m_tt[m_origPtr];
    count = 0;
    i2 = 0;
    // 256 is neither a byte value nor EOF, so the first byte never matches "previous".
    ch2 = 256;
    if (!m_blockRandomised) {
        setupNoRandPartA();
        return;
    }
    m_rNToGo = 0;
    m_rTPos = 0;
    setupRandPartA();
}
/**
 * Non-randomised blocks, step A: fetches the next byte of the block and makes it the pending
 * output, or moves on to the next block when the current one is exhausted.
 */
private void setupNoRandPartA(){
    if (i2 > m_last) {
        // Block finished: verify it and start on the next one.
        endBlock();
        initBlock();
        setupBlock();
        return;
    }
    chPrev = ch2;
    ch2 = m_ll8[m_tPos];
    m_tPos = m_tt[m_tPos];
    i2++;
    m_currentChar = ch2;
    m_currentState = NO_RAND_PART_B_STATE;
    m_crc.updateCRC(ch2);
}
/**
 * Non-randomised blocks, step B: tracks runs of equal bytes. After four equal bytes in a row
 * the next block entry is taken as a repeat count and step C emits the repeats; otherwise
 * decoding continues with step A.
 */
private void setupNoRandPartB(){
    if (ch2 == chPrev) {
        count++;
        if (count >= 4) {
            z = m_ll8[m_tPos];
            m_tPos = m_tt[m_tPos];
            m_currentState = NO_RAND_PART_C_STATE;
            j2 = 0;
            setupNoRandPartC();
            return;
        }
    } else {
        // A different byte starts a new run of length one.
        count = 1;
    }
    m_currentState = NO_RAND_PART_A_STATE;
    setupNoRandPartA();
}
/**
 * Non-randomised blocks, step C: emits one more repeat of the current byte while fewer than
 * {@code z} have been produced, then hands control back to step A.
 */
private void setupNoRandPartC(){
    if (j2 >= z) {
        m_currentState = NO_RAND_PART_A_STATE;
        i2++;
        count = 0;
        setupNoRandPartA();
        return;
    }
    m_currentChar = ch2;
    m_crc.updateCRC(ch2);
    j2++;
}
/**
 * Randomised blocks, step A: fetches the next byte of the block, undoes the randomisation for
 * it and makes it the pending output, or moves on to the next block when the current one is
 * exhausted.
 */
private void setupRandPartA(){
    if (i2 > m_last) {
        // Block finished: verify it and start on the next one.
        endBlock();
        initBlock();
        setupBlock();
        return;
    }
    chPrev = ch2;
    ch2 = m_ll8[m_tPos];
    m_tPos = m_tt[m_tPos];
    if (m_rNToGo == 0) {
        // Countdown expired: load the next interval, cycling through the 512 table entries.
        m_rNToGo = RAND_NUMS[m_rTPos];
        m_rTPos++;
        if (m_rTPos == 512) {
            m_rTPos = 0;
        }
    }
    m_rNToGo--;
    if (m_rNToGo == 1) {
        ch2 ^= 1;
    }
    i2++;
    m_currentChar = ch2;
    m_currentState = RAND_PART_B_STATE;
    m_crc.updateCRC(ch2);
}
/**
 * Randomised blocks, step B: tracks runs of equal bytes. After four equal bytes in a row the
 * next block entry (de-randomised) is taken as a repeat count and step C emits the repeats;
 * otherwise decoding continues with step A.
 */
private void setupRandPartB(){
    if (ch2 == chPrev) {
        count++;
        if (count >= 4) {
            z = m_ll8[m_tPos];
            m_tPos = m_tt[m_tPos];
            if (m_rNToGo == 0) {
                // Countdown expired: load the next interval, cycling through the 512 entries.
                m_rNToGo = RAND_NUMS[m_rTPos];
                m_rTPos++;
                if (m_rTPos == 512) {
                    m_rTPos = 0;
                }
            }
            m_rNToGo--;
            if (m_rNToGo == 1) {
                z ^= 1;
            }
            j2 = 0;
            m_currentState = RAND_PART_C_STATE;
            setupRandPartC();
            return;
        }
    } else {
        // A different byte starts a new run of length one.
        count = 1;
    }
    m_currentState = RAND_PART_A_STATE;
    setupRandPartA();
}
/**
 * Randomised blocks, step C: emits one more repeat of the current byte while fewer than
 * {@code z} have been produced, then hands control back to step A.
 */
private void setupRandPartC(){
    if (j2 >= z) {
        m_currentState = RAND_PART_A_STATE;
        i2++;
        count = 0;
        setupRandPartA();
        return;
    }
    m_currentChar = ch2;
    m_crc.updateCRC(ch2);
    j2++;
}
/**
 * Reads one block body: the origin pointer, the decoding tables and then the Huffman-coded
 * symbol stream, undoing the zero-run (RUNA/RUNB) and move-to-front coding as it goes.
 *
 * <p>On return {@code m_ll8[0..m_last]} holds the block's bytes and {@code m_unzftab} the
 * number of occurrences of each byte value.</p>
 *
 * <p>If the data decodes to more bytes than the block size allows, {@code blockOverrun()} is
 * called before the offending store; the store itself then fails with an
 * {@code ArrayIndexOutOfBoundsException} because the reporter does not abort.</p>
 */
private void getAndMoveToFrontDecode(){
    final int limitLast = BASE_BLOCK_SIZE * m_blockSize100k;
    m_origPtr = readVariableSizedInt(24);
    recvDecodingTables();
    final int EOB = m_nInUse + 1;

    /*
     * Setting up the unzftab entries here is not strictly necessary, but it does save having to
     * do it later in a separate pass, and so saves a block's worth of cache misses.
     */
    for (int i = 0; i <= 255; i++) {
        m_unzftab[i] = 0;
    }
    // Move-to-front list, initially the identity permutation.
    final char[] yy = new char[256];
    for (int i = 0; i <= 255; i++) {
        yy[i] = (char) i;
    }
    m_last = -1;

    // Each selector covers G_SIZE symbols; groupPos counts what is left of the current group.
    int groupNo = 0;
    int groupPos = G_SIZE - 1;
    int nextSym = recvSymbol(groupNo);

    while (nextSym != EOB) {
        if (nextSym == RUNA || nextSym == RUNB) {
            // A sequence of RUNA/RUNB symbols encodes a run length in bijective base 2.
            int s = -1;
            int N = 1;
            do {
                s += (nextSym == RUNA) ? N : 2 * N;
                N *= 2;
                if (groupPos == 0) {
                    groupNo++;
                    groupPos = G_SIZE;
                }
                groupPos--;
                nextSym = recvSymbol(groupNo);
            } while (nextSym == RUNA || nextSym == RUNB);
            s++;

            final char ch = m_seqToUnseq[yy[0]];
            m_unzftab[ch] += s;
            // Report before writing: checking after the fill loop could never fire, because
            // the loop would already have run off the end of m_ll8.
            if (m_last + s >= limitLast) {
                blockOverrun();
            }
            while (s > 0) {
                m_last++;
                m_ll8[m_last] = ch;
                s--;
            }
        } else {
            m_last++;
            if (m_last >= limitLast) {
                blockOverrun();
            }
            final char tmp = yy[nextSym - 1];
            m_unzftab[m_seqToUnseq[tmp]]++;
            m_ll8[m_last] = m_seqToUnseq[tmp];

            // Move the selected entry to the front, shifting its predecessors down by one.
            System.arraycopy(yy, 0, yy, 1, nextSym - 1);
            yy[0] = tmp;

            if (groupPos == 0) {
                groupNo++;
                groupPos = G_SIZE;
            }
            groupPos--;
            nextSym = recvSymbol(groupNo);
        }
    }
}

/**
 * Decodes one Huffman symbol using the coding table selected for {@code groupNo}.
 *
 * <p>Starts with the table's minimum code length and pulls one more bit at a time until the
 * accumulated value fits within the limit for that length.</p>
 *
 * @param groupNo index into {@code m_selector} of the current symbol group
 * @return the decoded symbol
 */
private int recvSymbol(final int groupNo){
    final int zt = m_selector[groupNo];
    int zn = m_minLens[zt];
    int zvec = bsR(zn);
    while (zvec > m_limit[zt][zn]) {
        zn++;
        zvec = (zvec << 1) | bsR(1);
    }
    return m_perm[zt][zvec - m_base[zt][zn]];
}
/**
 * Closes the underlying input stream, if there still is one, and forgets it.
 * A failure to close is deliberately ignored.
 */
private void bsFinishedWithStream(){
    if (m_input == null) {
        return;
    }
    try {
        m_input.close();
    } catch (IOException ignored) {
        // Best effort: the stream is being discarded either way.
    }
    m_input = null;
}
/*
* Reads the next numBits bits from the compressed stream and returns them as an int.
* Thin wrapper around bsR().
*/
private int readVariableSizedInt(final int numBits){
return bsR(numBits);
}
/*
* Reads the next 8 bits from the compressed stream and returns them as a char (0..255).
*/
private char readUnsignedChar(){
return (char) bsR(8);
}
/**
 * Reads a 32-bit integer from the compressed stream, most significant byte first.
 *
 * @return the value assembled from the next four 8-bit reads
 */
private int readInt(){
    int value = 0;
    for (int octet = 0; octet < 4; octet++) {
        value = (value << 8) | bsR(8);
    }
    return value;
}
/**
 * Reads the next {@code n} bits from the compressed stream.
 *
 * <p>Bytes are pulled from the underlying stream into the bit buffer until at least {@code n}
 * bits are available. End of input and read failures are reported through
 * {@code compressedStreamEOF()}; decoding then continues with filler bits, since that reporter
 * does not abort.</p>
 *
 * @param n number of bits to read
 * @return the bits, right-aligned in an int
 */
private int bsR(final int n){
    while (m_bsLive < n) {
        // Keep the result as an int: casting to char first turns the -1 end-of-stream marker
        // into 0xFFFF, which made the end-of-stream test below unreachable.
        int next = 0;
        try {
            next = m_input.read();
        } catch (final IOException ioe) {
            compressedStreamEOF();
        }
        if (next == -1) {
            compressedStreamEOF();
        }
        m_bsBuff = (m_bsBuff << 8) | (next & 0xff);
        m_bsLive += 8;
    }
    final int result = (m_bsBuff >> (m_bsLive - n)) & ((1 << n) - 1);
    m_bsLive -= n;
    return result;
}
/**
 * Attaches the compressed source and empties the bit buffer.
 *
 * @param input the stream that subsequent bit reads pull bytes from
 */
private void bsSetStream(final InputStream input){
    m_bsBuff = 0;
    m_bsLive = 0;
    m_input = input;
}
/**
 * Handles the end-of-stream marker: reads the stored combined CRC, reports a mismatch with the
 * computed one, releases the input and marks the stream as ended.
 */
private void complete(){
    final int stored = readInt();
    m_storedCombinedCRC = stored;
    if (stored != m_computedCombinedCRC) {
        crcError();
    }
    bsFinishedWithStream();
    m_streamEnd = true;
}
/**
 * Finishes the current block: finalises its CRC, reports a mismatch with the stored block CRC
 * and folds the block CRC into the combined CRC (rotate left by one bit, then XOR).
 */
private void endBlock(){
    final int blockCrc = m_crc.getFinalCRC();
    m_computedBlockCRC = blockCrc;
    // A bad CRC is considered a fatal error.
    if (m_storedBlockCRC != blockCrc) {
        crcError();
    }
    m_computedCombinedCRC = Integer.rotateLeft(m_computedCombinedCRC, 1) ^ blockCrc;
}
/**
 * Fills the decode tables for one Huffman coding table from its code lengths.
 *
 * @param limit     output: for each code length, the largest code value of that length
 * @param base      output: for each code length, the offset subtracted from a code to index {@code perm}
 * @param perm      output: symbols ordered by code length, then by symbol value
 * @param length    code length of each symbol
 * @param minLen    shortest code length present
 * @param maxLen    longest code length present
 * @param alphaSize number of symbols in the alphabet
 */
private void hbCreateDecodeTables(final int[] limit, final int[] base, final int[] perm,
        final char[] length, final int minLen, final int maxLen, final int alphaSize){
    // Symbols sorted by code length.
    int filled = 0;
    for (int codeLen = minLen; codeLen <= maxLen; codeLen++) {
        for (int sym = 0; sym < alphaSize; sym++) {
            if (length[sym] == codeLen) {
                perm[filled++] = sym;
            }
        }
    }

    // base[k] = number of symbols with a code shorter than k (histogram, then running sum).
    for (int k = 0; k < MAX_CODE_LEN; k++) {
        base[k] = 0;
    }
    for (int sym = 0; sym < alphaSize; sym++) {
        base[length[sym] + 1]++;
    }
    for (int k = 1; k < MAX_CODE_LEN; k++) {
        base[k] += base[k - 1];
    }

    for (int k = 0; k < MAX_CODE_LEN; k++) {
        limit[k] = 0;
    }
    int code = 0;
    for (int codeLen = minLen; codeLen <= maxLen; codeLen++) {
        code += base[codeLen + 1] - base[codeLen];
        limit[codeLen] = code - 1;
        code <<= 1;
    }

    for (int codeLen = minLen + 1; codeLen <= maxLen; codeLen++) {
        base[codeLen] = ((limit[codeLen - 1] + 1) << 1) - base[codeLen];
    }
}
/**
 * Reads the next block header and, for a data block, decodes the block body.
 *
 * <p>The six-byte magic decides what follows: the end-of-stream marker finishes the stream,
 * the block marker introduces a block, and anything else is reported as a bad header and ends
 * the stream.</p>
 */
private void initBlock(){
    // Fold the six magic bytes into one 48-bit value, first byte most significant.
    long magic = 0L;
    for (int k = 0; k < 6; k++) {
        magic = (magic << 8) | readUnsignedChar();
    }
    if (magic == 0x177245385090L) {
        complete();
        return;
    }
    if (magic != 0x314159265359L) {
        badBlockHeader();
        m_streamEnd = true;
        return;
    }
    m_storedBlockCRC = readInt();
    m_blockRandomised = (bsR(1) == 1);
    getAndMoveToFrontDecode();
    m_crc.initialiseCRC();
    m_currentState = START_BLOCK_STATE;
}
/**
 * Reads the two remaining stream-header characters: an {@code 'h'} and a block-size digit
 * {@code '1'}..{@code '9'}. A valid header sizes the work arrays and clears the combined CRC;
 * an invalid one releases the input and marks the stream as ended.
 */
private void initialize(){
    final char formatMarker = readUnsignedChar();
    final char sizeDigit = readUnsignedChar();
    final boolean headerOk = formatMarker == 'h' && sizeDigit >= '1' && sizeDigit <= '9';
    if (headerOk) {
        setDecompressStructureSizes(sizeDigit - '0');
        m_computedCombinedCRC = 0;
    } else {
        bsFinishedWithStream();
        m_streamEnd = true;
    }
}
/**
 * Numbers the byte values flagged in {@code m_inUse} densely from zero, recording the mapping
 * in both directions and the number of values in use.
 */
private void makeMaps(){
    int used = 0;
    for (int value = 0; value < 256; value++) {
        if (!m_inUse[value]) {
            continue;
        }
        m_seqToUnseq[used] = (char) value;
        m_unseqToSeq[value] = (char) used;
        used++;
    }
    m_nInUse = used;
}
/**
 * Reads everything needed to decode a block's symbols: the in-use byte map, the per-group
 * table selectors (move-to-front coded) and the delta-coded code lengths of each coding table,
 * from which the Huffman decode tables are then built.
 */
private void recvDecodingTables(){
    buildInUseTable();
    makeMaps();
    final int alphaSize = m_nInUse + 2;

    // Selectors: each is stored as a unary-coded MTF rank (a run of 1 bits ended by a 0 bit).
    final int groupCount = bsR(3);
    final int selectorCount = bsR(15);
    for (int s = 0; s < selectorCount; s++) {
        int ones = 0;
        while (bsR(1) == 1) {
            ones++;
        }
        m_selectorMtf[s] = (char) ones;
    }

    // Undo the MTF values for the selectors.
    final char[] order = new char[N_GROUPS];
    for (char g = 0; g < groupCount; g++) {
        order[g] = g;
    }
    for (int s = 0; s < selectorCount; s++) {
        final int rank = m_selectorMtf[s];
        final char chosen = order[rank];
        System.arraycopy(order, 0, order, 1, rank);
        order[0] = chosen;
        m_selector[s] = chosen;
    }

    // Code lengths: a 5-bit start value, then per symbol a series of +1/-1 adjustments.
    final char[][] codeLengths = new char[N_GROUPS][MAX_ALPHA_SIZE];
    for (int g = 0; g < groupCount; g++) {
        int current = bsR(5);
        for (int sym = 0; sym < alphaSize; sym++) {
            while (bsR(1) == 1) {
                current += (bsR(1) == 0) ? 1 : -1;
            }
            codeLengths[g][sym] = (char) current;
        }
    }

    // Create the Huffman decoding tables.
    for (int g = 0; g < groupCount; g++) {
        int shortest = 32;
        int longest = 0;
        for (int sym = 0; sym < alphaSize; sym++) {
            final int codeLen = codeLengths[g][sym];
            longest = Math.max(longest, codeLen);
            shortest = Math.min(shortest, codeLen);
        }
        hbCreateDecodeTables(m_limit[g], m_base[g], m_perm[g], codeLengths[g], shortest, longest,
                alphaSize);
        m_minLens[g] = shortest;
    }
}
/**
 * Reads the two-level map of byte values used in the block: 16 bits saying which ranges of 16
 * values are present, followed by 16 bits for each present range. The result is stored in
 * {@code m_inUse}.
 */
private void buildInUseTable(){
    // Receive the mapping table.
    final boolean[] rangeUsed = new boolean[16];
    for (int range = 0; range < 16; range++) {
        rangeUsed[range] = (bsR(1) == 1);
    }
    for (int value = 0; value < 256; value++) {
        m_inUse[value] = false;
    }
    for (int range = 0; range < 16; range++) {
        if (!rangeUsed[range]) {
            continue;
        }
        final int first = range * 16;
        for (int offset = 0; offset < 16; offset++) {
            if (bsR(1) == 1) {
                m_inUse[first + offset] = true;
            }
        }
    }
}
/**
 * Closes this stream and the underlying input stream.
 *
 * <p>After this call {@link #read()} returns {@code -1}. Calling {@code close()} again has no
 * effect.</p>
 *
 * @throws IOException if closing the underlying stream fails
 */
public void close() throws IOException{
    final InputStream in = m_input;
    // Detach first so the stream is released even if closing it throws.
    m_input = null;
    // Without this, a later read() that needs more input would dereference the cleared stream.
    m_streamEnd = true;
    if (in != null) {
        in.close();
    }
}
}