/**
 * Copyright (C) 2007 Rui Shen (rui.shen@gmail.com) All Right Reserved
 * File     : Inflater.java
 * Created  : 2007-3-1
 * ****************************************************************************
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307, USA.
 * *****************************************************************************
 */
package org.geometerplus.fbreader.formats.chm;

import java.io.IOException;
import java.io.InputStream;
import java.util.logging.Logger;

/**
 * LZX decompressor working on a sliding window.
 *
 * <p>An instance keeps its decoding state (window contents, repeated-offset
 * queue, current block, Huffman trees) between calls to
 * {@link #inflate(boolean, InputStream, byte[], int, int)}; pass
 * {@code reset == true} to start a fresh reset interval.
 */
class Inflater {

    private static final Logger log = Logger.getLogger(Inflater.class.getName());

    // some constants defined by the LZX specification
    public static final int LZX_MIN_MATCH = 2;
    public static final int LZX_MAX_MATCH = 257;
    public static final int LZX_NUM_CHARS = 256;
    public static final int LZX_BLOCKTYPE_VERBATIM = 1;
    public static final int LZX_BLOCKTYPE_ALIGNED = 2;
    public static final int LZX_BLOCKTYPE_UNCOMPRESSED = 3;
    public static final int LZX_ALIGNED_NUM_ELEMENTS = 8;    // aligned offset tree #elements
    public static final int LZX_NUM_PRIMARY_LENGTHS = 7;     // this one missing from spec!
    public static final int LZX_NUM_SECONDARY_LENGTHS = 249; // length tree #elements

    /*
     * LZX uses what it calls 'position slots' to represent match offsets.
     * What this means is that a small 'position slot' number and a small
     * offset from that slot are encoded instead of one large offset for
     * every match.
     * - position_base is an index to the position slot bases.
     */
    private static final int[] POSITION_BASE = new int[] {
              0,       1,       2,       3,       4,       6,       8,      12,
             16,      24,      32,      48,      64,      96,     128,     192,
            256,     384,     512,     768,    1024,    1536,    2048,    3072,
           4096,    6144,    8192,   12288,   16384,   24576,   32768,   49152,
          65536,   98304,  131072,  196608,  262144,  393216,  524288,  655360,
         786432,  917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
        1835008, 1966080, 2097152
    };

    /*
     * - extra_bits states how many bits of offset-from-base data is needed.
     */
    private static final byte[] EXTRA_BITS = new byte[] {
         0,  0,  0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,
         7,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
        15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
        17, 17, 17
    };

    private byte[] window;          // the actual decoding window
    private int window_posn;        // current offset within the window
    private int r0, r1, r2;         // for the LRU offset system
    private int main_elements;      // number of main tree elements
    private boolean header_read;    // have we started decoding at all yet?
    private int block_type;         // type of this block
    private int block_length;       // uncompressed length of this block
    private int block_remaining;    // uncompressed bytes still left to decode
    private int frames_read;        // the number of CFDATA blocks processed
    private int intel_filesize;     // magic header value used for transform
    private int intel_curpos;       // current offset in transform space
    private boolean intel_started;  // have we seen any translatable data yet?

    // LZX huffman defines: tweak tablebits as desired
    LZXTree mainTree = new LZXTree(12, LZX_NUM_CHARS + 50 * 8);
    LZXTree lengthTree = new LZXTree(12, LZX_NUM_SECONDARY_LENGTHS + 1);
    LZXTree alignedTree = new LZXTree(7, LZX_ALIGNED_NUM_ELEMENTS);

    /**
     * Creates a decompressor with a window of the given size.
     *
     * <p>Don't forget to reset at reset intervals (pass {@code reset == true}
     * to {@code inflate}).
     *
     * @param windowSize window size in bytes, between 2^15 and 2^21 inclusive
     * @throws IllegalArgumentException if {@code windowSize} is out of that range
     */
    public Inflater(int windowSize) {
        if (windowSize < (1 << 15) || windowSize > (1 << 21)) {
            throw new IllegalArgumentException("Unsupported window size " + windowSize);
        }
        window = new byte[windowSize];

        // Two position slots per window bit, except for the two largest windows.
        int positionSlotNo = 0;
        while (windowSize > 1) {
            windowSize >>= 1;
            positionSlotNo += 2;
        }
        if (positionSlotNo == 40) {
            positionSlotNo = 42;
        } else if (positionSlotNo == 42) {
            positionSlotNo = 50;
        }
        // Each position slot contributes 8 main-tree symbols (3 length-header bits).
        main_elements = LZX_NUM_CHARS + (positionSlotNo << 3);
    }

    /**
     * Decodes {@code buf.length} bytes into {@code buf}; shorthand for
     * {@code inflate(reset, in, buf, 0, buf.length)}.
     */
    public int inflate(boolean reset, InputStream in, byte[] buf)
            throws DataFormatException, IOException {
        return inflate(reset, in, buf, 0, buf.length);
    }

    /**
     * Decodes exactly {@code len} uncompressed bytes from {@code in} into the
     * window and copies them to {@code buf[off .. off + len)}.
     *
     * @param reset {@code true} to discard the decoding state first (start of a
     *              reset interval); the window contents are not cleared
     * @param in    source of compressed data; wrapped in a new
     *              {@code BitsInputStream} on every call
     * @param buf   destination buffer
     * @param off   start offset in {@code buf}
     * @param len   number of uncompressed bytes to produce
     * @return always 0 in the current implementation; the requested {@code len}
     *         bytes have been written when the method returns normally
     * @throws DataFormatException on an unknown block type, on a short
     *                             uncompressed block, or if {@code len} is negative
     * @throws IOException         declared for errors while reading {@code in}
     */
    public int inflate(boolean reset, InputStream in, byte[] buf, int off, int len)
            throws DataFormatException, IOException {
        if (reset) {
            resetState();
        }

        BitsInputStream bin = new BitsInputStream(in);
        if (!header_read) {
            readStreamHeader(bin);
        }

        int togo = len;
        while (togo > 0) {
            if (block_remaining == 0) {
                readBlockHeader(bin);
            }

            /* buffer exhaustive check:
             * it's possible to have a file where the next run is less than
             * 16 bits in size. In this case, the READ_HUFFSYM() macro used
             * in building the tables will exhaust the buffer, so we should
             * allow for this, but not allow those accidentally read bits to
             * be used (so we check that there are at least 16 bits
             * remaining - in this boundary case they aren't really part of
             * the compressed data)
             *   if (inpos > (endinpos+2) || bitsleft < 16) return DECR_ILLEGALDATA;
             * @see BitsInputStream#peekUnder();
             */
            int this_run;
            while ((this_run = block_remaining) > 0 && togo > 0) {
                if (this_run > togo) {
                    this_run = togo;
                }
                togo -= this_run;
                block_remaining -= this_run;

                window_posn %= window.length;
                if (window_posn + this_run > window.length) {
                    // Deliberately only logged, not rejected.
                    log.warning("runs can't straddle the window wraparound");
                }

                if (block_type == LZX_BLOCKTYPE_UNCOMPRESSED) {
                    if (this_run > bin.available()) { // make sure we can read
                        throw new DataFormatException("not enough data");
                    }
                    bin.readFully(window, window_posn, this_run);
                    window_posn += this_run;
                } else { // LZX_BLOCKTYPE_VERBATIM or LZX_BLOCKTYPE_ALIGNED
                    decodeCompressedRun(bin, this_run);
                }
            }
        }

        if (togo != 0) { // only reachable when len is negative
            throw new DataFormatException("should never happens");
        }

        // The freshly decoded bytes are the len bytes ending at window_posn
        // (a position of 0 means the window was filled right up to its end).
        System.arraycopy(window,
                (window_posn == 0 ? window.length : window_posn) - len,
                buf, off, len);

        decodeIntelE8(buf, off, len);
        return 0;
    }

    /** Puts the decoder back into its start-of-interval state (window bytes are kept). */
    private void resetState() {
        r0 = r1 = r2 = 1;
        header_read = false;
        frames_read = 0;
        block_remaining = 0;
        block_type = -1; // invalid
        intel_curpos = 0;
        intel_started = false;
        window_posn = 0;
        mainTree.clear();
        lengthTree.clear();
    }

    /** Reads the one-bit "Intel E8 preprocessing" flag and, if set, the 32-bit file size. */
    private void readStreamHeader(BitsInputStream bin) throws DataFormatException, IOException {
        if (bin.readLE(1) > 0) {
            intel_filesize = (bin.readLE(16) << 16) | bin.readLE(16);
            log.info("Intel filesize = " + intel_filesize);
        } else {
            // Not encoded: make sure a value from a previous interval does not linger.
            intel_filesize = 0;
        }
        header_read = true;
    }

    /** Reads the next block's type and length plus the per-type tables or offsets. */
    private void readBlockHeader(BitsInputStream bin) throws DataFormatException, IOException {
        if (block_type == LZX_BLOCKTYPE_UNCOMPRESSED) {
            if ((block_length & 1) > 0) { // odd
                bin.skip(1); // realign to word
            }
        }

        block_type = bin.readLE(3);
        block_remaining = block_length = (bin.readLE(16) << 8) | bin.readLE(8);
        log.fine("Block type = " + block_type + ", length = " + block_length);

        switch (block_type) {
            case LZX_BLOCKTYPE_ALIGNED:
                for (int i = 0; i < alignedTree.max_symbol; i++) {
                    alignedTree.lens[i] = (byte) bin.readLE(3);
                }
                alignedTree.makeSymbolTable();
                // intentional fall-through: the rest of an aligned header equals a verbatim one
            case LZX_BLOCKTYPE_VERBATIM:
                mainTree.readLengthTable(bin, 0, LZX_NUM_CHARS);
                mainTree.readLengthTable(bin, LZX_NUM_CHARS, main_elements);
                mainTree.makeSymbolTable();
                if (mainTree.lens[0xE8] != 0) { // Intel E8 encoding?
                    intel_started = true;
                }
                lengthTree.readLengthTable(bin, 0, LZX_NUM_SECONDARY_LENGTHS);
                lengthTree.makeSymbolTable();
                break;

            case LZX_BLOCKTYPE_UNCOMPRESSED:
                log.warning("LZXC meet LZX_BLOCKTYPE_UNCOMPRESSED");
                intel_started = true; // because we can't assume otherwise
                // Get up to 16 pad bits into the buffer and, only if that pulled in
                // more than 16 bits, step back two bytes to align the bitstream.
                // NOTE(review): assumes ensure() returns the number of buffered bits
                // and skip(-2) rewinds two bytes - confirm against BitsInputStream.
                // TODO what happens to the bitbuf/bitsLeft?
                if (bin.ensure(16) > 16) {
                    bin.skip(-2);
                }
                r0 = bin.read32LE();
                r1 = bin.read32LE();
                r2 = bin.read32LE();
                break;

            default:
                throw new DataFormatException("Unexpected block type " + block_type);
        }
    }

    /**
     * Decodes literals and matches of a verbatim/aligned block into the window
     * until at least {@code runLength} bytes have been produced.
     */
    private void decodeCompressedRun(BitsInputStream bin, int runLength)
            throws DataFormatException, IOException {
        int remaining = runLength;
        while (remaining > 0) {
            int main_element = mainTree.readHuffmanSymbol(bin);
            if (main_element < LZX_NUM_CHARS) {
                // literal: 0 to LZX_NUM_CHARS - 1
                window[window_posn++] = (byte) main_element;
                remaining--;
                continue;
            }

            // match: LZX_NUM_CHARS + ((slot << 3) | length_header (3 bits))
            main_element -= LZX_NUM_CHARS;

            int match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
            if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
                match_length += lengthTree.readHuffmanSymbol(bin);
            }
            match_length += LZX_MIN_MATCH;

            int match_offset = resolveMatchOffset(bin, main_element >> 3);

            remaining -= match_length;
            copyMatch(match_offset, match_length);
        }
    }

    /**
     * Turns a position slot into a match offset, reading any extra bits the slot
     * needs, and updates the repeated-offset queue {@code r0, r1, r2}.
     */
    private int resolveMatchOffset(BitsInputStream bin, int slot)
            throws DataFormatException, IOException {
        int match_offset;
        if (slot > 2) { // not repeated offset
            if (block_type == LZX_BLOCKTYPE_VERBATIM) {
                if (slot != 3) {
                    int extra = EXTRA_BITS[slot];
                    match_offset = POSITION_BASE[slot] - 2 + bin.readLE(extra);
                } else {
                    match_offset = 1;
                }
            } else if (block_type == LZX_BLOCKTYPE_ALIGNED) {
                int extra = EXTRA_BITS[slot];
                match_offset = POSITION_BASE[slot] - 2;
                if (extra > 3) { // verbatim and aligned bits
                    extra -= 3;
                    match_offset += (bin.readLE(extra) << 3);
                    match_offset += alignedTree.readHuffmanSymbol(bin);
                } else if (extra == 3) { // aligned bits only
                    match_offset += alignedTree.readHuffmanSymbol(bin);
                } else if (extra > 0) { // extra == 1, 2; verbatim bits only
                    match_offset += bin.readLE(extra);
                } else { // extra == 0
                    match_offset = 1;
                }
            } else {
                throw new DataFormatException("Unexpected block type " + block_type);
            }
            // update repeated offset LRU queue
            r2 = r1;
            r1 = r0;
            r0 = match_offset;
        } else if (slot == 0) {
            match_offset = r0;
        } else if (slot == 1) {
            match_offset = r1;
            r1 = r0;
            r0 = match_offset;
        } else { // slot == 2
            match_offset = r2;
            r2 = r0;
            r0 = match_offset;
        }
        return match_offset;
    }

    /**
     * Copies {@code match_length} bytes from {@code match_offset} bytes behind the
     * current window position to the current position and advances it.
     *
     * <p>The copy is done byte by byte on purpose: source and destination may
     * overlap, and bytes written earlier in the same match must be visible to
     * later reads.
     */
    private void copyMatch(int match_offset, int match_length) {
        int rundest = window_posn;
        int runsrc;

        if (window_posn >= match_offset) { // no wrap
            runsrc = rundest - match_offset;
        } else { // source starts near the end of the window and may wrap around
            runsrc = rundest + (window.length - match_offset);
            int copy_length = match_offset - window_posn;
            if (copy_length < match_length) {
                // copy the part before the wrap, then continue from the window start
                match_length -= copy_length;
                window_posn += copy_length;
                while (copy_length-- > 0) {
                    window[rundest++] = window[runsrc++];
                }
                runsrc = 0;
            }
        }
        window_posn += match_length;

        // copy match data - no worries about destination wraps
        while (match_length-- > 0) {
            window[rundest++] = window[runsrc++];
        }
    }

    /**
     * Undoes the Intel E8 (x86 CALL) preprocessing in {@code buf[off .. off + len)}
     * when the stream header announced it, and advances the transform position.
     * Counts the call in {@code frames_read} either way.
     */
    private void decodeIntelE8(byte[] buf, int off, int len) {
        if ((frames_read++ < 32768) && (intel_filesize != 0)) {
            log.warning("LZX Intel E8 decoding: running un-tested code " + intel_filesize);
            if (len <= 6 || !intel_started) {
                intel_curpos += len;
            } else {
                int curpos = intel_curpos;
                intel_curpos += len;

                // The last 10 bytes are never scanned.
                for (int i = off; i < off + len - 10; ) {
                    // Mask to unsigned: a signed byte can never equal 0xE8 (232).
                    if ((buf[i++] & 0xff) != 0xE8) {
                        curpos++;
                    } else {
                        // little-endian 32-bit operand following the E8 opcode
                        int abs_off = (buf[i] & 0xff)
                                | ((buf[i + 1] & 0xff) << 8)
                                | ((buf[i + 2] & 0xff) << 16)
                                | ((buf[i + 3] & 0xff) << 24);
                        if ((abs_off >= -curpos) && (abs_off < intel_filesize)) {
                            int ref_off = (abs_off >= 0)
                                    ? abs_off - curpos
                                    : abs_off + intel_filesize;
                            buf[i] = (byte) ref_off;
                            buf[i + 1] = (byte) (ref_off >> 8);
                            buf[i + 2] = (byte) (ref_off >> 16);
                            buf[i + 3] = (byte) (ref_off >> 24);
                        }
                        i += 4;
                        curpos += 5;
                    }
                }
            }
        }
    }
}