/* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
* file except in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package net.csdn.modules.compress.lzf;
import java.io.IOException;
import java.io.InputStream;
/**
* Decoder that handles decoding of sequence of encoded LZF chunks,
* combining them into a single contiguous result byte array
*
* @author tatu@ning.com
*/
public class LZFDecoder {
    /** Value of the optional single-byte end marker that may follow the last chunk. */
    private static final byte BYTE_NULL = 0;

    /** Length of the fixed chunk header: 2 signature bytes, 1 block type byte, 2-byte block length. */
    private static final int HEADER_BYTES = 5;

    /** Size of the extra "uncompressed length" field that follows the header of a compressed chunk. */
    private static final int UNCOMPRESSED_LENGTH_BYTES = 2;

    // static methods, no need to instantiate
    private LZFDecoder() {
    }

    /**
     * Decodes a complete sequence of LZF chunks into a newly allocated array
     * of exactly the decoded size.
     *
     * @param sourceBuffer Encoded input; may end with an optional single null end marker byte
     * @return Newly allocated array containing the decoded content
     * @throws IOException if the chunk headers are malformed, or a compressed chunk
     *     does not decode to its declared length
     */
    public static byte[] decode(final byte[] sourceBuffer) throws IOException {
        // Walking the headers first gives the exact result size and validates chunk structure.
        byte[] result = new byte[calculateUncompressedSize(sourceBuffer)];
        decode(sourceBuffer, result);
        return result;
    }

    /**
     * Method for decompressing whole input data, which is encoded in LZF
     * block structure (compatible with lzf command line utility),
     * and can consist of any number of blocks.
     * <p>
     * Only the 'ZV' signature of each block is verified here; a truncated input or a
     * target buffer that is too small surfaces as an unchecked index-out-of-bounds
     * exception from the array accesses.
     *
     * @param sourceBuffer Encoded input; may end with an optional single end marker byte
     * @param targetBuffer Buffer that receives the decoded bytes, starting at offset 0
     * @return Number of bytes written to {@code targetBuffer}
     * @throws IOException if a block does not start with the 'ZV' signature, or a
     *     compressed block does not decode to its declared length
     */
    public static int decode(final byte[] sourceBuffer, final byte[] targetBuffer) throws IOException {
        final int end = sourceBuffer.length - 1; // -1 to offset possible end marker
        int inPtr = 0;
        int outPtr = 0;
        int blockNr = 0;
        while (inPtr < end) {
            // inPtr < length - 1 guarantees both signature bytes are readable
            if (sourceBuffer[inPtr] != LZFChunk.BYTE_Z || sourceBuffer[inPtr + 1] != LZFChunk.BYTE_V) {
                throw new IOException("Corrupt input data, block #" + blockNr + " (at offset " + inPtr
                        + "): did not start with 'ZV' signature bytes");
            }
            inPtr += 2; // skip 'ZV' marker
            int type = sourceBuffer[inPtr++];
            int len = uint16(sourceBuffer, inPtr);
            inPtr += 2;
            if (type == LZFChunk.BLOCK_TYPE_NON_COMPRESSED) { // uncompressed: payload is copied verbatim
                System.arraycopy(sourceBuffer, inPtr, targetBuffer, outPtr, len);
                outPtr += len;
            } else { // compressed: header is followed by 2-byte uncompressed length
                int uncompLen = uint16(sourceBuffer, inPtr);
                inPtr += UNCOMPRESSED_LENGTH_BYTES;
                decompressChunk(sourceBuffer, inPtr, targetBuffer, outPtr, outPtr + uncompLen);
                outPtr += uncompLen;
            }
            inPtr += len; // 'len' is the payload length in the input for both block types
            ++blockNr;
        }
        return outPtr;
    }

    /**
     * Walks the chunk headers of the encoded data and sums up the decoded sizes,
     * verifying along the way that the header structure is consistent
     * (signature, known block type, lengths that add up to the input length).
     *
     * @param data Encoded input; may end with an optional single null end marker byte
     * @return Total number of bytes the input decodes to
     * @throws IOException if a header is malformed or truncated, or the last block
     *     claims more payload than the input contains
     */
    private static int calculateUncompressedSize(byte[] data) throws IOException {
        int uncompressedSize = 0;
        int ptr = 0;
        int blockNr = 0;
        while (ptr < data.length) {
            // can use optional end marker: a single null byte as the very last byte
            if (ptr == (data.length - 1) && data[ptr] == BYTE_NULL) {
                ++ptr; // so that we'll be at end
                break;
            }
            final int available = data.length - ptr;
            if (data[ptr] != LZFChunk.BYTE_Z || (available > 1 && data[ptr + 1] != LZFChunk.BYTE_V)) {
                throw new IOException("Corrupt input data, block #" + blockNr + " (at offset " + ptr + "): did not start with 'ZV' signature bytes");
            }
            if (available < HEADER_BYTES) {
                throw new IOException("Corrupt input data, block #" + blockNr + " (at offset " + ptr + "): truncated block header");
            }
            int type = (int) data[ptr + 2];
            int blockLen = uint16(data, ptr + 3);
            if (type == LZFChunk.BLOCK_TYPE_NON_COMPRESSED) { // uncompressed
                ptr += HEADER_BYTES;
                uncompressedSize += blockLen;
            } else if (type == LZFChunk.BLOCK_TYPE_COMPRESSED) { // compressed
                if (available < HEADER_BYTES + UNCOMPRESSED_LENGTH_BYTES) {
                    throw new IOException("Corrupt input data, block #" + blockNr + " (at offset " + ptr + "): truncated block header");
                }
                uncompressedSize += uint16(data, ptr + HEADER_BYTES);
                ptr += HEADER_BYTES + UNCOMPRESSED_LENGTH_BYTES;
            } else { // unknown... CRC-32 would be 2, but that's not implemented by cli tool
                throw new IOException("Corrupt input data, block #" + blockNr + " (at offset " + ptr + "): unrecognized block type " + (type & 0xFF));
            }
            ptr += blockLen;
            ++blockNr;
        }
        // one more sanity check: the last block's payload must end exactly at end of input.
        // The loop only exits with ptr >= data.length, so a mismatch always means an overrun
        // by the block processed last (blockNr has already been advanced past it).
        if (ptr != data.length) {
            throw new IOException("Corrupt input data: block #" + (blockNr - 1) + " extends " + (ptr - data.length) + " bytes beyond end of input");
        }
        return uncompressedSize;
    }

    /**
     * Main decode from a stream. Reads exactly one chunk from the stream; decompressed bytes
     * are placed in the outputBuffer, inputBuffer is a "scratch-area".
     *
     * @param is An input stream of LZF compressed bytes
     * @param inputBuffer A byte array used as a scratch area; it receives the chunk header and,
     *     for compressed chunks, the 2-byte uncompressed length plus the compressed payload
     * @param outputBuffer A byte array in which the result is returned
     * @return The number of bytes placed in the outputBuffer, or -1 if the stream ended
     *     before a complete chunk header could be read
     * @throws IOException if the chunk does not start with the 'ZV' signature, the stream
     *     ends inside the chunk payload, or the compressed data is corrupt
     */
    public static int decompressChunk(final InputStream is, final byte[] inputBuffer, final byte[] outputBuffer)
            throws IOException {
        // A single read() may return fewer bytes than requested without being at end of
        // stream, so keep reading until the header is complete or EOF is really reached.
        int headerLength = 0;
        while (headerLength < HEADER_BYTES) {
            int count = is.read(inputBuffer, headerLength, HEADER_BYTES - headerLength);
            if (count < 0) {
                return -1;
            }
            headerLength += count;
        }
        if (inputBuffer[0] != LZFChunk.BYTE_Z || inputBuffer[1] != LZFChunk.BYTE_V) {
            throw new IOException("Corrupt input data, block did not start with 'ZV' signature bytes");
        }
        int type = inputBuffer[2];
        int compLen = uint16(inputBuffer, 3);
        int bytesInOutput;
        if (type == LZFChunk.BLOCK_TYPE_NON_COMPRESSED) { // uncompressed: payload goes straight to output
            readFully(is, false, outputBuffer, 0, compLen);
            bytesInOutput = compLen;
        } else { // compressed
            // first 2 bytes are uncompressed length; header bytes in the scratch area are overwritten
            readFully(is, true, inputBuffer, 0, UNCOMPRESSED_LENGTH_BYTES + compLen);
            int uncompLen = uint16(inputBuffer, 0);
            decompressChunk(inputBuffer, UNCOMPRESSED_LENGTH_BYTES, outputBuffer, 0, uncompLen);
            bytesInOutput = uncompLen;
        }
        return bytesInOutput;
    }

    /**
     * Main decode method for individual chunks: expands the compressed payload
     * found in {@code in} into {@code out}.
     *
     * @param in Buffer containing the compressed payload
     * @param inPos Offset of the first payload byte in {@code in}
     * @param out Buffer that receives the decompressed bytes
     * @param outPos Offset in {@code out} at which to start writing
     * @param outEnd Offset in {@code out} at which the decompressed data must end exactly
     * @throws IOException if decoding writes past {@code outEnd}
     */
    public static void decompressChunk(byte[] in, int inPos, byte[] out, int outPos, int outEnd)
            throws IOException {
        do {
            int ctrl = in[inPos++] & 255;
            if (ctrl < LZFChunk.MAX_LITERAL) { // literal run
                // copies ctrl + 1 bytes verbatim; ctrl becomes index of the last literal byte
                ctrl += inPos;
                do {
                    out[outPos++] = in[inPos];
                } while (inPos++ < ctrl);
                continue;
            }
            // back reference: top 3 bits are length, low 5 bits the high part of the offset
            int len = ctrl >> 5;
            ctrl = -((ctrl & 0x1f) << 8) - 1;
            if (len == 7) { // maximum 3-bit length: an extra length byte follows
                len += in[inPos++] & 255;
            }
            ctrl -= in[inPos++] & 255; // ctrl is now the (negative) distance back into 'out'
            len += outPos + 2; // 'len' is now the output offset at which this copy ends
            // NOTE(review): the two unconditional copies presume a back reference always
            // covers at least 2 bytes — confirm against LZFChunk.MAX_LITERAL.
            out[outPos] = out[outPos++ + ctrl];
            out[outPos] = out[outPos++ + ctrl];
            /* Odd: after extensive profiling, looks like magic number
             * for unrolling is 4: with 8 performance is worse (even
             * bit less than with no unrolling).
             */
            final int end = len - 3;
            while (outPos < end) {
                out[outPos] = out[outPos++ + ctrl];
                out[outPos] = out[outPos++ + ctrl];
                out[outPos] = out[outPos++ + ctrl];
                out[outPos] = out[outPos++ + ctrl];
            }
            // and, interestingly, unlooping works here too:
            if (outPos < len) { // max 3 bytes to copy
                out[outPos] = out[outPos++ + ctrl];
                if (outPos < len) {
                    out[outPos] = out[outPos++ + ctrl];
                    if (outPos < len) {
                        out[outPos] = out[outPos++ + ctrl];
                    }
                }
            }
        } while (outPos < outEnd);
        // sanity check to guard against corrupt data:
        if (outPos != outEnd) {
            throw new IOException("Corrupt data: overrun in decompress, input offset " + inPos + ", output offset " + outPos);
        }
    }

    /**
     * Reads an unsigned 16-bit big-endian value (high byte first).
     *
     * @param data Buffer to read from
     * @param ptr Offset of the high byte
     * @return Value in range 0..65535
     */
    private static int uint16(byte[] data, int ptr) {
        return ((data[ptr] & 0xFF) << 8) + (data[ptr + 1] & 0xFF);
    }

    /**
     * Reads exactly {@code len} bytes from the stream, looping over short reads.
     *
     * @param is Stream to read from
     * @param compressed Whether the block being read is compressed; only used for the error message
     * @param outputBuffer Buffer that receives the bytes
     * @param offset Offset in {@code outputBuffer} at which to start storing
     * @param len Number of bytes to read
     * @throws IOException if the stream ends before {@code len} bytes were read
     */
    private static void readFully(InputStream is, boolean compressed,
            byte[] outputBuffer, int offset, int len) throws IOException {
        int left = len;
        while (left > 0) {
            int count = is.read(outputBuffer, offset, left);
            if (count < 0) { // EOF not allowed here
                throw new IOException("EOF in " + len + " byte ("
                        + (compressed ? "" : "un") + "compressed) block: could only read "
                        + (len - left) + " bytes");
            }
            offset += count;
            left -= count;
        }
    }
}