/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.commons.compress.compressors.lz4; import java.io.IOException; import java.io.InputStream; import java.util.Arrays; import org.apache.commons.compress.compressors.CompressorInputStream; import org.apache.commons.compress.utils.BoundedInputStream; import org.apache.commons.compress.utils.ByteUtils; import org.apache.commons.compress.utils.ChecksumCalculatingInputStream; import org.apache.commons.compress.utils.IOUtils; /** * CompressorInputStream for the LZ4 frame format. * * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p> * * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a> * @since 1.14 * @NotThreadSafe */ public class FramedLZ4CompressorInputStream extends CompressorInputStream { // used by FramedLZ4CompressorOutputStream as well static final byte[] LZ4_SIGNATURE = new byte[] { //NOSONAR 4, 0x22, 0x4d, 0x18 }; private static final byte[] SKIPPABLE_FRAME_TRAILER = new byte[] { 0x2a, 0x4d, 0x18 }; private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50; static final int VERSION_MASK = 0xC0; static final int SUPPORTED_VERSION = 0x40; static final int BLOCK_INDEPENDENCE_MASK = 0x20; static final int BLOCK_CHECKSUM_MASK = 0x10; static final int CONTENT_SIZE_MASK = 0x08; static final int CONTENT_CHECKSUM_MASK = 0x04; static final int BLOCK_MAX_SIZE_MASK = 0x70; static final int UNCOMPRESSED_FLAG_MASK = 0x80000000; // used in no-arg read method private final byte[] oneByte = new byte[1]; private final ByteUtils.ByteSupplier supplier = new ByteUtils.ByteSupplier() { @Override public int getAsByte() throws IOException { return readOneByte(); } }; private final InputStream in; private final boolean decompressConcatenated; private boolean expectBlockChecksum; private boolean expectBlockDependency; private boolean expectContentSize; private boolean expectContentChecksum; private InputStream currentBlock; private boolean endReached, inUncompressed; // used for frame header checksum and content checksum, if present private final XXHash32 contentHash = new XXHash32(); // used for block checksum, if present private final XXHash32 blockHash = new XXHash32(); // only created if the frame doesn't set the block independence flag private byte[] blockDependencyBuffer; /** * Creates a new input stream that decompresses streams compressed * using the LZ4 frame format and stops after decompressing the * first frame. * @param in the InputStream from which to read the compressed data * @throws IOException if reading fails */ public FramedLZ4CompressorInputStream(InputStream in) throws IOException { this(in, false); } /** * Creates a new input stream that decompresses streams compressed * using the LZ4 frame format. * @param in the InputStream from which to read the compressed data * @param decompressConcatenated if true, decompress until the end * of the input; if false, stop after the first LZ4 frame * and leave the input position to point to the next byte * after the frame stream * @throws IOException if reading fails */ public FramedLZ4CompressorInputStream(InputStream in, boolean decompressConcatenated) throws IOException { this.in = in; this.decompressConcatenated = decompressConcatenated; init(true); } /** {@inheritDoc} */ @Override public int read() throws IOException { return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF; } /** {@inheritDoc} */ @Override public void close() throws IOException { if (currentBlock != null) { currentBlock.close(); currentBlock = null; } in.close(); } /** {@inheritDoc} */ @Override public int read(final byte[] b, final int off, final int len) throws IOException { if (endReached) { return -1; } int r = readOnce(b, off, len); if (r == -1) { nextBlock(); if (!endReached) { r = readOnce(b, off, len); } } if (r != -1) { if (expectBlockDependency) { appendToBlockDependencyBuffer(b, off, r); } if (expectContentChecksum) { contentHash.update(b, off, r); } } return r; } private void init(boolean firstFrame) throws IOException { if (readSignature(firstFrame)) { readFrameDescriptor(); nextBlock(); } } private boolean readSignature(boolean firstFrame) throws IOException { String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage"; final byte[] b = new byte[4]; int read = IOUtils.readFully(in, b); count(read); if (0 == read && !firstFrame) { // good LZ4 frame and nothing after it endReached = true; return false; } if (4 != read) { throw new IOException(garbageMessage); } read = skipSkippableFrame(b); if (0 == read && !firstFrame) { // good LZ4 frame with only some skippable frames after it endReached = true; return false; } if (4 != read || !matches(b, 4)) { throw new IOException(garbageMessage); } return true; } private void readFrameDescriptor() throws IOException { int flags = readOneByte(); if (flags == -1) { throw new IOException("Premature end of stream while reading frame flags"); } contentHash.update(flags); if ((flags & VERSION_MASK) != SUPPORTED_VERSION) { throw new IOException("Unsupported version " + (flags >> 6)); } expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0; if (expectBlockDependency) { if (blockDependencyBuffer == null) { blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]; } } else { blockDependencyBuffer = null; } expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0; expectContentSize = (flags & CONTENT_SIZE_MASK) != 0; expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0; int bdByte = readOneByte(); if (bdByte == -1) { // max size is irrelevant for this implementation throw new IOException("Premature end of stream while reading frame BD byte"); } contentHash.update(bdByte); if (expectContentSize) { // for now we don't care, contains the uncompressed size byte[] contentSize = new byte[8]; int skipped = (int) IOUtils.readFully(in, contentSize); count(skipped); if (8 != skipped) { throw new IOException("Premature end of stream while reading content size"); } contentHash.update(contentSize, 0, contentSize.length); } int headerHash = readOneByte(); if (headerHash == -1) { // partial hash of header. throw new IOException("Premature end of stream while reading frame header checksum"); } int expectedHash = (int) ((contentHash.getValue() >> 8) & 0xff); contentHash.reset(); if (headerHash != expectedHash) { throw new IOException("frame header checksum mismatch."); } } private void nextBlock() throws IOException { maybeFinishCurrentBlock(); long len = ByteUtils.fromLittleEndian(supplier, 4); boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0; int realLen = (int) (len & (~UNCOMPRESSED_FLAG_MASK)); if (realLen == 0) { verifyContentChecksum(); if (!decompressConcatenated) { endReached = true; } else { init(false); } return; } InputStream capped = new BoundedInputStream(in, realLen); if (expectBlockChecksum) { capped = new ChecksumCalculatingInputStream(blockHash, capped); } if (uncompressed) { inUncompressed = true; currentBlock = capped; } else { inUncompressed = false; BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped); if (expectBlockDependency) { s.prefill(blockDependencyBuffer); } currentBlock = s; } } private void maybeFinishCurrentBlock() throws IOException { if (currentBlock != null) { currentBlock.close(); currentBlock = null; if (expectBlockChecksum) { verifyChecksum(blockHash, "block"); blockHash.reset(); } } } private void verifyContentChecksum() throws IOException { if (expectContentChecksum) { verifyChecksum(contentHash, "content"); } contentHash.reset(); } private void verifyChecksum(XXHash32 hash, String kind) throws IOException { byte[] checksum = new byte[4]; int read = IOUtils.readFully(in, checksum); count(read); if (4 != read) { throw new IOException("Premature end of stream while reading " + kind + " checksum"); } long expectedHash = hash.getValue(); if (expectedHash != ByteUtils.fromLittleEndian(checksum)) { throw new IOException(kind + " checksum mismatch."); } } private int readOneByte() throws IOException { final int b = in.read(); if (b != -1) { count(1); return b & 0xFF; } return -1; } private int readOnce(byte[] b, int off, int len) throws IOException { if (inUncompressed) { int cnt = currentBlock.read(b, off, len); count(cnt); return cnt; } else { BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock; long before = l.getBytesRead(); int cnt = currentBlock.read(b, off, len); count(l.getBytesRead() - before); return cnt; } } private static boolean isSkippableFrameSignature(byte[] b) { if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) { return false; } for (int i = 1; i < 4; i++) { if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) { return false; } } return true; } /** * Skips over the contents of a skippable frame as well as * skippable frames following it. * * <p>It then tries to read four more bytes which are supposed to * hold an LZ4 signature and returns the number of bytes read * while storing the bytes in the given array.</p> */ private int skipSkippableFrame(byte[] b) throws IOException { int read = 4; while (read == 4 && isSkippableFrameSignature(b)) { long len = ByteUtils.fromLittleEndian(supplier, 4); long skipped = IOUtils.skip(in, len); count(skipped); if (len != skipped) { throw new IOException("Premature end of stream while skipping frame"); } read = IOUtils.readFully(in, b); count(read); } return read; } private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) { len = Math.min(len, blockDependencyBuffer.length); if (len > 0) { int keep = blockDependencyBuffer.length - len; if (keep > 0) { // move last keep bytes towards the start of the buffer System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep); } // append new data System.arraycopy(b, off, blockDependencyBuffer, keep, len); } } /** * Checks if the signature matches what is expected for a .lz4 file. * * <p>.lz4 files start with a four byte signature.</p> * * @param signature the bytes to check * @param length the number of bytes to check * @return true if this is a .sz stream, false otherwise */ public static boolean matches(final byte[] signature, final int length) { if (length < LZ4_SIGNATURE.length) { return false; } byte[] shortenedSig = signature; if (signature.length > LZ4_SIGNATURE.length) { shortenedSig = new byte[LZ4_SIGNATURE.length]; System.arraycopy(signature, 0, shortenedSig, 0, LZ4_SIGNATURE.length); } return Arrays.equals(shortenedSig, LZ4_SIGNATURE); } }