/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.compressing;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import com.carrotsearch.randomizedtesting.generators.RandomNumbers;

/**
 * Base class for LZ4 compression-mode tests. In addition to whatever the parent
 * {@code test} method checks, it walks the returned compressed bytes as a series
 * of (token, literals, match) sequences and asserts structural properties of
 * that stream.
 */
public abstract class AbstractTestLZ4CompressionMode extends AbstractTestCompressionMode {

  /**
   * Runs the parent test, then parses the bytes it returned sequence by sequence.
   *
   * <p>For every sequence the token's high nibble is read as the literal run
   * length and its low nibble as the match length; a nibble equal to
   * {@code 0x0F} is extended by following bytes, each {@code 0xFF} byte adding
   * 255 and the first other byte terminating the length. The checks are:
   * <ul>
   *   <li>the stream ends with a literal run that is either at least
   *       {@code LZ4.LAST_LITERALS} long or covers the whole input;</li>
   *   <li>every match distance is strictly positive and does not reach before
   *       the start of the data;</li>
   *   <li>a match that stops early although the next byte would still have
   *       matched is not followed by a sequence that carries literals.</li>
   * </ul>
   *
   * @param decompressed the original, uncompressed bytes
   * @return the compressed bytes as returned by {@code super.test}
   * @throws IOException if the parent test throws it
   */
  @Override
  public byte[] test(byte[] decompressed) throws IOException {
    final byte[] compressed = super.test(decompressed);

    int readPos = 0;   // cursor into the compressed stream
    int produced = 0;  // decompressed bytes accounted for so far

    while (true) {
      final int token = compressed[readPos] & 0xFF;
      readPos++;

      // Literal run length: high nibble, optionally extended by extra bytes.
      int literals = token >>> 4;
      if (literals == 0x0F) {
        for (; compressed[readPos] == (byte) 0xFF; readPos++) {
          literals += 0xFF;
        }
        literals += compressed[readPos] & 0xFF;
        readPos++;
      }

      // The literal bytes themselves are not inspected, only stepped over.
      readPos += literals;
      produced += literals;

      // The stream must finish on a literal run of at least LZ4.LAST_LITERALS
      // bytes, unless the entire input was emitted as literals.
      if (readPos == compressed.length) {
        assertEquals(decompressed.length, produced);
        final boolean enoughTrailingLiterals = literals >= LZ4.LAST_LITERALS;
        final boolean everythingIsLiteral = literals == decompressed.length;
        assertTrue("lastLiterals=" + literals + ", bytes=" + decompressed.length,
            enoughTrailingLiterals || everythingIsLiteral);
        break;
      }

      // Match distance: two bytes, least significant byte first.
      final int distanceLow = compressed[readPos] & 0xFF;
      final int distanceHigh = compressed[readPos + 1] & 0xFF;
      readPos += 2;
      final int matchDec = distanceLow | (distanceHigh << 8);
      // The distance must be non-zero and must not point before the data start.
      assertTrue(matchDec + " " + produced, matchDec > 0 && matchDec <= produced);

      // Match length: low nibble, optionally extended, plus the minimum match.
      int matchLen = token & 0x0F;
      if (matchLen == 0x0F) {
        for (; compressed[readPos] == (byte) 0xFF; readPos++) {
          matchLen += 0xFF;
        }
        matchLen += compressed[readPos] & 0xFF;
        readPos++;
      }
      matchLen += LZ4.MIN_MATCH;

      // If the match stopped although the following byte would also have
      // matched, the next sequence must not start with literals; otherwise
      // the encoder wasted space.
      final int matchEnd = produced + matchLen;
      if (matchEnd < decompressed.length - LZ4.LAST_LITERALS) {
        final boolean moreCommonBytes = decompressed[matchEnd] == decompressed[matchEnd - matchDec];
        final boolean nextSequenceHasLiterals = (compressed[readPos] & 0xF0) != 0;
        assertTrue(!(moreCommonBytes && nextSequenceHasLiterals));
      }

      produced = matchEnd;
    }

    assertEquals(decompressed.length, produced);
    return compressed;
  }

  /** Input intended to yield literal runs and matches whose lengths are all &lt;= 15. */
  public void testShortLiteralsAndMatchs() throws IOException {
    test("1234562345673456745678910123".getBytes(StandardCharsets.UTF_8));
  }

  /**
   * Input intended to yield long matches (length &gt;= 20): byte {@code i} holds
   * {@code (byte) i}, so the content repeats with a period of 256.
   */
  public void testLongMatchs() throws IOException {
    final int length = RandomNumbers.randomIntBetween(random(), 300, 1024);
    final byte[] decompressed = new byte[length];
    for (int pos = 0; pos < length; pos++) {
      decompressed[pos] = (byte) pos;
    }
    test(decompressed);
  }

  /**
   * Input intended to yield a long literal run (length &gt;= 16) that is not the
   * final run: a random array in which a short slice taken near the start is
   * copied close to the end so that a match can follow the literals.
   */
  public void testLongLiterals() throws IOException {
    // Random draws are kept in this exact order so a given seed reproduces the same input.
    final int length = RandomNumbers.randomIntBetween(random(), 400, 1024);
    final byte[] decompressed = randomArray(length, 256);
    final int matchRef = random().nextInt(30);
    final int matchOff = RandomNumbers.randomIntBetween(
        random(), decompressed.length - 40, decompressed.length - 20);
    final int matchLength = RandomNumbers.randomIntBetween(random(), 4, 10);
    System.arraycopy(decompressed, matchRef, decompressed, matchOff, matchLength);
    test(decompressed);
  }

  /** Input in which a repeated 4-byte pattern ends right before the last literals. */
  public void testMatchRightBeforeLastLiterals() throws IOException {
    final byte[] decompressed = {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 5};
    test(decompressed);
  }
}