/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package parquet.column.values.delta; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.Random; import org.junit.Before; import org.junit.Test; import parquet.bytes.BytesInput; import parquet.column.values.ValuesWriter; import parquet.io.ParquetDecodingException; public class DeltaBinaryPackingValuesWriterTest { DeltaBinaryPackingValuesReader reader; private int blockSize; private int miniBlockNum; private ValuesWriter writer; private Random random; @Before public void setUp() { blockSize = 128; miniBlockNum = 4; writer = new DeltaBinaryPackingValuesWriter(blockSize, miniBlockNum, 100); random = new Random(); } @Test(expected = IllegalArgumentException.class) public void miniBlockSizeShouldBeMultipleOf8() { new DeltaBinaryPackingValuesWriter(1281, 4, 100); } /* When data size is multiple of Block*/ @Test public void shouldWriteWhenDataIsAlignedWithBlock() throws IOException { int[] data = new int[5 * blockSize]; for (int i = 0; i < blockSize * 5; i++) { data[i] = random.nextInt(); } shouldWriteAndRead(data); } @Test public void shouldWriteAndReadWhenBlockIsNotFullyWritten() throws IOException { int[] data = new int[blockSize - 3]; for (int i = 0; i < data.length; i++) { data[i] = random.nextInt(); } shouldWriteAndRead(data); } @Test public void shouldWriteAndReadWhenAMiniBlockIsNotFullyWritten() throws IOException { int miniBlockSize = blockSize / miniBlockNum; int[] data = new int[miniBlockSize - 3]; for (int i = 0; i < data.length; i++) { data[i] = random.nextInt(); } shouldWriteAndRead(data); } @Test public void shouldWriteNegativeDeltas() throws IOException { int[] data = new int[blockSize]; for (int i = 0; i < data.length; i++) { data[i] = 10 - (i * 32 - random.nextInt(6)); } shouldWriteAndRead(data); } @Test public void shouldWriteAndReadWhenDeltasAreSame() throws IOException { int[] data = new int[2 * blockSize]; for (int i = 0; i < blockSize; i++) { data[i] = i * 32; } shouldWriteAndRead(data); } @Test public void shouldWriteAndReadWhenValuesAreSame() throws IOException { int[] data = new int[2 * blockSize]; for (int i = 0; i < blockSize; i++) { data[i] = 3; } shouldWriteAndRead(data); } @Test public void shouldWriteWhenDeltaIs0ForEachBlock() throws IOException { int[] data = new int[5 * blockSize + 1]; for (int i = 0; i < data.length; i++) { data[i] = (i - 1) / blockSize; } shouldWriteAndRead(data); } @Test public void shouldReadWriteWhenDataIsNotAlignedWithBlock() throws IOException { int[] data = new int[5 * blockSize + 3]; for (int i = 0; i < data.length; i++) { data[i] = random.nextInt(20) - 10; } shouldWriteAndRead(data); } @Test public void shouldReadMaxMinValue() throws IOException { int[] data = new int[10]; for (int i = 0; i < data.length; i++) { if(i%2==0) { data[i]=Integer.MIN_VALUE; }else { data[i]=Integer.MAX_VALUE; } } shouldWriteAndRead(data); } @Test public void shouldReturnCorrectOffsetAfterInitialization() throws IOException { int[] data = new int[2 * blockSize + 3]; for (int i = 0; i < data.length; i++) { data[i] = i * 32; } writeData(data); reader = new DeltaBinaryPackingValuesReader(); BytesInput bytes = writer.getBytes(); byte[] valueContent = bytes.toByteArray(); byte[] pageContent = new byte[valueContent.length * 10]; int contentOffsetInPage = 33; System.arraycopy(valueContent, 0, pageContent, contentOffsetInPage, valueContent.length); //offset should be correct reader.initFromPage(100, pageContent, contentOffsetInPage); int offset= reader.getNextOffset(); assertEquals(valueContent.length + contentOffsetInPage, offset); //should be able to read data correclty for (int i : data) { assertEquals(i, reader.readInteger()); } } @Test public void shouldThrowExceptionWhenReadMoreThanWritten() throws IOException { int[] data = new int[5 * blockSize + 1]; for (int i = 0; i < data.length; i++) { data[i] = i * 32; } shouldWriteAndRead(data); try { reader.readInteger(); } catch (ParquetDecodingException e) { assertEquals("no more value to read, total value count is " + data.length, e.getMessage()); } } @Test public void shouldSkip() throws IOException { int[] data = new int[5 * blockSize + 1]; for (int i = 0; i < data.length; i++) { data[i] = i * 32; } writeData(data); reader = new DeltaBinaryPackingValuesReader(); reader.initFromPage(100, writer.getBytes().toByteArray(), 0); for (int i = 0; i < data.length; i++) { if (i % 3 == 0) { reader.skip(); } else { assertEquals(i * 32, reader.readInteger()); } } } @Test public void shouldReset() throws IOException { shouldReadWriteWhenDataIsNotAlignedWithBlock(); int[] data = new int[5 * blockSize]; for (int i = 0; i < blockSize * 5; i++) { data[i] = i * 2; } writer.reset(); shouldWriteAndRead(data); } @Test public void randomDataTest() throws IOException { int maxSize = 1000; int[] data = new int[maxSize]; for (int round = 0; round < 100000; round++) { int size = random.nextInt(maxSize); for (int i = 0; i < size; i++) { data[i] = random.nextInt(); } shouldReadAndWrite(data, size); writer.reset(); } } private void shouldWriteAndRead(int[] data) throws IOException { shouldReadAndWrite(data, data.length); } private void shouldReadAndWrite(int[] data, int length) throws IOException { writeData(data, length); reader = new DeltaBinaryPackingValuesReader(); byte[] page = writer.getBytes().toByteArray(); int miniBlockSize = blockSize / miniBlockNum; double miniBlockFlushed = Math.ceil(((double) length - 1) / miniBlockSize); double blockFlushed = Math.ceil(((double) length - 1) / blockSize); double estimatedSize = 4 * 5 //blockHeader + 4 * miniBlockFlushed * miniBlockSize //data(aligned to miniBlock) + blockFlushed * miniBlockNum //bitWidth of mini blocks + (5.0 * blockFlushed);//min delta for each block assertTrue(estimatedSize >= page.length); reader.initFromPage(100, page, 0); for (int i = 0; i < length; i++) { assertEquals(data[i], reader.readInteger()); } } private void writeData(int[] data) { writeData(data, data.length); } private void writeData(int[] data, int length) { for (int i = 0; i < length; i++) { writer.writeInteger(data[i]); } } }