/** * Copyright The Apache Software Foundation * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. */ package org.apache.hadoop.hbase.io.encoding; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.fail; import java.io.ByteArrayOutputStream; import java.io.DataOutputStream; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Random; import java.util.concurrent.ConcurrentSkipListSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.ArrayBackedTag; import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec; import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker; import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; import org.apache.hadoop.hbase.nio.SingleByteBuff; import org.apache.hadoop.hbase.testclassification.IOTests; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.CollectionBackedScanner; import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; /** * Tests scanning/seeking data with PrefixTree Encoding. */ @RunWith(Parameterized.class) @Category({IOTests.class, SmallTests.class}) public class TestPrefixTreeEncoding { private static final Log LOG = LogFactory.getLog(TestPrefixTreeEncoding.class); private static final String CF = "EncodingTestCF"; private static final byte[] CF_BYTES = Bytes.toBytes(CF); private static final int NUM_ROWS_PER_BATCH = 50; private static final int NUM_COLS_PER_ROW = 20; private int numBatchesWritten = 0; private ConcurrentSkipListSet<Cell> kvset = new ConcurrentSkipListSet<>(CellComparator.COMPARATOR); private static boolean formatRowNum = false; @Parameters public static Collection<Object[]> parameters() { List<Object[]> paramList = new ArrayList<>(); { paramList.add(new Object[] { false }); paramList.add(new Object[] { true }); } return paramList; } private final boolean includesTag; public TestPrefixTreeEncoding(boolean includesTag) { this.includesTag = includesTag; } @Before public void setUp() throws Exception { kvset.clear(); formatRowNum = false; } @Test public void testSeekBeforeWithFixedData() throws Exception { formatRowNum = true; PrefixTreeCodec encoder = new PrefixTreeCodec(); int batchId = numBatchesWritten++; HFileContext meta = new HFileContextBuilder() .withHBaseCheckSum(false) .withIncludesMvcc(false) .withIncludesTags(includesTag) .withCompression(Algorithm.NONE).build(); HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( DataBlockEncoding.PREFIX_TREE, new byte[0], meta); ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream(); DataOutputStream userDataStream = new DataOutputStream(baosInMemory); generateFixedTestData(kvset, batchId, false, includesTag, encoder, blkEncodingCtx, userDataStream); EncodedSeeker seeker = encoder.createSeeker(CellComparator.COMPARATOR, encoder.newDataBlockDecodingContext(meta)); byte[] onDiskBytes = baosInMemory.toByteArray(); ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE, onDiskBytes.length - DataBlockEncoding.ID_SIZE); seeker.setCurrentBuffer(new SingleByteBuff(readBuffer)); // Seek before the first keyvalue; Cell seekKey = CellUtil.createFirstDeleteFamilyCellOnRow(getRowKey(batchId, 0), CF_BYTES); seeker.seekToKeyInBlock(seekKey, true); assertEquals(null, seeker.getCell()); // Seek before the middle keyvalue; seekKey = CellUtil.createFirstDeleteFamilyCellOnRow(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3), CF_BYTES); seeker.seekToKeyInBlock(seekKey, true); assertNotNull(seeker.getCell()); assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3 - 1), CellUtil.cloneRow(seeker.getCell())); // Seek before the last keyvalue; seekKey = CellUtil.createFirstDeleteFamilyCellOnRow(Bytes.toBytes("zzzz"), CF_BYTES); seeker.seekToKeyInBlock(seekKey, true); assertNotNull(seeker.getCell()); assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH - 1), CellUtil.cloneRow(seeker.getCell())); } @Test public void testScanWithRandomData() throws Exception { PrefixTreeCodec encoder = new PrefixTreeCodec(); ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream(); DataOutputStream userDataStream = new DataOutputStream(baosInMemory); HFileContext meta = new HFileContextBuilder() .withHBaseCheckSum(false) .withIncludesMvcc(false) .withIncludesTags(includesTag) .withCompression(Algorithm.NONE) .build(); HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( DataBlockEncoding.PREFIX_TREE, new byte[0], meta); generateRandomTestData(kvset, numBatchesWritten++, includesTag, encoder, blkEncodingCtx, userDataStream); EncodedSeeker seeker = encoder.createSeeker(CellComparator.COMPARATOR, encoder.newDataBlockDecodingContext(meta)); byte[] onDiskBytes = baosInMemory.toByteArray(); ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE, onDiskBytes.length - DataBlockEncoding.ID_SIZE); seeker.setCurrentBuffer(new SingleByteBuff(readBuffer)); Cell previousKV = null; do { Cell currentKV = seeker.getCell(); System.out.println(currentKV); if (previousKV != null && CellComparator.COMPARATOR.compare(currentKV, previousKV) < 0) { dumpInputKVSet(); fail("Current kv " + currentKV + " is smaller than previous keyvalue " + previousKV); } if (!includesTag) { assertFalse(currentKV.getTagsLength() > 0); } else { Assert.assertTrue(currentKV.getTagsLength() > 0); } previousKV = currentKV; } while (seeker.next()); } @Test public void testSeekWithRandomData() throws Exception { PrefixTreeCodec encoder = new PrefixTreeCodec(); ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream(); DataOutputStream userDataStream = new DataOutputStream(baosInMemory); int batchId = numBatchesWritten++; HFileContext meta = new HFileContextBuilder() .withHBaseCheckSum(false) .withIncludesMvcc(false) .withIncludesTags(includesTag) .withCompression(Algorithm.NONE) .build(); HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( DataBlockEncoding.PREFIX_TREE, new byte[0], meta); generateRandomTestData(kvset, batchId, includesTag, encoder, blkEncodingCtx, userDataStream); EncodedSeeker seeker = encoder.createSeeker(CellComparator.COMPARATOR, encoder.newDataBlockDecodingContext(meta)); byte[] onDiskBytes = baosInMemory.toByteArray(); ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE, onDiskBytes.length - DataBlockEncoding.ID_SIZE); verifySeeking(seeker, readBuffer, batchId); } @Test public void testSeekWithFixedData() throws Exception { PrefixTreeCodec encoder = new PrefixTreeCodec(); int batchId = numBatchesWritten++; HFileContext meta = new HFileContextBuilder() .withHBaseCheckSum(false) .withIncludesMvcc(false) .withIncludesTags(includesTag) .withCompression(Algorithm.NONE) .build(); HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( DataBlockEncoding.PREFIX_TREE, new byte[0], meta); ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream(); DataOutputStream userDataStream = new DataOutputStream(baosInMemory); generateFixedTestData(kvset, batchId, includesTag, encoder, blkEncodingCtx, userDataStream); EncodedSeeker seeker = encoder.createSeeker(CellComparator.COMPARATOR, encoder.newDataBlockDecodingContext(meta)); byte[] onDiskBytes = baosInMemory.toByteArray(); ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE, onDiskBytes.length - DataBlockEncoding.ID_SIZE); verifySeeking(seeker, readBuffer, batchId); } private void verifySeeking(EncodedSeeker encodeSeeker, ByteBuffer encodedData, int batchId) { List<KeyValue> kvList = new ArrayList<>(); for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) { kvList.clear(); encodeSeeker.setCurrentBuffer(new SingleByteBuff(encodedData)); KeyValue firstOnRow = KeyValueUtil.createFirstOnRow(getRowKey(batchId, i)); encodeSeeker.seekToKeyInBlock( new KeyValue.KeyOnlyKeyValue(firstOnRow.getBuffer(), firstOnRow.getKeyOffset(), firstOnRow.getKeyLength()), false); boolean hasMoreOfEncodeScanner = encodeSeeker.next(); CollectionBackedScanner collectionScanner = new CollectionBackedScanner( this.kvset); boolean hasMoreOfCollectionScanner = collectionScanner.seek(firstOnRow); if (hasMoreOfEncodeScanner != hasMoreOfCollectionScanner) { dumpInputKVSet(); fail("Get error result after seeking " + firstOnRow); } if (hasMoreOfEncodeScanner) { if (CellComparator.COMPARATOR.compare(encodeSeeker.getCell(), collectionScanner.peek()) != 0) { dumpInputKVSet(); fail("Expected " + collectionScanner.peek() + " actual " + encodeSeeker.getCell() + ", after seeking " + firstOnRow); } } } } private void dumpInputKVSet() { LOG.info("Dumping input keyvalue set in error case:"); for (Cell kv : kvset) { System.out.println(kv); } } private static void generateFixedTestData(ConcurrentSkipListSet<Cell> kvset, int batchId, boolean useTags, PrefixTreeCodec encoder, HFileBlockEncodingContext blkEncodingCtx, DataOutputStream userDataStream) throws Exception { generateFixedTestData(kvset, batchId, true, useTags, encoder, blkEncodingCtx, userDataStream); } private static void generateFixedTestData(ConcurrentSkipListSet<Cell> kvset, int batchId, boolean partial, boolean useTags, PrefixTreeCodec encoder, HFileBlockEncodingContext blkEncodingCtx, DataOutputStream userDataStream) throws Exception { for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) { if (partial && i / 10 % 2 == 1) continue; for (int j = 0; j < NUM_COLS_PER_ROW; ++j) { if (!useTags) { KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), getValue( batchId, i, j)); kvset.add(kv); } else { KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), 0l, getValue(batchId, i, j), new Tag[] { new ArrayBackedTag((byte) 1, "metaValue1") }); kvset.add(kv); } } } encoder.startBlockEncoding(blkEncodingCtx, userDataStream); for (Cell kv : kvset) { encoder.encode(kv, blkEncodingCtx, userDataStream); } encoder.endBlockEncoding(blkEncodingCtx, userDataStream, null); } private static void generateRandomTestData(ConcurrentSkipListSet<Cell> kvset, int batchId, boolean useTags, PrefixTreeCodec encoder, HFileBlockEncodingContext blkEncodingCtx, DataOutputStream userDataStream) throws Exception { Random random = new Random(); for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) { if (random.nextInt(100) < 50) continue; for (int j = 0; j < NUM_COLS_PER_ROW; ++j) { if (random.nextInt(100) < 50) continue; if (!useTags) { KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), getValue( batchId, i, j)); kvset.add(kv); } else { KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), 0l, getValue(batchId, i, j), new Tag[] { new ArrayBackedTag((byte) 1, "metaValue1") }); kvset.add(kv); } } } encoder.startBlockEncoding(blkEncodingCtx, userDataStream); for (Cell kv : kvset) { encoder.encode(kv, blkEncodingCtx, userDataStream); } encoder.endBlockEncoding(blkEncodingCtx, userDataStream, null); } private static byte[] getRowKey(int batchId, int i) { return Bytes .toBytes("batch" + batchId + "_row" + (formatRowNum ? String.format("%04d", i) : i)); } private static byte[] getQualifier(int j) { return Bytes.toBytes("colfdfafhfhsdfhsdfh" + j); } private static byte[] getValue(int batchId, int i, int j) { return Bytes.toBytes("value_for_" + Bytes.toString(getRowKey(batchId, i)) + "_col" + j); } }