/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.HBaseTestingUtility.START_KEY;
import static org.apache.hadoop.hbase.HBaseTestingUtility.fam1;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.mob.MobConstants;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;

/**
 * Test mob store compaction
 */
@Category(MediumTests.class)
public class TestMobStoreCompaction {
  @Rule
  public TestName name = new TestName();
  static final Log LOG = LogFactory.getLog(TestMobStoreCompaction.class.getName());
  private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
  private Configuration conf = null;

  private HRegion region = null;
  private HTableDescriptor htd = null;
  private HColumnDescriptor hcd = null;
  private long mobCellThreshold = 1000;

  private FileSystem fs;

  private static final byte[] COLUMN_FAMILY = fam1;
  private final byte[] STARTROW = Bytes.toBytes(START_KEY);
  private int compactionThreshold;

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    UTIL.startMiniCluster(1);
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    UTIL.shutdownMiniCluster();
  }

  private void init(Configuration conf, long mobThreshold) throws Exception {
    this.conf = conf;
    this.mobCellThreshold = mobThreshold;
    HBaseTestingUtility UTIL = new HBaseTestingUtility(conf);

    compactionThreshold = conf.getInt("hbase.hstore.compactionThreshold", 3);
    htd = UTIL.createTableDescriptor(name.getMethodName());
    hcd = new HColumnDescriptor(COLUMN_FAMILY);
    hcd.setMobEnabled(true);
    hcd.setMobThreshold(mobThreshold);
    hcd.setMaxVersions(1);
    htd.modifyFamily(hcd);

    region = UTIL.createLocalHRegion(htd, null, null);
    fs = FileSystem.get(conf);
  }

  @After
  public void tearDown() throws Exception {
    region.close();
    fs.delete(UTIL.getDataTestDir(), true);
  }

  /**
   * During compaction, cells smaller than the threshold won't be affected.
   */
  @Test
  public void testSmallerValue() throws Exception {
    init(UTIL.getConfiguration(), 500);
    byte[] dummyData = makeDummyData(300); // smaller than mob threshold
    Table loader = new RegionAsTable(region);
    // one hfile per row
    for (int i = 0; i < compactionThreshold; i++) {
      Put p = createPut(i, dummyData);
      loader.put(p);
      region.flush(true);
    }
    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
    assertEquals("Before compaction: mob file count", 0, countMobFiles());
    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", 0, countMobRows());

    region.compactStores();

    assertEquals("After compaction: store files", 1, countStoreFiles());
    assertEquals("After compaction: mob file count", 0, countMobFiles());
    assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles());
    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("After compaction: mob rows", 0, countMobRows());
  }

  /**
   * During compaction, the mob threshold is raised above the cell size, so the compacted
   * cells are no longer stored as MOBs.
   */
  @Test
  public void testLargerValue() throws Exception {
    init(UTIL.getConfiguration(), 200);
    byte[] dummyData = makeDummyData(300); // larger than mob threshold
    Table loader = new RegionAsTable(region);
    for (int i = 0; i < compactionThreshold; i++) {
      Put p = createPut(i, dummyData);
      loader.put(p);
      region.flush(true);
    }
    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
    assertEquals("Before compaction: mob file count", compactionThreshold, countMobFiles());
    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", compactionThreshold, countMobRows());
    assertEquals("Before compaction: number of mob cells", compactionThreshold,
        countMobCellsInMetadata());
    // Change the threshold to be larger than the data size
    region.getTableDesc().getFamily(COLUMN_FAMILY).setMobThreshold(500);
    region.initialize();
    region.compactStores();

    assertEquals("After compaction: store files", 1, countStoreFiles());
    assertEquals("After compaction: mob file count", compactionThreshold, countMobFiles());
    assertEquals("After compaction: referenced mob file count", 0, countReferencedMobFiles());
    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("After compaction: mob rows", 0, countMobRows());
  }

  /**
   * This test first generates store files, then bulk loads them and triggers a compaction.
   * During compaction, the cell values are larger than the threshold.
   */
  @Test
  public void testMobCompactionWithBulkload() throws Exception {
    // The following will produce store files of 600.
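    // With 600-byte values and a 300-byte mob threshold, every bulk-loaded cell exceeds
    // the threshold, so the compaction below moves all of them into a single mob file.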
    init(UTIL.getConfiguration(), 300);
    byte[] dummyData = makeDummyData(600);

    Path hbaseRootDir = FSUtils.getRootDir(conf);
    Path basedir = new Path(hbaseRootDir, htd.getNameAsString());
    List<Pair<byte[], String>> hfiles = new ArrayList<>(1);
    for (int i = 0; i < compactionThreshold; i++) {
      Path hpath = new Path(basedir, "hfile" + i);
      hfiles.add(Pair.newPair(COLUMN_FAMILY, hpath.toString()));
      createHFile(hpath, i, dummyData);
    }

    // The following will bulk load the above generated store files and compact,
    // with 600 (fileSize) > 300 (threshold)
    Map<byte[], List<Path>> map = region.bulkLoadHFiles(hfiles, true, null);
    assertTrue("Bulkload result:", !map.isEmpty());
    assertEquals("Before compaction: store files", compactionThreshold, countStoreFiles());
    assertEquals("Before compaction: mob file count", 0, countMobFiles());
    assertEquals("Before compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", 0, countMobRows());
    assertEquals("Before compaction: referenced mob file count", 0, countReferencedMobFiles());

    region.compactStores();

    assertEquals("After compaction: store files", 1, countStoreFiles());
    assertEquals("After compaction: mob file count:", 1, countMobFiles());
    assertEquals("After compaction: rows", compactionThreshold, UTIL.countRows(region));
    assertEquals("After compaction: mob rows", compactionThreshold, countMobRows());
    assertEquals("After compaction: referenced mob file count", 1, countReferencedMobFiles());
    assertEquals("After compaction: number of mob cells", compactionThreshold,
        countMobCellsInMetadata());
  }

  @Test
  public void testMajorCompactionAfterDelete() throws Exception {
    init(UTIL.getConfiguration(), 100);
    byte[] dummyData = makeDummyData(200); // larger than mob threshold
    Table loader = new RegionAsTable(region);
    // create hfiles and mob hfiles but don't trigger compaction
    int numHfiles = compactionThreshold - 1;
    byte[] deleteRow = Bytes.add(STARTROW, Bytes.toBytes(0));
    for (int i = 0; i < numHfiles; i++) {
      Put p = createPut(i, dummyData);
      loader.put(p);
      region.flush(true);
    }
    assertEquals("Before compaction: store files", numHfiles, countStoreFiles());
    assertEquals("Before compaction: mob file count", numHfiles, countMobFiles());
    assertEquals("Before compaction: rows", numHfiles, UTIL.countRows(region));
    assertEquals("Before compaction: mob rows", numHfiles, countMobRows());
    assertEquals("Before compaction: number of mob cells", numHfiles, countMobCellsInMetadata());
    // now let's delete some cells that contain mobs
    Delete delete = new Delete(deleteRow);
    delete.addFamily(COLUMN_FAMILY);
    region.delete(delete);
    region.flush(true);

    assertEquals("Before compaction: store files", numHfiles + 1, countStoreFiles());
    assertEquals("Before compaction: mob files", numHfiles, countMobFiles());

    // region.compactStores();
    region.compact(true);

    assertEquals("After compaction: store files", 1, countStoreFiles());
    // still have original mob hfiles and now added a mob del file
    assertEquals("After compaction: mob files", numHfiles + 1, countMobFiles());

    Scan scan = new Scan();
    scan.setRaw(true);
    InternalScanner scanner = region.getScanner(scan);
    List<Cell> results = new ArrayList<>();
    scanner.next(results);
    int deleteCount = 0;
    while (!results.isEmpty()) {
      for (Cell c : results) {
        if (c.getTypeByte() == KeyValue.Type.DeleteFamily.getCode()) {
          deleteCount++;
          assertTrue(Bytes.equals(CellUtil.cloneRow(c), deleteRow));
        }
      }
      results.clear();
      scanner.next(results);
    }
    // assert the delete mark is retained after the major compaction
    assertEquals(1, deleteCount);
    scanner.close();
    // assert the deleted cell is not counted
    assertEquals("The cells in mob files", numHfiles - 1, countMobCellsInMobFiles(1));
  }

  private int countStoreFiles() throws IOException {
    Store store = region.getStore(COLUMN_FAMILY);
    return store.getStorefilesCount();
  }

  private int countMobFiles() throws IOException {
    Path mobDirPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
    if (fs.exists(mobDirPath)) {
      FileStatus[] files = UTIL.getTestFileSystem().listStatus(mobDirPath);
      return files.length;
    }
    return 0;
  }

  private long countMobCellsInMetadata() throws IOException {
    long mobCellsCount = 0;
    Path mobDirPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
    Configuration copyOfConf = new Configuration(conf);
    copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
    CacheConfig cacheConfig = new CacheConfig(copyOfConf);
    if (fs.exists(mobDirPath)) {
      FileStatus[] files = UTIL.getTestFileSystem().listStatus(mobDirPath);
      for (FileStatus file : files) {
        StoreFile sf = new StoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true);
        sf.initReader();
        Map<byte[], byte[]> fileInfo = sf.getReader().loadFileInfo();
        byte[] count = fileInfo.get(StoreFile.MOB_CELLS_COUNT);
        assertTrue(count != null);
        mobCellsCount += Bytes.toLong(count);
      }
    }
    return mobCellsCount;
  }

  private Put createPut(int rowIdx, byte[] dummyData) throws IOException {
    Put p = new Put(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)));
    p.setDurability(Durability.SKIP_WAL);
    p.addColumn(COLUMN_FAMILY, Bytes.toBytes("colX"), dummyData);
    return p;
  }

  /**
   * Create an HFile containing a single cell whose value is the given dummy data.
   */
  private void createHFile(Path path, int rowIdx, byte[] dummyData) throws IOException {
    HFileContext meta = new HFileContextBuilder().build();
    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path)
        .withFileContext(meta).create();
    long now = System.currentTimeMillis();
    try {
      KeyValue kv = new KeyValue(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)), COLUMN_FAMILY,
          Bytes.toBytes("colX"), now, dummyData);
      writer.append(kv);
    } finally {
      writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
          Bytes.toBytes(System.currentTimeMillis()));
      writer.close();
    }
  }

  private int countMobRows() throws IOException {
    Scan scan = new Scan();
    // Do not retrieve the mob data when scanning
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    InternalScanner scanner = region.getScanner(scan);

    int scannedCount = 0;
    List<Cell> results = new ArrayList<>();
    boolean hasMore = true;
    while (hasMore) {
      hasMore = scanner.next(results);
      for (Cell c : results) {
        if (MobUtils.isMobReferenceCell(c)) {
          scannedCount++;
        }
      }
      results.clear();
    }
    scanner.close();

    return scannedCount;
  }

  private byte[] makeDummyData(int size) {
    byte[] dummyData = new byte[size];
    new Random().nextBytes(dummyData);
    return dummyData;
  }

  private int countReferencedMobFiles() throws IOException {
    Scan scan = new Scan();
    // Do not retrieve the mob data when scanning
    scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE));
    InternalScanner scanner = region.getScanner(scan);

    List<Cell> kvs = new ArrayList<>();
    boolean hasMore = true;
    String fileName;
    Set<String> files = new HashSet<>();
    do {
      kvs.clear();
      hasMore = scanner.next(kvs);
      for (Cell kv : kvs) {
        if (!MobUtils.isMobReferenceCell(kv)) {
          continue;
        }
        if (!MobUtils.hasValidMobRefCellValue(kv)) {
          continue;
        }
        int size = MobUtils.getMobValueLength(kv);
        if (size <= mobCellThreshold) {
          continue;
        }
        fileName = MobUtils.getMobFileName(kv);
        if (fileName.isEmpty()) {
          continue;
        }
        files.add(fileName);
        Path familyPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(),
            hcd.getNameAsString());
        assertTrue(fs.exists(new Path(familyPath, fileName)));
      }
    } while (hasMore);

    scanner.close();

    return files.size();
  }
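  /**
   * Scans the mob files directly and counts the cells that survive a COMPACT_DROP_DELETES
   * scan, so cells masked by a del file are excluded. Also asserts that the expected
   * number of del files is present under the mob family directory.
   */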
  private int countMobCellsInMobFiles(int expectedNumDelfiles) throws IOException {
    Configuration copyOfConf = new Configuration(conf);
    copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
    CacheConfig cacheConfig = new CacheConfig(copyOfConf);
    Path mobDirPath = MobUtils.getMobFamilyPath(conf, htd.getTableName(), hcd.getNameAsString());
    List<StoreFile> sfs = new ArrayList<>();
    int numDelfiles = 0;
    int size = 0;
    if (fs.exists(mobDirPath)) {
      for (FileStatus f : fs.listStatus(mobDirPath)) {
        StoreFile sf = new StoreFile(fs, f.getPath(), conf, cacheConfig, BloomType.NONE, true);
        sfs.add(sf);
        if (StoreFileInfo.isDelFile(sf.getPath())) {
          numDelfiles++;
        }
      }
      List<StoreFileScanner> scanners = StoreFileScanner.getScannersForStoreFiles(sfs, false,
          true, false, false, HConstants.LATEST_TIMESTAMP);
      Scan scan = new Scan();
      scan.setMaxVersions(hcd.getMaxVersions());
      long timeToPurgeDeletes = Math.max(conf.getLong("hbase.hstore.time.to.purge.deletes", 0), 0);
      long ttl = HStore.determineTTLFromFamily(hcd);
      ScanInfo scanInfo = new ScanInfo(copyOfConf, hcd, ttl, timeToPurgeDeletes,
          CellComparator.COMPARATOR);
      StoreScanner scanner = new StoreScanner(scan, scanInfo, ScanType.COMPACT_DROP_DELETES, null,
          scanners, 0L, HConstants.LATEST_TIMESTAMP);
      try {
        size += UTIL.countRows(scanner);
      } finally {
        scanner.close();
      }
    }
    // assert the number of the existing del files
    assertEquals(expectedNumDelfiles, numDelfiles);
    return size;
  }
}