/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertThat;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.MultithreadedTestUtil.RepeatingTestThread;
import org.apache.hadoop.hbase.MultithreadedTestUtil.TestContext;
import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ClientServiceCallable;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.RpcRetryingCaller;
import org.apache.hadoop.hbase.client.RpcRetryingCallerFactory;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.SecureBulkLoadClient;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
import org.apache.hadoop.hbase.regionserver.wal.TestWALActionsListener;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CompactRegionRequest;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALKey;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import com.google.common.collect.Lists;

/**
 * Tests bulk loading of HFiles and shows the atomicity or lack of atomicity of
 * the region server's bulkLoad functionality.
 */
@RunWith(Parameterized.class)
@Category({RegionServerTests.class, LargeTests.class})
public class TestHRegionServerBulkLoad {
  private static final Log LOG = LogFactory.getLog(TestHRegionServerBulkLoad.class);
  protected static HBaseTestingUtility UTIL = new HBaseTestingUtility();
  protected final static Configuration conf = UTIL.getConfiguration();
  protected final static byte[] QUAL = Bytes.toBytes("qual");
  protected final static int NUM_CFS = 10;
  private int sleepDuration;
  public static int BLOCKSIZE = 64 * 1024;
  public static Algorithm COMPRESSION = Compression.Algorithm.NONE;

  protected final static byte[][] families = new byte[NUM_CFS][];
  static {
    for (int i = 0; i < NUM_CFS; i++) {
      families[i] = Bytes.toBytes(family(i));
    }
  }

  @Parameters
  public static final Collection<Object[]> parameters() {
    int[] sleepDurations = new int[] { 0, 30000 };
    List<Object[]> configurations = new ArrayList<>();
    for (int i : sleepDurations) {
      configurations.add(new Object[] { i });
    }
    return configurations;
  }

  public TestHRegionServerBulkLoad(int duration) {
    this.sleepDuration = duration;
  }

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    conf.setInt("hbase.rpc.timeout", 10 * 1000);
  }

  /**
   * Create a rowkey compatible with
   * {@link #createHFile(FileSystem, Path, byte[], byte[], byte[], int)}.
   */
  public static byte[] rowkey(int i) {
    return Bytes.toBytes(String.format("row_%08d", i));
  }

  static String family(int i) {
    return String.format("family_%04d", i);
  }

  /**
   * Create an HFile with the given number of rows with a specified value.
   */
  public static void createHFile(FileSystem fs, Path path, byte[] family,
      byte[] qualifier, byte[] value, int numRows) throws IOException {
    HFileContext context = new HFileContextBuilder().withBlockSize(BLOCKSIZE)
        .withCompression(COMPRESSION)
        .build();
    HFile.Writer writer = HFile
        .getWriterFactory(conf, new CacheConfig(conf))
        .withPath(fs, path)
        .withFileContext(context)
        .create();
    long now = System.currentTimeMillis();
    try {
      for (int i = 0; i < numRows; i++) {
        KeyValue kv = new KeyValue(rowkey(i), family, qualifier, now, value);
        writer.append(kv);
      }
      writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(now));
    } finally {
      writer.close();
    }
  }
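
  // A minimal usage sketch for createHFile above; the output path here is
  // hypothetical and chosen only for illustration (the loader below writes
  // under UTIL.getDataTestDirOnTestFS instead):
  //
  //   FileSystem fs = UTIL.getTestFileSystem();
  //   createHFile(fs, new Path("/tmp/example_hfile"), Bytes.toBytes(family(0)),
  //       QUAL, Bytes.toBytes("0000000001"), 1000);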

  /**
   * Thread that repeatedly bulk-loads HFiles into the table.
   *
   * Each iteration loads one 1000-row HFile per column family (NUM_CFS = 10
   * files), each of which occupies open file handles on the region server. So
   * every 5 iterations (50 new store files) it requests a region compaction
   * to reduce the number of open file handles.
   */
  public static class AtomicHFileLoader extends RepeatingTestThread {
    final AtomicLong numBulkLoads = new AtomicLong();
    final AtomicLong numCompactions = new AtomicLong();
    private TableName tableName;

    public AtomicHFileLoader(TableName tableName, TestContext ctx,
        byte targetFamilies[][]) throws IOException {
      super(ctx);
      this.tableName = tableName;
    }

    public void doAnAction() throws Exception {
      long iteration = numBulkLoads.getAndIncrement();
      Path dir = UTIL.getDataTestDirOnTestFS(String.format("bulkLoad_%08d",
          iteration));

      // create HFiles for different column families
      FileSystem fs = UTIL.getTestFileSystem();
      byte[] val = Bytes.toBytes(String.format("%010d", iteration));
      final List<Pair<byte[], String>> famPaths = new ArrayList<>(NUM_CFS);
      for (int i = 0; i < NUM_CFS; i++) {
        Path hfile = new Path(dir, family(i));
        byte[] fam = Bytes.toBytes(family(i));
        createHFile(fs, hfile, fam, QUAL, val, 1000);
        famPaths.add(new Pair<>(fam, hfile.toString()));
      }

      // bulk load HFiles
      final ClusterConnection conn = (ClusterConnection) UTIL.getConnection();
      Table table = conn.getTable(tableName);
      final String bulkToken =
          new SecureBulkLoadClient(UTIL.getConfiguration(), table).prepareBulkLoad(conn);
      ClientServiceCallable<Void> callable = new ClientServiceCallable<Void>(conn,
          tableName, Bytes.toBytes("aaa"),
          new RpcControllerFactory(UTIL.getConfiguration()).newController()) {
        @Override
        public Void rpcCall() throws Exception {
          LOG.debug("Going to connect to server " + getLocation() + " for row "
              + Bytes.toStringBinary(getRow()));
          SecureBulkLoadClient secureClient = null;
          byte[] regionName = getLocation().getRegionInfo().getRegionName();
          try (Table table = conn.getTable(getTableName())) {
            secureClient = new SecureBulkLoadClient(UTIL.getConfiguration(), table);
            secureClient.secureBulkLoadHFiles(getStub(), famPaths, regionName,
                true, null, bulkToken);
          }
          return null;
        }
      };
      RpcRetryingCallerFactory factory = new RpcRetryingCallerFactory(conf);
      RpcRetryingCaller<Void> caller = factory.<Void> newCaller();
      caller.callWithRetries(callable, Integer.MAX_VALUE);

      // Periodically request a compaction to reduce the number of open file
      // handles (each load adds NUM_CFS = 10 store files, so up to 50 new
      // files accumulate between compactions).
      if (numBulkLoads.get() % 5 == 0) {
        callable = new ClientServiceCallable<Void>(conn,
            tableName, Bytes.toBytes("aaa"),
            new RpcControllerFactory(UTIL.getConfiguration()).newController()) {
          @Override
          protected Void rpcCall() throws Exception {
            LOG.debug("compacting " + getLocation() + " for row "
                + Bytes.toStringBinary(getRow()));
            AdminProtos.AdminService.BlockingInterface server =
                conn.getAdmin(getLocation().getServerName());
            CompactRegionRequest request = RequestConverter.buildCompactRegionRequest(
                getLocation().getRegionInfo().getRegionName(), true, null);
            server.compactRegion(null, request);
            numCompactions.incrementAndGet();
            return null;
          }
        };
        caller.callWithRetries(callable, Integer.MAX_VALUE);
      }
    }
  }

  public static class MyObserver implements RegionObserver {
    static int sleepDuration;

    @Override
    public InternalScanner preCompact(ObserverContext<RegionCoprocessorEnvironment> e,
        final Store store, final InternalScanner scanner, final ScanType scanType)
        throws IOException {
      try {
        // Slow the compaction down so it overlaps concurrent bulk loads and scans.
        Thread.sleep(sleepDuration);
      } catch (InterruptedException ie) {
        IOException ioe = new InterruptedIOException();
        ioe.initCause(ie);
        throw ioe;
      }
      return scanner;
    }
  }
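
  // Note: MyObserver above is installed on the test table by setupTable()
  // below. Its preCompact() sleep (0ms or 30s, per the @Parameters values)
  // stretches each compaction so that bulk loads and concurrent scans are
  // more likely to race against a compaction in flight.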

  /**
   * Thread that does full scans of the table looking for any partially
   * completed rows.
   */
  public static class AtomicScanReader extends RepeatingTestThread {
    byte targetFamilies[][];
    Table table;
    AtomicLong numScans = new AtomicLong();
    AtomicLong numRowsScanned = new AtomicLong();
    TableName TABLE_NAME;

    public AtomicScanReader(TableName TABLE_NAME, TestContext ctx,
        byte targetFamilies[][]) throws IOException {
      super(ctx);
      this.TABLE_NAME = TABLE_NAME;
      this.targetFamilies = targetFamilies;
      table = UTIL.getConnection().getTable(TABLE_NAME);
    }

    public void doAnAction() throws Exception {
      Scan s = new Scan();
      for (byte[] family : targetFamilies) {
        s.addFamily(family);
      }
      ResultScanner scanner = table.getScanner(s);

      for (Result res : scanner) {
        byte[] lastRow = null, lastFam = null, lastQual = null;
        byte[] gotValue = null;
        for (byte[] family : targetFamilies) {
          byte qualifier[] = QUAL;
          byte thisValue[] = res.getValue(family, qualifier);
          // Every family of a row is written with the same value in one bulk
          // load, so differing values expose a torn (non-atomic) load.
          if (gotValue != null && thisValue != null
              && !Bytes.equals(gotValue, thisValue)) {
            StringBuilder msg = new StringBuilder();
            msg.append("Failed on scan ").append(numScans)
                .append(" after scanning ").append(numRowsScanned)
                .append(" rows!\n");
            msg.append("Current was " + Bytes.toString(res.getRow()) + "/"
                + Bytes.toString(family) + ":" + Bytes.toString(qualifier)
                + " = " + Bytes.toString(thisValue) + "\n");
            msg.append("Previous was " + Bytes.toString(lastRow) + "/"
                + Bytes.toString(lastFam) + ":" + Bytes.toString(lastQual)
                + " = " + Bytes.toString(gotValue));
            throw new RuntimeException(msg.toString());
          }

          lastFam = family;
          lastQual = qualifier;
          lastRow = res.getRow();
          gotValue = thisValue;
        }
        numRowsScanned.getAndIncrement();
      }
      numScans.getAndIncrement();
    }
  }

  /**
   * Creates a table with given table name and specified number of column
   * families if the table does not already exist.
   */
  public void setupTable(TableName table, int cfs) throws IOException {
    try {
      LOG.info("Creating table " + table);
      HTableDescriptor htd = new HTableDescriptor(table);
      htd.addCoprocessor(MyObserver.class.getName());
      MyObserver.sleepDuration = this.sleepDuration;
      for (int i = 0; i < cfs; i++) {
        htd.addFamily(new HColumnDescriptor(family(i)));
      }
      UTIL.getAdmin().createTable(htd);
    } catch (TableExistsException tee) {
      LOG.info("Table " + table + " already exists");
    }
  }
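
  // The test below ties the pieces together: a single-region mini cluster,
  // one AtomicHFileLoader racing against numScanners AtomicScanReaders for
  // millisToRun milliseconds, plus a WAL listener (FindBulkHBaseListener,
  // defined at the bottom of this class) asserting that a bulk-load marker
  // was written to the WAL.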

  /**
   * Atomic bulk load.
   */
  @Test
  public void testAtomicBulkLoad() throws Exception {
    TableName TABLE_NAME = TableName.valueOf("atomicBulkLoad");

    int millisToRun = 30000;
    int numScanners = 50;

    UTIL.startMiniCluster(1, false, true);
    try {
      WAL log = UTIL.getHBaseCluster().getRegionServer(0).getWAL(null);
      FindBulkHBaseListener listener = new FindBulkHBaseListener();
      log.registerWALActionsListener(listener);
      runAtomicBulkloadTest(TABLE_NAME, millisToRun, numScanners);
      assertThat(listener.isFound(), is(true));
    } finally {
      UTIL.shutdownMiniCluster();
    }
  }

  void runAtomicBulkloadTest(TableName tableName, int millisToRun, int numScanners)
      throws Exception {
    setupTable(tableName, 10);

    TestContext ctx = new TestContext(UTIL.getConfiguration());

    AtomicHFileLoader loader = new AtomicHFileLoader(tableName, ctx, null);
    ctx.addThread(loader);

    List<AtomicScanReader> scanners = Lists.newArrayList();
    for (int i = 0; i < numScanners; i++) {
      AtomicScanReader scanner = new AtomicScanReader(tableName, ctx, families);
      scanners.add(scanner);
      ctx.addThread(scanner);
    }

    ctx.startThreads();
    ctx.waitFor(millisToRun);
    ctx.stop();

    LOG.info("Loaders:");
    LOG.info("  loaded " + loader.numBulkLoads.get());
    LOG.info("  compactions " + loader.numCompactions.get());
    LOG.info("Scanners:");
    for (AtomicScanReader scanner : scanners) {
      LOG.info("  scanned " + scanner.numScans.get());
      LOG.info("  verified " + scanner.numRowsScanned.get() + " rows");
    }
  }

  /**
   * Run test on an HBase instance for 5 minutes. This assumes that the table
   * under test only has a single region.
   */
  public static void main(String args[]) throws Exception {
    try {
      Configuration c = HBaseConfiguration.create();
      TestHRegionServerBulkLoad test = new TestHRegionServerBulkLoad(0);
      test.setConf(c);
      test.runAtomicBulkloadTest(TableName.valueOf("atomicTableTest"), 5 * 60 * 1000, 50);
    } finally {
      System.exit(0); // something hangs (believe it is lru threadpool)
    }
  }

  private void setConf(Configuration c) {
    UTIL = new HBaseTestingUtility(c);
  }

  static class FindBulkHBaseListener extends TestWALActionsListener.DummyWALActionsListener {
    private boolean found = false;

    @Override
    public void visitLogEntryBeforeWrite(WALKey logKey, WALEdit logEdit) {
      for (Cell cell : logEdit.getCells()) {
        KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
        for (Map.Entry entry : kv.toStringMap().entrySet()) {
          if (entry.getValue().equals(Bytes.toString(WALEdit.BULK_LOAD))) {
            found = true;
          }
        }
      }
    }

    public boolean isFound() {
      return found;
    }
  }
}