/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.wal;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MockRegionServerServices;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.crypto.KeyProviderForTesting;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.LogRoller;
import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
import org.apache.hadoop.hbase.trace.HBaseHTraceConfiguration;
import org.apache.hadoop.hbase.trace.SpanReceiverHost;
import org.apache.hadoop.hbase.wal.WALProvider.Writer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.htrace.Sampler;
import org.apache.htrace.Trace;
import org.apache.htrace.TraceScope;
import org.apache.htrace.impl.ProbabilitySampler;
import com.codahale.metrics.ConsoleReporter;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.MetricFilter;
// imports for things that haven't moved from regionserver.wal yet.
import org.apache.hadoop.hbase.regionserver.wal.SecureProtobufLogReader;
import org.apache.hadoop.hbase.regionserver.wal.SecureProtobufLogWriter;
import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import static com.codahale.metrics.MetricRegistry.name;
/**
* This class runs performance benchmarks for {@link WAL}.
* See usage for this tool by running:
* <code>$ hbase org.apache.hadoop.hbase.wal.WALPerformanceEvaluation -h</code>
*/
@InterfaceAudience.Private
public final class WALPerformanceEvaluation extends Configured implements Tool {
private static final Log LOG = LogFactory.getLog(WALPerformanceEvaluation.class.getName());
private final MetricRegistry metrics = new MetricRegistry();
private final Meter syncMeter =
metrics.meter(name(WALPerformanceEvaluation.class, "syncMeter", "syncs"));
private final Histogram syncHistogram = metrics.histogram(
name(WALPerformanceEvaluation.class, "syncHistogram", "nanos-between-syncs"));
private final Histogram syncCountHistogram = metrics.histogram(
name(WALPerformanceEvaluation.class, "syncCountHistogram", "countPerSync"));
private final Meter appendMeter = metrics.meter(
name(WALPerformanceEvaluation.class, "appendMeter", "bytes"));
private final Histogram latencyHistogram =
metrics.histogram(name(WALPerformanceEvaluation.class, "latencyHistogram", "nanos"));
private final MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl();
private HBaseTestingUtility TEST_UTIL;
static final String TABLE_NAME = "WALPerformanceEvaluation";
static final String QUALIFIER_PREFIX = "q";
static final String FAMILY_PREFIX = "cf";
private int numQualifiers = 1;
private int valueSize = 512;
private int keySize = 16;
@Override
public void setConf(Configuration conf) {
super.setConf(conf);
TEST_UTIL = new HBaseTestingUtility(conf);
}
/**
* Perform WAL.append() of Put object, for the number of iterations requested.
* Keys and Vaues are generated randomly, the number of column families,
* qualifiers and key/value size is tunable by the user.
*/
class WALPutBenchmark implements Runnable {
private final long numIterations;
private final int numFamilies;
private final boolean noSync;
private final HRegion region;
private final int syncInterval;
private final HTableDescriptor htd;
private final Sampler loopSampler;
private final NavigableMap<byte[], Integer> scopes;
WALPutBenchmark(final HRegion region, final HTableDescriptor htd,
final long numIterations, final boolean noSync, final int syncInterval,
final double traceFreq) {
this.numIterations = numIterations;
this.noSync = noSync;
this.syncInterval = syncInterval;
this.numFamilies = htd.getColumnFamilyCount();
this.region = region;
this.htd = htd;
scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
for(byte[] fam : htd.getFamiliesKeys()) {
scopes.put(fam, 0);
}
String spanReceivers = getConf().get("hbase.trace.spanreceiver.classes");
if (spanReceivers == null || spanReceivers.isEmpty()) {
loopSampler = Sampler.NEVER;
} else {
if (traceFreq <= 0.0) {
LOG.warn("Tracing enabled but traceFreq=0.");
loopSampler = Sampler.NEVER;
} else if (traceFreq >= 1.0) {
loopSampler = Sampler.ALWAYS;
if (numIterations > 1000) {
LOG.warn("Full tracing of all iterations will produce a lot of data. Be sure your"
+ " SpanReceiver can keep up.");
}
} else {
getConf().setDouble("hbase.sampler.fraction", traceFreq);
loopSampler = new ProbabilitySampler(new HBaseHTraceConfiguration(getConf()));
}
}
}
@Override
public void run() {
byte[] key = new byte[keySize];
byte[] value = new byte[valueSize];
Random rand = new Random(Thread.currentThread().getId());
WAL wal = region.getWAL();
TraceScope threadScope =
Trace.startSpan("WALPerfEval." + Thread.currentThread().getName());
try {
long startTime = System.currentTimeMillis();
int lastSync = 0;
for (int i = 0; i < numIterations; ++i) {
assert Trace.currentSpan() == threadScope.getSpan() : "Span leak detected.";
TraceScope loopScope = Trace.startSpan("runLoopIter" + i, loopSampler);
try {
long now = System.nanoTime();
Put put = setupPut(rand, key, value, numFamilies);
WALEdit walEdit = new WALEdit();
addFamilyMapToWALEdit(put.getFamilyCellMap(), walEdit);
HRegionInfo hri = region.getRegionInfo();
final WALKey logkey =
new WALKey(hri.getEncodedNameAsBytes(), hri.getTable(), now, mvcc, scopes);
wal.append(hri, logkey, walEdit, true);
if (!this.noSync) {
if (++lastSync >= this.syncInterval) {
wal.sync();
lastSync = 0;
}
}
latencyHistogram.update(System.nanoTime() - now);
} finally {
loopScope.close();
}
}
long totalTime = (System.currentTimeMillis() - startTime);
logBenchmarkResult(Thread.currentThread().getName(), numIterations, totalTime);
} catch (Exception e) {
LOG.error(getClass().getSimpleName() + " Thread failed", e);
} finally {
threadScope.close();
}
}
}
@Override
public int run(String[] args) throws Exception {
Path rootRegionDir = null;
int numThreads = 1;
long numIterations = 1000000;
int numFamilies = 1;
int syncInterval = 0;
boolean noSync = false;
boolean verify = false;
boolean verbose = false;
boolean cleanup = true;
boolean noclosefs = false;
long roll = Long.MAX_VALUE;
boolean compress = false;
String cipher = null;
int numRegions = 1;
String spanReceivers = getConf().get("hbase.trace.spanreceiver.classes");
boolean trace = spanReceivers != null && !spanReceivers.isEmpty();
double traceFreq = 1.0;
// Process command line args
for (int i = 0; i < args.length; i++) {
String cmd = args[i];
try {
if (cmd.equals("-threads")) {
numThreads = Integer.parseInt(args[++i]);
} else if (cmd.equals("-iterations")) {
numIterations = Long.parseLong(args[++i]);
} else if (cmd.equals("-path")) {
rootRegionDir = new Path(args[++i]);
} else if (cmd.equals("-families")) {
numFamilies = Integer.parseInt(args[++i]);
} else if (cmd.equals("-qualifiers")) {
numQualifiers = Integer.parseInt(args[++i]);
} else if (cmd.equals("-keySize")) {
keySize = Integer.parseInt(args[++i]);
} else if (cmd.equals("-valueSize")) {
valueSize = Integer.parseInt(args[++i]);
} else if (cmd.equals("-syncInterval")) {
syncInterval = Integer.parseInt(args[++i]);
} else if (cmd.equals("-nosync")) {
noSync = true;
} else if (cmd.equals("-verify")) {
verify = true;
} else if (cmd.equals("-verbose")) {
verbose = true;
} else if (cmd.equals("-nocleanup")) {
cleanup = false;
} else if (cmd.equals("-noclosefs")) {
noclosefs = true;
} else if (cmd.equals("-roll")) {
roll = Long.parseLong(args[++i]);
} else if (cmd.equals("-compress")) {
compress = true;
} else if (cmd.equals("-encryption")) {
cipher = args[++i];
} else if (cmd.equals("-regions")) {
numRegions = Integer.parseInt(args[++i]);
} else if (cmd.equals("-traceFreq")) {
traceFreq = Double.parseDouble(args[++i]);
} else if (cmd.equals("-h")) {
printUsageAndExit();
} else if (cmd.equals("--help")) {
printUsageAndExit();
} else {
System.err.println("UNEXPECTED: " + cmd);
printUsageAndExit();
}
} catch (Exception e) {
printUsageAndExit();
}
}
if (compress) {
Configuration conf = getConf();
conf.setBoolean(HConstants.ENABLE_WAL_COMPRESSION, true);
}
if (cipher != null) {
// Set up WAL for encryption
Configuration conf = getConf();
conf.set(HConstants.CRYPTO_KEYPROVIDER_CONF_KEY, KeyProviderForTesting.class.getName());
conf.set(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY, "hbase");
conf.setClass("hbase.regionserver.hlog.reader.impl", SecureProtobufLogReader.class,
WAL.Reader.class);
conf.setClass("hbase.regionserver.hlog.writer.impl", SecureProtobufLogWriter.class,
Writer.class);
conf.setBoolean(HConstants.ENABLE_WAL_ENCRYPTION, true);
conf.set(HConstants.CRYPTO_WAL_ALGORITHM_CONF_KEY, cipher);
}
if (numThreads < numRegions) {
LOG.warn("Number of threads is less than the number of regions; some regions will sit idle.");
}
// Internal config. goes off number of threads; if more threads than handlers, stuff breaks.
// In regionserver, number of handlers == number of threads.
getConf().setInt(HConstants.REGION_SERVER_HANDLER_COUNT, numThreads);
// Run WAL Performance Evaluation
// First set the fs from configs. In case we are on hadoop1
FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
FileSystem fs = FileSystem.get(getConf());
LOG.info("FileSystem: " + fs);
SpanReceiverHost receiverHost = trace ? SpanReceiverHost.getInstance(getConf()) : null;
final Sampler<?> sampler = trace ? Sampler.ALWAYS : Sampler.NEVER;
TraceScope scope = Trace.startSpan("WALPerfEval", sampler);
try {
if (rootRegionDir == null) {
rootRegionDir = TEST_UTIL.getDataTestDirOnTestFS("WALPerformanceEvaluation");
}
rootRegionDir = rootRegionDir.makeQualified(fs);
cleanRegionRootDir(fs, rootRegionDir);
FSUtils.setRootDir(getConf(), rootRegionDir);
final WALFactory wals = new WALFactory(getConf(), null, "wals");
final HRegion[] regions = new HRegion[numRegions];
final Runnable[] benchmarks = new Runnable[numRegions];
final MockRegionServerServices mockServices = new MockRegionServerServices(getConf());
final LogRoller roller = new LogRoller(mockServices, mockServices);
Threads.setDaemonThreadRunning(roller.getThread(), "WALPerfEval.logRoller");
try {
for(int i = 0; i < numRegions; i++) {
// Initialize Table Descriptor
// a table per desired region means we can avoid carving up the key space
final HTableDescriptor htd = createHTableDescriptor(i, numFamilies);
regions[i] = openRegion(fs, rootRegionDir, htd, wals, roll, roller);
benchmarks[i] = Trace.wrap(new WALPutBenchmark(regions[i], htd, numIterations, noSync,
syncInterval, traceFreq));
}
ConsoleReporter reporter = ConsoleReporter.forRegistry(metrics).
outputTo(System.out).convertRatesTo(TimeUnit.SECONDS).filter(MetricFilter.ALL).build();
reporter.start(30, TimeUnit.SECONDS);
long putTime = runBenchmark(benchmarks, numThreads);
logBenchmarkResult("Summary: threads=" + numThreads + ", iterations=" + numIterations +
", syncInterval=" + syncInterval, numIterations * numThreads, putTime);
for (int i = 0; i < numRegions; i++) {
if (regions[i] != null) {
closeRegion(regions[i]);
regions[i] = null;
}
}
if (verify) {
LOG.info("verifying written log entries.");
Path dir = new Path(FSUtils.getRootDir(getConf()),
AbstractFSWALProvider.getWALDirectoryName("wals"));
long editCount = 0;
FileStatus [] fsss = fs.listStatus(dir);
if (fsss.length == 0) throw new IllegalStateException("No WAL found");
for (FileStatus fss: fsss) {
Path p = fss.getPath();
if (!fs.exists(p)) throw new IllegalStateException(p.toString());
editCount += verify(wals, p, verbose);
}
long expected = numIterations * numThreads;
if (editCount != expected) {
throw new IllegalStateException("Counted=" + editCount + ", expected=" + expected);
}
}
} finally {
mockServices.stop("test clean up.");
for (int i = 0; i < numRegions; i++) {
if (regions[i] != null) {
closeRegion(regions[i]);
}
}
if (null != roller) {
LOG.info("shutting down log roller.");
roller.close();
}
wals.shutdown();
// Remove the root dir for this test region
if (cleanup) cleanRegionRootDir(fs, rootRegionDir);
}
} finally {
// We may be called inside a test that wants to keep on using the fs.
if (!noclosefs) fs.close();
scope.close();
if (receiverHost != null) receiverHost.closeReceivers();
}
return(0);
}
private static HTableDescriptor createHTableDescriptor(final int regionNum,
final int numFamilies) {
HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(TABLE_NAME + ":" + regionNum));
for (int i = 0; i < numFamilies; ++i) {
HColumnDescriptor colDef = new HColumnDescriptor(FAMILY_PREFIX + i);
htd.addFamily(colDef);
}
return htd;
}
/**
* Verify the content of the WAL file.
* Verify that the file has expected number of edits.
* @param wals may not be null
* @param wal
* @return Count of edits.
* @throws IOException
*/
private long verify(final WALFactory wals, final Path wal, final boolean verbose)
throws IOException {
WAL.Reader reader = wals.createReader(wal.getFileSystem(getConf()), wal);
long count = 0;
Map<String, Long> sequenceIds = new HashMap<>();
try {
while (true) {
WAL.Entry e = reader.next();
if (e == null) {
LOG.debug("Read count=" + count + " from " + wal);
break;
}
count++;
long seqid = e.getKey().getLogSeqNum();
if (sequenceIds.containsKey(Bytes.toString(e.getKey().getEncodedRegionName()))) {
// sequenceIds should be increasing for every regions
if (sequenceIds.get(Bytes.toString(e.getKey().getEncodedRegionName())) >= seqid) {
throw new IllegalStateException("wal = " + wal.getName() + ", " + "previous seqid = "
+ sequenceIds.get(Bytes.toString(e.getKey().getEncodedRegionName()))
+ ", current seqid = " + seqid);
}
}
// update the sequence Id.
sequenceIds.put(Bytes.toString(e.getKey().getEncodedRegionName()), seqid);
if (verbose) LOG.info("seqid=" + seqid);
}
} finally {
reader.close();
}
return count;
}
private static void logBenchmarkResult(String testName, long numTests, long totalTime) {
float tsec = totalTime / 1000.0f;
LOG.info(String.format("%s took %.3fs %.3fops/s", testName, tsec, numTests / tsec));
}
private void printUsageAndExit() {
System.err.printf("Usage: hbase %s [options]\n", getClass().getName());
System.err.println(" where [options] are:");
System.err.println(" -h|-help Show this help and exit.");
System.err.println(" -threads <N> Number of threads writing on the WAL.");
System.err.println(" -regions <N> Number of regions to open in the WAL. Default: 1");
System.err.println(" -iterations <N> Number of iterations per thread.");
System.err.println(" -path <PATH> Path where region's root directory is created.");
System.err.println(" -families <N> Number of column families to write.");
System.err.println(" -qualifiers <N> Number of qualifiers to write.");
System.err.println(" -keySize <N> Row key size in byte.");
System.err.println(" -valueSize <N> Row/Col value size in byte.");
System.err.println(" -nocleanup Do NOT remove test data when done.");
System.err.println(" -noclosefs Do NOT close the filesystem when done.");
System.err.println(" -nosync Append without syncing");
System.err.println(" -syncInterval <N> Append N edits and then sync. " +
"Default=0, i.e. sync every edit.");
System.err.println(" -verify Verify edits written in sequence");
System.err.println(" -verbose Output extra info; " +
"e.g. all edit seq ids when verifying");
System.err.println(" -roll <N> Roll the way every N appends");
System.err.println(" -encryption <A> Encrypt the WAL with algorithm A, e.g. AES");
System.err.println(" -traceFreq <N> Rate of trace sampling. Default: 1.0, " +
"only respected when tracing is enabled, ie -Dhbase.trace.spanreceiver.classes=...");
System.err.println("");
System.err.println("Examples:");
System.err.println("");
System.err.println(" To run 100 threads on hdfs with log rolling every 10k edits and " +
"verification afterward do:");
System.err.println(" $ hbase org.apache.hadoop.hbase.wal." +
"WALPerformanceEvaluation \\");
System.err.println(" -conf ./core-site.xml -path hdfs://example.org:7000/tmp " +
"-threads 100 -roll 10000 -verify");
System.exit(1);
}
private final Set<WAL> walsListenedTo = new HashSet<>();
private HRegion openRegion(final FileSystem fs, final Path dir, final HTableDescriptor htd,
final WALFactory wals, final long whenToRoll, final LogRoller roller) throws IOException {
// Initialize HRegion
HRegionInfo regionInfo = new HRegionInfo(htd.getTableName());
// Initialize WAL
final WAL wal =
wals.getWAL(regionInfo.getEncodedNameAsBytes(), regionInfo.getTable().getNamespace());
// If we haven't already, attach a listener to this wal to handle rolls and metrics.
if (walsListenedTo.add(wal)) {
roller.addWAL(wal);
wal.registerWALActionsListener(new WALActionsListener.Base() {
private int appends = 0;
@Override
public void visitLogEntryBeforeWrite(WALKey logKey, WALEdit logEdit) {
this.appends++;
if (this.appends % whenToRoll == 0) {
LOG.info("Rolling after " + appends + " edits");
// We used to do explicit call to rollWriter but changed it to a request
// to avoid dead lock (there are less threads going on in this class than
// in the regionserver -- regionserver does not have the issue).
AbstractFSWALProvider.requestLogRoll(wal);
}
}
@Override
public void postSync(final long timeInNanos, final int handlerSyncs) {
syncMeter.mark();
syncHistogram.update(timeInNanos);
syncCountHistogram.update(handlerSyncs);
}
@Override
public void postAppend(final long size, final long elapsedTime, final WALKey logkey,
final WALEdit logEdit) {
appendMeter.mark(size);
}
});
}
return HRegion.createHRegion(regionInfo, dir, getConf(), htd, wal);
}
private void closeRegion(final HRegion region) throws IOException {
if (region != null) {
region.close();
WAL wal = region.getWAL();
if (wal != null) {
wal.shutdown();
}
}
}
private void cleanRegionRootDir(final FileSystem fs, final Path dir) throws IOException {
if (fs.exists(dir)) {
fs.delete(dir, true);
}
}
private Put setupPut(Random rand, byte[] key, byte[] value, final int numFamilies) {
rand.nextBytes(key);
Put put = new Put(key);
for (int cf = 0; cf < numFamilies; ++cf) {
for (int q = 0; q < numQualifiers; ++q) {
rand.nextBytes(value);
put.addColumn(Bytes.toBytes(FAMILY_PREFIX + cf),
Bytes.toBytes(QUALIFIER_PREFIX + q), value);
}
}
return put;
}
private void addFamilyMapToWALEdit(Map<byte[], List<Cell>> familyMap,
WALEdit walEdit) {
for (List<Cell> edits : familyMap.values()) {
for (Cell cell : edits) {
walEdit.add(cell);
}
}
}
private long runBenchmark(Runnable[] runnable, final int numThreads) throws InterruptedException {
Thread[] threads = new Thread[numThreads];
long startTime = System.currentTimeMillis();
for (int i = 0; i < numThreads; ++i) {
threads[i] = new Thread(runnable[i%runnable.length], "t" + i + ",r" + (i%runnable.length));
threads[i].start();
}
for (Thread t : threads) t.join();
long endTime = System.currentTimeMillis();
return(endTime - startTime);
}
/**
* The guts of the {@link #main} method.
* Call this method to avoid the {@link #main(String[])} System.exit.
* @param args
* @return errCode
* @throws Exception
*/
static int innerMain(final Configuration c, final String [] args) throws Exception {
return ToolRunner.run(c, new WALPerformanceEvaluation(), args);
}
public static void main(String[] args) throws Exception {
System.exit(innerMain(HBaseConfiguration.create(), args));
}
}