/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver.wal;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.NavigableMap;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.hbase.wal.WALSplitter;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
/**
* Tests for conditions that should trigger RegionServer aborts when
* rolling the current WAL fails.
*/
@Category({RegionServerTests.class, MediumTests.class})
public class TestLogRollAbort {
private static final Log LOG = LogFactory.getLog(AbstractTestLogRolling.class);
private static MiniDFSCluster dfsCluster;
private static Admin admin;
private static MiniHBaseCluster cluster;
protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
/* For the split-then-roll test */
private static final Path HBASEDIR = new Path("/hbase");
private static final Path HBASELOGDIR = new Path("/hbaselog");
private static final Path OLDLOGDIR = new Path(HBASELOGDIR, HConstants.HREGION_OLDLOGDIR_NAME);
// Need to override this setup so we can edit the config before it gets sent
// to the HDFS & HBase cluster startup.
@BeforeClass
public static void setUpBeforeClass() throws Exception {
// Tweak default timeout values down for faster recovery
TEST_UTIL.getConfiguration().setInt(
"hbase.regionserver.logroll.errors.tolerated", 2);
TEST_UTIL.getConfiguration().setInt("hbase.rpc.timeout", 10 * 1000);
// Increase the amount of time between client retries
TEST_UTIL.getConfiguration().setLong("hbase.client.pause", 5 * 1000);
// lower the namenode & datanode heartbeat so the namenode
// quickly detects datanode failures
TEST_UTIL.getConfiguration().setInt("dfs.namenode.heartbeat.recheck-interval", 5000);
TEST_UTIL.getConfiguration().setInt("dfs.heartbeat.interval", 1);
// the namenode might still try to choose the recently-dead datanode
// for a pipeline, so try to a new pipeline multiple times
TEST_UTIL.getConfiguration().setInt("dfs.client.block.write.retries", 10);
TEST_UTIL.getConfiguration().set(WALFactory.WAL_PROVIDER, "filesystem");
}
private Configuration conf;
private FileSystem fs;
@Before
public void setUp() throws Exception {
TEST_UTIL.startMiniCluster(2);
cluster = TEST_UTIL.getHBaseCluster();
dfsCluster = TEST_UTIL.getDFSCluster();
admin = TEST_UTIL.getAdmin();
conf = TEST_UTIL.getConfiguration();
fs = TEST_UTIL.getDFSCluster().getFileSystem();
// disable region rebalancing (interferes with log watching)
cluster.getMaster().balanceSwitch(false);
FSUtils.setRootDir(conf, HBASEDIR);
FSUtils.setWALRootDir(conf, HBASELOGDIR);
}
@After
public void tearDown() throws Exception {
TEST_UTIL.shutdownMiniCluster();
}
/**
* Tests that RegionServer aborts if we hit an error closing the WAL when
* there are unsynced WAL edits. See HBASE-4282.
*/
@Test
public void testRSAbortWithUnflushedEdits() throws Exception {
LOG.info("Starting testRSAbortWithUnflushedEdits()");
// When the hbase:meta table can be opened, the region servers are running
TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME).close();
// Create the test table and open it
TableName tableName = TableName.valueOf(this.getClass().getSimpleName());
HTableDescriptor desc = new HTableDescriptor(tableName);
desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
admin.createTable(desc);
Table table = TEST_UTIL.getConnection().getTable(desc.getTableName());
try {
HRegionServer server = TEST_UTIL.getRSForFirstRegionInTable(tableName);
WAL log = server.getWAL(null);
Put p = new Put(Bytes.toBytes("row2001"));
p.addColumn(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2001));
table.put(p);
log.sync();
p = new Put(Bytes.toBytes("row2002"));
p.addColumn(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2002));
table.put(p);
dfsCluster.restartDataNodes();
LOG.info("Restarted datanodes");
try {
log.rollWriter(true);
} catch (FailedLogCloseException flce) {
// Expected exception. We used to expect that there would be unsynced appends but this
// not reliable now that sync plays a roll in wall rolling. The above puts also now call
// sync.
} catch (Throwable t) {
LOG.fatal("FAILED TEST: Got wrong exception", t);
}
} finally {
table.close();
}
}
/**
* Tests the case where a RegionServer enters a GC pause,
* comes back online after the master declared it dead and started to split.
* Want log rolling after a master split to fail. See HBASE-2312.
*/
@Test (timeout=300000)
public void testLogRollAfterSplitStart() throws IOException {
LOG.info("Verify wal roll after split starts will fail.");
String logName = ServerName.valueOf("testLogRollAfterSplitStart",
16010, System.currentTimeMillis()).toString();
Path thisTestsDir = new Path(HBASELOGDIR, AbstractFSWALProvider.getWALDirectoryName(logName));
final WALFactory wals = new WALFactory(conf, null, logName);
try {
// put some entries in an WAL
TableName tableName =
TableName.valueOf(this.getClass().getName());
HRegionInfo regioninfo = new HRegionInfo(tableName,
HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
final WAL log = wals.getWAL(regioninfo.getEncodedNameAsBytes(),
regioninfo.getTable().getNamespace());
MultiVersionConcurrencyControl mvcc = new MultiVersionConcurrencyControl(1);
final int total = 20;
for (int i = 0; i < total; i++) {
WALEdit kvs = new WALEdit();
kvs.add(new KeyValue(Bytes.toBytes(i), tableName.getName(), tableName.getName()));
HTableDescriptor htd = new HTableDescriptor(tableName);
htd.addFamily(new HColumnDescriptor("column"));
NavigableMap<byte[], Integer> scopes = new TreeMap<>(Bytes.BYTES_COMPARATOR);
for(byte[] fam : htd.getFamiliesKeys()) {
scopes.put(fam, 0);
}
log.append(regioninfo, new WALKey(regioninfo.getEncodedNameAsBytes(), tableName,
System.currentTimeMillis(), mvcc, scopes), kvs, true);
}
// Send the data to HDFS datanodes and close the HDFS writer
log.sync();
((AbstractFSWAL<?>) log).replaceWriter(((FSHLog)log).getOldPath(), null, null);
/* code taken from MasterFileSystem.getLogDirs(), which is called from MasterFileSystem.splitLog()
* handles RS shutdowns (as observed by the splitting process)
*/
// rename the directory so a rogue RS doesn't create more WALs
Path rsSplitDir = thisTestsDir.suffix(AbstractFSWALProvider.SPLITTING_EXT);
if (!fs.rename(thisTestsDir, rsSplitDir)) {
throw new IOException("Failed fs.rename for log split: " + thisTestsDir);
}
LOG.debug("Renamed region directory: " + rsSplitDir);
LOG.debug("Processing the old log files.");
WALSplitter.split(HBASELOGDIR, rsSplitDir, OLDLOGDIR, fs, conf, wals);
LOG.debug("Trying to roll the WAL.");
try {
log.rollWriter();
Assert.fail("rollWriter() did not throw any exception.");
} catch (IOException ioe) {
if (ioe.getCause() instanceof FileNotFoundException) {
LOG.info("Got the expected exception: ", ioe.getCause());
} else {
Assert.fail("Unexpected exception: " + ioe);
}
}
} finally {
wals.close();
if (fs.exists(thisTestsDir)) {
fs.delete(thisTestsDir, true);
}
}
}
}