/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver.wal;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.EOFException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.VerySlowRegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({ VerySlowRegionServerTests.class, LargeTests.class })
public class TestLogRolling extends AbstractTestLogRolling {
private static final Log LOG = LogFactory.getLog(TestLogRolling.class);
@BeforeClass
public static void setUpBeforeClass() throws Exception {
// TODO: testLogRollOnDatanodeDeath fails if short circuit reads are on under the hadoop2
// profile. See HBASE-9337 for related issues.
System.setProperty("hbase.tests.use.shortcircuit.reads", "false");
/**** configuration for testLogRollOnDatanodeDeath ****/
// lower the namenode & datanode heartbeat so the namenode
// quickly detects datanode failures
Configuration conf = TEST_UTIL.getConfiguration();
conf.setInt("dfs.namenode.heartbeat.recheck-interval", 5000);
conf.setInt("dfs.heartbeat.interval", 1);
// the namenode might still try to choose the recently-dead datanode
// for a pipeline, so try to create a new pipeline multiple times
conf.setInt("dfs.client.block.write.retries", 30);
conf.setInt("hbase.regionserver.hlog.tolerable.lowreplication", 2);
conf.setInt("hbase.regionserver.hlog.lowreplication.rolllimit", 3);
conf.set(WALFactory.WAL_PROVIDER, "filesystem");
AbstractTestLogRolling.setUpBeforeClass();
}
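/**
* Writes a small batch of rows, then keeps issuing single puts until the WAL's
* low-replication roll state matches {@code expect} or {@code timeout} ms elapse.
*/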
void batchWriteAndWait(Table table, final FSHLog log, int start, boolean expect, int timeout)
throws IOException {
for (int i = 0; i < 10; i++) {
Put put = new Put(Bytes.toBytes("row" + String.format("%1$04d", (start + i))));
put.addColumn(HConstants.CATALOG_FAMILY, null, value);
table.put(put);
}
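// Throwaway row used only to force sync() calls while polling below.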
Put tmpPut = new Put(Bytes.toBytes("tmprow"));
tmpPut.addColumn(HConstants.CATALOG_FAMILY, null, value);
long startTime = System.currentTimeMillis();
long remaining = timeout;
while (remaining > 0) {
if (log.isLowReplicationRollEnabled() == expect) {
break;
} else {
// Trigger calling FSHLog#checkLowReplication()
table.put(tmpPut);
try {
Thread.sleep(200);
} catch (InterruptedException e) {
// continue
}
remaining = timeout - (System.currentTimeMillis() - startTime);
}
}
}
/**
* Tests that logs are rolled upon detecting datanode death. Requires an HDFS jar with HDFS-826 &
* syncFs() support (HDFS-200).
*/
@Test
public void testLogRollOnDatanodeDeath() throws Exception {
TEST_UTIL.ensureSomeRegionServersAvailable(2);
assertTrue("This test requires WAL file replication set to 2.",
fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()) == 2);
LOG.info("Replication=" + fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()));
this.server = cluster.getRegionServer(0);
// Create the test table and open it
HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(getName()));
desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
admin.createTable(desc);
Table table = TEST_UTIL.getConnection().getTable(desc.getTableName());
server = TEST_UTIL.getRSForFirstRegionInTable(desc.getTableName());
HRegionInfo region = server.getOnlineRegions(desc.getTableName()).get(0).getRegionInfo();
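// The "filesystem" WAL provider is set in setUpBeforeClass, so this cast is safe.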
final FSHLog log = (FSHLog) server.getWAL(region);
final AtomicBoolean lowReplicationHookCalled = new AtomicBoolean(false);
log.registerWALActionsListener(new WALActionsListener.Base() {
@Override
public void logRollRequested(boolean lowReplication) {
if (lowReplication) {
lowReplicationHookCalled.lazySet(true);
}
}
});
// Bump up the datanode count to keep replication intact when we kill one below.
// startDataNodes() is synchronous; when it returns, the dfs cluster is active.
// We start 3 fresh datanodes and then stop the 2 original ones to avoid a directory naming
// conflict when a namenode is stopped/started later, as mentioned in HBASE-5163.
List<DataNode> existingNodes = dfsCluster.getDataNodes();
int numDataNodes = 3;
dfsCluster.startDataNodes(TEST_UTIL.getConfiguration(), numDataNodes, true, null, null);
List<DataNode> allNodes = dfsCluster.getDataNodes();
for (int i = allNodes.size() - 1; i >= 0; i--) {
if (existingNodes.contains(allNodes.get(i))) {
dfsCluster.stopDataNode(i);
}
}
assertTrue(
"DataNodes " + dfsCluster.getDataNodes().size() + " default replication "
+ fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()),
dfsCluster.getDataNodes()
.size() >= fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()) + 1);
writeData(table, 2);
long curTime = System.currentTimeMillis();
LOG.info("log.getCurrentFileName(): " + log.getCurrentFileName());
long oldFilenum = AbstractFSWALProvider.extractFileNumFromWAL(log);
assertTrue("Log should have a timestamp older than now",
curTime > oldFilenum && oldFilenum != -1);
assertTrue("The log shouldn't have rolled yet",
oldFilenum == AbstractFSWALProvider.extractFileNumFromWAL(log));
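// Grab the current write pipeline so we can kill specific datanodes out of it.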
final DatanodeInfo[] pipeline = log.getPipeline();
assertTrue(pipeline.length == fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()));
// kill a datanode in the pipeline to force a log roll on the next sync()
// This call is synchronous; when it returns, the node is killed.
assertTrue(dfsCluster.stopDataNode(pipeline[0].getName()) != null);
// this write should succeed, but trigger a log roll
writeData(table, 2);
long newFilenum = AbstractFSWALProvider.extractFileNumFromWAL(log);
assertTrue("Missing datanode should've triggered a log roll",
newFilenum > oldFilenum && newFilenum > curTime);
assertTrue("The log rolling hook should have been called with the low replication flag",
lowReplicationHookCalled.get());
// write some more log data (this should use a new hdfs_out)
writeData(table, 3);
assertTrue("The log should not roll again.",
AbstractFSWALProvider.extractFileNumFromWAL(log) == newFilenum);
// kill another datanode in the pipeline, so the replicas will be lower than
// the configured value 2.
assertTrue(dfsCluster.stopDataNode(pipeline[1].getName()) != null);
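// With only one replica left (below the tolerable value of 2), repeated rolls cannot
// restore replication, so the low-replication roller is expected to disable itself.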
batchWriteAndWait(table, log, 3, false, 14000);
int replication = log.getLogReplication();
assertTrue("LowReplication Roller should've been disabled, current replication=" + replication,
!log.isLowReplicationRollEnabled());
dfsCluster.startDataNodes(TEST_UTIL.getConfiguration(), 1, true, null, null);
// Force roll writer. The new log file will have the default replications,
// and the LowReplication Roller will be enabled.
log.rollWriter(true);
batchWriteAndWait(table, log, 13, true, 10000);
replication = log.getLogReplication();
assertTrue("New log file should have the default replication instead of " + replication,
replication == fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()));
assertTrue("LowReplication Roller should've been enabled", log.isLowReplicationRollEnabled());
}
/**
* Tests that the WAL is rolled when all datanodes in the pipeline have been restarted.
*/
@Test
public void testLogRollOnPipelineRestart() throws Exception {
LOG.info("Starting testLogRollOnPipelineRestart");
assertTrue("This test requires WAL file replication.",
fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()) > 1);
LOG.info("Replication=" + fs.getDefaultReplication(TEST_UTIL.getDataTestDirOnTestFS()));
// When the hbase:meta table can be opened, the region servers are running
Table t = TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME);
try {
this.server = cluster.getRegionServer(0);
// Create the test table and open it
HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(getName()));
desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
admin.createTable(desc);
Table table = TEST_UTIL.getConnection().getTable(desc.getTableName());
server = TEST_UTIL.getRSForFirstRegionInTable(desc.getTableName());
HRegionInfo region = server.getOnlineRegions(desc.getTableName()).get(0).getRegionInfo();
final WAL log = server.getWAL(region);
final List<Path> paths = new ArrayList<>(1);
final List<Integer> preLogRolledCalled = new ArrayList<>();
paths.add(AbstractFSWALProvider.getCurrentFileName(log));
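// Track every pre/post roll so we can later read back all WAL files that were written.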
log.registerWALActionsListener(new WALActionsListener.Base() {
@Override
public void preLogRoll(Path oldFile, Path newFile) {
LOG.debug("preLogRoll: oldFile=" + oldFile + " newFile=" + newFile);
preLogRolledCalled.add(Integer.valueOf(1));
}
@Override
public void postLogRoll(Path oldFile, Path newFile) {
paths.add(newFile);
}
});
writeData(table, 1002);
long curTime = System.currentTimeMillis();
LOG.info("log.getCurrentFileName()): " + AbstractFSWALProvider.getCurrentFileName(log));
long oldFilenum = AbstractFSWALProvider.extractFileNumFromWAL(log);
assertTrue("Log should have a timestamp older than now",
curTime > oldFilenum && oldFilenum != -1);
assertTrue("The log shouldn't have rolled yet",
oldFilenum == AbstractFSWALProvider.extractFileNumFromWAL(log));
// roll all datanodes in the pipeline
dfsCluster.restartDataNodes();
Thread.sleep(1000);
dfsCluster.waitActive();
LOG.info("Data Nodes restarted");
validateData(table, 1002);
// this write should succeed, but trigger a log roll
writeData(table, 1003);
long newFilenum = AbstractFSWALProvider.extractFileNumFromWAL(log);
assertTrue("Missing datanode should've triggered a log roll",
newFilenum > oldFilenum && newFilenum > curTime);
validateData(table, 1003);
writeData(table, 1004);
// roll all datanodes again
dfsCluster.restartDataNodes();
Thread.sleep(1000);
dfsCluster.waitActive();
LOG.info("Data Nodes restarted");
validateData(table, 1004);
// this write should succeed, but trigger a log roll
writeData(table, 1005);
// force a log roll to read back and verify previously written logs
log.rollWriter(true);
assertTrue("preLogRolledCalled has size of " + preLogRolledCalled.size(),
preLogRolledCalled.size() >= 1);
// read back the data written
Set<String> loggedRows = new HashSet<>();
FSUtils fsUtils = FSUtils.getInstance(fs, TEST_UTIL.getConfiguration());
for (Path p : paths) {
LOG.debug("recovering lease for " + p);
fsUtils.recoverFileLease(((HFileSystem) fs).getBackingFs(), p, TEST_UTIL.getConfiguration(),
null);
LOG.debug("Reading WAL " + FSUtils.getPath(p));
WAL.Reader reader = null;
try {
reader = WALFactory.createReader(fs, p, TEST_UTIL.getConfiguration());
WAL.Entry entry;
while ((entry = reader.next()) != null) {
LOG.debug("#" + entry.getKey().getSequenceId() + ": " + entry.getEdit().getCells());
for (Cell cell : entry.getEdit().getCells()) {
loggedRows.add(
Bytes.toStringBinary(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
}
}
} catch (EOFException e) {
LOG.debug("EOF reading file " + FSUtils.getPath(p));
} finally {
if (reader != null) reader.close();
}
}
// verify the written rows are there
assertTrue(loggedRows.contains("row1002"));
assertTrue(loggedRows.contains("row1003"));
assertTrue(loggedRows.contains("row1004"));
assertTrue(loggedRows.contains("row1005"));
// flush all regions
for (Region r : server.getOnlineRegionsLocalContext()) {
try {
r.flush(true);
} catch (Exception e) {
// This try/catch was added by HBASE-14317, which tightened the semantics so that
// a failed append can no longer be followed by a successful sync. The exception
// surfacing here is a failed sync that used to 'pass'.
LOG.info(e);
}
}
ResultScanner scanner = table.getScanner(new Scan());
try {
for (int i = 2; i <= 5; i++) {
Result r = scanner.next();
assertNotNull(r);
assertFalse(r.isEmpty());
assertEquals("row100" + i, Bytes.toString(r.getRow()));
}
} finally {
scanner.close();
}
// verify that no region servers aborted
for (JVMClusterUtil.RegionServerThread rsThread : TEST_UTIL.getHBaseCluster()
.getRegionServerThreads()) {
assertFalse(rsThread.getRegionServer().isAborted());
}
} finally {
if (t != null) t.close();
}
}
}