/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs.server.blockmanagement; import java.io.File; import java.io.IOException; import junit.framework.TestCase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties; import org.apache.hadoop.hdfs.TestDatanodeBlockScanner; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; public class TestOverReplicatedBlocks extends TestCase { /** Test processOverReplicatedBlock can handle corrupt replicas fine. * It make sure that it won't treat corrupt replicas as valid ones * thus prevents NN deleting valid replicas but keeping * corrupt ones. */ public void testProcesOverReplicateBlock() throws IOException { Configuration conf = new HdfsConfiguration(); conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L); conf.set( DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2)); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); FileSystem fs = cluster.getFileSystem(); try { final Path fileName = new Path("/foo1"); DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L); DFSTestUtil.waitReplication(fs, fileName, (short)3); // corrupt the block on datanode 0 ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName); assertTrue(TestDatanodeBlockScanner.corruptReplica(block, 0)); DataNodeProperties dnProps = cluster.stopDataNode(0); // remove block scanner log to trigger block scanning File scanLog = new File(MiniDFSCluster.getFinalizedDir( cluster.getInstanceStorageDir(0, 0), cluster.getNamesystem().getBlockPoolId()).getParent().toString() + "/../dncp_block_verification.log.prev"); //wait for one minute for deletion to succeed; for(int i=0; !scanLog.delete(); i++) { assertTrue("Could not delete log file in one minute", i < 60); try { Thread.sleep(1000); } catch (InterruptedException ignored) {} } // restart the datanode so the corrupt replica will be detected cluster.restartDataNode(dnProps); DFSTestUtil.waitReplication(fs, fileName, (short)2); String blockPoolId = cluster.getNamesystem().getBlockPoolId(); final DatanodeID corruptDataNode = DataNodeTestUtils.getDNRegistrationForBP( cluster.getDataNodes().get(2), blockPoolId); final FSNamesystem namesystem = cluster.getNamesystem(); final BlockManager bm = namesystem.getBlockManager(); final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager(); try { namesystem.writeLock(); synchronized(hm) { // set live datanode's remaining space to be 0 // so they will be chosen to be deleted when over-replication occurs String corruptMachineName = corruptDataNode.getName(); for (DatanodeDescriptor datanode : hm.getDatanodes()) { if (!corruptMachineName.equals(datanode.getName())) { datanode.updateHeartbeat(100L, 100L, 0L, 100L, 0, 0); } } // decrease the replication factor to 1; NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short)1); // corrupt one won't be chosen to be excess one // without 4910 the number of live replicas would be 0: block gets lost assertEquals(1, bm.countNodes(block.getLocalBlock()).liveReplicas()); } } finally { namesystem.writeUnlock(); } } finally { cluster.shutdown(); } } }