/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hdfs; import java.net.SocketTimeoutException; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.LongWritable; import java.io.IOException; import java.net.InetSocketAddress; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.List; import java.net.SocketTimeoutException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSClient.DFSInputStream; import org.apache.hadoop.hdfs.protocol.*; import org.apache.hadoop.hdfs.protocol.FSConstants.UpgradeAction; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.server.common.*; import org.apache.hadoop.hdfs.server.datanode.TestInterDatanodeProtocol; import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.io.*; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.ipc.Client; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import junit.framework.TestCase; import static org.mockito.Mockito.*; import org.mockito.stubbing.Answer; import org.mockito.invocation.InvocationOnMock; /** * These tests make sure that DFSClient retries fetching data from DFS * properly in case of errors. */ public class TestDFSClientRetries extends TestCase { private static final String ADDRESS = "0.0.0.0"; final static private int PING_INTERVAL = 1000; final static private int MIN_SLEEP_TIME = 1000; public static final Log LOG = LogFactory.getLog(TestDFSClientRetries.class.getName()); final static private Configuration conf = new Configuration(); private static class TestServer extends Server { private boolean sleep; private Class<? extends Writable> responseClass; public TestServer(int handlerCount, boolean sleep) throws IOException { this(handlerCount, sleep, LongWritable.class, null); } public TestServer(int handlerCount, boolean sleep, Class<? extends Writable> paramClass, Class<? extends Writable> responseClass) throws IOException { super(ADDRESS, 0, paramClass, handlerCount, conf); this.sleep = sleep; this.responseClass = responseClass; } @Override public Writable call(Class<?> protocol, Writable param, long receiveTime) throws IOException { if (sleep) { // sleep a bit try { Thread.sleep(PING_INTERVAL + MIN_SLEEP_TIME); } catch (InterruptedException e) {} } if (responseClass != null) { try { return responseClass.newInstance(); } catch (Exception e) { throw new RuntimeException(e); } } else { return param; // echo param as result } } } // writes 'len' bytes of data to out. private static void writeData(OutputStream out, int len) throws IOException { byte [] buf = new byte[4096*16]; while(len > 0) { int toWrite = Math.min(len, buf.length); out.write(buf, 0, toWrite); len -= toWrite; } } /** * This makes sure that when DN closes clients socket after client had * successfully connected earlier, the data can still be fetched. */ public void testWriteTimeoutAtDataNode() throws IOException, InterruptedException { final int writeTimeout = 100; //milliseconds. // set a very short write timeout for datanode, so that tests runs fast. conf.setInt("dfs.datanode.socket.write.timeout", writeTimeout); // set a smaller block size final int blockSize = 10*1024*1024; conf.setInt("dfs.block.size", blockSize); conf.setInt("dfs.client.max.block.acquire.failures", 1); // set a small buffer size final int bufferSize = 4096; conf.setInt("io.file.buffer.size", bufferSize); MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null); try { cluster.waitActive(); FileSystem fs = cluster.getFileSystem(); Path filePath = new Path("/testWriteTimeoutAtDataNode"); OutputStream out = fs.create(filePath, true, bufferSize); // write a 2 block file. writeData(out, 2*blockSize); out.close(); byte[] buf = new byte[1024*1024]; // enough to empty TCP buffers. InputStream in = fs.open(filePath, bufferSize); //first read a few bytes IOUtils.readFully(in, buf, 0, bufferSize/2); //now read few more chunks of data by sleeping in between : for(int i=0; i<10; i++) { Thread.sleep(2*writeTimeout); // force write timeout at the datanode. // read enough to empty out socket buffers. IOUtils.readFully(in, buf, 0, buf.length); } // successfully read with write timeout on datanodes. in.close(); } finally { cluster.shutdown(); } } // more tests related to different failure cases can be added here. class TestNameNode implements ClientProtocol { int num_calls = 0; // The total number of calls that can be made to addBlock // before an exception is thrown int num_calls_allowed; public final String ADD_BLOCK_EXCEPTION = "Testing exception thrown from" + "TestDFSClientRetries::" + "TestNameNode::addBlock"; public final String RETRY_CONFIG = "dfs.client.block.write.locateFollowingBlock.retries"; public TestNameNode(Configuration conf) throws IOException { // +1 because the configuration value is the number of retries and // the first call is not a retry (e.g., 2 retries == 3 total // calls allowed) this.num_calls_allowed = conf.getInt(RETRY_CONFIG, 5) + 1; } public long getProtocolVersion(String protocol, long clientVersion) throws IOException { return versionID; } public LocatedBlock addBlock(String src, String clientName) throws IOException { return addBlock(src, clientName, null); } public LocatedBlock addBlock(String src, String clientName, DatanodeInfo[] excludedNode) throws IOException { num_calls++; if (num_calls > num_calls_allowed) { throw new IOException("addBlock called more times than " + RETRY_CONFIG + " allows."); } else { throw new RemoteException(NotReplicatedYetException.class.getName(), ADD_BLOCK_EXCEPTION); } } // The following methods are stub methods that are not needed by this mock class public LocatedBlocks getBlockLocations(String src, long offset, long length) throws IOException { return null; } @Deprecated public void create(String src, FsPermission masked, String clientName, boolean overwrite, short replication, long blockSize) throws IOException {} public void create(String src, FsPermission masked, String clientName, boolean overwrite, boolean createparent, short replication, long blockSize) throws IOException {} public LocatedBlock append(String src, String clientName) throws IOException { return null; } public boolean setReplication(String src, short replication) throws IOException { return false; } public void setPermission(String src, FsPermission permission) throws IOException {} public void setOwner(String src, String username, String groupname) throws IOException {} public void abandonBlock(Block b, String src, String holder) throws IOException {} public boolean complete(String src, String clientName) throws IOException { return false; } public void reportBadBlocks(LocatedBlock[] blocks) throws IOException {} public boolean rename(String src, String dst) throws IOException { return false; } public boolean delete(String src) throws IOException { return false; } public boolean delete(String src, boolean recursive) throws IOException { return false; } public boolean mkdirs(String src, FsPermission masked) throws IOException { return false; } public HdfsFileStatus[] getListing(String src) throws IOException { return null; } public DirectoryListing getListing(String src, byte[] startName) throws IOException { return null; } public void renewLease(String clientName) throws IOException {} public long[] getStats() throws IOException { return null; } public DatanodeInfo[] getDatanodeReport(FSConstants.DatanodeReportType type) throws IOException { return null; } public long getPreferredBlockSize(String filename) throws IOException { return 0; } public boolean setSafeMode(FSConstants.SafeModeAction action) throws IOException { return false; } public void saveNamespace() throws IOException {} public boolean restoreFailedStorage(String arg) throws AccessControlException { return false; } public void refreshNodes() throws IOException {} public void finalizeUpgrade() throws IOException {} public UpgradeStatusReport distributedUpgradeProgress(UpgradeAction action) throws IOException { return null; } public void metaSave(String filename) throws IOException {} public void setBalancerBandwidth(long bandwidth) throws IOException {} public HdfsFileStatus getFileInfo(String src) throws IOException { return null; } public ContentSummary getContentSummary(String path) throws IOException { return null; } public void setQuota(String path, long namespaceQuota, long diskspaceQuota) throws IOException {} public void fsync(String src, String client) throws IOException {} public void setTimes(String src, long mtime, long atime) throws IOException {} public boolean recoverLease(String src, String clientName) throws IOException {return true;} public Token<DelegationTokenIdentifier> getDelegationToken(Text renewer) throws IOException { return null; } public long renewDelegationToken(Token<DelegationTokenIdentifier> token) throws InvalidToken, IOException { return 0; } public void cancelDelegationToken(Token<DelegationTokenIdentifier> token) throws IOException { } } public void testNotYetReplicatedErrors() throws IOException { // allow 1 retry (2 total calls) conf.setInt("dfs.client.block.write.locateFollowingBlock.retries", 1); TestNameNode tnn = new TestNameNode(conf); final DFSClient client = new DFSClient(null, tnn, conf, null); OutputStream os = client.create("testfile", true); os.write(20); // write one random byte try { os.close(); } catch (Exception e) { assertTrue("Retries are not being stopped correctly", e.getMessage().equals(tnn.ADD_BLOCK_EXCEPTION)); } } /** * This tests that DFSInputStream failures are counted for a given read * operation, and not over the lifetime of the stream. It is a regression * test for HDFS-127. */ public void testFailuresArePerOperation() throws Exception { long fileSize = 4096; Path file = new Path("/testFile"); MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null); int maxBlockAcquires = DFSClient.getMaxBlockAcquireFailures(conf); assertTrue(maxBlockAcquires > 0); try { cluster.waitActive(); FileSystem fs = cluster.getFileSystem(); NameNode preSpyNN = cluster.getNameNode(); NameNode spyNN = spy(preSpyNN); DFSClient client = new DFSClient(null, spyNN, conf, null); DFSTestUtil.createFile(fs, file, fileSize, (short)1, 12345L /*seed*/); // If the client will retry maxBlockAcquires times, then if we fail // any more than that number of times, the operation should entirely // fail. doAnswer(new FailNTimesAnswer(preSpyNN, maxBlockAcquires + 1)) .when(spyNN).getBlockLocations(anyString(), anyLong(), anyLong()); try { IOUtils.copyBytes(client.open(file.toString()), new IOUtils.NullOutputStream(), conf, true); fail("Didn't get exception"); } catch (IOException ioe) { DFSClient.LOG.info("Got expected exception", ioe); } // If we fail exactly that many times, then it should succeed. doAnswer(new FailNTimesAnswer(preSpyNN, maxBlockAcquires)) .when(spyNN).getBlockLocations(anyString(), anyLong(), anyLong()); IOUtils.copyBytes(client.open(file.toString()), new IOUtils.NullOutputStream(), conf, true); DFSClient.LOG.info("Starting test case for failure reset"); // Now the tricky case - if we fail a few times on one read, then succeed, // then fail some more on another read, it shouldn't fail. doAnswer(new FailNTimesAnswer(preSpyNN, maxBlockAcquires)) .when(spyNN).getBlockLocations(anyString(), anyLong(), anyLong()); DFSInputStream is = client.open(file.toString()); byte buf[] = new byte[10]; IOUtils.readFully(is, buf, 0, buf.length); DFSClient.LOG.info("First read successful after some failures."); // Further reads at this point will succeed since it has the good block locations. // So, force the block locations on this stream to be refreshed from bad info. // When reading again, it should start from a fresh failure count, since // we're starting a new operation on the user level. doAnswer(new FailNTimesAnswer(preSpyNN, maxBlockAcquires)) .when(spyNN).getBlockLocations(anyString(), anyLong(), anyLong()); is.openInfo(); // Seek to beginning forces a reopen of the BlockReader - otherwise it'll // just keep reading on the existing stream and the fact that we've poisoned // the block info won't do anything. is.seek(0); IOUtils.readFully(is, buf, 0, buf.length); } finally { cluster.shutdown(); } } /** * Mock Answer implementation of NN.getBlockLocations that will return * a poisoned block list a certain number of times before returning * a proper one. */ private static class FailNTimesAnswer implements Answer<LocatedBlocks> { private int failuresLeft; private NameNode realNN; public FailNTimesAnswer(NameNode realNN, int timesToFail) { failuresLeft = timesToFail; this.realNN = realNN; } public LocatedBlocks answer(InvocationOnMock invocation) throws IOException { Object args[] = invocation.getArguments(); LocatedBlocks realAnswer = realNN.getBlockLocations( (String)args[0], (Long)args[1], (Long)args[2]); if (failuresLeft-- > 0) { NameNode.LOG.info("FailNTimesAnswer injecting failure."); return makeBadBlockList(realAnswer); } NameNode.LOG.info("FailNTimesAnswer no longer failing."); return realAnswer; } private LocatedBlocks makeBadBlockList(LocatedBlocks goodBlockList) { LocatedBlock goodLocatedBlock = goodBlockList.get(0); LocatedBlock badLocatedBlock = new LocatedBlock( goodLocatedBlock.getBlock(), new DatanodeInfo[] { new DatanodeInfo(new DatanodeID("255.255.255.255:234")) }, goodLocatedBlock.getStartOffset(), false); List<LocatedBlock> badBlocks = new ArrayList<LocatedBlock>(); badBlocks.add(badLocatedBlock); return new LocatedBlocks(goodBlockList.getFileLength(), badBlocks, false); } } /** Test that timeout occurs when DN does not respond to RPC. * Start up a server and ask it to sleep for n seconds. Make an * RPC to the server and set rpcTimeout to less than n and ensure * that socketTimeoutException is obtained */ public void testClientDNProtocolTimeout() throws IOException { final Server server = new TestServer(1, true); server.start(); final InetSocketAddress addr = NetUtils.getConnectAddress(server); DatanodeID fakeDnId = new DatanodeID( "localhost:" + addr.getPort(), "fake-storage", 0, addr.getPort()); DatanodeInfo dnInfo = new DatanodeInfo(fakeDnId); LocatedBlock fakeBlock = new LocatedBlock(new Block(12345L), new DatanodeInfo[0]); ClientDatanodeProtocol proxy = null; try { proxy = DFSClient.createClientDatanodeProtocolProxy(dnInfo, conf, fakeBlock.getBlock(), fakeBlock.getBlockToken(), 500); fail ("Did not get expected exception: SocketTimeoutException"); } catch (SocketTimeoutException e) { LOG.info("Got the expected Exception: SocketTimeoutException"); } finally { if (proxy != null) { RPC.stopProxy(proxy); } server.stop(); } } public void testGetFileChecksum() throws Exception { final String f = "/testGetFileChecksum"; final Path p = new Path(f); final Configuration conf = new Configuration(); final MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null); try { cluster.waitActive(); //create a file final FileSystem fs = cluster.getFileSystem(); DFSTestUtil.createFile(fs, p, 1L << 20, (short)3, 20100402L); //get checksum final FileChecksum cs1 = fs.getFileChecksum(p); assertTrue(cs1 != null); //stop the first datanode final List<LocatedBlock> locatedblocks = DFSClient.callGetBlockLocations( cluster.getNameNode(), f, 0, Long.MAX_VALUE).getLocatedBlocks(); final DatanodeInfo first = locatedblocks.get(0).getLocations()[0]; cluster.stopDataNode(first.getName()); //get checksum again final FileChecksum cs2 = fs.getFileChecksum(p); assertEquals(cs1, cs2); } finally { cluster.shutdown(); } } }