/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import junit.framework.TestCase;
import java.io.*;
import java.lang.reflect.Field;
import java.net.*;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.log4j.Level;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.HardLink;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.hdfs.DFSClient.DFSDataInputStream;
import org.apache.hadoop.hdfs.DFSClient.MultiDataOutputStream;
import org.apache.hadoop.hdfs.profiling.DFSWriteProfilingData;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.PacketBlockReceiverProfileData;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.FSDataset;
import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.util.InjectionEventI;
import org.apache.hadoop.util.InjectionHandler;

/**
 * This class tests the building blocks that are needed to
 * support HDFS appends.
 */
public class TestFileAppend extends TestCase {
  {
    DataNode.LOG.getLogger().setLevel(Level.ALL);
    ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL);
  }

  static final int blockSize = 1024;
  static final int numBlocks = 10;
  static final int fileSize = numBlocks * blockSize + 1;
  boolean simulatedStorage = false;

  private long seed;
  private byte[] fileContents = null;

  //
  // create a buffer that contains the entire test file data.
  //
  private void initBuffer(int size) {
    seed = AppendTestUtil.nextLong();
    fileContents = AppendTestUtil.randomBytes(seed, size);
  }

  /*
   * creates a file but does not close it
   */
  private FSDataOutputStream createFile(FileSystem fileSys, Path name, int repl)
      throws IOException {
    FSDataOutputStream stm = fileSys.create(name, true,
        fileSys.getConf().getInt("io.file.buffer.size", 4096),
        (short) repl, (long) blockSize);
    return stm;
  }

  //
  // writes to file but does not close it
  //
  private void writeFile(FSDataOutputStream stm) throws IOException {
    byte[] buffer = AppendTestUtil.randomBytes(seed, fileSize);
    stm.write(buffer);
  }

  //
  // verify that the data written to the full blocks are sane
  //
  private void checkFile(FileSystem fileSys, Path name, int repl)
      throws IOException {
    boolean done = false;

    // wait till all full blocks are confirmed by the datanodes.
    while (!done) {
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {}
      done = true;
      BlockLocation[] locations = fileSys.getFileBlockLocations(
          fileSys.getFileStatus(name), 0, fileSize);
      if (locations.length < numBlocks) {
        System.out.println("Number of blocks found " + locations.length);
        done = false;
        continue;
      }
      for (int idx = 0; idx < numBlocks; idx++) {
        if (locations[idx].getHosts().length < repl) {
          System.out.println("Block index " + idx + " not yet replicated.");
          done = false;
          break;
        }
      }
    }
    checkContent(fileSys, name, numBlocks * blockSize);
  }

  private void checkContent(FileSystem fileSys, Path name, int length)
      throws IOException {
    FSDataInputStream stm = fileSys.open(name);
    byte[] expected = new byte[length];
    if (simulatedStorage) {
      for (int i = 0; i < expected.length; i++) {
        expected[i] = SimulatedFSDataset.DEFAULT_DATABYTE;
      }
    } else {
      for (int i = 0; i < expected.length; i++) {
        expected[i] = fileContents[i];
      }
    }
    // do a sanity check. Read the file
    byte[] actual = new byte[length];
    stm.readFully(0, actual);
    checkData(actual, 0, expected, "Read 1");
  }

  private void checkFullFile(FileSystem fs, Path name) throws IOException {
    FSDataInputStream stm = fs.open(name);
    byte[] actual = new byte[fileContents.length];
    stm.readFully(0, actual);
    checkData(actual, 0, fileContents, "Read 2");
    stm.close();
  }

  private void checkData(byte[] actual, int from, byte[] expected,
      String message) {
    for (int idx = 0; idx < actual.length; idx++) {
      assertEquals(message + " byte " + (from + idx) + " differs. expected " +
          expected[from + idx] + " actual " + actual[idx],
          expected[from + idx], actual[idx]);
      actual[idx] = 0;
    }
  }

  /**
   * Test that copy on write for blocks works correctly
   */
  public void testCopyOnWrite() throws IOException {
    Configuration conf = new Configuration();
    conf.setBoolean(FSConstants.DFS_USE_INLINE_CHECKSUM_KEY, true);
    if (simulatedStorage) {
      conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
    }
    conf.setBoolean(FSConstants.FS_OUTPUT_STREAM_AUTO_PRINT_PROFILE, true);
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
    FileSystem fs = cluster.getFileSystem();
    InetSocketAddress addr = new InetSocketAddress("localhost",
        cluster.getNameNodePort());
    int nsId = cluster.getNameNode().getNamespaceID();
    DFSClient client = new DFSClient(addr, conf);
    try {
      // create a new file, write to it and close it.
// Path file1 = new Path("/filestatus.dat"); FSDataOutputStream stm = createFile(fs, file1, 1); writeFile(stm); stm.close(); // Get a handle to the datanode DataNode[] dn = cluster.listDataNodes(); assertTrue("There should be only one datanode but found " + dn.length, dn.length == 1); LocatedBlocks locations = client.namenode.getBlockLocations( file1.toString(), 0, Long.MAX_VALUE); List<LocatedBlock> blocks = locations.getLocatedBlocks(); FSDataset dataset = (FSDataset) dn[0].data; // // Create hard links for a few of the blocks // for (int i = 0; i < blocks.size(); i = i + 2) { Block b = (Block) blocks.get(i).getBlock(); FSDataset fsd = (FSDataset) dataset; File f = fsd.getFile(nsId, b); File link = new File(f.toString() + ".link"); System.out.println("Creating hardlink for File " + f + " to " + link); HardLink.createHardLink(f, link); } // // Detach all blocks. This should remove hardlinks (if any) // for (int i = 0; i < blocks.size(); i++) { Block b = (Block) blocks.get(i).getBlock(); System.out.println("testCopyOnWrite detaching block " + b); assertTrue("Detaching block " + b + " should have returned true", dataset.detachBlock(nsId, b, 1) == true); } // Since the blocks were already detached earlier, these calls should // return false // for (int i = 0; i < blocks.size(); i++) { Block b = (Block) blocks.get(i).getBlock(); System.out.println("testCopyOnWrite detaching block " + b); assertTrue("Detaching block " + b + " should have returned false", dataset.detachBlock(nsId,b, 1) == false); } } finally { fs.close(); cluster.shutdown(); } } public void testPacketBlockReceiverProfileData() throws IOException { PacketBlockReceiverProfileData profile = new PacketBlockReceiverProfileData(); ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream dos = new DataOutputStream(baos); profile.write(dos); dos.close(); DataInputStream dis = new DataInputStream(new ByteArrayInputStream( baos.toByteArray())); profile.readFields(dis); dis.close(); } /** * Test a simple flush on a simple HDFS file. */ public void testSimpleFlush() throws IOException { testSimpleFlushInternal(true, true); testSimpleFlushInternal(true, false); testSimpleFlushInternal(false, true); testSimpleFlushInternal(false, false); } private void testSimpleFlushInternal(boolean datnodeInlineChecksum, boolean clientInlineChecksum) throws IOException { Configuration conf = new Configuration(); conf.setBoolean(FSConstants.DFS_USE_INLINE_CHECKSUM_KEY, datnodeInlineChecksum); if (simulatedStorage) { conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true); } initBuffer(fileSize); MiniDFSCluster cluster = new MiniDFSCluster(0, conf, 3, true, true, true, null, null, null, null, true, false, 1, false, false); conf.setBoolean(FSConstants.DFS_USE_INLINE_CHECKSUM_KEY, clientInlineChecksum); cluster.waitActive(); FileSystem fs = cluster.getFileSystem(conf); try { DFSWriteProfilingData profile = new DFSWriteProfilingData(); DFSClient.setProfileDataForNextOutputStream(profile); // create a new file. 
Path file1 = new Path("/simpleFlush.dat"); FSDataOutputStream stm = createFile(fs, file1, 3); System.out.println("Created file simpleFlush.dat"); // write to file int mid = fileSize/2; try { stm.write(fileContents, 0, mid); stm.sync(); if (!datnodeInlineChecksum && clientInlineChecksum) { TestCase .fail("Client should fail writing to datanode with inline checksum disabled with inline checksum enabled in client side"); } } catch (IOException ioe) { if (datnodeInlineChecksum || !clientInlineChecksum) { throw ioe; } else { return; } } System.out.println("Wrote and Flushed first part of file."); // write the remainder of the file stm.write(fileContents, mid, fileSize - mid); System.out.println("Written second part of file"); stm.sync(); stm.sync(); System.out.println("Wrote and Flushed second part of file."); // verify that full blocks are sane checkFile(fs, file1, 1); stm.close(); System.out.println("Closed file."); // verify that entire file is good checkFullFile(fs, file1); System.out.println("Profile: " + profile.toString()); } catch (IOException e) { System.out.println("Exception :" + e); throw e; } catch (Throwable e) { System.out.println("Throwable :" + e); e.printStackTrace(); throw new IOException("Throwable : " + e); } finally { fs.close(); cluster.shutdown(); } } /** * Test a simple flush on a simple HDFS file. */ public void testSimpleFlushSmallWrite() throws IOException { testSimpleFlushSmallWriteInternal(false); testSimpleFlushSmallWriteInternal(true); } /** * Test a simple flush on a simple HDFS file. */ private void testSimpleFlushSmallWriteInternal(boolean inlineChecksum) throws IOException { Configuration conf = new Configuration(); if (simulatedStorage) { conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true); } conf.setBoolean("dfs.use.inline.checksum", inlineChecksum); initBuffer(fileSize); MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null); FileSystem fs = cluster.getFileSystem(); try { // create a new file. Path file1 = new Path("/simpleFlushSmallWrite.dat"); FSDataOutputStream stm = createFile(fs, file1, 1); System.out.println("Created file simpleFlush.dat"); // write to file stm.write(fileContents, 0, 1); stm.sync(); stm.write(fileContents, 1, 1); stm.sync(); stm.write(fileContents, 2, 1); stm.sync(); stm.close(); System.out.println("Closed file."); checkContent(fs, file1, 3); stm = fs.append(file1); System.out.println("opened file for append."); stm.write(fileContents, 3, 1); stm.sync(); stm.write(fileContents, 4, 1); stm.sync(); stm.write(fileContents, 5, 1); stm.sync(); checkContent(fs, file1, 6); stm.write(fileContents, 6, 512); stm.sync(); checkContent(fs, file1, 518); stm.write(fileContents, 518, 1024); stm.sync(); checkContent(fs, file1, 1542); stm.write(fileContents, 1542, 511); stm.sync(); checkContent(fs, file1, 2053); stm.write(fileContents, 2053, 513); stm.sync(); checkContent(fs, file1, 2566); System.out.println("Writing the rest of the data to file"); stm.write(fileContents, 2566, fileSize - 2566); stm.sync(); stm.close(); checkFile(fs, file1, 1); stm.close(); System.out.println("Closed file."); // verify that entire file is good checkFullFile(fs, file1); } catch (IOException e) { System.out.println("Exception :" + e); throw e; } catch (Throwable e) { System.out.println("Throwable :" + e); e.printStackTrace(); throw new IOException("Throwable : " + e); } finally { fs.close(); cluster.shutdown(); } } /** * Test that file data can be flushed. 
   */
  public void testComplexFlush() throws IOException {
    Configuration conf = new Configuration();
    conf.setBoolean(FSConstants.DFS_USE_INLINE_CHECKSUM_KEY, true);
    if (simulatedStorage) {
      conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
    }
    initBuffer(fileSize);
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
    FileSystem fs = cluster.getFileSystem();
    try {
      // create a new file.
      Path file1 = new Path("/complexFlush.dat");
      FSDataOutputStream stm = createFile(fs, file1, 1);
      System.out.println("Created file complexFlush.dat");

      int start = 0;
      for (start = 0; (start + 29) < fileSize; ) {
        stm.write(fileContents, start, 29);
        stm.sync();
        start += 29;
      }
      stm.write(fileContents, start, fileSize - start);

      // verify that full blocks are sane
      checkFile(fs, file1, 1);
      stm.close();

      // verify that entire file is good
      checkFullFile(fs, file1);
    } catch (IOException e) {
      System.out.println("Exception :" + e);
      throw e;
    } catch (Throwable e) {
      System.out.println("Throwable :" + e);
      e.printStackTrace();
      throw new IOException("Throwable : " + e);
    } finally {
      fs.close();
      cluster.shutdown();
    }
  }

  /** This creates a slow writer and checks to see
   * if pipeline heartbeats work fine
   */
  public void testPipelineHeartbeat() throws Exception {
    final int DATANODE_NUM = 2;
    final int fileLen = 6;
    Configuration conf = new Configuration();
    conf.setBoolean(FSConstants.DFS_USE_INLINE_CHECKSUM_KEY, true);
    final int timeout = 2000;
    conf.setInt("dfs.socket.timeout", timeout);
    conf.setBoolean(FSConstants.FS_OUTPUT_STREAM_AUTO_PRINT_PROFILE, true);

    final Path p = new Path("/pipelineHeartbeat/foo");
    System.out.println("p=" + p);

    MiniDFSCluster cluster = new MiniDFSCluster(conf, DATANODE_NUM, true, null);
    DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();

    initBuffer(fileLen);

    try {
      DFSWriteProfilingData profile = new DFSWriteProfilingData();
      DFSClient.setProfileDataForNextOutputStream(profile);

      // create a new file.
      FSDataOutputStream stm = createFile(fs, p, DATANODE_NUM);

      stm.write(fileContents, 0, 1);
      Thread.sleep(timeout);
      stm.sync();
      System.out.println("Wrote 1 byte and hflush " + p);

      // write another byte
      Thread.sleep(timeout);
      stm.write(fileContents, 1, 1);
      stm.sync();

      stm.write(fileContents, 2, 1);
      Thread.sleep(timeout);
      stm.sync();

      stm.write(fileContents, 3, 1);
      Thread.sleep(timeout);
      stm.write(fileContents, 4, 1);
      stm.sync();

      stm.write(fileContents, 5, 1);
      Thread.sleep(timeout);
      stm.close();

      // verify that entire file is good
      checkFullFile(fs, p);
    } finally {
      fs.close();
      cluster.shutdown();
    }
  }

  /**
   * Test that cached block locations expire and get refetched from the
   * namenode as reads require them.
   * @throws InterruptedException
   * @throws NoSuchFieldException
   * @throws SecurityException
   * @throws IllegalAccessException
   * @throws IllegalArgumentException
   */
  public void testLocatedBlockExpire() throws IOException,
      InterruptedException, SecurityException, NoSuchFieldException,
      IllegalArgumentException, IllegalAccessException {
    Configuration conf = new Configuration();
    final AtomicInteger invokeCount = new AtomicInteger(0);
    InjectionHandler.set(new InjectionHandler() {
      @Override
      protected void _processEventIO(InjectionEventI event,
          Object... args) throws IOException {
        if (event == InjectionEvent.DFSCLIENT_GET_LOCATED_BLOCKS) {
          invokeCount.incrementAndGet();
        }
      }
    });
    if (simulatedStorage) {
      conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
    }
    // Disable background block location renewal thread
    // (it is enabled by default in unit tests)
    conf.setBoolean("dfs.client.block.location.renewal.enabled", false);
    conf.setInt("dfs.client.locatedblock.expire.timeout", 1000);
    conf.setInt("dfs.client.locatedblock.expire.random.timeout", 2);
    conf.setLong("dfs.read.prefetch.size", fileSize - blockSize * 2);
    initBuffer(fileSize);
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 2, true, null);
    FileSystem fs = cluster.getFileSystem();
    try {
      // create a new file.
      Path file1 = new Path("/testLocatedBlockExpire");
      FSDataOutputStream stm = createFile(fs, file1, 2);
      System.out.println("Created file testLocatedBlockExpire");

      // write to file
      stm.write(fileContents, 0, fileSize);
      stm.close();
      System.out.println("Closed file.");
      TestCase.assertEquals(0, invokeCount.get());

      // open the file and remove one datanode from every block
      FSDataInputStream in = fs.open(file1);
      TestCase.assertEquals(1, invokeCount.get());
      List<LocatedBlock> lbs = ((DFSDataInputStream) in).getAllBlocks();
      for (LocatedBlock lb : lbs) {
        Field f = lb.getClass().getDeclaredField("locs"); // NoSuchFieldException
        f.setAccessible(true);
        DatanodeInfo[] di = (DatanodeInfo[]) f.get(lb);
        DatanodeInfo[] newDi = new DatanodeInfo[] { di[0] };
        f.set(lb, newDi);
      }
      TestCase.assertEquals(2, invokeCount.get());
      in.read(fileSize / 4, new byte[fileSize], 0, fileSize / 2);
      TestCase.assertEquals(2, invokeCount.get());

      // double check the location size is still 1
      lbs = ((DFSDataInputStream) in).getAllBlocks();
      for (LocatedBlock lb : lbs) {
        Field f = lb.getClass().getDeclaredField("locs"); // NoSuchFieldException
        f.setAccessible(true);
        DatanodeInfo[] di = (DatanodeInfo[]) f.get(lb);
        TestCase.assertEquals(1, di.length);
      }

      // sleep up to the located block expire time
      Thread.sleep(1000);
      // all block locations expire now. Refetch [fileSize/2, fileSize]
      in.read(fileSize / 2, new byte[fileSize], 0, fileSize / 4 - 1);
      TestCase.assertEquals(3, invokeCount.get());
      Thread.sleep(500);
      // reread within range so no need to refetch
      in.seek(fileSize / 4 * 3 + 1);
      in.read(new byte[fileSize], 0, fileSize / 4 - 2);
      TestCase.assertEquals(3, invokeCount.get());
      // need to refetch as the previous refetch doesn't cover it.
      in.seek(blockSize);
      in.read(new byte[fileSize], 0, fileSize / 4 + blockSize);
      TestCase.assertEquals(4, invokeCount.get());
      Thread.sleep(500);
      // [fileSize-blockSize, fileSize] expired. need to refetch.
      in.read(fileSize - blockSize, new byte[fileSize], 0, blockSize);
      TestCase.assertEquals(5, invokeCount.get());
      in.read(fileSize - blockSize * 2, new byte[fileSize], 0, blockSize);
      TestCase.assertEquals(5, invokeCount.get());
      Thread.sleep(500);
      // All but [fileSize-blockSize, fileSize] expired. Refetch.
      in.read(fileSize / 4, new byte[fileSize], 0, fileSize / 4 - 1);
      TestCase.assertEquals(6, invokeCount.get());
      Thread.sleep(100);
      // prefetch [fileSize/2, fileSize]
      in.seek(fileSize / 2);
      in.read(fileSize / 2, new byte[fileSize], 0, fileSize / 2 - 1);
      TestCase.assertEquals(7, invokeCount.get());
      Thread.sleep(100);
      // need to prefetch [0, prefetchSize]
      in.read(0, new byte[fileSize], 0, fileSize / 2 - 1);
      TestCase.assertEquals(8, invokeCount.get());

      // All blocks' locations should be in cache with two locations
      // (updated from namenodes)
      lbs = ((DFSDataInputStream) in).getAllBlocks();
      for (LocatedBlock lb : lbs) {
        Field f = lb.getClass().getDeclaredField("locs"); // NoSuchFieldException
        f.setAccessible(true);
        DatanodeInfo[] di = (DatanodeInfo[]) f.get(lb);
        TestCase.assertEquals(2, di.length);
      }
      TestCase.assertEquals(8, invokeCount.get());

      in.close();
    } catch (IOException e) {
      System.out.println("Exception :" + e);
      throw e;
    } finally {
      fs.close();
      cluster.shutdown();
    }
  }
}