/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.inOrder;
import static org.mockito.Mockito.mock;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.net.SocketTimeoutException;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.BlockStorageLocation;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem.Statistics.StatisticsData;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.GlobalStorageStatistics;
import org.apache.hadoop.fs.GlobalStorageStatistics.StorageStatisticsProvider;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.Options.ChecksumOpt;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.fs.VolumeId;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.fs.StorageStatistics.LongStatistic;
import org.apache.hadoop.hdfs.DFSOpsCountStatistics.OpType;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
import org.apache.hadoop.hdfs.web.HftpFileSystem;
import org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.ipc.ProtobufRpcEngine;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.Time;
import org.apache.log4j.Level;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.InOrder;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import org.mockito.internal.util.reflection.Whitebox;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Supplier;
import com.google.common.collect.Lists;

public class TestDistributedFileSystem {
  private static final Random RAN = new Random();
  private static final Logger LOG = LoggerFactory.getLogger(
      TestDistributedFileSystem.class);

  static {
    GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL);
  }

  private boolean dualPortTesting = false;

  private boolean noXmlDefaults = false;

  private HdfsConfiguration getTestConfiguration() {
    HdfsConfiguration conf;
    if (noXmlDefaults) {
      conf = new HdfsConfiguration(false);
      String namenodeDir = new File(MiniDFSCluster.getBaseDirectory(),
          "name").getAbsolutePath();
      conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, namenodeDir);
      conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, namenodeDir);
    } else {
      conf = new HdfsConfiguration();
    }
    if (dualPortTesting) {
      conf.set(DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
          "localhost:0");
    }
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
    return conf;
  }

  @Test
  public void testEmptyDelegationToken() throws IOException {
    Configuration conf = getTestConfiguration();
    MiniDFSCluster cluster = null;
    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
      FileSystem fileSys = cluster.getFileSystem();
      fileSys.getDelegationToken("");
    } finally {
      cluster.shutdown();
    }
  }

  @Test
  public void testFileSystemCloseAll() throws Exception {
    Configuration conf = getTestConfiguration();
    MiniDFSCluster cluster =
        new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
    URI address = FileSystem.getDefaultUri(conf);

    try {
      FileSystem.closeAll();

      conf = getTestConfiguration();
      FileSystem.setDefaultUri(conf, address);
      FileSystem.get(conf);
      FileSystem.get(conf);
      FileSystem.closeAll();
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  /**
   * Tests DFSClient.close throws no ConcurrentModificationException if
   * multiple files are open.
   * Also tests that any cached sockets are closed. (HDFS-3359)
   */
  @Test
  public void testDFSClose() throws Exception {
    Configuration conf = getTestConfiguration();
    MiniDFSCluster cluster = null;
    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
      FileSystem fileSys = cluster.getFileSystem();

      // create two files, leaving them open
      fileSys.create(new Path("/test/dfsclose/file-0"));
      fileSys.create(new Path("/test/dfsclose/file-1"));

      // create another file, close it, and read it, so
      // the client gets a socket in its SocketCache
      Path p = new Path("/non-empty-file");
      DFSTestUtil.createFile(fileSys, p, 1L, (short)1, 0L);
      DFSTestUtil.readFile(fileSys, p);

      fileSys.close();
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  @Test
  public void testDFSCloseOrdering() throws Exception {
    DistributedFileSystem fs = new MyDistributedFileSystem();
    Path path = new Path("/a");
    fs.deleteOnExit(path);
    fs.close();

    InOrder inOrder = inOrder(fs.dfs);
    inOrder.verify(fs.dfs).closeOutputStreams(eq(false));
    inOrder.verify(fs.dfs).delete(eq(path.toString()), eq(true));
    inOrder.verify(fs.dfs).close();
  }

  private static class MyDistributedFileSystem extends DistributedFileSystem {
    MyDistributedFileSystem() {
      dfs = mock(DFSClient.class);
    }

    @Override
    public boolean exists(Path p) {
      return true; // trick out deleteOnExit
    }

    // Symlink resolution doesn't work with a mock, since it doesn't
    // have a valid Configuration to resolve paths to the right FileSystem.
    // Just call the DFSClient directly to register the delete
    @Override
    public boolean delete(Path f, final boolean recursive) throws IOException {
      return dfs.delete(f.toUri().getPath(), recursive);
    }
  }

  @Test
  public void testDFSSeekExceptions() throws IOException {
    Configuration conf = getTestConfiguration();
    MiniDFSCluster cluster = null;
    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
      FileSystem fileSys = cluster.getFileSystem();
      String file = "/test/fileclosethenseek/file-0";
      Path path = new Path(file);

      // create file
      FSDataOutputStream output = fileSys.create(path);
      output.writeBytes("Some test data to write longer than 10 bytes");
      output.close();

      FSDataInputStream input = fileSys.open(path);
      input.seek(10);
      boolean threw = false;
      try {
        input.seek(100);
      } catch (IOException e) {
        // success
        threw = true;
      }
      assertTrue("Failed to throw IOE when seeking past end", threw);

      input.close();
      threw = false;
      try {
        input.seek(1);
      } catch (IOException e) {
        // success
        threw = true;
      }
      assertTrue("Failed to throw IOE when seeking after close", threw);
      fileSys.close();
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  @Test
  public void testDFSClient() throws Exception {
    Configuration conf = getTestConfiguration();
    final long grace = 1000L;
    MiniDFSCluster cluster = null;

    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
      final String filepathstring = "/test/LeaseChecker/foo";
      final Path[] filepaths = new Path[4];
      for (int i = 0; i < filepaths.length; i++) {
        filepaths[i] = new Path(filepathstring + i);
      }
      final long millis = Time.now();

      {
        final DistributedFileSystem dfs = cluster.getFileSystem();
        dfs.dfs.getLeaseRenewer().setGraceSleepPeriod(grace);
        assertFalse(dfs.dfs.getLeaseRenewer().isRunning());

        {
          // create a file
          final FSDataOutputStream out = dfs.create(filepaths[0]);
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());
          // write something
          out.writeLong(millis);
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());
          // close
          out.close();
          Thread.sleep(grace/4*3);
          // within grace period
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());
          for (int i = 0; i < 3; i++) {
            if (dfs.dfs.getLeaseRenewer().isRunning()) {
              Thread.sleep(grace/2);
            }
          }
          // passed grace period
          assertFalse(dfs.dfs.getLeaseRenewer().isRunning());
        }

        {
          // create file1
          final FSDataOutputStream out1 = dfs.create(filepaths[1]);
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());
          // create file2
          final FSDataOutputStream out2 = dfs.create(filepaths[2]);
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());

          // write something to file1
          out1.writeLong(millis);
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());
          // close file1
          out1.close();
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());

          // write something to file2
          out2.writeLong(millis);
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());
          // close file2
          out2.close();
          Thread.sleep(grace/4*3);
          // within grace period
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());
        }

        {
          // create file3
          final FSDataOutputStream out3 = dfs.create(filepaths[3]);
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());
          Thread.sleep(grace/4*3);
          // passed previous grace period, should still be running
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());
          // write something to file3
          out3.writeLong(millis);
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());
          // close file3
          out3.close();
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());
          Thread.sleep(grace/4*3);
          // within grace period
          assertTrue(dfs.dfs.getLeaseRenewer().isRunning());
          for (int i = 0; i < 3; i++) {
            if (dfs.dfs.getLeaseRenewer().isRunning()) {
              Thread.sleep(grace/2);
            }
          }
          // passed grace period
          assertFalse(dfs.dfs.getLeaseRenewer().isRunning());
        }

        dfs.close();
      }

      {
        // Check to see if opening a non-existent file triggers a FNF
        FileSystem fs = cluster.getFileSystem();
        Path dir = new Path("/wrwelkj");
        assertFalse("File should not exist for test.", fs.exists(dir));

        try {
          FSDataInputStream in = fs.open(dir);
          try {
            in.close();
            fs.close();
          } finally {
            assertTrue("Did not get a FileNotFoundException for non-existing"
                + " file.", false);
          }
        } catch (FileNotFoundException fnf) {
          // This is the proper exception to catch; move on.
        }
      }

      {
        final DistributedFileSystem dfs = cluster.getFileSystem();
        assertFalse(dfs.dfs.getLeaseRenewer().isRunning());

        // open and check the file
        FSDataInputStream in = dfs.open(filepaths[0]);
        assertFalse(dfs.dfs.getLeaseRenewer().isRunning());
        assertEquals(millis, in.readLong());
        assertFalse(dfs.dfs.getLeaseRenewer().isRunning());
        in.close();
        assertFalse(dfs.dfs.getLeaseRenewer().isRunning());
        dfs.close();
      }

      {
        // test accessing DFS with ip address. should work with any hostname
        // alias or ip address that points to the interface that NameNode
        // is listening on. In this case, it is localhost.
        String uri = "hdfs://127.0.0.1:" + cluster.getNameNodePort()
            + "/test/ipAddress/file";
        Path path = new Path(uri);
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        FSDataOutputStream out = fs.create(path);
        byte[] buf = new byte[1024];
        out.write(buf);
        out.close();

        FSDataInputStream in = fs.open(path);
        in.readFully(buf);
        in.close();
        fs.close();
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  /**
   * This is to test that the {@link FileSystem#clearStatistics()} resets all
   * the global storage statistics.
   */
  @Test
  public void testClearStatistics() throws Exception {
    final Configuration conf = getTestConfiguration();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    try {
      cluster.waitActive();
      FileSystem dfs = cluster.getFileSystem();

      final Path dir = new Path("/testClearStatistics");
      final long mkdirCount = getOpStatistics(OpType.MKDIRS);
      long writeCount = DFSTestUtil.getStatistics(dfs).getWriteOps();
      dfs.mkdirs(dir);
      checkOpStatistics(OpType.MKDIRS, mkdirCount + 1);
      assertEquals(++writeCount, DFSTestUtil.getStatistics(dfs).getWriteOps());

      final long createCount = getOpStatistics(OpType.CREATE);
      FSDataOutputStream out = dfs.create(new Path(dir, "tmpFile"), (short)1);
      out.write(40);
      out.close();
      checkOpStatistics(OpType.CREATE, createCount + 1);
      assertEquals(++writeCount, DFSTestUtil.getStatistics(dfs).getWriteOps());

      FileSystem.clearStatistics();
      checkOpStatistics(OpType.MKDIRS, 0);
      checkOpStatistics(OpType.CREATE, 0);
      checkStatistics(dfs, 0, 0, 0);
    } finally {
      cluster.shutdown();
    }
  }

  @Test
  public void testStatistics() throws IOException {
    FileSystem.getStatistics(HdfsConstants.HDFS_URI_SCHEME,
        DistributedFileSystem.class).reset();
    @SuppressWarnings("unchecked")
    ThreadLocal<StatisticsData> data =
        (ThreadLocal<StatisticsData>) Whitebox.getInternalState(
            FileSystem.getStatistics(HdfsConstants.HDFS_URI_SCHEME,
                DistributedFileSystem.class), "threadData");
    data.set(null);

    int lsLimit = 2;
    final Configuration conf = getTestConfiguration();
    conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, lsLimit);
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    try {
      cluster.waitActive();
      final FileSystem fs = cluster.getFileSystem();
      Path dir = new Path("/test");
      Path file = new Path(dir, "file");

      int readOps = 0;
      int writeOps = 0;
      int largeReadOps = 0;

      long opCount = getOpStatistics(OpType.MKDIRS);
      fs.mkdirs(dir);
      checkStatistics(fs, readOps, ++writeOps, largeReadOps);
      checkOpStatistics(OpType.MKDIRS, opCount + 1);

      opCount = getOpStatistics(OpType.CREATE);
      FSDataOutputStream out = fs.create(file, (short)1);
      out.close();
      checkStatistics(fs, readOps, ++writeOps, largeReadOps);
      checkOpStatistics(OpType.CREATE, opCount + 1);

      opCount = getOpStatistics(OpType.GET_FILE_STATUS);
      FileStatus status = fs.getFileStatus(file);
      checkStatistics(fs, ++readOps, writeOps, largeReadOps);
      checkOpStatistics(OpType.GET_FILE_STATUS, opCount + 1);

      opCount = getOpStatistics(OpType.GET_FILE_BLOCK_LOCATIONS);
      fs.getFileBlockLocations(file, 0, 0);
      checkStatistics(fs, ++readOps, writeOps, largeReadOps);
      checkOpStatistics(OpType.GET_FILE_BLOCK_LOCATIONS, opCount + 1);
      fs.getFileBlockLocations(status, 0, 0);
      checkStatistics(fs, ++readOps, writeOps, largeReadOps);
      checkOpStatistics(OpType.GET_FILE_BLOCK_LOCATIONS, opCount + 2);

      opCount = getOpStatistics(OpType.OPEN);
      FSDataInputStream in = fs.open(file);
      in.close();
      checkStatistics(fs, ++readOps, writeOps, largeReadOps);
      checkOpStatistics(OpType.OPEN, opCount + 1);

      opCount = getOpStatistics(OpType.SET_REPLICATION);
      fs.setReplication(file, (short)2);
      checkStatistics(fs, readOps, ++writeOps, largeReadOps);
      checkOpStatistics(OpType.SET_REPLICATION, opCount + 1);

      opCount = getOpStatistics(OpType.RENAME);
      Path file1 = new Path(dir, "file1");
      fs.rename(file, file1);
      checkStatistics(fs, readOps, ++writeOps, largeReadOps);
      checkOpStatistics(OpType.RENAME, opCount + 1);

      opCount = getOpStatistics(OpType.GET_CONTENT_SUMMARY);
      fs.getContentSummary(file1);
      checkStatistics(fs, ++readOps, writeOps, largeReadOps);
      checkOpStatistics(OpType.GET_CONTENT_SUMMARY, opCount + 1);
      // Iterative ls test
      long mkdirOp = getOpStatistics(OpType.MKDIRS);
      long listStatusOp = getOpStatistics(OpType.LIST_STATUS);
      for (int i = 0; i < 10; i++) {
        Path p = new Path(dir, Integer.toString(i));
        fs.mkdirs(p);
        mkdirOp++;
        FileStatus[] list = fs.listStatus(dir);
        if (list.length > lsLimit) {
          // if large directory, then count readOps and largeReadOps by
          // number times listStatus iterates
          int iterations = (int)Math.ceil((double)list.length/lsLimit);
          largeReadOps += iterations;
          readOps += iterations;
          listStatusOp += iterations;
        } else {
          // Single iteration in listStatus - no large read operation done
          readOps++;
          listStatusOp++;
        }

        // writeOps incremented by 1 for mkdirs
        // readOps and largeReadOps incremented by 1 or more
        checkStatistics(fs, readOps, ++writeOps, largeReadOps);
        checkOpStatistics(OpType.MKDIRS, mkdirOp);
        checkOpStatistics(OpType.LIST_STATUS, listStatusOp);
      }

      opCount = getOpStatistics(OpType.GET_STATUS);
      fs.getStatus(file1);
      checkStatistics(fs, ++readOps, writeOps, largeReadOps);
      checkOpStatistics(OpType.GET_STATUS, opCount + 1);

      opCount = getOpStatistics(OpType.GET_FILE_CHECKSUM);
      fs.getFileChecksum(file1);
      checkStatistics(fs, ++readOps, writeOps, largeReadOps);
      checkOpStatistics(OpType.GET_FILE_CHECKSUM, opCount + 1);

      opCount = getOpStatistics(OpType.SET_PERMISSION);
      fs.setPermission(file1, new FsPermission((short)0777));
      checkStatistics(fs, readOps, ++writeOps, largeReadOps);
      checkOpStatistics(OpType.SET_PERMISSION, opCount + 1);

      opCount = getOpStatistics(OpType.SET_TIMES);
      fs.setTimes(file1, 0L, 0L);
      checkStatistics(fs, readOps, ++writeOps, largeReadOps);
      checkOpStatistics(OpType.SET_TIMES, opCount + 1);

      opCount = getOpStatistics(OpType.SET_OWNER);
      UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
      fs.setOwner(file1, ugi.getUserName(), ugi.getGroupNames()[0]);
      checkOpStatistics(OpType.SET_OWNER, opCount + 1);
      checkStatistics(fs, readOps, ++writeOps, largeReadOps);

      opCount = getOpStatistics(OpType.DELETE);
      fs.delete(dir, true);
      checkStatistics(fs, readOps, ++writeOps, largeReadOps);
      checkOpStatistics(OpType.DELETE, opCount + 1);
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  @SuppressWarnings("ThrowableResultOfMethodCallIgnored")
  @Test (timeout = 180000)
  public void testConcurrentStatistics()
      throws IOException, InterruptedException {
    FileSystem.getStatistics(HdfsConstants.HDFS_URI_SCHEME,
        DistributedFileSystem.class).reset();

    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(
        new Configuration()).build();
    cluster.waitActive();
    final FileSystem fs = cluster.getFileSystem();
    final int numThreads = 5;
    final ExecutorService threadPool =
        Executors.newFixedThreadPool(numThreads);

    try {
      final CountDownLatch allExecutorThreadsReady =
          new CountDownLatch(numThreads);
      final CountDownLatch startBlocker = new CountDownLatch(1);
      final CountDownLatch allDone = new CountDownLatch(numThreads);
      final AtomicReference<Throwable> childError = new AtomicReference<>();

      for (int i = 0; i < numThreads; i++) {
        threadPool.submit(new Runnable() {
          @Override
          public void run() {
            allExecutorThreadsReady.countDown();
            try {
              startBlocker.await();
              final FileSystem fs = cluster.getFileSystem();
              fs.mkdirs(new Path("/testStatisticsParallelChild"));
            } catch (Throwable t) {
              LOG.error("Child failed when calling mkdir", t);
              childError.compareAndSet(null, t);
            } finally {
              allDone.countDown();
            }
          }
        });
      }

      final long oldMkdirOpCount = getOpStatistics(OpType.MKDIRS);
      // wait until all threads are ready
      allExecutorThreadsReady.await();
      // all threads start making directories
      startBlocker.countDown();
      // wait until all threads are done
      allDone.await();

      assertNull("Child failed with exception " + childError.get(),
          childError.get());

      checkStatistics(fs, 0, numThreads, 0);
      // check the single operation count stat
      checkOpStatistics(OpType.MKDIRS, numThreads + oldMkdirOpCount);
      // iterate all the operation counts
      for (Iterator<LongStatistic> opCountIter =
           FileSystem.getGlobalStorageStatistics()
               .get(DFSOpsCountStatistics.NAME).getLongStatistics();
           opCountIter.hasNext();) {
        final LongStatistic opCount = opCountIter.next();
        if (OpType.MKDIRS.getSymbol().equals(opCount.getName())) {
          assertEquals("Unexpected op count from iterator!",
              numThreads + oldMkdirOpCount, opCount.getValue());
        }
        LOG.info(opCount.getName() + "\t" + opCount.getValue());
      }
    } finally {
      threadPool.shutdownNow();
      cluster.shutdown();
    }
  }

  /** Checks statistics. -1 indicates do not check for the operations */
  private void checkStatistics(FileSystem fs, int readOps, int writeOps,
      int largeReadOps) {
    assertEquals(readOps, DFSTestUtil.getStatistics(fs).getReadOps());
    assertEquals(writeOps, DFSTestUtil.getStatistics(fs).getWriteOps());
    assertEquals(largeReadOps, DFSTestUtil.getStatistics(fs).getLargeReadOps());
  }

  private static void checkOpStatistics(OpType op, long count) {
    assertEquals("Op " + op.getSymbol() + " has unexpected count!",
        count, getOpStatistics(op));
  }

  private static long getOpStatistics(OpType op) {
    return GlobalStorageStatistics.INSTANCE.get(
        DFSOpsCountStatistics.NAME)
        .getLong(op.getSymbol());
  }

  @Test
  public void testFileChecksum() throws Exception {
    GenericTestUtils.setLogLevel(HftpFileSystem.LOG, Level.ALL);

    final long seed = RAN.nextLong();
    System.out.println("seed=" + seed);
    RAN.setSeed(seed);

    final Configuration conf = getTestConfiguration();
    conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true);

    final MiniDFSCluster cluster =
        new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    final FileSystem hdfs = cluster.getFileSystem();

    final String nnAddr = conf.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY);
    final UserGroupInformation current = UserGroupInformation.getCurrentUser();
    final UserGroupInformation ugi =
        UserGroupInformation.createUserForTesting(
            current.getShortUserName() + "x", new String[]{"user"});

    try {
      hdfs.getFileChecksum(new Path("/test/TestNonExistingFile"));
      fail("Expecting FileNotFoundException");
    } catch (FileNotFoundException e) {
      assertTrue("Not throwing the intended exception message",
          e.getMessage().contains(
              "File does not exist: /test/TestNonExistingFile"));
    }

    try {
      Path path = new Path("/test/TestExistingDir/");
      hdfs.mkdirs(path);
      hdfs.getFileChecksum(path);
      fail("Expecting FileNotFoundException");
    } catch (FileNotFoundException e) {
      assertTrue("Not throwing the intended exception message",
          e.getMessage().contains("Path is not a file: /test/TestExistingDir"));
    }

    // hftp
    final String hftpuri = "hftp://" + nnAddr;
    System.out.println("hftpuri=" + hftpuri);
    final FileSystem hftp = ugi.doAs(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws Exception {
            return new Path(hftpuri).getFileSystem(conf);
          }
        });

    // webhdfs
    final String webhdfsuri = WebHdfsFileSystem.SCHEME + "://" + nnAddr;
    System.out.println("webhdfsuri=" + webhdfsuri);
    final FileSystem webhdfs = ugi.doAs(
        new PrivilegedExceptionAction<FileSystem>() {
          @Override
          public FileSystem run() throws Exception {
            return new Path(webhdfsuri).getFileSystem(conf);
          }
        });

    final Path dir = new Path("/filechecksum");
    final int block_size = 1024;
    final int buffer_size =
        conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096);
    conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 512);

    // try different number of blocks
    for (int n = 0; n < 5; n++) {
      // generate random data
      final byte[] data = new byte[RAN.nextInt(block_size/2-1)+n*block_size+1];
      RAN.nextBytes(data);
      System.out.println("data.length=" + data.length);

      // write data to a file
      final Path foo = new Path(dir, "foo" + n);
      {
        final FSDataOutputStream out = hdfs.create(foo, false, buffer_size,
            (short)2, block_size);
        out.write(data);
        out.close();
      }

      // compute checksum
      final FileChecksum hdfsfoocs = hdfs.getFileChecksum(foo);
      System.out.println("hdfsfoocs=" + hdfsfoocs);

      // hftp
      final FileChecksum hftpfoocs = hftp.getFileChecksum(foo);
      System.out.println("hftpfoocs=" + hftpfoocs);

      final Path qualified = new Path(hftpuri + dir, "foo" + n);
      final FileChecksum qfoocs = hftp.getFileChecksum(qualified);
      System.out.println("qfoocs=" + qfoocs);

      // webhdfs
      final FileChecksum webhdfsfoocs = webhdfs.getFileChecksum(foo);
      System.out.println("webhdfsfoocs=" + webhdfsfoocs);

      final Path webhdfsqualified = new Path(webhdfsuri + dir, "foo" + n);
      final FileChecksum webhdfs_qfoocs =
          webhdfs.getFileChecksum(webhdfsqualified);
      System.out.println("webhdfs_qfoocs=" + webhdfs_qfoocs);

      // create a zero byte file
      final Path zeroByteFile = new Path(dir, "zeroByteFile" + n);
      {
        final FSDataOutputStream out = hdfs.create(zeroByteFile, false,
            buffer_size, (short)2, block_size);
        out.close();
      }

      // verify the magic val for zero byte files
      {
        final FileChecksum zeroChecksum = hdfs.getFileChecksum(zeroByteFile);
        assertEquals(zeroChecksum.toString(),
            "MD5-of-0MD5-of-0CRC32:70bc8f4b72a86921468bf8e8441dce51");
      }

      // write another file
      final Path bar = new Path(dir, "bar" + n);
      {
        final FSDataOutputStream out = hdfs.create(bar, false, buffer_size,
            (short)2, block_size);
        out.write(data);
        out.close();
      }

      {
        // verify checksum
        final FileChecksum barcs = hdfs.getFileChecksum(bar);
        final int barhashcode = barcs.hashCode();
        assertEquals(hdfsfoocs.hashCode(), barhashcode);
        assertEquals(hdfsfoocs, barcs);

        // hftp
        assertEquals(hftpfoocs.hashCode(), barhashcode);
        assertEquals(hftpfoocs, barcs);

        assertEquals(qfoocs.hashCode(), barhashcode);
        assertEquals(qfoocs, barcs);

        // webhdfs
        assertEquals(webhdfsfoocs.hashCode(), barhashcode);
        assertEquals(webhdfsfoocs, barcs);

        assertEquals(webhdfs_qfoocs.hashCode(), barhashcode);
        assertEquals(webhdfs_qfoocs, barcs);
      }

      hdfs.setPermission(dir, new FsPermission((short)0));

      {
        // test permission error on hftp
        try {
          hftp.getFileChecksum(qualified);
          fail();
        } catch (IOException ioe) {
          FileSystem.LOG.info("GOOD: getting an exception", ioe);
        }
      }

      {
        // test permission error on webhdfs
        try {
          webhdfs.getFileChecksum(webhdfsqualified);
          fail();
        } catch (IOException ioe) {
          FileSystem.LOG.info("GOOD: getting an exception", ioe);
        }
      }
      hdfs.setPermission(dir, new FsPermission((short)0777));
    }
    cluster.shutdown();
  }

  @Test
  public void testAllWithDualPort() throws Exception {
    dualPortTesting = true;

    try {
      testFileSystemCloseAll();
      testDFSClose();
      testDFSClient();
      testFileChecksum();
    } finally {
      dualPortTesting = false;
    }
  }

  @Test
  public void testAllWithNoXmlDefaults() throws Exception {
    // Do all the tests with a configuration that ignores the defaults in
    // the XML files.
    noXmlDefaults = true;

    try {
      testFileSystemCloseAll();
      testDFSClose();
      testDFSClient();
      testFileChecksum();
    } finally {
      noXmlDefaults = false;
    }
  }

  @Test(timeout=120000)
  public void testLocatedFileStatusStorageIdsTypes() throws Exception {
    final Configuration conf = getTestConfiguration();
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(3).build();
    try {
      final DistributedFileSystem fs = cluster.getFileSystem();
      final Path testFile = new Path("/testListLocatedStatus");
      final int blockSize = 4096;
      final int numBlocks = 10;
      // Create a test file
      final int repl = 2;
      DFSTestUtil.createFile(fs, testFile, blockSize, numBlocks * blockSize,
          blockSize, (short) repl, 0xADDED);

      // Get the listing
      RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(testFile);
      assertTrue("Expected file to be present", it.hasNext());
      LocatedFileStatus stat = it.next();
      BlockLocation[] locs = stat.getBlockLocations();
      assertEquals("Unexpected number of locations", numBlocks, locs.length);

      Set<String> dnStorageIds = new HashSet<>();
      for (DataNode d : cluster.getDataNodes()) {
        try (FsDatasetSpi.FsVolumeReferences references =
            d.getFSDataset().getFsVolumeReferences()) {
          for (FsVolumeSpi vol : references) {
            dnStorageIds.add(vol.getStorageID());
          }
        }
      }

      for (BlockLocation loc : locs) {
        String[] ids = loc.getStorageIds();
        // Run it through a set to deduplicate, since there should be no dupes
        Set<String> storageIds = new HashSet<>();
        for (String id : ids) {
          storageIds.add(id);
        }
        assertEquals("Unexpected num storage ids", repl, storageIds.size());
        // Make sure these are all valid storage IDs
        assertTrue("Unknown storage IDs found!",
            dnStorageIds.containsAll(storageIds));
        // Check storage types are the default, since we didn't set any
        StorageType[] types = loc.getStorageTypes();
        assertEquals("Unexpected num storage types", repl, types.length);
        for (StorageType t : types) {
          assertEquals("Unexpected storage type", StorageType.DEFAULT, t);
        }
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  /**
   * Tests the normal path of batching up BlockLocation[]s to be passed to a
   * single
   * {@link DistributedFileSystem#getFileBlockStorageLocations(java.util.List)}
   * call
   */
  @Test(timeout=60000)
  public void testGetFileBlockStorageLocationsBatching() throws Exception {
    final Configuration conf = getTestConfiguration();
    GenericTestUtils.setLogLevel(ProtobufRpcEngine.LOG, Level.TRACE);
    GenericTestUtils.setLogLevel(BlockStorageLocationUtil.LOG, Level.TRACE);
    GenericTestUtils.setLogLevel(DFSClient.LOG, Level.TRACE);

    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, true);
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(2).build();
    try {
      final DistributedFileSystem fs = cluster.getFileSystem();
      // Create two files
      final Path tmpFile1 = new Path("/tmpfile1.dat");
      final Path tmpFile2 = new Path("/tmpfile2.dat");
      DFSTestUtil.createFile(fs, tmpFile1, 1024, (short) 2, 0xDEADDEADL);
      DFSTestUtil.createFile(fs, tmpFile2, 1024, (short) 2, 0xDEADDEADL);

      // Make sure files are fully replicated before continuing
      GenericTestUtils.waitFor(new Supplier<Boolean>() {
        @Override
        public Boolean get() {
          try {
            List<BlockLocation> list = Lists.newArrayList();
            list.addAll(Arrays.asList(fs.getFileBlockLocations(tmpFile1, 0,
                1024)));
            list.addAll(Arrays.asList(fs.getFileBlockLocations(tmpFile2, 0,
                1024)));
            int totalRepl = 0;
            for (BlockLocation loc : list) {
              totalRepl += loc.getHosts().length;
            }
            if (totalRepl == 4) {
              return true;
            }
          } catch (IOException e) {
            // swallow
          }
          return false;
        }
      }, 500, 30000);
      // Get locations of blocks of both files and concat together
      BlockLocation[] blockLocs1 = fs.getFileBlockLocations(tmpFile1, 0, 1024);
      BlockLocation[] blockLocs2 = fs.getFileBlockLocations(tmpFile2, 0, 1024);
      BlockLocation[] blockLocs = (BlockLocation[]) ArrayUtils.addAll(
          blockLocs1, blockLocs2);
      // Fetch VolumeBlockLocations in batch
      BlockStorageLocation[] locs = fs.getFileBlockStorageLocations(Arrays
          .asList(blockLocs));
      int counter = 0;
      // Print out the list of ids received for each block
      for (BlockStorageLocation l : locs) {
        for (int i = 0; i < l.getVolumeIds().length; i++) {
          VolumeId id = l.getVolumeIds()[i];
          String name = l.getNames()[i];
          if (id != null) {
            System.out.println("Datanode " + name + " has block " + counter
                + " on volume id " + id.toString());
          }
        }
        counter++;
      }
      assertEquals("Expected two HdfsBlockLocations for two 1-block files",
          2, locs.length);
      for (BlockStorageLocation l : locs) {
        assertEquals("Expected two replicas for each block", 2,
            l.getVolumeIds().length);
        for (int i = 0; i < l.getVolumeIds().length; i++) {
          VolumeId id = l.getVolumeIds()[i];
          String name = l.getNames()[i];
          assertTrue("Expected block to be valid on datanode " + name,
              id != null);
        }
      }
    } finally {
      cluster.shutdown();
    }
  }

  /**
   * Tests error paths for
   * {@link DistributedFileSystem#getFileBlockStorageLocations(java.util.List)}
   */
  @Test(timeout=60000)
  public void testGetFileBlockStorageLocationsError() throws Exception {
    final Configuration conf = getTestConfiguration();
    conf.setBoolean(DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, true);
    conf.setInt(
        DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT_MS,
        1500);
    conf.setInt(
        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 0);

    MiniDFSCluster cluster = null;
    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
      cluster.getDataNodes();
      final DistributedFileSystem fs = cluster.getFileSystem();

      // Create a few files and add together their block locations into
      // a list.
      final Path tmpFile1 = new Path("/errorfile1.dat");
      final Path tmpFile2 = new Path("/errorfile2.dat");
      DFSTestUtil.createFile(fs, tmpFile1, 1024, (short) 2, 0xDEADDEADL);
      DFSTestUtil.createFile(fs, tmpFile2, 1024, (short) 2, 0xDEADDEADL);

      // Make sure files are fully replicated before continuing
      GenericTestUtils.waitFor(new Supplier<Boolean>() {
        @Override
        public Boolean get() {
          try {
            List<BlockLocation> list = Lists.newArrayList();
            list.addAll(Arrays.asList(fs.getFileBlockLocations(tmpFile1, 0,
                1024)));
            list.addAll(Arrays.asList(fs.getFileBlockLocations(tmpFile2, 0,
                1024)));
            int totalRepl = 0;
            for (BlockLocation loc : list) {
              totalRepl += loc.getHosts().length;
            }
            if (totalRepl == 4) {
              return true;
            }
          } catch (IOException e) {
            // swallow
          }
          return false;
        }
      }, 500, 30000);

      BlockLocation[] blockLocs1 = fs.getFileBlockLocations(tmpFile1, 0, 1024);
      BlockLocation[] blockLocs2 = fs.getFileBlockLocations(tmpFile2, 0, 1024);

      List<BlockLocation> allLocs = Lists.newArrayList();
      allLocs.addAll(Arrays.asList(blockLocs1));
      allLocs.addAll(Arrays.asList(blockLocs2));

      // Stall on the DN to test the timeout
      DataNodeFaultInjector injector = Mockito.mock(
          DataNodeFaultInjector.class);
      Mockito.doAnswer(new Answer<Void>() {
        @Override
        public Void answer(InvocationOnMock invocation) throws Throwable {
          Thread.sleep(3000);
          return null;
        }
      }).when(injector).getHdfsBlocksMetadata();
      DataNodeFaultInjector.instance = injector;

      BlockStorageLocation[] locs = fs.getFileBlockStorageLocations(allLocs);
      for (BlockStorageLocation loc : locs) {
        assertEquals(
            "Found more than 0 cached hosts although RPCs supposedly timed out",
            0, loc.getCachedHosts().length);
      }

      // Restore a default injector
      DataNodeFaultInjector.instance = new DataNodeFaultInjector();

      // Stop a datanode to simulate a failure.
      DataNodeProperties stoppedNode = cluster.stopDataNode(0);

      // Fetch VolumeBlockLocations
      locs = fs.getFileBlockStorageLocations(allLocs);
      assertEquals("Expected two HdfsBlockLocation for two 1-block files",
          2, locs.length);

      for (BlockStorageLocation l : locs) {
        assertEquals("Expected two replicas for each block", 2,
            l.getHosts().length);
        assertEquals("Expected two VolumeIDs for each block", 2,
            l.getVolumeIds().length);
        assertTrue("Expected one valid and one invalid volume",
            (l.getVolumeIds()[0] == null) ^ (l.getVolumeIds()[1] == null));
      }

      // Start the datanode again, and remove one of the blocks.
      // This is a different type of failure where the block itself
      // is invalid.
      cluster.restartDataNode(stoppedNode, true /*keepPort*/);
      cluster.waitActive();

      fs.delete(tmpFile2, true);
      HATestUtil.waitForNNToIssueDeletions(cluster.getNameNode());
      cluster.triggerHeartbeats();
      HATestUtil.waitForDNDeletions(cluster);

      locs = fs.getFileBlockStorageLocations(allLocs);
      assertEquals("Expected two HdfsBlockLocations for two 1-block files",
          2, locs.length);
      assertNotNull(locs[0].getVolumeIds()[0]);
      assertNotNull(locs[0].getVolumeIds()[1]);
      assertNull(locs[1].getVolumeIds()[0]);
      assertNull(locs[1].getVolumeIds()[1]);
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  @Test
  public void testCreateWithCustomChecksum() throws Exception {
    Configuration conf = getTestConfiguration();
    MiniDFSCluster cluster = null;
    Path testBasePath = new Path("/test/csum");
    // create args
    Path path1 = new Path(testBasePath, "file_with_crc1");
    Path path2 = new Path(testBasePath, "file_with_crc2");
    ChecksumOpt opt1 = new ChecksumOpt(DataChecksum.Type.CRC32C, 512);
    ChecksumOpt opt2 = new ChecksumOpt(DataChecksum.Type.CRC32, 512);

    // common args
    FsPermission perm = FsPermission.getDefault().applyUMask(
        FsPermission.getUMask(conf));
    EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.OVERWRITE,
        CreateFlag.CREATE);
    short repl = 1;

    try {
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
      FileSystem dfs = cluster.getFileSystem();

      dfs.mkdirs(testBasePath);

      // create two files with different checksum types
      FSDataOutputStream out1 = dfs.create(path1, perm, flags, 4096, repl,
          131072L, null, opt1);
      FSDataOutputStream out2 = dfs.create(path2, perm, flags, 4096, repl,
          131072L, null, opt2);

      for (int i = 0; i < 1024; i++) {
        out1.write(i);
        out2.write(i);
      }
      out1.close();
      out2.close();

      // the two checksums must be different.
      MD5MD5CRC32FileChecksum sum1 =
          (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path1);
      MD5MD5CRC32FileChecksum sum2 =
          (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path2);
      assertFalse(sum1.equals(sum2));

      // check the individual params
      assertEquals(DataChecksum.Type.CRC32C, sum1.getCrcType());
      assertEquals(DataChecksum.Type.CRC32, sum2.getCrcType());
    } finally {
      if (cluster != null) {
        cluster.getFileSystem().delete(testBasePath, true);
        cluster.shutdown();
      }
    }
  }

  @Test(timeout=60000)
  public void testFileCloseStatus() throws IOException {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    DistributedFileSystem fs = cluster.getFileSystem();
    try {
      // create a new file.
      Path file = new Path("/simpleFlush.dat");
      FSDataOutputStream output = fs.create(file);
      // write to file
      output.writeBytes("Some test data");
      output.flush();
      assertFalse("File status should be open", fs.isFileClosed(file));
      output.close();
      assertTrue("File status should be closed", fs.isFileClosed(file));
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }

  @Test(timeout=60000)
  public void testListFiles() throws IOException {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();

    try {
      DistributedFileSystem fs = cluster.getFileSystem();

      final Path relative = new Path("relative");
      fs.create(new Path(relative, "foo")).close();

      final List<LocatedFileStatus> retVal = new ArrayList<LocatedFileStatus>();
      final RemoteIterator<LocatedFileStatus> iter =
          fs.listFiles(relative, true);
      while (iter.hasNext()) {
        retVal.add(iter.next());
      }
      System.out.println("retVal = " + retVal);
    } finally {
      cluster.shutdown();
    }
  }

  @Test(timeout=10000)
  public void testDFSClientPeerReadTimeout() throws IOException {
    final int timeout = 1000;
    final Configuration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, timeout);

    // only need cluster to create a dfs client to get a peer
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    try {
      cluster.waitActive();
      DistributedFileSystem dfs = cluster.getFileSystem();
      // use a dummy socket to ensure the read times out
      ServerSocket socket = new ServerSocket(0);
      Peer peer = dfs.getClient().newConnectedPeer(
          (InetSocketAddress) socket.getLocalSocketAddress(), null, null);
      long start = Time.now();
      try {
        peer.getInputStream().read();
        Assert.fail("read should timeout");
      } catch (SocketTimeoutException ste) {
        long delta = Time.now() - start;
        Assert.assertTrue("read timed out too soon", delta >= timeout*0.9);
        Assert.assertTrue("read timed out too late", delta <= timeout*1.1);
      } catch (Throwable t) {
        Assert.fail("wrong exception:" + t);
      }
    } finally {
      cluster.shutdown();
    }
  }

  @Test(timeout=60000)
  public void testGetServerDefaults() throws IOException {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    try {
      cluster.waitActive();
      DistributedFileSystem dfs = cluster.getFileSystem();
      FsServerDefaults fsServerDefaults = dfs.getServerDefaults();
      Assert.assertNotNull(fsServerDefaults);
    } finally {
      cluster.shutdown();
    }
  }

  @Test(timeout=10000)
  public void testDFSClientPeerWriteTimeout() throws IOException {
    final int timeout = 1000;
    final Configuration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, timeout);

    // only need cluster to create a dfs client to get a peer
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
    try {
      cluster.waitActive();
      DistributedFileSystem dfs = cluster.getFileSystem();
      // Write 10 MB to a dummy socket to ensure the write times out
      ServerSocket socket = new ServerSocket(0);
      Peer peer = dfs.getClient().newConnectedPeer(
          (InetSocketAddress) socket.getLocalSocketAddress(), null, null);
      long start = Time.now();
      try {
        byte[] buf = new byte[10 * 1024 * 1024];
        peer.getOutputStream().write(buf);
        long delta = Time.now() - start;
        Assert.fail("write finished in " + delta + " ms, but should have"
            + " timed out");
      } catch (SocketTimeoutException ste) {
        long delta = Time.now() - start;
        Assert.assertTrue("write timed out too soon in " + delta + " ms",
            delta >= timeout * 0.9);
        Assert.assertTrue("write timed out too late in " + delta + " ms",
            delta <= timeout * 1.2);
      } catch (Throwable t) {
        Assert.fail("wrong exception:" + t);
      }
    } finally {
      cluster.shutdown();
    }
  }
}