/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.scale;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.Statistic;

import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

import static org.apache.hadoop.fs.s3a.Statistic.*;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;

/**
 * Test the performance of listing files/directories.
 */
public class ITestS3ADirectoryPerformance extends S3AScaleTestBase {
  private static final Logger LOG = LoggerFactory.getLogger(
      ITestS3ADirectoryPerformance.class);

  @Test
  public void testListOperations() throws Throwable {
    describe("Test recursive list operations");
    final Path scaleTestDir = path("testListOperations");
    final Path listDir = new Path(scaleTestDir, "lists");
    S3AFileSystem fs = getFileSystem();

    // scale factor.
    int scale = getConf().getInt(KEY_DIRECTORY_COUNT, DEFAULT_DIRECTORY_COUNT);
    int width = scale;
    int depth = scale;
    int files = scale;
    MetricDiff metadataRequests = new MetricDiff(fs, OBJECT_METADATA_REQUESTS);
    MetricDiff listRequests = new MetricDiff(fs, OBJECT_LIST_REQUESTS);
    MetricDiff listContinueRequests =
        new MetricDiff(fs, OBJECT_CONTINUE_LIST_REQUESTS);
    MetricDiff listStatusCalls = new MetricDiff(fs, INVOCATION_LIST_FILES);
    MetricDiff getFileStatusCalls =
        new MetricDiff(fs, INVOCATION_GET_FILE_STATUS);
    NanoTimer createTimer = new NanoTimer();
    TreeScanResults created =
        createSubdirs(fs, listDir, depth, width, files, 0);
    // add some empty directories
    int emptyDepth = 1 * scale;
    int emptyWidth = 3 * scale;
    created.add(createSubdirs(fs, listDir, emptyDepth, emptyWidth,
        0, 0, "empty", "f-", ""));
    createTimer.end("Time to create %s", created);
    LOG.info("Time per operation: {}",
        toHuman(createTimer.nanosPerOperation(created.totalCount())));
    printThenReset(LOG,
        metadataRequests,
        listRequests,
        listContinueRequests,
        listStatusCalls,
        getFileStatusCalls);

    describe("Listing files via treewalk");
    try {
      // Scan the directory via an explicit tree walk.
      // This is the baseline for any listing speedups.
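      // The treewalk lists each directory separately, so its request count
      // grows with the number of directories; the listFiles(recursive=true)
      // pass below is expected to need only two list requests for the whole
      // tree (asserted further down).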
      NanoTimer treeWalkTimer = new NanoTimer();
      TreeScanResults treewalkResults = treeWalk(fs, listDir);
      treeWalkTimer.end("List status via treewalk of %s", created);

      printThenReset(LOG,
          metadataRequests,
          listRequests,
          listContinueRequests,
          listStatusCalls,
          getFileStatusCalls);
      assertEquals("Files found in treewalk "
              + " created=" + created + " listed=" + treewalkResults,
          created.getFileCount(), treewalkResults.getFileCount());

      describe("Listing files via listFiles(recursive=true)");
      // listFiles() does the recursion internally
      NanoTimer listFilesRecursiveTimer = new NanoTimer();
      TreeScanResults listFilesResults = new TreeScanResults(
          fs.listFiles(listDir, true));
      listFilesRecursiveTimer.end("listFiles(recursive=true) of %s", created);
      assertEquals("Files found in listFiles(recursive=true) "
              + " created=" + created + " listed=" + listFilesResults,
          created.getFileCount(), listFilesResults.getFileCount());

      // only two list operations should have taken place
      print(LOG,
          metadataRequests,
          listRequests,
          listContinueRequests,
          listStatusCalls,
          getFileStatusCalls);
      assertEquals(listRequests.toString(), 2, listRequests.diff());
      reset(metadataRequests,
          listRequests,
          listContinueRequests,
          listStatusCalls,
          getFileStatusCalls);
    } finally {
      describe("deletion");
      // deletion at the end of the run
      NanoTimer deleteTimer = new NanoTimer();
      fs.delete(listDir, true);
      deleteTimer.end("Deleting directory tree");
      printThenReset(LOG,
          metadataRequests,
          listRequests,
          listContinueRequests,
          listStatusCalls,
          getFileStatusCalls);
    }
  }

  @Test
  public void testTimeToStatEmptyDirectory() throws Throwable {
    describe("Time to stat an empty directory");
    Path path = path("empty");
    getFileSystem().mkdirs(path);
    timeToStatPath(path);
  }

  @Test
  public void testTimeToStatNonEmptyDirectory() throws Throwable {
    describe("Time to stat a non-empty directory");
    Path path = path("dir");
    S3AFileSystem fs = getFileSystem();
    fs.mkdirs(path);
    touch(fs, new Path(path, "file"));
    timeToStatPath(path);
  }

  @Test
  public void testTimeToStatFile() throws Throwable {
    describe("Time to stat a simple file");
    Path path = path("file");
    touch(getFileSystem(), path);
    timeToStatPath(path);
  }

  @Test
  public void testTimeToStatRoot() throws Throwable {
    describe("Time to stat the root path");
    timeToStatPath(new Path("/"));
  }

  private void timeToStatPath(Path path) throws IOException {
    describe("Timing getFileStatus(\"%s\")", path);
    S3AFileSystem fs = getFileSystem();
    MetricDiff metadataRequests =
        new MetricDiff(fs, Statistic.OBJECT_METADATA_REQUESTS);
    MetricDiff listRequests =
        new MetricDiff(fs, Statistic.OBJECT_LIST_REQUESTS);
    long attempts = getOperationCount();
    NanoTimer timer = new NanoTimer();
    for (long l = 0; l < attempts; l++) {
      fs.getFileStatus(path);
    }
    timer.end("Time to execute %d getFileStatusCalls", attempts);
    LOG.info("Time per call: {}", toHuman(timer.nanosPerOperation(attempts)));
    LOG.info("metadata: {}", metadataRequests);
    LOG.info("metadata per operation {}", metadataRequests.diff() / attempts);
    LOG.info("listObjects: {}", listRequests);
    LOG.info("listObjects: per operation {}", listRequests.diff() / attempts);
  }
}