/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.scale;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import com.amazonaws.event.ProgressEvent;
import com.amazonaws.event.ProgressEventType;
import com.amazonaws.event.ProgressListener;
import org.junit.FixMethodOrder;
import org.junit.Test;
import org.junit.runners.MethodSorters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3AInstrumentation;
import org.apache.hadoop.fs.s3a.Statistic;
import org.apache.hadoop.util.Progressable;
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
/**
* Scale test which creates a huge file.
*
* <b>Important:</b> the order in which these tests execute is fixed to
* alphabetical order. Test cases are numbered {@code test_123_} to impose
* an ordering based on the numbers.
*
* Having this ordering allows the tests to assume that the huge file
* exists. Even so: they should all have a {@link #assumeHugeFileExists()}
* check at the start, in case an individual test is executed.
*/
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
public abstract class AbstractSTestS3AHugeFiles extends S3AScaleTestBase {
private static final Logger LOG = LoggerFactory.getLogger(
AbstractSTestS3AHugeFiles.class);
public static final int DEFAULT_UPLOAD_BLOCKSIZE = 64 * _1KB;
public static final String DEFAULT_PARTITION_SIZE = "8M";
private Path scaleTestDir;
private Path hugefile;
private Path hugefileRenamed;
private int uploadBlockSize = DEFAULT_UPLOAD_BLOCKSIZE;
private int partitionSize;
@Override
public void setup() throws Exception {
super.setup();
final Path testPath = getTestPath();
scaleTestDir = new Path(testPath, "scale");
hugefile = new Path(scaleTestDir, "hugefile");
hugefileRenamed = new Path(scaleTestDir, "hugefileRenamed");
}
/**
* Note that this can get called before test setup.
* @return the configuration to use.
*/
@Override
protected Configuration createScaleConfiguration() {
Configuration conf = super.createScaleConfiguration();
partitionSize = (int)getTestPropertyBytes(conf,
KEY_HUGE_PARTITION_SIZE,
DEFAULT_PARTITION_SIZE);
assertTrue("Partition size too small: " + partitionSize,
partitionSize > MULTIPART_MIN_SIZE);
conf.setLong(SOCKET_SEND_BUFFER, _1MB);
conf.setLong(SOCKET_RECV_BUFFER, _1MB);
conf.setLong(MIN_MULTIPART_THRESHOLD, partitionSize);
conf.setInt(MULTIPART_SIZE, partitionSize);
conf.set(USER_AGENT_PREFIX, "STestS3AHugeFileCreate");
conf.setBoolean(FAST_UPLOAD, true);
conf.set(FAST_UPLOAD_BUFFER, getBlockOutputBufferName());
return conf;
}
/**
* The name of the buffering mechanism to use.
* @return a buffering mechanism
*/
protected abstract String getBlockOutputBufferName();
@Test
public void test_010_CreateHugeFile() throws IOException {
assertFalse("Please run this test sequentially to avoid timeouts" +
" and bandwidth problems", isParallelExecution());
long filesize = getTestPropertyBytes(getConf(), KEY_HUGE_FILESIZE,
DEFAULT_HUGE_FILESIZE);
long filesizeMB = filesize / _1MB;
// clean up from any previous attempts
deleteHugeFile();
describe("Creating file %s of size %d MB" +
" with partition size %d buffered by %s",
hugefile, filesizeMB, partitionSize, getBlockOutputBufferName());
// now do a check of available upload time, with a pessimistic bandwidth
// (that of remote upload tests). If the test times out then not only is
// the test outcome lost, as the follow-on tests continue, they will
// overlap with the ongoing upload test, for much confusion.
int timeout = getTestTimeoutSeconds();
// assume 1 MB/s upload bandwidth
int bandwidth = _1MB;
long uploadTime = filesize / bandwidth;
assertTrue(String.format("Timeout set in %s seconds is too low;" +
" estimating upload time of %d seconds at 1 MB/s." +
" Rerun tests with -D%s=%d",
timeout, uploadTime, KEY_TEST_TIMEOUT, uploadTime * 2),
uploadTime < timeout);
assertEquals("File size set in " + KEY_HUGE_FILESIZE + " = " + filesize
+ " is not a multiple of " + uploadBlockSize,
0, filesize % uploadBlockSize);
byte[] data = new byte[uploadBlockSize];
for (int i = 0; i < uploadBlockSize; i++) {
data[i] = (byte) (i % 256);
}
long blocks = filesize / uploadBlockSize;
long blocksPerMB = _1MB / uploadBlockSize;
// perform the upload.
// there's lots of logging here, so that a tail -f on the output log
// can give a view of what is happening.
S3AFileSystem fs = getFileSystem();
StorageStatistics storageStatistics = fs.getStorageStatistics();
String putRequests = Statistic.OBJECT_PUT_REQUESTS.getSymbol();
String putBytes = Statistic.OBJECT_PUT_BYTES.getSymbol();
Statistic putRequestsActive = Statistic.OBJECT_PUT_REQUESTS_ACTIVE;
Statistic putBytesPending = Statistic.OBJECT_PUT_BYTES_PENDING;
ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
S3AInstrumentation.OutputStreamStatistics streamStatistics;
long blocksPer10MB = blocksPerMB * 10;
ProgressCallback progress = new ProgressCallback(timer);
try (FSDataOutputStream out = fs.create(hugefile,
true,
uploadBlockSize,
progress)) {
try {
streamStatistics = getOutputStreamStatistics(out);
} catch (ClassCastException e) {
LOG.info("Wrapped output stream is not block stream: {}",
out.getWrappedStream());
streamStatistics = null;
}
for (long block = 1; block <= blocks; block++) {
out.write(data);
long written = block * uploadBlockSize;
// every 10 MB and on file upload @ 100%, print some stats
if (block % blocksPer10MB == 0 || written == filesize) {
long percentage = written * 100 / filesize;
double elapsedTime = timer.elapsedTime() / 1.0e9;
double writtenMB = 1.0 * written / _1MB;
LOG.info(String.format("[%02d%%] Buffered %.2f MB out of %d MB;" +
" PUT %d bytes (%d pending) in %d operations (%d active);" +
" elapsedTime=%.2fs; write to buffer bandwidth=%.2f MB/s",
percentage,
writtenMB,
filesizeMB,
storageStatistics.getLong(putBytes),
gaugeValue(putBytesPending),
storageStatistics.getLong(putRequests),
gaugeValue(putRequestsActive),
elapsedTime,
writtenMB / elapsedTime));
}
}
// now close the file
LOG.info("Closing stream {}", out);
LOG.info("Statistics : {}", streamStatistics);
ContractTestUtils.NanoTimer closeTimer
= new ContractTestUtils.NanoTimer();
out.close();
closeTimer.end("time to close() output stream");
}
timer.end("time to write %d MB in blocks of %d",
filesizeMB, uploadBlockSize);
logFSState();
bandwidth(timer, filesize);
LOG.info("Statistics after stream closed: {}", streamStatistics);
long putRequestCount = storageStatistics.getLong(putRequests);
Long putByteCount = storageStatistics.getLong(putBytes);
LOG.info("PUT {} bytes in {} operations; {} MB/operation",
putByteCount, putRequestCount,
putByteCount / (putRequestCount * _1MB));
LOG.info("Time per PUT {} nS",
toHuman(timer.nanosPerOperation(putRequestCount)));
assertEquals("active put requests in \n" + fs,
0, gaugeValue(putRequestsActive));
ContractTestUtils.assertPathExists(fs, "Huge file", hugefile);
S3AFileStatus status = fs.getFileStatus(hugefile);
ContractTestUtils.assertIsFile(hugefile, status);
assertEquals("File size in " + status, filesize, status.getLen());
if (progress != null) {
progress.verifyNoFailures("Put file " + hugefile
+ " of size " + filesize);
}
if (streamStatistics != null) {
assertEquals("actively allocated blocks in " + streamStatistics,
0, streamStatistics.blocksActivelyAllocated());
}
}
/**
* Progress callback from AWS. Likely to come in on a different thread.
*/
private final class ProgressCallback implements Progressable,
ProgressListener {
private AtomicLong bytesTransferred = new AtomicLong(0);
private AtomicInteger failures = new AtomicInteger(0);
private final ContractTestUtils.NanoTimer timer;
private ProgressCallback(NanoTimer timer) {
this.timer = timer;
}
@Override
public void progress() {
}
@Override
public void progressChanged(ProgressEvent progressEvent) {
ProgressEventType eventType = progressEvent.getEventType();
if (eventType.isByteCountEvent()) {
bytesTransferred.addAndGet(progressEvent.getBytesTransferred());
}
switch (eventType) {
case TRANSFER_PART_FAILED_EVENT:
// failure
failures.incrementAndGet();
LOG.warn("Transfer failure");
break;
case TRANSFER_PART_COMPLETED_EVENT:
// completion
long elapsedTime = timer.elapsedTime();
double elapsedTimeS = elapsedTime / 1.0e9;
long written = bytesTransferred.get();
long writtenMB = written / _1MB;
LOG.info(String.format(
"Event %s; total uploaded=%d MB in %.1fs;" +
" effective upload bandwidth = %.2f MB/s",
progressEvent,
writtenMB, elapsedTimeS, writtenMB / elapsedTimeS));
break;
default:
if (eventType.isByteCountEvent()) {
LOG.debug("Event {}", progressEvent);
} else {
LOG.info("Event {}", progressEvent);
}
break;
}
}
@Override
public String toString() {
String sb = "ProgressCallback{"
+ "bytesTransferred=" + bytesTransferred +
", failures=" + failures +
'}';
return sb;
}
private void verifyNoFailures(String operation) {
assertEquals("Failures in " + operation +": " + this, 0, failures.get());
}
}
void assumeHugeFileExists() throws IOException {
S3AFileSystem fs = getFileSystem();
ContractTestUtils.assertPathExists(fs, "huge file not created", hugefile);
FileStatus status = fs.getFileStatus(hugefile);
ContractTestUtils.assertIsFile(hugefile, status);
assertTrue("File " + hugefile + " is empty", status.getLen() > 0);
}
private void logFSState() {
LOG.info("File System state after operation:\n{}", getFileSystem());
}
@Test
public void test_040_PositionedReadHugeFile() throws Throwable {
assumeHugeFileExists();
final String encryption = getConf().getTrimmed(
SERVER_SIDE_ENCRYPTION_ALGORITHM);
boolean encrypted = encryption != null;
if (encrypted) {
LOG.info("File is encrypted with algorithm {}", encryption);
}
String filetype = encrypted ? "encrypted file" : "file";
describe("Positioned reads of %s %s", filetype, hugefile);
S3AFileSystem fs = getFileSystem();
S3AFileStatus status = fs.getFileStatus(hugefile);
long filesize = status.getLen();
int ops = 0;
final int bufferSize = 8192;
byte[] buffer = new byte[bufferSize];
long eof = filesize - 1;
ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
ContractTestUtils.NanoTimer readAtByte0, readAtByte0Again, readAtEOF;
try (FSDataInputStream in = fs.open(hugefile, uploadBlockSize)) {
readAtByte0 = new ContractTestUtils.NanoTimer();
in.readFully(0, buffer);
readAtByte0.end("time to read data at start of file");
ops++;
readAtEOF = new ContractTestUtils.NanoTimer();
in.readFully(eof - bufferSize, buffer);
readAtEOF.end("time to read data at end of file");
ops++;
readAtByte0Again = new ContractTestUtils.NanoTimer();
in.readFully(0, buffer);
readAtByte0Again.end("time to read data at start of file again");
ops++;
LOG.info("Final stream state: {}", in);
}
long mb = Math.max(filesize / _1MB, 1);
logFSState();
timer.end("time to performed positioned reads of %s of %d MB ",
filetype, mb);
LOG.info("Time per positioned read = {} nS",
toHuman(timer.nanosPerOperation(ops)));
}
@Test
public void test_050_readHugeFile() throws Throwable {
assumeHugeFileExists();
describe("Reading %s", hugefile);
S3AFileSystem fs = getFileSystem();
S3AFileStatus status = fs.getFileStatus(hugefile);
long filesize = status.getLen();
long blocks = filesize / uploadBlockSize;
byte[] data = new byte[uploadBlockSize];
ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
try (FSDataInputStream in = fs.open(hugefile, uploadBlockSize)) {
for (long block = 0; block < blocks; block++) {
in.readFully(data);
}
LOG.info("Final stream state: {}", in);
}
long mb = Math.max(filesize / _1MB, 1);
timer.end("time to read file of %d MB ", mb);
LOG.info("Time per MB to read = {} nS",
toHuman(timer.nanosPerOperation(mb)));
bandwidth(timer, filesize);
logFSState();
}
@Test
public void test_100_renameHugeFile() throws Throwable {
assumeHugeFileExists();
describe("renaming %s to %s", hugefile, hugefileRenamed);
S3AFileSystem fs = getFileSystem();
S3AFileStatus status = fs.getFileStatus(hugefile);
long filesize = status.getLen();
fs.delete(hugefileRenamed, false);
ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
fs.rename(hugefile, hugefileRenamed);
long mb = Math.max(filesize / _1MB, 1);
timer.end("time to rename file of %d MB", mb);
LOG.info("Time per MB to rename = {} nS",
toHuman(timer.nanosPerOperation(mb)));
bandwidth(timer, filesize);
logFSState();
S3AFileStatus destFileStatus = fs.getFileStatus(hugefileRenamed);
assertEquals(filesize, destFileStatus.getLen());
// rename back
ContractTestUtils.NanoTimer timer2 = new ContractTestUtils.NanoTimer();
fs.rename(hugefileRenamed, hugefile);
timer2.end("Renaming back");
LOG.info("Time per MB to rename = {} nS",
toHuman(timer2.nanosPerOperation(mb)));
bandwidth(timer2, filesize);
}
@Test
public void test_999_DeleteHugeFiles() throws IOException {
deleteHugeFile();
ContractTestUtils.NanoTimer timer2 = new ContractTestUtils.NanoTimer();
S3AFileSystem fs = getFileSystem();
fs.delete(hugefileRenamed, false);
timer2.end("time to delete %s", hugefileRenamed);
ContractTestUtils.rm(fs, getTestPath(), true, true);
}
protected void deleteHugeFile() throws IOException {
describe("Deleting %s", hugefile);
NanoTimer timer = new NanoTimer();
getFileSystem().delete(hugefile, false);
timer.end("time to delete %s", hugefile);
}
}