/* * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 * (the "License"). You may not use this work except in compliance with the License, which is * available at www.apache.org/licenses/LICENSE-2.0 * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied, as more fully set forth in the License. * * See the NOTICE file distributed with this work for information regarding copyright ownership. */ package alluxio.underfs.s3a; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.S3ObjectInputStream; import java.io.IOException; import java.io.InputStream; import javax.annotation.concurrent.NotThreadSafe; /** * A wrapper around an {@link S3ObjectInputStream} which handles skips efficiently. */ @NotThreadSafe public class S3AInputStream extends InputStream { /** Client for operations with s3. */ private final AmazonS3 mClient; /** Name of the bucket the object resides in. */ private final String mBucketName; /** The path of the object to read. */ private final String mKey; /** The backing input stream from s3. */ private S3ObjectInputStream mIn; /** The current position of the stream. */ private long mPos; /** * Constructor for an input stream of an object in s3 using the aws-sdk implementation to read * the data. The stream will be positioned at the start of the file. * * @param bucketName the bucket the object resides in * @param key the path of the object to read * @param client the s3 client to use for operations */ public S3AInputStream(String bucketName, String key, AmazonS3 client) { this(bucketName, key, client, 0L); } /** * Constructor for an input stream of an object in s3 using the aws-sdk implementation to read * the data. The stream will be positioned at the specified position. * * @param bucketName the bucket the object resides in * @param key the path of the object to read * @param client the s3 client to use for operations * @param position the position to begin reading from */ public S3AInputStream(String bucketName, String key, AmazonS3 client, long position) { mBucketName = bucketName; mKey = key; mClient = client; mPos = position; } @Override public void close() { closeStream(); } @Override public int read() throws IOException { if (mIn == null) { openStream(); } int value = mIn.read(); if (value != -1) { // valid data read mPos++; } return value; } @Override public int read(byte[] b) throws IOException { return read(b, 0, b.length); } @Override public int read(byte[] b, int offset, int length) throws IOException { if (length == 0) { return 0; } if (mIn == null) { openStream(); } int read = mIn.read(b, offset, length); if (read != -1) { mPos += read; } return read; } @Override public long skip(long n) { if (n <= 0) { return 0; } closeStream(); mPos += n; openStream(); return n; } /** * Opens a new stream at mPos if the wrapped stream mIn is null. */ private void openStream() { if (mIn != null) { // stream is already open return; } GetObjectRequest getReq = new GetObjectRequest(mBucketName, mKey); // If the position is 0, setting range is redundant and causes an error if the file is 0 length if (mPos > 0) { getReq.setRange(mPos); } mIn = mClient.getObject(getReq).getObjectContent(); } /** * Closes the current stream. */ // TODO(calvin): Investigate if close instead of abort will bring performance benefits. private void closeStream() { if (mIn == null) { return; } mIn.abort(); mIn = null; } }