/* * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 * (the "License"). You may not use this work except in compliance with the License, which is * available at www.apache.org/licenses/LICENSE-2.0 * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied, as more fully set forth in the License. * * See the NOTICE file distributed with this work for information regarding copyright ownership. */ package alluxio.underfs.s3; import org.jets3t.service.S3Service; import org.jets3t.service.ServiceException; import org.jets3t.service.model.S3Object; import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import javax.annotation.concurrent.NotThreadSafe; /** * A stream for reading a file from S3. The main purpose is to provide a faster skip method, as * the underlying implementation will read and discard bytes until the number to skip has been * reached. This input stream returns 0 when calling read with an empty buffer. */ @NotThreadSafe public class S3InputStream extends InputStream { /** Bucket name of the Alluxio S3 bucket. */ private final String mBucketName; /** Key of the file in S3 to read. */ private final String mKey; /** The JetS3t client for S3 operations. */ private final S3Service mClient; /** The storage object that will be updated on each large skip. */ private S3Object mObject; /** The underlying input stream. */ private BufferedInputStream mInputStream; /** Position of the stream. */ private long mPos; /** * Creates a new instance of {@link S3InputStream}. * * @param bucketName the name of the bucket * @param key the key of the file * @param client the client for S3 * @throws ServiceException if a service exception occurs */ S3InputStream(String bucketName, String key, S3Service client) throws ServiceException { this(bucketName, key, client, 0L); } /** * Creates a new instance of {@link S3InputStream}, at a specific position. * * @param bucketName the name of the bucket * @param key the key of the file * @param client the client for S3 * @param pos the position to start * @throws ServiceException if a service exception occurs */ S3InputStream(String bucketName, String key, S3Service client, long pos) throws ServiceException { mBucketName = bucketName; mKey = key; mClient = client; mPos = pos; // For an empty file setting start pos = 0 will throw a ServiceException if (mPos > 0) { mObject = mClient.getObject(mBucketName, mKey, null, null, null, null, mPos, null); } else { mObject = mClient.getObject(mBucketName, mKey); } mInputStream = new BufferedInputStream(mObject.getDataInputStream()); } @Override public void close() throws IOException { mInputStream.close(); } @Override public int read() throws IOException { int ret = mInputStream.read(); if (ret != -1) { mPos++; } return ret; } @Override public int read(byte[] b, int off, int len) throws IOException { int ret = mInputStream.read(b, off, len); if (ret != -1) { mPos += ret; } return ret; } /** * This method leverages the ability to open a stream from S3 from a given offset. When the * underlying stream has fewer bytes buffered than the skip request, the stream is closed, and * a new stream is opened starting at the requested offset. * * @param n number of bytes to skip * @return the number of bytes skipped */ @Override public long skip(long n) throws IOException { if (mInputStream.available() >= n) { return mInputStream.skip(n); } // The number of bytes to skip is possibly large, open a new stream from S3. mInputStream.close(); mPos += n; try { mObject = mClient.getObject(mBucketName, mKey, null, null, null, null, mPos, null); mInputStream = new BufferedInputStream(mObject.getDataInputStream()); } catch (ServiceException e) { throw new IOException(e); } return n; } }