S3InputStream.java example

Explorer
alluxio-master
/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.underfs.s3;

import org.jets3t.service.S3Service;
import org.jets3t.service.ServiceException;
import org.jets3t.service.model.S3Object;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;

import javax.annotation.concurrent.NotThreadSafe;

/**
 * A stream for reading a file from S3. The main purpose is to provide a faster skip method, as
 * the underlying implementation will read and discard bytes until the number to skip has been
 * reached. This input stream returns 0 when calling read with an empty buffer.
 *
 * <p>The stream tracks its own position ({@code mPos}) so that a large skip can be served by
 * reopening the S3 object at the target offset. Every operation that consumes bytes from the
 * underlying stream must therefore advance that position.
 */
@NotThreadSafe
public class S3InputStream extends InputStream {

  /** Bucket name of the Alluxio S3 bucket. */
  private final String mBucketName;

  /** Key of the file in S3 to read. */
  private final String mKey;

  /** The JetS3t client for S3 operations. */
  private final S3Service mClient;

  /** The storage object that will be updated on each large skip. */
  private S3Object mObject;

  /** The underlying input stream. */
  private BufferedInputStream mInputStream;

  /** Position of the stream, kept in sync with the bytes consumed from the underlying stream. */
  private long mPos;

  /**
   * Creates a new instance of {@link S3InputStream}, reading from the start of the file.
   *
   * @param bucketName the name of the bucket
   * @param key the key of the file
   * @param client the client for S3
   * @throws ServiceException if a service exception occurs
   */
  S3InputStream(String bucketName, String key, S3Service client) throws ServiceException {
    this(bucketName, key, client, 0L);
  }

  /**
   * Creates a new instance of {@link S3InputStream}, at a specific position.
   *
   * @param bucketName the name of the bucket
   * @param key the key of the file
   * @param client the client for S3
   * @param pos the position to start; a ranged request is only issued when this is positive
   * @throws ServiceException if a service exception occurs
   */
  S3InputStream(String bucketName, String key, S3Service client, long pos) throws ServiceException {
    mBucketName = bucketName;
    mKey = key;
    mClient = client;
    mPos = pos;
    // For an empty file setting start pos = 0 will throw a ServiceException
    if (mPos > 0) {
      mObject = mClient.getObject(mBucketName, mKey, null, null, null, null, mPos, null);
    } else {
      mObject = mClient.getObject(mBucketName, mKey);
    }
    mInputStream = new BufferedInputStream(mObject.getDataInputStream());
  }

  /**
   * Closes the underlying buffered stream.
   *
   * @throws IOException if closing the underlying stream fails
   */
  @Override
  public void close() throws IOException {
    mInputStream.close();
  }

  /**
   * Reads a single byte and advances the tracked position when a byte was actually read.
   *
   * @return the byte read, or -1 if the end of the stream has been reached
   * @throws IOException if the underlying read fails
   */
  @Override
  public int read() throws IOException {
    int ret = mInputStream.read();
    if (ret != -1) {
      mPos++;
    }
    return ret;
  }

  /**
   * Reads up to {@code len} bytes into {@code b} starting at {@code off} and advances the tracked
   * position by the number of bytes actually read.
   *
   * @param b the destination buffer
   * @param off the offset in {@code b} at which to start storing bytes
   * @param len the maximum number of bytes to read
   * @return the number of bytes read, or -1 if the end of the stream has been reached
   * @throws IOException if the underlying read fails
   */
  @Override
  public int read(byte[] b, int off, int len) throws IOException {
    int ret = mInputStream.read(b, off, len);
    if (ret != -1) {
      mPos += ret;
    }
    return ret;
  }

  /**
   * This method leverages the ability to open a stream from S3 from a given offset. When the
   * underlying stream has fewer bytes buffered than the skip request, the stream is closed, and
   * a new stream is opened starting at the requested offset.
   *
   * <p>In both cases the tracked position is advanced by the number of bytes skipped, so that a
   * later reopen targets the correct offset.
   *
   * @param n number of bytes to skip
   * @return the number of bytes skipped
   * @throws IOException if the underlying stream fails or the S3 object cannot be reopened
   */
  @Override
  public long skip(long n) throws IOException {
    if (mInputStream.available() >= n) {
      // Served by the current stream: record what was actually consumed, otherwise the
      // position drifts and a subsequent large skip would reopen at the wrong offset.
      long skipped = mInputStream.skip(n);
      mPos += skipped;
      return skipped;
    }
    // The number of bytes to skip is possibly large, open a new stream from S3.
    long newPos = mPos + n;
    mInputStream.close();
    try {
      mObject = mClient.getObject(mBucketName, mKey, null, null, null, null, newPos, null);
      mInputStream = new BufferedInputStream(mObject.getDataInputStream());
    } catch (ServiceException e) {
      throw new IOException(e);
    }
    // Commit the new position only once the reopen succeeded, so a failed request does not
    // leave the tracked position pointing past data that was never skipped.
    mPos = newPos;
    return n;
  }
}