/** * */ package edu.washington.escience.myria.io; import java.io.IOException; import java.io.InputStream; import java.io.Serializable; import java.net.URI; import java.util.Objects; import javax.annotation.concurrent.NotThreadSafe; import javax.ws.rs.core.Response.Status; import org.apache.commons.httpclient.URIException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider; import com.amazonaws.ClientConfiguration; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.model.GetObjectRequest; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.MoreObjects; import edu.washington.escience.myria.api.MyriaApiException; /** * */ @NotThreadSafe public class AmazonS3Source implements DataSource, Serializable { /** Required for Java serialization. */ private static final long serialVersionUID = 1L; /** The logger for debug, trace, etc. messages in this class. */ private static final org.slf4j.Logger LOGGER = org.slf4j.LoggerFactory.getLogger(AmazonS3Source.class); private final URI s3Uri; private transient ClientConfiguration clientConfig; private transient AmazonS3Client s3Client; private transient GetObjectRequest s3Request; private long startRange; private long endRange; private final String bucket; private final String key; private Long fileSize; @JsonCreator public AmazonS3Source( @JsonProperty(value = "s3Uri", required = true) final String uri, @JsonProperty(value = "startRange") final Long startRange, @JsonProperty(value = "endRange") final Long endRange) throws URIException { s3Uri = URI.create(Objects.requireNonNull(uri, "Parameter uri to UriSource may not be null")); if (!s3Uri.getScheme().equals("s3")) { throw new URIException("URI must contain an S3 scheme"); } String uriString = s3Uri.toString(); String removedScheme = uriString.substring(5); bucket = removedScheme.substring(0, removedScheme.indexOf('/')); key = removedScheme.substring(removedScheme.indexOf('/') + 1); this.startRange = MoreObjects.firstNonNull(startRange, new Long(0)); this.endRange = MoreObjects.firstNonNull(endRange, getFileSize()); } public AmazonS3Client getS3Client() throws MyriaApiException { if (s3Client == null) { /** * Supported providers in fs.s3a.aws.credentials.provider are InstanceProfileCredentialsProvider, * EnvironmentVariableCredentialsProvider and AnonymousAWSCredentialsProvider. */ AWSCredentialsProvider credentials; Configuration conf = new Configuration(); String propertyName = "fs.s3a.aws.credentials.provider"; String className = conf.getTrimmed(propertyName); if (className == null) { LOGGER.warn( "No AWS credentials provider property found in Hadoop configuration file. Instantiating the AmazonS3Client with anonymous credentials."); credentials = new AnonymousAWSCredentialsProvider(); } try { Class<?> credentialClass = Class.forName(className); try { credentials = (AWSCredentialsProvider) credentialClass .getDeclaredConstructor(URI.class, Configuration.class) .newInstance(s3Uri, conf); } catch (NoSuchMethodException | SecurityException e) { credentials = (AWSCredentialsProvider) credentialClass.getDeclaredConstructor().newInstance(); } clientConfig = new ClientConfiguration(); clientConfig.setMaxErrorRetry(3); s3Client = new AmazonS3Client(credentials, clientConfig); } catch (ClassNotFoundException e) { throw new MyriaApiException(Status.INTERNAL_SERVER_ERROR, className + " not found ", e); } catch (NoSuchMethodException | SecurityException e) { throw new MyriaApiException( Status.INTERNAL_SERVER_ERROR, className + " constructor exception. Should provide an accessible constructor accepting URI" + " and Configuration, or an accessible default constructor.", e); } catch (ReflectiveOperationException | IllegalArgumentException e) { throw new MyriaApiException( Status.INTERNAL_SERVER_ERROR, className + " instantiation exception.", e); } } return s3Client; } public Long getFileSize() { if (fileSize == null) { fileSize = getS3Client().getObjectMetadata(bucket, key).getContentLength(); } return fileSize; } public InputStream getInputStream(final long startByte, final long endByte) throws IOException { setStartRange(startByte); setEndRange(endByte); return getInputStream(); } @Override public InputStream getInputStream() throws IOException { s3Request = new GetObjectRequest(bucket, key); s3Request.setRange(startRange, endRange); return getS3Client().getObject(s3Request).getObjectContent(); } public void setStartRange(final long startRange) { this.startRange = startRange; } public void setEndRange(final long endRange) { this.endRange = endRange; } }