/* * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 * (the "License"). You may not use this work except in compliance with the License, which is * available at www.apache.org/licenses/LICENSE-2.0 * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied, as more fully set forth in the License. * * See the NOTICE file distributed with this work for information regarding copyright ownership. */ package alluxio.underfs.s3; import alluxio.AlluxioURI; import alluxio.Constants; import alluxio.PropertyKey; import alluxio.underfs.ObjectUnderFileSystem; import alluxio.underfs.UnderFileSystem; import alluxio.underfs.UnderFileSystemConfiguration; import alluxio.underfs.options.OpenOptions; import alluxio.util.CommonUtils; import alluxio.util.UnderFileSystemUtils; import alluxio.util.io.PathUtils; import com.google.common.base.Preconditions; import org.jets3t.service.Jets3tProperties; import org.jets3t.service.S3Service; import org.jets3t.service.ServiceException; import org.jets3t.service.StorageObjectsChunk; import org.jets3t.service.acl.AccessControlList; import org.jets3t.service.impl.rest.httpclient.RestS3Service; import org.jets3t.service.model.S3Object; import org.jets3t.service.model.StorageObject; import org.jets3t.service.security.AWSCredentials; import org.jets3t.service.utils.Mimetypes; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import javax.annotation.concurrent.ThreadSafe; /** * S3 FS {@link UnderFileSystem} implementation based on the jets3t library. */ @ThreadSafe public class S3UnderFileSystem extends ObjectUnderFileSystem { private static final Logger LOG = LoggerFactory.getLogger(S3UnderFileSystem.class); /** Suffix for an empty file to flag it as a directory. */ private static final String FOLDER_SUFFIX = "_$folder$"; private static final byte[] DIR_HASH; /** Jets3t S3 client. */ private final S3Service mClient; /** Bucket name of user's configured Alluxio bucket. */ private final String mBucketName; /** The name of the account owner. */ private final String mAccountOwner; /** The permission mode that the account owner has to the bucket. */ private final short mBucketMode; static { try { DIR_HASH = MessageDigest.getInstance("MD5").digest(new byte[0]); } catch (NoSuchAlgorithmException e) { throw new IllegalStateException(e); } } /** * Constructs a new instance of {@link S3UnderFileSystem}. * * @param uri the {@link AlluxioURI} for this UFS * @param conf the configuration for this UFS * @return the created {@link S3UnderFileSystem} instance * @throws ServiceException when a connection to S3 could not be created */ public static S3UnderFileSystem createInstance(AlluxioURI uri, UnderFileSystemConfiguration conf) throws ServiceException { String bucketName = UnderFileSystemUtils.getBucketName(uri); Preconditions.checkArgument(conf.containsKey(PropertyKey.S3N_ACCESS_KEY), "Property " + PropertyKey.S3N_ACCESS_KEY + " is required to connect to S3"); Preconditions.checkArgument(conf.containsKey(PropertyKey.S3N_SECRET_KEY), "Property " + PropertyKey.S3N_SECRET_KEY + " is required to connect to S3"); AWSCredentials awsCredentials = new AWSCredentials(conf.getValue(PropertyKey.S3N_ACCESS_KEY), conf.getValue(PropertyKey.S3N_SECRET_KEY)); Jets3tProperties props = new Jets3tProperties(); if (conf.containsKey(PropertyKey.UNDERFS_S3_PROXY_HOST)) { props.setProperty("httpclient.proxy-autodetect", "false"); props.setProperty("httpclient.proxy-host", conf.getValue(PropertyKey.UNDERFS_S3_PROXY_HOST)); props.setProperty("httpclient.proxy-port", conf.getValue(PropertyKey.UNDERFS_S3_PROXY_PORT)); } if (conf.containsKey(PropertyKey.UNDERFS_S3_PROXY_HTTPS_ONLY)) { props.setProperty("s3service.https-only", conf.getValue(PropertyKey.UNDERFS_S3_PROXY_HTTPS_ONLY)); } if (conf.containsKey(PropertyKey.UNDERFS_S3_ENDPOINT)) { props.setProperty("s3service.s3-endpoint", conf.getValue(PropertyKey.UNDERFS_S3_ENDPOINT)); if (conf.containsKey(PropertyKey.UNDERFS_S3_PROXY_HTTPS_ONLY)) { props.setProperty("s3service.s3-endpoint-https-port", conf.getValue(PropertyKey.UNDERFS_S3_ENDPOINT_HTTPS_PORT)); } else { props.setProperty("s3service.s3-endpoint-http-port", conf.getValue(PropertyKey.UNDERFS_S3_ENDPOINT_HTTP_PORT)); } } if (conf.containsKey(PropertyKey.UNDERFS_S3_DISABLE_DNS_BUCKETS)) { props.setProperty("s3service.disable-dns-buckets", conf.getValue(PropertyKey.UNDERFS_S3_DISABLE_DNS_BUCKETS)); } if (conf.containsKey(PropertyKey.UNDERFS_S3_UPLOAD_THREADS_MAX)) { props.setProperty("threaded-service.max-thread-count", conf.getValue(PropertyKey.UNDERFS_S3_UPLOAD_THREADS_MAX)); } if (conf.containsKey(PropertyKey.UNDERFS_S3_ADMIN_THREADS_MAX)) { props.setProperty("threaded-service.admin-max-thread-count", conf.getValue(PropertyKey.UNDERFS_S3_ADMIN_THREADS_MAX)); } if (conf.containsKey(PropertyKey.UNDERFS_S3_THREADS_MAX)) { props.setProperty("httpclient.max-connections", conf.getValue(PropertyKey.UNDERFS_S3_THREADS_MAX)); } LOG.debug("Initializing S3 underFs with properties: {}", props.getProperties()); RestS3Service restS3Service = new RestS3Service(awsCredentials, null, null, props); String accountOwnerId = restS3Service.getAccountOwner().getId(); // Gets the owner from user-defined static mapping from S3 canonical user id to Alluxio // user name. String owner = CommonUtils.getValueFromStaticMapping( conf.getValue(PropertyKey.UNDERFS_S3_OWNER_ID_TO_USERNAME_MAPPING), accountOwnerId); // If there is no user-defined mapping, use the display name. if (owner == null) { owner = restS3Service.getAccountOwner().getDisplayName(); } String accountOwner = owner == null ? accountOwnerId : owner; AccessControlList acl = restS3Service.getBucketAcl(bucketName); short bucketMode = S3Utils.translateBucketAcl(acl, accountOwnerId); return new S3UnderFileSystem(uri, restS3Service, bucketName, bucketMode, accountOwner); } /** * Constructor for {@link S3UnderFileSystem}. * * @param uri the {@link AlluxioURI} for this UFS * @param s3Service Jets3t S3 client * @param bucketName bucket name of user's configured Alluxio bucket * @param bucketMode the permission mode that the account owner has to the bucket * @param accountOwner the name of the account owner */ protected S3UnderFileSystem(AlluxioURI uri, S3Service s3Service, String bucketName, short bucketMode, String accountOwner) { super(uri); mClient = s3Service; mBucketName = bucketName; mBucketMode = bucketMode; mAccountOwner = accountOwner; } @Override public String getUnderFSType() { return "s3"; } // Setting S3 owner via Alluxio is not supported yet. This is a no-op. @Override public void setOwner(String path, String user, String group) {} // Setting S3 mode via Alluxio is not supported yet. This is a no-op. @Override public void setMode(String path, short mode) throws IOException {} @Override protected boolean copyObject(String src, String dst) { LOG.debug("Copying {} to {}", src, dst); S3Object obj = new S3Object(dst); // Retry copy for a few times, in case some Jets3t or AWS internal errors happened during copy. int retries = 3; for (int i = 0; i < retries; i++) { try { mClient.copyObject(mBucketName, src, mBucketName, obj, false); return true; } catch (ServiceException e) { LOG.error("Failed to copy file {} to {}", src, dst, e); if (i != retries - 1) { LOG.error("Retrying copying file {} to {}", src, dst); } } } LOG.error("Failed to copy file {} to {}, after {} retries", src, dst, retries); return false; } @Override protected boolean createEmptyObject(String key) { try { S3Object obj = new S3Object(key); obj.setDataInputStream(new ByteArrayInputStream(new byte[0])); obj.setContentLength(0); obj.setMd5Hash(DIR_HASH); obj.setContentType(Mimetypes.MIMETYPE_BINARY_OCTET_STREAM); mClient.putObject(mBucketName, obj); return true; } catch (ServiceException e) { LOG.error("Failed to create object: {}", key, e); return false; } } @Override protected OutputStream createObject(String key) throws IOException { return new S3OutputStream(mBucketName, key, mClient); } @Override protected boolean deleteObject(String key) { try { mClient.deleteObject(mBucketName, key); } catch (ServiceException e) { LOG.error("Failed to delete {}", key, e); return false; } return true; } @Override protected String getFolderSuffix() { return FOLDER_SUFFIX; } @Override protected ObjectListingChunk getObjectListingChunk(String key, boolean recursive) throws IOException { key = PathUtils.normalizePath(key, PATH_SEPARATOR); // In case key is root (empty string) do not normalize prefix key = key.equals(PATH_SEPARATOR) ? "" : key; String delimiter = recursive ? "" : PATH_SEPARATOR; StorageObjectsChunk chunk = getObjectListingChunk(key, delimiter, null); if (chunk != null) { return new S3NObjectListingChunk(chunk); } return null; } // Get next chunk of listing result private StorageObjectsChunk getObjectListingChunk(String key, String delimiter, String priorLastKey) { StorageObjectsChunk res; try { res = mClient.listObjectsChunked(mBucketName, key, delimiter, getListingChunkLength(), priorLastKey); } catch (ServiceException e) { LOG.error("Failed to list path {}", key, e); res = null; } return res; } /** * Wrapper over S3 {@link StorageObjectsChunk}. */ private final class S3NObjectListingChunk implements ObjectListingChunk { final StorageObjectsChunk mChunk; S3NObjectListingChunk(StorageObjectsChunk chunk) throws IOException { mChunk = chunk; if (mChunk == null) { throw new IOException("S3N listing result is null"); } } @Override public ObjectStatus[] getObjectStatuses() { StorageObject[] objects = mChunk.getObjects(); ObjectStatus[] ret = new ObjectStatus[objects.length]; for (int i = 0; i < ret.length; ++i) { ret[i] = new ObjectStatus(objects[i].getKey(), objects[i].getContentLength(), objects[i].getLastModifiedDate().getTime()); } return ret; } @Override public String[] getCommonPrefixes() { return mChunk.getCommonPrefixes(); } @Override public ObjectListingChunk getNextChunk() throws IOException { if (!mChunk.isListingComplete()) { StorageObjectsChunk nextChunk = getObjectListingChunk(mChunk.getPrefix(), mChunk.getDelimiter(), mChunk.getPriorLastKey()); if (nextChunk != null) { return new S3NObjectListingChunk(nextChunk); } } return null; } } @Override protected ObjectStatus getObjectStatus(String key) { try { StorageObject meta = mClient.getObjectDetails(mBucketName, key); if (meta == null) { return null; } return new ObjectStatus(key, meta.getContentLength(), meta.getLastModifiedDate().getTime()); } catch (ServiceException e) { return null; } } // No group in S3 ACL, returns the account owner for group. @Override protected ObjectPermissions getPermissions() { return new ObjectPermissions(mAccountOwner, mAccountOwner, mBucketMode); } @Override protected String getRootKey() { return Constants.HEADER_S3N + mBucketName; } @Override protected InputStream openObject(String key, OpenOptions options) throws IOException { try { return new S3InputStream(mBucketName, key, mClient, options.getOffset()); } catch (ServiceException e) { throw new IOException(e.getMessage()); } } }