/* * ModeShape (http://www.modeshape.org) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.modeshape.jcr.value.binary; import com.amazonaws.AmazonClientException; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.auth.profile.ProfileCredentialsProvider; import com.amazonaws.internal.StaticCredentialsProvider; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.iterable.S3Objects; import com.amazonaws.services.s3.model.CopyObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.S3Object; import com.amazonaws.services.s3.model.S3ObjectSummary; import org.modeshape.common.logging.Logger; import org.modeshape.jcr.JcrI18n; import org.modeshape.jcr.value.BinaryKey; import org.modeshape.jcr.value.BinaryValue; import javax.jcr.RepositoryException; import java.io.IOException; import java.io.InputStream; import java.util.Collections; import java.util.Date; import java.util.Iterator; import java.util.Map; import java.util.concurrent.TimeUnit; /** * Binary storage option which manages the storage of files to Amazon S3 * * @author bbranan */ public class S3BinaryStore extends AbstractBinaryStore { /* * AWS client which provides access to Amazon S3 */ private AmazonS3Client s3Client = null; /* * Temporary local file cache to allow for checksum computation */ private FileSystemBinaryStore fileSystemCache; /* * S3 bucket used to store and retrieve content */ private String bucketName; /* * Key for storing and retrieving extracted text from S3 object user metadata */ protected static final String EXTRACTED_TEXT_KEY = "extracted-text"; /* * Key for storing boolean which describes if a MIME type has been explicitly set */ protected static final String USER_MIME_TYPE_KEY = "user-mime-type"; /* * Key for storing boolean which describes if object is unused */ protected static final String UNUSED_KEY = "unused"; /** * Creates a binary store with a connection to Amazon S3 * * @param accessKey AWS access key credential * @param secretKey AWS secret key credential * @param bucketName Name of the S3 bucket in which binary content will be stored * @throws BinaryStoreException if S3 connection cannot be made to verify bucket */ public S3BinaryStore(String accessKey, String secretKey, String bucketName) throws BinaryStoreException { this(accessKey, secretKey, bucketName, null); } /** * Creates a binary store with a connection to Amazon S3 * * @param accessKey AWS access key credential * @param secretKey AWS secret key credential * @param bucketName Name of the S3 bucket in which binary content will be stored * @param endPoint The S3 endpoint URL where the bucket will be accessed * @throws BinaryStoreException if S3 connection cannot be made to verify bucket */ public S3BinaryStore(String accessKey, String secretKey, String bucketName, String endPoint) throws BinaryStoreException { this.bucketName = bucketName; AWSCredentialsProvider credentialsProvider; if (accessKey == null && secretKey == null) { credentialsProvider = new ProfileCredentialsProvider(); } else { credentialsProvider = new StaticCredentialsProvider(new BasicAWSCredentials(accessKey, secretKey)); } this.s3Client = new AmazonS3Client(credentialsProvider); // Support for compatible S3 storage systems if(endPoint != null) this.s3Client.setEndpoint(endPoint); this.fileSystemCache = TransientBinaryStore.get(); this.fileSystemCache.setMinimumBinarySizeInBytes(0L); // Ensure bucket exists try { if (!s3Client.doesBucketExist(bucketName)) { s3Client.createBucket(bucketName); } } catch (AmazonClientException e) { throw new BinaryStoreException(e); } } /** * Creates a binary store with a connection to Amazon S3. This constructor is * intended for testing only. * * @param bucketName Name of the S3 bucket in which binary content will be stored * @param s3Client Client for communicating with Amazon S3 */ protected S3BinaryStore(String bucketName, AmazonS3Client s3Client) { this.bucketName = bucketName; this.s3Client = s3Client; this.fileSystemCache = TransientBinaryStore.get(); this.fileSystemCache.setMinimumBinarySizeInBytes(1L); } @Override protected String getStoredMimeType(BinaryValue binaryValue) throws BinaryStoreException { try { String key = binaryValue.getKey().toString(); ObjectMetadata metadata = s3Client.getObjectMetadata(bucketName, key); if (String.valueOf(true).equals(metadata.getUserMetadata().get(USER_MIME_TYPE_KEY))) { return metadata.getContentType(); } else { return null; } } catch (AmazonClientException e) { throw new BinaryStoreException(e); } } @Override protected void storeMimeType(BinaryValue binaryValue, String mimeType) throws BinaryStoreException { try { String key = binaryValue.getKey().toString(); ObjectMetadata metadata = s3Client.getObjectMetadata(bucketName, key); metadata.setContentType(mimeType); metadata.addUserMetadata(USER_MIME_TYPE_KEY, String.valueOf(true)); // Update the object in place CopyObjectRequest copyRequest = new CopyObjectRequest(bucketName, key, bucketName, key); copyRequest.setNewObjectMetadata(metadata); s3Client.copyObject(copyRequest); } catch (AmazonClientException e) { throw new BinaryStoreException(e); } } @Override public void storeExtractedText(BinaryValue binaryValue, String extractedText) throws BinaryStoreException { // User defined metadata for S3 objects cannot exceed 2KB // This checks for the absolute top of that range if(extractedText.length() > 2000) { throw new BinaryStoreException("S3 objects cannot store associated data " + "that is larger than 2KB"); } setS3ObjectUserProperty(binaryValue.getKey(), EXTRACTED_TEXT_KEY, extractedText); } private void setS3ObjectUserProperty(BinaryKey binaryKey, String metadataKey, String metadataValue) throws BinaryStoreException { try { String key = binaryKey.toString(); ObjectMetadata metadata = s3Client.getObjectMetadata(bucketName, key); Map<String, String> userMetadata = metadata.getUserMetadata(); if(null != metadataValue && metadataValue.equals(userMetadata.get(metadataKey))) { return; // The key/value pair already exists in user metadata, skip update } userMetadata.put(metadataKey, metadataValue); metadata.setUserMetadata(userMetadata); // Update the object in place CopyObjectRequest copyRequest = new CopyObjectRequest(bucketName, key, bucketName, key); copyRequest.setNewObjectMetadata(metadata); s3Client.copyObject(copyRequest); } catch (AmazonClientException e) { throw new BinaryStoreException(e); } } @Override public String getExtractedText(BinaryValue binaryValue) throws BinaryStoreException { try { String key = binaryValue.getKey().toString(); ObjectMetadata metadata = s3Client.getObjectMetadata(bucketName, key); return metadata.getUserMetadata().get(EXTRACTED_TEXT_KEY); } catch (AmazonClientException e) { throw new BinaryStoreException(e); } } @Override public BinaryValue storeValue(InputStream stream, boolean markAsUnused) throws BinaryStoreException { // Cache file on the file system in order to have SHA-1 hash calculated BinaryValue cachedFile = fileSystemCache.storeValue(stream, markAsUnused); try { // Retrieve SHA-1 hash BinaryKey key = new BinaryKey(cachedFile.getKey().toString()); // If file is NOT already in S3 storage, store it if(!s3Client.doesObjectExist(bucketName, key.toString())) { ObjectMetadata metadata = new ObjectMetadata(); metadata.setContentLength(cachedFile.getSize()); // Set Mimetype metadata.setContentType(fileSystemCache.getMimeType(cachedFile, key.toString())); // Set Unused value Map<String, String> userMetadata = metadata.getUserMetadata(); userMetadata.put(UNUSED_KEY, String.valueOf(markAsUnused)); metadata.setUserMetadata(userMetadata); // Store content in S3 s3Client.putObject(bucketName, key.toString(), fileSystemCache.getInputStream(key), metadata); } else { // Set the unused value, if necessary if(markAsUnused) { markAsUnused(Collections.singleton(key)); } else { markAsUsed(Collections.singleton(key)); } } return new StoredBinaryValue(this, key, cachedFile.getSize()); } catch (AmazonClientException|RepositoryException |IOException e) { throw new BinaryStoreException(e); } finally { // Remove cached file fileSystemCache.markAsUnused(Collections.singleton(cachedFile.getKey())); fileSystemCache.removeValuesUnusedLongerThan(1, TimeUnit.MICROSECONDS); } } @Override public InputStream getInputStream(BinaryKey key) throws BinaryStoreException { try { S3Object s3Object = s3Client.getObject(bucketName, key.toString()); return s3Object.getObjectContent(); } catch (AmazonClientException e) { throw new BinaryStoreException(e); } } @Override public void markAsUsed(Iterable<BinaryKey> keys) throws BinaryStoreException { for(BinaryKey key : keys) { setS3ObjectUserProperty(key, UNUSED_KEY, String.valueOf(false)); } } @Override public void markAsUnused(Iterable<BinaryKey> keys) throws BinaryStoreException { for(BinaryKey key : keys) { setS3ObjectUserProperty(key, UNUSED_KEY, String.valueOf(true)); } } @Override public void removeValuesUnusedLongerThan(long minimumAge, TimeUnit timeUnit) throws BinaryStoreException { Date deadline = new Date(System.currentTimeMillis() - timeUnit.toMillis(minimumAge)); // There is no capacity in S3 to query on object properties. This must be done // by straight iteration, so may take a very long time for large data sets. try { for(BinaryKey key : getAllBinaryKeys()) { ObjectMetadata metadata = s3Client.getObjectMetadata(bucketName, key.toString()); String unused = metadata.getUserMetadata().get(UNUSED_KEY); if (null != unused && unused.equals(String.valueOf(true))) { Date lastMod = metadata.getLastModified(); if (lastMod.before(deadline)) { try { s3Client.deleteObject(bucketName, key.toString()); } catch (AmazonClientException e) { Logger log = Logger.getLogger(getClass()); log.warn(e, JcrI18n.unableToDeleteTemporaryFile, e.getMessage()); } } } // Assumes that if no value is set, content is used } } catch (AmazonClientException e) { throw new BinaryStoreException(e); } } @Override public Iterable<BinaryKey> getAllBinaryKeys() throws BinaryStoreException { try { final Iterator<S3ObjectSummary> objectsIterator = S3Objects.inBucket(s3Client, bucketName).iterator(); // Lambda to hand back BinaryKeys rather than S3ObjectSummaries return () -> { return new Iterator<BinaryKey>() { @Override public boolean hasNext() { return objectsIterator.hasNext(); } @Override public BinaryKey next() { S3ObjectSummary object = objectsIterator.next(); return new BinaryKey(object.getKey()); } }; }; } catch (AmazonClientException e) { throw new BinaryStoreException(e); } } }