/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.nifi.processors.aws.s3; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; import com.amazonaws.AmazonServiceException; import com.amazonaws.services.s3.model.ListMultipartUploadsRequest; import com.amazonaws.services.s3.model.ObjectMetadata; import org.apache.nifi.components.PropertyDescriptor; import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.flowfile.attributes.CoreAttributes; import org.apache.nifi.processor.DataUnit; import org.apache.nifi.processor.ProcessContext; import org.apache.nifi.processors.aws.AbstractAWSCredentialsProviderProcessor; import org.apache.nifi.processors.aws.credentials.provider.service.AWSCredentialsProviderControllerService; import org.apache.nifi.provenance.ProvenanceEventRecord; import org.apache.nifi.provenance.ProvenanceEventType; import org.apache.nifi.reporting.InitializationException; import 
org.apache.nifi.util.MockFlowFile; import org.apache.nifi.util.TestRunner; import org.apache.nifi.util.TestRunners; import org.junit.Assert; import org.junit.Ignore; import org.junit.Test; import com.amazonaws.AmazonClientException; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; import com.amazonaws.services.s3.model.MultipartUpload; import com.amazonaws.services.s3.model.MultipartUploadListing; import com.amazonaws.services.s3.model.PartETag; import com.amazonaws.services.s3.model.Region; import com.amazonaws.services.s3.model.StorageClass; /** * Provides integration level testing with actual AWS S3 resources for {@link PutS3Object} and requires additional configuration and resources to work. */ public class ITPutS3Object extends AbstractS3IT { final static String TEST_ENDPOINT = "https://endpoint.com"; // final static String TEST_TRANSIT_URI = "https://" + BUCKET_NAME + ".endpoint.com"; final static String TEST_PARTSIZE_STRING = "50 mb"; final static Long TEST_PARTSIZE_LONG = 50L * 1024L * 1024L; final static Long S3_MINIMUM_PART_SIZE = 50L * 1024L * 1024L; final static Long S3_MAXIMUM_OBJECT_SIZE = 5L * 1024L * 1024L * 1024L; final static Pattern reS3ETag = Pattern.compile("[0-9a-fA-f]{32,32}(-[0-9]+)?"); @Test public void testSimplePut() throws IOException { final TestRunner runner = TestRunners.newTestRunner(new PutS3Object()); runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE); runner.setProperty(PutS3Object.REGION, REGION); runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME); Assert.assertTrue(runner.setProperty("x-custom-prop", "hello").isValid()); for (int i = 0; i < 3; i++) { final Map<String, String> attrs = new HashMap<>(); attrs.put("filename", String.valueOf(i) + ".txt"); runner.enqueue(getResourcePath(SAMPLE_FILE_RESOURCE_NAME), attrs); } runner.run(3); runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS, 3); } @Test public void testSimplePutEncrypted() 
throws IOException { final TestRunner runner = TestRunners.newTestRunner(new PutS3Object()); runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE); runner.setProperty(PutS3Object.REGION, REGION); runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME); runner.setProperty(PutS3Object.SERVER_SIDE_ENCRYPTION, ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); Assert.assertTrue(runner.setProperty("x-custom-prop", "hello").isValid()); for (int i = 0; i < 3; i++) { final Map<String, String> attrs = new HashMap<>(); attrs.put("filename", String.valueOf(i) + ".txt"); runner.enqueue(getResourcePath(SAMPLE_FILE_RESOURCE_NAME), attrs); } runner.run(3); runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS, 3); final List<MockFlowFile> ffs = runner.getFlowFilesForRelationship(PutS3Object.REL_SUCCESS); for (MockFlowFile flowFile : ffs) { flowFile.assertAttributeEquals(PutS3Object.S3_SSE_ALGORITHM, ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); } } private void testPutThenFetch(String sseAlgorithm) throws IOException { // Put TestRunner runner = TestRunners.newTestRunner(new PutS3Object()); runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE); runner.setProperty(PutS3Object.REGION, REGION); runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME); if(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION.equals(sseAlgorithm)){ runner.setProperty(PutS3Object.SERVER_SIDE_ENCRYPTION, sseAlgorithm); } final Map<String, String> attrs = new HashMap<>(); attrs.put("filename", "filename-on-s3.txt"); runner.enqueue(getResourcePath(SAMPLE_FILE_RESOURCE_NAME), attrs); runner.run(); runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS, 1); List<MockFlowFile> ffs = runner.getFlowFilesForRelationship(PutS3Object.REL_SUCCESS); if(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION.equals(sseAlgorithm)){ ffs.get(0).assertAttributeEquals(PutS3Object.S3_SSE_ALGORITHM, ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); } else { 
ffs.get(0).assertAttributeNotExists(PutS3Object.S3_SSE_ALGORITHM); } // Fetch runner = TestRunners.newTestRunner(new FetchS3Object()); runner.setProperty(FetchS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE); runner.setProperty(FetchS3Object.REGION, REGION); runner.setProperty(FetchS3Object.BUCKET, BUCKET_NAME); runner.enqueue(new byte[0], attrs); runner.run(1); runner.assertAllFlowFilesTransferred(FetchS3Object.REL_SUCCESS, 1); ffs = runner.getFlowFilesForRelationship(FetchS3Object.REL_SUCCESS); MockFlowFile ff = ffs.get(0); ff.assertContentEquals(getFileFromResourceName(SAMPLE_FILE_RESOURCE_NAME)); if(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION.equals(sseAlgorithm)){ ff.assertAttributeEquals(PutS3Object.S3_SSE_ALGORITHM, ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); } else { ff.assertAttributeNotExists(PutS3Object.S3_SSE_ALGORITHM); } } @Test public void testPutThenFetchWithoutSSE() throws IOException { testPutThenFetch(PutS3Object.NO_SERVER_SIDE_ENCRYPTION); } @Test public void testPutThenFetchWithSSE() throws IOException { testPutThenFetch(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); } @Test public void testPutS3ObjectUsingCredentialsProviderService() throws Throwable { final TestRunner runner = TestRunners.newTestRunner(new PutS3Object()); final AWSCredentialsProviderControllerService serviceImpl = new AWSCredentialsProviderControllerService(); runner.addControllerService("awsCredentialsProvider", serviceImpl); runner.setProperty(serviceImpl, AbstractAWSCredentialsProviderProcessor.CREDENTIALS_FILE, System.getProperty("user.home") + "/aws-credentials.properties"); runner.enableControllerService(serviceImpl); runner.assertValid(serviceImpl); runner.setProperty(PutS3Object.AWS_CREDENTIALS_PROVIDER_SERVICE, "awsCredentialsProvider"); runner.setProperty(PutS3Object.REGION, REGION); runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME); Assert.assertTrue(runner.setProperty("x-custom-prop", "hello").isValid()); for (int i = 0; i < 3; i++) { final Map<String, 
String> attrs = new HashMap<>(); attrs.put("filename", String.valueOf(i) + ".txt"); runner.enqueue(getResourcePath(SAMPLE_FILE_RESOURCE_NAME), attrs); } runner.run(3); runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS, 3); } @Test public void testMetaData() throws IOException { PutS3Object processor = new PutS3Object(); final TestRunner runner = TestRunners.newTestRunner(processor); runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE); runner.setProperty(PutS3Object.REGION, REGION); runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME); PropertyDescriptor prop1 = processor.getSupportedDynamicPropertyDescriptor("TEST-PROP-1"); runner.setProperty(prop1, "TESTING-1-2-3"); PropertyDescriptor prop2 = processor.getSupportedDynamicPropertyDescriptor("TEST-PROP-2"); runner.setProperty(prop2, "TESTING-4-5-6"); final Map<String, String> attrs = new HashMap<>(); attrs.put("filename", "meta.txt"); runner.enqueue(getResourcePath(SAMPLE_FILE_RESOURCE_NAME), attrs); runner.run(); runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS, 1); List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(PutS3Object.REL_SUCCESS); MockFlowFile ff1 = flowFiles.get(0); for (Map.Entry attrib : ff1.getAttributes().entrySet()) { System.out.println(attrib.getKey() + " = " + attrib.getValue()); } } @Test public void testContentType() throws IOException { PutS3Object processor = new PutS3Object(); final TestRunner runner = TestRunners.newTestRunner(processor); runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE); runner.setProperty(PutS3Object.REGION, REGION); runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME); runner.setProperty(PutS3Object.CONTENT_TYPE, "text/plain"); runner.enqueue(getResourcePath(SAMPLE_FILE_RESOURCE_NAME)); runner.run(); runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS, 1); List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(PutS3Object.REL_SUCCESS); MockFlowFile ff1 = flowFiles.get(0); 
ff1.assertAttributeEquals(PutS3Object.S3_CONTENT_TYPE, "text/plain");
    }

    @Test
    public void testPutInFolder() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new PutS3Object());
        runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE);
        runner.setProperty(PutS3Object.REGION, REGION);
        runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME);

        Assert.assertTrue(runner.setProperty("x-custom-prop", "hello").isValid());
        runner.assertValid();

        final Map<String, String> attrs = new HashMap<>();
        attrs.put("filename", "folder/1.txt");
        runner.enqueue(getResourcePath(SAMPLE_FILE_RESOURCE_NAME), attrs);

        runner.run();

        runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS, 1);
    }

    /**
     * Uploads a small file and a ~55 MB file with ReducedRedundancy storage
     * class and verifies the storage class attribute on both results.
     */
    @Test
    public void testStorageClass() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new PutS3Object());
        runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE);
        runner.setProperty(PutS3Object.REGION, REGION);
        runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME);
        runner.setProperty(PutS3Object.STORAGE_CLASS, StorageClass.ReducedRedundancy.name());

        int bytesNeeded = 55 * 1024 * 1024;
        StringBuilder bldr = new StringBuilder(bytesNeeded + 1000);
        // BUG FIX: the previous loop ran a fixed 55 iterations and produced only
        // ~3.5 KB, not the intended 55 MB (note bytesNeeded and the builder
        // capacity). Fill until the builder actually holds at least bytesNeeded
        // characters so the large-object path is really exercised.
        for (int line = 0; bldr.length() < bytesNeeded; line++) {
            bldr.append(String.format("line %06d This is sixty-three characters plus the EOL marker!\n", line));
        }
        String data55mb = bldr.toString();

        Assert.assertTrue(runner.setProperty("x-custom-prop", "hello").isValid());

        final Map<String, String> attrs = new HashMap<>();
        attrs.put("filename", "folder/2.txt");
        runner.enqueue(getResourcePath(SAMPLE_FILE_RESOURCE_NAME), attrs);
        attrs.put("filename", "folder/3.txt");
        runner.enqueue(data55mb.getBytes(), attrs);

        runner.run(2);

        runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS, 2);
        FlowFile file1 = runner.getFlowFilesForRelationship(PutS3Object.REL_SUCCESS).get(0);
        Assert.assertEquals(StorageClass.ReducedRedundancy.toString(), file1.getAttribute(PutS3Object.S3_STORAGECLASS_ATTR_KEY));
        FlowFile file2 = runner.getFlowFilesForRelationship(PutS3Object.REL_SUCCESS).get(1);
        Assert.assertEquals(StorageClass.ReducedRedundancy.toString(), file2.getAttribute(PutS3Object.S3_STORAGECLASS_ATTR_KEY));
    }

    @Test
    public void testPermissions() throws IOException {
        final TestRunner runner = TestRunners.newTestRunner(new PutS3Object());
        runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE);
        runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME);
        runner.setProperty(PutS3Object.FULL_CONTROL_USER_LIST, "28545acd76c35c7e91f8409b95fd1aa0c0914bfa1ac60975d9f48bc3c5e090b5");
        runner.setProperty(PutS3Object.REGION, REGION);

        final Map<String, String> attrs = new HashMap<>();
        attrs.put("filename", "folder/4.txt");
        runner.enqueue(getResourcePath(SAMPLE_FILE_RESOURCE_NAME), attrs);

        runner.run();

        runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS, 1);
    }

    @Test
    public void testGetPropertyDescriptors() throws Exception {
        PutS3Object processor = new PutS3Object();
        List<PropertyDescriptor> pd = processor.getSupportedPropertyDescriptors();
        assertEquals("size should be eq", 28, pd.size());
        assertTrue(pd.contains(PutS3Object.ACCESS_KEY));
        assertTrue(pd.contains(PutS3Object.AWS_CREDENTIALS_PROVIDER_SERVICE));
        assertTrue(pd.contains(PutS3Object.BUCKET));
        assertTrue(pd.contains(PutS3Object.CANNED_ACL));
        assertTrue(pd.contains(PutS3Object.CREDENTIALS_FILE));
        assertTrue(pd.contains(PutS3Object.ENDPOINT_OVERRIDE));
        assertTrue(pd.contains(PutS3Object.FULL_CONTROL_USER_LIST));
        assertTrue(pd.contains(PutS3Object.KEY));
        assertTrue(pd.contains(PutS3Object.OWNER));
        assertTrue(pd.contains(PutS3Object.READ_ACL_LIST));
        assertTrue(pd.contains(PutS3Object.READ_USER_LIST));
        assertTrue(pd.contains(PutS3Object.REGION));
        assertTrue(pd.contains(PutS3Object.SECRET_KEY));
        assertTrue(pd.contains(PutS3Object.SIGNER_OVERRIDE));
        assertTrue(pd.contains(PutS3Object.SSL_CONTEXT_SERVICE));
        assertTrue(pd.contains(PutS3Object.TIMEOUT));
        assertTrue(pd.contains(PutS3Object.EXPIRATION_RULE_ID));
assertTrue(pd.contains(PutS3Object.STORAGE_CLASS)); assertTrue(pd.contains(PutS3Object.WRITE_ACL_LIST)); assertTrue(pd.contains(PutS3Object.WRITE_USER_LIST)); assertTrue(pd.contains(PutS3Object.SERVER_SIDE_ENCRYPTION)); } @Test public void testDynamicProperty() throws IOException { final String DYNAMIC_ATTRIB_KEY = "fs.runTimestamp"; final String DYNAMIC_ATTRIB_VALUE = "${now():toNumber()}"; final PutS3Object processor = new PutS3Object(); final TestRunner runner = TestRunners.newTestRunner(processor); runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE); runner.setProperty(PutS3Object.REGION, REGION); runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME); runner.setProperty(PutS3Object.MULTIPART_PART_SIZE, TEST_PARTSIZE_STRING); PropertyDescriptor testAttrib = processor.getSupportedDynamicPropertyDescriptor(DYNAMIC_ATTRIB_KEY); runner.setProperty(testAttrib, DYNAMIC_ATTRIB_VALUE); final String FILE1_NAME = "file1"; Map<String, String> attribs = new HashMap<>(); attribs.put(CoreAttributes.FILENAME.key(), FILE1_NAME); runner.enqueue("123".getBytes(), attribs); runner.assertValid(); processor.getPropertyDescriptor(DYNAMIC_ATTRIB_KEY); runner.run(); runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS); final List<MockFlowFile> successFiles = runner.getFlowFilesForRelationship(PutS3Object.REL_SUCCESS); Assert.assertEquals(1, successFiles.size()); MockFlowFile ff1 = successFiles.get(0); Long now = System.currentTimeMillis(); String millisNow = Long.toString(now); String millisOneSecAgo = Long.toString(now - 1000L); String usermeta = ff1.getAttribute(PutS3Object.S3_USERMETA_ATTR_KEY); String[] usermetaLine0 = usermeta.split(System.lineSeparator())[0].split("="); String usermetaKey0 = usermetaLine0[0]; String usermetaValue0 = usermetaLine0[1]; Assert.assertEquals(DYNAMIC_ATTRIB_KEY, usermetaKey0); Assert.assertTrue(usermetaValue0.compareTo(millisOneSecAgo) >=0 && usermetaValue0.compareTo(millisNow) <= 0); } @Test public void testProvenance() throws 
InitializationException {
        final String PROV1_FILE = "provfile1";
        final PutS3Object processor = new PutS3Object();
        final TestRunner runner = TestRunners.newTestRunner(processor);
        runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE);
        runner.setProperty(PutS3Object.REGION, REGION);
        runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME);
        runner.setProperty(PutS3Object.KEY, "$(unknown)");

        final Map<String, String> attribs = new HashMap<>();
        attribs.put(CoreAttributes.FILENAME.key(), PROV1_FILE);
        runner.enqueue("prov1 contents".getBytes(), attribs);

        runner.assertValid();
        runner.run();
        runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS);

        final List<MockFlowFile> successFiles = runner.getFlowFilesForRelationship(PutS3Object.REL_SUCCESS);
        Assert.assertEquals(1, successFiles.size());

        // exactly one SEND event pointing at the uploaded object is expected
        final List<ProvenanceEventRecord> provenanceEvents = runner.getProvenanceEvents();
        Assert.assertEquals(1, provenanceEvents.size());
        final ProvenanceEventRecord provRec1 = provenanceEvents.get(0);
        Assert.assertEquals(ProvenanceEventType.SEND, provRec1.getEventType());
        Assert.assertEquals(processor.getIdentifier(), provRec1.getComponentId());
        client.setRegion(Region.fromValue(REGION).toAWSRegion());
        final String targetUri = client.getUrl(BUCKET_NAME, PROV1_FILE).toString();
        Assert.assertEquals(targetUri, provRec1.getTransitUri());
        Assert.assertEquals(7, provRec1.getUpdatedAttributes().size());
        Assert.assertEquals(BUCKET_NAME, provRec1.getUpdatedAttributes().get(PutS3Object.S3_BUCKET_KEY));
    }

    /**
     * A freshly constructed MultipartState must expose neutral defaults.
     */
    @Test
    public void testStateDefaults() {
        final PutS3Object.MultipartState state1 = new PutS3Object.MultipartState();
        Assert.assertEquals("", state1.getUploadId());
        Assert.assertEquals((Long) 0L, state1.getFilePosition());
        Assert.assertEquals(0, state1.getPartETags().size());
        Assert.assertEquals((Long) 0L, state1.getPartSize());
        Assert.assertEquals(StorageClass.Standard.toString(), state1.getStorageClass().toString());
        Assert.assertEquals((Long) 0L, state1.getContentLength());
    }

    @Test
    public void
testStateToString() throws IOException, InitializationException { final String target = "UID-test1234567890#10001#1/PartETag-1,2/PartETag-2,3/PartETag-3,4/PartETag-4#20002#REDUCED_REDUNDANCY#30003#8675309"; PutS3Object.MultipartState state2 = new PutS3Object.MultipartState(); state2.setUploadId("UID-test1234567890"); state2.setFilePosition(10001L); state2.setTimestamp(8675309L); for (Integer partNum = 1; partNum < 5; partNum++) { state2.addPartETag(new PartETag(partNum, "PartETag-" + partNum.toString())); } state2.setPartSize(20002L); state2.setStorageClass(StorageClass.ReducedRedundancy); state2.setContentLength(30003L); Assert.assertEquals(target, state2.toString()); } @Test public void testEndpointOverride() { // remove leading "/" from filename to avoid duplicate separators final String TESTKEY = AbstractS3IT.SAMPLE_FILE_RESOURCE_NAME.substring(1); final PutS3Object processor = new TestablePutS3Object(); final TestRunner runner = TestRunners.newTestRunner(processor); final ProcessContext context = runner.getProcessContext(); runner.setProperty(PutS3Object.ENDPOINT_OVERRIDE, TEST_ENDPOINT); runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME); runner.setProperty(PutS3Object.KEY, TESTKEY); runner.run(); Assert.assertEquals(BUCKET_NAME, context.getProperty(PutS3Object.BUCKET).toString()); Assert.assertEquals(TESTKEY, context.getProperty(PutS3Object.KEY).evaluateAttributeExpressions().toString()); Assert.assertEquals(TEST_ENDPOINT, context.getProperty(PutS3Object.ENDPOINT_OVERRIDE).toString()); String s3url = ((TestablePutS3Object)processor).testable_getClient().getResourceUrl(BUCKET_NAME, TESTKEY); Assert.assertEquals(TEST_ENDPOINT + "/" + BUCKET_NAME + "/" + TESTKEY, s3url); } @Test public void testMultipartProperties() throws IOException { final PutS3Object processor = new PutS3Object(); final TestRunner runner = TestRunners.newTestRunner(processor); final ProcessContext context = runner.getProcessContext(); runner.setProperty(PutS3Object.FULL_CONTROL_USER_LIST, 
"28545acd76c35c7e91f8409b95fd1aa0c0914bfa1ac60975d9f48bc3c5e090b5"); runner.setProperty(PutS3Object.REGION, REGION); runner.setProperty(PutS3Object.MULTIPART_PART_SIZE, TEST_PARTSIZE_STRING); runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME); runner.setProperty(PutS3Object.KEY, AbstractS3IT.SAMPLE_FILE_RESOURCE_NAME); Assert.assertEquals(BUCKET_NAME, context.getProperty(PutS3Object.BUCKET).toString()); Assert.assertEquals(SAMPLE_FILE_RESOURCE_NAME, context.getProperty(PutS3Object.KEY).evaluateAttributeExpressions().toString()); Assert.assertEquals(TEST_PARTSIZE_LONG.longValue(), context.getProperty(PutS3Object.MULTIPART_PART_SIZE).asDataSize(DataUnit.B).longValue()); } @Test public void testLocalStatePersistence() throws IOException { final PutS3Object processor = new PutS3Object(); final TestRunner runner = TestRunners.newTestRunner(processor); final String bucket = runner.getProcessContext().getProperty(PutS3Object.BUCKET).getValue(); final String key = runner.getProcessContext().getProperty(PutS3Object.KEY).getValue(); final String cacheKey1 = runner.getProcessor().getIdentifier() + "/" + bucket + "/" + key; final String cacheKey2 = runner.getProcessor().getIdentifier() + "/" + bucket + "/" + key + "-v2"; final String cacheKey3 = runner.getProcessor().getIdentifier() + "/" + bucket + "/" + key + "-v3"; /* * store 3 versions of state */ PutS3Object.MultipartState state1orig = new PutS3Object.MultipartState(); processor.persistLocalState(cacheKey1, state1orig); PutS3Object.MultipartState state2orig = new PutS3Object.MultipartState(); state2orig.setUploadId("1234"); state2orig.setContentLength(1234L); processor.persistLocalState(cacheKey2, state2orig); PutS3Object.MultipartState state3orig = new PutS3Object.MultipartState(); state3orig.setUploadId("5678"); state3orig.setContentLength(5678L); processor.persistLocalState(cacheKey3, state3orig); final List<MultipartUpload> uploadList = new ArrayList<>(); final MultipartUpload upload1 = new MultipartUpload(); 
upload1.setKey(key); upload1.setUploadId(""); uploadList.add(upload1); final MultipartUpload upload2 = new MultipartUpload(); upload2.setKey(key + "-v2"); upload2.setUploadId("1234"); uploadList.add(upload2); final MultipartUpload upload3 = new MultipartUpload(); upload3.setKey(key + "-v3"); upload3.setUploadId("5678"); uploadList.add(upload3); final MultipartUploadListing uploadListing = new MultipartUploadListing(); uploadListing.setMultipartUploads(uploadList); final MockAmazonS3Client mockClient = new MockAmazonS3Client(); mockClient.setListing(uploadListing); /* * reload and validate stored state */ final PutS3Object.MultipartState state1new = processor.getLocalStateIfInS3(mockClient, bucket, cacheKey1); Assert.assertEquals("", state1new.getUploadId()); Assert.assertEquals(0L, state1new.getFilePosition().longValue()); Assert.assertEquals(new ArrayList<PartETag>(), state1new.getPartETags()); Assert.assertEquals(0L, state1new.getPartSize().longValue()); Assert.assertEquals(StorageClass.fromValue(StorageClass.Standard.toString()), state1new.getStorageClass()); Assert.assertEquals(0L, state1new.getContentLength().longValue()); final PutS3Object.MultipartState state2new = processor.getLocalStateIfInS3(mockClient, bucket, cacheKey2); Assert.assertEquals("1234", state2new.getUploadId()); Assert.assertEquals(0L, state2new.getFilePosition().longValue()); Assert.assertEquals(new ArrayList<PartETag>(), state2new.getPartETags()); Assert.assertEquals(0L, state2new.getPartSize().longValue()); Assert.assertEquals(StorageClass.fromValue(StorageClass.Standard.toString()), state2new.getStorageClass()); Assert.assertEquals(1234L, state2new.getContentLength().longValue()); final PutS3Object.MultipartState state3new = processor.getLocalStateIfInS3(mockClient, bucket, cacheKey3); Assert.assertEquals("5678", state3new.getUploadId()); Assert.assertEquals(0L, state3new.getFilePosition().longValue()); Assert.assertEquals(new ArrayList<PartETag>(), state3new.getPartETags()); 
Assert.assertEquals(0L, state3new.getPartSize().longValue()); Assert.assertEquals(StorageClass.fromValue(StorageClass.Standard.toString()), state3new.getStorageClass()); Assert.assertEquals(5678L, state3new.getContentLength().longValue()); } @Test public void testStatePersistsETags() throws IOException { final PutS3Object processor = new PutS3Object(); final TestRunner runner = TestRunners.newTestRunner(processor); final String bucket = runner.getProcessContext().getProperty(PutS3Object.BUCKET).getValue(); final String key = runner.getProcessContext().getProperty(PutS3Object.KEY).getValue(); final String cacheKey1 = runner.getProcessor().getIdentifier() + "/" + bucket + "/" + key + "-bv1"; final String cacheKey2 = runner.getProcessor().getIdentifier() + "/" + bucket + "/" + key + "-bv2"; final String cacheKey3 = runner.getProcessor().getIdentifier() + "/" + bucket + "/" + key + "-bv3"; /* * store 3 versions of state */ PutS3Object.MultipartState state1orig = new PutS3Object.MultipartState(); processor.persistLocalState(cacheKey1, state1orig); PutS3Object.MultipartState state2orig = new PutS3Object.MultipartState(); state2orig.setUploadId("1234"); state2orig.setContentLength(1234L); processor.persistLocalState(cacheKey2, state2orig); PutS3Object.MultipartState state3orig = new PutS3Object.MultipartState(); state3orig.setUploadId("5678"); state3orig.setContentLength(5678L); processor.persistLocalState(cacheKey3, state3orig); /* * persist state to caches so that * 1. v2 has 2 and then 4 tags * 2. 
v3 has 4 and then 2 tags */ state2orig.getPartETags().add(new PartETag(1, "state 2 tag one")); state2orig.getPartETags().add(new PartETag(2, "state 2 tag two")); processor.persistLocalState(cacheKey2, state2orig); state2orig.getPartETags().add(new PartETag(3, "state 2 tag three")); state2orig.getPartETags().add(new PartETag(4, "state 2 tag four")); processor.persistLocalState(cacheKey2, state2orig); state3orig.getPartETags().add(new PartETag(1, "state 3 tag one")); state3orig.getPartETags().add(new PartETag(2, "state 3 tag two")); state3orig.getPartETags().add(new PartETag(3, "state 3 tag three")); state3orig.getPartETags().add(new PartETag(4, "state 3 tag four")); processor.persistLocalState(cacheKey3, state3orig); state3orig.getPartETags().remove(state3orig.getPartETags().size() - 1); state3orig.getPartETags().remove(state3orig.getPartETags().size() - 1); processor.persistLocalState(cacheKey3, state3orig); final List<MultipartUpload> uploadList = new ArrayList<>(); final MultipartUpload upload1 = new MultipartUpload(); upload1.setKey(key + "-bv2"); upload1.setUploadId("1234"); uploadList.add(upload1); final MultipartUpload upload2 = new MultipartUpload(); upload2.setKey(key + "-bv3"); upload2.setUploadId("5678"); uploadList.add(upload2); final MultipartUploadListing uploadListing = new MultipartUploadListing(); uploadListing.setMultipartUploads(uploadList); final MockAmazonS3Client mockClient = new MockAmazonS3Client(); mockClient.setListing(uploadListing); /* * load state and validate that * 1. v2 restore shows 4 tags * 2. 
v3 restore shows 2 tags */ final PutS3Object.MultipartState state2new = processor.getLocalStateIfInS3(mockClient, bucket, cacheKey2); Assert.assertEquals("1234", state2new.getUploadId()); Assert.assertEquals(4, state2new.getPartETags().size()); final PutS3Object.MultipartState state3new = processor.getLocalStateIfInS3(mockClient, bucket, cacheKey3); Assert.assertEquals("5678", state3new.getUploadId()); Assert.assertEquals(2, state3new.getPartETags().size()); } @Test public void testStateRemove() throws IOException { final PutS3Object processor = new PutS3Object(); final TestRunner runner = TestRunners.newTestRunner(processor); final String bucket = runner.getProcessContext().getProperty(PutS3Object.BUCKET).getValue(); final String key = runner.getProcessContext().getProperty(PutS3Object.KEY).getValue(); final String cacheKey = runner.getProcessor().getIdentifier() + "/" + bucket + "/" + key + "-sr"; final List<MultipartUpload> uploadList = new ArrayList<>(); final MultipartUpload upload1 = new MultipartUpload(); upload1.setKey(key); upload1.setUploadId("1234"); uploadList.add(upload1); final MultipartUploadListing uploadListing = new MultipartUploadListing(); uploadListing.setMultipartUploads(uploadList); final MockAmazonS3Client mockClient = new MockAmazonS3Client(); mockClient.setListing(uploadListing); /* * store state, retrieve and validate, remove and validate */ PutS3Object.MultipartState stateOrig = new PutS3Object.MultipartState(); stateOrig.setUploadId("1234"); stateOrig.setContentLength(1234L); processor.persistLocalState(cacheKey, stateOrig); PutS3Object.MultipartState state1 = processor.getLocalStateIfInS3(mockClient, bucket, cacheKey); Assert.assertEquals("1234", state1.getUploadId()); Assert.assertEquals(1234L, state1.getContentLength().longValue()); processor.persistLocalState(cacheKey, null); PutS3Object.MultipartState state2 = processor.getLocalStateIfInS3(mockClient, bucket, cacheKey); Assert.assertNull(state2); } @Test public void 
testMultipartSmallerThanMinimum() throws IOException {
        // Uploads a 5 MB file with a 50 MB part size: the file is below the multipart
        // minimum, so PutS3Object must fall back to a single PutObject call.
        final String FILE1_NAME = "file1";

        final byte[] megabyte = new byte[1024 * 1024];
        final Path tempFile = Files.createTempFile("s3mulitpart", "tmp");
        tempFile.toFile().deleteOnExit(); // clean up the fixture file when the JVM exits
        long tempByteCount = 0;
        // try-with-resources guarantees the stream is closed even if a write fails
        try (FileOutputStream tempOut = new FileOutputStream(tempFile.toFile())) {
            for (int i = 0; i < 5; i++) {
                tempOut.write(megabyte);
                tempByteCount += megabyte.length;
            }
        }
        System.out.println("file size: " + tempByteCount);
        // sanity-check the fixture sizes against the part-size constants
        Assert.assertTrue(tempByteCount < S3_MINIMUM_PART_SIZE);
        Assert.assertTrue(megabyte.length < S3_MINIMUM_PART_SIZE);
        Assert.assertTrue(TEST_PARTSIZE_LONG >= S3_MINIMUM_PART_SIZE && TEST_PARTSIZE_LONG <= S3_MAXIMUM_OBJECT_SIZE);

        final PutS3Object processor = new PutS3Object();
        final TestRunner runner = TestRunners.newTestRunner(processor);
        runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE);
        runner.setProperty(PutS3Object.REGION, REGION);
        runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME);
        runner.setProperty(PutS3Object.MULTIPART_PART_SIZE, TEST_PARTSIZE_STRING);

        final Map<String, String> attributes = new HashMap<>();
        attributes.put(CoreAttributes.FILENAME.key(), FILE1_NAME);
        // enqueue(InputStream) copies the content immediately, so the stream can be
        // closed as soon as enqueue returns
        try (FileInputStream in = new FileInputStream(tempFile.toFile())) {
            runner.enqueue(in, attributes);
        }
        runner.assertValid();
        runner.run();

        runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS);
        final List<MockFlowFile> successFiles = runner.getFlowFilesForRelationship(PutS3Object.REL_SUCCESS);
        Assert.assertEquals(1, successFiles.size());
        final List<MockFlowFile> failureFiles = runner.getFlowFilesForRelationship(PutS3Object.REL_FAILURE);
        Assert.assertEquals(0, failureFiles.size());
        final MockFlowFile ff1 = successFiles.get(0);
        // a file below the multipart minimum must have used the plain PutObject API
        Assert.assertEquals(PutS3Object.S3_API_METHOD_PUTOBJECT, ff1.getAttribute(PutS3Object.S3_API_METHOD_ATTR_KEY));
        Assert.assertEquals(FILE1_NAME, ff1.getAttribute(CoreAttributes.FILENAME.key()));
        Assert.assertEquals(BUCKET_NAME, ff1.getAttribute(PutS3Object.S3_BUCKET_KEY));
        Assert.assertEquals(FILE1_NAME, ff1.getAttribute(PutS3Object.S3_OBJECT_KEY));
        Assert.assertTrue(reS3ETag.matcher(ff1.getAttribute(PutS3Object.S3_ETAG_ATTR_KEY)).matches());
        Assert.assertEquals(tempByteCount, ff1.getSize());
    }

    @Test
    public void testMultipartBetweenMinimumAndMaximum() throws IOException {
        // Uploads a file just over one part size: above the multipart threshold,
        // so the multipart upload API must be used.
        final String FILE1_NAME = "file1";

        final byte[] megabyte = new byte[1024 * 1024];
        final Path tempFile = Files.createTempFile("s3mulitpart", "tmp");
        tempFile.toFile().deleteOnExit();
        long tempByteCount = 0;
        try (FileOutputStream tempOut = new FileOutputStream(tempFile.toFile())) {
            // write whole megabytes until we are just past one part size
            while (tempByteCount < TEST_PARTSIZE_LONG + 1) {
                tempOut.write(megabyte);
                tempByteCount += megabyte.length;
            }
        }
        System.out.println("file size: " + tempByteCount);
        Assert.assertTrue(tempByteCount > S3_MINIMUM_PART_SIZE && tempByteCount < S3_MAXIMUM_OBJECT_SIZE);
        Assert.assertTrue(tempByteCount > TEST_PARTSIZE_LONG);

        final PutS3Object processor = new PutS3Object();
        final TestRunner runner = TestRunners.newTestRunner(processor);
        runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE);
        runner.setProperty(PutS3Object.REGION, REGION);
        runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME);
        runner.setProperty(PutS3Object.MULTIPART_THRESHOLD, TEST_PARTSIZE_STRING);
        runner.setProperty(PutS3Object.MULTIPART_PART_SIZE, TEST_PARTSIZE_STRING);

        final Map<String, String> attributes = new HashMap<>();
        attributes.put(CoreAttributes.FILENAME.key(), FILE1_NAME);
        try (FileInputStream in = new FileInputStream(tempFile.toFile())) {
            runner.enqueue(in, attributes);
        }
        runner.assertValid();
        runner.run();

        runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS);
        final List<MockFlowFile> successFiles = runner.getFlowFilesForRelationship(PutS3Object.REL_SUCCESS);
        Assert.assertEquals(1, successFiles.size());
        final List<MockFlowFile> failureFiles = runner.getFlowFilesForRelationship(PutS3Object.REL_FAILURE);
        Assert.assertEquals(0, failureFiles.size());
        final MockFlowFile ff1 = successFiles.get(0);
        // the file exceeded the threshold, so multipart upload must have been used
        Assert.assertEquals(PutS3Object.S3_API_METHOD_MULTIPARTUPLOAD, ff1.getAttribute(PutS3Object.S3_API_METHOD_ATTR_KEY));
        Assert.assertEquals(FILE1_NAME, ff1.getAttribute(CoreAttributes.FILENAME.key()));
        Assert.assertEquals(BUCKET_NAME, ff1.getAttribute(PutS3Object.S3_BUCKET_KEY));
        Assert.assertEquals(FILE1_NAME, ff1.getAttribute(PutS3Object.S3_OBJECT_KEY));
        Assert.assertTrue(reS3ETag.matcher(ff1.getAttribute(PutS3Object.S3_ETAG_ATTR_KEY)).matches());
        Assert.assertEquals(tempByteCount, ff1.getSize());
    }

    @Ignore
    @Test
    public void testMultipartLargerThanObjectMaximum() throws IOException {
        // Uploads a file one megabyte past the 5 GB object maximum. Ignored by
        // default: generating and transferring > 5 GB is slow and costly.
        final String FILE1_NAME = "file1";

        final byte[] megabyte = new byte[1024 * 1024];
        final Path tempFile = Files.createTempFile("s3mulitpart", "tmp");
        tempFile.toFile().deleteOnExit();
        try (FileOutputStream tempOut = new FileOutputStream(tempFile.toFile())) {
            for (int i = 0; i < (S3_MAXIMUM_OBJECT_SIZE / 1024 / 1024 + 1); i++) {
                tempOut.write(megabyte);
            }
        }

        final PutS3Object processor = new PutS3Object();
        final TestRunner runner = TestRunners.newTestRunner(processor);
        runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE);
        runner.setProperty(PutS3Object.REGION, REGION);
        runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME);
        runner.setProperty(PutS3Object.MULTIPART_PART_SIZE, TEST_PARTSIZE_STRING);

        final Map<String, String> attributes = new HashMap<>();
        attributes.put(CoreAttributes.FILENAME.key(), FILE1_NAME);
        try (FileInputStream in = new FileInputStream(tempFile.toFile())) {
            runner.enqueue(in, attributes);
        }
        runner.assertValid();
        runner.run();

        runner.assertAllFlowFilesTransferred(PutS3Object.REL_SUCCESS);
        final List<MockFlowFile> successFiles = runner.getFlowFilesForRelationship(PutS3Object.REL_SUCCESS);
        Assert.assertEquals(1, successFiles.size());
        final List<MockFlowFile> failureFiles = runner.getFlowFilesForRelationship(PutS3Object.REL_FAILURE);
        Assert.assertEquals(0, failureFiles.size());
        final MockFlowFile ff1 = successFiles.get(0);
        Assert.assertEquals(FILE1_NAME, ff1.getAttribute(CoreAttributes.FILENAME.key()));
        Assert.assertEquals(BUCKET_NAME, ff1.getAttribute(PutS3Object.S3_BUCKET_KEY));
        Assert.assertEquals(FILE1_NAME, ff1.getAttribute(PutS3Object.S3_OBJECT_KEY));
        Assert.assertTrue(reS3ETag.matcher(ff1.getAttribute(PutS3Object.S3_ETAG_ATTR_KEY)).matches());
        Assert.assertTrue(ff1.getSize() > S3_MAXIMUM_OBJECT_SIZE);
    }

    @Ignore
    @Test
    public void testS3MultipartAgeoff() throws InterruptedException, IOException {
        // Verifies that stale multipart uploads are aged off by the processor.
        // Ignored by default: the test is timing-sensitive and needs live S3 access.
        final PutS3Object processor = new PutS3Object();
        final TestRunner runner = TestRunners.newTestRunner(processor);
        final ProcessContext context = runner.getProcessContext();

        runner.setProperty(PutS3Object.CREDENTIALS_FILE, CREDENTIALS_FILE);
        runner.setProperty(PutS3Object.REGION, REGION);
        runner.setProperty(PutS3Object.BUCKET, BUCKET_NAME);

        // set check interval and age off to minimum values
        runner.setProperty(PutS3Object.MULTIPART_S3_AGEOFF_INTERVAL, "1 milli");
        runner.setProperty(PutS3Object.MULTIPART_S3_MAX_AGE, "1 milli");

        // create some dummy uploads
        for (int i = 0; i < 3; i++) {
            final InitiateMultipartUploadRequest initiateRequest =
                    new InitiateMultipartUploadRequest(BUCKET_NAME, "file" + i + ".txt");
            try {
                client.initiateMultipartUpload(initiateRequest);
            } catch (AmazonClientException e) {
                Assert.fail("Failed to initiate upload: " + e.getMessage());
            }
        }

        // Age off is time dependent, so test has some timing constraints. This
        // sleep() delays long enough to satisfy interval and age intervals.
        Thread.sleep(2000L);

        // System millis are used for timing, but it is incremented on each
        // call to circumvent what appears to be caching in the AWS library.
        // The increments are 1000 millis because AWS returns upload
        // initiation times in whole seconds.
        final long now = System.currentTimeMillis();
        MultipartUploadListing uploadList = processor.getS3AgeoffListAndAgeoffLocalState(context, client, now);
        Assert.assertEquals(3, uploadList.getMultipartUploads().size());

        // abort one upload manually; the next listing must reflect the removal
        final MultipartUpload upload0 = uploadList.getMultipartUploads().get(0);
        processor.abortS3MultipartUpload(client, BUCKET_NAME, upload0);
        uploadList = processor.getS3AgeoffListAndAgeoffLocalState(context, client, now + 1000);
        Assert.assertEquals(2, uploadList.getMultipartUploads().size());

        // running the processor triggers age-off of the remaining stale uploads
        final Map<String, String> attrs = new HashMap<>();
        attrs.put("filename", "test-upload.txt");
        runner.enqueue(getResourcePath(SAMPLE_FILE_RESOURCE_NAME), attrs);
        runner.run();
        uploadList = processor.getS3AgeoffListAndAgeoffLocalState(context, client, now + 2000);
        Assert.assertEquals(0, uploadList.getMultipartUploads().size());
    }

    /**
     * Minimal {@link AmazonS3Client} stub that returns a preset multipart upload
     * listing. Declared {@code static} because it never reads enclosing test state.
     */
    private static class MockAmazonS3Client extends AmazonS3Client {
        MultipartUploadListing listing;

        public void setListing(MultipartUploadListing newListing) {
            listing = newListing;
        }

        @Override
        public MultipartUploadListing listMultipartUploads(ListMultipartUploadsRequest listMultipartUploadsRequest)
                throws AmazonClientException, AmazonServiceException {
            return listing;
        }
    }

    /**
     * Exposes the protected {@code getClient()} of {@link PutS3Object} for tests.
     */
    public class TestablePutS3Object extends PutS3Object {
        public AmazonS3Client testable_getClient() {
            return this.getClient();
        }
    }
}