package com.sequenceiq.samples.flume.s3;

import static org.apache.flume.conf.Configurables.ensureRequiredNonNull;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.flume.Context;
import org.apache.flume.channel.ChannelProcessor;
import org.apache.flume.event.SimpleEvent;
import org.apache.flume.source.AbstractEventDrivenSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectSummary;

/**
 * Flume source that reads every object of a single S3 bucket line by line and
 * hands each line to the channel processor as one event.
 *
 * <p>Required configuration keys: {@code accessKey}, {@code secretKey} and
 * {@code bucket}.
 */
public class S3Source extends AbstractEventDrivenSource {

    private static final Logger LOGGER = LoggerFactory.getLogger(S3Source.class);

    private static final String ACCESS_KEY_KEY = "accessKey";
    private static final String SECRET_KEY = "secretKey";
    private static final String BUCKET_KEY = "bucket";

    private String accessKey;
    private String secretKey;
    private String bucket;

    /**
     * Reads the AWS credentials and the bucket name from the Flume context.
     * All three keys are validated through {@code ensureRequiredNonNull}
     * before they are read.
     *
     * @param context the source configuration
     */
    @Override
    protected void doConfigure(Context context) {
        ensureRequiredNonNull(context, ACCESS_KEY_KEY, SECRET_KEY, BUCKET_KEY);
        this.accessKey = context.getString(ACCESS_KEY_KEY);
        this.secretKey = context.getString(SECRET_KEY);
        this.bucket = context.getString(BUCKET_KEY);
    }

    /**
     * Lists the configured bucket and emits the content of every object found,
     * one event per line.
     *
     * <p>The listing is followed page by page: a single {@code listObjects}
     * call may return a truncated result, so the remaining pages are fetched
     * until the listing is no longer truncated.
     *
     * <p>NOTE(review): all of the work happens synchronously inside this
     * method, so it only returns once the whole bucket has been read - confirm
     * that blocking the start of the source this way is intended.
     */
    @Override
    protected void doStart() {
        AWSCredentials myCredentials = new BasicAWSCredentials(accessKey, secretKey);
        AmazonS3 s3Client = new AmazonS3Client(myCredentials);
        ChannelProcessor channelProcessor = getChannelProcessor();

        ListObjectsRequest listObjectsRequest = new ListObjectsRequest().withBucketName(bucket);
        ObjectListing objectListing = s3Client.listObjects(listObjectsRequest);
        boolean morePages;
        do {
            for (S3ObjectSummary s3ObjectSummary : objectListing.getObjectSummaries()) {
                processObject(s3Client, channelProcessor, s3ObjectSummary.getKey());
            }
            // Without following truncated listings, every object beyond the
            // first page would be silently ignored.
            morePages = objectListing.isTruncated();
            if (morePages) {
                objectListing = s3Client.listNextBatchOfObjects(objectListing);
            }
        } while (morePages);
    }

    /** Nothing to release: no state is kept between start and stop. */
    @Override
    protected void doStop() {
    }

    /**
     * Downloads one object and forwards its lines to the channel processor.
     * An {@link IOException} while reading is logged and the object is
     * skipped, so one unreadable object does not stop the remaining ones.
     *
     * @param s3Client the client used to fetch the object
     * @param channelProcessor the receiver of the created events
     * @param file the key of the object inside the configured bucket
     */
    private void processObject(AmazonS3 s3Client, ChannelProcessor channelProcessor, String file) {
        LOGGER.info("Read the content of {}", file);
        GetObjectRequest objectRequest = new GetObjectRequest(bucket, file);
        S3Object objectPortion = s3Client.getObject(objectRequest);
        try {
            long startTime = System.currentTimeMillis();
            processLines(channelProcessor, objectPortion.getObjectContent());
            LOGGER.info("Processing of {} took {} ms", file, System.currentTimeMillis() - startTime);
        } catch (IOException e) {
            LOGGER.warn("Cannot process the {}, skipping", file, e);
        }
    }

    /**
     * Reads the stream line by line and emits one event per line.
     *
     * <p>The stream is decoded as UTF-8 rather than with the platform default
     * charset, and it is always closed - also when reading or event delivery
     * fails - so that the underlying stream is not leaked.
     *
     * @param channelProcessor the receiver of the created events
     * @param input the content to read; closed by this method
     * @throws IOException if reading or closing the stream fails
     */
    private void processLines(ChannelProcessor channelProcessor, InputStream input) throws IOException {
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(input, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                channelProcessor.processEvent(createEvent(line));
            }
        }
    }

    /**
     * Creates an event whose body is the UTF-8 encoding of the given text.
     *
     * @param message the text of the event
     * @return a new event carrying the message as its body
     */
    private SimpleEvent createEvent(String message) {
        SimpleEvent event = new SimpleEvent();
        event.setBody(message.getBytes(StandardCharsets.UTF_8));
        return event;
    }
}