/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.streams.s3;
import org.apache.streams.config.ComponentConfigurator;
import org.apache.streams.config.StreamsConfigurator;
import org.apache.streams.converter.LineReadWriteUtil;
import org.apache.streams.core.DatumStatus;
import org.apache.streams.core.DatumStatusCountable;
import org.apache.streams.core.DatumStatusCounter;
import org.apache.streams.core.StreamsDatum;
import org.apache.streams.core.StreamsPersistWriter;
import org.apache.streams.jackson.StreamsJacksonMapper;

import com.amazonaws.ClientConfiguration;
import com.amazonaws.Protocol;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.regions.Region;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.S3ClientOptions;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Flushable;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
/**
 * S3PersistWriter writes documents to S3 as tab-separated files, rolling over to a new
 * file once the current file reaches the configured maximum size (in megabytes).
 *
 * <p>Thread-safety: {@link #write(StreamsDatum)}, {@link #resetFile()} and the
 * close/flush helpers all synchronize on this instance.</p>
 */
public class S3PersistWriter implements StreamsPersistWriter, DatumStatusCountable {

  public static final String STREAMS_ID = "S3PersistWriter";

  private static final Logger LOGGER = LoggerFactory.getLogger(S3PersistWriter.class);

  private static final char DELIMITER = '\t';

  private ObjectMapper objectMapper;
  private AmazonS3Client amazonS3Client;
  private S3WriterConfiguration s3WriterConfiguration;

  // S3 keys of every file this writer has created, in creation order.
  private final List<String> writtenFiles = new ArrayList<>();

  protected LineReadWriteUtil lineWriterUtil;

  // Lifetime totals across all files written by this instance.
  private final AtomicLong totalBytesWritten = new AtomicLong();
  private final AtomicInteger totalRecordsWritten = new AtomicInteger();

  // Per-file counters; replaced with fresh instances on each file rollover.
  private AtomicLong bytesWrittenThisFile = new AtomicLong();
  private AtomicInteger fileLineCounter = new AtomicInteger();

  // Metadata attached to each uploaded S3 object, describing the TSV column layout.
  // NOTE(review): this field is static, so setObjectMetaData replaces it for ALL
  // S3PersistWriter instances in the JVM, not just this one.
  private static Map<String, String> objectMetaData = new HashMap<>();

  static {
    objectMetaData.put("line[0]", "id");
    objectMetaData.put("line[1]", "timeStamp");
    objectMetaData.put("line[2]", "metaData");
    objectMetaData.put("line[3]", "document");
  }

  private OutputStreamWriter currentWriter = null;

  public AmazonS3Client getAmazonS3Client() {
    return this.amazonS3Client;
  }

  public S3WriterConfiguration getS3WriterConfiguration() {
    return this.s3WriterConfiguration;
  }

  public List<String> getWrittenFiles() {
    return this.writtenFiles;
  }

  public Map<String, String> getObjectMetaData() {
    return objectMetaData;
  }

  public ObjectMapper getObjectMapper() {
    return this.objectMapper;
  }

  public void setObjectMapper(ObjectMapper mapper) {
    this.objectMapper = mapper;
  }

  public void setObjectMetaData(Map<String, String> val) {
    objectMetaData = val;
  }

  /**
   * Default constructor; reads the writer configuration from the "s3" section of the
   * streams configuration.
   */
  public S3PersistWriter() {
    this(new ComponentConfigurator<>(S3WriterConfiguration.class).detectConfiguration(StreamsConfigurator.getConfig().getConfig("s3")));
  }

  public S3PersistWriter(S3WriterConfiguration s3WriterConfiguration) {
    this.s3WriterConfiguration = s3WriterConfiguration;
  }

  /**
   * Instantiator with a pre-existing amazonS3Client, this is used to help with re-use.
   * @param amazonS3Client
   *     If you have an existing amazonS3Client, it won't bother to create another one
   * @param s3WriterConfiguration
   *     Configuration of the write paths and instructions are still required.
   */
  public S3PersistWriter(AmazonS3Client amazonS3Client, S3WriterConfiguration s3WriterConfiguration) {
    this.amazonS3Client = amazonS3Client;
    this.s3WriterConfiguration = s3WriterConfiguration;
  }

  @Override
  public String getId() {
    return STREAMS_ID;
  }

  /**
   * Serializes the datum to a line and appends it to the current S3 file, rolling
   * over to a new file when the configured maximum file size has been reached.
   *
   * @param streamsDatum the datum to persist
   */
  @Override
  public void write(StreamsDatum streamsDatum) {
    synchronized (this) {
      // Roll over when no file is open yet, or the current file has reached the
      // configured maximum size (getMaxFileSize() is expressed in megabytes).
      if (this.currentWriter == null || (this.bytesWrittenThisFile.get() >= (this.s3WriterConfiguration.getMaxFileSize() * 1024 * 1024))) {
        try {
          LOGGER.info("Resetting the file");
          this.currentWriter = resetFile();
        } catch (Exception ex) {
          // was ex.printStackTrace(): route through SLF4J so the trace is captured
          LOGGER.error("Exception while resetting the S3 output file", ex);
        }
      }
      String line = lineWriterUtil.convertResultToString(streamsDatum);
      try {
        this.currentWriter.write(line);
      } catch (IOException ex) {
        // was ex.printStackTrace(): route through SLF4J so the trace is captured
        LOGGER.error("Exception while writing a record to S3", ex);
      }
      // Count bytes with an explicit charset; the no-arg getBytes() depends on the
      // platform default encoding. UTF-8 matches the writer created in resetFile().
      int recordSize = line.getBytes(StandardCharsets.UTF_8).length;
      this.totalBytesWritten.addAndGet(recordSize);
      this.bytesWrittenThisFile.addAndGet(recordSize);
      // increment the record counts
      this.totalRecordsWritten.incrementAndGet();
      this.fileLineCounter.incrementAndGet();
    }
  }

  /**
   * Reset File when it's time to create a new file.
   *
   * <p>If the current file is still empty it is reused; otherwise the current writer
   * is flushed and closed, and a new timestamped ".tsv" object is opened.</p>
   *
   * @return OutputStreamWriter for the (possibly new) current file
   * @throws Exception if the new S3 output stream cannot be created
   */
  public synchronized OutputStreamWriter resetFile() throws Exception {
    // this will keep it thread safe, so we don't create too many files:
    // an open, still-empty file is simply reused.
    if (this.fileLineCounter.get() == 0 && this.currentWriter != null) {
      return this.currentWriter;
    }
    closeAndDestroyWriter();
    // Create the path for where the file is going to live.
    try {
      // generate a unique, timestamped file name; chunked output uses a "/" so each
      // file lands under a prefix-named folder
      String fileName = this.s3WriterConfiguration.getWriterFilePrefix()
          + (this.s3WriterConfiguration.getChunk() ? "/" : "-")
          + System.currentTimeMillis()
          + ".tsv";
      // create the output stream
      OutputStream outputStream = new S3OutputStreamWrapper(this.amazonS3Client,
          this.s3WriterConfiguration.getBucket(),
          this.s3WriterConfiguration.getWriterPath(),
          fileName,
          objectMetaData);
      // reset the per-file counters
      this.fileLineCounter = new AtomicInteger();
      this.bytesWrittenThisFile = new AtomicLong();
      // add this to the list of written files
      writtenFiles.add(this.s3WriterConfiguration.getWriterPath() + fileName);
      // Log that we are creating this file
      LOGGER.info("File Created: Bucket[{}] - {}", this.s3WriterConfiguration.getBucket(), this.s3WriterConfiguration.getWriterPath() + fileName);
      // Encode explicitly as UTF-8 so the byte accounting in write() agrees with
      // what actually reaches S3 (the no-arg constructor uses the platform charset).
      return new OutputStreamWriter(outputStream, StandardCharsets.UTF_8);
    } catch (Exception ex) {
      // was LOGGER.error(ex.getMessage()): keep the full stack trace and cause
      LOGGER.error("Exception while creating a new S3 output file", ex);
      throw ex;
    }
  }

  // Flushes and closes the current writer (if any) and clears the reference.
  private synchronized void closeAndDestroyWriter() {
    // if there is a current writer, we must close it first.
    if (this.currentWriter != null) {
      this.safeFlush(this.currentWriter);
      this.closeSafely(this.currentWriter);
      this.currentWriter = null;
      // Logging of information to alert the user to the activities of this class
      LOGGER.debug("File Closed: Records[{}] Bytes[{}] {} ", this.fileLineCounter.get(), this.bytesWrittenThisFile.get(), this.writtenFiles.get(this.writtenFiles.size() - 1));
    }
  }

  // Flushes and closes the writer, swallowing (but tracing) any exception so that
  // shutdown paths never fail on a close error.
  private synchronized void closeSafely(Writer writer) {
    if (writer != null) {
      try {
        writer.flush();
        writer.close();
      } catch (Exception ex) {
        LOGGER.trace("closeSafely", ex);
      }
      LOGGER.debug("File Closed");
    }
  }

  // Flushes without propagating IOExceptions.
  private void safeFlush(Flushable flushable) {
    // This is wrapped with a ByteArrayOutputStream, so this is really safe.
    if (flushable != null) {
      try {
        flushable.flush();
      } catch (IOException ex) {
        LOGGER.trace("safeFlush", ex);
      }
    }
  }

  /**
   * Prepares the writer: sets up the line serializer, a default ObjectMapper if none
   * was injected, and (unless one was supplied) an AmazonS3Client built from the
   * configured credentials, protocol, and region.
   *
   * @param configurationObject unused; configuration comes from the constructor
   */
  @Override
  public void prepare(Object configurationObject) {
    lineWriterUtil = LineReadWriteUtil.getInstance(s3WriterConfiguration);
    // Connect to S3
    synchronized (this) {
      try {
        // if the user has chosen to not set the object mapper, then set a default object mapper for them.
        if (this.objectMapper == null) {
          this.objectMapper = StreamsJacksonMapper.getInstance();
        }
        // Create the credentials Object
        if (this.amazonS3Client == null) {
          AWSCredentials credentials = new BasicAWSCredentials(s3WriterConfiguration.getKey(), s3WriterConfiguration.getSecretKey());
          ClientConfiguration clientConfig = new ClientConfiguration();
          clientConfig.setProtocol(Protocol.valueOf(s3WriterConfiguration.getProtocol().toString()));
          // We do not want path style access
          S3ClientOptions clientOptions = new S3ClientOptions();
          clientOptions.setPathStyleAccess(false);
          this.amazonS3Client = new AmazonS3Client(credentials, clientConfig);
          if (StringUtils.isNotEmpty(s3WriterConfiguration.getRegion())) {
            this.amazonS3Client.setRegion(Region.getRegion(Regions.fromName(s3WriterConfiguration.getRegion())));
          }
          this.amazonS3Client.setS3ClientOptions(clientOptions);
        }
      } catch (Exception ex) {
        // was LOGGER.error("...: {}", ex): in SLF4J the throwable must be the last
        // argument (no placeholder) for the stack trace to be logged
        LOGGER.error("Exception while preparing the S3 client", ex);
      }
      Preconditions.checkArgument(this.amazonS3Client != null);
    }
  }

  /**
   * Flushes and closes the current file, if one is open.
   */
  public void cleanUp() {
    closeAndDestroyWriter();
  }

  /**
   * Reports every written record as both an attempt and a success.
   */
  @Override
  public DatumStatusCounter getDatumStatusCounter() {
    DatumStatusCounter counters = new DatumStatusCounter();
    counters.incrementAttempt(this.totalRecordsWritten.get());
    counters.incrementStatus(DatumStatus.SUCCESS, this.totalRecordsWritten.get());
    return counters;
  }
}