/*
* Copyright © 2014-2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data.stream;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import co.cask.cdap.common.io.BinaryEncoder;
import co.cask.cdap.common.io.BufferedEncoder;
import co.cask.cdap.common.io.Encoder;
import co.cask.cdap.common.stream.StreamEventDataCodec;
import co.cask.cdap.data.file.FileWriter;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.google.common.io.Closeables;
import com.google.common.io.OutputSupplier;
import com.google.common.primitives.Longs;
import org.apache.hadoop.fs.Syncable;
import java.io.Flushable;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.Map;
import javax.annotation.concurrent.NotThreadSafe;
/**
* File format of the stream event and index files produced by this writer.
*
* Stream event file:
*
* <pre>
* {@code
*
* event_file = <header> <data>* <end_marker>
* header = "E" "2" <properties>
* properties = Avro encoded with the properties schema
* data = <timestamp> <length> <stream_event>+
* timestamp = 8-byte int64 timestamp in milliseconds
* length = Avro encoded int32 giving the size in bytes of all <stream_event>s in the block
* stream_event = Avro encoded bytes according to the StreamData schema
* end_marker = 8-byte int64 with value == -(close_timestamp)
*
* }
* </pre>
*
* Stream index file:
*
* <pre>
* {@code
*
* index_file = <header> <index>*
* header = "I" "1" <properties>
* properties = Avro encoded with the properties schema
* index = <timestamp> <offset>
* timestamp = 8-byte int64 timestamp in milliseconds
* offset = 8-byte int64 offset of the corresponding data block in the event file
*
* }
* </pre>
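*
* <p>
* A minimal usage sketch is shown below. The file names and the Guava {@code Files.newOutputStreamSupplier}
* calls are illustrative only; any {@link OutputSupplier} of {@link OutputStream} can be used.
* </p>
*
* <pre>
* {@code
*
* StreamDataFileWriter writer = new StreamDataFileWriter(
*   Files.newOutputStreamSupplier(new File("stream.dat")),   // event file
*   Files.newOutputStreamSupplier(new File("stream.idx")),   // index file
*   10000L);                                                  // emit an index entry at most every 10 seconds
* try {
*   StreamEvent event = ...;   // events must be appended with non-decreasing timestamps
*   writer.append(event);
*   writer.flush();
* } finally {
*   writer.close();
* }
*
* }
* </pre>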
*/
@NotThreadSafe
public final class StreamDataFileWriter implements TimestampCloseable, Flushable, FileWriter<StreamEvent> {
private static final int BUFFER_SIZE = 256 * 1024; // 256K
private final OutputStream eventOutput;
private final OutputStream indexOutput;
private final long indexInterval;
private final BufferedEncoder encoder;
private final BufferedEncoder lengthEncoder;
// Timestamp for the current block
private long currentTimestamp;
// Number of bytes written to the event output so far
private long position;
// Earliest block timestamp at which the next index entry will be written
private long nextIndexTime;
// Whether the underlying outputs have been synced since the last append
private boolean synced;
// Whether this writer has been closed
private boolean closed;
// Timestamp recorded in the end marker; -1 until the writer is closed
private long closeTimestamp;
/**
* Constructs a new instance that writes to the given outputs. Same as calling
* {@link StreamDataFileWriter#StreamDataFileWriter(OutputSupplier, OutputSupplier, long, Map)}
* with an empty properties map.
*/
public StreamDataFileWriter(OutputSupplier<? extends OutputStream> eventOutputSupplier,
OutputSupplier<? extends OutputStream> indexOutputSupplier,
long indexInterval) throws IOException {
this(eventOutputSupplier, indexOutputSupplier, indexInterval, ImmutableMap.<String, String>of());
}
/**
* Constructs a new instance that writes to the given outputs.
*
* @param eventOutputSupplier the provider of the {@link OutputStream} for writing events
* @param indexOutputSupplier the provider of the {@link OutputStream} for writing the index
* @param indexInterval the time interval in milliseconds for emitting a new index entry
* @param properties the property set that will be stored as file properties
* @throws IOException if there is an error in preparing the output streams
*/
public StreamDataFileWriter(OutputSupplier<? extends OutputStream> eventOutputSupplier,
OutputSupplier<? extends OutputStream> indexOutputSupplier,
long indexInterval, Map<String, String> properties) throws IOException {
this.eventOutput = eventOutputSupplier.getOutput();
try {
this.indexOutput = indexOutputSupplier.getOutput();
} catch (IOException e) {
Closeables.closeQuietly(this.eventOutput);
throw e;
}
this.indexInterval = indexInterval;
this.currentTimestamp = -1L;
this.closeTimestamp = -1L;
Function<OutputStream, Encoder> encoderFactory = createEncoderFactory();
this.encoder = new BufferedEncoder(BUFFER_SIZE, encoderFactory);
this.lengthEncoder = new BufferedEncoder(5, encoderFactory);
try {
init(properties);
} catch (IOException e) {
Closeables.closeQuietly(eventOutput);
Closeables.closeQuietly(indexOutput);
throw e;
}
}
@Override
public void append(StreamEvent event) throws IOException {
doAppend(event, BUFFER_SIZE);
}
/**
* Writes multiple events to the stream file. Events provided by the iterator must be sorted by timestamp.
* This method guarantees that events with the same timestamp are written to the same data block. Note that
* all events sharing a timestamp are buffered in memory before being written, since the length of a data
* block must be known before the block can be written to disk.
*
* @param events an {@link Iterator} that provides events to append
* @throws IOException if writing fails; the writer is closed before the exception is rethrown
*/
@Override
public void appendAll(Iterator<? extends StreamEvent> events) throws IOException {
while (events.hasNext()) {
doAppend(events.next(), Integer.MAX_VALUE);
}
}
@Override
public void close() throws IOException {
if (closed) {
return;
}
try {
flushBlock(false);
// Write the end marker, which is the negated close timestamp.
closeTimestamp = System.currentTimeMillis();
eventOutput.write(Longs.toByteArray(-closeTimestamp));
} finally {
closed = true;
try {
eventOutput.close();
} finally {
indexOutput.close();
}
}
}
@Override
public void flush() throws IOException {
try {
flushBlock(true);
} catch (IOException e) {
throw closeWithException(e);
}
}
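/**
* Returns the timestamp recorded in the end marker when this writer was closed.
* Must only be called after {@link #close()}.
*/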
@Override
public long getCloseTimestamp() {
Preconditions.checkState(closed, "Writer not closed");
return closeTimestamp;
}
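/**
* Appends a single event. A new data block is started whenever the event timestamp advances beyond the
* current block timestamp, and the buffered block is flushed once the encoder holds at least
* {@code flushLimit} bytes.
*/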
private void doAppend(StreamEvent event, int flushLimit) throws IOException {
if (closed) {
throw new IOException("Writer already closed.");
}
synced = false;
long eventTimestamp = event.getTimestamp();
if (eventTimestamp < currentTimestamp) {
throw closeWithException(new IOException("Out of order events written."));
}
try {
if (eventTimestamp > currentTimestamp) {
flushBlock(false);
currentTimestamp = eventTimestamp;
// Write the timestamp directly to output
eventOutput.write(Bytes.toBytes(currentTimestamp));
position += Bytes.SIZEOF_LONG;
}
// Encodes the event data into buffer.
StreamEventDataCodec.encode(event, encoder);
// Flush the block if the buffer has reached the flush limit.
if (encoder.size() >= flushLimit) {
flushBlock(false);
}
} catch (IOException e) {
throw closeWithException(e);
}
}
private void init(Map<String, String> properties) throws IOException {
// Writes the header for the event file
encoder.writeRaw(StreamDataFileConstants.MAGIC_HEADER_V2);
Map<String, String> headers = Maps.newHashMap(properties);
headers.put(StreamDataFileConstants.Property.Key.SCHEMA, StreamEventDataCodec.STREAM_DATA_SCHEMA.toString());
StreamUtils.encodeMap(headers, encoder);
long headerSize = encoder.size();
encoder.writeTo(eventOutput);
sync(eventOutput);
position = headerSize;
// Writes the header for the index file
encoder.writeRaw(StreamDataFileConstants.INDEX_MAGIC_HEADER_V1);
// Empty properties map for now; future versions may add properties.
StreamUtils.encodeMap(ImmutableMap.<String, String>of(), encoder);
encoder.writeTo(indexOutput);
sync(indexOutput);
}
/**
* Writes the buffered data to the underlying output streams.
*
* @param sync If {@code true}, performs a sync on the underlying output streams after writing.
* @throws IOException If flushing to the underlying streams fails.
*/
private void flushBlock(boolean sync) throws IOException {
if (encoder.size() == 0) {
if (sync && !synced) {
sync(eventOutput);
sync(indexOutput);
synced = true;
}
return;
}
// Record the current event output position if an index entry needs to be written
long indexOffset = -1L;
if (currentTimestamp >= nextIndexTime) {
// The index offset is the start of the current block, i.e. the current position minus the 8-byte timestamp already written.
indexOffset = position - Bytes.SIZEOF_LONG;
}
// Writes the total size in bytes of all encoded events in this block
lengthEncoder.writeInt(encoder.size());
int size = lengthEncoder.size();
lengthEncoder.writeTo(eventOutput);
position += size;
// Writes all encoded data from the buffer to the output.
size = encoder.size();
encoder.writeTo(eventOutput);
position += size;
if (sync) {
sync(eventOutput);
}
if (indexOffset >= 0) {
encoder.writeRaw(Bytes.toBytes(currentTimestamp));
encoder.writeRaw(Bytes.toBytes(indexOffset));
encoder.writeTo(indexOutput);
if (sync) {
sync(indexOutput);
}
nextIndexTime = currentTimestamp + indexInterval;
} else if (sync) {
sync(indexOutput);
}
// Reset the current timestamp so that a new data block will be started on the next append.
currentTimestamp = -1L;
synced = sync;
}
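/**
* Syncs the given output stream to storage if it is {@link Syncable}, otherwise simply flushes it.
*/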
private void sync(OutputStream output) throws IOException {
if (output instanceof Syncable) {
((Syncable) output).hsync();
} else {
output.flush();
}
}
/**
* Closes this writer because of an exception and rethrows the given exception.
* This method always throws; it declares a return type only so that callers can write
* {@code throw closeWithException(e)}.
*/
private IOException closeWithException(IOException ex) throws IOException {
closed = true;
Closeables.closeQuietly(eventOutput);
Closeables.closeQuietly(indexOutput);
throw ex;
}
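/**
* Creates a factory function that wraps an {@link OutputStream} in a {@link BinaryEncoder},
* used by the {@link BufferedEncoder}s for encoding events and block lengths.
*/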
private static Function<OutputStream, Encoder> createEncoderFactory() {
return new Function<OutputStream, Encoder>() {
@Override
public Encoder apply(OutputStream input) {
return new BinaryEncoder(input);
}
};
}
}