/*
* Copyright © 2014-2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data.stream;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import co.cask.cdap.common.io.Locations;
import co.cask.cdap.data.file.FileReader;
import co.cask.cdap.data.file.FileWriter;
import co.cask.cdap.data.file.ReadFilter;
import co.cask.cdap.data.file.filter.TTLReadFilter;
import co.cask.cdap.data2.transaction.stream.StreamConfig;
import co.cask.cdap.proto.Id;
import co.cask.cdap.test.SlowTests;
import com.google.common.base.Charsets;
import com.google.common.base.Stopwatch;
import com.google.common.base.Throwables;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.LinkedListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.io.Closeables;
import com.google.common.io.Flushables;
import com.google.common.util.concurrent.Uninterruptibles;
import org.apache.hadoop.hbase.util.Strings;
import org.apache.twill.filesystem.Location;
import org.apache.twill.filesystem.LocationFactory;
import org.junit.Assert;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TemporaryFolder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Test cases for StreamDataFileReader/Writer.
*/
public abstract class StreamDataFileTestBase {
private static final Logger LOG = LoggerFactory.getLogger(StreamDataFileTestBase.class);
@ClassRule
public static final TemporaryFolder TMP_FOLDER = new TemporaryFolder();
protected abstract LocationFactory getLocationFactory();
@Test
public void testEmptyFile() throws Exception {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Creates a stream file that has no event inside
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
10000L);
writer.close();
// Create a reader that starts from beginning.
StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
List<StreamEvent> events = Lists.newArrayList();
Assert.assertEquals(-1, reader.read(events, 1, 0, TimeUnit.SECONDS));
reader.close();
}
/**
* Test for basic read write to verify data encode/decode correctly.
* @throws Exception
*/
@Test
public void testBasicReadWrite() throws Exception {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
10000L);
// Write 100 events to the stream, with 20 even timestamps
for (int i = 0; i < 40; i += 2) {
for (int j = 0; j < 5; j++) {
writer.append(StreamFileTestUtils.createEvent(i, "Basic test " + i));
}
}
writer.close();
// Create a reader that starts from beginning.
StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
List<StreamEvent> events = Lists.newArrayList();
Assert.assertEquals(100, reader.read(events, 100, 1, TimeUnit.SECONDS));
Assert.assertEquals(-1, reader.read(events, 100, 1, TimeUnit.SECONDS));
reader.close();
// Collect the events in a multimap for verification
Multimap<Long, String> messages = LinkedListMultimap.create();
for (StreamEvent event : events) {
messages.put(event.getTimestamp(), Charsets.UTF_8.decode(event.getBody()).toString());
}
// 20 timestamps
Assert.assertEquals(20, messages.keySet().size());
for (Map.Entry<Long, Collection<String>> entry : messages.asMap().entrySet()) {
// Each timestamp has 5 messages
Assert.assertEquals(5, entry.getValue().size());
// All 5 messages for a timestamp are the same
Assert.assertEquals(1, ImmutableSet.copyOf(entry.getValue()).size());
// Message is "Basic test " + timestamp
Assert.assertEquals("Basic test " + entry.getKey(), entry.getValue().iterator().next());
}
}
@Test
public void testLargeDataBlock() throws Exception {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
10000L);
// Write 1200 events in one data block with each event has size of 150 bytes.
// This make sure it crosses the 128K read buffer boundary that is observed in HDFS.
// The StreamDataFileWriter has an internal data block buffer size of 256K,
// hence writing ~175K data block shouldn't go over the flush limit in the writer, making sure all
// events are in one data block
ByteBuffer body = Charsets.UTF_8.encode(Strings.repeat('0', 150));
for (int i = 0; i < 1200; i++) {
writer.append(new StreamEvent(ImmutableMap.<String, String>of(), body.duplicate(), 0));
}
writer.close();
// Read event one by one
StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
List<StreamEvent> events = Lists.newArrayList();
for (int i = 0; i < 1200; i++) {
Assert.assertEquals(1, reader.read(events, 1, 0, TimeUnit.SECONDS));
Assert.assertEquals(body, events.get(0).getBody());
events.clear();
}
Assert.assertEquals(-1, reader.read(events, 1, 0, TimeUnit.SECONDS));
reader.close();
}
@Test
public void testTail() throws Exception {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
final Location eventFile = dir.getTempFile(".dat");
final Location indexFile = dir.getTempFile(".idx");
final CountDownLatch writerStarted = new CountDownLatch(1);
// Create a thread for writing 10 events, 1 event per 200 milliseconds.
// It pauses after writing 5 events.
final CountDownLatch waitLatch = new CountDownLatch(1);
Thread writerThread = new Thread() {
@Override
public void run() {
try {
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
10000L);
writerStarted.countDown();
for (int i = 0; i < 10; i++) {
writer.append(StreamFileTestUtils.createEvent(i, "Testing " + i));
writer.flush();
TimeUnit.MILLISECONDS.sleep(200);
if (i == 4) {
waitLatch.await();
}
}
writer.close();
} catch (Exception e) {
throw Throwables.propagate(e);
}
}
};
StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
List<StreamEvent> events = Lists.newArrayList();
writerThread.start();
writerStarted.await();
// Expect 10 events, followed by EOF.
Assert.assertEquals(5, reader.read(events, 5, 2000, TimeUnit.MILLISECONDS));
waitLatch.countDown();
Assert.assertEquals(5, reader.read(events, 5, 2000, TimeUnit.MILLISECONDS));
Assert.assertEquals(-1, reader.read(events, 1, 500, TimeUnit.MILLISECONDS));
Assert.assertEquals(10, events.size());
// Verify the ordering of events
int ts = 0;
for (StreamEvent event : events) {
Assert.assertEquals(ts, event.getTimestamp());
Assert.assertEquals("Testing " + ts, Charsets.UTF_8.decode(event.getBody()).toString());
ts++;
}
}
@Test
public void testFilter() throws Exception {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
final Location eventFile = dir.getTempFile(".dat");
final Location indexFile = dir.getTempFile(".idx");
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
10000L);
writer.append(StreamFileTestUtils.createEvent(0, "Message 1"));
writer.flush();
StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
List<StreamEvent> events = Lists.newArrayList();
final AtomicBoolean active = new AtomicBoolean(false);
ReadFilter filter = new ReadFilter() {
private long nextTimestamp = -1L;
@Override
public void reset() {
active.set(false);
nextTimestamp = -1L;
}
@Override
public boolean acceptTimestamp(long timestamp) {
active.set(true);
nextTimestamp = timestamp + 1;
return false;
}
@Override
public long getNextTimestampHint() {
return nextTimestamp;
}
};
Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS, filter));
Assert.assertTrue(active.get());
filter.reset();
Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS, filter));
Assert.assertFalse(active.get());
reader.close();
writer.close();
}
@Test
public void testIndex() throws Exception {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Write 1000 events with different timestamps, and create index for every 100 timestamps.
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
100L);
for (int i = 0; i < 1000; i++) {
writer.append(StreamFileTestUtils.createEvent(1000 + i, "Testing " + i));
}
writer.close();
// Read with index
for (long ts : new long[] {1050, 1110, 1200, 1290, 1301, 1400, 1500, 1600, 1898, 1900, 1999}) {
StreamDataFileReader reader = StreamDataFileReader.createByStartTime(Locations.newInputSupplier(eventFile),
Locations.newInputSupplier(indexFile),
ts);
Queue<StreamEvent> events = Lists.newLinkedList();
Assert.assertEquals(1, reader.read(events, 1, 1L, TimeUnit.MILLISECONDS));
Assert.assertEquals(ts, events.poll().getTimestamp());
reader.close();
}
}
@Test
public void testPosition() throws Exception {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Write 10 events with different timestamps. Index doesn't matter
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
100L);
for (int i = 0; i < 10; i++) {
writer.append(StreamFileTestUtils.createEvent(i, "Testing " + i));
}
writer.close();
// Read 4 events
StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
List<StreamEvent> events = Lists.newArrayList();
reader.read(events, 4, 1, TimeUnit.SECONDS);
Assert.assertEquals(4, events.size());
for (StreamEvent event : events) {
Assert.assertEquals("Testing " + event.getTimestamp(), Charsets.UTF_8.decode(event.getBody()).toString());
}
long position = reader.getPosition();
reader.close();
// Open a new reader, read from the last position.
reader = StreamDataFileReader.createWithOffset(Locations.newInputSupplier(eventFile),
Locations.newInputSupplier(indexFile),
position);
events.clear();
reader.read(events, 10, 1, TimeUnit.SECONDS);
Assert.assertEquals(6, events.size());
for (int i = 0; i < 6; i++) {
StreamEvent event = events.get(i);
Assert.assertEquals((long) (i + 4), event.getTimestamp());
Assert.assertEquals("Testing " + event.getTimestamp(), Charsets.UTF_8.decode(event.getBody()).toString());
}
}
@Test
public void testOffset() throws Exception {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Writer 100 events with different timestamps.
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
10L);
for (int i = 0; i < 100; i++) {
writer.append(StreamFileTestUtils.createEvent(i, "Testing " + i));
}
writer.close();
StreamDataFileIndex index = new StreamDataFileIndex(Locations.newInputSupplier(indexFile));
StreamDataFileIndexIterator iterator = index.indexIterator();
while (iterator.nextIndexEntry()) {
StreamDataFileReader reader = StreamDataFileReader.createWithOffset(
Locations.newInputSupplier(eventFile),
Locations.newInputSupplier(indexFile),
iterator.currentPosition() - 1);
List<StreamEvent> events = Lists.newArrayList();
Assert.assertEquals(1, reader.read(events, 1, 0, TimeUnit.SECONDS));
Assert.assertEquals(iterator.currentTimestamp(), events.get(0).getTimestamp());
}
}
@Test
public void testArbitraryOffset() throws Exception {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Writer 100 events with different timestamps.
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
10L);
for (int i = 0; i < 100; i++) {
writer.append(StreamFileTestUtils.createEvent(i, "Testing " + i));
}
writer.close();
// Read all 100 events to record their start position
StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
List<PositionStreamEvent> events = Lists.newArrayList();
Assert.assertEquals(100, reader.read(events, 100, 0, TimeUnit.SECONDS));
reader.close();
// Read the events again by seeking to some arbitary offset
Random random = new Random();
for (PositionStreamEvent event : ImmutableList.copyOf(events)) {
int rand = random.nextInt(5) + 1;
reader = StreamDataFileReader.createWithOffset(Locations.newInputSupplier(eventFile),
Locations.newInputSupplier(indexFile), event.getStart() - rand);
events.clear();
Assert.assertEquals(1, reader.read(events, 1, 0, TimeUnit.SECONDS));
reader.close();
Assert.assertEquals(event.getStart(), events.get(0).getStart());
}
}
@Test
public void testEndOfFile() throws Exception {
// This test is for opening a reader with start time beyond the last event in the file.
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Write 5 events
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
10000L);
for (int i = 0; i < 5; i++) {
writer.append(StreamFileTestUtils.createEvent(i, "Testing " + i));
}
writer.close();
// Open a reader with timestamp larger that all events in the file.
StreamDataFileReader reader = StreamDataFileReader.createByStartTime(
Locations.newInputSupplier(eventFile),
Locations.newInputSupplier(indexFile),
10L);
List<StreamEvent> events = Lists.newArrayList();
Assert.assertEquals(-1, reader.read(events, 10, 1, TimeUnit.SECONDS));
reader.close();
}
@Test
public void testIndexIterator() throws Exception {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Write 1000 events with different timestamps, and create index for every 100 timestamps.
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
100L);
for (int i = 0; i < 1000; i++) {
writer.append(StreamFileTestUtils.createEvent(1000 + i, "Testing " + i));
}
writer.close();
// Iterate the index
StreamDataFileIndex index = new StreamDataFileIndex(Locations.newInputSupplier(indexFile));
StreamDataFileIndexIterator iterator = index.indexIterator();
long ts = 1000;
while (iterator.nextIndexEntry()) {
Assert.assertEquals(ts, iterator.currentTimestamp());
StreamDataFileReader reader = StreamDataFileReader.createWithOffset(
Locations.newInputSupplier(eventFile),
Locations.newInputSupplier(indexFile),
iterator.currentPosition());
List<StreamEvent> events = Lists.newArrayList();
Assert.assertEquals(1, reader.read(events, 1, 0, TimeUnit.SECONDS));
Assert.assertEquals("Testing " + (ts - 1000),
Charsets.UTF_8.decode(events.get(0).getBody()).toString());
ts += 100;
}
Assert.assertEquals(2000, ts);
}
@Test
public void testMaxEvents() throws Exception {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Write 1000 events with 100 different timestamps, and create index for every 100ms timestamps.
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
100L);
for (int i = 0; i < 100; i++) {
for (int j = 0; j < 10; j++) {
writer.append(StreamFileTestUtils.createEvent(i, "Testing " + (i * 10 + j)));
}
}
writer.close();
// Reads events one by one
List<StreamEvent> events = Lists.newArrayList();
StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
int expectedId = 0;
while (reader.read(events, 1, 1, TimeUnit.SECONDS) >= 0) {
Assert.assertEquals(1, events.size());
StreamEvent event = events.get(0);
long expectedTimestamp = expectedId / 10;
Assert.assertEquals(expectedTimestamp, event.getTimestamp());
Assert.assertEquals("Testing " + expectedId, Charsets.UTF_8.decode(event.getBody()).toString());
expectedId++;
events.clear();
}
reader.close();
// Reads four events every time, with a new reader.
events.clear();
reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
int expectedSize = 4;
while (reader.read(events, 4, 1, TimeUnit.SECONDS) >= 0) {
Assert.assertEquals(expectedSize, events.size());
expectedSize += 4;
long position = reader.getPosition();
reader.close();
reader = StreamDataFileReader.createWithOffset(Locations.newInputSupplier(eventFile),
Locations.newInputSupplier(indexFile),
position);
}
// Verify all events are read
Assert.assertEquals(1000, events.size());
expectedId = 0;
for (StreamEvent event : events) {
long expectedTimestamp = expectedId / 10;
Assert.assertEquals(expectedTimestamp, event.getTimestamp());
Assert.assertEquals("Testing " + expectedId, Charsets.UTF_8.decode(event.getBody()).toString());
expectedId++;
}
}
@Test
public void testTailNotExists() throws IOException, InterruptedException {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Create a read on non-exist file and try reading, it should be ok with 0 events read.
List<StreamEvent> events = Lists.newArrayList();
StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS));
// Write an event
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
100L);
writer.append(StreamFileTestUtils.createEvent(100, "Testing"));
writer.flush();
// Reads the event just written
Assert.assertEquals(1, reader.read(events, 1, 0, TimeUnit.SECONDS));
Assert.assertEquals(100, events.get(0).getTimestamp());
Assert.assertEquals("Testing", Charsets.UTF_8.decode(events.get(0).getBody()).toString());
// Close the writer.
writer.close();
// Reader should return EOF (after some time, as closing of file takes time on HDFS.
Assert.assertEquals(-1, reader.read(events, 1, 2, TimeUnit.SECONDS));
}
@Test
public void testOffsetAtEnd() throws IOException, InterruptedException {
// Test for offset at the end of file
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Write 1 event.
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
100L);
writer.append(StreamFileTestUtils.createEvent(1, "Testing"));
writer.close();
// Read 1 event.
List<StreamEvent> events = Lists.newArrayList();
StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
Assert.assertEquals(1, reader.read(events, 10, 0, TimeUnit.SECONDS));
// Create a reader with the offset pointing to EOF timestamp.
long offset = reader.getPosition();
reader = StreamDataFileReader.createWithOffset(
Locations.newInputSupplier(eventFile), Locations.newInputSupplier(indexFile), offset);
Assert.assertEquals(-1, reader.read(events, 10, 0, TimeUnit.SECONDS));
// Create a read with offset way pass EOF
reader = StreamDataFileReader.createWithOffset(
Locations.newInputSupplier(eventFile),
Locations.newInputSupplier(indexFile),
eventFile.length() + 100);
Assert.assertEquals(-1, reader.read(events, 10, 0, TimeUnit.SECONDS));
}
@Test
public void testTTLFilter() throws IOException, InterruptedException {
// Test the TTL filter by writing events with different timestamp and use the TTL to control what
// events to read.
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Writer 10 events, with 10 different timestamps, differ by 5, starting from 1.
// ts = {1, 6, 11, 16, 21, 26, 31, 36, 41, 46 }
StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
20L);
long ts = 1L;
for (int i = 0; i < 10; i++, ts += 5) {
writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + i));
}
// Just flush writer, keep the write live to keep writing more events down below.
writer.flush();
List<StreamEvent> events = Lists.newArrayList();
// Create a reader
try (StreamDataFileReader reader = StreamDataFileReader.createByStartTime(Locations.newInputSupplier(eventFile),
Locations.newInputSupplier(indexFile),
0L)) {
// Read with a TTL filter. The TTL makes the first valid event as TS >= 25, hence TS == 26.
reader.read(events, 1, 0, TimeUnit.SECONDS, new TTLReadFilter(0) {
@Override
protected long getCurrentTime() {
return 25L;
}
});
Assert.assertEquals(1, events.size());
Assert.assertEquals(26L, events.get(0).getTimestamp());
// Read with TTL filter that will skip all reaming events in the stream (TTL = 0).
events.clear();
reader.read(events, 1, 0, TimeUnit.SECONDS, new TTLReadFilter(0));
Assert.assertTrue(events.isEmpty());
// Write 5 more event, with TS starts at 56
for (int i = 0; i < 5; i++, ts += 5) {
writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + i));
}
writer.close();
// Read with TTL filter that makes only the last event pass (TS = 76)
events.clear();
reader.read(events, 10, 0, TimeUnit.SECONDS, new TTLReadFilter(0) {
@Override
protected long getCurrentTime() {
return 71L;
}
});
Assert.assertEquals(1, events.size());
Assert.assertEquals(71L, events.get(0).getTimestamp());
}
}
/**
* Test live stream reader with new partitions and/or sequence file being created over time.
*/
@Category(SlowTests.class)
@Test
public void testLiveStream() throws Exception {
String streamName = "live";
Id.Stream streamId = Id.Stream.from(Id.Namespace.DEFAULT, streamName);
final String filePrefix = "prefix";
long partitionDuration = 5000; // 5 seconds
Location location = getLocationFactory().create(streamName);
location.mkdirs();
final StreamConfig config = new StreamConfig(streamId, partitionDuration, 10000, Long.MAX_VALUE,
location, null, 1000);
// Create a thread that will write 10 event per second
final AtomicInteger eventsWritten = new AtomicInteger();
final List<Closeable> closeables = Lists.newArrayList();
Thread writerThread = new Thread() {
@Override
public void run() {
try {
while (!interrupted()) {
FileWriter<StreamEvent> writer = createWriter(config, filePrefix);
closeables.add(writer);
for (int i = 0; i < 10; i++) {
long ts = System.currentTimeMillis();
writer.append(StreamFileTestUtils.createEvent(ts, "Testing"));
eventsWritten.getAndIncrement();
}
writer.flush();
TimeUnit.SECONDS.sleep(1);
}
} catch (IOException e) {
LOG.error(e.getMessage(), e);
throw Throwables.propagate(e);
} catch (InterruptedException e) {
// No-op
}
}
};
// Create a live reader start with one partition earlier than current time.
long partitionStart = StreamUtils.getPartitionStartTime(System.currentTimeMillis() - config.getPartitionDuration(),
config.getPartitionDuration());
Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(),
partitionStart, config.getPartitionDuration());
Location eventLocation = StreamUtils.createStreamLocation(partitionLocation, filePrefix, 0, StreamFileType.EVENT);
// Creates a live stream reader that check for sequence file ever 100 millis.
FileReader<PositionStreamEvent, StreamFileOffset> reader
= new LiveStreamFileReader(config, new StreamFileOffset(eventLocation, 0L, 0), 100);
List<StreamEvent> events = Lists.newArrayList();
// Try to read, since the writer thread is not started, it should get nothing
Assert.assertEquals(0, reader.read(events, 1, 2, TimeUnit.SECONDS));
// Start the writer thread.
writerThread.start();
Stopwatch stopwatch = new Stopwatch();
stopwatch.start();
while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && reader.read(events, 1, 1, TimeUnit.SECONDS) == 0) {
// Empty
}
stopwatch.stop();
// Should be able to read a event
Assert.assertEquals(1, events.size());
TimeUnit.MILLISECONDS.sleep(partitionDuration * 2);
writerThread.interrupt();
writerThread.join();
LOG.info("Writer stopped with {} events written.", eventsWritten.get());
stopwatch.reset();
while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != eventsWritten.get()) {
reader.read(events, eventsWritten.get(), 0, TimeUnit.SECONDS);
}
// Should see all events written
Assert.assertEquals(eventsWritten.get(), events.size());
// Take a snapshot of the offset.
StreamFileOffset offset = new StreamFileOffset(reader.getPosition());
reader.close();
for (Closeable c : closeables) {
Closeables.closeQuietly(c);
}
// Now creates a new writer to write 10 more events across two partitions with a skip one partition.
try (FileWriter<StreamEvent> writer = createWriter(config, filePrefix)) {
for (int i = 0; i < 5; i++) {
long ts = System.currentTimeMillis();
writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
}
TimeUnit.MILLISECONDS.sleep(partitionDuration * 3 / 2);
for (int i = 0; i < 5; i++) {
long ts = System.currentTimeMillis();
writer.append(StreamFileTestUtils.createEvent(ts, "Testing " + ts));
}
}
// Create a new reader with the previous offset
reader = new LiveStreamFileReader(config, offset, 100);
events.clear();
stopwatch.reset();
while (stopwatch.elapsedTime(TimeUnit.SECONDS) < 10 && events.size() != 10) {
reader.read(events, 10, 0, TimeUnit.SECONDS);
}
Assert.assertEquals(10, events.size());
// Try to read more, should got nothing
reader.read(events, 10, 2, TimeUnit.SECONDS);
reader.close();
for (Closeable c : closeables) {
c.close();
}
}
/**
* This test is to validate batch write with the same timestamp are written in the same data block.
*/
@Test
public void testAppendAll() throws Exception {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Creates a stream file
try (StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
10000L)) {
final CountDownLatch writeCompleted = new CountDownLatch(1);
final CountDownLatch readAttempted = new CountDownLatch(1);
// Write 1000 events using appendAll from a separate thread
// It writes 1000 events of size 300 bytes of the same timestamp and wait for a signal before ending.
// This make sure the data block is not written (internal buffer size is 256K if the writer flush),
// hence the reader shouldn't be seeing it.
Thread t = new Thread() {
@Override
public void run() {
try {
writer.appendAll(new AbstractIterator<StreamEvent>() {
int count = 1000;
long timestamp = System.currentTimeMillis();
Map<String, String> headers = ImmutableMap.of();
@Override
protected StreamEvent computeNext() {
if (count-- > 0) {
return new StreamEvent(headers, Charsets.UTF_8.encode(String.format("%0300d", count)), timestamp);
}
writeCompleted.countDown();
Uninterruptibles.awaitUninterruptibly(readAttempted);
Flushables.flushQuietly(writer);
return endOfData();
}
});
} catch (IOException e) {
throw Throwables.propagate(e);
}
}
};
t.start();
// Create a reader
try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
List<PositionStreamEvent> events = Lists.newArrayList();
// Wait for the writer completion
Assert.assertTrue(writeCompleted.await(20, TimeUnit.SECONDS));
// Try to read a event, nothing should be read
Assert.assertEquals(0, reader.read(events, 1, 0, TimeUnit.SECONDS));
// Now signal writer to flush
readAttempted.countDown();
// Now should be able to read 1000 events
t.join(10000);
Assert.assertEquals(1000, reader.read(events, 1000, 0, TimeUnit.SECONDS));
int size = events.size();
long lastStart = -1;
for (int i = 0; i < size; i++) {
PositionStreamEvent event = events.get(i);
Assert.assertEquals(String.format("%0300d", size - i - 1), Charsets.UTF_8.decode(event.getBody()).toString());
if (lastStart > 0) {
// The position differences between two consecutive events should be 303
// 2 bytes for body length, 300 bytes body, 1 byte header map (value == 0)
Assert.assertEquals(303L, event.getStart() - lastStart);
}
lastStart = event.getStart();
}
}
}
}
/**
* This is to test batch write with different timestamps will write to different data block correctly.
*/
@Test
public void testAppendAllMultiBlocks() throws IOException, InterruptedException {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Creates a stream file
try (StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile),
Locations.newOutputSupplier(indexFile),
10000L)) {
// Writes with appendAll with events having 2 different timestamps
Map<String, String> headers = ImmutableMap.of();
writer.appendAll(ImmutableList.of(
new StreamEvent(headers, Charsets.UTF_8.encode("0"), 1000),
new StreamEvent(headers, Charsets.UTF_8.encode("0"), 1000),
new StreamEvent(headers, Charsets.UTF_8.encode("1"), 1001),
new StreamEvent(headers, Charsets.UTF_8.encode("1"), 1001)
).iterator());
}
// Reads all events and assert the event position to see if they are in two different blocks
try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile))) {
List<PositionStreamEvent> events = Lists.newArrayList();
Assert.assertEquals(4, reader.read(events, 4, 0, TimeUnit.SECONDS));
// Event is encoded as <var_int_body_length><body_bytes><var_int_map_size>
// Since we are writing single byte data,
// body_length is 1 byte, body_bytes is 1 byte and map_size is 1 byte (with value == 0)
// The position differences between the first two events should be 3 since they belongs to the same data block.
Assert.assertEquals(3L, events.get(1).getStart() - events.get(0).getStart());
// The position differences between the second and third events
// should be 3 (second event size) + 8 (timestamp) + 1 (block length) == 12
Assert.assertEquals(12L, events.get(2).getStart() - events.get(1).getStart());
// The position differences between the third and forth events should be 3 again since they are in the same block
Assert.assertEquals(3L, events.get(3).getStart() - events.get(2).getStart());
}
}
/**
* This unit test is to test the v2 file format that supports
* defaulting values in stream event (timestamp and headers).
*/
@Test
public void testEventTemplate() throws IOException, InterruptedException {
Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
Location eventFile = dir.getTempFile(".dat");
Location indexFile = dir.getTempFile(".idx");
// Creates a stream file with the uni timestamp property and a default header (key=value)
StreamDataFileWriter writer = new StreamDataFileWriter(
Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10000L,
ImmutableMap.of(
StreamDataFileConstants.Property.Key.UNI_TIMESTAMP, StreamDataFileConstants.Property.Value.CLOSE_TIMESTAMP,
StreamDataFileConstants.Property.Key.EVENT_HEADER_PREFIX + "key", "value"
));
// Write 1000 events with different timestamp
for (int i = 0; i < 1000; i++) {
writer.append(StreamFileTestUtils.createEvent(i, "Message " + i));
}
// Trying to get close timestamp should throw exception before the file get closed
try {
writer.getCloseTimestamp();
Assert.fail();
} catch (IllegalStateException e) {
// Expected
}
writer.close();
// Get the close timestamp from the file for assertion below
long timestamp = writer.getCloseTimestamp();
// Create a reader to read all events. All events should have the same timestamp
StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
List<StreamEvent> events = Lists.newArrayList();
Assert.assertEquals(1000, reader.read(events, 1000, 0, TimeUnit.SECONDS));
// All events should have the same timestamp and contains a default header
for (StreamEvent event : events) {
Assert.assertEquals(timestamp, event.getTimestamp());
Assert.assertEquals("value", event.getHeaders().get("key"));
}
// No more events
Assert.assertEquals(-1, reader.read(events, 1, 0, TimeUnit.SECONDS));
reader.close();
// Open another read that reads with a filter that skips all events by timestamp
reader = StreamDataFileReader.create(Locations.newInputSupplier(eventFile));
int res = reader.read(events, 1, 0, TimeUnit.SECONDS, new ReadFilter() {
@Override
public boolean acceptTimestamp(long timestamp) {
return false;
}
});
Assert.assertEquals(-1, res);
reader.close();
}
private FileWriter<StreamEvent> createWriter(StreamConfig config, String prefix) {
return new TimePartitionedStreamFileWriter(config.getLocation(), config.getPartitionDuration(),
prefix, config.getIndexInterval());
}
}