/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data.stream;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import co.cask.cdap.data.file.FileWriter;
import co.cask.cdap.data2.transaction.stream.StreamConfig;
import co.cask.cdap.proto.Id;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import org.apache.twill.filesystem.Location;
import org.apache.twill.filesystem.LocationFactory;
import org.junit.Assert;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
 * Base class for testing {@link MultiLiveStreamFileReader} and {@link LiveStreamFileReader}
 * against different {@link org.apache.twill.filesystem.LocationFactory} implementations.
 */
public abstract class MultiLiveStreamFileReaderTestBase {

  @ClassRule
  public static final TemporaryFolder TMP_FOLDER = new TemporaryFolder();

  /**
   * Provides the {@link LocationFactory} used to create stream storage locations for the tests.
   */
  protected abstract LocationFactory getLocationFactory();

  @Test
  public void testLiveFileReader() throws Exception {
    String streamName = "liveReader";
    Id.Stream streamId = Id.Stream.from(Id.Namespace.DEFAULT, streamName);
    Location location = getLocationFactory().create(streamName);
    location.mkdirs();

    // Create a stream with 5 seconds partition.
    StreamConfig config = new StreamConfig(streamId, 5000, 1000, Long.MAX_VALUE, location, null, 1000);

    // Write 5 events with timestamps 0-4; they go to the first partition.
    try (FileWriter<StreamEvent> writer = createWriter(config, "live.0")) {
      for (int i = 0; i < 5; i++) {
        writer.append(StreamFileTestUtils.createEvent(i, "Testing " + i));
      }
    }

    // Write 5 events with timestamps 15-19. The same file prefix "live.0" is intentional:
    // the time partitioned writer should place these in the fourth partition (ts = 15 to 19).
    try (FileWriter<StreamEvent> writer = createWriter(config, "live.0")) {
      for (int i = 0; i < 5; i++) {
        writer.append(StreamFileTestUtils.createEvent(i + 15, "Testing " + (i + 15)));
      }
    }

    // Create a LiveStreamFileReader starting at the first partition.
    // It should be able to follow across partitions and read all 10 events.
    Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), 0,
                                                                     config.getPartitionDuration());
    Location eventLocation = StreamUtils.createStreamLocation(partitionLocation, "live.0", 0, StreamFileType.EVENT);

    List<StreamEvent> events = new ArrayList<>();
    try (LiveStreamFileReader reader = new LiveStreamFileReader(config, new StreamFileOffset(eventLocation, 0, 0))) {
      while (events.size() < 10) {
        // It shouldn't have empty read: every call must make progress until all 10 events are in.
        Assert.assertTrue(reader.read(events, Integer.MAX_VALUE, 0, TimeUnit.SECONDS) > 0);
      }
    }
    Assert.assertEquals(10, events.size());

    // First 5 events must have timestamps 0-4.
    Iterator<StreamEvent> itor = events.iterator();
    for (int i = 0; i < 5; i++) {
      Assert.assertEquals(i, itor.next().getTimestamp());
    }
    // Next 5 events must have timestamps 15-19.
    for (int i = 15; i < 20; i++) {
      Assert.assertEquals(i, itor.next().getTimestamp());
    }
  }

  @Test
  public void testMultiFileReader() throws Exception {
    String streamName = "multiReader";
    Id.Stream streamId = Id.Stream.from(Id.Namespace.DEFAULT, streamName);
    Location location = getLocationFactory().create(streamName);
    location.mkdirs();

    // Create a stream with a single partition (the partition duration is effectively unbounded).
    StreamConfig config = new StreamConfig(streamId, Long.MAX_VALUE, 10000, Long.MAX_VALUE, location, null, 1000);

    // Write out 1000 events spread over 5 files (200 per file) with interleaving timestamps:
    // file i carries timestamps j * 5 + i, so a correct merge reads 0, 1, 2, ... in order.
    List<FileWriter<StreamEvent>> writers = Lists.newArrayList();
    for (int i = 0; i < 5; i++) {
      FileWriter<StreamEvent> writer = createWriter(config, "bucket" + i);
      writers.add(writer);
      for (int j = 0; j < 200; j++) {
        long timestamp = j * 5 + i;
        writer.append(StreamFileTestUtils.createEvent(timestamp, "Testing " + timestamp));
      }
    }

    // Flush all writers so the events written so far become visible to the reader.
    for (FileWriter<StreamEvent> writer : writers) {
      writer.flush();
    }

    // Build one source offset per event file, all starting at the beginning.
    List<StreamFileOffset> sources = Lists.newArrayList();
    Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), 0, Long.MAX_VALUE);
    for (int i = 0; i < 5; i++) {
      Location eventFile = StreamUtils.createStreamLocation(partitionLocation, "bucket" + i, 0, StreamFileType.EVENT);
      sources.add(new StreamFileOffset(eventFile, 0L, 0));
    }

    // Reads all events written so far, in batches of 100, verifying the merged ordering.
    // try-with-resources ensures the reader is closed even if an assertion fails.
    try (MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(config, sources)) {
      List<StreamEvent> events = Lists.newArrayList();
      long expectedTimestamp = 0L;
      for (int i = 0; i < 10; i++) {
        Assert.assertEquals(100, reader.read(events, 100, 0, TimeUnit.SECONDS));
        Assert.assertEquals(100, events.size());
        expectedTimestamp = verifyEvents(events, expectedTimestamp);
        events.clear();
      }

      // No more events are available yet.
      Assert.assertEquals(0, reader.read(events, 1, 1, TimeUnit.SECONDS));

      // Writes some more events to the first three writers (timestamps from 1000, again interleaved).
      for (int i = 0; i < 3; i++) {
        FileWriter<StreamEvent> writer = writers.get(i);
        for (int j = 0; j < 10; j++) {
          long timestamp = 1000 + j * 3 + i;
          writer.append(StreamFileTestUtils.createEvent(timestamp, "Testing " + timestamp));
        }
      }

      // Close all writers; the new events become readable afterwards.
      for (FileWriter<StreamEvent> writer : writers) {
        writer.close();
      }

      // Continue to read: the live reader must pick up the 30 newly written events.
      Assert.assertEquals(30, reader.read(events, 30, 2, TimeUnit.SECONDS));
      Assert.assertEquals(30, events.size());
      verifyEvents(events, expectedTimestamp);

      // Should get no more events.
      Assert.assertEquals(0, reader.read(events, 1, 1, TimeUnit.SECONDS));
    }
  }

  @Test
  public void testOffsets() throws Exception {
    String streamName = "offsets";
    Id.Stream streamId = Id.Stream.from(Id.Namespace.DEFAULT, streamName);
    Location location = getLocationFactory().create(streamName);
    location.mkdirs();

    // Create a stream with a single partition.
    StreamConfig config = new StreamConfig(streamId, Long.MAX_VALUE, 10000, Long.MAX_VALUE, location, null, 1000);

    // Write out 1000 events over 5 files (200 per file) with interleaving timestamps
    // (file i carries timestamps j * 5 + i).
    for (int i = 0; i < 5; i++) {
      try (FileWriter<StreamEvent> writer = createWriter(config, "bucket" + i)) {
        for (int j = 0; j < 200; j++) {
          long timestamp = j * 5 + i;
          writer.append(StreamFileTestUtils.createEvent(timestamp, "Testing " + timestamp));
        }
      }
    }

    // Build one source offset per event file.
    List<StreamFileOffset> sources = Lists.newArrayList();
    Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), 0, Long.MAX_VALUE);
    for (int i = 0; i < 5; i++) {
      Location eventFile = StreamUtils.createStreamLocation(partitionLocation, "bucket" + i, 0, StreamFileType.EVENT);
      sources.add(new StreamFileOffset(eventFile, 0L, 0));
    }

    long expectedTimestamp = 0L;
    Iterable<StreamFileOffset> offsets;

    // Read the first 250 events in batches of 10, then capture the reader position.
    try (MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(config, sources)) {
      List<StreamEvent> events = Lists.newArrayList();
      for (int i = 0; i < 25; i++) {
        Assert.assertEquals(10, reader.read(events, 10, 0, TimeUnit.SECONDS));
        Assert.assertEquals(10, events.size());
        expectedTimestamp = verifyEvents(events, expectedTimestamp);
        events.clear();
      }

      // Capture the offsets, taking defensive copies before the reader is closed.
      offsets = ImmutableList.copyOf(
        Iterables.transform(reader.getPosition(), new Function<StreamFileOffset, StreamFileOffset>() {
          @Override
          public StreamFileOffset apply(StreamFileOffset input) {
            return new StreamFileOffset(input);
          }
        }));
    }

    // Create another multi reader resuming from the captured offsets and
    // read the remaining 750 events, again in batches of 10.
    sources.clear();
    Iterables.addAll(sources, offsets);
    try (MultiLiveStreamFileReader reader = new MultiLiveStreamFileReader(config, sources)) {
      List<StreamEvent> events = Lists.newArrayList();
      for (int i = 0; i < 75; i++) {
        Assert.assertEquals(10, reader.read(events, 10, 0, TimeUnit.SECONDS));
        Assert.assertEquals(10, events.size());
        expectedTimestamp = verifyEvents(events, expectedTimestamp);
        events.clear();
      }
      // All events consumed; further reads return nothing.
      Assert.assertEquals(0, reader.read(events, 10, 2, TimeUnit.SECONDS));
    }
  }

  /**
   * Verifies that the given events carry consecutive timestamps starting at
   * {@code expectedTimestamp}, each with a body of the form {@code "Testing <timestamp>"}.
   *
   * @param events the batch of events to verify, in read order
   * @param expectedTimestamp timestamp expected for the first event in the batch
   * @return the timestamp expected for the first event of the next batch
   */
  private long verifyEvents(Iterable<StreamEvent> events, long expectedTimestamp) {
    for (StreamEvent event : events) {
      Assert.assertEquals(expectedTimestamp, event.getTimestamp());
      Assert.assertEquals("Testing " + expectedTimestamp, Charsets.UTF_8.decode(event.getBody()).toString());
      expectedTimestamp++;
    }
    return expectedTimestamp;
  }

  /**
   * Creates a time partitioned stream file writer for the given stream.
   *
   * @param config stream configuration supplying the location, partition duration and index interval
   * @param prefix file name prefix for the generated stream files
   */
  private FileWriter<StreamEvent> createWriter(StreamConfig config, String prefix) {
    return new TimePartitionedStreamFileWriter(config.getLocation(), config.getPartitionDuration(),
                                               prefix, config.getIndexInterval());
  }
}