/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.flume.source.taildir;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.collect.Table;
import com.google.common.io.Files;
import org.apache.flume.Event;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.apache.flume.source.taildir.TaildirSourceConfigurationConstants
.BYTE_OFFSET_HEADER_KEY;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
/**
 * Tests for {@link ReliableTaildirEventReader}.
 *
 * <p>Every test works in a fresh temporary directory created in {@link #setUp()} and removed in
 * {@link #tearDown()}. Readers are built through the {@code getReader} helpers, which tail every
 * file matching {@code file.*} in that directory and use a position file inside the same
 * directory.
 */
public class TestTaildirEventReader {
  private File tmpDir;
  private String posFilePath;

  /**
   * Decodes the body of an event as UTF-8 text (the charset every test file is written with).
   *
   * @param event the event whose body is decoded
   * @return the event body as a string
   */
  public static String bodyAsString(Event event) {
    return new String(event.getBody(), Charsets.UTF_8);
  }

  /**
   * Decodes the bodies of the given events as UTF-8 text, preserving order.
   *
   * @param events the events to decode
   * @return one string per event, in the same order
   */
  static List<String> bodiesAsStrings(List<Event> events) {
    List<String> bodies = Lists.newArrayListWithCapacity(events.size());
    for (Event event : events) {
      bodies.add(bodyAsString(event));
    }
    return bodies;
  }

  /**
   * Collects the value of one header from each of the given events, preserving order.
   *
   * @param events the events to inspect
   * @param headerKey the header to extract
   * @return one entry per event; an entry is {@code null} when the event lacks the header
   */
  static List<String> headersAsStrings(List<Event> events, String headerKey) {
    List<String> headers = Lists.newArrayListWithCapacity(events.size());
    for (Event event : events) {
      headers.add(event.getHeaders().get(headerKey));
    }
    return headers;
  }

  /**
   * Builds a reader for the given file groups with {@code skipToEnd} disabled and the test's
   * position file, then calls {@code updateTailFiles()} once so existing files are picked up.
   *
   * @param filePaths file group name to file pattern
   * @param headerTable headers to pass to the reader's builder
   * @param addByteOffset value passed to the builder's {@code addByteOffset} option
   * @return the initialised reader
   * @throws RuntimeException wrapping any {@link IOException} raised while building the reader
   */
  private ReliableTaildirEventReader getReader(Map<String, String> filePaths,
      Table<String, String, String> headerTable, boolean addByteOffset) {
    ReliableTaildirEventReader reader;
    try {
      reader = new ReliableTaildirEventReader.Builder()
          .filePaths(filePaths)
          .headerTable(headerTable)
          .positionFilePath(posFilePath)
          .skipToEnd(false)
          .addByteOffset(addByteOffset)
          .build();
      reader.updateTailFiles();
    } catch (IOException ioe) {
      throw Throwables.propagate(ioe);
    }
    return reader;
  }

  /** Builds a reader over {@code <tmpDir>/file.*} with an empty header table. */
  private ReliableTaildirEventReader getReader(boolean addByteOffset) {
    Map<String, String> filePaths = ImmutableMap.of("testFiles",
        tmpDir.getAbsolutePath() + "/file.*");
    Table<String, String, String> headerTable = HashBasedTable.create();
    return getReader(filePaths, headerTable, addByteOffset);
  }

  /** Builds a reader over {@code <tmpDir>/file.*} without the byte offset header. */
  private ReliableTaildirEventReader getReader() {
    return getReader(false);
  }

  /**
   * Reads up to {@code batchSize} events from every file the reader currently tails and returns
   * all bodies decoded as strings.
   *
   * @param reader the reader to read from
   * @param batchSize maximum number of events requested per file
   * @param commitAfterEach whether to call {@code commit()} after reading each file
   * @return the decoded bodies, in the order they were read
   */
  private static List<String> readFromEachTailFile(ReliableTaildirEventReader reader,
      int batchSize, boolean commitAfterEach) throws IOException {
    List<String> bodies = Lists.newArrayList();
    for (TailFile tf : reader.getTailFiles().values()) {
      bodies.addAll(bodiesAsStrings(reader.readEvents(tf, batchSize)));
      if (commitAfterEach) {
        reader.commit();
      }
    }
    return bodies;
  }

  /**
   * Looks up the tail file whose path equals the absolute path of {@code file}.
   *
   * @return the matching tail file, or {@code null} if the reader does not tail it
   */
  private static TailFile findTailFile(ReliableTaildirEventReader reader, File file) {
    // NOTE(review): assumes TailFile#getPath() is an absolute path -- confirm.
    String wanted = file.getAbsolutePath();
    for (TailFile tf : reader.getTailFiles().values()) {
      if (wanted.equals(tf.getPath())) {
        return tf;
      }
    }
    return null;
  }

  /** Deletes {@code file} and, if it is a directory, everything below it (best effort). */
  private static void deleteRecursively(File file) {
    if (file == null) {
      return;
    }
    // listFiles() returns null for non-directories and on I/O errors.
    File[] children = file.listFiles();
    if (children != null) {
      for (File child : children) {
        deleteRecursively(child);
      }
    }
    file.delete();
  }

  @Before
  public void setUp() {
    tmpDir = Files.createTempDir();
    posFilePath = tmpDir.getAbsolutePath() + "/taildir_position_test.json";
  }

  @After
  public void tearDown() {
    deleteRecursively(tmpDir);
  }

  // Create three multi-line files then read them back out. Ensures that
  // lines and appended ones are read correctly from files.
  @Test
  public void testBasicReadFiles() throws IOException {
    File f1 = new File(tmpDir, "file1");
    File f2 = new File(tmpDir, "file2");
    File f3 = new File(tmpDir, "file3");
    Files.write("file1line1\nfile1line2\n", f1, Charsets.UTF_8);
    Files.write("file2line1\nfile2line2\n", f2, Charsets.UTF_8);
    Files.write("file3line1\nfile3line2\n", f3, Charsets.UTF_8);

    ReliableTaildirEventReader reader = getReader();
    List<String> out = Lists.newArrayList();
    out.addAll(readFromEachTailFile(reader, 2, true));

    assertEquals(6, out.size());
    // Make sure we got every line
    assertTrue(out.contains("file1line1"));
    assertTrue(out.contains("file1line2"));
    assertTrue(out.contains("file2line1"));
    assertTrue(out.contains("file2line2"));
    assertTrue(out.contains("file3line1"));
    assertTrue(out.contains("file3line2"));

    Files.append("file3line3\nfile3line4\n", f3, Charsets.UTF_8);
    reader.updateTailFiles();
    out.addAll(readFromEachTailFile(reader, 2, true));

    assertEquals(8, out.size());
    assertTrue(out.contains("file3line3"));
    assertTrue(out.contains("file3line4"));
  }

  // Make sure this works when there are initially no files
  // and we finish reading all files and fully commit.
  @Test
  public void testInitiallyEmptyDirAndBehaviorAfterReadingAll() throws IOException {
    ReliableTaildirEventReader reader = getReader();
    List<Long> fileInodes = reader.updateTailFiles();
    assertEquals(0, fileInodes.size());

    File f1 = new File(tmpDir, "file1");
    Files.write("file1line1\nfile1line2\n", f1, Charsets.UTF_8);
    reader.updateTailFiles();
    List<String> out = readFromEachTailFile(reader, 2, true);

    assertEquals(2, out.size());
    // Make sure we got every line
    assertTrue(out.contains("file1line1"));
    assertTrue(out.contains("file1line2"));

    // Everything has been read and committed, so another pass must yield nothing.
    reader.updateTailFiles();
    List<String> empty = readFromEachTailFile(reader, 15, true);
    assertEquals(0, empty.size());
  }

  // Test a basic case where a commit is missed.
  @Test
  public void testBasicCommitFailure() throws IOException {
    File f1 = new File(tmpDir, "file1");
    Files.write("file1line1\nfile1line2\nfile1line3\nfile1line4\n" +
                "file1line5\nfile1line6\nfile1line7\nfile1line8\n" +
                "file1line9\nfile1line10\nfile1line11\nfile1line12\n",
                f1, Charsets.UTF_8);

    ReliableTaildirEventReader reader = getReader();
    // Deliberately not committed: the next read must return the same lines again.
    List<String> out1 = readFromEachTailFile(reader, 4, false);
    assertTrue(out1.contains("file1line1"));
    assertTrue(out1.contains("file1line2"));
    assertTrue(out1.contains("file1line3"));
    assertTrue(out1.contains("file1line4"));

    List<String> out2 = bodiesAsStrings(reader.readEvents(4));
    assertTrue(out2.contains("file1line1"));
    assertTrue(out2.contains("file1line2"));
    assertTrue(out2.contains("file1line3"));
    assertTrue(out2.contains("file1line4"));

    reader.commit();
    List<String> out3 = bodiesAsStrings(reader.readEvents(4));
    assertTrue(out3.contains("file1line5"));
    assertTrue(out3.contains("file1line6"));
    assertTrue(out3.contains("file1line7"));
    assertTrue(out3.contains("file1line8"));

    reader.commit();
    List<String> out4 = bodiesAsStrings(reader.readEvents(4));
    assertEquals(4, out4.size());
    assertTrue(out4.contains("file1line9"));
    assertTrue(out4.contains("file1line10"));
    assertTrue(out4.contains("file1line11"));
    assertTrue(out4.contains("file1line12"));
  }

  // Test a case where a commit is missed and the batch size changes.
  @Test
  public void testBasicCommitFailureAndBatchSizeChanges() throws IOException {
    File f1 = new File(tmpDir, "file1");
    Files.write("file1line1\nfile1line2\nfile1line3\nfile1line4\n" +
                "file1line5\nfile1line6\nfile1line7\nfile1line8\n",
                f1, Charsets.UTF_8);

    ReliableTaildirEventReader reader = getReader();
    // Deliberately not committed: the next (smaller) read must start from line 1 again.
    List<String> out1 = readFromEachTailFile(reader, 5, false);
    assertTrue(out1.contains("file1line1"));
    assertTrue(out1.contains("file1line2"));
    assertTrue(out1.contains("file1line3"));
    assertTrue(out1.contains("file1line4"));
    assertTrue(out1.contains("file1line5"));

    List<String> out2 = bodiesAsStrings(reader.readEvents(2));
    assertTrue(out2.contains("file1line1"));
    assertTrue(out2.contains("file1line2"));

    reader.commit();
    List<String> out3 = bodiesAsStrings(reader.readEvents(2));
    assertTrue(out3.contains("file1line3"));
    assertTrue(out3.contains("file1line4"));

    reader.commit();
    List<String> out4 = bodiesAsStrings(reader.readEvents(15));
    assertTrue(out4.contains("file1line5"));
    assertTrue(out4.contains("file1line6"));
    assertTrue(out4.contains("file1line7"));
    assertTrue(out4.contains("file1line8"));
  }

  // An empty file next to a populated one must contribute no events.
  @Test
  public void testIncludeEmptyFile() throws IOException {
    File f1 = new File(tmpDir, "file1");
    File f2 = new File(tmpDir, "file2");
    Files.write("file1line1\nfile1line2\n", f1, Charsets.UTF_8);
    Files.touch(f2);

    ReliableTaildirEventReader reader = getReader();
    // Expect to read nothing from empty file
    List<String> out = readFromEachTailFile(reader, 5, true);
    assertEquals(2, out.size());
    assertTrue(out.contains("file1line1"));
    assertTrue(out.contains("file1line2"));
    assertNull(reader.readEvent());
  }

  // A trailing line without a newline is held back until it is completed, unless the
  // caller asks for it explicitly via readEvents(n, false).
  @Test
  public void testBackoffWithoutNewLine() throws IOException {
    File f1 = new File(tmpDir, "file1");
    Files.write("file1line1\nfile1", f1, Charsets.UTF_8);

    ReliableTaildirEventReader reader = getReader();
    List<String> out = Lists.newArrayList();
    // Expect to read only the line with newline
    out.addAll(readFromEachTailFile(reader, 5, true));
    assertEquals(1, out.size());
    assertTrue(out.contains("file1line1"));

    Files.append("line2\nfile1line3\nfile1line4", f1, Charsets.UTF_8);
    out.addAll(readFromEachTailFile(reader, 5, true));
    assertEquals(3, out.size());
    assertTrue(out.contains("file1line2"));
    assertTrue(out.contains("file1line3"));

    // Should read the last line if it finally has no newline
    out.addAll(bodiesAsStrings(reader.readEvents(5, false)));
    reader.commit();
    assertEquals(4, out.size());
    assertTrue(out.contains("file1line4"));
  }

  // A batch never spans two files: the remainder of file1 is returned on its own before
  // file2 is read.
  @Test
  public void testBatchedReadsAcrossFileBoundary() throws IOException {
    File f1 = new File(tmpDir, "file1");
    Files.write("file1line1\nfile1line2\nfile1line3\nfile1line4\n" +
                "file1line5\nfile1line6\nfile1line7\nfile1line8\n",
                f1, Charsets.UTF_8);

    ReliableTaildirEventReader reader = getReader();
    List<String> out1 = readFromEachTailFile(reader, 5, true);

    File f2 = new File(tmpDir, "file2");
    Files.write("file2line1\nfile2line2\nfile2line3\nfile2line4\n" +
                "file2line5\nfile2line6\nfile2line7\nfile2line8\n",
                f2, Charsets.UTF_8);

    // file2 is not tailed yet (no updateTailFiles), so this continues with file1.
    List<String> out2 = bodiesAsStrings(reader.readEvents(5));
    reader.commit();

    reader.updateTailFiles();
    List<String> out3 = readFromEachTailFile(reader, 5, true);

    // Should have first 5 lines of file1
    assertEquals(5, out1.size());
    assertTrue(out1.contains("file1line1"));
    assertTrue(out1.contains("file1line2"));
    assertTrue(out1.contains("file1line3"));
    assertTrue(out1.contains("file1line4"));
    assertTrue(out1.contains("file1line5"));

    // Should have 3 remaining lines of file1
    assertEquals(3, out2.size());
    assertTrue(out2.contains("file1line6"));
    assertTrue(out2.contains("file1line7"));
    assertTrue(out2.contains("file1line8"));

    // Should have first 5 lines of file2
    assertEquals(5, out3.size());
    assertTrue(out3.contains("file2line1"));
    assertTrue(out3.contains("file2line2"));
    assertTrue(out3.contains("file2line3"));
    assertTrue(out3.contains("file2line4"));
    assertTrue(out3.contains("file2line5"));
  }

  // Every one of 1000 single-line files must be read exactly as written.
  @Test
  public void testLargeNumberOfFiles() throws IOException {
    int fileNum = 1000;
    Set<String> expected = Sets.newHashSet();
    for (int i = 0; i < fileNum; i++) {
      String data = "data" + i;
      File f = new File(tmpDir, "file" + i);
      Files.write(data + "\n", f, Charsets.UTF_8);
      expected.add(data);
    }

    ReliableTaildirEventReader reader = getReader();
    for (String body : readFromEachTailFile(reader, 10, true)) {
      expected.remove(body);
    }
    assertEquals(0, expected.size());
  }

  // Positions recorded in a position file are applied to the matching tail files.
  @Test
  public void testLoadPositionFile() throws IOException {
    File f1 = new File(tmpDir, "file1");
    File f2 = new File(tmpDir, "file2");
    File f3 = new File(tmpDir, "file3");
    Files.write("file1line1\nfile1line2\nfile1line3\n", f1, Charsets.UTF_8);
    Files.write("file2line1\nfile2line2\n", f2, Charsets.UTF_8);
    Files.write("file3line1\n", f3, Charsets.UTF_8);

    ReliableTaildirEventReader reader = getReader();
    Map<Long, TailFile> tailFiles = reader.getTailFiles();

    // Record the same position (the length of file2) for every tailed file.
    long pos = f2.length();
    StringBuilder json = new StringBuilder("[");
    for (TailFile tf : tailFiles.values()) {
      if (json.length() > 1) {
        json.append(',');
      }
      json.append(String.format("{\"inode\":%s,\"pos\":%s,\"file\":\"%s\"}",
          tf.getInode(), pos, tf.getPath()));
    }
    json.append(']');
    Files.write(json.toString(), new File(posFilePath), Charsets.UTF_8);

    reader.loadPositionFile(posFilePath);

    for (TailFile tf : tailFiles.values()) {
      // NOTE(review): tmpDir + "file3" has no path separator, so this condition looks like it
      // can never match and only the else branch runs. It is left untouched because whether
      // loadPositionFile resets an out-of-range position to 0 is not visible from this test;
      // confirm the intended behaviour before comparing against f3.getAbsolutePath().
      if (tf.getPath().equals(tmpDir + "file3")) {
        // when given position is larger than file size
        assertEquals(0, tf.getPos());
      } else {
        assertEquals(pos, tf.getPos());
      }
    }
  }

  // A file discovered with skipToEnd disabled starts at position 0; a file discovered by
  // updateTailFiles(true) starts at its end.
  @Test
  public void testSkipToEndPosition() throws IOException {
    ReliableTaildirEventReader reader = getReader();

    File f1 = new File(tmpDir, "file1");
    Files.write("file1line1\nfile1line2\n", f1, Charsets.UTF_8);
    reader.updateTailFiles();
    TailFile tf1 = findTailFile(reader, f1);
    assertTrue("file1 should be tailed", tf1 != null);
    assertEquals(0, tf1.getPos());

    File f2 = new File(tmpDir, "file2");
    Files.write("file2line1\nfile2line2\n", f2, Charsets.UTF_8);
    // Expect to skip to EOF the read position when skipToEnd option is true
    reader.updateTailFiles(true);
    TailFile tf2 = findTailFile(reader, f2);
    assertTrue("file2 should be tailed", tf2 != null);
    assertEquals(f2.length(), tf2.getPos());
  }

  // With addByteOffset enabled each event carries the byte offset at which its line starts.
  @Test
  public void testByteOffsetHeader() throws IOException {
    File f1 = new File(tmpDir, "file1");
    String line1 = "file1line1\n";
    String line2 = "file1line2\n";
    String line3 = "file1line3\n";
    Files.write(line1 + line2 + line3, f1, Charsets.UTF_8);

    ReliableTaildirEventReader reader = getReader(true);
    List<String> headers = Lists.newArrayList();
    for (TailFile tf : reader.getTailFiles().values()) {
      headers.addAll(headersAsStrings(reader.readEvents(tf, 5), BYTE_OFFSET_HEADER_KEY));
      reader.commit();
    }

    assertEquals(3, headers.size());
    // Make sure we got byte offset position
    int line1Bytes = line1.getBytes(Charsets.UTF_8).length;
    int line2Bytes = line2.getBytes(Charsets.UTF_8).length;
    assertTrue(headers.contains(String.valueOf(0)));
    assertTrue(headers.contains(String.valueOf(line1Bytes)));
    assertTrue(headers.contains(String.valueOf(line1Bytes + line2Bytes)));
  }

  // Checks which character sequences terminate a line.
  @Test
  public void testNewLineBoundaries() throws IOException {
    File f1 = new File(tmpDir, "file1");
    Files.write("file1line1\nfile1line2\rfile1line2\nfile1line3\r\nfile1line4\n",
                f1, Charsets.UTF_8);

    ReliableTaildirEventReader reader = getReader();
    List<String> out = readFromEachTailFile(reader, 5, true);

    assertEquals(4, out.size());
    //Should treat \n as line boundary
    assertTrue(out.contains("file1line1"));
    //Should not treat \r as line boundary
    assertTrue(out.contains("file1line2\rfile1line2"));
    //Should treat \r\n as line boundary
    assertTrue(out.contains("file1line3"));
    assertTrue(out.contains("file1line4"));
  }

  // Ensure tail file is set to be read when its last updated time
  // equals the underlying file's modification time and there are
  // pending bytes to be read.
  @Test
  public void testUpdateWhenLastUpdatedSameAsModificationTime() throws IOException {
    File file = new File(tmpDir, "file");
    Files.write("line1\n", file, Charsets.UTF_8);

    ReliableTaildirEventReader reader = getReader();
    readFromEachTailFile(reader, 1, true);

    Files.append("line2\n", file, Charsets.UTF_8);
    // Force the "last updated" timestamp to coincide with the file's modification time.
    for (TailFile tf : reader.getTailFiles().values()) {
      tf.setLastUpdated(file.lastModified());
    }

    reader.updateTailFiles();
    for (TailFile tf : reader.getTailFiles().values()) {
      assertTrue(tf.needTail());
    }
  }
}