/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.flume.source.taildir;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Table;
import com.google.gson.stream.JsonReader;
import org.apache.flume.Event;
import org.apache.flume.FlumeException;
import org.apache.flume.annotations.InterfaceAudience;
import org.apache.flume.annotations.InterfaceStability;
import org.apache.flume.client.avro.ReliableEventReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class ReliableTaildirEventReader implements ReliableEventReader {
private static final Logger logger = LoggerFactory.getLogger(ReliableTaildirEventReader.class);
private final List<TaildirMatcher> taildirCache;
private final Table<String, String, String> headerTable;
private TailFile currentFile = null;
private Map<Long, TailFile> tailFiles = Maps.newHashMap();
private long updateTime;
private boolean addByteOffset;
private boolean cachePatternMatching;
private boolean committed = true;
private final boolean annotateFileName;
private final String fileNameHeader;
/**
* Create a ReliableTaildirEventReader to watch the given directory.
*/
private ReliableTaildirEventReader(Map<String, String> filePaths,
Table<String, String, String> headerTable, String positionFilePath,
boolean skipToEnd, boolean addByteOffset, boolean cachePatternMatching,
boolean annotateFileName, String fileNameHeader) throws IOException {
// Sanity checks
Preconditions.checkNotNull(filePaths);
Preconditions.checkNotNull(positionFilePath);
if (logger.isDebugEnabled()) {
logger.debug("Initializing {} with directory={}, metaDir={}",
new Object[] { ReliableTaildirEventReader.class.getSimpleName(), filePaths });
}
List<TaildirMatcher> taildirCache = Lists.newArrayList();
for (Entry<String, String> e : filePaths.entrySet()) {
taildirCache.add(new TaildirMatcher(e.getKey(), e.getValue(), cachePatternMatching));
}
logger.info("taildirCache: " + taildirCache.toString());
logger.info("headerTable: " + headerTable.toString());
this.taildirCache = taildirCache;
this.headerTable = headerTable;
this.addByteOffset = addByteOffset;
this.cachePatternMatching = cachePatternMatching;
this.annotateFileName = annotateFileName;
this.fileNameHeader = fileNameHeader;
updateTailFiles(skipToEnd);
logger.info("Updating position from position file: " + positionFilePath);
loadPositionFile(positionFilePath);
}
/**
* Load a position file which has the last read position of each file.
* If the position file exists, update tailFiles mapping.
*/
public void loadPositionFile(String filePath) {
Long inode, pos;
String path;
FileReader fr = null;
JsonReader jr = null;
try {
fr = new FileReader(filePath);
jr = new JsonReader(fr);
jr.beginArray();
while (jr.hasNext()) {
inode = null;
pos = null;
path = null;
jr.beginObject();
while (jr.hasNext()) {
switch (jr.nextName()) {
case "inode":
inode = jr.nextLong();
break;
case "pos":
pos = jr.nextLong();
break;
case "file":
path = jr.nextString();
break;
}
}
jr.endObject();
for (Object v : Arrays.asList(inode, pos, path)) {
Preconditions.checkNotNull(v, "Detected missing value in position file. "
+ "inode: " + inode + ", pos: " + pos + ", path: " + path);
}
TailFile tf = tailFiles.get(inode);
if (tf != null && tf.updatePos(path, inode, pos)) {
tailFiles.put(inode, tf);
} else {
logger.info("Missing file: " + path + ", inode: " + inode + ", pos: " + pos);
}
}
jr.endArray();
} catch (FileNotFoundException e) {
logger.info("File not found: " + filePath + ", not updating position");
} catch (IOException e) {
logger.error("Failed loading positionFile: " + filePath, e);
} finally {
try {
if (fr != null) fr.close();
if (jr != null) jr.close();
} catch (IOException e) {
logger.error("Error: " + e.getMessage(), e);
}
}
}
public Map<Long, TailFile> getTailFiles() {
return tailFiles;
}
public void setCurrentFile(TailFile currentFile) {
this.currentFile = currentFile;
}
@Override
public Event readEvent() throws IOException {
List<Event> events = readEvents(1);
if (events.isEmpty()) {
return null;
}
return events.get(0);
}
@Override
public List<Event> readEvents(int numEvents) throws IOException {
return readEvents(numEvents, false);
}
@VisibleForTesting
public List<Event> readEvents(TailFile tf, int numEvents) throws IOException {
setCurrentFile(tf);
return readEvents(numEvents, true);
}
public List<Event> readEvents(int numEvents, boolean backoffWithoutNL)
throws IOException {
if (!committed) {
if (currentFile == null) {
throw new IllegalStateException("current file does not exist. " + currentFile.getPath());
}
logger.info("Last read was never committed - resetting position");
long lastPos = currentFile.getPos();
currentFile.updateFilePos(lastPos);
}
List<Event> events = currentFile.readEvents(numEvents, backoffWithoutNL, addByteOffset);
if (events.isEmpty()) {
return events;
}
Map<String, String> headers = currentFile.getHeaders();
if (annotateFileName || (headers != null && !headers.isEmpty())) {
for (Event event : events) {
if (headers != null && !headers.isEmpty()) {
event.getHeaders().putAll(headers);
}
if (annotateFileName) {
event.getHeaders().put(fileNameHeader, currentFile.getPath());
}
}
}
committed = false;
return events;
}
@Override
public void close() throws IOException {
for (TailFile tf : tailFiles.values()) {
if (tf.getRaf() != null) tf.getRaf().close();
}
}
/** Commit the last lines which were read. */
@Override
public void commit() throws IOException {
if (!committed && currentFile != null) {
long pos = currentFile.getLineReadPos();
currentFile.setPos(pos);
currentFile.setLastUpdated(updateTime);
committed = true;
}
}
/**
* Update tailFiles mapping if a new file is created or appends are detected
* to the existing file.
*/
public List<Long> updateTailFiles(boolean skipToEnd) throws IOException {
updateTime = System.currentTimeMillis();
List<Long> updatedInodes = Lists.newArrayList();
for (TaildirMatcher taildir : taildirCache) {
Map<String, String> headers = headerTable.row(taildir.getFileGroup());
for (File f : taildir.getMatchingFiles()) {
long inode = getInode(f);
TailFile tf = tailFiles.get(inode);
if (tf == null || !tf.getPath().equals(f.getAbsolutePath())) {
long startPos = skipToEnd ? f.length() : 0;
tf = openFile(f, headers, inode, startPos);
} else {
boolean updated = tf.getLastUpdated() < f.lastModified() || tf.getPos() != f.length();
if (updated) {
if (tf.getRaf() == null) {
tf = openFile(f, headers, inode, tf.getPos());
}
if (f.length() < tf.getPos()) {
logger.info("Pos " + tf.getPos() + " is larger than file size! "
+ "Restarting from pos 0, file: " + tf.getPath() + ", inode: " + inode);
tf.updatePos(tf.getPath(), inode, 0);
}
}
tf.setNeedTail(updated);
}
tailFiles.put(inode, tf);
updatedInodes.add(inode);
}
}
return updatedInodes;
}
public List<Long> updateTailFiles() throws IOException {
return updateTailFiles(false);
}
private long getInode(File file) throws IOException {
long inode = (long) Files.getAttribute(file.toPath(), "unix:ino");
return inode;
}
private TailFile openFile(File file, Map<String, String> headers, long inode, long pos) {
try {
logger.info("Opening file: " + file + ", inode: " + inode + ", pos: " + pos);
return new TailFile(file, headers, inode, pos);
} catch (IOException e) {
throw new FlumeException("Failed opening file: " + file, e);
}
}
/**
* Special builder class for ReliableTaildirEventReader
*/
public static class Builder {
private Map<String, String> filePaths;
private Table<String, String, String> headerTable;
private String positionFilePath;
private boolean skipToEnd;
private boolean addByteOffset;
private boolean cachePatternMatching;
private Boolean annotateFileName =
TaildirSourceConfigurationConstants.DEFAULT_FILE_HEADER;
private String fileNameHeader =
TaildirSourceConfigurationConstants.DEFAULT_FILENAME_HEADER_KEY;
public Builder filePaths(Map<String, String> filePaths) {
this.filePaths = filePaths;
return this;
}
public Builder headerTable(Table<String, String, String> headerTable) {
this.headerTable = headerTable;
return this;
}
public Builder positionFilePath(String positionFilePath) {
this.positionFilePath = positionFilePath;
return this;
}
public Builder skipToEnd(boolean skipToEnd) {
this.skipToEnd = skipToEnd;
return this;
}
public Builder addByteOffset(boolean addByteOffset) {
this.addByteOffset = addByteOffset;
return this;
}
public Builder cachePatternMatching(boolean cachePatternMatching) {
this.cachePatternMatching = cachePatternMatching;
return this;
}
public Builder annotateFileName(boolean annotateFileName) {
this.annotateFileName = annotateFileName;
return this;
}
public Builder fileNameHeader(String fileNameHeader) {
this.fileNameHeader = fileNameHeader;
return this;
}
public ReliableTaildirEventReader build() throws IOException {
return new ReliableTaildirEventReader(filePaths, headerTable, positionFilePath, skipToEnd,
addByteOffset, cachePatternMatching,
annotateFileName, fileNameHeader);
}
}
}