/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.pinterest.secor.common;
import com.pinterest.secor.io.FileWriter;
import com.pinterest.secor.util.FileUtil;
import com.pinterest.secor.util.ReflectionUtil;
import com.pinterest.secor.util.StatsUtil;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.*;
/**
* FileRegistry keeps track of local log files currently being appended to and the associated
* writers.
*
* @author Pawel Garbacki (pawel@pinterest.com)
*/
public class FileRegistry {
    private static final Logger LOG = LoggerFactory.getLogger(FileRegistry.class);

    private final SecorConfig mConfig;
    /** Registered paths, grouped by the topic partition group derived from each path. */
    private final Map<TopicPartitionGroup, Set<LogFilePath>> mFiles;
    /** Open writer for each registered path. */
    private final Map<LogFilePath, FileWriter> mWriters;
    /** Writer creation time for each path, in seconds since the epoch. */
    private final Map<LogFilePath, Long> mCreationTimes;

    /**
     * Create an empty registry.
     * @param mConfig Configuration used to create writers and to select the file age policy.
     */
    public FileRegistry(SecorConfig mConfig) {
        this.mConfig = mConfig;
        mFiles = new HashMap<TopicPartitionGroup, Set<LogFilePath>>();
        mWriters = new HashMap<LogFilePath, FileWriter>();
        mCreationTimes = new HashMap<LogFilePath, Long>();
    }

    /**
     * Get all topic partitions.
     * @return Collection of all registered topic partitions. The returned collection is a fresh
     *     copy, never null.
     */
    public Collection<TopicPartition> getTopicPartitions() {
        Set<TopicPartition> tps = new HashSet<TopicPartition>();
        for (TopicPartitionGroup group : mFiles.keySet()) {
            tps.addAll(group.getTopicPartitions());
        }
        return tps;
    }

    /**
     * Get all topic partition groups.
     * @return Collection of all registered topic partition groups. The returned collection is a
     *     fresh copy, never null.
     */
    public Collection<TopicPartitionGroup> getTopicPartitionGroups() {
        // Copy the key set so callers can delete from the registry while iterating the result.
        return new HashSet<TopicPartitionGroup>(mFiles.keySet());
    }

    /**
     * Get paths in a given topic partition.
     * @param topicPartition The topic partition to retrieve paths for.
     * @return Collection of file paths in the given topic partition.
     */
    public Collection<LogFilePath> getPaths(TopicPartition topicPartition) {
        return getPaths(new TopicPartitionGroup(topicPartition));
    }

    /**
     * Get paths in a given topic partition group.
     * @param topicPartitionGroup The topic partition group to retrieve paths for.
     * @return Copy of the file paths in the given topic partition group; empty if the group is
     *     not registered.
     */
    public Collection<LogFilePath> getPaths(TopicPartitionGroup topicPartitionGroup) {
        Set<LogFilePath> logFilePaths = mFiles.get(topicPartitionGroup);
        if (logFilePaths == null) {
            return new HashSet<LogFilePath>();
        }
        return new HashSet<LogFilePath>(logFilePaths);
    }

    /**
     * Retrieve an existing writer for a given path.
     * @param path The path to retrieve writer for.
     * @return Writer for a given path or null if no writer has been created yet.
     * @throws Exception Declared for caller compatibility; this method only performs a lookup.
     */
    public FileWriter getWriter(LogFilePath path)
            throws Exception {
        return mWriters.get(path);
    }

    /**
     * Retrieve a writer for a given path or create a new one if it does not exist.
     *
     * When a writer is created, any existing log file and CRC file at the path are deleted
     * first, the path is registered under its topic partition group, the creation time is
     * recorded, and the file, its CRC file and its directories are passed to
     * {@code FileUtil.deleteOnExit}.
     *
     * @param path The path to retrieve writer for.
     * @param codec Optional compression codec.
     * @return Writer for a given path.
     * @throws Exception If the writer cannot be created.
     */
    public FileWriter getOrCreateWriter(LogFilePath path, CompressionCodec codec)
            throws Exception {
        FileWriter writer = mWriters.get(path);
        if (writer != null) {
            return writer;
        }
        // Just in case a stale file was left behind at this location.
        FileUtil.delete(path.getLogFilePath());
        FileUtil.delete(path.getLogFileCrcPath());
        TopicPartitionGroup topicPartition = new TopicPartitionGroup(path.getTopic(),
                path.getKafkaPartitions());
        Set<LogFilePath> files = mFiles.get(topicPartition);
        if (files == null) {
            files = new HashSet<LogFilePath>();
            mFiles.put(topicPartition, files);
        }
        // Set.add is a no-op when the path is already present.
        files.add(path);
        writer = ReflectionUtil.createFileWriter(mConfig.getFileReaderWriterFactory(), path,
                codec, mConfig);
        mWriters.put(path, writer);
        mCreationTimes.put(path, System.currentTimeMillis() / 1000L);
        LOG.debug("created writer for path {}", path.getLogFilePath());
        LOG.debug("Register deleteOnExit for path {}", path.getLogFilePath());
        FileUtil.deleteOnExit(path.getLogFileParentDir());
        FileUtil.deleteOnExit(path.getLogFileDir());
        FileUtil.deleteOnExit(path.getLogFilePath());
        FileUtil.deleteOnExit(path.getLogFileCrcPath());
        return writer;
    }

    /**
     * Delete a given path, the underlying file, and the corresponding writer.
     *
     * When the last path of a topic partition group is removed, the group is unregistered and
     * its size and modification age stats labels are cleared. A path that was never registered
     * is tolerated: its writer (if any) and files are still deleted.
     *
     * @param path The path to delete.
     * @throws IOException If closing the writer or deleting the files fails.
     */
    public void deletePath(LogFilePath path) throws IOException {
        TopicPartitionGroup topicPartition = new TopicPartitionGroup(path.getTopic(),
                path.getKafkaPartitions());
        Set<LogFilePath> paths = mFiles.get(topicPartition);
        if (paths == null) {
            LOG.warn("No paths found for topic {} partition {}", topicPartition.getTopic(),
                    Arrays.toString(topicPartition.getPartitions()));
        } else {
            paths.remove(path);
            if (paths.isEmpty()) {
                mFiles.remove(topicPartition);
                // Must match the key format used by getSize and getModificationAgeSec,
                // otherwise the labels set there are never cleared.
                String labelSuffix = topicPartition.getTopic() + "." +
                        Arrays.toString(topicPartition.getPartitions());
                StatsUtil.clearLabel("secor.size." + labelSuffix);
                StatsUtil.clearLabel("secor.modification_age_sec." + labelSuffix);
            }
        }
        deleteWriter(path);
        FileUtil.delete(path.getLogFilePath());
        FileUtil.delete(path.getLogFileCrcPath());
    }

    /**
     * Delete all paths, files, and writers in a given topic partition.
     * @param topicPartition The topic partition to remove.
     * @throws IOException If deleting any of the paths fails.
     */
    public void deleteTopicPartition(TopicPartition topicPartition) throws IOException {
        deleteTopicPartitionGroup(new TopicPartitionGroup(topicPartition));
    }

    /**
     * Delete all paths, files, and writers in a given topic partition group.
     * @param topicPartitionGroup The topic partition group to remove. Nothing happens if the
     *     group is not registered.
     * @throws IOException If deleting any of the paths fails.
     */
    public void deleteTopicPartitionGroup(TopicPartitionGroup topicPartitionGroup)
            throws IOException {
        Set<LogFilePath> paths = mFiles.get(topicPartitionGroup);
        if (paths == null) {
            return;
        }
        // Iterate over a snapshot because deletePath removes entries from the live set.
        for (LogFilePath path : new HashSet<LogFilePath>(paths)) {
            deletePath(path);
        }
    }

    /**
     * Delete writer for a given path. Underlying file is not removed.
     *
     * The writer is unregistered even if closing it fails, so a writer whose close threw is
     * never handed out again.
     *
     * @param path The path to remove the writer for.
     * @throws IOException If closing the writer fails.
     */
    public void deleteWriter(LogFilePath path) throws IOException {
        FileWriter writer = mWriters.get(path);
        if (writer == null) {
            LOG.warn("No writer found for path {}", path.getLogFilePath());
            return;
        }
        LOG.info("Deleting writer for path {}", path.getLogFilePath());
        try {
            writer.close();
        } finally {
            mWriters.remove(path);
            mCreationTimes.remove(path);
        }
    }

    /**
     * Delete all writers in a given topic partition. Underlying files are not removed.
     * @param topicPartition The topic partition to remove the writers for.
     * @throws IOException If closing any of the writers fails.
     */
    public void deleteWriters(TopicPartition topicPartition) throws IOException {
        deleteWriters(new TopicPartitionGroup(topicPartition));
    }

    /**
     * Delete all writers in a given topic partition group. Underlying files are not removed
     * and the paths stay registered.
     * @param topicPartitionGroup The topic partition group to remove the writers for.
     * @throws IOException If closing any of the writers fails.
     */
    public void deleteWriters(TopicPartitionGroup topicPartitionGroup) throws IOException {
        Set<LogFilePath> paths = mFiles.get(topicPartitionGroup);
        if (paths == null) {
            LOG.warn("No paths found for topic {} partition {}", topicPartitionGroup.getTopic(),
                    Arrays.toString(topicPartitionGroup.getPartitions()));
            return;
        }
        // deleteWriter does not touch mFiles, so iterating the live set is safe here.
        for (LogFilePath path : paths) {
            deleteWriter(path);
        }
    }

    /**
     * Get aggregated size of all files in a given topic partition.
     * @param topicPartition The topic partition to get the size for.
     * @return Aggregated size of files in the topic partition or 0 if the topic partition does
     *     not contain any files.
     * @throws IOException If a writer fails to report its length.
     */
    public long getSize(TopicPartition topicPartition) throws IOException {
        return getSize(new TopicPartitionGroup(topicPartition));
    }

    /**
     * Get aggregated size of all files in a given topic partition group, as reported by their
     * writers, and publish it as the "secor.size.&lt;topic&gt;.&lt;partitions&gt;" stats label.
     * @param topicPartitionGroup The topic partition group to get the size for.
     * @return Sum of the lengths reported by the group's writers, or 0 if there are none.
     * @throws IOException If a writer fails to report its length.
     */
    public long getSize(TopicPartitionGroup topicPartitionGroup) throws IOException {
        long result = 0;
        for (LogFilePath path : getPaths(topicPartitionGroup)) {
            FileWriter writer = mWriters.get(path);
            // A registered path may have no writer after deleteWriter/deleteWriters.
            if (writer != null) {
                result += writer.getLength();
            }
        }
        StatsUtil.setLabel("secor.size." + topicPartitionGroup.getTopic() + "." +
                Arrays.toString(topicPartitionGroup.getPartitions()), Long.toString(result));
        return result;
    }

    /**
     * Get the creation age of a file in a given topic partition.
     * @param topicPartition The topic partition to get the age of.
     * @return Age in seconds selected by the configured policy (see
     *     {@link #getModificationAgeSec(TopicPartitionGroup)}), or -1 if the partition does
     *     not contain any files.
     * @throws IOException Declared for caller compatibility.
     */
    public long getModificationAgeSec(TopicPartition topicPartition) throws IOException {
        return getModificationAgeSec(new TopicPartitionGroup(topicPartition));
    }

    /**
     * Get the creation age of a file in a given topic partition group and publish it as the
     * "secor.modification_age_sec.&lt;topic&gt;.&lt;partitions&gt;" stats label.
     *
     * Despite the method name, the age is computed from the time the writer was created, not
     * from the file's last modification. When {@code mConfig.getFileAgeYoungest()} is true the
     * smallest age among the group's files is returned, otherwise the largest.
     *
     * @param topicPartitionGroup The topic partition group to get the age of.
     * @return Selected age in seconds, or -1 if the group does not contain any files.
     * @throws IOException Declared for caller compatibility.
     */
    public long getModificationAgeSec(TopicPartitionGroup topicPartitionGroup)
            throws IOException {
        final long now = System.currentTimeMillis() / 1000L;
        // Read the policy once so the whole computation uses a single, consistent setting.
        final boolean youngest = mConfig.getFileAgeYoungest();
        long result = youngest ? Long.MAX_VALUE : -1;
        for (LogFilePath path : getPaths(topicPartitionGroup)) {
            Long creationTime = mCreationTimes.get(path);
            if (creationTime == null) {
                LOG.warn("no creation time found for path {}", path);
                // Treat a path without a recorded creation time as created just now (age 0).
                creationTime = now;
            }
            long age = now - creationTime;
            if (youngest ? age < result : age > result) {
                result = age;
            }
        }
        if (result == Long.MAX_VALUE) {
            // Youngest policy with no files: report the same sentinel as the oldest policy.
            result = -1;
        }
        StatsUtil.setLabel("secor.modification_age_sec." + topicPartitionGroup.getTopic() + "." +
                Arrays.toString(topicPartitionGroup.getPartitions()), Long.toString(result));
        return result;
    }
}