/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.pinterest.secor.common;
import com.pinterest.secor.message.ParsedMessage;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang.StringUtils;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
/**
* LogFilePath represents the path of a log file. It contains convenience methods for building and
* decomposing paths.
*
* Log file path has the following form:
* prefix/topic/partition1/.../partitionN/generation_kafkaPartition_firstMessageOffset
* where:
* prefix is top-level directory for log files. It can be a local path or an s3 dir,
* topic is a kafka topic,
* partition1, ..., partitionN is the list of partition names extracted from message content.
* E.g., the partition may describe the message date such as dt=2014-01-01,
* generation is the consumer version. It allows us to perform rolling upgrades of
* non-compatible Secor releases,
* kafkaPartition is the kafka partition of the topic,
* firstMessageOffset is the offset of the first message in a batch of files committed
* atomically.
*
* @author Pawel Garbacki (pawel@pinterest.com)
*/
public class LogFilePath {
    private final String mPrefix;
    private final String mTopic;
    private final String[] mPartitions;
    private final int mGeneration;
    private final int[] mKafkaPartitions;
    private final long[] mOffsets;
    private final String mExtension;

    /**
     * Creates a path that may span several consecutive kafka partitions.
     *
     * The array arguments are copied, so later changes to the caller's arrays do not affect
     * this object.
     *
     * @param prefix top-level directory of the log files
     * @param topic kafka topic
     * @param partitions partition names that become directory components below the topic
     * @param generation consumer version
     * @param kafkaPartitions kafka partitions covered by the file; asserted to be non-empty and
     *                        consecutive
     * @param offsets one offset per entry of {@code kafkaPartitions}
     * @param extension suffix appended verbatim to the file name (may be empty)
     */
    public LogFilePath(String prefix, String topic, String[] partitions, int generation,
                       int[] kafkaPartitions, long[] offsets, String extension) {
        // Use short-circuit && so that a null array trips the assertion (and reports its
        // message) instead of raising a NullPointerException while evaluating ".length".
        assert kafkaPartitions != null && kafkaPartitions.length >= 1
            : "Wrong kafkaParttions: " + Arrays.toString(kafkaPartitions);
        assert offsets != null && offsets.length >= 1
            : "Wrong offsets: " + Arrays.toString(offsets);
        assert kafkaPartitions.length == offsets.length
            : "Size mismatch partitions: " + Arrays.toString(kafkaPartitions) +
              " offsets: " + Arrays.toString(offsets);
        for (int i = 1; i < kafkaPartitions.length; i++) {
            assert kafkaPartitions[i] == kafkaPartitions[i - 1] + 1
                : "Non consecutive partitions " + kafkaPartitions[i] +
                  " and " + kafkaPartitions[i-1];
        }

        mPrefix = prefix;
        mTopic = topic;
        mPartitions = Arrays.copyOf(partitions, partitions.length);
        mGeneration = generation;
        mKafkaPartitions = Arrays.copyOf(kafkaPartitions, kafkaPartitions.length);
        mOffsets = Arrays.copyOf(offsets, offsets.length);
        mExtension = extension;
    }

    /**
     * Creates a single-kafka-partition path whose topic, partitions and kafka partition are
     * taken from {@code message}.
     *
     * @param prefix top-level directory of the log files
     * @param generation consumer version
     * @param lastCommittedOffset offset recorded in the file name
     * @param message message supplying the topic, partitions and kafka partition
     * @param extension suffix appended verbatim to the file name
     */
    public LogFilePath(String prefix, int generation, long lastCommittedOffset,
                       ParsedMessage message, String extension) {
        this(prefix, message.getTopic(), message.getPartitions(), generation,
             new int[]{message.getKafkaPartition()}, new long[]{lastCommittedOffset},
             extension);
    }

    /**
     * Creates a path for a single kafka partition and offset.
     *
     * @param prefix top-level directory of the log files
     * @param topic kafka topic
     * @param partitions partition names that become directory components below the topic
     * @param generation consumer version
     * @param kafkaPartition kafka partition of the topic
     * @param offset offset recorded in the file name
     * @param extension suffix appended verbatim to the file name
     */
    public LogFilePath(String prefix, String topic, String[] partitions, int generation,
                       int kafkaPartition, long offset, String extension) {
        this(prefix, topic, partitions, generation, new int[]{kafkaPartition},
             new long[]{offset}, extension);
    }

    /**
     * Decomposes an existing path of the form
     * {@code prefix/topic/partition1/.../partitionN/generation_kafkaPartition_offset[.ext]}.
     *
     * Only the single-kafka-partition basename layout is understood here: the second basename
     * element is parsed as one integer and the third as one long.
     *
     * @param prefix leading part of {@code path} that is not topic or partition information
     * @param path full path to decompose; asserted to start with {@code prefix}
     * @throws NumberFormatException if a basename element is not numeric
     */
    public LogFilePath(String prefix, String path) {
        assert path.startsWith(prefix): path + ".startsWith(" + prefix + ")";

        mPrefix = prefix;

        // Skip the separator between prefix and topic when the prefix does not carry it itself.
        int prefixLength = prefix.length();
        if (!prefix.endsWith("/")) {
            prefixLength++;
        }
        String suffix = path.substring(prefixLength);
        String[] pathElements = suffix.split("/");
        // Suffix should contain a topic, at least one partition, and the basename.
        assert pathElements.length >= 3: Arrays.toString(pathElements) + ".length >= 3";

        mTopic = pathElements[0];
        mPartitions = subArray(pathElements, 1, pathElements.length - 2);

        // Parse basename.
        String basename = pathElements[pathElements.length - 1];
        // Remove extension. The stored extension keeps its leading dot.
        int lastIndexOf = basename.lastIndexOf('.');
        if (lastIndexOf >= 0) {
            mExtension = basename.substring(lastIndexOf, basename.length());
            basename = basename.substring(0, lastIndexOf);
        } else {
            mExtension = "";
        }

        String[] basenameElements = basename.split("_");
        assert basenameElements.length == 3: Integer.toString(basenameElements.length) + " == 3";
        mGeneration = Integer.parseInt(basenameElements[0]);
        mKafkaPartitions = new int[]{Integer.parseInt(basenameElements[1])};
        mOffsets = new long[]{Long.parseLong(basenameElements[2])};
    }

    /**
     * Returns a copy of {@code array[startIndex..endIndex]}, both bounds inclusive.
     */
    private static String[] subArray(String[] array, int startIndex, int endIndex) {
        return Arrays.copyOfRange(array, startIndex, endIndex + 1);
    }

    /**
     * Returns a new path identical to this one except for its prefix.
     *
     * @param prefix prefix of the returned path
     * @return the re-rooted path
     */
    public LogFilePath withPrefix(String prefix) {
        return new LogFilePath(prefix, mTopic, mPartitions, mGeneration, mKafkaPartitions,
                               mOffsets, mExtension);
    }

    /**
     * Returns {@code prefix/topic}; a null or empty prefix or topic is left out.
     */
    public String getLogFileParentDir() {
        ArrayList<String> elements = new ArrayList<String>();
        if (mPrefix != null && mPrefix.length() > 0) {
            elements.add(mPrefix);
        }
        if (mTopic != null && mTopic.length() > 0) {
            elements.add(mTopic);
        }
        return StringUtils.join(elements, "/");
    }

    /**
     * Returns the directory holding the log file: the parent dir followed by every partition.
     */
    public String getLogFileDir() {
        ArrayList<String> elements = new ArrayList<String>();
        elements.add(getLogFileParentDir());
        for (String partition : mPartitions) {
            elements.add(partition);
        }
        return StringUtils.join(elements, "/");
    }

    /**
     * Builds the file name without extension.
     *
     * For a single kafka partition this is {@code generation_kafkaPartition_offset} with the
     * offset zero-padded to 20 digits. For several kafka partitions it is
     * {@code generation_first-last_hash}, where hash is derived from all offsets.
     */
    private String getLogFileBasename() {
        ArrayList<String> basenameElements = new ArrayList<String>();
        basenameElements.add(Integer.toString(mGeneration));
        if (mKafkaPartitions.length > 1) {
            String kafkaPartitions = mKafkaPartitions[0] + "-" +
                mKafkaPartitions[mKafkaPartitions.length - 1];
            basenameElements.add(kafkaPartitions);
            basenameElements.add(hashOffsets());
        } else {
            basenameElements.add(Integer.toString(mKafkaPartitions[0]));
            basenameElements.add(String.format("%020d", mOffsets[0]));
        }
        return StringUtils.join(basenameElements, "_");
    }

    /**
     * Returns the URL-safe base64 encoding of the MD5 digest of all offsets concatenated.
     *
     * NOTE(review): the offsets are concatenated without a separator, so e.g. {1, 23} and
     * {12, 3} hash identically. Left unchanged because altering it would change the generated
     * file names.
     */
    private String hashOffsets() {
        StringBuilder sb = new StringBuilder();
        for (long offset : mOffsets) {
            sb.append(offset);
        }
        try {
            // A fresh digest per call keeps this object free of mutable state.
            MessageDigest md5 = MessageDigest.getInstance("MD5");
            byte[] md5Bytes = md5.digest(sb.toString().getBytes("UTF-8"));
            byte[] encodedBytes = Base64.encodeBase64URLSafe(md5Bytes);
            // Decode with an explicit charset rather than the platform default.
            return new String(encodedBytes, "UTF-8");
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException("Unable to find mdt digest.", e);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the full path of the log file: directory, basename and extension.
     */
    public String getLogFilePath() {
        String basename = getLogFileBasename();

        ArrayList<String> pathElements = new ArrayList<String>();
        pathElements.add(getLogFileDir());
        pathElements.add(basename);

        return StringUtils.join(pathElements, "/") + mExtension;
    }

    /**
     * Returns the path of the hidden {@code .crc} companion file located in the log file
     * directory. Its name is {@code "." + basename + ".crc"}; the extension is not included.
     */
    public String getLogFileCrcPath() {
        String basename = "." + getLogFileBasename() + ".crc";

        ArrayList<String> pathElements = new ArrayList<String>();
        pathElements.add(getLogFileDir());
        pathElements.add(basename);

        return StringUtils.join(pathElements, "/");
    }

    /** Returns the kafka topic. */
    public String getTopic() {
        return mTopic;
    }

    /** Returns a copy of the partition names. */
    public String[] getPartitions() {
        return mPartitions.clone();
    }

    /** Returns the consumer generation. */
    public int getGeneration() {
        return mGeneration;
    }

    /**
     * Returns the first kafka partition.
     *
     * @deprecated use {@link #getKafkaPartitions()}
     */
    @Deprecated
    public int getKafkaPartition() {
        return mKafkaPartitions[0];
    }

    /** Returns a copy of the kafka partitions covered by this path. */
    public int[] getKafkaPartitions() {
        return mKafkaPartitions.clone();
    }

    /**
     * Returns the first offset.
     *
     * @deprecated use {@link #getOffsets()}
     */
    @Deprecated
    public long getOffset() {
        return mOffsets[0];
    }

    /** Returns a copy of the offsets, one per kafka partition. */
    public long[] getOffsets() {
        return mOffsets.clone();
    }

    /** Returns the file name extension as stored (appended verbatim to the basename). */
    public String getExtension() {
        return mExtension;
    }

    /**
     * Two paths are equal when prefix, topic, partitions, generation, kafka partitions and
     * offsets all match.
     *
     * NOTE(review): the extension is not part of the comparison; confirm this is intended
     * before relying on equality to distinguish files that differ only by extension.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        LogFilePath that = (LogFilePath) o;

        if (mGeneration != that.mGeneration) return false;
        if (!Arrays.equals(mKafkaPartitions, that.mKafkaPartitions)) return false;
        if (!Arrays.equals(mOffsets, that.mOffsets)) return false;
        if (!Arrays.equals(mPartitions, that.mPartitions)) return false;
        if (mPrefix != null ? !mPrefix.equals(that.mPrefix) : that.mPrefix != null) return false;
        if (mTopic != null ? !mTopic.equals(that.mTopic) : that.mTopic != null) return false;

        return true;
    }

    /** Hashes the same fields that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        int result = mPrefix != null ? mPrefix.hashCode() : 0;
        result = 31 * result + (mTopic != null ? mTopic.hashCode() : 0);
        result = 31 * result + (mPartitions != null ? Arrays.hashCode(mPartitions) : 0);
        result = 31 * result + mGeneration;
        result = 31 * result + Arrays.hashCode(mKafkaPartitions);
        result = 31 * result + Arrays.hashCode(mOffsets);
        return result;
    }

    /** Returns the full log file path. */
    @Override
    public String toString() {
        return getLogFilePath();
    }
}