/*
* Copyright © 2014-2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data.stream;
import co.cask.cdap.common.io.Decoder;
import co.cask.cdap.common.io.Encoder;
import co.cask.cdap.common.io.LocationStatus;
import co.cask.cdap.common.io.Locations;
import co.cask.cdap.common.io.Processor;
import co.cask.cdap.data2.transaction.queue.QueueConstants;
import co.cask.cdap.data2.transaction.stream.StreamAdmin;
import co.cask.cdap.data2.transaction.stream.StreamConfig;
import co.cask.cdap.data2.util.TableId;
import co.cask.cdap.proto.Id;
import com.google.common.base.CharMatcher;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import org.apache.twill.filesystem.Location;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
* Collection of helper methods.
*
* TODO: Usage of this class needs to be refactored, as some methods are temporary (e.g. encodeMap/decodeMap).
*/
public final class StreamUtils {
// Name of the directory that holds stream files which are pending deletion.
// A stream id cannot contain ".", so this name can never clash with the directory of a real stream.
private static final String DELETED = ".deleted";
/**
 * Decodes a string-to-string map from the given decoder.
 *
 * The data is read as a sequence of chunks. Each chunk starts with an int entry count followed by that many
 * entries; a count of {@code 0} terminates the sequence. An entry consists of the key string, an int flag and
 * the value, which is read as a string when the flag is {@code 0} and as a null otherwise.
 *
 * @param decoder the source to read the encoded map from
 * @return an immutable map containing every decoded entry
 * @throws IOException if reading from the decoder fails
 */
static Map<String, String> decodeMap(Decoder decoder) throws IOException {
  ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();

  // Keep reading chunks until the zero-length terminator shows up.
  for (int count = decoder.readInt(); count != 0; count = decoder.readInt()) {
    for (int remaining = count; remaining > 0; remaining--) {
      String key = decoder.readString();

      String value;
      if (decoder.readInt() == 0) {
        value = decoder.readString();
      } else {
        value = (String) decoder.readNull();
      }

      // NOTE(review): ImmutableMap does not permit null values, so a null-flagged entry presumably fails
      // here - confirm whether null values are ever encoded in practice.
      builder.put(key, value);
    }
  }
  return builder.build();
}
/**
 * Encodes a string-to-string map to the given encoder.
 *
 * The entry count is written first. For every entry the key, an int flag ({@code 1} when the value is null,
 * {@code 0} otherwise) and the value are written. A non-empty map is followed by a trailing {@code 0}; for an
 * empty map the initial count of {@code 0} is the only thing written.
 *
 * @param map the map to encode
 * @param encoder the destination to write the encoded map to
 * @throws IOException if writing to the encoder fails
 */
static void encodeMap(Map<String, String> map, Encoder encoder) throws IOException {
  encoder.writeInt(map.size());
  if (map.isEmpty()) {
    // The zero count written above already marks the end of the data.
    return;
  }

  for (Map.Entry<String, String> entry : map.entrySet()) {
    String value = entry.getValue();
    int nullFlag = (value == null) ? 1 : 0;
    // NOTE(review): the value is passed to writeString even when it is null - confirm that the encoder
    // in use accepts that.
    encoder.writeString(entry.getKey())
      .writeInt(nullFlag)
      .writeString(entry.getValue());
  }

  // Terminating zero-length chunk.
  encoder.writeInt(0);
}
/**
 * Finds the partition name from the given event file location. The partition name is the last directory
 * segment of the location path, i.e. the text between the last two {@code '/'} characters (or between the
 * start of the path and the last {@code '/'} when there is only one).
 *
 * @param eventLocation Location to the event file.
 * @return The partition name, never empty.
 * @throws IllegalArgumentException if the path has no {@code '/'} or the segment before the file name is empty
 * @see StreamInputFormat
 */
public static String getPartitionName(Location eventLocation) {
  URI uri = eventLocation.toURI();
  String path = uri.getPath();

  // Position of the separator between the partition directory and the file name
  int endIdx = path.lastIndexOf('/');
  Preconditions.checkArgument(endIdx >= 0,
                              "Invalid event path %s. Partition is missing.", uri);

  // Position of the separator in front of the partition directory; -1 if there is none
  int startIdx = path.lastIndexOf('/', endIdx - 1);

  // Reject paths such as "/file" or "a//file", where the partition segment would be empty.
  Preconditions.checkArgument(startIdx + 1 < endIdx,
                              "Invalid event path %s. Partition is missing.", uri);
  return path.substring(startIdx + 1, endIdx);
}
/**
 * Returns the name of the event bucket for a file name, which is the file name without its extension.
 *
 * @param name Name of the file.
 * @return Everything before the last {@code '.'}, or the whole name if it contains no {@code '.'}.
 * @see StreamInputFormat
 */
public static String getBucketName(String name) {
  int extensionStart = name.lastIndexOf('.');
  if (extensionStart < 0) {
    // No extension to strip off
    return name;
  }
  return name.substring(0, extensionStart);
}
/**
 * Returns the file prefix of the given stream file name. The extension is stripped first; the prefix is
 * then everything before the last {@code '.'} of what remains.
 *
 * @param name Name of the file.
 * @return The prefix part of the stream file.
 * @throws IllegalArgumentException if the name without extension contains no {@code '.'}
 * @see StreamInputFormat
 */
public static String getNamePrefix(String name) {
  String withoutExtension = getBucketName(name);
  int separator = withoutExtension.lastIndexOf('.');
  boolean hasSeparator = separator >= 0;
  Preconditions.checkArgument(hasSeparator,
                              "Invalid name %s. Name is expected in [prefix].[seqId] format", withoutExtension);
  return withoutExtension.substring(0, separator);
}
/**
 * Returns the sequence number of the given stream file name. The extension is stripped first; the sequence
 * number is then the text after the last {@code '.'} of what remains.
 *
 * @param name Name of the file.
 * @return The sequence number of the stream file.
 * @throws IllegalArgumentException if the name without extension has no {@code '.'} or nothing after it
 * @throws NumberFormatException if the sequence part is not a valid integer
 * @see StreamInputFormat
 */
public static int getSequenceId(String name) {
  String withoutExtension = getBucketName(name);
  int separator = withoutExtension.lastIndexOf('.');

  // There must be a separator with at least one character after it
  boolean hasSequencePart = separator >= 0 && separator + 1 < withoutExtension.length();
  Preconditions.checkArgument(hasSequencePart,
                              "Invalid name %s. Name is expected in [prefix].[seqId] format", withoutExtension);

  String sequencePart = withoutExtension.substring(separator + 1);
  return Integer.parseInt(sequencePart);
}
/**
 * Gets the partition start time from the name of the partition. The part of the name before the first
 * {@code '.'} is parsed as a number of seconds and converted to milliseconds.
 *
 * @param partitionName name of the partition
 * @return The partition start timestamp in milliseconds.
 * @throws IllegalArgumentException if the name has no {@code '.'} or starts with one
 * @throws NumberFormatException if the start timestamp part is not a valid long
 *
 * @see StreamInputFormat
 */
public static long getPartitionStartTime(String partitionName) {
  int separator = partitionName.indexOf('.');
  boolean hasStartPart = separator > 0;
  Preconditions.checkArgument(hasStartPart,
                              "Invalid partition name %s. Partition name should be of format %s",
                              partitionName, "[startTimestamp].[duration]");

  long startSeconds = Long.parseLong(partitionName.substring(0, separator));
  return TimeUnit.SECONDS.toMillis(startSeconds);
}
/**
 * Returns true if the given name is a valid partition name, false otherwise. A valid partition name
 * consists of one or more digits {@code 0-9}, a single {@code '.'}, and one or more digits {@code 0-9}.
 *
 * @param partitionName the name to check
 * @return whether the name has the partition name format
 */
public static boolean isPartition(String partitionName) {
  final int length = partitionName.length();
  final int dotPos = partitionName.indexOf('.');

  // The '.' must exist and must be neither the first nor the last character
  if (dotPos <= 0 || dotPos >= length - 1) {
    return false;
  }

  // Every character other than that first '.' must be a digit; a second '.' fails this check as well
  for (int i = 0; i < length; i++) {
    if (i == dotPos) {
      continue;
    }
    char c = partitionName.charAt(i);
    if (c < '0' || c > '9') {
      return false;
    }
  }
  return true;
}
/**
 * Gets the partition end time from the name of the partition. The name is split at the first {@code '.'};
 * both parts are parsed as a number of seconds, and their sum is converted to milliseconds.
 *
 * @param partitionName name of the partition, in {@code [startTimestamp].[duration]} format
 * @return the partition end timestamp in milliseconds.
 * @throws IllegalArgumentException if the name has no {@code '.'}, or if the start timestamp or the duration
 *                                  part is empty
 * @throws NumberFormatException if either part is not a valid long
 *
 * @see StreamInputFormat
 */
public static long getPartitionEndTime(String partitionName) {
  int idx = partitionName.indexOf('.');

  // Both sides of the '.' get parsed below, so both must be non-empty. Checking it here reports the
  // descriptive message instead of a bare NumberFormatException for names such as ".5" or "5.".
  Preconditions.checkArgument(idx > 0 && idx < partitionName.length() - 1,
                              "Invalid partition name %s. Partition name should be of format %s",
                              partitionName, "[startTimestamp].[duration]");

  long startTime = Long.parseLong(partitionName.substring(0, idx));
  long duration = Long.parseLong(partitionName.substring(idx + 1));
  return TimeUnit.MILLISECONDS.convert(startTime + duration, TimeUnit.SECONDS);
}
/**
 * Creates the stream base location for the given generation. Generation {@code 0} maps to the stream base
 * location itself; any other generation maps to a child named after the zero-padded generation id.
 *
 * @param streamBaseLocation the base directory for the stream
 * @param generation generation id
 * @return Location for the given generation
 * @throws IOException if appending to the base location fails
 *
 * @see StreamInputFormat
 */
public static Location createGenerationLocation(Location streamBaseLocation, int generation) throws IOException {
  if (generation == 0) {
    return streamBaseLocation;
  }
  // The zero padding only gives a sorted view in ls; it carries no other meaning.
  String generationDir = String.format("%06d", generation);
  return streamBaseLocation.append(generationDir);
}
/**
 * Creates the location for a partition directory. The directory name is
 * {@code [start in seconds].[duration in seconds]}, both zero-padded.
 *
 * @param baseLocation Base location for partition directory.
 * @param partitionStart Partition start timestamp in milliseconds.
 * @param partitionDuration Partition duration in milliseconds.
 * @return The location for the partition directory.
 * @throws IOException if appending to the base location fails
 */
public static Location createPartitionLocation(Location baseLocation,
                                               long partitionStart, long partitionDuration) throws IOException {
  long startSeconds = TimeUnit.MILLISECONDS.toSeconds(partitionStart);
  long durationSeconds = TimeUnit.MILLISECONDS.toSeconds(partitionDuration);

  // The zero padding only gives a sorted view in ls; it carries no other meaning.
  String partitionDir = String.format("%010d.%05d", startSeconds, durationSeconds);
  return baseLocation.append(partitionDir);
}
/**
 * Creates the location of a stream file inside a partition directory. The file name is
 * {@code [prefix].[zero-padded seqId].[suffix of the file type]}.
 *
 * @param partitionLocation The partition directory location.
 * @param prefix File prefix.
 * @param seqId Sequence number of the file.
 * @param type Type of the stream file.
 * @return The location of the stream file.
 * @throws IOException if appending to the partition location fails
 *
 * @see StreamInputFormat for naming convention.
 */
public static Location createStreamLocation(Location partitionLocation, String prefix,
                                            int seqId, StreamFileType type) throws IOException {
  // The zero padding of the sequence id only gives a sorted view in ls; it carries no other meaning.
  String fileName = String.format("%s.%06d.%s", prefix, seqId, type.getSuffix());
  return partitionLocation.append(fileName);
}
/**
 * Returns the partition start time that the given timestamp is aligned to, i.e. the timestamp with the
 * remainder of dividing it by the partition duration removed.
 *
 * @param timestamp Timestamp in milliseconds.
 * @param partitionDuration Partition duration in milliseconds.
 * @return The partition start time of the given timestamp.
 * @throws ArithmeticException if {@code partitionDuration} is zero
 */
public static long getPartitionStartTime(long timestamp, long partitionDuration) {
  long intoPartition = timestamp % partitionDuration;
  return timestamp - intoPartition;
}
/**
* Encodes a {@link StreamFileOffset} instance.
*
* The fields are written in a fixed order: generation (int), partition start (long), partition end (long),
* name prefix (UTF string), sequence id (int) and offset (long). The
* {@link #decodeOffset(StreamConfig, DataInput)} method reads them back in that same order.
*
* @param out Output for encoding
* @param offset The offset object to encode
* @throws IOException if writing to the output fails
*/
public static void encodeOffset(DataOutput out, StreamFileOffset offset) throws IOException {
out.writeInt(offset.getGeneration());
out.writeLong(offset.getPartitionStart());
// The partition end is written rather than the duration; the decoder derives the duration from it.
out.writeLong(offset.getPartitionEnd());
out.writeUTF(offset.getNamePrefix());
out.writeInt(offset.getSequenceId());
out.writeLong(offset.getOffset());
}
/**
 * Decodes a {@link StreamFileOffset} that was encoded by the
 * {@link #encodeOffset(DataOutput, StreamFileOffset)} method. The event file location of the offset is
 * rebuilt from the stream location in the given configuration and the decoded generation, partition,
 * file prefix and sequence id.
 *
 * @param config Stream configuration for the stream that the offset is representing
 * @param in Input for decoding
 * @return A new instance of {@link StreamFileOffset}
 * @throws IOException if reading from the input or building the locations fails
 */
public static StreamFileOffset decodeOffset(StreamConfig config, DataInput in) throws IOException {
  // Fields are read in exactly the order in which they were written.
  int generation = in.readInt();
  long partitionStart = in.readLong();
  long partitionEnd = in.readLong();
  String prefix = in.readUTF();
  int seqId = in.readInt();
  long offset = in.readLong();

  long duration = partitionEnd - partitionStart;

  // Only a positive generation lives in its own sub-directory of the stream location.
  Location streamLocation = config.getLocation();
  Location baseLocation = (generation > 0)
    ? createGenerationLocation(streamLocation, generation)
    : streamLocation;

  Location eventLocation = createStreamLocation(createPartitionLocation(baseLocation, partitionStart, duration),
                                                prefix, seqId, StreamFileType.EVENT);
  return new StreamFileOffset(eventLocation, offset, generation);
}
/**
 * Returns the configuration of the given stream, creating the stream first if the configuration cannot be
 * fetched.
 *
 * Any failure of the first lookup is deliberately ignored and treated as "the stream has to be created".
 * A failure while creating the stream or while fetching the configuration afterwards is reported as an
 * {@link IOException}.
 *
 * @param admin the admin used to look up and create the stream
 * @param streamId id of the stream
 * @return the configuration returned by the admin for the stream
 * @throws IOException if creating the stream or fetching its configuration after creation fails
 */
public static StreamConfig ensureExists(StreamAdmin admin, Id.Stream streamId) throws IOException {
  // First attempt: the stream may be there already.
  try {
    return admin.getConfig(streamId);
  } catch (Exception ignored) {
    // Best effort only; fall through and try to create the stream.
  }

  // Second attempt: create the stream, then fetch its configuration.
  try {
    admin.create(streamId);
    return admin.getConfig(streamId);
  } catch (Exception failure) {
    // Rethrow an IOException as it is, wrap anything else.
    Throwables.propagateIfInstanceOf(failure, IOException.class);
    throw new IOException(failure);
  }
}
/**
 * Finds the current generation id of a stream by scanning the stream directory of the given configuration
 * for the largest generation number used as a directory name.
 *
 * @param config configuration of the stream
 * @return the generation id
 * @throws IOException if scanning the stream directory fails
 */
public static int getGeneration(StreamConfig config) throws IOException {
  Location streamLocation = config.getLocation();
  return getGeneration(streamLocation);
}
/**
 * Finds the current generation id of a stream. It scans the stream directory for directories whose name
 * consists of digits only and returns the largest number found.
 *
 * Entries whose name is empty or is too large to fit into an {@code int} are not generation directories
 * and are skipped.
 *
 * @param streamLocation location to scan for generation id
 * @return the generation id, or {@code 0} if the listing is {@code null} or holds no generation directory
 * @throws IOException if listing the location or inspecting an entry fails
 */
public static int getGeneration(Location streamLocation) throws IOException {
  List<Location> locations = streamLocation.list();
  if (locations == null) {
    return 0;
  }

  CharMatcher numMatcher = CharMatcher.inRange('0', '9');

  // Default generation is 0.
  int genId = 0;
  for (Location location : locations) {
    String name = location.getName();

    // matchesAllOf is also true for an empty name, hence the explicit emptiness check.
    if (name.isEmpty() || !numMatcher.matchesAllOf(name) || !location.isDirectory()) {
      continue;
    }
    try {
      genId = Math.max(genId, Integer.parseInt(name));
    } catch (NumberFormatException e) {
      // All digits but out of int range: cannot be a generation id, so ignore the directory
      // instead of failing the whole scan.
    }
  }
  return genId;
}
/**
 * Finds the next sequence id for the given partition with the given file prefix.
 *
 * Only files whose name starts with the prefix are considered. Files whose name does not follow the
 * {@code [prefix].[sequenceId].[dat|idx]} format, or whose sequence id is not a valid integer, are ignored.
 *
 * @param partitionLocation the directory where the stream partition is
 * @param filePrefix prefix of file name to match
 * @return the next sequence id, which is the current max id + 1; {@code 0} if no matching file exists.
 * @throws IOException if failed to find the next sequence id
 */
public static int getNextSequenceId(Location partitionLocation, String filePrefix) throws IOException {
  // Try to find the file of this bucket with the highest sequence number.
  int maxSequence = -1;
  for (Location location : partitionLocation.list()) {
    String fileName = location.getName();
    if (!fileName.startsWith(filePrefix)) {
      continue;
    }

    if (fileName.lastIndexOf('.') < filePrefix.length()) {
      // Ignore file with invalid stream file name
      continue;
    }

    try {
      // File name format is [prefix].[sequenceId].[dat|idx]
      maxSequence = Math.max(maxSequence, getSequenceId(fileName));
    } catch (IllegalArgumentException e) {
      // Ignore stream file with invalid name or invalid sequence id. This covers both the
      // NumberFormatException from parsing and the precondition failure for names such as "prefix.dat".
    }
  }
  return maxSequence + 1;
}
/**
 * Gets the size of the data persisted for a stream under the given stream location. It is the sum of the
 * lengths of all event files found through the partition directories that are direct children of the
 * stream location.
 *
 * @param streamLocation location of the stream to get the data size of
 * @return the total length of the event files of the stream
 * @throws IOException in case of any error in fetching the size
 */
public static long fetchStreamFilesSize(Location streamLocation) throws IOException {
  // Accumulates the length of every event file it gets to see.
  Processor<LocationStatus, Long> sizeCollector = new Processor<LocationStatus, Long>() {
    private long totalLength = 0;

    @Override
    public boolean process(LocationStatus status) {
      boolean isEventFile = !status.isDir() && StreamFileType.EVENT.isMatched(status.getUri().getPath());
      if (isEventFile) {
        totalLength += status.getLength();
      }
      // Always ask for more
      return true;
    }

    @Override
    public Long getResult() {
      return totalLength;
    }
  };

  // Only directories with a partition name are looked into.
  for (Location child : streamLocation.list()) {
    if (child.isDirectory() && isPartition(child.getName())) {
      Locations.processLocations(child, false, sizeCollector);
    }
  }
  return sizeCollector.getResult();
}
/**
 * Gets the TableId of the stream consumer state store within a given namespace. The table name is built
 * from the system namespace id and the stream queue type and is the same for every namespace.
 *
 * @param namespace the namespace that the table is for.
 * @return constructed TableId
 */
public static TableId getStateStoreTableId(Id.Namespace namespace) {
  String systemNamespace = Id.Namespace.SYSTEM.getId();
  String queueType = QueueConstants.QueueType.STREAM.toString();
  String tableName = String.format("%s.%s.state.store", systemNamespace, queueType);
  return TableId.from(namespace.getId(), tableName);
}
/**
 * Gets the {@link Id.Stream} for a stream's base directory. The stream name is the name of the directory
 * itself, and the namespace is the name of its grandparent directory.
 *
 * @param streamBaseLocation the location of the stream's directory
 * @return Id of the stream associated with the location
 * @throws NullPointerException if the location has no parent or no grandparent
 */
public static Id.Stream getStreamIdFromLocation(Location streamBaseLocation) {
  // streamBaseLocation = /.../<namespace>/streams/<streamName>,
  // as constructed by FileStreamAdmin#getStreamConfigLocation
  Location streamsDir = Preconditions.checkNotNull(
    Locations.getParent(streamBaseLocation),
    "Streams directory of stream base location %s was null.", streamBaseLocation);

  Location namespaceDir = Preconditions.checkNotNull(
    Locations.getParent(streamsDir),
    "Namespace directory of stream base location %s was null.", streamBaseLocation);

  return Id.Stream.from(namespaceDir.getName(), streamBaseLocation.getName());
}
/**
 * Returns the location under the stream root that is used for streams pending deletion.
 *
 * @param streamRootLocation the root location that all streams go under
 * @return the {@code .deleted} child of the given root location
 * @throws IOException if appending to the root location fails
 */
public static Location getDeletedLocation(Location streamRootLocation) throws IOException {
  Location deletedLocation = streamRootLocation.append(DELETED);
  return deletedLocation;
}
/**
 * Lists all stream locations under the given root. Every entry of the root whose name does not start
 * with {@code "."} is included.
 *
 * @param streamRootLocation the root location that all streams go under
 * @return a filtered view over the entries of the root location
 * @throws IOException if listing the root location fails
 */
public static Iterable<Location> listAllStreams(Location streamRootLocation) throws IOException {
  List<Location> children = streamRootLocation.list();

  // Anything starting with "." is a special system entry, not a regular stream directory.
  Predicate<Location> isStreamDirectory = new Predicate<Location>() {
    @Override
    public boolean apply(Location candidate) {
      String name = candidate.getName();
      return !name.startsWith(".");
    }
  };
  return Iterables.filter(children, isStreamDirectory);
}
private StreamUtils() {
  // Holder of static helper methods only; not meant to be instantiated.
}
}