package io.fathom.cloud.storage;
import io.fathom.cloud.Clock;
import io.fathom.cloud.CloudException;
import io.fathom.cloud.blobs.BlobData;
import io.fathom.cloud.blobs.BlobStore;
import io.fathom.cloud.blobs.BlobStoreFactory;
import io.fathom.cloud.io.HashingOutputStream;
import io.fathom.cloud.protobuf.CloudCommons.Attributes;
import io.fathom.cloud.protobuf.CloudCommons.KeyValueData;
import io.fathom.cloud.protobuf.FileModel.BucketAttributes;
import io.fathom.cloud.protobuf.FileModel.BucketData;
import io.fathom.cloud.protobuf.FileModel.DirectoryData;
import io.fathom.cloud.protobuf.FileModel.DirectoryDataOrBuilder;
import io.fathom.cloud.protobuf.FileModel.FileData;
import io.fathom.cloud.protobuf.FileModel.FileRange;
import io.fathom.cloud.server.model.Project;
import io.fathom.cloud.server.model.User;
import io.fathom.cloud.state.DuplicateValueException;
import io.fathom.cloud.state.NamedItemCollection;
import io.fathom.cloud.state.NumberedItemCollection;
import io.fathom.cloud.state.Watched;
import io.fathom.cloud.storage.api.os.models.CloudObject;
import io.fathom.cloud.storage.api.os.models.StorageAcl;
import io.fathom.cloud.storage.api.os.models.StorageAcl.AclType;
import io.fathom.cloud.storage.api.os.resources.DirectoryListEntry;
import io.fathom.cloud.storage.state.FileStore;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import javax.inject.Inject;
import javax.inject.Provider;
import javax.inject.Singleton;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.core.Response.Status;
import javax.ws.rs.core.StreamingOutput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fathomdb.utils.Hex;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import com.google.common.io.ByteSource;
import com.google.common.io.ByteStreams;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.inject.persist.Transactional;
import com.google.protobuf.ByteString;
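// File/object storage service backed by a content-addressed blob store.
// Bucket and directory metadata are kept as protobuf records in the
// FileStore; file contents are stored as blobs keyed by hash, with each
// file described as an ordered list of contiguous byte ranges.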
@Singleton
@Transactional
public class FileServiceImpl implements FileService, FileServiceInternal {
private static final Logger log = LoggerFactory.getLogger(FileServiceImpl.class);
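// Once a file accumulates this many ranges (e.g. through appends), it is
// queued for compaction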
public static final int COMPACTION_THRESHOLD = 32;
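// When true, every write is re-verified end-to-end (blob presence, range
// contiguity and MD5) before the directory is updated; expensive but safe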
private static final boolean PARANOID = true;
@Inject
FileStore fileStore;
@Inject
BlobStoreFactory blobStoreFactory;
@Inject
FilesystemCompactor compactor;
@Inject
Provider<StorageDerivedMetadata> derivedMetadataProvider;
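// Each project gets its own blob store, keyed by project id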
@Override
public BlobStore getBlobStore(Project project) throws IOException {
String key = "obj_" + project.getId();
return blobStoreFactory.get(key);
}
protected NumberedItemCollection<DirectoryData> getDirectoryStorage(Project project) throws CloudException {
return fileStore.getDirectories(project.getId());
}
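// Looks up a bucket by name, mapping a missing bucket to 404 NOT_FOUND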
BucketData getBucket(Project project, String bucketName) throws CloudException {
NamedItemCollection<BucketData> bucketStorage = fileStore.getBuckets(project.getId());
BucketData bucket = bucketStorage.find(bucketName);
if (bucket == null) {
throw new WebApplicationException(Status.NOT_FOUND);
}
return bucket;
}
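// Creates or updates a directory record, warning when the serialized
// record grows unexpectedly large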
public DirectoryData storeDirectory(NumberedItemCollection<DirectoryData> directoryStorage,
DirectoryData.Builder dir) throws CloudException {
if (!dir.hasId()) {
// A new directory
// We expect this to be small
DirectoryData created = directoryStorage.create(dir);
assert created.getSerializedSize() < 1024;
return created;
}
DirectoryData built = dir.build();
int size = built.getSerializedSize();
if (size > 4096) {
// Deliberately not logging the data itself; it can be huge
log.warn("Excessive size of DirectoryData. Size={}", size);
}
return directoryStorage.update(dir);
}
private DirectoryData getDirectoryData(Project project, BucketData bucket) throws CloudException {
NumberedItemCollection<DirectoryData> directoryStorage = getDirectoryStorage(project);
DirectoryData dir = directoryStorage.find(bucket.getRootId());
if (dir == null) {
throw new IllegalStateException();
}
return dir;
}
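// Stores (or overwrites) a file as a single blob-backed range. Note that
// on overwrite the existing attributes are kept and the supplied user
// attributes are merged in, rather than replacing them wholesale.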
@Override
@Transactional
public void putFile(Project project, String bucketName, String name, FileBlob blob, String contentType,
Map<String, String> userAttributes, FilePutOption... options) throws CloudException, IOException {
// Strip a leading slash; keys starting with "/" cause all sorts of problems downstream
if (name.startsWith("/")) {
name = name.substring(1);
}
// TODO: Should we use the straight hash as the key?
// Should we include a second hash?
// Should we prefix the project id?
getBlobStore(project).put(blob.data);
BucketData bucket = getBucket(project, bucketName);
DirectoryData dir = getDirectoryData(project, bucket);
DirectoryData.Builder newDir = DirectoryData.newBuilder(dir);
FileData.Builder file = null;
int foundIndex = findFile(newDir, name);
if (foundIndex != -1) {
for (FilePutOption option : options) {
if (option == FilePutOption.FAIL_IF_EXISTS) {
throw new IOException("File already exists: " + name);
}
}
file = newDir.getFilesBuilder(foundIndex);
file.clearRanges();
} else {
file = newDir.addFilesBuilder();
file.setKey(name);
}
file.setLastModified(System.currentTimeMillis());
if (contentType != null) {
file.setContentType(contentType);
}
file.setHash(blob.hash);
// NOTE: hashResume can contain plaintext. Careful around encryption!
file.setHashResume(blob.hashResume);
{
FileRange.Builder c = file.addRangesBuilder();
c.setStart(0);
c.setEnd(blob.dataLength);
c.setContentKey(blob.hash);
}
file.setLength(blob.dataLength);
updateAttributes(userAttributes, file.getAttributesBuilder());
if (PARANOID) {
BlobStore blobStore = getBlobStore(project);
sanityCheck(blobStore, file);
}
storeDirectory(getDirectoryStorage(project), newDir);
}
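// Removes the file entry from its directory; the underlying blobs are
// not deleted here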
@Override
@Transactional
public void deleteFile(Project project, String bucketName, String name) throws CloudException {
BucketData bucket = getBucket(project, bucketName);
DirectoryData dir = getDirectoryData(project, bucket);
DirectoryData.Builder newDir = DirectoryData.newBuilder(dir);
int foundIndex = findFile(newDir, name);
if (foundIndex < 0) {
throw new WebApplicationException(Status.NOT_FOUND);
}
newDir.removeFiles(foundIndex);
storeDirectory(getDirectoryStorage(project), newDir);
}
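// Returns the index of the file with the given key, or -1 if not found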
private static int findFile(DirectoryDataOrBuilder dir, String key) {
int foundIndex = -1;
for (int i = 0; i < dir.getFilesCount(); i++) {
FileData file = dir.getFiles(i);
if (key.equals(file.getKey())) {
foundIndex = i;
break;
}
}
return foundIndex;
}
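// Returns the file's metadata, or null if the file does not exist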
@Override
public FsFile findFileInfo(FsBucket bucket, String name) throws CloudException {
DirectoryData dir = getDirectoryData(bucket.getProject(), bucket.getData());
int foundIndex = findFile(dir, name);
if (foundIndex == -1) {
return null;
}
FileData found = dir.getFiles(foundIndex);
return new FsFile(bucket, found);
}
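// Appends a blob to an existing file as an extra range, maintaining a
// resumable MD5 so the full hash can be updated without re-reading the
// whole file; if no resume state is stored, the existing content is
// re-read to rebuild the digest. Appends past the threshold trigger
// asynchronous compaction.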
@Override
@Transactional
public void append(Project project, String bucketName, String name, Long offset, FileBlob blob)
throws CloudException, IOException {
getBlobStore(project).put(blob.data);
BucketData bucket = getBucket(project, bucketName);
DirectoryData dir = getDirectoryData(project, bucket);
DirectoryData.Builder newDir = DirectoryData.newBuilder(dir);
FileData.Builder file = null;
FileData oldFileData;
int foundIndex = findFile(newDir, name);
if (foundIndex != -1) {
oldFileData = dir.getFiles(foundIndex);
file = newDir.getFilesBuilder(foundIndex);
} else {
// We require the file to already exist, because creating it here would
// make it awkward to set the content type and other metadata
throw new WebApplicationException(Status.NOT_FOUND);
}
file.setLastModified(System.currentTimeMillis());
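// +1 accounts for the range we are about to add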
int rangeCount = file.getRangesCount() + 1;
{
FileRange.Builder c = file.addRangesBuilder();
long len = file.getLength();
if (offset != null) {
if (len != offset) {
throw new IOException("Attempt to append at non-terminal position");
}
}
c.setStart(len);
c.setEnd(len + blob.dataLength);
c.setContentKey(blob.hash);
}
ResumableMD5Digest md5 = ResumableMD5Digest.get();
if (file.hasHashResume() && !file.getHashResume().isEmpty()) {
md5.setState(file.getHashResume(), file.getLength());
md5.update(blob.data);
} else {
BlobStore blobStore = getBlobStore(project);
CloudObject cloudObject = new CloudObject(oldFileData);
try (InputStream is = cloudObject.getInputStream(blobStore)) {
md5.update(is);
}
md5.update(blob.data);
}
ByteString hashResume = md5.getState();
ByteString hash = ByteString.copyFrom(md5.digest());
file.setHash(hash);
// NOTE: hashResume can contain plaintext. Careful around encryption!
file.setHashResume(hashResume);
file.setLength(file.getLength() + blob.dataLength);
if (PARANOID) {
BlobStore blobStore = getBlobStore(project);
sanityCheck(blobStore, file);
}
storeDirectory(getDirectoryStorage(project), newDir);
if (rangeCount >= COMPACTION_THRESHOLD) {
compactor.enqueue(project, bucketName, name);
}
}
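// Deletes a bucket; a non-empty bucket is refused with 409 CONFLICT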
@Override
@Transactional
public void deleteBucket(Project project, String bucketName) throws CloudException {
NamedItemCollection<BucketData> bucketStorage = fileStore.getBuckets(project.getId());
BucketData bucket = bucketStorage.find(bucketName);
if (bucket == null) {
throw new WebApplicationException(Status.NOT_FOUND);
}
if (bucket.hasRootId()) {
NumberedItemCollection<DirectoryData> directoryStorage = getDirectoryStorage(project);
DirectoryData dir = directoryStorage.find(bucket.getRootId());
if (dir != null) {
if (!isEmpty(dir)) {
throw new WebApplicationException(Status.CONFLICT);
}
}
}
bucketStorage.delete(bucketName);
}
private boolean isEmpty(DirectoryData dir) {
return dir.getFilesCount() == 0;
}
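// Creates or updates a bucket, returning CREATED for a new bucket and
// ACCEPTED for an update; a new bucket gets a fresh root directory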
@Override
@Transactional
public Status putBucket(Project project, String bucketName, BucketAttributes bucketAttributes,
Map<String, String> userAttributes) throws CloudException {
NamedItemCollection<BucketData> bucketStorage = fileStore.getBuckets(project.getId());
BucketData oldBucket = bucketStorage.find(bucketName);
BucketData.Builder newBucket;
boolean isNew;
if (oldBucket == null) {
newBucket = BucketData.newBuilder();
newBucket.setKey(bucketName);
newBucket.setCreatedAt(Clock.getTimestamp());
NumberedItemCollection<DirectoryData> directoryStorage = getDirectoryStorage(project);
DirectoryData.Builder dir = DirectoryData.newBuilder();
DirectoryData created = storeDirectory(directoryStorage, dir);
newBucket.setRootId(created.getId());
isNew = true;
} else {
newBucket = BucketData.newBuilder(oldBucket);
isNew = false;
}
updateAttributes(bucketAttributes, newBucket.getBucketAttributesBuilder());
updateAttributes(userAttributes, newBucket.getAttributesBuilder());
BucketData updated;
if (isNew) {
try {
updated = bucketStorage.create(newBucket);
} catch (DuplicateValueException e) {
// TODO: Is this right? Should we retry?
throw new WebApplicationException(Status.CONFLICT);
}
} else {
updated = bucketStorage.update(newBucket);
}
bucketMetadataUpdated(project, updated);
return isNew ? Status.CREATED : Status.ACCEPTED;
}
private void bucketMetadataUpdated(Project project, BucketData bucket) throws CloudException {
StorageDerivedMetadata newMetadata = derivedMetadataProvider.get();
String serviceKey = "bucket/metadata/" + project.getId() + "/" + bucket.getKey();
newMetadata.apply(project, bucket, serviceKey);
}
private void updateAttributes(BucketAttributes src, BucketAttributes.Builder dest) {
if (src != null) {
// TODO: Just call merge?
if (src.hasAclRead()) {
dest.setAclRead(src.getAclRead());
}
}
}
private void updateAttributes(Map<String, String> userAttributes, Attributes.Builder builder) {
if (userAttributes != null) {
for (Map.Entry<String, String> entry : userAttributes.entrySet()) {
String key = entry.getKey();
String value = entry.getValue();
// Keys are expected to have their case mangled:
// https://bugs.launchpad.net/swift/+bug/939982
key = key.toLowerCase();
boolean found = false;
for (KeyValueData.Builder kv : builder.getUserAttributesBuilderList()) {
if (key.equals(kv.getKey())) {
kv.setValue(value);
found = true;
break;
}
}
if (!found) {
KeyValueData.Builder kv = builder.addUserAttributesBuilder();
kv.setKey(key);
kv.setValue(value);
}
}
}
}
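// Streams the file's contents (optionally a byte range) to the response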
@Override
public StreamingOutput open(FsFile file, Long from, Long to) {
// TODO: This can be made much more efficient
CloudObject object = new CloudObject(file.getData());
final InputStream is;
try {
BlobStore blobStore = getBlobStore(file.getProject());
is = object.getInputStream(blobStore, from, to);
} catch (IOException e) {
throw new IllegalStateException("Error opening file", e);
}
StreamingOutput stream = new StreamingOutput() {
@Override
public void write(OutputStream os) throws IOException, WebApplicationException {
// Make sure the blob stream is closed once the body has been written
try (InputStream in = is) {
ByteStreams.copy(in, os);
os.flush();
}
}
};
return stream;
}
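// Resolves a bucket, applying a read-ACL check for anonymous (null user)
// access; returns null when anonymous access is not allowed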
@Override
public FsBucket findBucket(User user, Project project, String bucketName) throws CloudException {
BucketData bucket = fileStore.getBuckets(project.getId()).find(bucketName);
if (bucket == null) {
throw new WebApplicationException(Status.NOT_FOUND);
}
if (user == null) {
// Check that the container is public
boolean allowed = false;
if (bucket.hasBucketAttributes()) {
BucketAttributes bucketAttributes = bucket.getBucketAttributes();
if (bucketAttributes.hasAclRead()) {
StorageAcl acl = StorageAcl.parse(AclType.Read, bucketAttributes.getAclRead());
// TODO: Do we really care about referer?
String referer = "unknown";
// String referer = request.getHeader("Referer");
if (acl.isRefererAllowed(referer)) {
allowed = true;
}
}
}
// TODO: Check user access to the container
if (!allowed) {
return null;
}
}
return new FsBucket(project, bucket);
}
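// Rewrites a heavily-fragmented file's ranges; returns false when the
// file no longer exists or no longer needs compacting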
@Override
@Transactional
public boolean compact(CompactOperation compaction) throws CloudException, IOException {
Project project = compaction.getProject();
String bucketName = compaction.getBucketName();
String name = compaction.getName();
BucketData bucket = getBucket(project, bucketName);
DirectoryData dir = getDirectoryData(project, bucket);
DirectoryData.Builder newDir = DirectoryData.newBuilder(dir);
FileData.Builder builder = null;
FileData oldFileData;
int foundIndex = findFile(newDir, name);
if (foundIndex != -1) {
oldFileData = dir.getFiles(foundIndex);
builder = newDir.getFilesBuilder(foundIndex);
} else {
return false;
}
if (builder.getRangesCount() < COMPACTION_THRESHOLD) {
log.debug("File is (now) smaller than compaction threshold: {}", compaction.getDebugPath());
return false;
}
if (!compaction.compact(this, builder)) {
// Compactor should have logged a reason
return false;
}
storeDirectory(getDirectoryStorage(project), newDir);
return true;
}
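// Verifies a file end-to-end: every range's blob must exist and have the
// right size, the ranges must be contiguous from 0 to the file length,
// and the concatenated content must match the stored MD5 hash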
public static void sanityCheck(BlobStore blobStore, FileData.Builder file) {
Hasher md5 = Hashing.md5().newHasher();
try (HashingOutputStream hos = new HashingOutputStream(ByteStreams.nullOutputStream(), md5)) {
List<FileRange> ranges = file.getRangesList();
for (int i = 0; i < ranges.size(); i++) {
FileRange range = ranges.get(i);
BlobData blob = blobStore.find(range.getContentKey());
if (blob == null) {
throw new IllegalStateException("Unable to find blob for range: " + range);
}
log.debug("Sanity check: fetch blob {}", Hex.toHex(range.getContentKey().toByteArray()));
if (blob.size() != (range.getEnd() - range.getStart())) {
throw new IllegalStateException();
}
blob.copyTo(hos);
if (i != 0) {
FileRange prev = ranges.get(i - 1);
if (prev.getEnd() != range.getStart()) {
throw new IllegalStateException();
}
} else {
if (range.getStart() != 0) {
throw new IllegalStateException();
}
}
if (i == (ranges.size() - 1)) {
if (range.getEnd() != file.getLength()) {
throw new IllegalStateException();
}
}
}
} catch (IOException e) {
throw new IllegalStateException("Error checking file: " + file, e);
}
ByteString hash = ByteString.copyFrom(md5.hash().asBytes());
if (!file.getHash().equals(hash)) {
log.warn("Hash mismatch: {} vs {}", Hex.toHex(file.getHash().toByteArray()), Hex.toHex(hash.toByteArray()));
throw new IllegalStateException("Hash mismatch");
}
}
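// Watches a bucket's root directory for changes. If ifNotEtag is given
// and already differs from the current etag, the current etag is
// returned immediately. (Etag computation is currently a stub.)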
@Override
@Transactional
public ListenableFuture<?> watchBucket(FsBucket bucket, String ifNotEtag) throws CloudException {
long projectId = bucket.getProject().getId();
long rootId = bucket.getData().getRootId();
NumberedItemCollection<DirectoryData> dirs = fileStore.getDirectories(projectId);
Watched<DirectoryData> watched = dirs.watch(rootId);
if (ifNotEtag != null) {
DirectoryData dirData = watched.getValue();
String currentEtag = computeEtag(dirData);
if (!ifNotEtag.equals(currentEtag)) {
return Futures.immediateFuture(currentEtag);
}
}
return watched.getFuture();
}
private String computeEtag(DirectoryData dirData) {
log.warn("Etag computation for directories is stub-implemented");
return UUID.randomUUID().toString();
}
@Override
public FileInfo getFileInfo(Project project, String bucketName, String name) throws CloudException, IOException {
BucketData bucketData = getBucket(project, bucketName);
FsBucket fsBucket = new FsBucket(project, bucketData);
FsFile fsFile = findFileInfo(fsBucket, name);
return fsFile;
}
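// Lists every file in the bucket's root directory, unfiltered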
List<DirectoryListEntry> listFiles(Project project, FsBucket bucket) throws CloudException {
NumberedItemCollection<DirectoryData> directoryStorage = getDirectoryStorage(project);
if (!bucket.data.hasRootId()) {
return Lists.newArrayList();
}
DirectoryData dir = directoryStorage.find(bucket.data.getRootId());
if (dir == null) {
throw new IllegalStateException();
}
List<DirectoryListEntry> entries = Lists.newArrayList();
for (FileData file : dir.getFilesList()) {
entries.add(new DirectoryListEntry(file.getKey(), file));
}
return entries;
}
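// Lists files with Swift-style prefix/delimiter/marker filtering. All
// three filters are applied in memory over the full listing, hence the
// inefficiency warnings. Note that with a delimiter, the synthesized
// "directory" entries do not include the trailing delimiter character.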
@Override
public List<DirectoryListEntry> listFiles(Project project, FsBucket bucket, String prefix, String delimiter,
String marker) throws CloudException {
List<DirectoryListEntry> entries = listFiles(project, bucket);
if (prefix != null) {
log.warn("Prefix filter is inefficient");
List<DirectoryListEntry> matches = Lists.newArrayList();
for (DirectoryListEntry entry : entries) {
String name = entry.getKey();
if (!name.startsWith(prefix)) {
continue;
}
matches.add(entry);
}
entries = matches;
}
if (delimiter != null) {
log.warn("Delimiter filter is inefficient");
List<DirectoryListEntry> realFiles = Lists.newArrayList();
Set<String> dirs = Sets.newHashSet();
boolean changed = false;
int offset = prefix != null ? prefix.length() : 0;
for (DirectoryListEntry entry : entries) {
String name = entry.getKey();
int nextSlash = name.indexOf(delimiter, offset);
if (nextSlash == -1) {
realFiles.add(entry);
} else {
String key = name.substring(0, nextSlash);
dirs.add(key);
changed = true;
}
}
if (changed) {
List<DirectoryListEntry> l = Lists.newArrayList();
for (String dir : dirs) {
l.add(new DirectoryListEntry(dir, null));
}
l.addAll(realFiles);
entries = l;
}
}
if (marker != null) {
log.warn("Marker filter is inefficient");
List<DirectoryListEntry> matches = Lists.newArrayList();
for (DirectoryListEntry entry : entries) {
String name = entry.getKey();
if (name.compareTo(marker) <= 0) {
continue;
}
matches.add(entry);
}
entries = matches;
}
Collections.sort(entries);
return entries;
}
@Override
public List<? extends FileInfo> listFiles(Project project, String bucketName, String prefix, String delimiter)
throws CloudException {
BucketData bucketData = getBucket(project, bucketName);
FsBucket fsBucket = new FsBucket(project, bucketData);
String marker = null;
return listFiles(project, fsBucket, prefix, delimiter, marker);
}
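// Returns the file's contents as a ByteSource over the requested byte
// range, or null if the file does not exist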
@Override
public ByteSource getData(Project project, String bucketName, String name, Long from, Long to) throws IOException,
CloudException {
BucketData bucketData = getBucket(project, bucketName);
FsBucket fsBucket = new FsBucket(project, bucketData);
FsFile fsFile = findFileInfo(fsBucket, name);
if (fsFile == null) {
return null;
}
CloudObject object = new CloudObject(fsFile.getData());
BlobStore blobStore = getBlobStore(project);
return object.asByteSource(blobStore, from, to);
}
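// Creates the bucket with default attributes if it does not already exist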
@Override
public void ensureBucket(Project project, String bucketName) throws CloudException {
BucketData bucket = fileStore.getBuckets(project.getId()).find(bucketName);
if (bucket == null) {
putBucket(project, bucketName, null, null);
}
}
}