package io.fathom.cloud.storage;

import io.fathom.cloud.blobs.BlobData;
import io.fathom.cloud.blobs.BlobStore;
import io.fathom.cloud.blobs.TempFile;
import io.fathom.cloud.io.HashingOutputStream;
import io.fathom.cloud.protobuf.FileModel.FileData.Builder;
import io.fathom.cloud.protobuf.FileModel.FileRange;
import io.fathom.cloud.server.model.Project;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import com.google.protobuf.ByteString;
/**
 * Compacts a file's range list by merging a run of adjacent ranges into a
 * single blob. This is modeled as an operation so that, in the future, we can
 * record which blocks we have combined.
 *
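 * <p>
 * A minimal usage sketch (hypothetical caller; {@code fs} and
 * {@code fileBuilder} are assumed to come from the surrounding storage code):
 *
 * <pre>{@code
 * CompactOperation op = new CompactOperation(project, "bucket", "object-name");
 * if (op.compact(fs, fileBuilder)) {
 *     // The range list was rewritten; persist the updated file metadata.
 * }
 * }</pre>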
*/
public class CompactOperation {
    private static final Logger log = LoggerFactory.getLogger(CompactOperation.class);

    private static final boolean PARANOID = true;

    final Project project;
    final String bucketName;
    final String name;

    public CompactOperation(Project project, String bucketName, String name) {
        this.project = project;
        this.bucketName = bucketName;
        this.name = name;
    }
    public Project getProject() {
        return project;
    }

    public String getBucketName() {
        return bucketName;
    }

    public String getName() {
        return name;
    }
    public boolean compact(FileServiceInternal fs, Builder file) throws IOException {
        int n = file.getRangesCount();

        // Quick sanity check: there is nothing to merge with fewer than two ranges
        if (n < 2) {
            return false;
        }

        int bestStart = 0;
        int bestEnd = -1;
        float bestScore = Float.MAX_VALUE;

        // Try to bring the file down to a small number of segments, so that we
        // don't immediately re-compact
        int minLength = Math.max(n - FileServiceImpl.COMPACTION_THRESHOLD / 2, 2);
        log.debug("Chose minimum merge length of {}", minLength);
        for (int start = 0; start < n; start++) {
            long maxSize = 0;
            long minSize = Long.MAX_VALUE;
            long totalSize = 0;

            for (int end = start; end < n; end++) {
                FileRange range = file.getRanges(end);
                long size = range.getEnd() - range.getStart();

                maxSize = Math.max(maxSize, size);
                minSize = Math.min(minSize, size);
                totalSize += size;

                int len = end - start;
                if (len >= minLength) {
                    // This scoring is loosely based on Lucene's tiered merge policy
                    // final float skew = ((float) maxSize) / ((float) minSize);
                    final float skew = ((float) maxSize) / ((float) totalSize);

                    // Strongly favor merges with less skew (smaller mergeScore
                    // is better):
                    float mergeScore = skew;

                    // Gently favor smaller merges over bigger ones. We don't
                    // want to make this exponent too large, else we can end up
                    // doing poor merges of small segments just to avoid the
                    // large merges:
                    mergeScore *= Math.pow(totalSize, 0.05);
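
                    // For example, ranges of sizes {100, 100, 800} score about
                    // (800 / 1000) * 1000^0.05 ~= 1.13, while {100, 100, 100}
                    // scores about (100 / 300) * 300^0.05 ~= 0.44; the
                    // evenly-sized merge wins (lower is better).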
if (mergeScore < bestScore) {
bestScore = mergeScore;
bestStart = start;
bestEnd = end;
}
// log.debug("{} - {} => {}", new Object[] { start, end,
// mergeScore });
}
}
}
        if (bestEnd == -1) {
            log.warn("Unable to find any merges!");
            return false;
        }

        // Dump the range sizes, to aid debugging of merge selection
        for (int i = 0; i < n; i++) {
            FileRange range = file.getRanges(i);
            long len = range.getEnd() - range.getStart();
            log.info("Range {}: length {}", i, len);
        }

        // bestStart and bestEnd are both inclusive
        log.info("Chose merge {}-{}", bestStart, bestEnd);

        BlobStore blobStore = fs.getBlobStore(project);

        List<FileRange> newRanges = Lists.newArrayList();

        // Keep the ranges before the merge window unchanged
        for (int i = 0; i < bestStart; i++) {
            newRanges.add(file.getRanges(i));
        }
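
        // Stream the selected ranges into a single temp file, hashing as we
        // write; the MD5 digest becomes the content key of the merged blob.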
        try (TempFile tempFile = TempFile.create()) {
            FileRange.Builder c = FileRange.newBuilder();

            Hasher md5 = Hashing.md5().newHasher();
            try (OutputStream fos = new HashingOutputStream(new FileOutputStream(tempFile.getFile()), md5)) {
                // bestEnd is inclusive, matching the scoring loop above
                for (int i = bestStart; i <= bestEnd; i++) {
                    FileRange range = file.getRanges(i);
                    if (i == bestStart) {
                        c.setStart(range.getStart());
                    }
                    if (i == bestEnd) {
                        c.setEnd(range.getEnd());
                    }

                    final BlobData blob = blobStore.find(range.getContentKey());
                    if (blob == null) {
                        throw new IOException("Unable to open storage for range: " + range);
                    }
                    blob.copyTo(fos);
                }
            }

            if (!c.hasStart() || !c.hasEnd()) {
                throw new IllegalStateException("Merged range did not get both start and end set");
            }

            ByteString hash = ByteString.copyFrom(md5.hash().asBytes());
            BlobData blobData = new BlobData(tempFile.getFile(), hash);
            blobStore.put(blobData);
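
            // Paranoid verification: read the blob back and re-hash it to
            // confirm the store round-trips the data we just wrote.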
            if (PARANOID) {
                BlobData blob = blobStore.find(blobData.getHash());
                if (blob == null) {
                    throw new IllegalStateException("Unable to read back blob we just wrote");
                }
                ByteString checkHash = ByteString.copyFrom(blob.hash(Hashing.md5()).asBytes());
                if (!blobData.getHash().equals(checkHash)) {
                    log.warn("Hash mismatch: {} vs {}", blobData.getHash(), checkHash);
                    throw new IllegalStateException("Hash mismatch on blob read-back");
                }
            }

            c.setContentKey(blobData.getHash());
            newRanges.add(c.build());
        }
        // Keep the ranges after the merge window unchanged
        for (int i = bestEnd + 1; i < file.getRangesCount(); i++) {
            newRanges.add(file.getRanges(i));
        }

        if (PARANOID) {
            FileServiceImpl.sanityCheck(blobStore, file);
            log.debug("File before modification OK");
        }

        file.clearRanges();
        file.addAllRanges(newRanges);

        if (PARANOID) {
            FileServiceImpl.sanityCheck(blobStore, file);
            log.debug("File after modification OK");
        }

        // log.info("New file: {}", file.clone().build());
        return true;
    }
    public String getDebugPath() {
        return project.getId() + "/" + bucketName + "/" + name;
    }

    @Override
    public String toString() {
        return "CompactOperation [path=" + getDebugPath() + "]";
    }
}