package com.inin.analytics.elasticsearch.transport;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import com.google.common.base.Preconditions;
import com.inin.analytics.elasticsearch.BaseESReducer;
import com.inin.analytics.elasticsearch.ShardConfig;
import com.inin.analytics.elasticsearch.transport.SnapshotTransportStrategy.STORAGE_SYSTEMS;
/**
 * Base class for moving a snapshot from a local working directory to its final destination.
 *
 * <p>Subclasses supply the storage-specific operations ({@link #init()}, {@link #close()},
 * {@link #transferFile}, {@link #transferDir}, {@link #checkExists}); this class decides which
 * files and shard directories are handed to them.
 *
 * <p>Local paths are built by appending {@code "indices"} directly to
 * {@code snapshotWorkingLocation}, so that value is expected to already end with a directory
 * separator.
 */
public abstract class BaseTransport {
    /** Name of the directory that holds per-index data, both locally and at the destination. */
    private static final String INDICES_DIR = "indices";

    /** Stateless filter shared by all instances; accepts only sub-directories. */
    private static final DirectoryFilter DIRECTORY_FILTER = new DirectoryFilter();

    protected String snapshotWorkingLocation;
    protected String snapshotFinalDestination;

    /**
     * @param snapshotWorkingLocation local directory the snapshot was written to; must not be null
     * @param snapshotFinalDestination destination the snapshot is transferred to; must not be null
     * @throws NullPointerException if either argument is null
     */
    public BaseTransport(String snapshotWorkingLocation, String snapshotFinalDestination) {
        this.snapshotWorkingLocation =
                Preconditions.checkNotNull(snapshotWorkingLocation, "snapshotWorkingLocation");
        this.snapshotFinalDestination =
                Preconditions.checkNotNull(snapshotFinalDestination, "snapshotFinalDestination");
    }

    /** Called once at the start of {@link #execute} and {@link #placeMissingShards}. */
    protected abstract void init();

    /** Called once at the end of {@link #execute} and {@link #placeMissingShards}, even on failure. */
    protected abstract void close();

    /**
     * Transfers the file {@code filename} found in {@code localDirectory} to {@code destination}.
     * This class always passes {@code deleteSource = false}.
     */
    protected abstract void transferFile(boolean deleteSource, String destination, String filename, String localDirectory) throws IOException;

    /**
     * Transfers the local shard directory {@code localShardPath} to {@code destination}.
     * {@code shard} is the shard directory name.
     */
    protected abstract void transferDir(String destination, String localShardPath, String shard) throws IOException;

    /**
     * Reports whether shard {@code shardNumber} is already present under {@code destination}.
     * {@link #placeMissingShards} skips every shard for which this returns {@code true}.
     */
    protected abstract boolean checkExists(String destination, Integer shardNumber) throws IOException;

    /**
     * Transport a snapshot sitting on the local filesystem to a remote repository. Snapshots are
     * stitched together shard by shard because we're snapshotting 1 shard at a time.
     *
     * <p>Side effect: every local shard directory of {@code index} except the largest one is
     * deleted before the upload.
     *
     * @param snapshotName name of the snapshot; used to build the manifest file names
     * @param index name of the index whose shard is transferred
     * @throws IOException if a transfer fails, or the local index directory cannot be listed or
     *         contains no shard directories
     */
    public void execute(String snapshotName, String index) throws IOException {
        init();
        try {
            // Figure out which shard has all the data
            String largestShard = getShardSource(index);
            String destination = removeStorageSystemFromPath(snapshotFinalDestination);

            // Upload top level manifests
            transferRootManifests(destination, snapshotName);

            // Upload per-index manifests
            String indexManifestSource = snapshotWorkingLocation + INDICES_DIR + BaseESReducer.DIR_SEPARATOR + index;
            String indexManifestDestination = destination + BaseESReducer.DIR_SEPARATOR + INDICES_DIR + BaseESReducer.DIR_SEPARATOR + index;
            transferFile(false, indexManifestDestination, "snapshot-" + snapshotName, indexManifestSource);

            // Cleanup shard data
            cleanEmptyShards(index, largestShard);

            // Upload shard data
            String shardSource = localIndexLocation(index) + largestShard;
            transferDir(remoteIndexLocation(destination, index), shardSource, largestShard);
        } finally {
            // Release whatever init() acquired even when a transfer fails.
            close();
        }
    }

    /**
     * Uploads every shard of {@code index} that {@link #checkExists} reports as absent at the
     * destination, optionally preceded by the top level manifests.
     *
     * @param snapshotName name of the snapshot; used to build the manifest file names
     * @param index name of the index whose shards are checked
     * @param shardConfig supplies the number of shards for {@code index}
     * @param includeRootManifest when true, the top level manifests are uploaded first
     * @throws IOException if a transfer or an existence check fails
     */
    public void placeMissingShards(String snapshotName, String index, ShardConfig shardConfig, boolean includeRootManifest) throws IOException {
        init();
        try {
            String destination = removeStorageSystemFromPath(snapshotFinalDestination);
            if (includeRootManifest) {
                // Upload top level manifests
                transferRootManifests(destination, snapshotName);
            }

            String indexDestination = remoteIndexLocation(destination, index);
            String localIndex = localIndexLocation(index);
            for (int shard = 0; shard < shardConfig.getShardsForIndex(index); shard++) {
                if (!checkExists(indexDestination, shard)) {
                    // Upload shard data
                    transferDir(indexDestination, localIndex + shard, Integer.toString(shard));
                }
            }
        } finally {
            // Release whatever init() acquired even when a transfer fails.
            close();
        }
    }

    /** Uploads the three snapshot-wide manifest files from the working location. */
    private void transferRootManifests(String destination, String snapshotName) throws IOException {
        transferFile(false, destination, "metadata-" + snapshotName, snapshotWorkingLocation);
        transferFile(false, destination, "snapshot-" + snapshotName, snapshotWorkingLocation);
        transferFile(false, destination, "index", snapshotWorkingLocation);
    }

    /** Local directory of {@code index}, with a trailing separator. */
    private String localIndexLocation(String index) {
        return snapshotWorkingLocation + INDICES_DIR + BaseESReducer.DIR_SEPARATOR + index + BaseESReducer.DIR_SEPARATOR;
    }

    /** Directory of {@code index} below {@code destination}, with a trailing separator. */
    private String remoteIndexLocation(String destination, String index) {
        return destination + BaseESReducer.DIR_SEPARATOR + INDICES_DIR + BaseESReducer.DIR_SEPARATOR + index + BaseESReducer.DIR_SEPARATOR;
    }

    /**
     * Rip out filesystem specific stuff off the path EG s3://
     *
     * <p>For each storage system the first occurrence of {@code <name>://} is removed. The enum
     * name is used as a regular expression by {@link String#replaceFirst}.
     *
     * @param s path that may carry a storage system prefix
     * @return the path with those prefixes removed
     */
    private String removeStorageSystemFromPath(String s) {
        String stripped = s;
        for (STORAGE_SYSTEMS storageSystem : SnapshotTransportStrategy.STORAGE_SYSTEMS.values()) {
            stripped = stripped.replaceFirst(storageSystem.name() + "://", "");
        }
        return stripped;
    }

    /**
     * Lists the names of the shard directories found locally for {@code index}.
     *
     * @throws IOException if the index directory does not exist, is not a directory, or cannot
     *         be read ({@link File#list(FilenameFilter)} returns null in those cases)
     */
    private String[] listShardDirectories(String baseIndexLocation) throws IOException {
        String[] shardDirectories = new File(baseIndexLocation).list(DIRECTORY_FILTER);
        if (shardDirectories == null) {
            throw new IOException("Unable to list shard directories in " + baseIndexLocation);
        }
        return shardDirectories;
    }

    /**
     * We've snapshotted an index with all data routed to a single shard (1 shard per reducer).
     * Problem is we don't know which shard # it routed all the data to. We determine that by
     * picking out the largest shard folder.
     *
     * @param index name of the index to inspect
     * @return name of the largest shard directory; on a tie the first one listed wins
     * @throws IOException if the index directory cannot be listed or has no shard directories
     */
    private String getShardSource(String index) throws IOException {
        // Get a list of shards in the snapshot
        String baseIndexLocation = localIndexLocation(index);
        String[] shardDirectories = listShardDirectories(baseIndexLocation);

        // Figure out which shard has all the data in it. Since we've routed all data to it, there'll only be one
        String biggestDir = null;
        long biggestDirLength = 0L;
        for (String directory : shardDirectories) {
            long curDirLength = FileUtils.sizeOfDirectory(new File(baseIndexLocation + directory));
            if (biggestDir == null || biggestDirLength < curDirLength) {
                biggestDir = directory;
                biggestDirLength = curDirLength;
            }
        }

        if (biggestDir == null) {
            // Without this the callers would build paths containing the literal "null".
            throw new IOException("No shard directories found in " + baseIndexLocation);
        }
        return biggestDir;
    }

    /**
     * We're building 1 shard at a time. Therefore each snapshot has a bunch of empty
     * shards and 1 shard with all the data in it. This deletes every local shard folder
     * other than {@code biggestDir}.
     *
     * @param index name of the index whose shard folders are cleaned
     * @param biggestDir name of the shard folder to keep
     * @throws IOException if the index directory cannot be listed or a folder cannot be deleted
     */
    private void cleanEmptyShards(String index, String biggestDir) throws IOException {
        String baseIndexLocation = localIndexLocation(index);

        // Remove the empty shards
        for (String directory : listShardDirectories(baseIndexLocation)) {
            if (!directory.equals(biggestDir)) {
                FileUtils.deleteDirectory(new File(baseIndexLocation + directory));
            }
        }
    }

    /** Accepts only entries that are directories. */
    private static class DirectoryFilter implements FilenameFilter {
        @Override
        public boolean accept(File current, String name) {
            return new File(current, name).isDirectory();
        }
    }
}