package io.lumify.core.bootstrap.lib;
import io.lumify.core.config.Configuration;
import io.lumify.core.exception.LumifyException;
import io.lumify.core.util.LumifyLogger;
import io.lumify.core.util.LumifyLoggerFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.security.NoSuchAlgorithmException;
/**
 * {@link LibLoader} that copies library files from an HDFS directory into a local cache
 * directory and then registers each locally cached file via {@code addLibFile}.
 *
 * <p>Configuration keys read by this class:
 * <ul>
 *   <li>{@link Configuration#HDFS_LIB_CACHE_SOURCE_DIRECTORY} - HDFS directory to sync from; when
 *       it is not set the loader logs a warning and does nothing.</li>
 *   <li>{@link Configuration#HDFS_LIB_CACHE_TEMP_DIRECTORY} - local cache directory; defaults to
 *       {@code lumify-hdfslibcache} under {@code java.io.tmpdir}.</li>
 *   <li>{@link Configuration#HDFS_LIB_CACHE_HDFS_USER} - user to connect to HDFS as; defaults to
 *       {@code hadoop}.</li>
 *   <li>{@link Configuration#HADOOP_URL} - URI of the HDFS file system; required once a source
 *       directory is configured.</li>
 * </ul>
 */
public class HdfsLibCacheLoader extends LibLoader {
    private static final LumifyLogger LOGGER = LumifyLoggerFactory.getLogger(HdfsLibCacheLoader.class);

    /**
     * Syncs the configured HDFS lib cache directory to the local cache directory and registers
     * every cached file.
     *
     * @param configuration the configuration to read the HDFS lib cache settings from
     * @throws LumifyException if the local cache directory cannot be created, the HDFS file system
     *                         cannot be opened, or the sync fails
     */
    @Override
    public void loadLibs(Configuration configuration) {
        LOGGER.info("Loading libs using %s", HdfsLibCacheLoader.class.getName());

        String hdfsLibCacheDirectory = configuration.get(Configuration.HDFS_LIB_CACHE_SOURCE_DIRECTORY, null);
        if (hdfsLibCacheDirectory == null) {
            LOGGER.warn("Skipping HDFS libcache. Configuration parameter %s not found", Configuration.HDFS_LIB_CACHE_SOURCE_DIRECTORY);
            return;
        }

        File libCacheDirectory = getLocalHdfsLibCacheDirectory(configuration);
        String hdfsLibCacheUser = getHdfsLibCacheUser(configuration);
        FileSystem hdfsFileSystem = getFileSystem(configuration, hdfsLibCacheUser);

        try {
            syncLibCache(hdfsFileSystem, new Path(hdfsLibCacheDirectory), libCacheDirectory);
        } catch (Exception ex) {
            throw new LumifyException(String.format("Could not sync HDFS libcache. %s -> %s", hdfsLibCacheDirectory, libCacheDirectory.getAbsolutePath()), ex);
        }
    }

    /**
     * Resolves the local cache directory and makes sure it exists.
     *
     * @param configuration the configuration to read the local cache directory from
     * @return the local cache directory, guaranteed to exist as a directory
     * @throws LumifyException if the directory does not exist and cannot be created
     */
    private File getLocalHdfsLibCacheDirectory(Configuration configuration) {
        String hdfsLibCacheTempDirectoryString = configuration.get(Configuration.HDFS_LIB_CACHE_TEMP_DIRECTORY, null);
        File libCacheDirectory;
        if (hdfsLibCacheTempDirectoryString == null) {
            File baseDir = new File(System.getProperty("java.io.tmpdir"));
            libCacheDirectory = new File(baseDir, "lumify-hdfslibcache");
            LOGGER.info("Configuration parameter %s was not set; defaulting local libcache dir to %s", Configuration.HDFS_LIB_CACHE_TEMP_DIRECTORY, libCacheDirectory.getAbsolutePath());
        } else {
            libCacheDirectory = new File(hdfsLibCacheTempDirectoryString);
            LOGGER.info("Using local lib cache directory: %s", libCacheDirectory.getAbsolutePath());
        }

        // mkdirs() reports failure through its return value only. Re-check isDirectory() afterwards
        // so that a directory created concurrently by another process is not treated as an error.
        if (!libCacheDirectory.isDirectory() && !libCacheDirectory.mkdirs() && !libCacheDirectory.isDirectory()) {
            throw new LumifyException("Could not create local lib cache directory: " + libCacheDirectory.getAbsolutePath());
        }
        return libCacheDirectory;
    }

    /**
     * Returns the user to connect to HDFS as, falling back to {@code hadoop} when the
     * configuration does not specify one.
     *
     * @param configuration the configuration to read the HDFS user from
     * @return the configured HDFS user, or {@code "hadoop"} if none is configured
     */
    private String getHdfsLibCacheUser(Configuration configuration) {
        String hdfsLibCacheUser = configuration.get(Configuration.HDFS_LIB_CACHE_HDFS_USER, null);
        if (hdfsLibCacheUser == null) {
            hdfsLibCacheUser = "hadoop";
            LOGGER.warn("Configuration parameter %s was not set; defaulting to HDFS user '%s'.", Configuration.HDFS_LIB_CACHE_HDFS_USER, hdfsLibCacheUser);
        } else {
            LOGGER.info("Connecting to HDFS as user '%s'", hdfsLibCacheUser);
        }
        return hdfsLibCacheUser;
    }

    /**
     * Opens the HDFS file system named by {@link Configuration#HADOOP_URL} as the given user.
     *
     * @param configuration the configuration providing the HDFS URL and the Hadoop configuration
     * @param user          the user to connect as
     * @return the opened file system
     * @throws LumifyException if the URL is not configured or the file system cannot be opened;
     *                         the missing-URL error is wrapped as the cause of the thrown exception
     */
    private FileSystem getFileSystem(Configuration configuration, String user) {
        try {
            String hdfsRootDir = configuration.get(Configuration.HADOOP_URL, null);
            if (hdfsRootDir == null) {
                throw new LumifyException("Could not find configuration: " + Configuration.HADOOP_URL);
            }
            return FileSystem.get(new URI(hdfsRootDir), configuration.toHadoopConfiguration(), user);
        } catch (Exception ex) {
            throw new LumifyException("Could not open HDFS file system.", ex);
        }
    }

    /**
     * Verifies that the HDFS source exists and then caches and registers its files.
     *
     * @param fs      the HDFS file system
     * @param source  the HDFS directory to sync from
     * @param destDir the local cache directory
     * @throws IOException     if an HDFS or local file operation fails
     * @throws LumifyException if {@code source} does not exist
     */
    private static void syncLibCache(FileSystem fs, Path source, File destDir) throws IOException, NoSuchAlgorithmException {
        if (!fs.exists(source)) {
            throw new LumifyException(String.format("Could not sync HDFS directory %s. Directory does not exist.", source));
        }
        addFilesFromHdfs(fs, source, destDir);
    }

    /**
     * Recursively lists the files under {@code source}, copies each one that is not already
     * present in {@code destDir}, and passes every local copy to {@code addLibFile}.
     *
     * <p>A file counts as cached when a local file with the name produced by
     * {@link #getLocalCacheFileName} exists; its contents are not compared.
     *
     * @param fs      the HDFS file system
     * @param source  the HDFS directory to list
     * @param destDir the local cache directory
     * @throws IOException if listing or copying fails
     */
    private static void addFilesFromHdfs(FileSystem fs, Path source, File destDir) throws IOException, NoSuchAlgorithmException {
        LOGGER.debug("Adding files from HDFS %s -> %s", source.toString(), destDir.getAbsolutePath());
        RemoteIterator<LocatedFileStatus> hdfsFiles = fs.listFiles(source, true);
        while (hdfsFiles.hasNext()) {
            LocatedFileStatus hdfsFile = hdfsFiles.next();
            if (hdfsFile.isDirectory()) {
                continue;
            }

            File locallyCachedFile = getLocalCacheFileName(hdfsFile, destDir);
            if (locallyCachedFile.exists()) {
                LOGGER.info("HDFS file %s already cached at %s. Skipping sync.", hdfsFile.getPath().toString(), locallyCachedFile.getPath());
            } else {
                fs.copyToLocalFile(hdfsFile.getPath(), new Path(locallyCachedFile.getAbsolutePath()));
                // Best effort: the cache lookup is keyed on the file name, not on the local
                // timestamp, so a failure here is reported but does not abort the sync.
                if (!locallyCachedFile.setLastModified(hdfsFile.getModificationTime())) {
                    LOGGER.warn("Could not set last modified time on %s", locallyCachedFile.getPath());
                }
                LOGGER.info("Caching HDFS file %s -> %s", hdfsFile.getPath().toString(), locallyCachedFile.getPath());
            }

            addLibFile(locallyCachedFile);
        }
    }

    /**
     * Builds the local cache file for an HDFS file by inserting the HDFS modification time between
     * the base name and the extension, e.g. {@code foo.jar} becomes {@code foo-<mtime>.jar}.
     * A name without any {@code '.'} gets the modification time appended: {@code foo-<mtime>}.
     *
     * @param hdfsFile the HDFS file status providing the name and modification time
     * @param destdir  the local cache directory
     * @return the local file, inside {@code destdir}, that the HDFS file is cached as
     */
    private static File getLocalCacheFileName(LocatedFileStatus hdfsFile, File destdir) {
        String filename = hdfsFile.getPath().getName();
        // lastIndexOf returns -1 when the name has no extension; substring(0, -1) would throw.
        int dotIndex = filename.lastIndexOf('.');
        String baseFilename = dotIndex < 0 ? filename : filename.substring(0, dotIndex);
        String extension = dotIndex < 0 ? "" : filename.substring(dotIndex);
        String cacheFilename = baseFilename + "-" + hdfsFile.getModificationTime() + extension;
        return new File(destdir, cacheFilename);
    }
}