package water.persist;
import java.io.File;
import water.Boot;
import water.H2O;
import water.util.Log;
import com.google.common.base.Objects;
import com.google.common.base.Strings;
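/**
 * Loads the hadoop/HDFS client jars matching the requested HDFS version at startup,
 * so the HDFS persistence backend can talk to an existing hadoop installation.
 */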
public class HdfsLoader {
  // Default hadoop client flavor bundled with H2O; used when no HDFS version is given on the command line.
  private static final String DEFAULT_HDFS_VERSION = "cdh4";
  // Hadoop client flavor used when the HDFS URI points at MapR-FS.
  private static final String MAPRFS_HDFS_VERSION = "mapr2.1.3";

  public static void loadJars() {
    if (H2O.OPT_ARGS.hdfs_skip != null) {
      // When H2O is launched by hadoop itself, it should use the HDFS library that
      // the hadoop mapper task picks up by default.
      //
      // Do not load any hadoop jar that is packed with H2O.
      Log.info("H2O was started by Hadoop; inheriting HDFS library from mapper task.");
      return;
    }
    if (H2O.OPT_ARGS.hdfs_version != null) {
      Log.info("HDFS version specified on the command line: " + H2O.OPT_ARGS.hdfs_version);
    }
    // Load the HDFS backend for existing hadoop installations.
    // FIX! Do hadoop/mapr support other variants? Also, why isn't port an option on mapr, and why a volume?
    // Port should be optional.
    // Understands -hdfs=hdfs://server:port OR -hdfs=maprfs:///mapr/node_name/volume
    //             -hdfs-root=root
    //             -hdfs-config=config file
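    // Examples (hypothetical hosts, ports, and volume names):
    //   -hdfs=hdfs://namenode.example.com:8020
    //   -hdfs=maprfs:///mapr/my.cluster.com/datasets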
    String version = Objects.firstNonNull(H2O.OPT_ARGS.hdfs_version, DEFAULT_HDFS_VERSION);
    // If the HDFS URI points at MapR-FS, switch to the MapR flavor of the hadoop client.
    // FIX! Shouldn't we just use whatever hdfs_version specified above?
    if( "mapr".equals(version) || Strings.nullToEmpty(H2O.OPT_ARGS.hdfs).startsWith("maprfs:///") ) {
      version = MAPRFS_HDFS_VERSION;
    }
    try {
      // Hadoop client jars are only loaded when H2O is running from its packaged jar.
      if( Boot._init.fromJar() ) {
        File f = new File(version);
        if( f.exists() ) {
          // A local directory named after the version takes precedence over the bundled jars.
          Boot._init.addExternalJars(f);
        } else {
          // Otherwise load the hadoop client jars packaged inside the H2O jar.
          Boot._init.addInternalJars("hadoop/" + version + "/");
        }
      }
    } catch( Exception e ) {
      Log.err(e);
      Log.die("[hdfs] Unable to initialize hadoop version " + version + "; please use a different version.");
    }
  }
}