package water;
import com.brsanthu.googleanalytics.DefaultRequest;
import com.brsanthu.googleanalytics.GoogleAnalytics;
import jsr166y.CountedCompleter;
import jsr166y.ForkJoinPool;
import jsr166y.ForkJoinWorkerThread;
import org.apache.log4j.LogManager;
import org.apache.log4j.PropertyConfigurator;
import org.reflections.Reflections;
import water.UDPRebooted.ShutdownTsk;
import water.api.RequestServer;
import water.exceptions.H2OFailException;
import water.exceptions.H2OIllegalArgumentException;
import water.init.*;
import water.nbhm.NonBlockingHashMap;
import water.parser.ParserService;
import water.persist.PersistManager;
import water.util.*;
import java.io.File;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.net.*;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
/**
* Start point for creating or joining an <code>H2O</code> Cloud.
*
* @author <a href="mailto:cliffc@h2o.ai"></a>
* @version 1.0
*/
final public class H2O {
public static final String DEFAULT_JKS_PASS = "h2oh2o";
//-------------------------------------------------------------------------------------------------------------------
// Command-line argument parsing and help
//-------------------------------------------------------------------------------------------------------------------
/**
* Print help about command line arguments.
*/
public static void printHelp() {
String defaultFlowDirMessage;
if (DEFAULT_FLOW_DIR() == null) {
// If you start h2o on Hadoop, you must set -flow_dir.
// H2O doesn't know how to guess a good one.
// user.home doesn't make sense.
defaultFlowDirMessage =
" (The default is none; saving flows not available.)\n";
}
else {
defaultFlowDirMessage =
" (The default is '" + DEFAULT_FLOW_DIR() + "'.)\n";
}
String s =
"\n" +
"Usage: java [-Xmx<size>] -jar h2o.jar [options]\n" +
" (Note that every option has a default and is optional.)\n" +
"\n" +
" -h | -help\n" +
" Print this help.\n" +
"\n" +
" -version\n" +
" Print version info and exit.\n" +
"\n" +
" -name <h2oCloudName>\n" +
" Cloud name used for discovery of other nodes.\n" +
" Nodes with the same cloud name will form an H2O cloud\n" +
" (also known as an H2O cluster).\n" +
"\n" +
" -flatfile <flatFileName>\n" +
" Configuration file explicitly listing H2O cloud node members.\n" +
"\n" +
" -ip <ipAddressOfNode>\n" +
" IP address of this node.\n" +
"\n" +
" -port <port>\n" +
" Port number for this node (note: port+1 is also used).\n" +
" (The default port is " + ARGS.port + ".)\n" +
"\n" +
" -network <IPv4network1Specification>[,<IPv4network2Specification> ...]\n" +
" The IP address discovery code will bind to the first interface\n" +
" that matches one of the networks in the comma-separated list.\n" +
" Use instead of -ip when a broad range of addresses is legal.\n" +
" (Example network specification: '10.1.2.0/24' allows 256 legal\n" +
" possibilities.)\n" +
"\n" +
" -ice_root <fileSystemPath>\n" +
" The directory where H2O spills temporary data to disk.\n" +
"\n" +
" -log_dir <fileSystemPath>\n" +
" The directory where H2O writes logs to disk.\n" +
" (This usually has a good default that you need not change.)\n" +
"\n" +
" -log_level <TRACE,DEBUG,INFO,WARN,ERRR,FATAL>\n" +
" Write messages at this logging level, or above. Default is INFO." +
"\n" +
"\n" +
" -flow_dir <server side directory or HDFS directory>\n" +
" The directory where H2O stores saved flows.\n" +
defaultFlowDirMessage +
"\n" +
" -nthreads <#threads>\n" +
" Maximum number of threads in the low priority batch-work queue.\n" +
" (The default is " + (char)Runtime.getRuntime().availableProcessors() + ".)\n" +
"\n" +
" -client\n" +
" Launch H2O node in client mode.\n" +
"\n" +
" -context_path <context_path>\n" +
" The context path for jetty.\n" +
"\n" +
"Authentication options:\n" +
"\n" +
" -jks <filename>\n" +
" Java keystore file\n" +
"\n" +
" -jks_pass <password>\n" +
" (Default is '" + DEFAULT_JKS_PASS + "')\n" +
"\n" +
" -hash_login\n" +
" Use Jetty HashLoginService\n" +
"\n" +
" -ldap_login\n" +
" Use Jetty LdapLoginService\n" +
"\n" +
" -kerberos_login\n" +
" Use Kerberos LoginService\n" +
"\n" +
" -pam_login\n" +
" Use PAM LoginService\n" +
"\n" +
" -login_conf <filename>\n" +
" LoginService configuration file\n" +
"\n" +
" -form_auth\n" +
" Enables Form-based authentication for Flow (default is Basic authentication)\n" +
"\n" +
" -session_timeout <minutes>\n" +
" Specifies the number of minutes that a session can remain idle before the server invalidates\n" +
" the session and requests a new login. Requires '-form_auth'. Default is no timeout\n" +
"\n" +
" -internal_security_conf <filename>\n" +
" Path (absolute or relative) to a file containing all internal security related configurations\n" +
"\n" +
"Cloud formation behavior:\n" +
"\n" +
" New H2O nodes join together to form a cloud at startup time.\n" +
" Once a cloud is given work to perform, it locks out new members\n" +
" from joining.\n" +
"\n" +
"Examples:\n" +
"\n" +
" Start an H2O node with 4GB of memory and a default cloud name:\n" +
" $ java -Xmx4g -jar h2o.jar\n" +
"\n" +
" Start an H2O node with 6GB of memory and a specify the cloud name:\n" +
" $ java -Xmx6g -jar h2o.jar -name MyCloud\n" +
"\n" +
" Start an H2O cloud with three 2GB nodes and a default cloud name:\n" +
" $ java -Xmx2g -jar h2o.jar &\n" +
" $ java -Xmx2g -jar h2o.jar &\n" +
" $ java -Xmx2g -jar h2o.jar &\n" +
"\n";
System.out.print(s);
for (AbstractH2OExtension e : H2O.getExtensions()) {
e.printHelp();
}
}
/**
* Singleton ARGS instance that contains the processed arguments.
*/
public static final OptArgs ARGS = new OptArgs();
/**
* A class containing all of the arguments for H2O.
*/
public static class OptArgs {
// Prefix of hidden system properties
public static final String SYSTEM_PROP_PREFIX = "sys.ai.h2o.";
public static final String SYSTEM_DEBUG_CORS = H2O.OptArgs.SYSTEM_PROP_PREFIX + "debug.cors";
//-----------------------------------------------------------------------------------
// Help and info
//-----------------------------------------------------------------------------------
/** -help, -help=true; print help and exit*/
public boolean help = false;
/** -version, -version=true; print version and exit */
public boolean version = false;
//-----------------------------------------------------------------------------------
// Clouding
//-----------------------------------------------------------------------------------
/** -name=name; Set cloud name */
public String name = System.getProperty("user.name"); // Cloud name
/** -flatfile=flatfile; Specify a list of cluster IP addresses */
public String flatfile;
/** -port=####; Specific Browser/API/HTML port */
public int port;
/** -baseport=####; Port to start upward searching from. */
public int baseport = 54321;
/** -web_ip=ip4_or_ip6; IP used for web server. By default it listens to all interfaces. */
public String web_ip = null;
/** -ip=ip4_or_ip6; Named IP4/IP6 address instead of the default */
public String ip;
/** -network=network; Network specification for acceptable interfaces to bind to */
public String network;
/** -client, -client=true; Client-only; no work; no homing of Keys (but can cache) */
public boolean client;
/** -user_name=user_name; Set user name */
public String user_name = System.getProperty("user.name");
//-----------------------------------------------------------------------------------
// Node configuration
//-----------------------------------------------------------------------------------
/** -ice_root=ice_root; ice root directory; where temp files go */
public String ice_root;
/** -cleaner; enable user-mode spilling of big data to disk in ice_root */
public boolean cleaner = false;
/** -nthreads=nthreads; Max number of F/J threads in the low-priority batch queue */
public short nthreads= (short)Runtime.getRuntime().availableProcessors();
/** -log_dir=/path/to/dir; directory to save logs in */
public String log_dir;
/** -flow_dir=/path/to/dir; directory to save flows in */
public String flow_dir;
/** -disable_web; disable Jetty and REST API interface */
public boolean disable_web = false;
/** -context_path=jetty_context_path; the context path for jetty */
public String context_path = "";
//-----------------------------------------------------------------------------------
// HDFS & AWS
//-----------------------------------------------------------------------------------
/** -hdfs_config=hdfs_config; configuration file of the HDFS */
public String hdfs_config = null;
/** -hdfs_skip=hdfs_skip; used by Hadoop driver to not unpack and load any HDFS jar file at runtime. */
public boolean hdfs_skip = false;
/** -aws_credentials=aws_credentials; properties file for aws credentials */
public String aws_credentials = null;
/** --ga_hadoop_ver=ga_hadoop_ver; Version string for Hadoop */
public String ga_hadoop_ver = null;
/** --ga_opt_out; Turns off usage reporting to Google Analytics */
public boolean ga_opt_out = false;
//-----------------------------------------------------------------------------------
// Authentication
//-----------------------------------------------------------------------------------
/** -jks is Java KeyStore file on local filesystem */
public String jks = null;
/** -jks_pass is Java KeyStore password; default is 'h2oh2o' */
public String jks_pass = DEFAULT_JKS_PASS;
/** -hash_login enables HashLoginService */
public boolean hash_login = false;
/** -ldap_login enables LdapLoginService */
public boolean ldap_login = false;
/** -kerberos_login enables KerberosLoginService */
public boolean kerberos_login = false;
/** -pam_login enables PAMLoginService */
public boolean pam_login = false;
/** -login_conf is login configuration service file on local filesystem */
public String login_conf = null;
/** -form_auth enables Form-based authentication */
public boolean form_auth = false;
/** -session_timeout maximum duration of session inactivity in minutes */
private String session_timeout_spec = null; // raw value specified by the user
public int session_timeout = 0; // parsed value (in minutes)
/** -internal_security_conf path (absolute or relative) to a file containing all internal security related configurations */
public String internal_security_conf = null;
/** -internal_security_enabled is a boolean that indicates if internal communication paths are secured*/
public boolean internal_security_enabled = false;
//-----------------------------------------------------------------------------------
// Debugging
//-----------------------------------------------------------------------------------
/** -log_level=log_level; One of TRACE, DEBUG, INFO, WARN, ERRR, FATAL. Default is INFO. */
public String log_level;
/** -random_udp_drop, -random_udp_drop=true; test only, randomly drop udp incoming */
public boolean random_udp_drop;
/** -md5skip, -md5skip=true; test-only; Skip the MD5 Jar checksum; allows jars from different builds to mingle in the same cloud */
public boolean md5skip = false;
/** -quiet Enable quiet mode and avoid any prints to console, useful for client embedding */
public boolean quiet = false;
public boolean useUDP = false;
/** -no_latest_check Do not attempt to retrieve latest H2O version from S3 on startup */
public boolean noLatestCheck = false;
@Override public String toString() {
StringBuilder result = new StringBuilder();
//determine fields declared in this class only (no fields of superclass)
Field[] fields = this.getClass().getDeclaredFields();
//print field names paired with their values
result.append("[ ");
for (Field field : fields) {
try {
result.append(field.getName());
result.append(": ");
//requires access to private field:
result.append(field.get(this));
result.append(", ");
}
catch (IllegalAccessException ex) {
Log.err(ex);
}
}
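// Remove the trailing ", " separator appended after the last field.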
result.deleteCharAt(result.length() - 2);
result.deleteCharAt(result.length() - 1);
result.append(" ]");
return result.toString();
}
/**
* Whether this H2O instance was launched on Hadoop (using 'hadoop jar h2odriver.jar') or not.
*/
public boolean launchedWithHadoopJar() {
return hdfs_skip;
}
}
public static void parseFailed(String message) {
System.out.println("");
System.out.println("ERROR: " + message);
System.out.println("");
printHelp();
H2O.exit(1);
}
public static class OptString {
final String _s;
String _lastMatchedFor;
public OptString(String s) {
_s = s;
}
public boolean matches(String s) {
_lastMatchedFor = s;
if (_s.equals("-" + s)) return true;
if (_s.equals("--" + s)) return true;
return false;
}
public int incrementAndCheck(int i, String[] args) {
i = i + 1;
if (i >= args.length) parseFailed(_lastMatchedFor + " not specified");
return i;
}
public int parseInt(String a) {
try { return Integer.parseInt(a); }
catch (Exception e) { }
parseFailed("Argument " + _lastMatchedFor + " must be an integer (was given '" + a + "')" );
return 0;
}
@Override public String toString() { return _s; }
}
/**
* Dead stupid argument parser.
*/
private static void parseArguments(String[] args) {
for (AbstractH2OExtension e : H2O.getExtensions()) {
args = e.parseArguments(args);
}
for (int i = 0; i < args.length; i++) {
OptString s = new OptString(args[i]);
if (s.matches("h") || s.matches("help")) {
ARGS.help = true;
}
else if (s.matches("version")) {
ARGS.version = true;
}
else if (s.matches("name")) {
i = s.incrementAndCheck(i, args);
ARGS.name = args[i];
}
else if (s.matches("flatfile")) {
i = s.incrementAndCheck(i, args);
ARGS.flatfile = args[i];
}
else if (s.matches("port")) {
i = s.incrementAndCheck(i, args);
ARGS.port = s.parseInt(args[i]);
}
else if (s.matches("baseport")) {
i = s.incrementAndCheck(i, args);
ARGS.baseport = s.parseInt(args[i]);
}
else if (s.matches("ip")) {
i = s.incrementAndCheck(i, args);
ARGS.ip = args[i];
}
else if (s.matches("web_ip")) {
i = s.incrementAndCheck(i, args);
ARGS.web_ip = args[i];
}
else if (s.matches("network")) {
i = s.incrementAndCheck(i, args);
ARGS.network = args[i];
}
else if (s.matches("client")) {
ARGS.client = true;
}
else if (s.matches("user_name")) {
i = s.incrementAndCheck(i, args);
ARGS.user_name = args[i];
}
else if (s.matches("ice_root")) {
i = s.incrementAndCheck(i, args);
ARGS.ice_root = args[i];
}
else if (s.matches("log_dir")) {
i = s.incrementAndCheck(i, args);
ARGS.log_dir = args[i];
}
else if (s.matches("flow_dir")) {
i = s.incrementAndCheck(i, args);
ARGS.flow_dir = args[i];
}
else if (s.matches("disable_web")) {
ARGS.disable_web = true;
}
else if (s.matches("context_path")) {
i = s.incrementAndCheck(i, args);
String value = args[i];
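// Normalize the context path: "/" becomes "", "path" becomes "/path", and "/path" is kept as-is.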
ARGS.context_path = value.startsWith("/")
? value.trim().length() == 1
? "" : value
: "/" + value;
}
else if (s.matches("nthreads")) {
i = s.incrementAndCheck(i, args);
int nthreads = s.parseInt(args[i]);
if (nthreads >= 1) { //otherwise keep default (all cores)
if (nthreads > Short.MAX_VALUE)
throw H2O.unimpl("Can't handle more than " + Short.MAX_VALUE + " threads.");
ARGS.nthreads = (short) nthreads;
}
}
else if (s.matches("hdfs_config")) {
i = s.incrementAndCheck(i, args);
ARGS.hdfs_config = args[i];
}
else if (s.matches("hdfs_skip")) {
ARGS.hdfs_skip = true;
}
else if (s.matches("aws_credentials")) {
i = s.incrementAndCheck(i, args);
ARGS.aws_credentials = args[i];
}
else if (s.matches("ga_hadoop_ver")) {
i = s.incrementAndCheck(i, args);
ARGS.ga_hadoop_ver = args[i];
}
else if (s.matches("ga_opt_out")) {
// JUnits pass this as a system property, but it is usually a flag without an arg
if (i+1 < args.length && args[i+1].equals("yes")) i++;
ARGS.ga_opt_out = true;
}
else if (s.matches("log_level")) {
i = s.incrementAndCheck(i, args);
ARGS.log_level = args[i];
}
else if (s.matches("random_udp_drop")) {
ARGS.random_udp_drop = true;
}
else if (s.matches("md5skip")) {
ARGS.md5skip = true;
}
else if (s.matches("quiet")) {
ARGS.quiet = true;
}
else if(s.matches("useUDP")) {
ARGS.useUDP = true;
}
else if(s.matches("cleaner")) {
ARGS.cleaner = true;
}
else if (s.matches("jks")) {
i = s.incrementAndCheck(i, args);
ARGS.jks = args[i];
}
else if (s.matches("jks_pass")) {
i = s.incrementAndCheck(i, args);
ARGS.jks_pass = args[i];
}
else if (s.matches("hash_login")) {
ARGS.hash_login = true;
}
else if (s.matches("ldap_login")) {
ARGS.ldap_login = true;
}
else if (s.matches("kerberos_login")) {
ARGS.kerberos_login = true;
}
else if (s.matches("pam_login")) {
ARGS.pam_login = true;
}
else if (s.matches("login_conf")) {
i = s.incrementAndCheck(i, args);
ARGS.login_conf = args[i];
}
else if (s.matches("form_auth")) {
ARGS.form_auth = true;
}
else if (s.matches("session_timeout")) {
i = s.incrementAndCheck(i, args);
ARGS.session_timeout_spec = args[i];
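// A malformed value leaves session_timeout at 0; validateArguments() rejects it later.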
try { ARGS.session_timeout = Integer.parseInt(args[i]); } catch (Exception e) { /* ignored */ }
}
else if (s.matches("internal_security_conf")) {
i = s.incrementAndCheck(i, args);
ARGS.internal_security_conf = args[i];
}
else if (s.matches("no_latest_check")) {
ARGS.noLatestCheck = true;
}
else {
parseFailed("Unknown argument (" + s + ")");
}
}
}
private static void validateArguments() {
if (ARGS.jks != null) {
if (! new File(ARGS.jks).exists()) {
parseFailed("File does not exist: " + ARGS.jks);
}
}
if (ARGS.login_conf != null) {
if (! new File(ARGS.login_conf).exists()) {
parseFailed("File does not exist: " + ARGS.login_conf);
}
}
int login_arg_count = 0;
if (ARGS.hash_login) login_arg_count++;
if (ARGS.ldap_login) login_arg_count++;
if (ARGS.kerberos_login) login_arg_count++;
if (ARGS.pam_login) login_arg_count++;
if (login_arg_count > 1) {
parseFailed("Can only specify one of -hash_login, -ldap_login, -kerberos_login and -pam_login");
}
if (ARGS.hash_login || ARGS.ldap_login || ARGS.kerberos_login || ARGS.pam_login) {
if (H2O.ARGS.login_conf == null) {
parseFailed("Must specify -login_conf argument");
}
} else {
if (H2O.ARGS.form_auth) {
parseFailed("No login method was specified. Form-based authentication can only be used in conjunction with of a LoginService.\n" +
"Pick a LoginService by specifying '-<method>_login' option.");
}
}
if (ARGS.session_timeout_spec != null) {
if (! ARGS.form_auth) {
parseFailed("Session timeout can only be enabled for Form based authentication (use -form_auth)");
}
if (ARGS.session_timeout <= 0)
parseFailed("Invalid session timeout specification (" + ARGS.session_timeout + ")");
}
// Validate extension arguments
for (AbstractH2OExtension e : H2O.getExtensions()) {
e.validateArguments();
}
}
// Google analytics performance measurement
public static GoogleAnalytics GA;
public static int CLIENT_TYPE_GA_CUST_DIM = 1;
public static int CLIENT_ID_GA_CUST_DIM = 2;
//-------------------------------------------------------------------------------------------------------------------
// Embedded configuration for a full H2O node to be implanted in another
// piece of software (e.g. Hadoop mapper task).
//-------------------------------------------------------------------------------------------------------------------
public static volatile AbstractEmbeddedH2OConfig embeddedH2OConfig;
/**
* Register embedded H2O configuration object with H2O instance.
*/
public static void setEmbeddedH2OConfig(AbstractEmbeddedH2OConfig c) { embeddedH2OConfig = c; }
public static AbstractEmbeddedH2OConfig getEmbeddedH2OConfig() { return embeddedH2OConfig; }
/**
* Tell the embedding software that this H2O instance belongs to
* a cloud of a certain size.
* This may be non-blocking.
*
* @param ip IP address this H2O can be reached at.
* @param port Port this H2O can be reached at (for REST API and browser).
* @param size Number of H2O instances in the cloud.
*/
public static void notifyAboutCloudSize(InetAddress ip, int port, int size) {
if (embeddedH2OConfig == null) { return; }
embeddedH2OConfig.notifyAboutCloudSize(ip, port, size);
}
public static void closeAll() {
try { NetworkInit._udpSocket.close(); } catch( IOException ignore ) { }
try { H2O.getJetty().stop(); } catch( Exception ignore ) { }
try { NetworkInit._tcpSocket.close(); } catch( IOException ignore ) { }
PersistManager PM = H2O.getPM();
if( PM != null ) PM.getIce().cleanUp();
}
/** Notify the embedding software that this H2O instance wants to exit. Shuts down a single Node.
* @param status H2O's requested process exit value.
*/
public static void exit(int status) {
// Embedded H2O path (e.g. inside Hadoop mapper task).
if( embeddedH2OConfig != null )
embeddedH2OConfig.exit(status);
// Flush all cached messages
Log.flushStdout();
// Standalone H2O path, or if the embedded config does not exit
System.exit(status);
}
/** Shut down the cluster by sending a shutdown UDP packet. */
public static void shutdown(int status) {
if(status == 0) H2O.orderlyShutdown();
UDPRebooted.T.error.send(H2O.SELF);
H2O.exit(status);
}
/** Orderly shutdown with infinite timeout for confirmations from the nodes in the cluster */
public static int orderlyShutdown() {
return orderlyShutdown(-1);
}
public static int orderlyShutdown(int timeout) {
boolean [] confirmations = new boolean[H2O.CLOUD.size()];
if (H2O.SELF.index() >= 0) { // Do not wait for clients to shutdown
confirmations[H2O.SELF.index()] = true;
}
Futures fs = new Futures();
for(H2ONode n:H2O.CLOUD._memary) {
if(n != H2O.SELF)
fs.add(new RPC(n, new ShutdownTsk(H2O.SELF,n.index(), 1000, confirmations, 0)).call());
}
if(timeout > 0)
try { Thread.sleep(timeout); }
catch (Exception ignore) {}
else fs.blockForPending(); // todo, should really have block for pending with a timeout
int failedToShutdown = 0;
// Count the nodes that failed to confirm shutdown
for(boolean b:confirmations)
if(!b) failedToShutdown++;
return failedToShutdown;
}
private static volatile boolean _shutdownRequested = false;
public static void requestShutdown() {
_shutdownRequested = true;
}
public static boolean getShutdownRequested() {
return _shutdownRequested;
}
//-------------------------------------------------------------------------------------------------------------------
public static final AbstractBuildVersion ABV;
static {
AbstractBuildVersion abv = AbstractBuildVersion.UNKNOWN_VERSION;
try {
Class klass = Class.forName("water.init.BuildVersion");
java.lang.reflect.Constructor constructor = klass.getConstructor();
abv = (AbstractBuildVersion) constructor.newInstance();
} catch (Exception ignore) { }
ABV = abv;
}
//-------------------------------------------------------------------------------------------------------------------
private static boolean _haveInheritedLog4jConfiguration = false;
public static boolean haveInheritedLog4jConfiguration() {
return _haveInheritedLog4jConfiguration;
}
public static void configureLogging() {
if (LogManager.getCurrentLoggers().hasMoreElements()) {
_haveInheritedLog4jConfiguration = true;
return;
}
// Disable logging from a few specific classes at startup.
// (These classes may (or may not) be re-enabled later on.)
//
// The full logger initialization is done by setLog4jProperties() in class water.util.Log.
// The trick is the output path / file isn't known until the H2O API PORT is chosen,
// so real logger initialization has to happen somewhat late in the startup lifecycle.
java.util.Properties p = new java.util.Properties();
p.setProperty("log4j.logger.org.reflections.Reflections", "WARN");
p.setProperty("log4j.logger.org.eclipse.jetty", "WARN");
PropertyConfigurator.configure(p);
System.setProperty("org.eclipse.jetty.LEVEL", "WARN");
// Log jetty stuff to stderr for now.
// TODO: figure out how to wire this into log4j.
System.setProperty("org.eclipse.jetty.util.log.class", "org.eclipse.jetty.util.log.StdErrLog");
}
//-------------------------------------------------------------------------------------------------------------------
// Be paranoid and check that this doesn't happen twice.
private static boolean extensionsRegistered = false;
private static long registerExtensionsMillis = 0;
/**
* Register H2O extensions.
* <p/>
* Use a ServiceLoader to discover all implementations of water.AbstractH2OExtension,
* initialize each one, and add it to the extensions list.
*/
public static void registerExtensions() {
if (extensionsRegistered) {
throw H2O.fail("Extensions already registered");
}
long before = System.currentTimeMillis();
ServiceLoader<AbstractH2OExtension> extensionsLoader = ServiceLoader.load(AbstractH2OExtension.class);
for (AbstractH2OExtension e : extensionsLoader) {
e.init();
extensions.add(e);
}
extensionsRegistered = true;
registerExtensionsMillis = System.currentTimeMillis() - before;
}
private static ArrayList<AbstractH2OExtension> extensions = new ArrayList<>();
public static void addExtension(AbstractH2OExtension e) {
extensions.add(e);
}
public static ArrayList<AbstractH2OExtension> getExtensions() {
return extensions;
}
//-------------------------------------------------------------------------------------------------------------------
// Be paranoid and check that this doesn't happen twice.
private static boolean apisRegistered = false;
/**
* Register REST API routes.
*
* Use reflection to find all classes that inherit from {@link water.api.AbstractRegister}
* and call the register() method for each.
*
* @param relativeResourcePath Relative path from running process working dir to find web resources.
*/
public static void registerRestApis(String relativeResourcePath) {
if (apisRegistered) {
throw H2O.fail("APIs already registered");
}
// Log extension registrations here so the message is grouped in the right spot.
for (AbstractH2OExtension e : H2O.getExtensions()) {
e.printInitialized();
}
Log.info("Registered " + H2O.getExtensions().size() + " extensions in: " + registerExtensionsMillis + "mS");
long before = System.currentTimeMillis();
// Disallow schemas whose parent is in another package because it takes ~4s to do the getSubTypesOf call.
String[] packages = new String[] { "water", "hex" };
for (String pkg : packages) {
Reflections reflections = new Reflections(pkg);
Log.debug("Registering REST APIs for package: " + pkg);
for (Class registerClass : reflections.getSubTypesOf(water.api.AbstractRegister.class)) {
if (!Modifier.isAbstract(registerClass.getModifiers())) {
try {
Log.debug("Found REST API registration for class: " + registerClass.getName());
Object instance = registerClass.newInstance();
water.api.AbstractRegister r = (water.api.AbstractRegister) instance;
r.register(relativeResourcePath);
}
catch (Exception e) {
throw H2O.fail(e.toString());
}
}
}
}
apisRegistered = true;
long registerApisMillis = System.currentTimeMillis() - before;
Log.info("Registered: " + RequestServer.numRoutes() + " REST APIs in: " + registerApisMillis + "mS");
}
//-------------------------------------------------------------------------------------------------------------------
public static class AboutEntry {
private String name;
private String value;
public String getName() { return name; }
public String getValue() { return value; }
AboutEntry(String n, String v) {
name = n;
value = v;
}
}
private static ArrayList<AboutEntry> aboutEntries = new ArrayList<>();
@SuppressWarnings("unused")
public static void addAboutEntry(String name, String value) {
AboutEntry e = new AboutEntry(name, value);
aboutEntries.add(e);
}
@SuppressWarnings("unused")
public static ArrayList<AboutEntry> getAboutEntries() {
return aboutEntries;
}
//-------------------------------------------------------------------------------------------------------------------
private static AtomicLong nextModelNum = new AtomicLong(0);
/**
* Calculate a unique model id that includes User-Agent info (if it can be discovered).
* For the user agent info to be discovered, this needs to be called from a Jetty thread.
*
* This lets us distinguish models created from R vs. other front-ends, for example.
* At some future point, it could make sense to include a sessionId here.
*
* The algorithm is:
* <desc>_model_[<userAgentPrefixIfKnown>_]<cloudId>_<monotonicallyIncreasingInteger>
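*   e.g. "gbm_model_python_1448391097156_42" (hypothetical values, for illustration)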
*
* Right now because of the way the REST API works, a bunch of numbers are created and
* thrown away. So the values are monotonically increasing but not contiguous.
*
* @param desc Model description.
* @return The suffix.
*/
synchronized public static String calcNextUniqueModelId(String desc) {
StringBuilder sb = new StringBuilder();
sb.append(desc).append("_model_");
// Append user agent string if we can figure it out.
String source = JettyHTTPD.getUserAgent();
if (source != null) {
StringBuilder ua = new StringBuilder();
if (source.contains("Safari")) {
ua.append("safari");
}
else if (source.contains("Python")) {
ua.append("python");
}
else {
for (int i = 0; i < source.length(); i++) {
char c = source.charAt(i);
if (c >= 'a' && c <= 'z') {
ua.append(c);
continue;
} else if (c >= 'A' && c <= 'Z') {
ua.append(c);
continue;
}
break;
}
}
if (ua.toString().length() > 0) {
sb.append(ua.toString()).append("_");
}
}
// REST API needs some refactoring to avoid burning lots of extra numbers.
//
// I actually tried only doing the addAndGet only for POST requests (and junk UUID otherwise),
// but that didn't eliminate the gaps.
long n = nextModelNum.addAndGet(1);
sb.append(Long.toString(CLUSTER_ID)).append("_").append(Long.toString(n));
return sb.toString();
}
//-------------------------------------------------------------------------------------------------------------------
// Atomically set once during startup. Guards against repeated startups.
public static final AtomicLong START_TIME_MILLIS = new AtomicLong(); // When did main() run
// Used to gate default worker threadpool sizes
public static final int NUMCPUS = Runtime.getRuntime().availableProcessors();
// Best-guess process ID
public static long PID = -1L;
/**
* Return an exception that, when thrown by the caller, fails the request but lets the cluster continue.
* @see #fail(String, Throwable)
* @return an H2OIllegalArgumentException for the caller to throw
*/
public static H2OIllegalArgumentException unimpl() { return new H2OIllegalArgumentException("unimplemented"); }
/**
* Return an exception that, when thrown by the caller, fails the request but lets the cluster continue.
* @see #unimpl()
* @see #fail(String, Throwable)
* @return an H2OIllegalArgumentException for the caller to throw
*/
public static H2OIllegalArgumentException unimpl(String msg) { return new H2OIllegalArgumentException("unimplemented: " + msg); }
/**
* H2O.fail is intended to be used in code where something should never happen, and if
* it does it's a coding error that needs to be addressed immediately. Examples are:
* AutoBuffer serialization for an object you're trying to serialize isn't available;
* there's a typing error on your schema; your switch statement didn't cover all the AstRoot
* subclasses available in Rapids.
* <p>
* It should *not* be used when only the single request should fail, it should *only* be
* used if the error means that someone needs to go add some code right away.
*
* @param msg Message to Log.fatal()
* @param cause Optional cause exception to Log.fatal()
* @return never returns; calls System.exit(-1)
*/
public static H2OFailException fail(String msg, Throwable cause) {
Log.fatal(msg);
if (null != cause) Log.fatal(cause);
Log.fatal("Stacktrace: ");
Log.fatal(Arrays.toString(Thread.currentThread().getStackTrace()));
H2O.shutdown(-1);
// unreachable
return new H2OFailException(msg);
}
/**
* @see #fail(String, Throwable)
* @return never returns
*/
public static H2OFailException fail() { return H2O.fail("Unknown code failure"); }
/**
* @see #fail(String, Throwable)
* @return never returns
*/
public static H2OFailException fail(String msg) { return H2O.fail(msg, null); }
/**
* Return an error message with an accompanying URL to help the user get more detailed information.
*
* @param number H2O tech note number.
* @param message Message to present to the user.
* @return A longer message including a URL.
*/
public static String technote(int number, String message) {
return message + "\n\n" +
"For more information visit:\n" +
" http://jira.h2o.ai/browse/TN-" + Integer.toString(number);
}
/**
* Return an error message with an accompanying list of URLs to help the user get more detailed information.
*
* @param numbers H2O tech note numbers.
* @param message Message to present to the user.
* @return A longer message including a list of URLs.
*/
public static String technote(int[] numbers, String message) {
StringBuilder sb = new StringBuilder()
.append(message)
.append("\n")
.append("\n")
.append("For more information visit:\n");
for (int number : numbers) {
sb.append(" http://jira.h2o.ai/browse/TN-").append(Integer.toString(number)).append("\n");
}
return sb.toString();
}
// --------------------------------------------------------------------------
// The worker pools - F/J pools with different priorities.
// These priorities are carefully ordered and asserted for... modify with
// care. The real problem here is that we can get into cyclic deadlock
// unless we spawn a thread of priority "X+1" in order to allow progress
// on a queue which might be flooded with a large number of "<=X" tasks.
//
// Example of deadlock: suppose TaskPutKey and the Invalidate ran at the same
// priority on a 2-node cluster. Both nodes flood their own queues with
// writes to unique keys, which require invalidates to run on the other node.
// Suppose the flooding depth exceeds the thread-limit (e.g. 99); then each
// node might have all 99 worker threads blocked in TaskPutKey, awaiting
// remote invalidates - but the other nodes' threads are also all blocked
// awaiting invalidates!
//
// We fix this by being willing to always spawn a thread working on jobs at
// priority X+1, and guaranteeing there are no jobs above MAX_PRIORITY -
// i.e., jobs running at MAX_PRIORITY cannot block, and when those jobs are
// done, the next lower level jobs get unblocked, etc.
public static final byte MAX_PRIORITY = Byte.MAX_VALUE-1;
public static final byte ACK_ACK_PRIORITY = MAX_PRIORITY; //126
public static final byte FETCH_ACK_PRIORITY = MAX_PRIORITY-1; //125
public static final byte ACK_PRIORITY = MAX_PRIORITY-2; //124
public static final byte DESERIAL_PRIORITY = MAX_PRIORITY-3; //123
public static final byte INVALIDATE_PRIORITY = MAX_PRIORITY-3; //123
public static final byte GET_KEY_PRIORITY = MAX_PRIORITY-4; //122
public static final byte PUT_KEY_PRIORITY = MAX_PRIORITY-5; //121
public static final byte ATOMIC_PRIORITY = MAX_PRIORITY-6; //120
public static final byte GUI_PRIORITY = MAX_PRIORITY-7; //119
public static final byte MIN_HI_PRIORITY = MAX_PRIORITY-7; //119
public static final byte MIN_PRIORITY = 0;
// F/J threads that remember the priority of the last task they started
// working on.
// made public for ddply
public static class FJWThr extends ForkJoinWorkerThread {
public int _priority;
FJWThr(ForkJoinPool pool) {
super(pool);
_priority = ((PrioritizedForkJoinPool)pool)._priority;
setPriority( _priority == Thread.MIN_PRIORITY
? Thread.NORM_PRIORITY-1
: Thread.MAX_PRIORITY-1 );
setName("FJ-"+_priority+"-"+getPoolIndex());
}
}
// Factory for F/J threads, with caps that vary with priority.
static class FJWThrFact implements ForkJoinPool.ForkJoinWorkerThreadFactory {
private final int _cap;
FJWThrFact( int cap ) { _cap = cap; }
@Override public ForkJoinWorkerThread newThread(ForkJoinPool pool) {
int cap = _cap==-1 ? 4 * NUMCPUS : _cap;
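// Returning null (instead of a thread) refuses growth, capping the pool at 'cap' threads.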
return pool.getPoolSize() <= cap ? new FJWThr(pool) : null;
}
}
// A standard FJ Pool, with an expected priority level.
private static class PrioritizedForkJoinPool extends ForkJoinPool {
final int _priority;
private PrioritizedForkJoinPool(int p, int cap) {
super((ARGS.nthreads <= 0) ? NUMCPUS : ARGS.nthreads,
new FJWThrFact(cap),
null,
p>=MIN_HI_PRIORITY /* low priority FJQs should use the default FJ settings to use LIFO order of thread private queues. */);
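// (The 4th ForkJoinPool constructor arg is asyncMode: true selects FIFO scheduling, used here for the hi-priority pools.)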
_priority = p;
}
private H2OCountedCompleter poll2() { return (H2OCountedCompleter)pollSubmission(); }
}
// Hi-priority work, sorted into individual queues per-priority.
// Capped at a small number of threads per pool.
private static final PrioritizedForkJoinPool[] FJPS = new PrioritizedForkJoinPool[MAX_PRIORITY+1];
static {
// Only need 1 thread for the AckAck work, as it cannot block
FJPS[ACK_ACK_PRIORITY] = new PrioritizedForkJoinPool(ACK_ACK_PRIORITY,1);
for( int i=MIN_HI_PRIORITY+1; i<MAX_PRIORITY; i++ )
FJPS[i] = new PrioritizedForkJoinPool(i,4); // All CPUs, but no more for blocking purposes
FJPS[GUI_PRIORITY] = new PrioritizedForkJoinPool(GUI_PRIORITY,2);
}
// Easy peeks at the FJ queues
static int getWrkQueueSize (int i) { return FJPS[i]==null ? -1 : FJPS[i].getQueuedSubmissionCount();}
static int getWrkThrPoolSize(int i) { return FJPS[i]==null ? -1 : FJPS[i].getPoolSize(); }
// For testing purposes (verifying API work exceeds grunt model-build work)
// capture the class of any submitted job lower than this priority;
static public int LOW_PRIORITY_API_WORK;
static public String LOW_PRIORITY_API_WORK_CLASS;
// Submit to the correct priority queue
public static <T extends H2OCountedCompleter> T submitTask( T task ) {
int priority = task.priority();
if( priority < LOW_PRIORITY_API_WORK )
LOW_PRIORITY_API_WORK_CLASS = task.getClass().toString();
assert MIN_PRIORITY <= priority && priority <= MAX_PRIORITY : "priority " + priority + " is out of range, expected range is [" + MIN_PRIORITY + "," + MAX_PRIORITY + "]";
if( FJPS[priority]==null )
synchronized( H2O.class ) { if( FJPS[priority] == null ) FJPS[priority] = new PrioritizedForkJoinPool(priority,-1); }
FJPS[priority].submit(task);
return task;
}
public static abstract class H2OFuture<T> implements Future<T> {
public final T getResult(){
try {
return get();
} catch (InterruptedException | ExecutionException e) {
throw new RuntimeException(e);
}
}
}
/** Simple wrapper over F/J {@link CountedCompleter} to support priority
* queues. F/J queues are simple unordered (and extremely light weight)
* queues. However, we frequently need priorities to avoid deadlock and to
* promote efficient throughput (e.g. failure to respond quickly to {@link
* TaskGetKey} can block an entire node for lack of some small piece of
* data). So each attempt to do lower-priority F/J work starts with an
* attempt to work and drain the higher-priority queues. */
public static abstract class H2OCountedCompleter<T extends H2OCountedCompleter>
extends CountedCompleter
implements Cloneable, Freezable<T> {
@Override
public byte [] asBytes(){return new AutoBuffer().put(this).buf();}
@Override
public T reloadFromBytes(byte [] ary){ return read(new AutoBuffer(ary));}
private /*final*/ byte _priority;
// Without a completer, we expect this task will be blocked on - so the
// blocking thread is not available in the current thread pool, so the
// launched task needs to run at a higher priority.
public H2OCountedCompleter( ) { this(null); }
// With a completer, this task will NOT be blocked on and the current
// thread is available for executing it... so the priority can remain at
// the current level.
static private byte computePriority( H2OCountedCompleter completer ) {
int currThrPrior = currThrPriority();
// If there's no completer, then current thread will block on this task
// at the current priority, possibly filling up the current-priority
// thread pool - so the task has to run at the next higher priority.
if( completer == null ) return (byte)(currThrPrior+1);
// With a completer - no thread blocks on this task, so no thread pool
// gets filled-up with blocked threads. We can run at the current
// priority (or the completer's priority if it's higher).
return (byte)Math.max(currThrPrior,completer.priority());
}
protected H2OCountedCompleter(H2OCountedCompleter completer) { this(completer,computePriority(completer)); }
// Special for picking GUI priorities
protected H2OCountedCompleter( byte prior ) { this(null,prior); }
protected H2OCountedCompleter(H2OCountedCompleter completer, byte prior) {
super(completer);
_priority = prior;
}
/** Used by the F/J framework internally to do work. Once per F/J task,
* drain the high priority queue before doing any low priority work.
* Calls {@link #compute2} which contains actual work. */
@Override public final void compute() {
FJWThr t = (FJWThr)Thread.currentThread();
int pp = ((PrioritizedForkJoinPool)t.getPool())._priority;
// Drain the high priority queues before the normal F/J queue
H2OCountedCompleter h2o = null;
boolean set_t_prior = false;
try {
assert priority() == pp:" wrong priority for task " + getClass().getSimpleName() + ", expected " + priority() + ", but got " + pp; // Job went to the correct queue?
assert t._priority <= pp; // Thread attempting the job is only a low-priority?
final int p2 = Math.max(pp,MIN_HI_PRIORITY);
for( int p = MAX_PRIORITY; p > p2; p-- ) {
if( FJPS[p] == null ) continue;
h2o = FJPS[p].poll2();
if( h2o != null ) { // Got a hi-priority job?
t._priority = p; // Set & do it now!
t.setPriority(Thread.MAX_PRIORITY-1);
set_t_prior = true;
h2o.compute2(); // Do it ahead of normal F/J work
p++; // Check again the same queue
}
}
} catch( Throwable ex ) {
// If the higher priority job popped an exception, complete it
// exceptionally... but then carry on and do the lower priority job.
if( h2o != null ) h2o.completeExceptionally(ex);
else { ex.printStackTrace(); throw ex; }
} finally {
t._priority = pp;
if( pp == MIN_PRIORITY && set_t_prior ) t.setPriority(Thread.NORM_PRIORITY-1);
}
// Now run the task as planned
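// (DTasks are routed through the generated Icer, which can record an exception for shipping back to the remote caller.)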
if( this instanceof DTask ) icer().compute1(this);
else compute2();
}
public void compute1() { compute2(); }
/** Override compute2() with actual work without having to worry about tryComplete() */
public void compute2() {}
// In order to prevent deadlock, threads that block waiting for a reply
// from a remote node, need the remote task to run at a higher priority
// than themselves. This field tracks the required priority.
protected final byte priority() { return _priority; }
@Override public final T clone(){
try { return (T)super.clone(); }
catch( CloneNotSupportedException e ) { throw Log.throwErr(e); }
}
/** If this is a F/J thread, return its priority - used to lift the
* priority of a blocking remote call, so the remote node runs it at a
* higher priority - so we don't deadlock when we burn the local
* thread. */
protected static byte currThrPriority() {
Thread cThr = Thread.currentThread();
return (byte)((cThr instanceof FJWThr) ? ((FJWThr)cThr)._priority : MIN_PRIORITY);
}
// The serialization flavor / delegate. Lazily set on first use.
private short _ice_id;
/** Find the serialization delegate for a subclass of this class */
protected Icer<T> icer() {
int id = _ice_id;
if( id != 0 ) { // Sanity-check the cached id against the TypeMap
int tyid;
assert id == (tyid = TypeMap.onIce(this)) : "incorrectly cached id " + id + ", typemap has " + tyid + ", type = " + getClass().getName();
}
return TypeMap.getIcer(id != 0 ? id : (_ice_id = (short) TypeMap.onIce(this)), this);
}
@Override final public AutoBuffer write (AutoBuffer ab) { return icer().write (ab,(T)this); }
@Override final public AutoBuffer writeJSON(AutoBuffer ab) { return icer().writeJSON(ab,(T)this); }
@Override final public T read (AutoBuffer ab) { return icer().read (ab,(T)this); }
@Override final public T readJSON(AutoBuffer ab) { return icer().readJSON(ab,(T)this); }
@Override final public int frozenType() { return icer().frozenType(); }
}
public static abstract class H2OCallback<T extends H2OCountedCompleter> extends H2OCountedCompleter{
public H2OCallback(){}
public H2OCallback(H2OCountedCompleter cc){super(cc);}
@Override
public void compute2(){throw H2O.fail();}
@Override public void onCompletion(CountedCompleter caller){callback((T) caller);}
public abstract void callback(T t);
}
public static int H2O_PORT; // Both TCP & UDP cluster ports
public static int API_PORT; // RequestServer and the API HTTP port
/**
* @return String of the form ipaddress:port
*/
public static String getIpPortString() {
return H2O.ARGS.disable_web? "" : H2O.SELF_ADDRESS.getHostAddress() + ":" + H2O.API_PORT;
}
public static String getURL(String schema) {
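// IPv6 literal addresses must be wrapped in brackets inside URLs, e.g. "http://[::1]:54321".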
return String.format(H2O.SELF_ADDRESS instanceof Inet6Address
? "%s://[%s]:%d%s" : "%s://%s:%d%s",
schema, H2O.SELF_ADDRESS.getHostAddress(), H2O.API_PORT, H2O.ARGS.context_path);
}
// The multicast discovery port
public static MulticastSocket CLOUD_MULTICAST_SOCKET;
public static NetworkInterface CLOUD_MULTICAST_IF;
public static InetAddress CLOUD_MULTICAST_GROUP;
public static int CLOUD_MULTICAST_PORT;
/** Myself, as a Node in the Cloud */
public static H2ONode SELF = null;
/** IP address of this node used for communication
* with other nodes.
*/
public static InetAddress SELF_ADDRESS;
/* Global flag to mark this specific cloud instance IPv6 only.
* Right now, users have to force the IPv6 stack by specifying the following
* JVM options:
*  -Djava.net.preferIPv6Addresses=true
*  -Djava.net.preferIPv4Addresses=false
*/
static final boolean IS_IPV6 = NetworkUtils.isIPv6Preferred() && !NetworkUtils.isIPv4Preferred();
// Place to store temp/swap files
public static URI ICE_ROOT;
public static String DEFAULT_ICE_ROOT() {
String username = System.getProperty("user.name");
if (username == null) username = "";
String u2 = username.replaceAll(" ", "_");
if (u2.length() == 0) u2 = "unknown";
return "/tmp/h2o-" + u2;
}
// Place to store flows
public static String DEFAULT_FLOW_DIR() {
String flow_dir = null;
try {
if (ARGS.ga_hadoop_ver != null) {
PersistManager pm = getPM();
if (pm != null) {
String s = pm.getHdfsHomeDirectory();
if (pm.exists(s)) {
flow_dir = s;
}
}
if (flow_dir != null) {
flow_dir = flow_dir + "/h2oflows";
}
} else {
flow_dir = System.getProperty("user.home") + File.separator + "h2oflows";
}
}
catch (Exception ignore) {
// Never want this to fail, as it will kill program startup.
// Returning null is fine if it fails for whatever reason.
}
return flow_dir;
}
/* A static list of acceptable Cloud members passed via -flatfile option.
* It is also updated when a new client appears. */
private static HashSet<H2ONode> STATIC_H2OS = null;
/* List of all clients that ever connected to this cloud */
private static Map<H2ONode.H2Okey, H2ONode> CLIENTS_MAP = new ConcurrentHashMap<>();
// Reverse cloud index to a cloud; limit of 256 old clouds.
static private final H2O[] CLOUDS = new H2O[256];
// Enables debug features like more logging and multiple instances per JVM
static final String DEBUG_ARG = "h2o.debug";
static final boolean DEBUG = System.getProperty(DEBUG_ARG) != null;
// Returned in REST API responses as X-h2o-cluster-id.
//
// Currently this is unique per node. Might make sense to distribute this
// as part of joining the cluster so all nodes have the same value.
public static final long CLUSTER_ID = System.currentTimeMillis();
private static JettyHTTPD jetty;
public static void setJetty(JettyHTTPD value) {
jetty = value;
}
public static JettyHTTPD getJetty() {
return jetty;
}
/** If logging has not been setup yet, then Log.info will only print to
* stdout. This allows for early processing of the '-version' option
* without unpacking the jar file and other startup stuff. */
private static void printAndLogVersion(String[] arguments) {
String latestVersion = ARGS.noLatestCheck ? "?" : ABV.getLatestH2OVersion();
Log.init(ARGS.log_level, ARGS.quiet);
Log.info("----- H2O started " + (ARGS.client?"(client)":"") + " -----");
Log.info("Build git branch: " + ABV.branchName());
Log.info("Build git hash: " + ABV.lastCommitHash());
Log.info("Build git describe: " + ABV.describe());
Log.info("Build project version: " + ABV.projectVersion() + " (latest version: " + latestVersion + ")");
Log.info("Build age: " + PrettyPrint.toAge(ABV.compiledOnDate(), new Date()));
Log.info("Built by: '" + ABV.compiledBy() + "'");
Log.info("Built on: '" + ABV.compiledOn() + "'");
if (ABV.isTooOld()) {
Log.warn("\n*** Your H2O version is too old! Please download the latest version " + latestVersion + " from http://h2o.ai/download/ ***");
Log.warn("");
}
for (AbstractH2OExtension e : H2O.getExtensions()) {
String n = e.getExtensionName() + " ";
AbstractBuildVersion abv = e.getBuildVersion();
Log.info(n + "Build git branch: ", abv.branchName());
Log.info(n + "Build git hash: ", abv.lastCommitHash());
Log.info(n + "Build git describe: ", abv.describe());
Log.info(n + "Build project version: ", abv.projectVersion());
Log.info(n + "Built by: ", abv.compiledBy());
Log.info(n + "Built on: ", abv.compiledOn());
}
Log.info("Processed H2O arguments: ", Arrays.toString(arguments));
Runtime runtime = Runtime.getRuntime();
Log.info("Java availableProcessors: " + runtime.availableProcessors());
Log.info("Java heap totalMemory: " + PrettyPrint.bytes(runtime.totalMemory()));
Log.info("Java heap maxMemory: " + PrettyPrint.bytes(runtime.maxMemory()));
Log.info("Java version: Java "+System.getProperty("java.version")+" (from "+System.getProperty("java.vendor")+")");
List<String> launchStrings = ManagementFactory.getRuntimeMXBean().getInputArguments();
Log.info("JVM launch parameters: "+launchStrings);
Log.info("OS version: "+System.getProperty("os.name")+" "+System.getProperty("os.version")+" ("+System.getProperty("os.arch")+")");
long totalMemory = OSUtils.getTotalPhysicalMemory();
Log.info ("Machine physical memory: " + (totalMemory==-1 ? "NA" : PrettyPrint.bytes(totalMemory)));
}
private static void startGAStartupReport() {
new GAStartupReportThread().start();
}
/** Initializes the local node and the local cloud with itself as the only member. */
private static void startLocalNode() {
PID = -1L;
try {
String n = ManagementFactory.getRuntimeMXBean().getName();
int i = n.indexOf('@');
if( i != -1 ) PID = Long.parseLong(n.substring(0, i));
} catch( Throwable ignore ) { }
// Figure self out; this is surprisingly hard
NetworkInit.initializeNetworkSockets();
// Do not forget to put SELF into the static configuration (to simulate
// proper multicast behavior)
if( !ARGS.client && STATIC_H2OS != null && !STATIC_H2OS.contains(SELF)) {
Log.warn("Flatfile configuration does not include self: " + SELF+ " but contains " + STATIC_H2OS);
STATIC_H2OS.add(SELF);
}
Log.info ("H2O cloud name: '" + ARGS.name + "' on " + SELF+
(ARGS.flatfile==null
? (", discovery address "+CLOUD_MULTICAST_GROUP+":"+CLOUD_MULTICAST_PORT)
: ", static configuration based on -flatfile "+ARGS.flatfile));
if (!H2O.ARGS.disable_web) {
Log.info("If you have trouble connecting, try SSH tunneling from your local machine (e.g., via port 55555):\n" +
" 1. Open a terminal and run 'ssh -L 55555:localhost:"
+ API_PORT + " " + System.getProperty("user.name") + "@" + SELF_ADDRESS.getHostAddress() + "'\n" +
" 2. Point your browser to " + jetty.getScheme() + "://localhost:55555");
}
// Create the starter Cloud with 1 member
SELF._heartbeat._jar_md5 = JarHash.JARHASH;
SELF._heartbeat._client = ARGS.client;
SELF._heartbeat._cloud_name_hash = ARGS.name.hashCode();
}
/** Starts the worker threads, receiver threads, heartbeats and all other
* network related services. */
private static void startNetworkServices() {
// We've rebooted the JVM recently. Tell other Nodes they can ignore
// prior tasks by us. Do this before we receive any packets.
UDPRebooted.T.reboot.broadcast();
// Start the UDPReceiverThread, to listen for requests from other Cloud
// Nodes. There should be only 1 of these, and it never shuts down.
// Started first, so we can start parsing UDP packets
if(H2O.ARGS.useUDP) {
new UDPReceiverThread(NetworkInit._udpSocket).start();
// Start a UDP timeout worker thread. This guy only handles requests for
// which we have not received a timely response and probably need to
// arrange for a re-send to cover a dropped UDP packet.
new UDPTimeOutThread().start();
// Same same for a dropped ACK needing an ACKACK back.
new H2ONode.AckAckTimeOutThread().start();
}
// Start the MultiReceiverThread, to listen for multi-cast requests from
// other Cloud Nodes. There should be only 1 of these, and it never shuts
// down. Started soon, so we can start parsing multi-cast UDP packets
new MultiReceiverThread().start();
// Start the Persistent meta-data cleaner thread, which updates the K/V
// mappings periodically to disk. There should be only 1 of these, and it
// never shuts down. Needs to start BEFORE the HeartBeatThread to build
// an initial histogram state.
Cleaner.THE_CLEANER.start();
// Start the TCPReceiverThread, to listen for TCP requests from other Cloud
// Nodes. There should be only 1 of these, and it never shuts down.
new TCPReceiverThread(NetworkInit._tcpSocket).start();
}
// Callbacks to add new Requests & menu items
static private volatile boolean _doneRequests;
static public void register(
String method_url, Class<? extends water.api.Handler> hclass, String method, String apiName, String summary
) {
if (_doneRequests) throw new IllegalArgumentException("Cannot add more Requests once the list is finalized");
RequestServer.registerEndpoint(apiName, method_url, hclass, method, summary);
}
public static void registerResourceRoot(File f) {
JarHash.registerResourceRoot(f);
}
/** Start the web service; disallow future URL registration.
* Blocks until the server is up. */
static public void finalizeRegistration() {
if (_doneRequests || H2O.ARGS.disable_web) return;
_doneRequests = true;
water.api.SchemaServer.registerAllSchemasIfNecessary();
jetty.acceptRequests();
}
// --------------------------------------------------------------------------
// The Current Cloud. A list of all the Nodes in the Cloud. Changes if we
// decide to change Clouds via atomic Cloud update.
public static volatile H2O CLOUD = new H2O(new H2ONode[0],0,0);
// ---
// A dense array indexing all Cloud members. Fast reversal from "member#" to
// Node. No holes. Cloud size is _members.length.
public final H2ONode[] _memary;
final int _hash;
// A dense integer identifier that rolls over rarely. Rollover limits the
// number of simultaneous nested Clouds we are operating on in-parallel.
// Really capped to 1 byte, under the assumption we won't have 256 nested
// Clouds. Capped at 1 byte so it can be part of an atomically-assigned
// 'long' holding info specific to this Cloud.
final char _idx; // no unsigned byte, so unsigned char instead
// Construct a new H2O Cloud from the member list
H2O( H2ONode[] h2os, int hash, int idx ) {
_memary = h2os; // Need to clone?
java.util.Arrays.sort(_memary); // ... sorted!
_hash = hash; // And record hash for cloud rollover
_idx = (char)(idx&0x0ff); // Roll-over at 256
}
// One-shot atomic setting of the next Cloud, with an empty K/V store.
// Called single-threaded from Paxos. Constructs the new H2O Cloud from a
// member list.
void set_next_Cloud( H2ONode[] h2os, int hash ) {
synchronized(this) {
int idx = _idx+1; // Unique 1-byte Cloud index
if( idx == 256 ) idx=1; // wrap, avoiding zero
CLOUDS[idx] = CLOUD = new H2O(h2os,hash,idx);
}
SELF._heartbeat._cloud_size=(char)CLOUD.size();
}
// Is nnn larger than old (counting for wrap around)? Gets confused if we
// start seeing a mix of more than 128 unique clouds at the same time. Used
// to tell the order of Clouds appearing.
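// Example: larger(2, 250) is true since ((2-250)&0xFF) == 8 < 64 (2 reads as
// "just after" 250 modulo 256), while larger(250, 2) is false since 248 >= 64.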
static boolean larger( int nnn, int old ) {
assert (0 <= nnn && nnn <= 255);
assert (0 <= old && old <= 255);
return ((nnn-old)&0xFF) < 64;
}
public final int size() { return _memary.length; }
final H2ONode leader() {
return _memary[0];
}
// Find the node index for this H2ONode, or a negative number on a miss
int nidx( H2ONode h2o ) { return java.util.Arrays.binarySearch(_memary,h2o); }
boolean contains( H2ONode h2o ) { return nidx(h2o) >= 0; }
@Override public String toString() {
return java.util.Arrays.toString(_memary);
}
public H2ONode[] members() { return _memary; }
// Cluster free memory
public long free_mem() {
long memsz = 0;
for( H2ONode h2o : CLOUD._memary )
memsz += h2o._heartbeat.get_free_mem();
return memsz;
}
// Quick health check; no reason given for bad health
public boolean healthy() {
long now = System.currentTimeMillis();
for (H2ONode node : H2O.CLOUD.members())
if (!node.isHealthy(now))
return false;
return true;
}
public static void waitForCloudSize(int x, long ms) {
long start = System.currentTimeMillis();
while( System.currentTimeMillis() - start < ms ) {
if( CLOUD.size() >= x && Paxos._commonKnowledge )
break;
try { Thread.sleep(100); } catch( InterruptedException ignore ) { }
}
if( H2O.CLOUD.size() < x )
throw new RuntimeException("Cloud size under " + x);
}
public static int getCloudSize() {
if (! Paxos._commonKnowledge) return -1;
return CLOUD.size();
}
// - Wait for at least HeartBeatThread.SLEEP msecs and
// try to join others, if any. Try 2x just in case.
// - Assume that we get introduced to everybody else
// in one Paxos update, if at all (i.e, rest of
// the cloud was already formed and stable by now)
// - If nobody else is found, not an error.
public static void joinOthers() {
long start = System.currentTimeMillis();
while( System.currentTimeMillis() - start < 2000 ) {
if( CLOUD.size() > 1 && Paxos._commonKnowledge )
break;
try { Thread.sleep(100); } catch( InterruptedException ignore ) { }
}
}
// --------------------------------------------------------------------------
static void initializePersistence() {
_PM = new PersistManager(ICE_ROOT);
}
// --------------------------------------------------------------------------
// The (local) set of Key/Value mappings.
public static final NonBlockingHashMap<Key,Value> STORE = new NonBlockingHashMap<>();
// PutIfMatch
// - Atomically update the STORE, returning the old Value on success
// - Kick the persistence engine as needed
// - Return existing Value on fail, no change.
//
// Keys are interned here: I always keep the existing Key, if any. The
// existing Key is blind-jammed into the Value prior to atomically inserting
// it into the STORE and interning.
//
// Because of the blind jam, there is a narrow, unusual race where the Key
// might exist but be stale (deleted, mapped to a TOMBSTONE):
// 1. a fresh put() finds the stale Key and jams it into the Value;
// 2. the Key is then deleted completely (e.g. via an invalidate);
// 3. the table resizes, flushing the stale Key;
// 4. an unrelated weak-put re-inserts a matching Key (but as a new Java
// object), then deletes it;
// 5. the original thread now does a successful put_if_later over the
// missing Key.
// This blows the invariant that a stored Value always points to the
// physically-equal Key that maps to it from the STORE. If it happens, some
// of the replication-management bits will be set in the wrong Key copy...
// leading to extra rounds of replication.
public static Value putIfMatch( Key key, Value val, Value old ) {
if( old != null ) // Have an old value?
key = old._key; // Use prior key
if( val != null ) {
assert val._key.equals(key);
if( val._key != key ) val._key = key; // Attempt to uniquify keys
}
// Insert into the K/V store
Value res = STORE.putIfMatchUnlocked(key,val,old);
if( res != old ) return res; // Return the failure cause
// Persistence-tickle.
// If the K/V mapping is going away, remove the old guy.
// If the K/V mapping is changing, let the store cleaner just overwrite.
// If the K/V mapping is new, let the store cleaner just create
if( old != null && val == null ) old.removePersist(); // Remove the old guy
if( val != null ) {
Cleaner.dirty_store(); // Start storing the new guy
if( old==null ) Scope.track_internal(key); // New Key - start tracking
}
return old; // Return success
}
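// Usage sketch (illustrative only; updateOf() is a hypothetical helper):
// the classic optimistic-retry loop over putIfMatch. Re-read the current
// mapping and retry until the compare-and-swap sticks:
//   Value old, val;
//   do {
//     old = STORE.get(key);  // current mapping, possibly null
//     val = updateOf(old);   // build the replacement Value
//   } while( putIfMatch(key,val,old) != old ); // any other return means the CAS failed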
// Raw remove: take the mapping out of the local STORE and drop any persisted copy
public static void raw_remove(Key key) {
Value v = STORE.remove(key);
if( v != null ) v.removePersist();
}
public static void raw_clear() { STORE.clear(); }
public static boolean containsKey( Key key ) { return STORE.get(key) != null; }
static Key getk( Key key ) { return STORE.getk(key); }
public static Set<Key> localKeySet( ) { return STORE.keySet(); }
static Collection<Value> values( ) { return STORE.values(); }
static public int store_size() { return STORE.size(); }
// Nice local-STORE only debugging summary
public static String STOREtoString() {
int[] cnts = new int[1];
Object[] kvs = H2O.STORE.raw_array();
// Start the walk at slot 2, because slots 0,1 hold meta-data
for( int i=2; i<kvs.length; i += 2 ) {
// In the raw backing array, Keys and Values alternate in slots
Object ov = kvs[i+1];
if( !(ov instanceof Value) ) continue; // Ignore tombstones, Primes, and nulls
Value val = (Value)ov;
if( val.isNull() ) { Value.STORE_get(val._key); continue; } // Another variant of NULL
int t = val.type();
while( t >= cnts.length ) cnts = Arrays.copyOf(cnts,cnts.length<<1);
cnts[t]++;
}
StringBuilder sb = new StringBuilder();
for( int t=0; t<cnts.length; t++ )
if( cnts[t] != 0 )
sb.append(String.format("-%30s %5d\n",TypeMap.CLAZZES[t],cnts[t]));
return sb.toString();
}
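// Sample output (illustrative; class names and counts vary per run):
//   -               water.fvec.Chunk   512
//   -               water.fvec.Frame     3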
// Persistence manager
private static PersistManager _PM;
public static PersistManager getPM() { return _PM; }
// Node persistent storage
private static NodePersistentStorage NPS;
public static NodePersistentStorage getNPS() { return NPS; }
/**
* Run System.gc() on every node in the H2O cluster.
*
* Having to call this manually from user code is a sign that something is wrong and a better
* heuristic is needed internally.
*/
public static void gc() {
class GCTask extends DTask<GCTask> {
public GCTask() {super(GUI_PRIORITY);}
@Override public void compute2() {
Log.info("Calling System.gc() now...");
System.gc();
Log.info("System.gc() finished");
tryComplete();
}
}
for (H2ONode node : H2O.CLOUD._memary) {
GCTask t = new GCTask();
new RPC<>(node, t).call().get();
}
}
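// Note: each RPC above is call()'d and then immediately get()'d, so the
// cluster is GC'd one node at a time rather than all at once.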
/**
* Check whether the Java version running this process is unsupported.
* @return true if the version or VM is not supported
*/
public static boolean checkUnsupportedJava() {
String version = System.getProperty("java.version");
if (version != null && !(version.startsWith("1.6") || version.startsWith("1.7") || version.startsWith("1.8"))) {
System.err.println("Only Java 1.6-1.8 supported, version is " + version);
return true;
}
String vmName = System.getProperty("java.vm.name");
if (vmName != null && vmName.equals("GNU libgcj")) {
System.err.println("GNU gcj is not supported");
return true;
}
return false;
}
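// For example (illustrative): "1.8.0_101" passes, while "9.0.4" or "11.0.2"
// are rejected because they do not start with "1.6", "1.7" or "1.8".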
// --------------------------------------------------------------------------
public static void main( String[] args ) {
long time0 = System.currentTimeMillis();
if (checkUnsupportedJava())
throw new RuntimeException("Unsupported Java version");
// Record system start-time.
if( !START_TIME_MILLIS.compareAndSet(0L, System.currentTimeMillis()) )
return; // Already started
// Copy all ai.h2o.* system properties to the tail of the command line,
// effectively overwriting the earlier args.
ArrayList<String> args2 = new ArrayList<>(Arrays.asList(args));
for( Object p : System.getProperties().keySet() ) {
String s = (String)p;
if( s.startsWith("ai.h2o.") ) {
args2.add("-" + s.substring(7));
// hack: JUnits expect properties; ga_opt_out is a bare flag, so don't append its value
if (!s.substring(7).equals("ga_opt_out") && !System.getProperty(s).isEmpty())
args2.add(System.getProperty(s));
}
}
// Parse args
String[] arguments = args2.toArray(args);
parseArguments(arguments);
// Get ice path before loading Log or Persist class
long time1 = System.currentTimeMillis();
String ice = DEFAULT_ICE_ROOT();
if( ARGS.ice_root != null ) ice = ARGS.ice_root.replace("\\", "/");
try {
ICE_ROOT = new URI(ice);
} catch(URISyntaxException ex) {
throw new RuntimeException("Invalid ice_root: " + ice + ", " + ex.getMessage());
}
// Always print version, whether asked-for or not!
long time2 = System.currentTimeMillis();
printAndLogVersion(arguments);
if( ARGS.version ) {
Log.flushStdout();
exit(0);
}
// Print help & exit
if (ARGS.help) {
printHelp();
exit(0);
}
// Validate arguments
validateArguments();
Log.info("X-h2o-cluster-id: " + H2O.CLUSTER_ID);
Log.info("User name: '" + H2O.ARGS.user_name + "'");
// Register with GA or not
long time3 = System.currentTimeMillis();
List<String> gaidList; // fetching this list takes ~100ms
if((new File(".h2o_no_collect")).exists()
|| (new File(System.getProperty("user.home")+File.separator+".h2o_no_collect")).exists()
|| ARGS.ga_opt_out
|| (gaidList = JarHash.getResourcesList("gaid")).contains("CRAN")
|| H2O.ABV.isDevVersion()) {
GA = null;
Log.info("Opted out of sending usage metrics.");
} else {
try {
GA = new GoogleAnalytics("UA-56665317-1", "H2O", ABV.projectVersion());
DefaultRequest defReq = GA.getDefaultRequest();
String gaid = null;
if (gaidList.size() > 0) {
if (gaidList.size() > 1) Log.debug("More than one resource seen in gaid dir.");
for (String str : gaidList) {
if (str.matches("........-....-....-....-............")
&& !str.equals("XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX")) {
gaid = str;
break;
}
}
}
if (gaid == null) { // No UUID, create one
gaid = defReq.clientId();
gaid = gaid.replaceFirst("........-","ANONYMOU-");
}
defReq.customDimension(CLIENT_ID_GA_CUST_DIM, gaid);
GA.setDefaultRequest(defReq);
} catch(Throwable t) {
Log.POST(11, t.toString());
StackTraceElement[] stes = t.getStackTrace();
for (StackTraceElement ste : stes)
Log.POST(11, ste.toString());
}
}
// Epic Hunt for the correct self InetAddress
long time4 = System.currentTimeMillis();
Log.info("IPv6 stack selected: " + IS_IPV6);
SELF_ADDRESS = NetworkInit.findInetAddressForSelf();
// Right now the global preference is to use the IPv4 stack.
// To select the IPv6 stack, the user has to explicitly pass JVM flags
// enabling the IPv6 preference.
if (!IS_IPV6 && SELF_ADDRESS instanceof Inet6Address) {
Log.err("IPv4 network stack specified but IPv6 address found: " + SELF_ADDRESS + "\n"
+ "Please specify JVM flags -Djava.net.preferIPv6Addresses=true and -Djava.net.preferIPv4Addresses=false to select IPv6 stack");
H2O.exit(-1);
}
if (IS_IPV6 && SELF_ADDRESS instanceof Inet4Address) {
Log.err("IPv6 network stack specified but IPv4 address found: " + SELF_ADDRESS);
H2O.exit(-1);
}
// Start the local node. Needed before starting logging.
long time5 = System.currentTimeMillis();
startLocalNode();
// Allow extensions to perform initialization that requires the network.
long time6 = System.currentTimeMillis();
for (AbstractH2OExtension ext: extensions) {
ext.onLocalNodeStarted();
}
try {
String logDir = Log.getLogDir();
Log.info("Log dir: '" + logDir + "'");
}
catch (Exception e) {
Log.info("Log dir: (Log4j configuration inherited)");
}
Log.info("Cur dir: '" + System.getProperty("user.dir") + "'");
// Print extra debug info now that logging is set up
long time7 = System.currentTimeMillis();
RuntimeMXBean rtBean = ManagementFactory.getRuntimeMXBean();
Log.debug("H2O launch parameters: "+ARGS.toString());
Log.debug("Boot class path: "+ rtBean.getBootClassPath());
Log.debug("Java class path: "+ rtBean.getClassPath());
Log.debug("Java library path: "+ rtBean.getLibraryPath());
// Load up from disk and initialize the persistence layer
long time8 = System.currentTimeMillis();
initializePersistence();
// Initialize NPS
{
String flow_dir;
if (ARGS.flow_dir != null) {
flow_dir = ARGS.flow_dir;
}
else {
flow_dir = DEFAULT_FLOW_DIR();
}
if (flow_dir != null) {
flow_dir = flow_dir.replace("\\", "/");
Log.info("Flow dir: '" + flow_dir + "'");
}
else {
Log.info("Flow dir is undefined; saving flows not available");
}
NPS = new NodePersistentStorage(flow_dir);
}
// Start network services, including heartbeats
long time9 = System.currentTimeMillis();
startNetworkServices(); // start server services
Log.trace("Network services started");
// The "Cloud of size N formed" message printed out by doHeartbeat is the trigger
// for users of H2O to know that it's OK to start sending REST API requests.
long time10 = System.currentTimeMillis();
Paxos.doHeartbeat(SELF);
assert SELF._heartbeat._cloud_hash != 0 || ARGS.client;
// Start the heartbeat thread, to publish this Cloud's existence to other
// Clouds. This will typically trigger a round of Paxos voting so we can
// join an existing Cloud.
new HeartBeatThread().start();
long time11 = System.currentTimeMillis();
if (GA != null)
startGAStartupReport();
// Log registered parsers
Log.info("Registered parsers: " + Arrays.toString(ParserService.INSTANCE.getAllProviderNames(true)));
long time12 = System.currentTimeMillis();
Log.debug("Timing within H2O.main():");
Log.debug(" Args parsing & validation: " + (time1 - time0) + "ms");
Log.debug(" Get ICE root: " + (time2 - time1) + "ms");
Log.debug(" Print log version: " + (time3 - time2) + "ms");
Log.debug(" Register GA: " + (time4 - time3) + "ms");
Log.debug(" Detect network address: " + (time5 - time4) + "ms");
Log.debug(" Start local node: " + (time6 - time5) + "ms");
Log.debug(" Extensions onLocalNodeStarted(): " + (time7 - time6) + "ms");
Log.debug(" RuntimeMxBean: " + (time8 - time7) + "ms");
Log.debug(" Initialize persistence layer: " + (time9 - time8) + "ms");
Log.debug(" Start network services: " + (time10 - time9) + "ms");
Log.debug(" Cloud up: " + (time11 - time10) + "ms");
Log.debug(" Start GA: " + (time12 - time11) + "ms");
}
// Die horribly
public static void die(String s) {
Log.fatal(s);
H2O.exit(-1);
}
public static class GAStartupReportThread extends Thread {
private final int sleepMillis = 150 * 1000; // 150 s == 2.5 min
// Constructor.
public GAStartupReportThread() {
super("GAStartupReport"); // Only 9 characters get printed in the log.
setDaemon(true);
setPriority(MAX_PRIORITY - 2);
}
// Class main thread.
@Override
public void run() {
try {
Thread.sleep (sleepMillis);
}
catch (Exception ignore) { }
GAUtils.logStartup();
}
}
/** Add a node to the manual multicast list.
* Note: the method is valid only if the -flatfile option was specified on the command line.
* @param node h2o node
* @return true if the node was not already in the multicast list (i.e., it was newly added)
*/
public static boolean addNodeToFlatfile(H2ONode node) {
assert isFlatfileEnabled() : "Trying to use flatfile, but flatfile is not enabled!";
return STATIC_H2OS.add(node);
}
/** Remove a node from the manual multicast list.
* Note: the method is valid only if the -flatfile option was specified on the command line.
* @param node h2o node
* @return true if the node was in the multicast list (and has now been removed)
*/
public static boolean removeNodeFromFlatfile(H2ONode node){
assert isFlatfileEnabled() : "Trying to use flatfile, but flatfile is not enabled!";
return STATIC_H2OS.remove(node);
}
/** Check if a node is included in the manual multicast list.
* Note: the method is valid only if the -flatfile option was specified on the command line.
*
* @param node h2o node
* @return true if the node is in the multicast list
*/
public static boolean isNodeInFlatfile(H2ONode node) {
assert isFlatfileEnabled() : "Trying to use flatfile, but flatfile is not enabled!";
return STATIC_H2OS.contains(node);
}
/**
* Is manual multicast enabled?
* @return true if the `-flatfile` option was specified on the command line
*/
public static boolean isFlatfileEnabled() {
return STATIC_H2OS != null;
}
/** Set up the set of nodes which should be contacted during
* manual multicast.
* @param nodes set of H2O nodes.
*/
public static void setFlatfile(HashSet<H2ONode> nodes) {
STATIC_H2OS = nodes;
}
/** Returns the set of nodes which are contacted during manual
* multicast. The returned set can be freely modified by the caller since
* the call returns a copy of the original set.
* @return set of nodes
*/
public static HashSet<H2ONode> getFlatfile() {
return (HashSet<H2ONode>) STATIC_H2OS.clone();
}
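// Usage sketch (hypothetical): extend the manual multicast list at runtime,
// e.g. after discovering a new node out-of-band:
//   if( H2O.isFlatfileEnabled() && !H2O.isNodeInFlatfile(node) )
//     H2O.addNodeToFlatfile(node); // true == node was newly added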
public static H2ONode reportClient(H2ONode client){
H2ONode oldClient = CLIENTS_MAP.put(client._key, client);
if(oldClient == null){
Log.info("New client discovered at " + client);
}
return oldClient;
}
public static H2ONode removeClient(H2ONode client){
return CLIENTS_MAP.remove(client._key);
}
public static HashSet<H2ONode> getClients(){
return new HashSet<>(CLIENTS_MAP.values());
}
public static Map<H2ONode.H2Okey, H2ONode> getClientsByKey(){
return new HashMap<>(CLIENTS_MAP);
}
}