package water;
import java.io.*;
import java.net.*;
import java.nio.ByteBuffer;
import java.nio.channels.DatagramChannel;
import java.nio.channels.ServerSocketChannel;
import java.util.*;
import jsr166y.*;
import water.Job.JobCancelledException;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.ga.EventHit;
import water.ga.GoogleAnalytics;
import water.license.LicenseManager;
import water.nbhm.NonBlockingHashMap;
import water.persist.*;
import water.util.*;
import water.util.Log.Tag.Sys;
/**
* Start point for creating or joining an <code>H2O</code> Cloud.
*
* @author <a href="mailto:cliffc@h2o.ai">cliffc@h2o.ai</a>
* @version 1.0
*/
public final class H2O {
public static volatile AbstractEmbeddedH2OConfig embeddedH2OConfig;
public static volatile ApiIpPortWatchdogThread apiIpPortWatchdog;
public static volatile LicenseManager licenseManager;
public static String VERSION = "(unknown)";
public static long START_TIME_MILLIS = -1;
// Name of this Cloud (either the user name, or the argument of the -name option)
public static String NAME;
// The default port for finding a Cloud
public static int DEFAULT_PORT = 54321;
public static int H2O_PORT; // Fast/small UDP transfers
public static int API_PORT; // RequestServer and the new API HTTP port
// Whether to cap stored floating-point numbers at single precision
public static boolean SINGLE_PRECISION = false;
// Max. number of factor levels per column (before flipping all to NAs)
public static int DATA_MAX_FACTOR_LEVELS = 1000000;
public static int LOG_CHK = 22; // Chunks are 1<<22, or 4Meg
// The multicast discovery port
static MulticastSocket CLOUD_MULTICAST_SOCKET;
static NetworkInterface CLOUD_MULTICAST_IF;
static InetAddress CLOUD_MULTICAST_GROUP;
static int CLOUD_MULTICAST_PORT;
// Default NIO Datagram channel
static DatagramChannel CLOUD_DGRAM;
// Myself, as a Node in the Cloud
public static H2ONode SELF = null;
public static InetAddress SELF_ADDRESS;
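// e.g., a user.name of "John Doe" yields "/tmp/h2o-John_Doe"; a missing or
// empty user name yields "/tmp/h2o-unknown".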
public static String DEFAULT_ICE_ROOT() {
String username = System.getProperty("user.name");
if (username == null) username = "";
String u2 = username.replaceAll(" ", "_");
if (u2.length() == 0) u2 = "unknown";
return "/tmp/h2o-" + u2;
}
public static URI ICE_ROOT;
// Initial arguments
public static String[] ARGS;
public static final PrintStream OUT = System.out;
public static final PrintStream ERR = System.err;
public static final int NUMCPUS = Runtime.getRuntime().availableProcessors();
// Convenience error
public static RuntimeException unimpl(String msg) { return new RuntimeException("unimplemented: " + msg); }
public static RuntimeException unimpl() { return new RuntimeException("unimplemented"); }
public static RuntimeException fail() { return new RuntimeException("do not call"); }
public static RuntimeException fail(String msg) { return new RuntimeException("FAILURE: " + msg); }
// Central /dev/null for ignored exceptions
public static void ignore(Throwable e) { ignore(e,"[h2o] Problem ignored: "); }
public static void ignore(Throwable e, String msg) { ignore(e, msg, true); }
public static void ignore(Throwable e, String msg, boolean printException) { Log.debug(Sys.WATER, msg + (printException? e.toString() : "")); }
//Google analytics performance measurement
public static GoogleAnalytics GA;
public static int CLIENT_TYPE_GA_CUST_DIM = 1;
// --------------------------------------------------------------------------
// Embedded configuration for a full H2O node to be implanted in another
// piece of software (e.g. Hadoop mapper task).
/**
* Register embedded H2O configuration object with H2O instance.
*/
public static void setEmbeddedH2OConfig(AbstractEmbeddedH2OConfig c) { embeddedH2OConfig = c; }
public static AbstractEmbeddedH2OConfig getEmbeddedH2OConfig() { return embeddedH2OConfig; }
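// Illustrative sketch: an embedding driver (e.g. a Hadoop launcher) would
// install its config before booting the node, roughly:
//   H2O.setEmbeddedH2OConfig(myConfig); // myConfig extends AbstractEmbeddedH2OConfig (hypothetical instance)
//   H2O.main(args);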
/**
* Tell the embedding software that this H2O instance belongs to
* a cloud of a certain size.
* This may be nonblocking.
*
* @param ip IP address this H2O can be reached at.
* @param port Port this H2O can be reached at (for REST API and browser).
* @param size Number of H2O instances in the cloud.
*/
public static void notifyAboutCloudSize(InetAddress ip, int port, int size) {
if (embeddedH2OConfig == null) { return; }
embeddedH2OConfig.notifyAboutCloudSize(ip, port, size);
}
/**
* Notify embedding software instance H2O wants to exit.
* @param status H2O's requested process exit value.
*/
public static void exit(int status) {
// embeddedH2OConfig is only valid if this H2O node is living inside
// another software instance (e.g. a Hadoop mapper task).
//
// Expect embeddedH2OConfig to be null if H2O is run standalone.
// Cleanly shutdown internal H2O services.
if (apiIpPortWatchdog != null) {
apiIpPortWatchdog.shutdown();
}
if (embeddedH2OConfig == null) {
// Standalone H2O path.
System.exit (status);
}
// Embedded H2O path (e.g. inside Hadoop mapper task).
embeddedH2OConfig.exit(status);
// Should never reach here.
System.exit(222);
}
/** Shut this node down by sending a shutdown UDP packet. */
public void shutdown() {
UDPRebooted.T.shutdown.send(H2O.SELF);
H2O.exit(0);
}
// --------------------------------------------------------------------------
// The Current Cloud. A list of all the Nodes in the Cloud. Changes if we
// decide to change Clouds via atomic Cloud update.
static public volatile H2O CLOUD = new H2O(new H2ONode[0],0,0);
// ---
// A dense array indexing all Cloud members. Fast reversal from "member#" to
// Node. No holes. Cloud size is _members.length.
public final H2ONode[] _memary;
public final int _hash;
//public boolean _healthy;
// A dense integer identifier that rolls over rarely. Rollover limits the
// number of simultaneous nested Clouds we are operating on in-parallel.
// Really capped to 1 byte, under the assumption we won't have 256 nested
// Clouds. Capped at 1 byte so it can be part of an atomically-assigned
// 'long' holding info specific to this Cloud.
public final char _idx; // no unsigned byte, so unsigned char instead
// Is nnn larger than old (accounting for wrap-around)? Gets confused if we
// start seeing a mix of more than 128 unique clouds at the same time. Used
// to tell the order of Clouds appearing.
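// For example: larger(2,250) is true, since ((2-250)&0xFF) == 8 < 64,
// while larger(250,2) is false, since ((250-2)&0xFF) == 248.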
static public boolean larger( int nnn, int old ) {
assert (0 <= nnn && nnn <= 255);
assert (0 <= old && old <= 255);
return ((nnn-old)&0xFF) < 64;
}
static public boolean isHealthy() {
H2O cloud = H2O.CLOUD;
for (H2ONode h2o : cloud._memary) {
if(!h2o._node_healthy) return false;
}
return true;
}
// Static list of acceptable Cloud members
public static HashSet<H2ONode> STATIC_H2OS = null;
// Reverse cloud index to a cloud; limit of 256 old clouds.
static private final H2O[] CLOUDS = new H2O[256];
// Enables debug features like more logging and multiple instances per JVM
public static final String DEBUG_ARG = "h2o.debug";
public static final boolean DEBUG = System.getProperty(DEBUG_ARG) != null;
// Construct a new H2O Cloud from the member list
public H2O( H2ONode[] h2os, int hash, int idx ) {
_memary = h2os; // Need to clone?
Arrays.sort(_memary); // ... sorted!
_hash = hash; // And record hash for cloud rollover
_idx = (char)(idx&0x0ff); // Roll-over at 256
}
// One-shot atomic setting of the next Cloud, with an empty K/V store.
// Called single-threaded from Paxos. Constructs the new H2O Cloud from a
// member list.
void set_next_Cloud( H2ONode[] h2os, int hash ) {
synchronized(this) {
int idx = _idx+1; // Unique 1-byte Cloud index
if( idx == 256 ) idx=1; // wrap, avoiding zero
CLOUDS[idx] = CLOUD = new H2O(h2os,hash,idx);
}
SELF._heartbeat._cloud_size=(char)CLOUD.size();
}
public final int size() { return _memary.length; }
public final H2ONode leader() { return _memary[0]; }
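// Example: block until at least 3 nodes have joined, or throw after the
// default 10-second timeout:  H2O.waitForCloudSize(3);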
public static void waitForCloudSize(int x) {
waitForCloudSize(x, 10000);
}
public static void waitForCloudSize(int x, long ms) {
long start = System.currentTimeMillis();
while( System.currentTimeMillis() - start < ms ) {
if( CLOUD.size() >= x && Paxos._commonKnowledge )
break;
try { Thread.sleep(100); } catch( InterruptedException ie ) { }
}
if( H2O.CLOUD.size() < x )
throw new RuntimeException("Cloud size under " + x);
}
// Find the node index for this H2ONode, or a negative number on a miss
public int nidx( H2ONode h2o ) { return Arrays.binarySearch(_memary,h2o); }
public boolean contains( H2ONode h2o ) { return nidx(h2o) >= 0; }
// BIG WARNING: do not change this toString() method, since the cloud hash value depends on it
@Override public String toString() {
return Arrays.toString(_memary);
}
public String toPrettyString() {
if (_memary==null || _memary.length==0) return "[]";
int iMax = _memary.length - 1;
StringBuilder sb = new StringBuilder();
sb.append('[');
for (int i = 0; ; i++) {
sb.append(String.valueOf(_memary[i]));
if (_memary[i]!=null) sb.append(" (").append(PrettyPrint.msecs(_memary[i].runtime(),false)).append(')');
if (i==iMax) return sb.append(']').toString();
sb.append(", ");
}
}
/**
* Return a list of interfaces sorted by importance (most important first).
* This is the order we want to test for matches when selecting an interface.
*/
private static ArrayList<NetworkInterface> calcPrioritizedInterfaceList() {
ArrayList<NetworkInterface> networkInterfaceList = null;
try {
Enumeration<NetworkInterface> nis = NetworkInterface.getNetworkInterfaces();
ArrayList<NetworkInterface> tmpList = Collections.list(nis);
Comparator<NetworkInterface> c = new Comparator<NetworkInterface>() {
@Override public int compare(NetworkInterface lhs, NetworkInterface rhs) {
// Handle null inputs.
if ((lhs == null) && (rhs == null)) { return 0; }
if (lhs == null) { return 1; }
if (rhs == null) { return -1; }
// If the names are equal, then they are equal.
if (lhs.getName().equals (rhs.getName())) { return 0; }
// If both are bond drivers, choose a precedence.
if (lhs.getName().startsWith("bond") && (rhs.getName().startsWith("bond"))) {
Integer li = lhs.getName().length();
Integer ri = rhs.getName().length();
// The bond with the fewest characters sorts first (highest priority).
if (li.compareTo(ri) != 0) {
return li.compareTo(ri);
}
// Otherwise, sort lexicographically by name.
return lhs.getName().compareTo(rhs.getName());
}
// If only one is a bond driver, give that precedence.
if (lhs.getName().startsWith("bond")) { return -1; }
if (rhs.getName().startsWith("bond")) { return 1; }
// Everything that isn't a bond driver is equal.
return 0;
}
};
Collections.sort(tmpList, c);
networkInterfaceList = tmpList;
} catch( SocketException e ) { Log.err(e); }
return networkInterfaceList;
}
/**
* Return a list of internet addresses sorted by importance (most important first).
* This is the order we want to test for matches when selecting an internet address.
*/
public static ArrayList<java.net.InetAddress> calcPrioritizedInetAddressList() {
ArrayList<java.net.InetAddress> ips = new ArrayList<java.net.InetAddress>();
{
ArrayList<NetworkInterface> networkInterfaceList = calcPrioritizedInterfaceList();
for (int i = 0; i < networkInterfaceList.size(); i++) {
NetworkInterface ni = networkInterfaceList.get(i);
Enumeration<InetAddress> ias = ni.getInetAddresses();
while( ias.hasMoreElements() ) {
InetAddress ia;
ia = ias.nextElement();
ips.add(ia);
Log.info("Possible IP Address: " + ni.getName() + " (" + ni.getDisplayName() + "), " + ia.getHostAddress());
}
}
}
return ips;
}
public static InetAddress findInetAddressForSelf() throws Error {
if(SELF_ADDRESS == null) {
if ((OPT_ARGS.ip != null) && (OPT_ARGS.network != null)) {
Log.err("ip and network options must not be used together");
H2O.exit(-1);
}
ArrayList<UserSpecifiedNetwork> networkList = UserSpecifiedNetwork.calcArrayList(OPT_ARGS.network);
if (networkList == null) {
Log.err("Exiting.");
H2O.exit(-1);
}
// Get a list of all valid IPs on this machine.
ArrayList<InetAddress> ips = calcPrioritizedInetAddressList();
InetAddress local = null; // My final choice
// Check for an "-ip xxxx" option and accept a valid user choice; required
// if there are multiple valid IP addresses.
InetAddress arg = null;
if (OPT_ARGS.ip != null) {
try{
arg = InetAddress.getByName(OPT_ARGS.ip);
} catch( UnknownHostException e ) {
Log.err(e);
H2O.exit(-1);
}
if( !(arg instanceof Inet4Address) ) {
Log.warn("Only IP4 addresses allowed.");
H2O.exit(-1);
}
if( !ips.contains(arg) ) {
Log.warn("IP address not found on this machine");
H2O.exit(-1);
}
local = arg;
} else if (networkList.size() > 0) {
// Return the first match from the list, if any.
// If there are no matches, then exit.
Log.info("Network list was specified by the user. Searching for a match...");
for( InetAddress ip : ips ) {
Log.info(" Considering " + ip.getHostAddress() + " ...");
for ( UserSpecifiedNetwork n : networkList ) {
if (n.inetAddressOnNetwork(ip)) {
Log.info(" Matched " + ip.getHostAddress());
local = ip;
SELF_ADDRESS = local;
return SELF_ADDRESS;
}
}
}
Log.err("No interface matches the network list from the -network option. Exiting.");
H2O.exit(-1);
}
else {
// No user-specified IP address. Attempt auto-discovery. Roll through
// all the network choices looking for a single Inet4 address.
ArrayList<InetAddress> validIps = new ArrayList<InetAddress>();
for( InetAddress ip : ips ) {
// make sure the given IP address can be found here
if( ip instanceof Inet4Address &&
!ip.isLoopbackAddress() &&
!ip.isLinkLocalAddress() ) {
validIps.add(ip);
}
}
if( validIps.size() == 1 ) {
local = validIps.get(0);
} else {
local = guessInetAddress(validIps);
}
}
// The above fails when there is no network connection; in that case,
// fall back to a truly local host.
if( local == null ) {
try {
Log.warn("Failed to determine IP, falling back to localhost.");
// set default ip address to be 127.0.0.1 /localhost
local = InetAddress.getByName("127.0.0.1");
} catch( UnknownHostException e ) {
throw Log.errRTExcept(e);
}
}
SELF_ADDRESS = local;
}
return SELF_ADDRESS;
}
private static InetAddress guessInetAddress(List<InetAddress> ips) {
String m = "Multiple local IPs detected:\n";
for(InetAddress ip : ips) m+=" " + ip;
m+="\nAttempting to determine correct address...\n";
Socket s = null;
try {
// Use Google's public DNS server as a well-known external IP to route toward.
// Add a timeout to the touch of google.
// https://0xdata.atlassian.net/browse/HEX-743
s = new Socket();
// only 3000 milliseconds before giving up
// Exceptions: IOException, SocketTimeoutException, plus two Illegal* exceptions
s.connect(new InetSocketAddress("8.8.8.8", 53), 3000);
m+="Using " + s.getLocalAddress() + "\n";
return s.getLocalAddress();
} catch( java.net.SocketException se ) {
return null; // No network at all? (Laptop w/wifi turned off?)
} catch( java.net.SocketTimeoutException se ) {
return null; // could be firewall?
} catch( Throwable t ) {
Log.err(t);
return null;
} finally {
Log.info(m);
Utils.close(s);
}
}
// --------------------------------------------------------------------------
// The (local) set of Key/Value mappings.
static final NonBlockingHashMap<Key,Value> STORE = new NonBlockingHashMap<Key, Value>();
// Dummy shared volatile for ordering games
static public volatile int VOLATILE;
// PutIfMatch
// - Atomically update the STORE, returning the old Value on success
// - Kick the persistence engine as needed
// - Return existing Value on fail, no change.
//
// Keys are interned here: I always keep the existing Key, if any. The
// existing Key is blind jammed into the Value prior to atomically inserting
// it into the STORE and interning.
//
// Because of the blind jam, there is a narrow unusual race where the Key
// might exist but be stale (deleted, mapped to a TOMBSTONE), a fresh put()
// can find it and jam it into the Value, then the Key can be deleted
// completely (e.g. via an invalidate), the table can resize flushing the
// stale Key, an unrelated weak-put can re-insert a matching Key (but as a
// new Java object), and delete it, and then the original thread can do a
// successful put_if_later over the missing Key and blow the invariant that a
// stored Value always points to the physically equal Key that maps to it
// from the STORE. If this happens, some of the replication-management bits in
// the Key will be set in the wrong Key copy... leading to extra rounds of
// replication.
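//
// Typical compare-and-swap usage (illustrative sketch):
//   Value old = H2O.get(key);                  // current mapping, or null
//   Value res = H2O.putIfMatch(key, val, old); // attempt the swap
//   boolean success = (res == old);            // on failure, res is the failure cause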
public static Value putIfMatch( Key key, Value val, Value old ) {
if( old != null ) // Have an old value?
key = old._key; // Use prior key
if( val != null )
val._key = key;
// Insert into the K/V store
Value res = STORE.putIfMatchUnlocked(key,val,old);
if( res != old ) return res; // Return the failure cause
// Persistence-tickle.
// If the K/V mapping is going away, remove the old guy.
// If the K/V mapping is changing, let the store cleaner just overwrite.
// If the K/V mapping is new, let the store cleaner just create
if( old != null && val == null ) old.removeIce(); // Remove the old guy
if( val != null ) {
dirty_store(); // Start storing the new guy
Scope.track(key);
}
return old; // Return success
}
// Raw put; no marking the memory as out-of-sync with disk. Used to import
// initial keys from local storage, or to intern keys.
public static Value putIfAbsent_raw( Key key, Value val ) {
Value res = STORE.putIfMatchUnlocked(key,val,null);
assert res == null;
return res;
}
// Get the value from the store
public static Value get( Key key ) { return STORE.get(key); }
public static Value raw_get( Key key ) { return STORE.get(key); }
public static Key getk( Key key ) { return STORE.getk(key); }
public static Set<Key> localKeySet( ) { return STORE.keySet(); }
public static Collection<Value> values( ) { return STORE.values(); }
public static int store_size() { return STORE.size(); }
// --------------------------------------------------------------------------
// The worker pools - F/J pools with different priorities.
// These priorities are carefully ordered and asserted for... modify with
// care. The real problem here is that we can get into cyclic deadlock
// unless we spawn a thread of priority "X+1" in order to allow progress
// on a queue which might be flooded with a large number of "<=X" tasks.
//
// Example of deadlock: suppose TaskPutKey and the Invalidate ran at the same
// priority on a 2-node cluster. Both nodes flood their own queues with
// writes to unique keys, which require invalidates to run on the other node.
// Suppose the flooding depth exceeds the thread-limit (e.g. 99); then each
// node might have all 99 worker threads blocked in TaskPutKey, awaiting
// remote invalidates - but the other nodes' threads are also all blocked
// awaiting invalidates!
//
// We fix this by being willing to always spawn a thread working on jobs at
// priority X+1, and guaranteeing there are no jobs above MAX_PRIORITY -
// i.e., jobs running at MAX_PRIORITY cannot block, and when those jobs are
// done, the next lower level jobs get unblocked, etc.
public static final byte MAX_PRIORITY = Byte.MAX_VALUE-1;
public static final byte ACK_ACK_PRIORITY = MAX_PRIORITY-0;
public static final byte FETCH_ACK_PRIORITY = MAX_PRIORITY-1;
public static final byte ACK_PRIORITY = MAX_PRIORITY-2;
public static final byte DESERIAL_PRIORITY = MAX_PRIORITY-3;
public static final byte INVALIDATE_PRIORITY = MAX_PRIORITY-3;
public static final byte GET_KEY_PRIORITY = MAX_PRIORITY-4;
public static final byte PUT_KEY_PRIORITY = MAX_PRIORITY-5;
public static final byte ATOMIC_PRIORITY = MAX_PRIORITY-6;
public static final byte GUI_PRIORITY = MAX_PRIORITY-7;
public static final byte MIN_HI_PRIORITY = MAX_PRIORITY-7;
public static final byte MIN_PRIORITY = 0;
// F/J threads that remember the priority of the last task they started
// working on.
public static class FJWThr extends ForkJoinWorkerThread {
public int _priority;
FJWThr(ForkJoinPool pool) {
super(pool);
_priority = ((ForkJoinPool2)pool)._priority;
setPriority( _priority == Thread.MIN_PRIORITY
? Thread.NORM_PRIORITY-1
: Thread.MAX_PRIORITY-1 );
setName("FJ-"+_priority+"-"+getPoolIndex());
}
}
// Factory for F/J threads, with caps that vary by priority.
static class FJWThrFact implements ForkJoinPool.ForkJoinWorkerThreadFactory {
private final int _cap;
FJWThrFact( int cap ) { _cap = cap; }
@Override public ForkJoinWorkerThread newThread(ForkJoinPool pool) {
int cap = _cap == -1 ? 4 * NUMCPUS : _cap; // a cap of -1 means "default": 4*NUMCPUS
return pool.getPoolSize() <= cap ? new FJWThr(pool) : null;
}
}
// A standard FJ Pool, with an expected priority level.
static class ForkJoinPool2 extends ForkJoinPool {
final int _priority;
private ForkJoinPool2(int p, int cap) {
super((OPT_ARGS == null || OPT_ARGS.nthreads <= 0) ? NUMCPUS : OPT_ARGS.nthreads,
new FJWThrFact(cap),
null,
p<MIN_HI_PRIORITY);
_priority = p;
}
private H2OCountedCompleter poll2() { return (H2OCountedCompleter)pollSubmission(); }
}
// Hi-priority work, sorted into individual queues per-priority.
// Capped at a small number of threads per pool.
private static final ForkJoinPool2 FJPS[] = new ForkJoinPool2[MAX_PRIORITY+1];
static {
// Only need 1 thread for the AckAck work, as it cannot block
FJPS[ACK_ACK_PRIORITY] = new ForkJoinPool2(ACK_ACK_PRIORITY,1);
for( int i=MIN_HI_PRIORITY+1; i<MAX_PRIORITY; i++ )
FJPS[i] = new ForkJoinPool2(i,NUMCPUS); // All CPUs, but no more for blocking purposes
FJPS[GUI_PRIORITY] = new ForkJoinPool2(GUI_PRIORITY,2);
}
// Easy peeks at the FJ queues
static int getWrkQueueSize (int i) { return FJPS[i]==null ? -1 : FJPS[i].getQueuedSubmissionCount();}
static int getWrkThrPoolSize(int i) { return FJPS[i]==null ? -1 : FJPS[i].getPoolSize(); }
// Submit to the correct priority queue
public static H2OCountedCompleter submitTask( H2OCountedCompleter task ) {
int priority = task.priority();
assert MIN_PRIORITY <= priority && priority <= MAX_PRIORITY:"priority " + priority + " is out of range, expected range is [" + MIN_PRIORITY + "," + MAX_PRIORITY + "]";
if( FJPS[priority]==null )
synchronized( H2O.class ) { if( FJPS[priority] == null ) FJPS[priority] = new ForkJoinPool2(priority,-1); }
FJPS[priority].submit(task);
return task;
}
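// Example usage (illustrative sketch): submit work onto the default
// low-priority queue (priority() defaults to MIN_PRIORITY):
//   H2O.submitTask(new H2OCountedCompleter() {
//     @Override public void compute2() { /* do work */ tryComplete(); }
//   });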
// Simple wrapper over F/J CountedCompleter to support priority queues. F/J
// queues are simple unordered (and extremely lightweight) queues. However,
// we frequently need priorities to avoid deadlock and to promote efficient
// throughput (e.g. failure to respond quickly to TaskGetKey can block an
// entire node for lack of some small piece of data). So each attempt to do
// lower-priority F/J work starts with an attempt to work & drain the
// higher-priority queues.
public static abstract class
H2OCountedCompleter<T extends H2OCountedCompleter> extends CountedCompleter implements Cloneable {
public H2OCountedCompleter(){}
protected H2OCountedCompleter(H2OCountedCompleter completer){super(completer);}
// Once per F/J task, drain the high priority queue before doing any low
// priority work.
@Override public final void compute() {
FJWThr t = (FJWThr)Thread.currentThread();
int pp = ((ForkJoinPool2)t.getPool())._priority;
// Drain the high priority queues before the normal F/J queue
H2OCountedCompleter h2o = null;
try {
assert priority() == pp; // Job went to the correct queue?
assert t._priority <= pp; // Thread attempting the job runs at no higher priority?
final int p2 = Math.max(pp,MIN_HI_PRIORITY);
for( int p = MAX_PRIORITY; p > p2; p-- ) {
if( FJPS[p] == null ) continue;
h2o = FJPS[p].poll2();
if( h2o != null ) { // Got a hi-priority job?
t._priority = p; // Set & do it now!
t.setPriority(Thread.MAX_PRIORITY-1);
h2o.compute2(); // Do it ahead of normal F/J work
p++; // Check again the same queue
}
}
} catch( Throwable ex ) {
// If the higher priority job popped an exception, complete it
// exceptionally... but then carry on and do the lower priority job.
if( h2o != null ) h2o.onExceptionalCompletion(ex, h2o.getCompleter());
else ex.printStackTrace();
} finally {
t._priority = pp;
if( pp == MIN_PRIORITY ) t.setPriority(Thread.NORM_PRIORITY-1);
}
// Now run the task as planned
compute2();
}
// Do the actual intended work
public abstract void compute2();
@Override public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) {
if(!(ex instanceof JobCancelledException) && !(ex instanceof IllegalArgumentException) && this.getCompleter() == null)
ex.printStackTrace();
return true;
}
// In order to prevent deadlock, threads that block waiting for a reply
// from a remote node need the remote task to run at a higher priority
// than themselves. This method reports the required priority.
public byte priority() { return MIN_PRIORITY; }
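// Illustrative override (sketch) for a task whose caller blocks on a remote
// reply handled at GET_KEY_PRIORITY:
//   @Override public byte priority() { return H2O.GET_KEY_PRIORITY; }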
@Override public T clone(){
try { return (T)super.clone(); }
catch( CloneNotSupportedException e ) { throw water.util.Log.errRTExcept(e); }
}
}
public static abstract class H2OCallback<T extends H2OCountedCompleter> extends H2OCountedCompleter{
public H2OCallback(){}
public H2OCallback(H2OCountedCompleter cc){super(cc);}
@Override public void compute2(){throw new UnsupportedOperationException();}
@Override public void onCompletion(CountedCompleter caller){callback((T) caller);}
public abstract void callback(T t);
}
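// Illustrative use: attach a callback to a task (SomeTask is hypothetical)
// and consume its result once the task completes:
//   new SomeTask(new H2O.H2OCallback<SomeTask>() {
//     @Override public void callback(SomeTask t) { /* read t's results */ }
//   }).fork();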
public static class H2OEmptyCompleter extends H2OCountedCompleter{
public H2OEmptyCompleter(){}
public H2OEmptyCompleter(H2OCountedCompleter cc){super(cc);}
@Override public void compute2(){throw new UnsupportedOperationException();}
}
// --------------------------------------------------------------------------
public static OptArgs OPT_ARGS = new OptArgs();
public static class OptArgs extends Arguments.Opt {
public String name; // set_cloud_name_and_mcast()
public String flatfile; // set_cloud_name_and_mcast()
public int baseport; // starting number to search for open ports
public int port; // set_cloud_name_and_mcast()
public String ip; // Named IP4/IP6 address instead of the default
public String network; // Network specification for acceptable interfaces to bind to.
public String ice_root; // ice root directory
public String hdfs; // HDFS backend
public String hdfs_version; // version of the filesystem
public String hdfs_config; // configuration file of the HDFS
public String hdfs_skip = null; // used by hadoop driver to not unpack and load any hdfs jar file at runtime.
public String aws_credentials; // properties file for aws credentials
public String keepice; // Do not delete ice on startup
public String soft = null; // soft launch for demos
public String random_udp_drop = null; // test only, randomly drop udp incoming
public int pparse_limit = Integer.MAX_VALUE;
public String no_requests_log = null; // disable logging of Web requests
public boolean check_rest_params = true; // enable checking of unused/unknown REST params; e.g., -check_rest_params=false disables checking of unknown REST params
public int nthreads=NUMCPUS; // desired F/J parallelism level for low priority queues.
public String license; // License file
public String h = null;
public String help = null;
public String version = null;
public String single_precision = null;
public int data_max_factor_levels;
public String many_cols = null;
public int chunk_bytes;
public String beta = null;
public String mem_watchdog = null; // For developer debugging
public boolean md5skip = false;
public boolean ga_opt_out = false;
public String ga_hadoop_ver = null;
public boolean no_ice = false;
}
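// Example (illustrative): "java -jar h2o.jar -name MyCloud -nthreads 8"
// is reflected into OPT_ARGS.name == "MyCloud" and OPT_ARGS.nthreads == 8
// by Arguments.extract(OPT_ARGS) in main().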
public static void printHelp() {
String s =
"Start an H2O node.\n" +
"\n" +
"Usage: java [-Xmx<size>] -jar h2o.jar [options]\n" +
" (Note that every option has a default and is optional.)\n" +
"\n" +
" -h | -help\n" +
" Print this help.\n" +
"\n" +
" -version\n" +
" Print version info and exit.\n" +
"\n" +
" -name <h2oCloudName>\n" +
" Cloud name used for discovery of other nodes.\n" +
" Nodes with the same cloud name will form an H2O cloud\n" +
" (also known as an H2O cluster).\n" +
"\n" +
" -flatfile <flatFileName>\n" +
" Configuration file explicitly listing H2O cloud node members.\n" +
"\n" +
" -ip <ipAddressOfNode>\n" +
" IP address of this node.\n" +
"\n" +
" -port <port>\n" +
" Port number for this node (note: port+1 is also used).\n" +
" (The default port is " + DEFAULT_PORT + ".)\n" +
"\n" +
" -network <IPv4network1Specification>[,<IPv4network2Specification> ...]\n" +
" The IP address discovery code will bind to the first interface\n" +
" that matches one of the networks in the comma-separated list.\n" +
" Use instead of -ip when a broad range of addresses is legal.\n" +
" (Example network specification: '10.1.2.0/24' allows 256 legal\n" +
" possibilities.)\n" +
"\n" +
" -ice_root <fileSystemPath>\n" +
" The directory where H2O spills temporary data to disk.\n" +
" (The default is '" + DEFAULT_ICE_ROOT() + "'.)\n" +
"\n" +
" -single_precision\n" +
" Reduce the max. (storage) precision for floating point numbers\n" +
" from double to single precision to save memory of numerical data.\n" +
" (The default is double precision.)\n" +
"\n" +
" -many_cols\n" +
" Enables improved handling of high-dimensional datasets. Same as -chunk_bytes 24.\n" +
"\n" +
" -chunk_bytes <integer>\n" +
" Experimental option. Not in combination with -many_cols. The log (base 2) of chunk size in bytes.\n" +
" (The default is " + LOG_CHK + ", which leads to a chunk size of " + PrettyPrint.bytes(1<<LOG_CHK) + ".)\n" +
"\n" +
" -data_max_factor_levels <integer>\n" +
" The maximum number of factor levels for categorical columns.\n" +
" Columns with more than the specified number of factor levels\n" +
" are converted into all missing values.\n" +
" (The default is " + DATA_MAX_FACTOR_LEVELS + ".)\n" +
"\n" +
" -nthreads <#threads>\n" +
" Maximum number of threads in the low priority batch-work queue.\n" +
" (The default is 4*numcpus.)\n" +
"\n" +
" -license <licenseFilePath>\n" +
" Path to license file on local filesystem.\n" +
"\n" +
"Cloud formation behavior:\n" +
"\n" +
" New H2O nodes join together to form a cloud at startup time.\n" +
" Once a cloud is given work to perform, it locks out new members\n" +
" from joining.\n" +
"\n" +
"Examples:\n" +
"\n" +
" Start an H2O node with 4GB of memory and a default cloud name:\n" +
" $ java -Xmx4g -jar h2o.jar\n" +
"\n" +
" Start an H2O node with 6GB of memory and a specify the cloud name:\n" +
" $ java -Xmx6g -jar h2o.jar -name MyCloud\n" +
"\n" +
" Start an H2O cloud with three 2GB nodes and a default cloud name:\n" +
" $ java -Xmx2g -jar h2o.jar &\n" +
" $ java -Xmx2g -jar h2o.jar &\n" +
" $ java -Xmx2g -jar h2o.jar &\n" +
"\n";
System.out.print(s);
}
public static boolean IS_SYSTEM_RUNNING = false;
/** Load an H2O build version or return the default unknown version
* @return never returns null
*/
public static AbstractBuildVersion getBuildVersion() {
try {
Class klass = Class.forName("water.BuildVersion");
java.lang.reflect.Constructor constructor = klass.getConstructor();
AbstractBuildVersion abv = (AbstractBuildVersion) constructor.newInstance();
return abv;
// it exists on the classpath
} catch (Exception e) {
return AbstractBuildVersion.UNKNOWN_VERSION;
}
}
/**
* If logging has not been setup yet, then Log.info will only print to stdout.
* This allows for early processing of the '-version' option without unpacking
* the jar file and other startup stuff.
*/
public static void printAndLogVersion() {
// Try to load a version
AbstractBuildVersion abv = getBuildVersion();
String build_branch = abv.branchName();
String build_hash = abv.lastCommitHash();
String build_describe = abv.describe();
String build_project_version = abv.projectVersion();
String build_by = abv.compiledBy();
String build_on = abv.compiledOn();
Log.info ("----- H2O started -----");
Log.info ("Build git branch: " + build_branch);
Log.info ("Build git hash: " + build_hash);
Log.info ("Build git describe: " + build_describe);
Log.info ("Build project version: " + build_project_version);
Log.info ("Built by: '" + build_by + "'");
Log.info ("Built on: '" + build_on + "'");
Runtime runtime = Runtime.getRuntime();
double ONE_GB = 1024 * 1024 * 1024;
Log.info ("Java availableProcessors: " + runtime.availableProcessors());
Log.info ("Java heap totalMemory: " + String.format("%.2f gb", runtime.totalMemory() / ONE_GB));
Log.info ("Java heap maxMemory: " + String.format("%.2f gb", runtime.maxMemory() / ONE_GB));
Log.info ("Java version: " + String.format("Java %s (from %s)", System.getProperty("java.version"), System.getProperty("java.vendor")));
Log.info ("OS version: " + String.format("%s %s (%s)", System.getProperty("os.name"), System.getProperty("os.version"), System.getProperty("os.arch")));
long totalMemory = OSUtils.getTotalPhysicalMemory();
Log.info ("Machine physical memory: " + (totalMemory==-1 ? "NA" : String.format("%.2f gb", totalMemory / ONE_GB)));
}
/**
* We had a report from a user that H2O didn't start properly on MacOS X in a
* case where the user was part of the root group. So warn about it.
*/
public static void printWarningIfRootOnMac() {
String os_name = System.getProperty("os.name");
if (os_name.equals("Mac OS X")) {
String user_name = System.getProperty("user.name");
if (user_name.equals("root")) {
Log.warn("Running as root on MacOS; check if java binary is unintentionally setuid");
}
}
}
public static String getVersion() {
String build_project_version = "(unknown)";
try {
Class klass = Class.forName("water.BuildVersion");
java.lang.reflect.Constructor constructor = klass.getConstructor();
AbstractBuildVersion abv = (AbstractBuildVersion) constructor.newInstance();
build_project_version = abv.projectVersion();
// it exists on the classpath
} catch (Exception e) {
// it does not exist on the classpath
}
return build_project_version;
}
// Start up an H2O Node and join any local Cloud
public static void main( String[] args ) {
Log.POST(300,"");
// To support launching from JUnit, which may call main() repeatedly,
// we need exactly 1 call to main() to start up all the local services.
if (IS_SYSTEM_RUNNING) return;
IS_SYSTEM_RUNNING = true;
VERSION = getVersion(); // Pick this up from build-specific info.
START_TIME_MILLIS = System.currentTimeMillis();
// Parse args
Arguments arguments = new Arguments(args);
arguments.extract(OPT_ARGS);
ARGS = arguments.toStringArray();
printAndLogVersion();
printWarningIfRootOnMac();
GA = new GoogleAnalytics("UA-56665317-2","H2O",H2O.getVersion());
if((new File(".h2o_no_collect")).exists()
|| (new File(System.getProperty("user.home")+File.separator+".h2o_no_collect")).exists()
|| OPT_ARGS.ga_opt_out ) {
GA.setEnabled(false);
Log.info("Opted out of sending usage metrics.");
}
if (OPT_ARGS.baseport != 0) {
DEFAULT_PORT = OPT_ARGS.baseport;
}
SINGLE_PRECISION = OPT_ARGS.single_precision != null;
if (SINGLE_PRECISION) Log.info("Using single precision for floating-point numbers.");
if (OPT_ARGS.data_max_factor_levels != 0) {
DATA_MAX_FACTOR_LEVELS = OPT_ARGS.data_max_factor_levels;
Log.info("Max. number of factor levels per column: " + DATA_MAX_FACTOR_LEVELS);
}
if (OPT_ARGS.chunk_bytes != 0 || OPT_ARGS.many_cols != null) {
if (OPT_ARGS.many_cols != null) {
LOG_CHK = 24;
if (OPT_ARGS.chunk_bytes > 0) {
Log.warn("-chunk_bytes is ignored since -many_cols was set.");
}
} else if (OPT_ARGS.chunk_bytes > 0) {
LOG_CHK = OPT_ARGS.chunk_bytes;
if (OPT_ARGS.chunk_bytes < 22) {
Log.warn("-chunk_bytes < 22 is not officially supported. Use at your own risk.");
}
if (OPT_ARGS.chunk_bytes > 24) {
Log.warn("-chunk_bytes > 24 is not officially supported. Use at your own risk.");
}
}
}
Log.info("Chunk size: " + PrettyPrint.bytes(1<<LOG_CHK));
// Get ice path before loading Log or Persist class
String ice = DEFAULT_ICE_ROOT();
if( OPT_ARGS.ice_root != null ) ice = OPT_ARGS.ice_root.replace("\\", "/");
try {
ICE_ROOT = new URI(ice);
} catch(URISyntaxException ex) {
throw new RuntimeException("Invalid ice_root: " + ice + ", " + ex.getMessage());
}
Log.info ("ICE root: '" + ICE_ROOT + "'");
findInetAddressForSelf();
//if (OPT_ARGS.rshell.equals("false"))
Log.POST(310,"");
Log.wrap(); // Logging does not wrap when the rshell is on.
// Start the local node
startLocalNode();
Log.POST(320,"");
String logDir = (Log.getLogDir() != null) ? Log.getLogDir() : "(unknown)";
Log.info ("Log dir: '" + logDir + "'");
// Load up from disk and initialize the persistence layer
initializePersistence();
Log.POST(340, "");
initializeLicenseManager();
Log.POST(345, "");
// Start network services, including heartbeats & Paxos
startNetworkServices(); // start server services
Log.POST(350,"");
startApiIpPortWatchdog(); // Check if the API port becomes unreachable
Log.POST(360,"");
if (OPT_ARGS.mem_watchdog != null) {
startMemoryWatchdog();
Log.POST(370, "");
}
startupFinalize(); // finalizes the startup & tests (if any)
Log.POST(380,"");
startGAStartupReport();
}
/** Starts the local k-v store.
* Initializes the local k-v store, local node and the local cloud with itself
* as the only member.
*/
private static void startLocalNode() {
// Print this first, so if any network stuff is affected it's clear this is going on.
if (OPT_ARGS.random_udp_drop != null) {
Log.warn("Debugging option RANDOM UDP DROP is ENABLED, make sure you really meant it");
}
// Figure self out; this is surprisingly hard
initializeNetworkSockets();
// Do not forget to put SELF into the static configuration (to simulate
// proper multicast behavior)
if( STATIC_H2OS != null && !STATIC_H2OS.contains(SELF)) {
Log.warn("Flatfile configuration does not include self: " + SELF+ " but contains " + STATIC_H2OS);
STATIC_H2OS.add(SELF);
}
Log.info ("H2O cloud name: '" + NAME + "'");
Log.info("(v"+VERSION+") '"+NAME+"' on " + SELF+(OPT_ARGS.flatfile==null
? (", discovery address "+CLOUD_MULTICAST_GROUP+":"+CLOUD_MULTICAST_PORT)
: ", static configuration based on -flatfile "+OPT_ARGS.flatfile));
Log.info("If you have trouble connecting, try SSH tunneling from your local machine (e.g., via port 55555):\n" +
" 1. Open a terminal and run 'ssh -L 55555:localhost:"
+ API_PORT + " " + System.getProperty("user.name") + "@" + SELF_ADDRESS.getHostAddress() + "'\n" +
" 2. Point your browser to http://localhost:55555");
// Create the starter Cloud with 1 member
SELF._heartbeat._jar_md5 = Boot._init._jarHash;
Paxos.doHeartbeat(SELF);
assert SELF._heartbeat._cloud_hash != 0;
}
/** Initializes the network services of the local node.
*
* Starts the worker threads, receiver threads, heartbeats and all other
* network related services.
*/
private static void startNetworkServices() {
// We've rebooted the JVM recently. Tell other Nodes they can ignore
// prior tasks from us. Do this before we receive any packets
UDPRebooted.T.reboot.broadcast();
// Start the UDPReceiverThread, to listen for requests from other Cloud
// Nodes. There should be only 1 of these, and it never shuts down.
// Started first, so we can start parsing UDP packets
new UDPReceiverThread().start();
// Start the MultiReceiverThread, to listen for multi-cast requests from
// other Cloud Nodes. There should be only 1 of these, and it never shuts
// down. Started soon, so we can start parsing multicast UDP packets
new MultiReceiverThread().start();
// Start the Persistent meta-data cleaner thread, which updates the K/V
// mappings periodically to disk. There should be only 1 of these, and it
// never shuts down. Needs to start BEFORE the HeartBeatThread to build
// an initial histogram state.
new Cleaner().start();
// Start the heartbeat thread, to publish the Clouds' existence to other
// Clouds. This will typically trigger a round of Paxos voting so we can
// join an existing Cloud.
new HeartBeatThread().start();
// Start a UDP timeout worker thread. This guy only handles requests for
// which we have not received a timely response and probably need to
// arrange for a re-send to cover a dropped UDP packet.
new UDPTimeOutThread().start();
new H2ONode.AckAckTimeOutThread().start();
// Start the TCPReceiverThread, to listen for TCP requests from other Cloud
// Nodes. There should be only 1 of these, and it never shuts down.
new TCPReceiverThread().start();
// Start the Nano HTTP server thread
water.api.RequestServer.start();
}
/** Initializes a watchdog thread to make sure the API IP:Port is reachable.
*
* The IP and port are meant to be accessible from outside this
* host, and certainly from within it. The real reason behind this check is the
* one-node cloud case where people move their laptop around and
* DHCP assigns them a new IP address.
*/
private static void startApiIpPortWatchdog() {
apiIpPortWatchdog = new ApiIpPortWatchdogThread();
apiIpPortWatchdog.start();
}
private static void startMemoryWatchdog() {
new MemoryWatchdogThread().start();
}
private static void startGAStartupReport() {
new GAStartupReportThread().start();
}
// Used to update the Throwable detailMessage field.
private static java.lang.reflect.Field DETAILMESSAGE;
public static <T extends Throwable> T setDetailMessage( T t, String s ) {
try { if( DETAILMESSAGE != null ) DETAILMESSAGE.set(t,s); }
catch( IllegalAccessException iae) {}
return t;
}
/** Finalizes the node startup.
*
* Displays the startup message and runs the tests (if applicable).
*/
private static void startupFinalize() {
// Allow Throwable detailMessage's to be updated on the fly. Ugly, ugly,
// but I want to add info without rethrowing/rebuilding whole exceptions.
try {
DETAILMESSAGE = Throwable.class.getDeclaredField("detailMessage");
DETAILMESSAGE.setAccessible(true);
} catch( NoSuchFieldException nsfe ) { }
// Sleep a bit so all my other threads can 'catch up'
try { Thread.sleep(100); } catch( InterruptedException e ) { }
}
public static DatagramChannel _udpSocket;
public static ServerSocket _apiSocket;
// Parse arguments and set cloud name in any case. Strip out "-name NAME"
// and "-flatfile <filename>". Ignore the rest. Set multi-cast port as a hash
// function of the name. Parse node ip addresses from the filename.
static void initializeNetworkSockets( ) {
// Assign initial ports
API_PORT = OPT_ARGS.port != 0 ? OPT_ARGS.port : DEFAULT_PORT;
while (true) {
H2O_PORT = API_PORT+1;
if( API_PORT<0 || API_PORT>65534 ) // 65535 is max, implied for udp port
Log.die("Attempting to use system illegal port, either "+API_PORT+" or "+ H2O_PORT);
try {
// kbn. seems like we need to set SO_REUSEADDR before binding?
// http://www.javadocexamples.com/java/net/java.net.ServerSocket.html#setReuseAddress:boolean
// When a TCP connection is closed the connection may remain in a timeout state
// for a period of time after the connection is closed (typically known as the
// TIME_WAIT state or 2MSL wait state). For applications using a well known socket address
// or port it may not be possible to bind a socket to the required SocketAddress
// if there is a connection in the timeout state involving the socket address or port.
// Enabling SO_REUSEADDR prior to binding the socket using bind(SocketAddress)
// allows the socket to be bound even though a previous connection is in a timeout state.
// cnc: this is busted on windows. Back to the old code.
// If the user specified the -ip flag, honor it for the Web UI interface bind.
// Otherwise bind to all interfaces.
_apiSocket = OPT_ARGS.ip == null
? new ServerSocket(API_PORT)
: new ServerSocket(API_PORT, -1/*defaultBacklog*/, SELF_ADDRESS);
_apiSocket.setReuseAddress(true);
// Bind to the UDP socket
_udpSocket = DatagramChannel.open();
_udpSocket.socket().setReuseAddress(true);
InetSocketAddress isa = new InetSocketAddress(H2O.SELF_ADDRESS, H2O_PORT);
_udpSocket.socket().bind(isa);
// Bind to the TCP socket also
TCPReceiverThread.SOCK = ServerSocketChannel.open();
TCPReceiverThread.SOCK.socket().setReceiveBufferSize(water.AutoBuffer.TCP_BUF_SIZ);
TCPReceiverThread.SOCK.socket().bind(isa);
break;
} catch (IOException e) {
try { if( _apiSocket != null ) _apiSocket.close(); } catch( IOException ohwell ) { Log.err(ohwell); }
Utils.close(_udpSocket);
if( TCPReceiverThread.SOCK != null ) try { TCPReceiverThread.SOCK.close(); } catch( IOException ie ) { }
_apiSocket = null;
_udpSocket = null;
TCPReceiverThread.SOCK = null;
if( OPT_ARGS.port != 0 )
Log.die("On " + SELF_ADDRESS +
" some of the required ports " + (OPT_ARGS.port+0) +
", " + (OPT_ARGS.port+1) +
" are not available, change -port PORT and try again.");
}
API_PORT += 2;
}
SELF = H2ONode.self(SELF_ADDRESS);
Log.info("Internal communication uses port: ", H2O_PORT,"\nListening for HTTP and REST traffic on http://",SELF_ADDRESS.getHostAddress(),":"+_apiSocket.getLocalPort()+"/");
String embeddedConfigFlatfile = null;
AbstractEmbeddedH2OConfig ec = getEmbeddedH2OConfig();
if (ec != null) {
ec.notifyAboutEmbeddedWebServerIpPort (SELF_ADDRESS, API_PORT);
if (ec.providesFlatfile()) {
try {
embeddedConfigFlatfile = ec.fetchFlatfile();
}
catch (Exception e) {
Log.err("Failed to get embedded config flatfile");
Log.err(e);
H2O.exit(1);
}
}
}
NAME = OPT_ARGS.name==null? System.getProperty("user.name") : OPT_ARGS.name;
// Read a flatfile of allowed nodes
if (embeddedConfigFlatfile != null) {
STATIC_H2OS = parseFlatFileFromString(embeddedConfigFlatfile);
}
else {
STATIC_H2OS = parseFlatFile(OPT_ARGS.flatfile);
}
// Multi-cast ports are in the range E1.00.00.00 to EF.FF.FF.FF
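// Worked example (illustrative): a name hash of 0x12345678 gives
//   port = (0x12345678 % 0x0F000000) + 0xE1000000 = 0xE4345678,
// i.e. multicast group 228.52.86.120 and multicast port 0xE434 (58420).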
int hash = NAME.hashCode()&0x7fffffff;
int port = (hash % (0xF0000000-0xE1000000))+0xE1000000;
byte[] ip = new byte[4];
for( int i=0; i<4; i++ )
ip[i] = (byte)(port>>>((3-i)<<3));
try {
CLOUD_MULTICAST_GROUP = InetAddress.getByAddress(ip);
} catch( UnknownHostException e ) { throw Log.errRTExcept(e); }
CLOUD_MULTICAST_PORT = (port>>>16);
}
// Multicast send-and-close. Very similar to udp_send, except to the
// multicast port (or all the individuals we can find, if multicast is
// disabled).
static void multicast( ByteBuffer bb ) {
try { multicast2(bb); }
catch (Exception xe) {}
}
static private void multicast2( ByteBuffer bb ) {
if( H2O.STATIC_H2OS == null ) {
byte[] buf = new byte[bb.remaining()];
bb.get(buf);
synchronized( H2O.class ) { // Sync'd so single-thread socket create/destroy
assert H2O.CLOUD_MULTICAST_IF != null;
try {
if( CLOUD_MULTICAST_SOCKET == null ) {
CLOUD_MULTICAST_SOCKET = new MulticastSocket();
// Allow multicast traffic to go across subnets
CLOUD_MULTICAST_SOCKET.setTimeToLive(2);
CLOUD_MULTICAST_SOCKET.setNetworkInterface(H2O.CLOUD_MULTICAST_IF);
}
// Make and send a packet from the buffer
CLOUD_MULTICAST_SOCKET.send(new DatagramPacket(buf, buf.length, CLOUD_MULTICAST_GROUP,CLOUD_MULTICAST_PORT));
} catch( Exception e ) { // On any error from anybody, close all sockets & re-open
// and if not a soft launch (hibernate mode)
if(H2O.OPT_ARGS.soft == null)
Log.err("Multicast Error ",e);
if( CLOUD_MULTICAST_SOCKET != null )
try { CLOUD_MULTICAST_SOCKET.close(); }
catch( Exception e2 ) { Log.err("Got",e2); }
finally { CLOUD_MULTICAST_SOCKET = null; }
}
}
} else { // Multicast Simulation
// The multicast simulation is a little bit tricky. To achieve a union of all
// specified nodes' flatfiles (via option -flatfile), the simulated
// multicast has to send packets not only to nodes listed in the node's
// flatfile (H2O.STATIC_H2OS), but also to all cloud members (they do not
// need to be specified in THIS node's flatfile but can be part of cloud
// due to another node's flatfile).
//
// Furthermore, the packets also have to be sent to Paxos-proposed members
// to achieve correct functionality of Paxos. A typical situation is when
// this node receives a Paxos heartbeat packet from a node which is not
// listed in the node's flatfile -- it means that this node is listed in
// another node's flatfile (and wants to create a cloud). Hence, to
// allow cloud creation, this node has to reply.
//
// A typical example is:
// node A: flatfile (B)
// node B: flatfile (C), i.e., A -> (B), B-> (C), C -> (A)
// node C: flatfile (A)
// Cloud configuration: (A, B, C)
//
// Hideous O(n) algorithm for broadcast - avoid the memory allocation in
// this method (since it is heavily used)
HashSet<H2ONode> nodes = (HashSet<H2ONode>)H2O.STATIC_H2OS.clone();
nodes.addAll(Paxos.PROPOSED.values());
bb.mark();
for( H2ONode h2o : nodes ) {
bb.reset();
try {
H2O.CLOUD_DGRAM.send(bb, h2o._key);
} catch( IOException e ) {
Log.warn("Multicast Error to "+h2o+e);
}
}
}
}
/**
* Read a set of Nodes from a file. Format is:
*
* name/ip_address:port
* - name is unused and optional
* - port is optional
* - leading '#' indicates a comment
*
* For example:
*
* 10.10.65.105:54322
* # disabled for testing
* # 10.10.65.106
* /10.10.65.107
* # run two nodes on 108
* 10.10.65.108:54322
* 10.10.65.108:54325
*/
private static HashSet<H2ONode> parseFlatFile( String fname ) {
if( fname == null ) return null;
File f = new File(fname);
if( !f.exists() ) {
Log.warn("-flatfile specified but not found: " + fname);
return null; // No flat file
}
HashSet<H2ONode> h2os = new HashSet<H2ONode>();
List<FlatFileEntry> list = parseFlatFile(f);
for(FlatFileEntry entry : list)
h2os.add(H2ONode.intern(entry.inet, entry.port+1));// use the UDP port here
return h2os;
}
public static HashSet<H2ONode> parseFlatFileFromString( String s ) {
HashSet<H2ONode> h2os = new HashSet<H2ONode>();
InputStream is = new ByteArrayInputStream(s.getBytes());
List<FlatFileEntry> list = parseFlatFile(is);
for(FlatFileEntry entry : list)
h2os.add(H2ONode.intern(entry.inet, entry.port+1));// use the UDP port here
return h2os;
}
public static class FlatFileEntry {
public InetAddress inet;
public int port;
}
public static List<FlatFileEntry> parseFlatFile( File f ) {
InputStream is = null;
try {
is = new FileInputStream(f);
}
catch (Exception e) { Log.die(e.toString()); }
return parseFlatFile(is);
}
public static List<FlatFileEntry> parseFlatFile( InputStream is ) {
List<FlatFileEntry> list = new ArrayList<FlatFileEntry>();
BufferedReader br = null;
int port = DEFAULT_PORT;
try {
br = new BufferedReader(new InputStreamReader(is));
String strLine = null;
while( (strLine = br.readLine()) != null) {
strLine = strLine.trim();
// be user friendly and skip comments and empty lines
if (strLine.startsWith("#") || strLine.isEmpty()) continue;
String ip = null, portStr = null;
int slashIdx = strLine.indexOf('/');
int colonIdx = strLine.indexOf(':');
if( slashIdx == -1 && colonIdx == -1 ) {
ip = strLine;
} else if( slashIdx == -1 ) {
ip = strLine.substring(0, colonIdx);
portStr = strLine.substring(colonIdx+1);
} else if( colonIdx == -1 ) {
ip = strLine.substring(slashIdx+1);
} else if( slashIdx > colonIdx ) {
Log.die("Invalid format, must be name/ip[:port], not '"+strLine+"'");
} else {
ip = strLine.substring(slashIdx+1, colonIdx);
portStr = strLine.substring(colonIdx+1);
}
InetAddress inet = InetAddress.getByName(ip);
if( !(inet instanceof Inet4Address) )
Log.die("Only IP4 addresses allowed: given " + ip);
if( portStr!=null && !portStr.equals("") ) {
try {
port = Integer.decode(portStr);
} catch( NumberFormatException nfe ) {
Log.die("Invalid port #: "+portStr);
}
}
FlatFileEntry entry = new FlatFileEntry();
entry.inet = inet;
entry.port = port;
list.add(entry);
}
} catch( Exception e ) { Log.die(e.toString()); }
finally { Utils.close(br); }
return list;
}
static void initializePersistence() {
Log.POST(3001);
HdfsLoader.loadJars();
Log.POST(3002);
if( OPT_ARGS.aws_credentials != null ) {
try {
Log.POST(3003);
PersistS3.getClient();
Log.POST(3004);
} catch( IllegalArgumentException e ) {
Log.POST(3005);
Log.err(e);
}
}
Log.POST(3006);
Persist.initialize();
Log.POST(3007);
}
static void initializeLicenseManager() {
licenseManager = new LicenseManager();
if (OPT_ARGS.license != null) {
LicenseManager.Result r = licenseManager.readLicenseFile(OPT_ARGS.license);
if (r == LicenseManager.Result.OK) {
Log.info("Successfully read license file ("+ OPT_ARGS.license + ")");
licenseManager.logLicensedFeatures();
}
else {
Log.err("readLicenseFile failed (" + r + ")");
}
}
}
// Cleaner ---------------------------------------------------------------
// msec time at which the STORE was dirtied.
// Long.MAX_VALUE if clean.
static private volatile long _dirty; // When was store dirtied
static void dirty_store() { dirty_store(System.currentTimeMillis()); }
static void dirty_store( long x ) {
// Keep earliest dirty time seen
if( x < _dirty ) _dirty = x;
}
public abstract static class KVFilter {
public abstract boolean filter(KeyInfo k);
}
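// Illustrative filter (sketch): keep only Frame keys in a snapshot:
//   KeySnapshot frames = KeySnapshot.localSnapshot().filter(new KVFilter() {
//     @Override public boolean filter(KeyInfo k) { return k.isFrame(); }
//   });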
public static final class KeyInfo extends Iced implements Comparable<KeyInfo>{
public final Key _key;
public final int _type;
public final boolean _rawData;
public final int _sz;
public final int _ncols;
public final long _nrows;
public final byte _backEnd;
public KeyInfo(Key k, Value v){
assert k!=null : "Key should not be null!";
assert v!=null : "Value should not be null!";
_key = k;
_type = v.type();
_rawData = v.isRawData();
if(v.isFrame()){
Frame f = v.get();
// NOTE: can't get byteSize here as it may invoke RollupStats! :(
// _sz = f.byteSize();
_sz = v._max;
// do at least nrows/ncols instead
_ncols = f.numCols();
_nrows = f.numRows();
} else {
_sz = v._max;
_ncols = 0;
_nrows = 0;
}
_backEnd = v.backend();
}
@Override public int compareTo(KeyInfo ki){ return _key.compareTo(ki._key);}
public boolean isFrame(){
return _type == TypeMap.onIce(Frame.class.getName());
}
public boolean isLockable(){
return TypeMap.newInstance(_type) instanceof Lockable;
}
}
public static class KeySnapshot extends Iced {
private static volatile long _lastUpdate;
private static final long _updateInterval = 1000;
private static volatile KeySnapshot _cache;
public final KeyInfo [] _keyInfos;
public long lastUpdated(){return _lastUpdate;}
public KeySnapshot cache(){return _cache;}
public KeySnapshot filter(KVFilter kvf){
ArrayList<KeyInfo> res = new ArrayList<KeyInfo>();
for(KeyInfo kinfo: _keyInfos)
if(kvf.filter(kinfo))res.add(kinfo);
return new KeySnapshot(res.toArray(new KeyInfo[res.size()]));
}
KeySnapshot(KeyInfo [] snapshot){
_keyInfos = snapshot;}
public Key [] keys(){
Key [] res = new Key[_keyInfos.length];
for(int i = 0; i < _keyInfos.length; ++i)
res[i] = _keyInfos[i]._key;
return res;
}
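// Illustrative use: fetch all Frames visible in the cloud, keyed by name:
//   Map<String, Frame> frames = KeySnapshot.globalSnapshot().fetchAll(Frame.class);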
public <T extends Iced> Map<String, T> fetchAll(Class<T> c) { return fetchAll(c,false,0,Integer.MAX_VALUE);}
public <T extends Iced> Map<String, T> fetchAll(Class<T> c, boolean exact) { return fetchAll(c,exact,0,Integer.MAX_VALUE);}
public <T extends Iced> Map<String, T> fetchAll(Class<T> c, boolean exact, int offset, int limit) {
TreeMap<String, T> res = new TreeMap<String, T>();
final int typeId = TypeMap.onIce(c.getName());
for (KeyInfo kinfo : _keyInfos) {
if (kinfo._type == typeId || (!exact && c.isAssignableFrom(TypeMap.clazz(kinfo._type)))) {
if (offset > 0) {
--offset;
continue;
}
Value v = DKV.get(kinfo._key);
if (v != null) {
T t = v.get();
res.put(kinfo._key.toString(), t);
if (res.size() == limit)
break;
}
}
}
return res;
}
public static KeySnapshot localSnapshot(){return localSnapshot(false);}
public static KeySnapshot localSnapshot(boolean homeOnly){
Object [] kvs = STORE.raw_array();
ArrayList<KeyInfo> res = new ArrayList<KeyInfo>();
for(int i = 2; i < kvs.length; i+= 2){
Object ok = kvs[i], ov = kvs[i+1];
if( !(ok instanceof Key ) || ov==null ) continue; // Ignore tombstones or deleted values
Key key = (Key) ok;
if(!key.user_allowed())continue;
if(homeOnly && !key.home())continue;
// The raw array can contain plain Values and also values wrapped in the
// Prime marker class:
//  - if we see a Value object, create a KeyInfo instance from it
//  - otherwise, try to unwrap it by calling STORE.get and then look at
//    the wrapped value again.
if (!(ov instanceof Value)) {
ov = H2O.get(key); // H2O.get returns either a Value object or null
if (ov==null) continue;
}
res.add(new KeyInfo(key,(Value)ov));
}
final KeyInfo [] arr = res.toArray(new KeyInfo[res.size()]);
Arrays.sort(arr);
return new KeySnapshot(arr);
}
public static KeySnapshot globalSnapshot(){ return globalSnapshot(-1);}
public static KeySnapshot globalSnapshot(long timeTolerance){
KeySnapshot res = _cache;
final long t = System.currentTimeMillis();
if(res == null || (t - _lastUpdate) > timeTolerance)
res = new KeySnapshot(new GlobalUKeySetTask().invokeOnAllNodes()._res);
else if(t - _lastUpdate > _updateInterval)
H2O.submitTask(new H2OCountedCompleter() {
@Override
public void compute2() {
new GlobalUKeySetTask().invokeOnAllNodes();
}
});
return res;
}
private static class GlobalUKeySetTask extends DRemoteTask<GlobalUKeySetTask> {
KeyInfo [] _res;
@Override public byte priority(){return H2O.GET_KEY_PRIORITY;}
@Override public void lcompute(){
_res = localSnapshot(true)._keyInfos;
tryComplete();
}
@Override public void reduce(GlobalUKeySetTask gbt){
if(_res == null)_res = gbt._res;
else if(gbt._res != null){ // merge sort keys together
KeyInfo [] res = new KeyInfo[_res.length + gbt._res.length];
int j = 0, k = 0;
for(int i = 0; i < res.length; ++i)
res[i] = j < gbt._res.length && (k == _res.length || gbt._res[j].compareTo(_res[k]) < 0)?gbt._res[j++]:_res[k++];
_res = res;
}
}
@Override public void postGlobal(){
_cache = new KeySnapshot(_res);
_lastUpdate = System.currentTimeMillis();
}
}
}
// Periodically write user keys to disk
public static class Cleaner extends Thread {
// Desired cache level. Set by the MemoryManager asynchronously.
static public volatile long DESIRED;
// Histogram used by the Cleaner
private final Histo _myHisto;
boolean _diskFull = false;
public Cleaner() {
super("MemCleaner");
setDaemon(true);
setPriority(MAX_PRIORITY-2);
_dirty = Long.MAX_VALUE; // Set to clean-store
_myHisto = new Histo(); // Build/allocate a first histogram
_myHisto.compute(0); // Compute lousy histogram; find eldest
H = _myHisto; // Force to be the most recent
_myHisto.histo(true); // Force a recompute with a good eldest
MemoryManager.set_goals("init",false);
}
static boolean lazyPersist(){ // free disk > our DRAM?
return !H2O.OPT_ARGS.no_ice && H2O.SELF._heartbeat.get_free_disk() > MemoryManager.MEM_MAX;
}
static boolean isDiskFull(){ // free disk space < 5K?
long space = Persist.getIce().getUsableSpace();
return space != Persist.UNKNOWN && space < (5 << 10);
}
@Override public void run() {
boolean diskFull = false;
while( true ) {
// Sweep the K/V store, writing out Values (cleaning) and free'ing
// - Clean all "old" values (lazily, optimistically)
// - Clean and free old values if above the desired cache level
// Do not let optimistic cleaning get in the way of emergency cleaning.
// Get a recent histogram, computing one as needed
Histo h = _myHisto.histo(false);
long now = System.currentTimeMillis();
long dirty = _dirty; // When things first got dirtied
// Start cleaning if: "dirty" was set a "long" time ago, or we are beyond
// the desired cache level. Conversely: go back to sleep if the cache
// is below the desired level & nothing has been dirty for awhile.
if( h._cached < DESIRED && // Cache is low and
(now-dirty < 5000) ) { // not dirty a long time
// Block asleep, waking every 5 secs to check for stuff, or when poked
Boot.block_store_cleaner();
continue; // Awoke; loop back and re-check histogram.
}
now = System.currentTimeMillis();
_dirty = Long.MAX_VALUE; // Reset, since we are going to write stuff out
MemoryManager.set_goals("preclean",false);
// The age beyond which we need to toss out things to hit the desired
// caching levels. If forced, be exact (toss out the minimal amount).
// If lazy, store-to-disk things down to 1/2 the desired cache level
// and anything older than 5 secs.
boolean force = (h._cached >= DESIRED); // Forced to clean
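// If the disk was marked full on an earlier pass, re-probe it only under
// forced cleaning; the flag clears again once space has been freed.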
if( force && diskFull )
diskFull = isDiskFull();
long clean_to_age = h.clean_to(force ? DESIRED : (DESIRED>>1));
// If not forced cleaning, expand the cleaning age so that any Value
// untouched for more than 5 sec becomes eligible
if( !force ) clean_to_age = Math.max(clean_to_age,now-5000);
// No logging if under memory pressure: can deadlock the cleaner thread
if( Log.flag(Sys.CLEAN) ) {
String s = h+" DESIRED="+(DESIRED>>20)+"M dirtysince="+(now-dirty)+" force="+force+" clean2age="+(now-clean_to_age);
if( MemoryManager.canAlloc() ) Log.debug(Sys.CLEAN ,s);
else Log.unwrap(System.err,s);
}
long cleaned = 0;
long freed = 0;
// For faster K/V store walking get the NBHM raw backing array,
// and walk it directly.
Object[] kvs = STORE.raw_array();
// Start the walk at slot 2, because slots 0,1 hold meta-data
for( int i=2; i<kvs.length; i += 2 ) {
// In the raw backing array, Keys and Values alternate in slots
Object ok = kvs[i], ov = kvs[i+1];
if( !(ok instanceof Key ) ) continue; // Ignore tombstones and Primes and null's
Key key = (Key )ok;
if( !(ov instanceof Value) ) continue; // Ignore tombstones and Primes and null's
Value val = (Value)ov;
byte[] m = val.rawMem();
Object p = val.rawPOJO();
if( m == null && p == null ) continue; // Nothing to throw out
if( val.isLockable() ) continue; // we do not want to throw out Lockables.
boolean isChunk = p instanceof Chunk;
// Ignore things younger than the required age. In particular, do
// not spill-to-disk all dirty things we find.
long touched = val._lastAccessedTime;
if( touched > clean_to_age ) { // Too recently touched?
// But can toss out a byte-array if already deserialized & on disk
// (no need for both forms). Note no savings for Chunks, for which m==p._mem
if( val.isPersisted() && m != null && p != null && !isChunk ) {
val.freeMem(); // Toss serialized form, since can rebuild from POJO
freed += val._max;
}
dirty_store(touched); // But may write it out later
continue; // Too young
}
// Should I write this value out to disk?
// Should I further force it from memory?
if( !val.isPersisted() && !diskFull && (force || (lazyPersist() && lazy_clean(key)))) {
try {
val.storePersist(); // Write to disk
if( m == null ) m = val.rawMem();
if( m != null ) cleaned += m.length;
} catch(IOException e) {
if( isDiskFull() )
Log.warn(Sys.CLEAN,"Disk full! Disabling swapping to disk." + (force?" Memory low! Please free some space in " + Persist.getIce().getPath() + "!":""));
else
Log.warn(Sys.CLEAN,"Disk swapping failed! " + e.getMessage());
// Something is wrong, so mark the disk as full anyway so we do not
// attempt to write again. (We will retry on a later pass when memory is low.)
diskFull = true;
}
}
// And, under pressure, free all
if( force && val.isPersisted() ) {
val.freeMem (); if( m != null ) freed += val._max; m = null;
val.freePOJO(); if( p != null ) freed += val._max; p = null;
if( isChunk ) freed -= val._max; // Double-counted freed mem for Chunks since val._pojo._mem & val._mem are the same.
}
// If we have both forms, toss the byte[] form - can be had by
// serializing again.
if( m != null && p != null && !isChunk ) {
val.freeMem();
freed += val._max;
}
}
h = _myHisto.histo(true); // Force a new histogram
MemoryManager.set_goals("postclean",false);
// No logging if under memory pressure: can deadlock the cleaner thread
if( Log.flag(Sys.CLEAN) ) {
String s = h+" cleaned="+(cleaned>>20)+"M, freed="+(freed>>20)+"M, DESIRED="+(DESIRED>>20)+"M";
if( MemoryManager.canAlloc() ) Log.debug(Sys.CLEAN ,s);
else Log.unwrap(System.err,s);
}
}
}
// Rules on when to write & free a Key, when not under memory pressure.
boolean lazy_clean( Key key ) {
// Only data chunks are worth tossing out even lazily.
if( !key.isChunkKey() ) // Not a data chunk?
return false; // Not enough savings to write it out unless memory pressure forces us
// If this is a chunk of a system-defined array, then assume it has
// short lifetime, and we do not want to spin the disk writing it
// unless we're under memory pressure.
Key veckey = key.getVecKey();
return veckey.user_allowed(); // Write user keys but not system keys
}
// Current best histogram
static private volatile Histo H;
// Histogram class
public static class Histo {
final long[] _hs = new long[128];
long _oldest; // Time of the oldest K/V discovered this pass
long _eldest; // Time of the eldest K/V found in some prior pass
long _hStep; // Histogram step: (now-eldest)/histogram.length
long _cached; // Total alive data in the histogram
long _when; // When was this histogram computed
Value _vold; // For assertions: record the oldest Value
boolean _clean; // Was "clean" K/V when built?
// Return the current best histogram
static Histo best_histo() { return H; }
// Return the current best histogram, recomputing in-place if it is
// getting stale. Synchronized so concurrent callers recompute the
// histogram one at a time.
synchronized Histo histo( boolean force ) {
final Histo h = H; // Grab current best histogram
if( !force && System.currentTimeMillis() < h._when+100 )
return h; // It is recent; use it
if( h._clean && _dirty==Long.MAX_VALUE )
return h; // No change to the K/V store, so no point
compute(h._oldest); // Use last oldest value for computing the next histogram in-place
return (H = this); // Record current best histogram & return it
}
// Compute a histogram
public void compute( long eldest ) {
Arrays.fill(_hs, 0);
_when = System.currentTimeMillis();
_eldest = eldest; // Eldest seen in some prior pass
_hStep = Math.max(1,(_when-eldest)/_hs.length);
boolean clean = _dirty==Long.MAX_VALUE;
// Compute the hard way
Object[] kvs = STORE.raw_array();
long cached = 0; // Total K/V cached in ram
long oldest = Long.MAX_VALUE; // K/V with the longest time since being touched
Value vold = null;
// Start the walk at slot 2, because slots 0,1 hold meta-data
for( int i=2; i<kvs.length; i += 2 ) {
// In the raw backing array, Keys and Values alternate in slots
Object ok = kvs[i+0], ov = kvs[i+1];
if( !(ok instanceof Key ) ) continue; // Ignore tombstones and Primes and null's
if( !(ov instanceof Value) ) continue; // Ignore tombstones and Primes and null's
Value val = (Value)ov;
int len = 0;
byte[] m = val.rawMem();
Object p = val.rawPOJO();
if( m != null ) len += val._max;
if( p != null ) len += val._max;
if( p instanceof Chunk ) len -= val._max; // Do not double-count Chunks
if( len == 0 ) continue;
cached += len; // Accumulate total amount of cached keys
if( val._lastAccessedTime < oldest ) { // Found an older Value?
vold = val; // Record oldest Value seen
oldest = val._lastAccessedTime;
}
// Compute histogram bucket
int idx = (int)((val._lastAccessedTime - eldest)/_hStep);
if( idx < 0 ) idx = 0;
else if( idx >= _hs.length ) idx = _hs.length-1;
_hs[idx] += len; // Bump histogram bucket
}
_cached = cached; // Total cached in ram; index clamping puts every counted byte into some bucket, so this equals the buckets' sum
_oldest = oldest; // Oldest seen in this pass
_vold = vold;
_clean = clean && _dirty==Long.MAX_VALUE; // Looks like a clean K/V the whole time?
}
// Compute the age (a timestamp, in msec) up to which we need to toss
// out things to hit the desired cached memory level.
long clean_to( long desired ) {
long age = _eldest; // Age of bucket zero
if( _cached < desired ) return age; // Already there; nothing to remove
long s = 0; // Total amount tossed out so far
for( long t : _hs ) { // For all buckets...
s += t; // Raise amount tossed out
age += _hStep; // Raise age beyond which you need to go
if( _cached - s < desired ) break;
}
return age;
}
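// Worked example with made-up numbers: _eldest=0, _hStep=100ms, buckets
// [40M,30M,20M,...], _cached=100M, desired=50M. Bucket 0: s=40M, age=100ms,
// 100M-40M=60M >= 50M so keep walking. Bucket 1: s=70M, age=200ms,
// 100M-70M=30M < 50M so stop. Everything last touched before t=200ms is
// old enough to toss.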
// Pretty print
@Override
public String toString() {
long x = _eldest;
long now = System.currentTimeMillis();
return "H("+(_cached>>20)+"M, "+x+"ms < +"+(_oldest-x)+"ms <...{"+_hStep+"ms}...< +"+(_hStep*128)+"ms < +"+(now-x)+")";
}
}
}
// API IP Port Watchdog ---------------------------------------------------------------
// Monitor API IP:Port for availability.
//
// This thread is only a watchdog; it can be commented out entirely
// without affecting any service functionality.
public static class ApiIpPortWatchdogThread extends Thread {
final private String threadName = "ApiPortWatchdog";
private volatile boolean gracefulShutdownInitiated; // Thread-safe.
// Failure-tracking.
private int consecutiveFailures;
private long failureStartTimestampMillis;
// Timing things that can be tuned if needed.
final private int maxFailureSeconds = 180;
final private int maxConsecutiveFailures = 20;
final private int checkIntervalSeconds = 10;
final private int timeoutSeconds = 30;
final private int millisPerSecond = 1000;
final private int timeoutMillis = timeoutSeconds * millisPerSecond;
final private int sleepMillis = checkIntervalSeconds * millisPerSecond;
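// With these defaults the port is probed every 10 sec with a 30 sec connect
// timeout; the node shuts itself down after 20 consecutive failures or after
// 180 sec from the first failure, whichever trips first. Since 20 probes take
// at least ~200 sec, in practice the 180 sec window trips first.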
// Constructor.
public ApiIpPortWatchdogThread() {
super("ApiWatch"); // Only 9 characters get printed in the log.
setDaemon(true);
setPriority(MAX_PRIORITY-2);
reset();
gracefulShutdownInitiated = false;
}
// Exit this watchdog thread.
public void shutdown() {
gracefulShutdownInitiated = true;
}
// Sleep method.
private void mySleep(int millis) {
try {
Thread.sleep (millis);
}
catch (Exception xe)
{}
}
// Print some help for the user if a failure occurs.
private void printPossibleCauses() {
Log.info(threadName + ": A possible cause is DHCP (e.g. changing WiFi networks)");
Log.info(threadName + ": A possible cause is your laptop going to sleep (if running on a laptop)");
Log.info(threadName + ": A possible cause is the network interface going down");
Log.info(threadName + ": A possible cause is this host being overloaded");
}
// Reset the failure counting when a successful check() occurs.
private void reset() {
consecutiveFailures = 0;
failureStartTimestampMillis = 0;
}
// Count the impact of one failure.
private void failed() {
printPossibleCauses();
if (consecutiveFailures == 0) {
failureStartTimestampMillis = System.currentTimeMillis();
}
consecutiveFailures++;
}
// Check if enough failures have occurred or time has passed to
// shut down this node.
private void testForFailureShutdown() {
if (consecutiveFailures >= maxConsecutiveFailures) {
Log.err(threadName + ": Too many failures (>= " + maxConsecutiveFailures + "), H2O node shutting down");
H2O.exit(1);
}
if (consecutiveFailures > 0) {
final long now = System.currentTimeMillis();
final long deltaMillis = now - failureStartTimestampMillis;
final long thresholdMillis = (maxFailureSeconds * millisPerSecond);
if (deltaMillis > thresholdMillis) {
Log.err(threadName + ": Failure time threshold exceeded (>= " +
thresholdMillis +
" ms), H2O node shutting down");
H2O.exit(1);
}
}
}
// Do the watchdog check.
private void check() {
final Socket s = new Socket();
final InetSocketAddress apiIpPort = new InetSocketAddress(H2O.SELF_ADDRESS, H2O.API_PORT);
Exception e=null;
String msg=null;
try {
s.connect (apiIpPort, timeoutMillis);
reset();
}
catch (SocketTimeoutException se) { e= se; msg=": Timed out"; }
catch (IOException ioe) { e=ioe; msg=": Failed"; }
catch (Exception ee) { e= ee; msg=": Failed unexpectedly"; }
finally {
try { s.close(); } catch (Exception xe) {}
if (gracefulShutdownInitiated) { return; }
if( e != null ) {
Log.err(threadName+msg+" trying to connect to REST API IP and Port (" +
H2O.SELF_ADDRESS + ":" + H2O.API_PORT + ", " + timeoutMillis + " ms)");
failed(); // Record the failure for testForFailureShutdown()
}
testForFailureShutdown();
}
}
// Class main thread.
@Override
public void run() {
Log.debug (threadName + ": Thread run() started");
reset();
while (true) {
mySleep (sleepMillis);
if (gracefulShutdownInitiated) { break; }
check();
if (gracefulShutdownInitiated) { break; }
}
}
}
/**
* Log physical (RSS) memory usage periodically.
* Used by developers to look for memory leaks.
* Currently this only works for Linux.
*/
private static class MemoryWatchdogThread extends Thread {
final private String threadName = "MemoryWatchdog";
private volatile boolean gracefulShutdownInitiated; // Thread-safe.
// Timing things that can be tuned if needed.
final private int checkIntervalSeconds = 5;
final private int millisPerSecond = 1000;
final private int sleepMillis = checkIntervalSeconds * millisPerSecond;
// Constructor.
public MemoryWatchdogThread() {
super("MemWatch"); // Only 9 characters get printed in the log.
setDaemon(true);
setPriority(MAX_PRIORITY - 2);
gracefulShutdownInitiated = false;
}
// Exit this watchdog thread.
public void shutdown() {
gracefulShutdownInitiated = true;
}
// Sleep method.
private void mySleep(int millis) {
try {
Thread.sleep (millis);
}
catch (Exception xe)
{}
}
// Do the watchdog check.
private void check() {
LinuxProcFileReader r = new LinuxProcFileReader();
r.read();
long rss = -1;
try {
rss = r.getProcessRss();
}
catch (AssertionError xe) {}
Log.info("RSS: " + rss);
}
// Class main thread.
@Override
public void run() {
Log.debug(threadName + ": Thread run() started");
while (true) {
mySleep (sleepMillis);
if (gracefulShutdownInitiated) { break; }
check();
if (gracefulShutdownInitiated) { break; }
}
}
}
public static class GAStartupReportThread extends Thread {
final private String threadName = "GAStartupReport";
final private int sleepMillis = 150 * 1000; //2.5 min
// Constructor.
public GAStartupReportThread() {
super("GAStartupReport"); // Only 9 characters get printed in the log.
setDaemon(true);
setPriority(MAX_PRIORITY - 2);
}
// Class main thread.
@Override
public void run() {
try {
Thread.sleep (sleepMillis);
}
catch (Exception ignore) {}
if (H2O.SELF == H2O.CLOUD._memary[0]) {
if (OPT_ARGS.ga_hadoop_ver != null)
H2O.GA.postAsync(new EventHit("System startup info", "Hadoop version", OPT_ARGS.ga_hadoop_ver, 1));
H2O.GA.postAsync(new EventHit("System startup info", "Cloud", "Cloud size", CLOUD.size()));
}
}
}
}