package water;
import java.lang.management.ManagementFactory;
import javax.management.*;
import water.persist.Persist;
import water.util.LinuxProcFileReader;
import water.util.Log;
/**
 * Daemon thread that periodically multicasts this Node's HeartBeat to the
 * local subnet.  Every pass refreshes the health statistics stored in
 * {@code H2O.SELF._heartbeat}, hands them together with the current Cloud to
 * {@code UDPHeartbeat.build_and_multicast}, and then reports (through
 * {@code Paxos.print}) Cloud members that went silent or came back.
 *
 * @author <a href="mailto:cliffc@h2o.ai"></a>
 * @version 1.0
 */
public class HeartBeatThread extends Thread {
  /** Creates the (not yet started) daemon thread named "Heartbeat". */
  public HeartBeatThread() {
    super("Heartbeat");
    setDaemon(true);
  }

  // Time between heartbeats, in msec.  Strictly several iterations less than
  // the timeout.
  static final int SLEEP = 1000;

  // Timeout in msec before we decide to not include a Node in the next round
  // of Paxos Cloud Membership voting.
  static public final int TIMEOUT = 60000;

  // Timeout in msec before we decide a Node is suspect, and call for a vote
  // to remove him.  This must be strictly greater than the TIMEOUT.
  static final int SUSPECT = TIMEOUT+500;

  // Number of heartbeat iterations between two runs of the mini-benchmark:
  // 300 iterations of SLEEP msec each, i.e. roughly every 5 minutes.
  private static final int BENCHMARK_PERIOD = 300;

  // My Histogram.  Called from any thread calling into the MM.
  // Singleton, allocated now so I do not allocate during an OOM event.
  static private final H2O.Cleaner.Histo myHisto = new H2O.Cleaner.Histo();

  // Uniquely number heartbeats for better timelines.
  static private int HB_VERSION;

  /**
   * The heartbeat loop; never returns normally.
   *
   * Started by main() on a single thread, this code publishes Cloud
   * membership to the Cloud once a second (across all members).  If anybody
   * disagrees with the membership Heartbeat, they will start a round of Paxos
   * group discovery.
   *
   * @throws RuntimeException (as produced by {@code Log.errRTExcept}) if the
   *         OperatingSystem MBean name cannot be constructed
   */
  @Override public void run() {
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    ObjectName os;
    try {
      os = new ObjectName("java.lang:type=OperatingSystem");
    } catch( MalformedObjectNameException e ) {
      throw Log.errRTExcept(e);
    }
    Thread.currentThread().setPriority(Thread.MAX_PRIORITY);

    // Iteration counter, kept as a long so the stagger arithmetic in
    // benchmarkDue() cannot overflow.
    long counter = 0;
    while( true ) {
      // Once per second, for the entire cloud a Node will multi-cast publish
      // itself, so other unrelated Clouds discover each other and form up.
      try { Thread.sleep(SLEEP); }       // Only once-sec per entire Cloud
      catch( InterruptedException ignore ) {
        // Interrupts are ignored on purpose: the loop just carries on with
        // the next heartbeat.
      }

      // Update the interesting health self-info for publication also
      H2O cloud = H2O.CLOUD;
      HeartBeat hb = H2O.SELF._heartbeat;
      hb._hb_version = HB_VERSION++;
      hb._jvm_boot_msec= TimeLine.JVM_BOOT_MSEC;
      final Runtime run = Runtime.getRuntime();
      hb.set_free_mem (run. freeMemory());
      hb.set_max_mem (run. maxMemory());
      hb.set_tot_mem (run.totalMemory());
      hb._keys = (H2O.STORE.size ());
      hb.set_valsz (myHisto.histo(false)._cached);
      hb._num_cpus = (char)run.availableProcessors();

      // Run mini-benchmark every 5 mins.  However, on startup - do not have
      // all JVMs immediately launch a all-core benchmark - they will fight
      // with each other.  Stagger them using the hashcode.
      // NOTE(review): hb._cpus_allowed is only refreshed further down (in
      // updateProcStats), so the benchmark sees the value left by the
      // previous iteration - confirm that this is acceptable on the very
      // first pass.
      if( benchmarkDue(counter) ) {
        hb._gflops = Linpack.run(hb._cpus_allowed);
        hb._membw = MemoryBandwidth.run(hb._cpus_allowed);
      }

      hb._system_load_average = systemLoadAverage(mbs, os);

      // Sum the outstanding task counts over all Cloud members.
      int rpcs = 0;
      for( H2ONode h2o : cloud._memary )
        rpcs += h2o.taskSize();
      hb._rpcs = (char)rpcs;

      // Scrape F/J pool counts.  Fill local arrays first and store them
      // afterwards, so hb never references a partly-filled array.
      short[] fjthrds = new short[H2O.MAX_PRIORITY+1];
      short[] fjqueue = new short[H2O.MAX_PRIORITY+1];
      for( int i=0; i<fjthrds.length; i++ ) {
        fjthrds[i] = (short)H2O.getWrkThrPoolSize(i);
        fjqueue[i] = (short)H2O.getWrkQueueSize(i);
      }
      hb._fjthrds = fjthrds;
      hb._fjqueue = fjqueue;
      hb._tcps_active= (char)H2ONode.TCPS.get();

      // get the usable and total disk storage for the partition where the
      // persistent KV pairs are stored
      hb.set_free_disk(Persist.getIce().getUsableSpace());
      hb.set_max_disk(Persist.getIce().getTotalSpace());

      updateProcStats(hb);

      // Announce what Cloud we think we are in.
      // Publish our health as well.
      UDPHeartbeat.build_and_multicast(cloud, hb);

      // If we have no internet connection, then the multicast goes
      // nowhere and we never receive a heartbeat from ourselves!
      // Fake it now.
      long now = System.currentTimeMillis();
      H2O.SELF._last_heard_from = now;

      reportSuspectNodes(cloud, now);
      counter++;
    }
  }

  // True on the iterations where the mini-benchmark should run.  The sum is
  // computed in long arithmetic: Math.abs(int) stays negative for
  // Integer.MIN_VALUE and an int sum could wrap around.
  private static boolean benchmarkDue( long counter ) {
    long stagger = Math.abs((long)H2O.SELF.hashCode());
    return (counter + stagger) % BENCHMARK_PERIOD == 0;
  }

  // Reads the "SystemLoadAverage" attribute of the given MBean; yields 0 when
  // the attribute cannot be read or is not a Double.
  private static float systemLoadAverage( MBeanServer mbs, ObjectName os ) {
    Object load = null;
    try {
      load = mbs.getAttribute(os, "SystemLoadAverage");
    } catch( Exception e ) {
      // Ignore, data probably not available on this VM
    }
    return load instanceof Double ? ((Double) load).floatValue() : 0;
  }

  // Copies cpu utilization for the system and for this process (linux only)
  // into the heartbeat, along with the allowed-cpu count, nthreads and pid.
  private static void updateProcStats( HeartBeat hb ) {
    LinuxProcFileReader lpfr = new LinuxProcFileReader();
    lpfr.read();
    if( lpfr.valid() ) {
      hb._system_idle_ticks = lpfr.getSystemIdleTicks();
      hb._system_total_ticks = lpfr.getSystemTotalTicks();
      hb._process_total_ticks = lpfr.getProcessTotalTicks();
      hb._process_num_open_fds = lpfr.getProcessNumOpenFds();
    } else {
      // -1 marks the values as unavailable.
      hb._system_idle_ticks = -1;
      hb._system_total_ticks = -1;
      hb._process_total_ticks = -1;
      hb._process_num_open_fds = -1;
    }
    // Never advertise more allowed cpus than the configured thread count.
    hb._cpus_allowed = lpfr.getProcessCpusAllowed();
    if( H2O.OPT_ARGS.nthreads < hb._cpus_allowed ) {
      hb._cpus_allowed = H2O.OPT_ARGS.nthreads;
    }
    hb._nthreads = H2O.OPT_ARGS.nthreads;
    hb._pid = lpfr.getProcessID();
  }

  // Look for napping Nodes: print once when a member has been silent for more
  // than SUSPECT msec, and once more when it is heard from again.
  private static void reportSuspectNodes( H2O cloud, long now ) {
    for( H2ONode h2o : cloud._memary ) {
      long delta = now - h2o._last_heard_from;
      if( delta > SUSPECT ) {   // We suspect this Node has taken a dirt nap
        if( !h2o._announcedLostContact ) {
          Paxos.print("hart: announce suspect node",cloud._memary,h2o.toString());
          h2o._announcedLostContact = true;
        }
      } else if( h2o._announcedLostContact ) {
        Paxos.print("hart: regained contact with node",cloud._memary,h2o.toString());
        h2o._announcedLostContact = false;
      }
    }
  }
}