package water.api;
import dontweave.gson.*;
import water.*;
import water.util.Log;
import java.util.concurrent.ConcurrentHashMap;
public class Cloud extends Request2 {
@API(help="quiet", required=false, filter=Default.class)
protected boolean quiet = false;
@API(help="skip_ticks", required=false, filter=Default.class)
protected boolean skip_ticks = false;
/**
* Data structure to store last tick counts from a given node.
*/
private class LastTicksEntry {
final public long _system_idle_ticks;
final public long _system_total_ticks;
final public long _process_total_ticks;
LastTicksEntry(HeartBeat hb) {
_system_idle_ticks = hb._system_idle_ticks;
_system_total_ticks = hb._system_total_ticks;
_process_total_ticks = hb._process_total_ticks;
}
}
@Override
public RequestServer.API_VERSION[] supportedVersions() {
return SUPPORTS_V1_V2;
}
/**
* Store last tick counts for each node.
*
* This is local to a node and doesn't need to be Iced, so make it transient.
* Access this each time the Cloud status page is called on this node.
*
* The window of tick aggregation is between calls to this page (which might come from the browser or from REST
* API clients).
*
* Note there is no attempt to distinguish between REST API sessions. Every call updates the last tick count info.
*/
private static transient ConcurrentHashMap<String,LastTicksEntry> ticksHashMap = new ConcurrentHashMap<String, LastTicksEntry>();
private static volatile boolean lastCloudHealthy = false;
public Cloud() {
_requestHelp = "Displays the information about the current cloud. For each"
+ " node displays its heartbeat information.";
}
@Override public Response serve() {
JsonObject response = new JsonObject();
final H2O cloud = H2O.CLOUD;
final H2ONode self = H2O.SELF;
response.addProperty(VERSION, H2O.VERSION);
response.addProperty(CLOUD_NAME, H2O.NAME);
response.addProperty(NODE_NAME, self.toString());
response.addProperty(CLOUD_SIZE, cloud._memary.length);
long now = System.currentTimeMillis();
response.addProperty(CLOUD_UPTIME_MILLIS, now - H2O.START_TIME_MILLIS);
boolean cloudHealthy = true;
JsonArray nodes = new JsonArray();
for (H2ONode h2o : cloud._memary) {
HeartBeat hb = h2o._heartbeat;
JsonObject node = new JsonObject();
node.addProperty(NAME,h2o.toString());
node.addProperty(NUM_KEYS, hb._keys);
node.addProperty(VALUE_SIZE, hb.get_valsz());
node.addProperty(FREE_MEM, hb.get_free_mem());
node.addProperty(TOT_MEM, hb.get_tot_mem());
node.addProperty(MAX_MEM, hb.get_max_mem());
node.addProperty(MEM_BW, hb._membw);
node.addProperty(FREE_DISK, hb.get_free_disk());
node.addProperty(MAX_DISK, hb.get_max_disk());
node.addProperty(NUM_CPUS, (int)hb._num_cpus);
node.addProperty(GFLOPS, hb._gflops);
node.addProperty(SYSTEM_LOAD, hb._system_load_average);
Long elapsed = System.currentTimeMillis() - h2o._last_heard_from;
node.addProperty(ELAPSED, elapsed);
h2o._node_healthy = elapsed > HeartBeatThread.TIMEOUT ? false : true;
node.addProperty(NODE_HEALTH, h2o._node_healthy);
if (! h2o._node_healthy) {
cloudHealthy = false;
}
node.addProperty("cpus_allowed", hb._cpus_allowed);
node.addProperty("nthreads", hb._nthreads);
node.addProperty("PID", hb._pid);
JsonArray fjth = new JsonArray();
JsonArray fjqh = new JsonArray();
JsonArray fjtl = new JsonArray();
JsonArray fjql = new JsonArray();
if( hb._fjthrds != null ) {
for( int i=0; i<H2O.MIN_HI_PRIORITY; i++ ) {
if( hb._fjthrds[i]==-1 ) break;
fjtl.add(new JsonPrimitive(hb._fjthrds[i]));
fjql.add(new JsonPrimitive(hb._fjqueue[i]));
}
node.add(FJ_THREADS_LO, fjtl);
node.add(FJ_QUEUE_LO , fjql);
for( int i=H2O.MIN_HI_PRIORITY; i<H2O.MAX_PRIORITY; i++ ) {
fjth.add(new JsonPrimitive(hb._fjthrds[i]));
fjqh.add(new JsonPrimitive(hb._fjqueue[i]));
}
node.add(FJ_THREADS_HI, fjth);
node.add(FJ_QUEUE_HI , fjqh);
}
node.addProperty(RPCS, (int) hb._rpcs);
node.addProperty(TCPS_ACTIVE, (int) hb._tcps_active);
if (hb._process_num_open_fds >= 0) { node.addProperty("open_fds", hb._process_num_open_fds); } else { node.addProperty("open_fds", "N/A"); }
// Use tick information to calculate CPU usage percentage for the entire system and
// for the specific H2O node.
//
// Note that 100% here means "the entire box". This is different from 'top' 100%,
// which usually means one core.
int my_cpu_pct = -1;
int sys_cpu_pct = -1;
if (!skip_ticks) {
LastTicksEntry lte = ticksHashMap.get(h2o.toString());
if (lte != null) {
long system_total_ticks_delta = hb._system_total_ticks - lte._system_total_ticks;
// Avoid divide by 0 errors.
if (system_total_ticks_delta > 0) {
long system_idle_ticks_delta = hb._system_idle_ticks - lte._system_idle_ticks;
double sys_cpu_frac_double = 1 - ((double)(system_idle_ticks_delta) / (double)system_total_ticks_delta);
if (sys_cpu_frac_double < 0) sys_cpu_frac_double = 0; // Clamp at 0.
else if (sys_cpu_frac_double > 1) sys_cpu_frac_double = 1; // Clamp at 1.
sys_cpu_pct = (int)(sys_cpu_frac_double * 100);
long process_total_ticks_delta = hb._process_total_ticks - lte._process_total_ticks;
double process_cpu_frac_double = ((double)(process_total_ticks_delta) / (double)system_total_ticks_delta);
// Saturate at 0 and 1.
if (process_cpu_frac_double < 0) process_cpu_frac_double = 0; // Clamp at 0.
else if (process_cpu_frac_double > 1) process_cpu_frac_double = 1; // Clamp at 1.
my_cpu_pct = (int)(process_cpu_frac_double * 100);
}
}
LastTicksEntry newLte = new LastTicksEntry(hb);
ticksHashMap.put(h2o.toString(), newLte);
}
if (my_cpu_pct >= 0) { node.addProperty("my_cpu_%", my_cpu_pct); } else { node.addProperty("my_cpu_%", "N/A"); }
if (sys_cpu_pct >= 0) { node.addProperty("sys_cpu_%", sys_cpu_pct); } else { node.addProperty("sys_cpu_%", "N/A"); }
node.addProperty(LAST_CONTACT, h2o._last_heard_from);
nodes.add(node);
}
response.addProperty(CLOUD_HEALTH, cloudHealthy);
response.add(NODES,nodes);
response.addProperty(CONSENSUS, Paxos._commonKnowledge); // Cloud is globally accepted
response.addProperty(LOCKED, Paxos._cloudLocked); // Cloud is locked against changes
boolean logCloudStatus = (!cloudHealthy) || (cloudHealthy != lastCloudHealthy) || !quiet;
lastCloudHealthy = cloudHealthy;
if (logCloudStatus) {
Log.info("H2O Cloud Status:");
for (String s : response.toString().split("[{}]"))
if (!s.equals(",") && s.length() > 0) Log.info(s); // Log the cloud status to stdout
}
Response r = Response.done(response);
r.setBuilder(CONSENSUS, new BooleanStringBuilder("","Voting new members"));
r.setBuilder(LOCKED, new BooleanStringBuilder("Locked","Accepting new members"));
r.setBuilder(NODES, new MyAryBuilder());
r.setBuilder(NODES+"."+NAME, new NodeCellBuilder());
r.setBuilder(NODES+"."+LAST_CONTACT, new LastContactBuilder());
return r;
}
public static String pos_neg(double d) {
return d >= 0 ? String.valueOf(d) : "n/a";
}
// Just the Node as a link
private static class NodeCellBuilder extends ArrayRowElementBuilder {
@Override public String elementToString(JsonElement element, String contextName) {
String str = element.getAsString();
if( str.equals(H2O.SELF.toString()) ) {
return "<a href='StoreView.html'>"+str+"</a>";
}
String str2 = str.startsWith("/") ? str.substring(1) : str;
String str3 = "<a href='http://" + str2 + "/StoreView.html'>" + str + "</a>";
return str3;
}
}
// Highlight sick nodes
private static class MyAryBuilder extends ArrayBuilder {
static ArrayRowBuilder MY_ARRAY_ROW = new MyRowBuilder();
@Override public Builder defaultBuilder(JsonElement element) { return MY_ARRAY_ROW; }
}
private static class MyRowBuilder extends ArrayRowBuilder {
@Override public String header(JsonObject object, String objectName) {
long then = object.getAsJsonPrimitive(LAST_CONTACT).getAsLong();
long now = System.currentTimeMillis();
return ((now-then) >= HeartBeatThread.TIMEOUT) ? "\n<tr class=\"error\">" : "\n<tr>";
}
}
// Last-heard-from time pretty-printing
private static class LastContactBuilder extends ArrayRowElementBuilder {
@Override public String elementToString(JsonElement element, String contextName) {
long then = element.getAsLong();
long now = System.currentTimeMillis();
return (now-then >= 2*1000) ? ""+((now-then)/1000)+" secs ago" : "now";
}
}
}