package water.deploy; import java.io.File; import java.io.Serializable; import java.util.*; import water.*; import water.H2O.FlatFileEntry; import water.deploy.VM.Params; import water.deploy.VM.Watchdog; import water.util.Log; import water.util.Utils; /** * Deploys and starts a remote cluster. * <br> * Note: This class is intended for debug and experimentation purposes only, please refer to the * documentation to run an H2O cluster. */ public class Cloud { public final List<String> publicIPs = new ArrayList<String>(); public final List<String> privateIPs = new ArrayList<String>(); /** Includes for rsync to the master */ public final Set<String> clientRSyncIncludes = new HashSet<String>(); /** Excludes for rsync to the master */ public final Set<String> clientRSyncExcludes = new HashSet<String>(); /** Includes for rsync between the master and slaves */ public final Set<String> fannedRSyncIncludes = new HashSet<String>(); /** Excludes for rsync between the master and slaves */ public final Set<String> fannedRSyncExcludes = new HashSet<String>(); /** Port for all remote machines. */ public static final int PORT = 54423; public static final int FORWARDED_LOCAL_PORT = 54321; /** * To avoid configuring remote machines, a JVM can be sent through rsync with H2O. By default, * decompress the Oracle Linux x64 JDK to a local folder and point this path to it. */ static final String JRE = null; // System.getProperty("user.home") + "/libs/jdk/jre"; /** Watch dogs are additional JVMs that shutdown the cluster when the client is killed */ static final boolean WATCHDOGS = true; static final String FLATFILE = "flatfile"; public void start(String[] java_args, String[] args) { // Take first box as cloud master Host master = new Host(publicIPs.get(0)); Set<String> incls = new HashSet<String>(clientRSyncIncludes); if( JRE != null && !new File(JRE + "/bin/java").exists() ) throw new IllegalArgumentException("Invalid JRE"); if( JRE != null ) incls.add(JRE); List<String> ips = privateIPs.size() > 0 ? privateIPs : publicIPs; String s = ""; for( Object o : ips ) s += (s.length() == 0 ? "" : '\n') + o.toString() + ":" + PORT; File flatfile = Utils.writeFile(new File(Utils.tmp(), FLATFILE), s); incls.add(flatfile.getAbsolutePath()); master.rsync(incls, clientRSyncExcludes, false); ArrayList<String> list = new ArrayList<String>(); list.add("-mainClass"); list.add(Master.class.getName()); CloudParams p = new CloudParams(); p._incls = new HashSet<String>(fannedRSyncIncludes); p._excls = fannedRSyncExcludes; p._incls.add(FLATFILE); if( JRE != null ) p._incls.add(new File(JRE).getName()); list.add(VM.write(p)); list.addAll(Arrays.asList(args)); String[] java = Utils.append(java_args, NodeVM.class.getName()); Params params = new Params(master, java, list.toArray(new String[0])); if( WATCHDOGS ) { SSHWatchdog r = new SSHWatchdog(params); r.inheritIO(); r.start(); } else { try { SSHWatchdog.run(params); } catch( Exception e ) { throw new RuntimeException(e); } } } static class CloudParams implements Serializable { Set<String> _incls, _excls; } static class SSHWatchdog extends Watchdog { public SSHWatchdog(Params p) { super(javaArgs(SSHWatchdog.class.getName()), new String[] { write(p) }); } public static void main(String[] args) throws Exception { exitWithParent(); Params p = read(args[0]); run(p); } static void run(Params p) throws Exception { Host host = new Host(p._host[0], p._host[1], p._host[2]); String key = host.key() != null ? host.key() : ""; String s = "ssh-agent sh -c \"ssh-add " + key + "; ssh -l " + host.user() + " -A" + Host.SSH_OPTS; s += " -L " + FORWARDED_LOCAL_PORT + ":127.0.0.1:" + PORT; // Port forwarding s += " " + host.address() + " '" + SSH.command(p._java, p._node) + "'\""; s = s.replace("\\", "\\\\").replace("$", "\\$"); ArrayList<String> list = new ArrayList<String>(); // Have to copy to file for cygwin, but works also on -nix File sh = Utils.writeFile(s); File onWindows = new File("C:/cygwin/bin/bash.exe"); if( onWindows.exists() ) { list.add(onWindows.getPath()); list.add("--login"); } else list.add("bash"); list.add(sh.getAbsolutePath()); exec(list); } } public static class Master { public static void main(String[] args) throws Exception { VM.exitWithParent(); CloudParams params = VM.read(args[0]); args = Utils.remove(args, 0); String[] workerArgs = new String[] { "-flatfile", FLATFILE, "-port", "" + PORT }; List<FlatFileEntry> flatfile = H2O.parseFlatFile(new File(FLATFILE)); HashMap<String, Host> hosts = new HashMap<String, Host>(); ArrayList<Node> workers = new ArrayList<Node>(); for( int i = 1; i < flatfile.size(); i++ ) { Host host = new Host(flatfile.get(i).inet.getHostAddress()); hosts.put(host.address(), host); workers.add(new NodeHost(host, workerArgs)); } Host.rsync(hosts.values().toArray(new Host[0]), params._incls, params._excls, false); for( Node w : workers ) { w.inheritIO(); w.start(); } H2O.main(Utils.append(workerArgs, args)); stall_till_cloudsize(1 + workers.size(), 10000); // stall for cloud 10seconds Log.unwrap(System.out, ""); Log.unwrap(System.out, "Cloud is up, local port " + FORWARDED_LOCAL_PORT + " forwarded"); Log.unwrap(System.out, "Go to http://127.0.0.1:" + FORWARDED_LOCAL_PORT); Log.unwrap(System.out, ""); int index = Arrays.asList(args).indexOf("-mainClass"); if( index >= 0 ) { String pack = args[index + 1].substring(0, args[index + 1].lastIndexOf('.')); LaunchJar.weavePackages(pack); Boot.run(args); } } public static void stall_till_cloudsize(int x, long ms) { H2O.waitForCloudSize(x, ms); UKV.put(Job.LIST, new Job.List()); // Jobs.LIST must be part of initial keys } } }