package water.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Driver class to start a Hadoop mapreduce job which wraps an H2O cluster launch.
 *
 * All mapreduce I/O is typed as <Text, Text>.
 * The first Text is the Key (Mapper Id).
 * The second Text is the Value (a log output).
 *
 * Adapted from
 * https://svn.apache.org/repos/asf/hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/SleepJob.java
 */
@SuppressWarnings("deprecation")
public class h2odriver extends Configured implements Tool {
  final static int DEFAULT_CLOUD_FORMATION_TIMEOUT_SECONDS = 120;
  final static int CLOUD_FORMATION_SETTLE_DOWN_SECONDS = 2;
  final static int DEFAULT_EXTRA_MEM_PERCENT = 10;

  // Options that are parsed by the main thread before other threads are created.
  static String jobtrackerName = null;
  static int numNodes = -1;
  static String outputPath = null;
  static String mapperXmx = null;
  static int extraMemPercent = -1;          // Between 0 and 10, typically.  Cannot be negative.
  static String driverCallbackIp = null;
  static int driverCallbackPort = 0;        // By default, let the system pick the port.
  static String network = null;
  static boolean disown = false;
  static String clusterReadyFileName = null;
  static int cloudFormationTimeoutSeconds = DEFAULT_CLOUD_FORMATION_TIMEOUT_SECONDS;
  static int nthreads = -1;
  static int basePort = -1;
  static boolean manyCols = false;
  static int chunk_bytes;
  static int data_max_factor_levels;
  static boolean beta = false;
  static boolean enableRandomUdpDrop = false;
  static boolean enableExceptions = false;
  static boolean enableVerboseGC = false;
  static boolean enablePrintGCDetails = false;
  static boolean enablePrintGCTimeStamps = false;
  static boolean enableVerboseClass = false;
  static boolean enablePrintCompilation = false;
  static boolean enableExcludeMethods = false;
  static boolean enableLog4jDefaultInitOverride = h2odriver_config.overrideLog4jInit();
  static boolean enableDebug = false;
  static boolean enableSuspend = false;
  static int debugPort = 5005;              // 5005 is the default from IDEA
  static String licenseFileName = null;
  static String extraJavaOpts = null;

  // State filled in as a result of handling options.
  static String licenseData = null;

  // Runtime state that might be touched by different threads.
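  // (These fields are written by the callback handler threads below and read
  //  by the main driver thread, hence volatile.)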
  volatile ServerSocket driverCallbackSocket = null;
  volatile Job job = null;
  volatile CtrlCHandler ctrlc = null;
  volatile boolean clusterIsUp = false;
  volatile boolean clusterFailedToComeUp = false;
  volatile boolean clusterHasNodeWithLocalhostIp = false;
  volatile boolean shutdownRequested = false;

  public void setShutdownRequested() { shutdownRequested = true; }
  public boolean getShutdownRequested() { return shutdownRequested; }

  public static class H2ORecordReader extends RecordReader<Text, Text> {
    H2ORecordReader() { }
    public void initialize(InputSplit split, TaskAttemptContext context) { }
    public boolean nextKeyValue() throws IOException { return false; }
    public Text getCurrentKey() { return null; }
    public Text getCurrentValue() { return null; }
    public void close() throws IOException { }
    public float getProgress() throws IOException { return 0; }
  }

  public static class EmptySplit extends InputSplit implements Writable {
    public void write(DataOutput out) throws IOException { }
    public void readFields(DataInput in) throws IOException { }
    public long getLength() { return 0L; }
    public String[] getLocations() { return new String[0]; }
  }

  public static class H2OInputFormat extends InputFormat<Text, Text> {
    H2OInputFormat() { }

    public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
      List<InputSplit> ret = new ArrayList<InputSplit>();
      int numSplits = numNodes;
      for (int i = 0; i < numSplits; ++i) {
        ret.add(new EmptySplit());
      }
      return ret;
    }

    public RecordReader<Text, Text> createRecordReader(
        InputSplit ignored, TaskAttemptContext taskContext) throws IOException {
      H2ORecordReader trr = new H2ORecordReader();
      return trr;
    }
  }

  /**
   * Handle Ctrl-C and other catchable shutdown events.
   * If we successfully catch one, then try to kill the hadoop job if
   * we have not already been told it completed.
   *
   * (Of course kill -9 cannot be handled.)
   */
  class CtrlCHandler extends Thread {
    volatile boolean _complete = false;

    public void setComplete() { _complete = true; }

    @Override
    public void run() {
      if (_complete) {
        return;
      }
      _complete = true;

      boolean killed = false;
      try {
        System.out.println("Attempting to clean up hadoop job...");
        job.killJob();
        for (int i = 0; i < 5; i++) {
          if (job.isComplete()) {
            System.out.println("Killed.");
            killed = true;
            break;
          }
          Thread.sleep(1000);
        }
      }
      catch (Exception ignore) { }
      finally {
        if (! killed) {
          System.out.println("Kill attempt failed, please clean up job manually.");
        }
      }
    }
  }

  /**
   * Read and handle one Mapper->Driver Callback message.
   */
  class CallbackHandlerThread extends Thread {
    private Socket _s;
    private CallbackManager _cm;

    private void createClusterReadyFile(String ip, int port) throws Exception {
      String fileName = clusterReadyFileName + ".tmp";
      String text = ip + ":" + port + "\n";
      try {
        File file = new File(fileName);
        BufferedWriter output = new BufferedWriter(new FileWriter(file));
        output.write(text);
        output.flush();
        output.close();
        File file2 = new File(clusterReadyFileName);
        boolean success = file.renameTo(file2);
        if (! success) {
          throw new Exception("Failed to create file " + clusterReadyFileName);
        }
      }
      catch (IOException e) {
        e.printStackTrace();
      }
    }

    public void setSocket(Socket value) { _s = value; }
    public void setCallbackManager(CallbackManager value) { _cm = value; }

    @Override
    public void run() {
      MapperToDriverMessage msg = new MapperToDriverMessage();
      try {
        msg.read(_s);
        char type = msg.getType();
        if (type == MapperToDriverMessage.TYPE_EOF_NO_MESSAGE) {
          // Ignore it.
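          // (An EOF with no payload presumably just means the peer closed the
          //  connection without sending a message; there is nothing to do.)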
          _s.close();
          return;
        }
        // System.out.println("Read message with type " + (int)type);
        if (type == MapperToDriverMessage.TYPE_EMBEDDED_WEB_SERVER_IP_PORT) {
          // System.out.println("H2O node " + msg.getEmbeddedWebServerIp() + ":" + msg.getEmbeddedWebServerPort() + " started");
          _s.close();
        }
        else if (type == MapperToDriverMessage.TYPE_FETCH_FLATFILE) {
          // DO NOT close _s here!
          // The callback manager accumulates sockets to H2O nodes so it can
          // send a synthesized flatfile once everyone has arrived.
          System.out.println("H2O node " + msg.getEmbeddedWebServerIp() + ":" + msg.getEmbeddedWebServerPort() + " requested flatfile");
          if (msg.getEmbeddedWebServerIp().equals("127.0.0.1")) {
            clusterHasNodeWithLocalhostIp = true;
          }
          _cm.registerNode(msg.getEmbeddedWebServerIp(), msg.getEmbeddedWebServerPort(), _s);
        }
        else if (type == MapperToDriverMessage.TYPE_CLOUD_SIZE) {
          _s.close();
          System.out.println("H2O node " + msg.getEmbeddedWebServerIp() + ":" + msg.getEmbeddedWebServerPort() + " reports H2O cluster size " + msg.getCloudSize());
          if (msg.getCloudSize() == numNodes) {
            // Do this under a synchronized block to avoid getting multiple cluster ready notification files.
            synchronized (h2odriver.class) {
              if (! clusterIsUp) {
                if (clusterReadyFileName != null) {
                  createClusterReadyFile(msg.getEmbeddedWebServerIp(), msg.getEmbeddedWebServerPort());
                  System.out.println("Cluster notification file (" + clusterReadyFileName + ") created.");
                }
                clusterIsUp = true;
              }
            }
          }
        }
        else if (type == MapperToDriverMessage.TYPE_EXIT) {
          System.out.println("H2O node " + msg.getEmbeddedWebServerIp() + ":" + msg.getEmbeddedWebServerPort() +
                             " on host " + _s.getInetAddress().getHostAddress() +
                             " exited with status " + msg.getExitStatus());
          _s.close();
          if (! clusterIsUp) {
            clusterFailedToComeUp = true;
          }
        }
        else {
          _s.close();
          System.err.println("MapperToDriverMessage: Read invalid type (" + type + ") from socket, ignoring...");
        }
      }
      catch (Exception e) {
        System.out.println("Exception occurred in CallbackHandlerThread");
        System.out.println(e.toString());
        if (e.getMessage() != null) {
          System.out.println(e.getMessage());
        }
        e.printStackTrace();
      }
    }
  }

  /**
   * Start a long-running thread ready to handle Mapper->Driver messages.
   */
  class CallbackManager extends Thread {
    private ServerSocket _ss;

    // Nodes and socks
    private HashSet<String> _dupChecker;
    private ArrayList<String> _nodes;
    private ArrayList<Socket> _socks;

    public void setServerSocket(ServerSocket value) { _ss = value; }

    public void registerNode(String ip, int port, Socket s) {
      synchronized (_dupChecker) {
        String entry = ip + ":" + port;
        if (_dupChecker.contains(entry)) {
          // This is bad.
          System.out.println("ERROR: Duplicate node registered (" + entry + "), exiting");
          System.exit(1);
        }
        // Record the entry so the duplicate check above can actually fire
        // if the same ip:port registers again.
        _dupChecker.add(entry);
        _nodes.add(entry);
        _socks.add(s);
        if (_nodes.size() != numNodes) {
          return;
        }

        System.out.println("Sending flatfiles to nodes...");
        assert (_nodes.size() == numNodes);
        assert (_nodes.size() == _socks.size());

        // Build the flatfile and send it to all nodes.
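        // The flatfile is simply a newline-separated list of ip:port entries,
        // one per registered node, e.g.
        //   10.0.0.1:54321
        //   10.0.0.2:54321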
        String flatfile = "";
        for (int i = 0; i < _nodes.size(); i++) {
          String val = _nodes.get(i);
          flatfile += val;
          flatfile += "\n";
        }

        for (int i = 0; i < _socks.size(); i++) {
          Socket nodeSock = _socks.get(i);
          DriverToMapperMessage msg = new DriverToMapperMessage();
          msg.setMessageFetchFlatfileResponse(flatfile);
          try {
            System.out.println("    [Sending flatfile to node " + _nodes.get(i) + "]");
            msg.write(nodeSock);
            nodeSock.close();
          }
          catch (Exception e) {
            System.out.println("ERROR: Failed to write to H2O node " + _nodes.get(i));
            System.out.println(e.toString());
            if (e.getMessage() != null) {
              System.out.println(e.getMessage());
            }
            e.printStackTrace();
            System.exit(1);
          }
        }
      }
    }

    @Override
    public void run() {
      _dupChecker = new HashSet<String>();
      _nodes = new ArrayList<String>();
      _socks = new ArrayList<Socket>();
      while (true) {
        try {
          Socket s = _ss.accept();
          CallbackHandlerThread t = new CallbackHandlerThread();
          t.setSocket(s);
          t.setCallbackManager(this);
          t.start();
        }
        catch (SocketException e) {
          if (getShutdownRequested()) {
            _ss = null;
            return;
          }
          else {
            System.out.println("Exception occurred in CallbackManager");
            System.out.println("ERROR: " + (e.getMessage() != null ? e.getMessage() : "(null)"));
            e.printStackTrace();
          }
        }
        catch (Exception e) {
          System.out.println("Exception occurred in CallbackManager");
          System.out.println("ERROR: " + (e.getMessage() != null ? e.getMessage() : "(null)"));
          e.printStackTrace();
        }
      }
    }
  }

  /**
   * Print usage and exit 1.
   */
  static void usage() {
    System.err.printf(
      "\n" +
      "Usage: h2odriver\n" +
      "          -libjars <.../h2o.jar>\n" +
      "          [other generic Hadoop ToolRunner options]\n" +
      "          [-h | -help]\n" +
      "          [-jobname <name of job in jobtracker (defaults to: 'H2O_nnnnn')>]\n" +
      "              (Note nnnnn is chosen randomly to produce a unique name)\n" +
      "          [-driverif <ip address of mapper->driver callback interface>]\n" +
      "          [-driverport <port of mapper->driver callback interface>]\n" +
      "          [-network <IPv4network1Specification>[,<IPv4network2Specification> ...]]\n" +
      "          [-timeout <seconds>]\n" +
      "          [-disown]\n" +
      "          [-notify <notification file name>]\n" +
      "          -mapperXmx <per mapper Java Xmx heap size>\n" +
      "          [-extramempercent <0 to 20>]\n" +
      "          -n | -nodes <number of H2O nodes (i.e. mappers) to create>\n" +
      "          [-nthreads <maximum typical worker threads, i.e. cpus to use>]\n" +
      "          [-baseport <starting HTTP port for H2O nodes; default is 54321>]\n" +
      "          [-many_cols] (improve handling of high-dimensional datasets, same as -chunk_bytes 24)\n" +
      "          [-chunk_bytes <log (base 2) of chunk size in bytes (e.g., default is 22 for 4MB chunks)>]\n" +
      "          [-data_max_factor_levels <max. number of factors per column (e.g., default is 1,000,000)>]\n" +
      "          [-ea]\n" +
      "          [-verbose:gc]\n" +
      "          [-XX:+PrintGCDetails]\n" +
      "          [-license <license file name (local filesystem, not hdfs)>]\n" +
      "          [-extraJavaOpts <extra Java options (e.g., -XX:MaxDirectMemorySize=128m)>]\n" +
      "          -o | -output <hdfs output dir>\n" +
      "\n" +
      "Notes:\n" +
      "          o  Each H2O node runs as a mapper.\n" +
      "          o  Only one mapper may be run per host.\n" +
      "          o  There are no combiners or reducers.\n" +
      "          o  Each H2O cluster should have a unique jobname.\n" +
      "          o  -mapperXmx, -nodes and -output are required.\n" +
      "\n" +
      "          o  -mapperXmx is set to both Xms and Xmx of the mapper to reserve\n" +
      "             memory up front.\n" +
      "          o  -extramempercent is a percentage of mapperXmx.  (Default: " + DEFAULT_EXTRA_MEM_PERCENT + ")\n" +
      "             Extra memory for internal JVM use outside of Java heap.\n" +
      "             mapreduce.map.memory.mb = mapperXmx * (1 + extramempercent/100)\n" +
      "          o  -libjars with an h2o.jar is required.\n" +
      "          o  -driverif and -driverport let the user optionally specify the\n" +
      "             network interface and port (on the driver host) for callback\n" +
      "             messages from the mapper to the driver.\n" +
      "          o  -network allows the user to specify a list of networks that the\n" +
      "             H2O nodes can bind to.  Use this if you have multiple network\n" +
      "             interfaces on the hosts in your Hadoop cluster and you want to\n" +
      "             force H2O to use a specific one.\n" +
      "             (Example network specification: '10.1.2.0/24' allows 256 legal\n" +
      "             possibilities.)\n" +
      "          o  -timeout specifies how many seconds to wait for the H2O cluster\n" +
      "             to come up before giving up.  (Default: " + DEFAULT_CLOUD_FORMATION_TIMEOUT_SECONDS + " seconds)\n" +
      "          o  -disown causes the driver to exit as soon as the cloud forms.\n" +
      "             Otherwise, Ctrl-C of the driver kills the Hadoop Job.\n" +
      "          o  -notify specifies a file to write when the cluster is up.\n" +
      "             The file contains one line with the IP and port of the embedded\n" +
      "             web server for one of the H2O nodes in the cluster.  e.g.\n" +
      "             192.168.1.100:54321\n" +
      "          o  All mappers must start before the H2O cloud is considered up.\n" +
      "\n" +
      "Examples:\n" +
      "          hadoop jar h2odriver_HHH.jar water.hadoop.h2odriver -jt <yourjobtracker>:<yourport> -libjars h2o.jar -mapperXmx 1g -nodes 1 -output hdfsOutputDir\n" +
      "          hadoop jar h2odriver_HHH.jar water.hadoop.h2odriver -jt <yourjobtracker>:<yourport> -libjars h2o.jar -mapperXmx 1g -nodes 1 -notify notify.txt -disown -output hdfsOutputDir\n" +
      "          (Choose the proper h2odriver (_HHH) for your version of hadoop.)\n" +
      "\n" +
      "Exit value:\n" +
      "          0 means the cluster exited successfully with an orderly Shutdown.\n" +
      "            (From the Web UI or the REST API.)\n" +
      "\n" +
      "          non-zero means the cluster exited with a failure.\n" +
      "            (Note that Ctrl-C is treated as a failure.)\n" +
      "\n"
    );
    System.exit(1);
  }

  /**
   * Print an error message, print usage, and exit 1.
   * @param s Error message
   */
  static void error(String s) {
    System.err.printf("\nERROR: " + "%s\n\n", s);
    usage();
  }

  /**
   * Read a file into a string.
   * @param fileName File to read.
   * @return String contents of file.
   */
  static private String readFile(String fileName) throws IOException {
    BufferedReader br = new BufferedReader(new FileReader(fileName));
    try {
      StringBuilder sb = new StringBuilder();
      String line = br.readLine();
      while (line != null) {
        sb.append(line);
        sb.append("\n");
        line = br.readLine();
      }
      return sb.toString();
    }
    finally {
      br.close();
    }
  }

  /**
   * Parse remaining arguments after the ToolRunner args have already been removed.
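   * Each option that takes a value consumes the following argument; if the
   * value is missing, usage() is printed and the driver exits.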
   * @param args Argument list
   */
  void parseArgs(String[] args) {
    int i = 0;
    while (true) {
      if (i >= args.length) {
        break;
      }
      String s = args[i];
      if (s.equals("-h") || s.equals("help") || s.equals("-help") || s.equals("--help")) {
        usage();
      }
      else if (s.equals("-n") || s.equals("-nodes")) {
        i++; if (i >= args.length) { usage(); }
        numNodes = Integer.parseInt(args[i]);
      }
      else if (s.equals("-o") || s.equals("-output")) {
        i++; if (i >= args.length) { usage(); }
        outputPath = args[i];
      }
      else if (s.equals("-jobname")) {
        i++; if (i >= args.length) { usage(); }
        jobtrackerName = args[i];
      }
      else if (s.equals("-mapperXmx")) {
        i++; if (i >= args.length) { usage(); }
        mapperXmx = args[i];
      }
      else if (s.equals("-extramempercent")) {
        i++; if (i >= args.length) { usage(); }
        extraMemPercent = Integer.parseInt(args[i]);
      }
      else if (s.equals("-driverif")) {
        i++; if (i >= args.length) { usage(); }
        driverCallbackIp = args[i];
      }
      else if (s.equals("-driverport")) {
        i++; if (i >= args.length) { usage(); }
        driverCallbackPort = Integer.parseInt(args[i]);
      }
      else if (s.equals("-network")) {
        i++; if (i >= args.length) { usage(); }
        network = args[i];
      }
      else if (s.equals("-timeout")) {
        i++; if (i >= args.length) { usage(); }
        cloudFormationTimeoutSeconds = Integer.parseInt(args[i]);
      }
      else if (s.equals("-disown")) {
        disown = true;
      }
      else if (s.equals("-notify")) {
        i++; if (i >= args.length) { usage(); }
        clusterReadyFileName = args[i];
      }
      else if (s.equals("-nthreads")) {
        i++; if (i >= args.length) { usage(); }
        nthreads = Integer.parseInt(args[i]);
      }
      else if (s.equals("-many_cols")) {
        manyCols = true;
      }
      else if (s.equals("-chunk_bytes")) {
        i++; if (i >= args.length) { usage(); }
        chunk_bytes = Integer.parseInt(args[i]);
      }
      else if (s.equals("-data_max_factor_levels")) {
        i++; if (i >= args.length) { usage(); }
        data_max_factor_levels = Integer.parseInt(args[i]);
      }
      else if (s.equals("-baseport")) {
        i++; if (i >= args.length) { usage(); }
        basePort = Integer.parseInt(args[i]);
        if ((basePort < 1) || (basePort > 65535)) {
          error("Base port must be between 1 and 65535");
        }
      }
      else if (s.equals("-beta")) {
        beta = true;
      }
      else if (s.equals("-random_udp_drop")) {
        enableRandomUdpDrop = true;
      }
      else if (s.equals("-ea")) {
        enableExceptions = true;
      }
      else if (s.equals("-verbose:gc")) {
        enableVerboseGC = true;
      }
      else if (s.equals("-verbose:class")) {
        enableVerboseClass = true;
      }
      else if (s.equals("-XX:+PrintCompilation")) {
        enablePrintCompilation = true;
      }
      else if (s.equals("-exclude")) {
        enableExcludeMethods = true;
      }
      else if (s.equals("-Dlog4j.defaultInitOverride=true")) {
        enableLog4jDefaultInitOverride = true;
      }
      else if (s.equals("-debug")) {
        enableDebug = true;
      }
      else if (s.equals("-suspend")) {
        enableSuspend = true;
      }
      else if (s.equals("-debugport")) {
        i++; if (i >= args.length) { usage(); }
        debugPort = Integer.parseInt(args[i]);
        if ((debugPort < 1) || (debugPort > 65535)) {
          error("Debug port must be between 1 and 65535");
        }
      }
      else if (s.equals("-XX:+PrintGCDetails")) {
        enablePrintGCDetails = true;
      }
      else if (s.equals("-XX:+PrintGCTimeStamps")) {
        enablePrintGCTimeStamps = true;
      }
      else if (s.equals("-gc")) {
        enableVerboseGC = true;
        enablePrintGCDetails = true;
        enablePrintGCTimeStamps = true;
      }
      else if (s.equals("-license")) {
        i++; if (i >= args.length) { usage(); }
        licenseFileName = args[i];
      }
      else if (s.equals("-extraJavaOpts")) {
        i++; if (i >= args.length) { usage(); }
        extraJavaOpts = args[i];
      }
      else {
        error("Unrecognized option " + s);
      }

      i++;
    }

    // Check for mandatory arguments.
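    // (-mapperXmx, -nodes (or -n) and -output (or -o) are all required;
    //  everything else has a usable default.)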
    if (numNodes < 1) {
      error("Number of H2O nodes must be greater than 0 (must specify -n)");
    }
    if (outputPath == null) {
      error("Missing required option -output");
    }
    if (mapperXmx == null) {
      error("Missing required option -mapperXmx");
    }

    // Check for sane arguments.
    if (! mapperXmx.matches("[1-9][0-9]*[mgMG]")) {
      error("-mapperXmx invalid (try something like -mapperXmx 4g)");
    }
    if (extraMemPercent < 0) {
      extraMemPercent = DEFAULT_EXTRA_MEM_PERCENT;
    }
    if (jobtrackerName == null) {
      Random rng = new Random();
      int num = rng.nextInt(99999);
      jobtrackerName = "H2O_" + num;
    }
    if (network == null) {
      network = "";
    }
    else {
      String[] networks;
      if (network.contains(",")) {
        networks = network.split(",");
      }
      else {
        networks = new String[1];
        networks[0] = network;
      }
      for (int j = 0; j < networks.length; j++) {
        String n = networks[j];
        Pattern p = Pattern.compile("(\\d+)\\.(\\d+)\\.(\\d+)\\.(\\d+)/(\\d+)");
        Matcher m = p.matcher(n);
        boolean b = m.matches();
        if (! b) {
          error("network invalid: " + n);
        }
        for (int k = 1; k <= 4; k++) {
          int o = Integer.parseInt(m.group(k));
          if ((o < 0) || (o > 255)) {
            error("network invalid: " + n);
          }
        }
        int bits = Integer.parseInt(m.group(5));
        if ((bits < 0) || (bits > 32)) {
          error("network invalid: " + n);
        }
      }
    }
    if (network == null) {
      error("Internal error, network should not be null at this point");
    }
    if ((nthreads >= 0) && (nthreads < 4)) {
      error("nthreads invalid (must be >= 4): " + nthreads);
    }
    if (licenseFileName != null) {
      try {
        licenseData = readFile(licenseFileName);
      }
      catch (Exception xe) {
        StringBuilder sb = new StringBuilder();
        sb.append("Failed to read license file: ");
        if (xe.getLocalizedMessage() != null) {
          sb.append(xe.getLocalizedMessage());
        }
        else {
          sb.append(licenseFileName);
        }
        error(sb.toString());
      }
    }
  }

  static String calcMyIp() throws Exception {
    Enumeration nis = NetworkInterface.getNetworkInterfaces();
    System.out.println("Determining driver host interface for mapper->driver callback...");
    while (nis.hasMoreElements()) {
      NetworkInterface ni = (NetworkInterface) nis.nextElement();
      Enumeration ias = ni.getInetAddresses();
      while (ias.hasMoreElements()) {
        InetAddress ia = (InetAddress) ias.nextElement();
        String s = ia.getHostAddress();
        System.out.println("    [Possible callback IP address: " + s + "]");
      }
    }
    InetAddress ia = InetAddress.getLocalHost();
    String s = ia.getHostAddress();
    return s;
  }

  private int waitForClusterToComeUp() throws Exception {
    long startMillis = System.currentTimeMillis();
    while (true) {
      if (clusterFailedToComeUp) {
        System.out.println("ERROR: At least one node failed to come up during cluster formation");
        job.killJob();
        return 4;
      }
      if (job.isComplete()) {
        break;
      }
      if (clusterIsUp) {
        break;
      }
      long nowMillis = System.currentTimeMillis();
      long deltaMillis = nowMillis - startMillis;
      if (cloudFormationTimeoutSeconds > 0) {
        if (deltaMillis > (cloudFormationTimeoutSeconds * 1000)) {
          System.out.println("ERROR: Timed out waiting for H2O cluster to come up (" + cloudFormationTimeoutSeconds + " seconds)");
          System.out.println("ERROR: (Try specifying the -timeout option to increase the waiting time limit)");
          if (clusterHasNodeWithLocalhostIp) {
            System.out.println("");
            System.out.println("NOTE: One of the nodes chose 127.0.0.1 as its IP address, which is probably wrong.");
            System.out.println("NOTE: You may want to specify the -network option, which lets you specify the network interface the mappers bind to.");
            System.out.println("NOTE: Typical usage is:  -network a.b.c.d/24");
          }
          job.killJob();
          return 3;
        }
      }
      final int ONE_SECOND_MILLIS = 1000;
      Thread.sleep(ONE_SECOND_MILLIS);
    }
    return 0;
  }

  private void waitForClusterToShutdown() throws Exception {
    while (true) {
      if (job.isComplete()) {
        break;
      }
      final int ONE_SECOND_MILLIS = 1000;
      Thread.sleep(ONE_SECOND_MILLIS);
    }
  }

  private String calcHadoopVersion() {
    try {
      Process p = new ProcessBuilder("hadoop", "version").start();
      p.waitFor();
      BufferedReader br = new BufferedReader(new InputStreamReader(p.getInputStream()));
      String line = br.readLine();
      if (line == null) {
        line = "(unknown)";
      }
      return line;
    }
    catch (Exception e) {
      return "(unknown)";
    }
  }

  /*
   * Clean up driver-side resources after the hadoop job has finished.
   *
   * This method was added so that it can be called from inside
   * Spring Hadoop and the driver can be created and then deleted from inside
   * a single process.
   */
  private void cleanUpDriverResources() {
    ctrlc.setComplete();
    try {
      Runtime.getRuntime().removeShutdownHook(ctrlc);
    }
    catch (IllegalStateException ignore) {
      // If a "Shutdown in progress" exception would be thrown, just ignore it
      // and don't bother to remove the hook.
    }
    ctrlc = null;

    try {
      setShutdownRequested();
      driverCallbackSocket.close();
      driverCallbackSocket = null;
    }
    catch (Exception e) {
      System.out.println("ERROR: " + (e.getMessage() != null ? e.getMessage() : "(null)"));
      e.printStackTrace();
    }

    // At this point, resources are released.
    // The hadoop job has completed (job.isComplete() is true),
    // so the cluster memory and cpus are freed.
    // The driverCallbackSocket has been closed so a new one can be made.
    // The callbackManager itself may or may not have finished, but it doesn't
    // matter since the server socket has been closed.
  }

  private int run2(String[] args) throws Exception {
    // Parse arguments.
    // ----------------
    parseArgs(args);

    // Set up callback address and port.
    // ---------------------------------
    if (driverCallbackIp == null) {
      driverCallbackIp = calcMyIp();
    }
    driverCallbackSocket = new ServerSocket();
    driverCallbackSocket.setReuseAddress(true);
    InetSocketAddress sa = new InetSocketAddress(driverCallbackIp, driverCallbackPort);
    // The port to bind is carried in the socket address; bind's optional
    // second parameter is a backlog, not a port, so it is not passed here.
    driverCallbackSocket.bind(sa);
    int actualDriverCallbackPort = driverCallbackSocket.getLocalPort();
    CallbackManager cm = new CallbackManager();
    cm.setServerSocket(driverCallbackSocket);
    cm.start();
    System.out.println("Using mapper->driver callback IP address and port: " + driverCallbackIp + ":" + actualDriverCallbackPort);
    System.out.println("(You can override these with -driverif and -driverport.)");

    // Set up configuration.
    // ---------------------
    Configuration conf = getConf();
    if (h2odriver_config.usingYarn()) {
      System.out.println("Driver program compiled with MapReduce V2 (Yarn)");
    }
    else {
      System.out.println("Driver program compiled with MapReduce V1 (Classic)");
    }

    // Set memory parameters.
    {
      Pattern p = Pattern.compile("([1-9][0-9]*)([mgMG])");
      Matcher m = p.matcher(mapperXmx);
      boolean b = m.matches();
      if (!b) {
        System.out.println("(Could not parse mapperXmx.)");
        System.out.println("INTERNAL FAILURE.  PLEASE CONTACT TECHNICAL SUPPORT.");
        System.exit(1);
      }
      assert (m.groupCount() == 2);
      String number = m.group(1);
      String units = m.group(2);
      long megabytes = Long.parseLong(number);
      if (units.equals("g") || units.equals("G")) {
        megabytes = megabytes * 1024;
      }

      // YARN container must be sized greater than Xmx.
      // YARN will kill the application if the RSS of the process is larger than
      // mapreduce.map.memory.mb.
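      // Worked example of the sizing below (assuming -mapperXmx 4g and the
      // default -extramempercent 10):
      //   megabytes                           = 4 * 1024        = 4096
      //   jvmInternalMemoryMegabytes          = 4096 * 10 / 100 = 409
      //   processTotalPhysicalMemoryMegabytes = 4096 + 409      = 4505
      //   => mapreduce.map.memory.mb          = 4505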
      long jvmInternalMemoryMegabytes = (long) ((double) megabytes * ((double) extraMemPercent) / 100.0);
      long processTotalPhysicalMemoryMegabytes = megabytes + jvmInternalMemoryMegabytes;
      conf.set("mapreduce.job.ubertask.enable", "false");
      String mapreduceMapMemoryMb = Long.toString(processTotalPhysicalMemoryMegabytes);
      conf.set("mapreduce.map.memory.mb", mapreduceMapMemoryMb);

      // MRv1 standard options, but also required for YARN.
      String mapChildJavaOpts =
          "-Xms" + mapperXmx +
          " -Xmx" + mapperXmx +
          (enableExceptions ? " -ea" : "") +
          (enableVerboseGC ? " -verbose:gc" : "") +
          (enablePrintGCDetails ? " -XX:+PrintGCDetails" : "") +
          (enablePrintGCTimeStamps ? " -XX:+PrintGCTimeStamps" : "") +
          (enableVerboseClass ? " -verbose:class" : "") +
          (enablePrintCompilation ? " -XX:+PrintCompilation" : "") +
          (enableExcludeMethods ? " -XX:CompileCommand=exclude,water/fvec/NewChunk.append2slowd" : "") +
          (enableLog4jDefaultInitOverride ? " -Dlog4j.defaultInitOverride=true" : "") +
          (enableDebug ? " -agentlib:jdwp=transport=dt_socket,server=y,suspend=" + (enableSuspend ? "y" : "n") + ",address=" + debugPort : "") +
          (extraJavaOpts != null ? (" " + extraJavaOpts) : "");
      conf.set("mapred.child.java.opts", mapChildJavaOpts);
      conf.set("mapred.map.child.java.opts", mapChildJavaOpts);      // MapR 2.x requires this.

      System.out.println("Memory Settings:");
      System.out.println("    mapred.child.java.opts:      " + mapChildJavaOpts);
      System.out.println("    mapred.map.child.java.opts:  " + mapChildJavaOpts);
      System.out.println("    Extra memory percent:        " + extraMemPercent);
      System.out.println("    mapreduce.map.memory.mb:     " + mapreduceMapMemoryMb);
    }

    // Sometimes for debugging purposes, it helps to jam stuff in to the Java command
    // of the mapper child.
    //
    // conf.set("mapred.child.java.opts", "-Dh2o.FINDME=ignored");
    // conf.set("mapred.map.child.java.opts", "-Dh2o.FINDME2=ignored");
    // conf.set("mapred.map.child.java.opts", "-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=8999");

    // This is really silly, but without this line, the following ugly warning
    // gets emitted as the very first line of output, which is confusing for
    // the user.
    // Generic options parser is used automatically by ToolRunner, but somehow
    // that framework is not smart enough to disable the warning.
    //
    // Eliminates this runtime warning!
    // "WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same."
    conf.set("mapred.used.genericoptionsparser", "true");

    // We don't want hadoop launching extra nodes just to shoot them down.
    // Not good for in-memory H2O processing!
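    // (Speculative execution would start duplicate copies of slow mappers and
    //  kill the losers; a duplicate mapper here would mean a duplicate H2O
    //  node.  It is disabled below under both its MRv2 and MRv1 property
    //  names, and each map task gets exactly one attempt.)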
    conf.set("mapreduce.map.speculative", "false");
    conf.set("mapred.map.tasks.speculative.execution", "false");
    conf.set("mapred.map.max.attempts", "1");
    conf.set("mapred.job.reuse.jvm.num.tasks", "1");

    conf.set(h2omapper.H2O_JOBTRACKERNAME_KEY, jobtrackerName);
    conf.set(h2omapper.H2O_DRIVER_IP_KEY, driverCallbackIp);
    conf.set(h2omapper.H2O_DRIVER_PORT_KEY, Integer.toString(actualDriverCallbackPort));
    conf.set(h2omapper.H2O_NETWORK_KEY, network);
    if (nthreads >= 0) {
      conf.set(h2omapper.H2O_NTHREADS_KEY, Integer.toString(nthreads));
    }
    if (basePort >= 0) {
      conf.set(h2omapper.H2O_BASE_PORT_KEY, Integer.toString(basePort));
    }
    if (beta) {
      conf.set(h2omapper.H2O_BETA_KEY, "-beta");
    }
    if (manyCols) {
      conf.set(h2omapper.H2O_MANYCOLS_KEY, "-many_cols");
    }
    if (chunk_bytes > 0) {
      conf.set(h2omapper.H2O_CHUNKBITS_KEY, Integer.toString(chunk_bytes));
    }
    if (data_max_factor_levels > 0) {
      conf.set(h2omapper.H2O_DATAMAXFACTORLEVELS_KEY, Integer.toString(data_max_factor_levels));
    }
    if (enableRandomUdpDrop) {
      conf.set(h2omapper.H2O_RANDOM_UDP_DROP_KEY, "-random_udp_drop");
    }
    if (licenseData != null) {
      conf.set(h2omapper.H2O_LICENSE_DATA_KEY, licenseData);
    }
    String hadoopVersion = calcHadoopVersion();
    conf.set(h2omapper.H2O_HADOOP_VERSION, hadoopVersion);
    if ((new File(".h2o_no_collect")).exists() ||
        (new File(System.getProperty("user.home") + File.separator + ".h2o_no_collect")).exists()) {
      conf.set(h2omapper.H2O_GA_OPTOUT, "-ga_opt_out");
    }

    // Set up job stuff.
    // -----------------
    job = new Job(conf, jobtrackerName);
    job.setJarByClass(getClass());
    job.setInputFormatClass(H2OInputFormat.class);
    job.setMapperClass(h2omapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path("ignored"));
    if (outputPath != null) {
      FileOutputFormat.setOutputPath(job, new Path(outputPath));
    }

    // Run job.  We are running a zero combiner and zero reducer configuration.
    // ------------------------------------------------------------------------
    job.submit();
    System.out.println("Job name '" + jobtrackerName + "' submitted");
    System.out.println("JobTracker job ID is '" + job.getJobID() + "'");
    String applicationID = job.getJobID().toString().replace("job_", "application_");
    System.out.println("For YARN users, logs command is 'yarn logs -applicationId " + applicationID + "'");

    // Register ctrl-c handler to try to clean up job when possible.
    ctrlc = new CtrlCHandler();
    Runtime.getRuntime().addShutdownHook(ctrlc);

    System.out.printf("Waiting for H2O cluster to come up...\n");
    int rv = waitForClusterToComeUp();
    if (rv != 0) {
      System.out.println("ERROR: H2O cluster failed to come up");
      return rv;
    }
    if (job.isComplete()) {
      System.out.println("ERROR: H2O cluster failed to come up");
      ctrlc.setComplete();
      return 2;
    }

    System.out.printf("H2O cluster (%d nodes) is up\n", numNodes);
    if (disown) {
      // Do a short sleep here just to make sure all of the cloud
      // status stuff in H2O has settled down.
      // (Thread.sleep takes milliseconds; the constant is in seconds.)
      Thread.sleep(CLOUD_FORMATION_SETTLE_DOWN_SECONDS * 1000);
      System.out.println("Disowning cluster and exiting.");
      Runtime.getRuntime().removeShutdownHook(ctrlc);
      return 0;
    }

    System.out.println("(Note: Use the -disown option to exit the driver after cluster formation)");
    System.out.println("(Press Ctrl-C to kill the cluster)");
    System.out.println("Blocking until the H2O cluster shuts down...");
    waitForClusterToShutdown();
    cleanUpDriverResources();
    boolean success = job.isSuccessful();
    int exitStatus;
    exitStatus = success ? 0 : 1;
    System.out.println((success ? "" : "ERROR: ") + "Job was" + (success ? " " : " not ") + "successful");
    if (success) {
      System.out.println("Exiting with status 0");
    }
    else {
      System.out.println("Exiting with nonzero exit status");
    }
    return exitStatus;
  }

  /**
   * The run method called by ToolRunner.
   * @param args Arguments after ToolRunner arguments have been removed.
   * @return Exit value of program.
   */
  @Override
  public int run(String[] args) {
    int rv = -1;
    try {
      rv = run2(args);
    }
    catch (org.apache.hadoop.mapred.FileAlreadyExistsException e) {
      if (ctrlc != null) { ctrlc.setComplete(); }
      System.out.println("ERROR: " + (e.getMessage() != null ? e.getMessage() : "(null)"));
      System.exit(1);
    }
    catch (Exception e) {
      System.out.println("ERROR: " + (e.getMessage() != null ? e.getMessage() : "(null)"));
      e.printStackTrace();
      System.exit(1);
    }
    return rv;
  }

  /**
   * Main entry point
   * @param args Full program args, including those that go to ToolRunner.
   * @throws Exception
   */
  public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new h2odriver(), args);
    System.exit(exitCode);
  }
}