package water.hadoop;
import java.io.*;
import java.net.*;
import java.util.Map;
import java.util.List;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import water.H2O;
import water.util.Log;
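/**
 * Hadoop mapper that starts an H2O node inside the map task and then waits
 * for that node to report its exit status.
 *
 * Interesting Configuration properties:
 *   mapred.local.dir -- its first comma-separated entry is handed to H2O as -ice_root.
 *   Example value seen in a mapper:
 *   mapred.local.dir=/tmp/hadoop-tomk/mapred/local/taskTracker/tomk/jobcache/job_local1117903517_0001/attempt_local1117903517_0001_m_000000_0
 */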
public class h2omapper extends Mapper<Text, Text, Text, Text> {
// Hadoop Configuration keys read by the mapper when it assembles the H2O command line.

/** Value is passed to H2O as the argument of -name. */
public static final String H2O_JOBTRACKERNAME_KEY = "h2o.jobtrackername";
/** Value is passed through as the argument of -driverip. */
public static final String H2O_DRIVER_IP_KEY = "h2o.driver.ip";
/** Value is passed through as the argument of -driverport. */
public static final String H2O_DRIVER_PORT_KEY = "h2o.driver.port";
/** When non-empty, value is passed as the argument of -network. */
public static final String H2O_NETWORK_KEY = "h2o.network";
/** When non-empty, the value itself is appended as an H2O argument. */
public static final String H2O_BETA_KEY = "h2o.beta";
/** When non-empty, the value itself is appended as an H2O argument. */
public static final String H2O_RANDOM_UDP_DROP_KEY = "h2o.random.udp.drop";
/** When non-empty, parsed as an int and passed as the argument of -nthreads. */
public static final String H2O_NTHREADS_KEY = "h2o.nthreads";
/** When non-empty, the bare flag -many_cols is added. */
public static final String H2O_MANYCOLS_KEY = "h2o.many.cols";
/** When non-empty, parsed as an int and passed as the argument of -chunk_bytes. */
public static final String H2O_CHUNKBITS_KEY = "h2o.chunk.bits";
/** When non-empty, parsed as an int and passed as the argument of -data_max_factor_levels. */
public static final String H2O_DATAMAXFACTORLEVELS_KEY = "h2o.data.max.factor.levels";
/** When non-empty, parsed as an int and passed as the argument of -baseport. */
public static final String H2O_BASE_PORT_KEY = "h2o.baseport";
/** When non-empty, the text is written to h2o_license.txt under the ice root and passed via -license. */
public static final String H2O_LICENSE_DATA_KEY = "h2o.license.data";
/** When set, value is passed as the argument of -ga_hadoop_ver. */
public static final String H2O_HADOOP_VERSION = "h2o.hadoop.version";
/** When set, the value itself is appended as an H2O argument. */
public static final String H2O_GA_OPTOUT = "h2o.ga.optout";

/** Callback configuration created and registered with H2O by UserMain. */
static EmbeddedH2OConfig _embeddedH2OConfig;
/**
 * Start an H2O instance in the local JVM.
 *
 * The mapper hands this class to water.Boot.main together with the full
 * argument list; the callback options in that list are picked out here and
 * the complete list is then forwarded to H2O.main unchanged.
 */
public static class UserMain {
  /**
   * Extracts -driverip, -driverport and -mapperport from the arguments,
   * stores them in a fresh EmbeddedH2OConfig and registers it with H2O.
   * Options that are absent keep their defaults (null ip, port -1).
   *
   * @param args full argument list; each recognized flag must be followed by its value
   */
  private static void registerEmbeddedH2OConfig(String[] args) {
    String driverIp = null;
    int driverPort = -1;
    int mapperPort = -1;

    int idx = 0;
    while (idx < args.length) {
      String flag = args[idx];
      // Each recognized flag consumes the following element as its value.
      if (flag.equals("-driverip")) {
        driverIp = args[++idx];
      }
      else if (flag.equals("-driverport")) {
        driverPort = Integer.parseInt(args[++idx]);
      }
      else if (flag.equals("-mapperport")) {
        mapperPort = Integer.parseInt(args[++idx]);
      }
      idx++;
    }

    EmbeddedH2OConfig config = new EmbeddedH2OConfig();
    _embeddedH2OConfig = config;
    config.setDriverCallbackIp(driverIp);
    config.setDriverCallbackPort(driverPort);
    config.setMapperCallbackPort(mapperPort);
    H2O.setEmbeddedH2OConfig(config);
  }

  /**
   * Prints the stack trace of a failure raised by H2O.main; a failure while
   * printing is itself reported on stderr.
   */
  private static void reportFailure(Exception failure) {
    try {
      failure.printStackTrace();
    }
    catch (Exception e2) {
      System.err.println("_context.write excepted in UserMain");
      e2.printStackTrace();
    }
  }

  /**
   * Registers the embedded configuration and runs H2O.main, emitting
   * Log.POST progress markers around every step.
   *
   * @param args H2O options plus the -driverip/-driverport/-mapperport callback options
   */
  public static void main(String[] args) {
    Log.POST(30, "Entered UserMain");
    registerEmbeddedH2OConfig(args);
    Log.POST(31, "built textId");

    try {
      Log.POST(32, "top of try");
      H2O.main(args);
      Log.POST(33, "after H2O.main");
    }
    catch (Exception e) {
      Log.POST(37, "exception occurred");
      reportFailure(e);
    }
    finally {
      Log.POST(38, "top of finally");
      Log.POST(38, "bottom of finally");
    }

    Log.POST(39, "leaving UserMain");
  }
}
/**
 * Embedded configuration registered with H2O via H2O.setEmbeddedH2OConfig.
 *
 * Every callback builds a MapperToDriverMessage and sends it to the driver's
 * callback address. Notifications are sent from a background thread;
 * fetchFlatfile talks to the driver synchronously because it needs the reply.
 * Fields are volatile because they are written by the callbacks and read by
 * the background writer thread.
 */
private static class EmbeddedH2OConfig extends water.AbstractEmbeddedH2OConfig {
  volatile String _driverCallbackIp;
  volatile int _driverCallbackPort = -1;
  volatile int _mapperCallbackPort = -1;
  volatile String _embeddedWebServerIp = "(Unknown)";
  volatile int _embeddedWebServerPort = -1;

  void setDriverCallbackIp(String value) {
    _driverCallbackIp = value;
  }

  void setDriverCallbackPort(int value) {
    _driverCallbackPort = value;
  }

  void setMapperCallbackPort(int value) {
    _mapperCallbackPort = value;
  }

  /**
   * Delivers one message to the driver on its own thread. Failures are only
   * printed; a refused connection is expected once the driver has exited.
   */
  private class BackgroundWriterThread extends Thread {
    MapperToDriverMessage _m;

    void setMessage (MapperToDriverMessage value) {
      _m = value;
    }

    @Override
    public void run() {
      try {
        Socket s = new Socket(_m.getDriverCallbackIp(), _m.getDriverCallbackPort());
        try {
          _m.write(s);
        }
        finally {
          // Always release the socket, even when the write fails.
          s.close();
        }
      }
      catch (java.net.ConnectException e) {
        System.out.println("EmbeddedH2OConfig: BackgroundWriterThread could not connect to driver at " + _driverCallbackIp + ":" + _driverCallbackPort);
        System.out.println("(This is normal when the driver disowns the hadoop job and exits.)");
      }
      catch (Exception e) {
        System.out.println("EmbeddedH2OConfig: BackgroundWriterThread caught an Exception");
        e.printStackTrace();
      }
    }
  }

  /** Starts a BackgroundWriterThread that delivers the given message. */
  private void sendInBackground(MapperToDriverMessage msg) {
    BackgroundWriterThread bwt = new BackgroundWriterThread();
    bwt.setMessage(msg);
    bwt.start();
  }

  /**
   * Records the embedded web server address and forwards it to the driver
   * in the background. Exceptions are printed, never propagated.
   */
  @Override
  public void notifyAboutEmbeddedWebServerIpPort (InetAddress ip, int port) {
    _embeddedWebServerIp = ip.getHostAddress();
    _embeddedWebServerPort = port;
    try {
      MapperToDriverMessage msg = new MapperToDriverMessage();
      msg.setDriverCallbackIpPort(_driverCallbackIp, _driverCallbackPort);
      msg.setMessageEmbeddedWebServerIpPort(ip.getHostAddress(), port);
      System.out.printf("EmbeddedH2OConfig: notifyAboutEmbeddedWebServerIpPort called (%s, %d)\n", ip.getHostAddress(), port);
      sendInBackground(msg);
    }
    catch (Exception e) {
      System.out.println("EmbeddedH2OConfig: notifyAboutEmbeddedWebServerIpPort caught an Exception");
      e.printStackTrace();
    }
  }

  @Override
  public boolean providesFlatfile() {
    return true;
  }

  /**
   * Requests the flatfile from the driver over a blocking connection and
   * returns it after echoing it to stdout.
   *
   * @return the flatfile text carried by the driver's response
   * @throws Exception if the connection fails or the response has an unexpected type
   */
  @Override
  public String fetchFlatfile() throws Exception {
    System.out.printf("EmbeddedH2OConfig: fetchFlatfile called\n");
    MapperToDriverMessage msg = new MapperToDriverMessage();
    msg.setMessageFetchFlatfile(_embeddedWebServerIp, _embeddedWebServerPort);
    DriverToMapperMessage msg2 = new DriverToMapperMessage();

    Socket s = new Socket(_driverCallbackIp, _driverCallbackPort);
    try {
      msg.write(s);
      msg2.read(s);
    }
    finally {
      // Close before validating so a bad response no longer leaks the socket.
      s.close();
    }

    char type = msg2.getType();
    if (type != DriverToMapperMessage.TYPE_FETCH_FLATFILE_RESPONSE) {
      int typeAsInt = (int)type & 0xff;
      String str = "DriverToMapperMessage type unrecognized (" + typeAsInt + ")";
      Log.err(str);
      throw new Exception (str);
    }

    String flatfile = msg2.getFlatfile();
    System.out.printf("EmbeddedH2OConfig: fetchFlatfile returned\n");
    System.out.println("------------------------------------------------------------");
    System.out.println(flatfile);
    System.out.println("------------------------------------------------------------");
    return flatfile;
  }

  /**
   * Records the embedded web server address and reports the cloud size to
   * the driver in the background. Exceptions are printed, never propagated.
   */
  @Override
  public void notifyAboutCloudSize (InetAddress ip, int port, int size) {
    _embeddedWebServerIp = ip.getHostAddress();
    _embeddedWebServerPort = port;
    try {
      MapperToDriverMessage msg = new MapperToDriverMessage();
      msg.setDriverCallbackIpPort(_driverCallbackIp, _driverCallbackPort);
      msg.setMessageCloudSize(ip.getHostAddress(), port, size);
      System.out.printf("EmbeddedH2OConfig: notifyAboutCloudSize called (%s, %d, %d)\n", ip.getHostAddress(), port, size);
      sendInBackground(msg);
    }
    catch (Exception e) {
      System.out.println("EmbeddedH2OConfig: notifyAboutCloudSize caught an Exception");
      e.printStackTrace();
    }
  }

  /**
   * Reports the exit status to the driver (background, best effort), then
   * writes the status as a single byte to the mapper callback port on
   * 127.0.0.1. After that it sleeps for up to a minute and finally calls
   * System.exit(111) if the JVM is still alive.
   *
   * @param status exit status; only its low 8 bits are sent to the mapper
   */
  @Override
  public void exit(int status) {
    try {
      MapperToDriverMessage msg = new MapperToDriverMessage();
      msg.setDriverCallbackIpPort(_driverCallbackIp, _driverCallbackPort);
      msg.setMessageExit(_embeddedWebServerIp, _embeddedWebServerPort, status);
      System.out.printf("EmbeddedH2OConfig: exit called (%d)\n", status);
      sendInBackground(msg);
      System.out.println("EmbeddedH2OConfig: after bwt.start()");
    }
    catch (Exception e) {
      System.out.println("EmbeddedH2OConfig: exit caught an exception 1");
      e.printStackTrace();
    }

    try {
      // Wait one second to deliver the message before exiting.
      Thread.sleep (1000);

      Socket s = new Socket("127.0.0.1", _mapperCallbackPort);
      try {
        OutputStream os = s.getOutputStream();
        os.write(new byte[] { (byte)status });
        os.flush();
      }
      finally {
        s.close();
      }
      System.out.println("EmbeddedH2OConfig: after write to mapperCallbackPort");

      Thread.sleep(60 * 1000);
      // Should never make it this far!
    }
    catch (Exception e) {
      System.out.println("EmbeddedH2OConfig: exit caught an exception 2");
      e.printStackTrace();
    }

    System.exit(111);
  }

  /** Prints the driver callback and embedded web server addresses to stdout. */
  @Override
  public void print() {
    System.out.println("EmbeddedH2OConfig print()");
    System.out.println(" Driver callback IP: " + ((_driverCallbackIp != null) ? _driverCallbackIp : "(null)"));
    System.out.println(" Driver callback port: " + _driverCallbackPort);
    System.out.println(" Embedded webserver IP: " + ((_embeddedWebServerIp != null) ? _embeddedWebServerIp : "(null)"));
    System.out.println(" Embedded webserver port: " + _embeddedWebServerPort);
  }
}
/**
 * Emit a bunch of logging output at the beginning of the map task.
 *
 * Writes, keyed by the task id: every Configuration entry as key=value, a
 * short list of selected properties, the JVM's user.dir and the local
 * hostname. When the hostname cannot be resolved a placeholder record is
 * written instead of silently omitting the line.
 *
 * @param context      mapper context that supplies the Configuration and receives the records
 * @param mapredTaskId task id used as the key of every emitted record
 * @throws IOException          if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
private void emitLogHeader(Context context, String mapredTaskId) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Text textId = new Text(mapredTaskId);

  for (Map.Entry<String, String> entry: conf) {
    context.write(textId, new Text(entry.getKey() + "=" + entry.getValue()));
  }

  context.write(textId, new Text("----- Properties -----"));
  String[] plist = {
    "mapred.local.dir",
    "mapred.child.java.opts",
  };
  for (String k : plist) {
    String v = conf.get(k);
    if (v == null) {
      v = "(null)";
    }
    context.write(textId, new Text(k + " " + v));
  }

  String userDir = System.getProperty("user.dir");
  context.write(textId, new Text("user.dir " + userDir));

  // Only the lookup is guarded: UnknownHostException is an IOException, so
  // wrapping context.write as well would hide genuine output failures.
  String hostName;
  try {
    hostName = InetAddress.getLocalHost().getHostName();
  }
  catch (UnknownHostException uhe) {
    hostName = "(unknown)";
  }
  context.write(textId, new Text("hostname " + hostName));
}
/**
 * Hadoop counters published by the H2O mapper.
 */
public enum H2O_MAPPER_COUNTER {
  /** Incremented periodically by the heartbeat thread while the map task runs. */
  HADOOP_COUNTER_HEARTBEAT
}
/**
 * Hadoop heartbeat keepalive thread. Every ten seconds it reports progress
 * on the context and bumps a counter so that the jobtracker knows not to
 * kill the job. The loop never terminates on its own.
 */
public class CounterThread extends Thread {
  Context _context;
  Counter _counter;
  final int TEN_SECONDS_MILLIS = 10 * 1000;

  CounterThread (Context context, Counter counter) {
    this._context = context;
    this._counter = counter;
  }

  @Override
  public void run() {
    for (;;) {
      _context.progress();
      _counter.increment(1);
      pauseBetweenBeats();
    }
  }

  /** Sleeps for one heartbeat interval; any wake-up failure is ignored so the heartbeat keeps going. */
  private void pauseBetweenBeats() {
    try {
      Thread.sleep (TEN_SECONDS_MILLIS);
    }
    catch (Exception ignored) {
      // Intentionally ignored: the next beat is simply sent early.
    }
  }
}
/**
 * Launches H2O inside this map task and blocks until it reports an exit status.
 *
 * Steps: emit the log header, start the heartbeat thread, pick the ice root
 * from mapred.local.dir, open a loopback server socket on an ephemeral port,
 * build the H2O argument list from the Configuration, boot H2O through
 * water.Boot.main, then wait for the single status byte that
 * EmbeddedH2OConfig.exit writes to the loopback socket.
 *
 * @param context mapper context
 * @return the exit status byte received on the loopback socket (sign-extended)
 * @throws IOException          if mapred.local.dir is unset, or on socket, file or context I/O failure
 * @throws InterruptedException if writing to the context is interrupted
 */
private int run2(Context context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  String mapredTaskId = conf.get("mapred.task.id");
  Text textId = new Text(mapredTaskId);

  emitLogHeader(context, mapredTaskId);
  Log.POST(10, "After emitLogHeader");

  Counter counter = context.getCounter(H2O_MAPPER_COUNTER.HADOOP_COUNTER_HEARTBEAT);
  Thread counterThread = new CounterThread(context, counter);
  counterThread.start();

  String mapredLocalDir = conf.get("mapred.local.dir");
  if (mapredLocalDir == null) {
    // Fail with a readable message instead of an anonymous NullPointerException.
    throw new IOException("mapred.local.dir is not set; cannot choose an ice root for H2O");
  }
  // mapred.local.dir may list several directories; H2O uses the first one.
  int comma = mapredLocalDir.indexOf(',');
  String ice_root = (comma < 0) ? mapredLocalDir : mapredLocalDir.substring(0, comma);
  context.write(textId, new Text("mapred.local.dir is " + ice_root));

  ServerSocket ss = new ServerSocket();
  try {
    ss.bind(new InetSocketAddress("127.0.0.1", 0));
    String[] args = buildH2OArgs(conf, ice_root, ss.getLocalPort());

    context.write(textId, new Text("before water.Boot.main()"));
    bootH2O(context, textId, args);

    // NOTE(review): if the boot failed, nothing visible here will ever connect
    // to the socket and accept() would block indefinitely -- confirm intent.
    Log.POST(15, "Waiting for exit");
    return awaitExitStatus(ss);
  }
  finally {
    closeQuietly(ss);
  }
}

/** Returns true when the value is neither null nor the empty string. */
private static boolean hasText(String value) {
  return value != null && value.length() > 0;
}

/** Adds "flag value" when the value is non-empty. */
private static void addStringOption(List<String> argsList, String flag, String value) {
  if (hasText(value)) {
    argsList.add(flag);
    argsList.add(value);
  }
}

/**
 * Adds "flag N" when the value is non-empty. The value is round-tripped
 * through Integer.parseInt so a malformed number fails here with a
 * NumberFormatException rather than inside H2O.
 */
private static void addIntOption(List<String> argsList, String flag, String value) {
  if (hasText(value)) {
    argsList.add(flag);
    argsList.add(Integer.toString(Integer.parseInt(value)));
  }
}

/**
 * Builds the argument list for UserMain / H2O from the job Configuration.
 * As a side effect the license text, when configured, is written to a file
 * under the ice root.
 */
private static String[] buildH2OArgs(Configuration conf, String ice_root, int mapperCallbackPort) throws IOException {
  List<String> argsList = new ArrayList<String>();

  // Options used by H2O.
  argsList.add("-ice_root");
  argsList.add(ice_root);
  argsList.add("-name");
  argsList.add(conf.get(H2O_JOBTRACKERNAME_KEY));
  argsList.add("-hdfs_skip");
  addStringOption(argsList, "-network", conf.get(H2O_NETWORK_KEY));
  addIntOption(argsList, "-nthreads", conf.get(H2O_NTHREADS_KEY));
  addIntOption(argsList, "-baseport", conf.get(H2O_BASE_PORT_KEY));
  addIntOption(argsList, "-data_max_factor_levels", conf.get(H2O_DATAMAXFACTORLEVELS_KEY));
  if (hasText(conf.get(H2O_MANYCOLS_KEY))) {
    argsList.add("-many_cols");
  }
  addIntOption(argsList, "-chunk_bytes", conf.get(H2O_CHUNKBITS_KEY));

  // These two settings carry the literal argument to append.
  String betaString = conf.get(H2O_BETA_KEY);
  if (hasText(betaString)) {
    argsList.add(betaString);
  }
  String randomUdpDropString = conf.get(H2O_RANDOM_UDP_DROP_KEY);
  if (hasText(randomUdpDropString)) {
    argsList.add(randomUdpDropString);
  }

  String licenseData = conf.get(H2O_LICENSE_DATA_KEY);
  if (hasText(licenseData)) {
    String fileName = writeLicenseFile(ice_root, licenseData);
    argsList.add("-license");
    argsList.add(fileName);
  }

  String hadoopVersion = conf.get(H2O_HADOOP_VERSION);
  if (hadoopVersion != null) {
    argsList.add("-ga_hadoop_ver");
    argsList.add(hadoopVersion);
  }
  String gaOptOut = conf.get(H2O_GA_OPTOUT);
  if (gaOptOut != null) {
    argsList.add(gaOptOut);
  }

  // Options passed through to UserMain for configuring the EmbeddedH2OConfig.
  argsList.add("-driverip");
  argsList.add(conf.get(H2O_DRIVER_IP_KEY));
  argsList.add("-driverport");
  argsList.add(conf.get(H2O_DRIVER_PORT_KEY));
  argsList.add("-mapperport");
  argsList.add(Integer.toString(mapperCallbackPort));

  return argsList.toArray(new String[argsList.size()]);
}

/** Writes the license text to h2o_license.txt under the ice root (creating the directory if needed) and returns the file name. */
private static String writeLicenseFile(String ice_root, String licenseData) throws IOException {
  Log.POST(100, "Before writing license file");
  Log.POST(101, ice_root);
  File f = new File(ice_root);
  boolean b = f.exists();
  Log.POST(102, b ? "exists" : "does not exist");
  if (! b) {
    Log.POST(103, "before mkdirs()");
    f.mkdirs();
    Log.POST(104, "after mkdirs()");
  }

  String fileName = ice_root + File.separator + "h2o_license.txt";
  PrintWriter out = new PrintWriter(fileName);
  try {
    out.print(licenseData);
  }
  finally {
    out.close();
  }
  return fileName;
}

/**
 * Runs water.Boot.main with UserMain as the entry class. A boot failure is
 * not rethrown; its message, toString and stack trace are posted to the log
 * and written to the mapper output.
 */
private void bootH2O(Context context, Text textId, String[] args) throws IOException, InterruptedException {
  try {
    Log.POST(11, "Before boot");
    water.Boot.main(UserMain.class, args);
    Log.POST(12, "After boot");
  }
  catch (Exception e) {
    Log.POST(13, "Exception in boot");
    Log.POST(13, "");
    context.write(textId, new Text("exception in water.Boot.main()"));

    String s = e.getMessage();
    if (s == null) { s = "(null exception message)"; }
    Log.POST(13, s);
    Log.POST(13, "");
    context.write(textId, new Text(s));

    s = e.toString();
    if (s == null) { s = "(null exception toString)"; }
    Log.POST(13, s);
    Log.POST(13, "");
    context.write(textId, new Text(s));

    for (StackTraceElement el : e.getStackTrace()) {
      s = el.toString();
      Log.POST(13, s);
      context.write(textId, new Text(" " + s));
    }
  }
  finally {
    Log.POST(14, "Top of finally");
    context.write(textId, new Text("after water.Boot.main()"));
  }
}

/**
 * Accepts one connection and reads the one-byte exit status that
 * EmbeddedH2OConfig sends. Terminates the JVM with status 112 if the peer
 * closes the connection without sending the byte.
 */
private static int awaitExitStatus(ServerSocket ss) throws IOException {
  Socket sock = ss.accept();
  try {
    System.out.println("Wait for exit woke up from accept");
    byte[] b = new byte[1];
    InputStream is = sock.getInputStream();
    int expectedBytes = 1;
    int receivedBytes = 0;
    while (receivedBytes < expectedBytes) {
      int n = is.read(b, receivedBytes, expectedBytes-receivedBytes);
      System.out.println("is.read returned " + n);
      if (n < 0) {
        System.exit(112);
      }
      receivedBytes += n;
    }

    int exitStatus = (int)b[0];
    System.out.println("Received exitStatus " + exitStatus);
    return exitStatus;
  }
  finally {
    closeQuietly(sock);
  }
}

/** Closes the server socket; a close failure is reported but must not replace the exit status. */
private static void closeQuietly(ServerSocket socket) {
  try {
    socket.close();
  }
  catch (IOException e) {
    System.out.println("Ignoring failure to close server socket: " + e);
  }
}

/** Closes the socket; a close failure is reported but must not replace the exit status. */
private static void closeQuietly(Socket socket) {
  try {
    socket.close();
  }
  catch (IOException e) {
    System.out.println("Ignoring failure to close socket: " + e);
  }
}
/**
 * Mapper entry point: drains the input, runs H2O via run2 and turns its
 * result into the process exit status. A non-zero status, or any exception
 * (status 100), terminates the JVM; the method only returns normally when
 * H2O reported status 0.
 *
 * @param context mapper context
 */
@Override
public void run(Context context) throws IOException, InterruptedException {
  try {
    Log.POST(0, "Entered run");
    setup(context);

    // "Consume" mapped input: the records themselves are not used.
    boolean more;
    do {
      more = context.nextKeyValue();
    } while (more);

    final int exitStatus = run2(context);
    cleanup(context);

    Log.POST(1000, "Leaving run");
    System.out.println("Exiting with status " + exitStatus);
    System.out.flush();
    if (exitStatus != 0) {
      System.exit(exitStatus);
    }
  }
  catch (Exception e) {
    Log.POST(999, e);
    System.exit(100);
  }

  System.out.println("Exiting mapper run method");
  System.out.flush();
}
/**
 * For debugging only. Creates a mapper and invokes run with a null context;
 * any exception that escapes is printed to stdout.
 *
 * @param args ignored
 */
public static void main (String[] args) {
  try {
    new h2omapper().run(null);
  }
  catch (Exception e) {
    System.out.println (e);
  }
}
}