package edu.washington.escience.myria.daemon;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.nio.file.DirectoryIteratorException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import javax.annotation.concurrent.GuardedBy;
import javax.inject.Inject;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.FilenameUtils;
import org.apache.reef.client.ClientConfiguration;
import org.apache.reef.client.CompletedJob;
import org.apache.reef.client.DriverConfiguration;
import org.apache.reef.client.FailedJob;
import org.apache.reef.client.FailedRuntime;
import org.apache.reef.client.JobMessage;
import org.apache.reef.client.LauncherStatus;
import org.apache.reef.client.REEF;
import org.apache.reef.client.RunningJob;
import org.apache.reef.runtime.local.client.LocalRuntimeConfiguration;
import org.apache.reef.runtime.yarn.client.YarnClientConfiguration;
import org.apache.reef.tang.Configuration;
import org.apache.reef.tang.Configurations;
import org.apache.reef.tang.Injector;
import org.apache.reef.tang.Tang;
import org.apache.reef.tang.annotations.Name;
import org.apache.reef.tang.annotations.NamedParameter;
import org.apache.reef.tang.annotations.Unit;
import org.apache.reef.tang.exceptions.BindException;
import org.apache.reef.tang.exceptions.InjectionException;
import org.apache.reef.tang.formats.AvroConfigurationSerializer;
import org.apache.reef.tang.formats.CommandLine;
import org.apache.reef.tang.formats.ConfigurationModule;
import org.apache.reef.util.Optional;
import org.apache.reef.wake.EventHandler;
import org.apache.reef.wake.remote.address.LocalAddressProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import edu.washington.escience.myria.MyriaConstants;
import edu.washington.escience.myria.coordinator.ConfigFileException;
import edu.washington.escience.myria.tools.MyriaConfigurationParser;
import edu.washington.escience.myria.tools.MyriaGlobalConfigurationModule;
@Unit
public final class MyriaDriverLauncher {
/** Logger shared by the launcher and its nested REEF client event handlers. */
private static final Logger LOGGER = LoggerFactory.getLogger(MyriaDriverLauncher.class);
/**
 * Upper bound, in milliseconds, on each wait for a driver status change; after each wait the
 * launcher sends a ping message to the driver if one is known to be running.
 */
private static final long DRIVER_PING_TIMEOUT_MILLIS = 60 * 1000;
/**
 * Command-line entry point: launches the Myria driver, waits for it to finish, and terminates the
 * JVM with an exit code derived from the final launcher status.
 *
 * <p>Exit codes: {@code 1} when the launcher ended in the FAILED state, {@code 2} when it was
 * FORCE_CLOSED, and {@code 0} otherwise.
 *
 * @param args command line options, parsed by {@link #run(String[])}.
 * @throws Exception if the Driver can't start.
 */
public static void main(final String[] args) throws Exception {
  final LauncherStatus status = run(args);
  LOGGER.info("Driver launcher exiting with status {}", status);
  // Compare with equals() rather than ==: the failure handlers store the result of
  // LauncherStatus.failed(...), which need not be the LauncherStatus.FAILED singleton, so an
  // identity check could report success (exit code 0) for a driver that actually failed.
  final int exitCode;
  if (LauncherStatus.FAILED.equals(status)) {
    exitCode = 1;
  } else if (LauncherStatus.FORCE_CLOSED.equals(status)) {
    exitCode = 2;
  } else {
    exitCode = 0;
  }
  System.exit(exitCode);
}
/** REEF client used to submit the driver configuration; closed when the instance run method exits. */
private final REEF reef;
/** Handle to the running driver job; empty until REEF reports the job as running. */
@GuardedBy("this")
private Optional<RunningJob> driver = Optional.empty();
/** Current launcher state; updated by the nested REEF event handlers and by {@link #close()}. */
@GuardedBy("this")
private LauncherStatus status = LauncherStatus.INIT;
/**
 * Creates a launcher bound to the given REEF client. The constructor is private; the static
 * {@code run} method obtains the instance through a Tang injector.
 *
 * @param reef REEF client used to submit and later close the driver job.
 */
@Inject
private MyriaDriverLauncher(final REEF reef) {
this.reef = reef;
}
/**
 * Loads the REEF runtime configuration module exposed by the named class and builds it, applying
 * Myria-specific settings when the module is the YARN or the local runtime module.
 *
 * @param runtimeClassName fully qualified name of a class with a public static {@code CONF} field.
 * @return the built runtime configuration.
 * @throws ClassNotFoundException if the class cannot be loaded.
 * @throws NoSuchFieldException if the class has no public {@code CONF} field.
 * @throws IllegalAccessException if the {@code CONF} field cannot be read.
 */
private static Configuration getRuntimeConf(final String runtimeClassName)
    throws ClassNotFoundException, IllegalArgumentException, IllegalAccessException,
        NoSuchFieldException, SecurityException {
  // The module is looked up reflectively so the runtime can be chosen on the command line.
  final Object confField = Class.forName(runtimeClassName).getField("CONF").get(null);
  ConfigurationModule module = (ConfigurationModule) confField;
  if (module.equals(YarnClientConfiguration.CONF)) {
    // Leave some headroom for the Driver's non-heap memory.
    module = module.set(YarnClientConfiguration.JVM_HEAP_SLACK, "0.1");
  }
  if (module.equals(LocalRuntimeConfiguration.CONF)) {
    // Local mode: raise the evaluator cap so the number of workers is effectively unlimited.
    module = module.set(LocalRuntimeConfiguration.MAX_NUMBER_OF_EVALUATORS, 1024);
  }
  return module.build();
}
/**
 * Builds the REEF client configuration, registering this launcher's nested handler classes for
 * the job-running, job-message, job-completed, job-failed and runtime-error events.
 *
 * @return the client configuration.
 */
private static Configuration getClientConf() {
  ConfigurationModule clientModule = ClientConfiguration.CONF;
  clientModule = clientModule.set(ClientConfiguration.ON_JOB_RUNNING, RunningJobHandler.class);
  clientModule = clientModule.set(ClientConfiguration.ON_JOB_MESSAGE, JobMessageHandler.class);
  clientModule = clientModule.set(ClientConfiguration.ON_JOB_COMPLETED, CompletedJobHandler.class);
  clientModule = clientModule.set(ClientConfiguration.ON_JOB_FAILED, FailedJobHandler.class);
  clientModule = clientModule.set(ClientConfiguration.ON_RUNTIME_ERROR, RuntimeErrorHandler.class);
  return clientModule.build();
}
/**
 * Assembles the REEF driver configuration for the Myria driver.
 *
 * @param driverJobSubmissionDirectory job submission directory to set, or {@code null} to leave
 *     it unset.
 * @param driverHostName host on which the driver should run.
 * @param driverMemoryMB driver memory, in megabytes.
 * @param libPaths directories whose files (found recursively) are registered as global libraries.
 * @param filePaths directories whose files (found recursively) are registered as global files.
 * @return The Driver configuration.
 * @throws IOException if one of the directories cannot be listed.
 */
private static Configuration getDriverConf(
    @Nullable final String driverJobSubmissionDirectory,
    final String driverHostName,
    final int driverMemoryMB,
    final String[] libPaths,
    final String[] filePaths)
    throws IOException {
  // Identity, placement and memory of the driver.
  ConfigurationModule module =
      DriverConfiguration.CONF
          .set(DriverConfiguration.DRIVER_IDENTIFIER, "MyriaDriver")
          .set(DriverConfiguration.DRIVER_NODE, driverHostName)
          .set(DriverConfiguration.DRIVER_MEMORY, driverMemoryMB);
  // Driver lifecycle handlers.
  module =
      module
          .set(DriverConfiguration.ON_DRIVER_STARTED, MyriaDriver.StartHandler.class)
          .set(DriverConfiguration.ON_DRIVER_STOP, MyriaDriver.StopHandler.class);
  // Evaluator handlers.
  module =
      module
          .set(
              DriverConfiguration.ON_EVALUATOR_ALLOCATED,
              MyriaDriver.EvaluatorAllocatedHandler.class)
          .set(
              DriverConfiguration.ON_EVALUATOR_COMPLETED,
              MyriaDriver.CompletedEvaluatorHandler.class)
          .set(DriverConfiguration.ON_EVALUATOR_FAILED, MyriaDriver.EvaluatorFailureHandler.class);
  // Context handlers.
  module =
      module
          .set(DriverConfiguration.ON_CONTEXT_ACTIVE, MyriaDriver.ActiveContextHandler.class)
          .set(DriverConfiguration.ON_CONTEXT_FAILED, MyriaDriver.ContextFailureHandler.class);
  // Task handlers.
  module =
      module
          .set(DriverConfiguration.ON_TASK_RUNNING, MyriaDriver.RunningTaskHandler.class)
          .set(DriverConfiguration.ON_TASK_COMPLETED, MyriaDriver.CompletedTaskHandler.class)
          .set(DriverConfiguration.ON_TASK_FAILED, MyriaDriver.TaskFailureHandler.class)
          .set(DriverConfiguration.ON_TASK_MESSAGE, MyriaDriver.TaskMessageHandler.class);
  // Client (launcher) handlers.
  module =
      module
          .set(DriverConfiguration.ON_CLIENT_MESSAGE, MyriaDriver.ClientMessageHandler.class)
          .set(DriverConfiguration.ON_CLIENT_CLOSED, MyriaDriver.ClientCloseHandler.class);

  if (null != driverJobSubmissionDirectory) {
    module =
        module.set(
            DriverConfiguration.DRIVER_JOB_SUBMISSION_DIRECTORY, driverJobSubmissionDirectory);
  }
  // Every file under each library directory becomes a global library.
  for (final String libDir : libPaths) {
    final List<String> libraries = getFileNamesInDirectory(Paths.get(libDir));
    for (final String library : libraries) {
      module = module.set(DriverConfiguration.GLOBAL_LIBRARIES, library);
    }
  }
  // Every file under each file directory becomes a global file.
  for (final String fileDir : filePaths) {
    final List<String> files = getFileNamesInDirectory(Paths.get(fileDir));
    for (final String file : files) {
      module = module.set(DriverConfiguration.GLOBAL_FILES, file);
    }
  }
  return module.build();
}
/**
 * Loads the Myria global configuration from the deployment configuration file, whose name is
 * {@code MyriaConstants.DEPLOYMENT_CONF_FILE}, inside the given directory.
 *
 * @param configPath path to the directory containing the configuration files.
 * @return Configuration object.
 * @throws IOException declared for the underlying configuration loader.
 * @throws BindException declared for the underlying configuration loader.
 * @throws ConfigFileException declared for the underlying configuration loader.
 */
private static Configuration getMyriaGlobalConf(final String configPath)
    throws IOException, BindException, ConfigFileException {
  return MyriaConfigurationParser.loadConfiguration(
      FilenameUtils.concat(configPath, MyriaConstants.DEPLOYMENT_CONF_FILE));
}
/**
 * Recursively collects every non-directory entry found beneath {@code root}.
 *
 * @param root directory to scan.
 * @return absolute path strings of all files found, in directory-stream iteration order.
 * @throws IOException if a directory cannot be opened or an I/O error occurs while listing it.
 */
private static List<String> getFileNamesInDirectory(final Path root) throws IOException {
  final List<String> fileNames = new ArrayList<>();
  getFileNamesInDirectoryHelper(root, fileNames);
  return fileNames;
}

/**
 * Appends the absolute paths of all non-directory entries beneath {@code dir} to {@code acc},
 * descending into subdirectories.
 *
 * @param dir directory to list.
 * @param acc accumulator receiving the absolute path strings.
 * @throws IOException if a directory cannot be opened or an I/O error occurs while listing it.
 */
private static void getFileNamesInDirectoryHelper(final Path dir, final List<String> acc)
    throws IOException {
  try (DirectoryStream<Path> entries = Files.newDirectoryStream(dir)) {
    for (final Path entry : entries) {
      // Files.isDirectory follows symbolic links, like the File-based check it replaces, but does
      // not require the path to belong to the default file system.
      if (Files.isDirectory(entry)) {
        getFileNamesInDirectoryHelper(entry, acc);
      } else {
        acc.add(entry.toAbsolutePath().toString());
      }
    }
  } catch (final DirectoryIteratorException e) {
    // Iterating a DirectoryStream reports I/O failures through this unchecked wrapper; unwrap it
    // so callers see the IOException this method declares.
    throw e.getCause();
  }
}
/**
 * Determines the host name on which the Myria driver should be placed.
 *
 * <p>The master host from the global configuration is returned unchanged unless it is
 * {@code localhost} or {@code 127.0.0.1}. In that case the address reported by REEF's
 * {@link LocalAddressProvider} is resolved to a host name and returned instead; if that
 * resolution fails, the configured value is returned.
 *
 * @param conf the Myria global configuration.
 * @return the host name to use for the driver.
 * @throws InjectionException if the address provider or the master host cannot be injected.
 */
private static String getMasterHost(final Configuration conf) throws InjectionException {
  final Injector injector = Tang.Factory.getTang().newInjector(conf);
  final LocalAddressProvider addressProvider = injector.getInstance(LocalAddressProvider.class);
  final String masterHost =
      injector.getNamedInstance(MyriaGlobalConfigurationModule.MasterHost.class);
  // REEF (org.apache.reef.wake.remote.address.HostnameBasedLocalAddressProvider) will
  // unpredictably pick a local DNS name or IP address instead of "localhost" or 127.0.0.1
  String reefMasterHost = masterHost;
  if (masterHost.equals("localhost") || masterHost.equals("127.0.0.1")) {
    try {
      reefMasterHost = InetAddress.getByName(addressProvider.getLocalAddress()).getHostName();
      LOGGER.info(
          "Original host: {}, HostnameBasedLocalAddressProvider returned {}",
          masterHost,
          reefMasterHost);
    } catch (final UnknownHostException e) {
      // Best effort: fall back to the configured host, but keep the cause in the log.
      LOGGER.warn("Failed to get canonical hostname for host {}", masterHost, e);
    }
  }
  return reefMasterHost;
}
/**
 * Reads the driver memory quota (in gigabytes) from the global configuration and converts it to
 * megabytes, truncating any fractional megabyte.
 *
 * @param conf the Myria global configuration.
 * @return the driver memory in megabytes.
 * @throws InjectionException if the quota cannot be injected.
 */
private static int getDriverMemory(final Configuration conf) throws InjectionException {
  final float quotaGB =
      Tang.Factory.getTang()
          .newInjector(conf)
          .getNamedInstance(MyriaGlobalConfigurationModule.DriverMemoryQuotaGB.class);
  return (int) (quotaGB * 1024);
}
/**
 * Prints the expected configuration, compared to the actual configuration given.
 * Used for logging command line arguments.
 *
 * @param classes the named-parameter classes accepted on the command line; each must carry a
 *     {@link NamedParameter} annotation.
 * @param inj injector built from the parsed command line.
 * @return a multi-line message with one entry per parameter, separated by newlines.
 */
private static String genStartupMessage(Class<? extends Name<?>>[] classes, Injector inj) {
  final String allparams =
      Arrays.stream(classes)
          .map(c -> describeParameter(c, inj))
          // joining() places the separator only between entries (no stray leading newline).
          .collect(Collectors.joining("\n"));
  return "MyriaDriverLauncher configuration:\n" + allparams;
}

/**
 * Describes one named parameter: simple name, optional short name, documentation, and either the
 * value bound in {@code inj} or a note that the default (if any) applies.
 */
private static String describeParameter(
    final Class<? extends Name<?>> paramClass, final Injector inj) {
  final NamedParameter annotation = paramClass.getAnnotation(NamedParameter.class);
  final String fullName = paramClass.getName();
  final String shortName = annotation.short_name();
  final String defaultVal = describeDefault(annotation);

  final StringBuilder sb = new StringBuilder(paramClass.getSimpleName());
  if (!shortName.isEmpty()) {
    sb.append(String.format(" [-%s]", shortName));
  }
  // If desired, the type of the parameter can be obtained from
  // commandLineConf.getNamedParameters() --> .getSimpleArgName()
  sb.append(String.format(": %s\n-> ", annotation.doc()));
  boolean ok;
  try {
    ok = inj.isParameterSet(fullName);
    if (ok) {
      sb.append(inj.getInstance(fullName).toString());
    }
  } catch (InjectionException | BindException e) {
    // Deliberately best-effort: this message is informational, so an unreadable parameter is
    // reported as such below instead of aborting startup logging.
    ok = false;
  }
  if (!ok) {
    if (!defaultVal.isEmpty()) {
      sb.append("[cannot parse; using default] ").append(defaultVal);
    } else {
      sb.append("[cannot parse; no default]");
    }
  }
  return sb.toString();
}

/**
 * Renders the default declared on a {@link NamedParameter} annotation, checking in order the
 * single value, single class, value list and class list; returns "" when none is declared.
 */
private static String describeDefault(final NamedParameter annotation) {
  if (!annotation.default_value().equals(NamedParameter.REEF_UNINITIALIZED_VALUE)) {
    return annotation.default_value();
  }
  if (!annotation.default_class().equals(Void.class)) {
    return annotation.default_class().getSimpleName();
  }
  if (annotation.default_values().length > 0) {
    return Arrays.toString(annotation.default_values());
  }
  if (annotation.default_classes().length > 0) {
    // joining() avoids the leading ", " that reducing from an empty string produced ("[, A, B]").
    return Arrays.stream(annotation.default_classes())
        .map(Class::getSimpleName)
        .collect(Collectors.joining(", ", "[", "]"));
  }
  return "";
}
/**
 * Launch the Myria driver.
 *
 * <p>Parses the command line, loads the Myria global configuration from the configuration
 * directory, builds the driver configuration, and submits it through a launcher instance obtained
 * from Tang, blocking until that launcher returns.
 *
 * @param args Command line arguments.
 * @return the final launcher status; {@code LauncherStatus.FAILED} if a
 *     {@link ParseException} or {@link InjectionException} is raised along the way.
 */
@SuppressWarnings("unchecked")
public static LauncherStatus run(final String[] args)
    throws InjectionException, IOException, ParseException, ConfigFileException,
        ClassNotFoundException, IllegalArgumentException, IllegalAccessException,
        NoSuchFieldException, SecurityException {
  final Tang tang = Tang.Factory.getTang();
  try {
    // Named parameters accepted on the command line.
    final Class<? extends Name<?>>[] cliParams =
        new Class[] {
          RuntimeClassName.class,
          DriverJobSubmissionDirectory.class,
          ConfigPath.class,
          JavaLibPath.class,
          NativeLibPath.class,
          PythonLibPath.class
        };
    final Configuration cliConf = CommandLine.parseToConfiguration(args, cliParams);
    final Injector cli = tang.newInjector(cliConf);
    LOGGER.info(genStartupMessage(cliParams, cli));

    final String runtimeClassName = cli.getNamedInstance(RuntimeClassName.class);
    // The submission directory is the only optional path; null means "leave it unset".
    final String submissionDir =
        cli.isParameterSet(DriverJobSubmissionDirectory.class)
            ? cli.getNamedInstance(DriverJobSubmissionDirectory.class)
            : null;
    final String configDir = cli.getNamedInstance(ConfigPath.class);
    final String javaLibs = cli.getNamedInstance(JavaLibPath.class);
    final String nativeLibs = cli.getNamedInstance(NativeLibPath.class);
    final String pythonLibs = cli.getNamedInstance(PythonLibPath.class);

    // The global configuration is handed to the driver in serialized form.
    final Configuration globalConf = getMyriaGlobalConf(configDir);
    final String serialized = new AvroConfigurationSerializer().toString(globalConf);
    final Configuration serializedConf =
        tang.newConfigurationBuilder()
            .bindNamedParameter(SerializedGlobalConf.class, serialized)
            .build();

    final String masterHost = getMasterHost(globalConf);
    final int memoryMB = getDriverMemory(globalConf);
    final Configuration baseDriverConf =
        getDriverConf(
            submissionDir,
            masterHost,
            memoryMB,
            new String[] {javaLibs},
            new String[] {nativeLibs, pythonLibs});
    final Configuration driverConf = Configurations.merge(baseDriverConf, serializedConf);

    final Injector launcherInjector =
        tang.newInjector(getRuntimeConf(runtimeClassName), getClientConf());
    final MyriaDriverLauncher launcher = launcherInjector.getInstance(MyriaDriverLauncher.class);
    return launcher.run(driverConf);
  } catch (ParseException | InjectionException e) {
    LOGGER.error("Problem with command line options (see previous log message)", e);
    return LauncherStatus.FAILED;
  }
}
/**
 * Submits the driver configuration to REEF and blocks until the launcher status is done.
 *
 * <p>While waiting, a ping message is sent to the running driver each time the wait ends without
 * the status being done (at the latest every {@code DRIVER_PING_TIMEOUT_MILLIS} milliseconds). A
 * JVM shutdown hook and thread interruption both invoke {@link #close()}. The REEF client is
 * always closed before returning, and an interruption observed while waiting is restored on the
 * calling thread.
 *
 * @param driverConf the driver configuration to submit.
 * @return the launcher status observed when the wait loop ended.
 */
private LauncherStatus run(final Configuration driverConf) {
  // Most UNIX signals will not throw an exception, so need to be trapped here.
  Runtime.getRuntime().addShutdownHook(new Thread(this::close));
  boolean interrupted = false;
  try {
    LOGGER.info("Submitting Myria driver to REEF...");
    reef.submit(driverConf);
    synchronized (this) {
      while (!status.isDone()) {
        try {
          this.wait(DRIVER_PING_TIMEOUT_MILLIS);
          // wait() also returns when a handler or close() signals the end of the job; only ping
          // a driver that has not been reported as finished.
          if (!status.isDone() && driver.isPresent()) {
            final byte[] driverMsg = MyriaDriver.DRIVER_PING_MSG.getBytes(StandardCharsets.UTF_8);
            LOGGER.info("Sending message to Myria driver: {}", MyriaDriver.DRIVER_PING_MSG);
            driver.get().send(driverMsg);
          }
        } catch (final InterruptedException ex) {
          LOGGER.info("Interrupted while waiting for Myria driver to finish, exiting...");
          // Remember the interrupt; re-asserting it here would make the next wait() throw
          // immediately, so it is restored once the loop is over.
          interrupted = true;
          close(); // sets status to FORCE_CLOSED if the driver was running
        }
      }
      // Read the guarded field while still holding the lock.
      return status;
    }
  } finally {
    try {
      reef.close();
    } finally {
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }
}
/**
 * Requests shutdown of the launcher: a RUNNING status becomes FORCE_CLOSED, the driver job (if
 * one has been reported) is closed, and a thread waiting on this launcher is woken up.
 */
public synchronized void close() {
  final boolean wasRunning = status.isRunning();
  if (wasRunning) {
    status = LauncherStatus.FORCE_CLOSED;
  }
  final RunningJob job = driver.orElse(null);
  if (job != null) {
    job.close();
  }
  this.notify();
}
/**
 * Command line parameter (short name {@code runtimeClass}): fully qualified name of the REEF
 * runtime configuration class to use. The launcher reads the class's public static {@code CONF}
 * field reflectively. Defaults to the local runtime.
 */
@NamedParameter(
doc = "Fully qualified name of runtime configuration class",
short_name = "runtimeClass",
default_value = "org.apache.reef.runtime.local.client.LocalRuntimeConfiguration"
)
public static final class RuntimeClassName implements Name<String> {}
/**
 * Command line parameter (short name {@code driverDir}): path of the driver job submission
 * directory; must be visible to the driver launcher and the driver. Optional: when it is not
 * set, the launcher leaves the driver's job submission directory unconfigured.
 */
@NamedParameter(
doc =
"Full path of driver job submission directory; must be visible to the driver launcher and the driver",
short_name = "driverDir"
)
public static final class DriverJobSubmissionDirectory implements Name<String> {}
/**
 * Command line parameter (short name {@code configPath}): directory containing the deployment
 * configuration file on the driver launch host. No default is declared.
 */
@NamedParameter(doc = "local configuration file directory", short_name = "configPath")
public static final class ConfigPath implements Name<String> {}
/**
 * Command line parameter (short name {@code javaLibPath}): directory containing JAR/class files
 * on the driver launch host; its files are registered as global libraries. No default is declared.
 */
@NamedParameter(doc = "local JAR/class file directory", short_name = "javaLibPath")
public static final class JavaLibPath implements Name<String> {}
/**
 * Command line parameter (short name {@code nativeLibPath}): directory containing native shared
 * libraries on the driver launch host; its files are registered as global files. No default is
 * declared.
 */
@NamedParameter(doc = "local native shared library directory", short_name = "nativeLibPath")
public static final class NativeLibPath implements Name<String> {}
/**
 * Command line parameter (short name {@code pythonLibPath}): directory containing Python modules
 * on the driver launch host; its files are registered as global files. No default is declared.
 */
@NamedParameter(doc = "local Python module directory", short_name = "pythonLibPath")
public static final class PythonLibPath implements Name<String> {}
/**
 * Serialized Myria global configuration (which itself contains serialized configuration for each
 * worker). Not a command line option: the launcher binds it to the Avro-serialized global
 * configuration and merges it into the driver configuration.
 */
@NamedParameter(doc = "serialized Myria global configuration")
public static final class SerializedGlobalConf implements Name<String> {}
/** Logs each message received from the Myria driver, decoding its payload as UTF-8. */
final class JobMessageHandler implements EventHandler<JobMessage> {
  @Override
  public void onNext(final JobMessage message) {
    final String text = new String(message.get(), StandardCharsets.UTF_8);
    LOGGER.info("Message from Myria driver: {}", text);
  }
}
/** Records that the driver job is running: stores the job handle and sets the status to RUNNING. */
final class RunningJobHandler implements EventHandler<RunningJob> {
  @Override
  public void onNext(final RunningJob job) {
    LOGGER.info("Myria driver is running...");
    final MyriaDriverLauncher launcher = MyriaDriverLauncher.this;
    synchronized (launcher) {
      launcher.status = LauncherStatus.RUNNING;
      launcher.driver = Optional.of(job);
    }
  }
}
/** Marks the launcher as COMPLETED when the driver job exits and wakes a waiting thread. */
final class CompletedJobHandler implements EventHandler<CompletedJob> {
  @Override
  public void onNext(final CompletedJob job) {
    LOGGER.info("Myria driver exited");
    final MyriaDriverLauncher launcher = MyriaDriverLauncher.this;
    synchronized (launcher) {
      launcher.status = LauncherStatus.COMPLETED;
      launcher.notify();
    }
  }
}
/** Logs a driver job failure, stores a failed status carrying its reason, and wakes a waiter. */
final class FailedJobHandler implements EventHandler<FailedJob> {
  @Override
  public void onNext(final FailedJob job) {
    LOGGER.error("Myria driver failed: {}", job.getReason().orElse(null));
    final MyriaDriverLauncher launcher = MyriaDriverLauncher.this;
    synchronized (launcher) {
      launcher.status = LauncherStatus.failed(job.getReason());
      launcher.notify();
    }
  }
}
final class RuntimeErrorHandler implements EventHandler<FailedRuntime> {
@Override
public void onNext(final FailedRuntime error) {
LOGGER.error("Myria driver runtime error {}: {}", error, error.getReason().orElse(null));
synchronized (MyriaDriverLauncher.this) {
status = LauncherStatus.failed(error.getReason());
MyriaDriverLauncher.this.notify();
}
}
}
}