package cz.cuni.mff.d3s.been.hostruntime;
import static cz.cuni.mff.d3s.been.cluster.Names.ACTION_QUEUE_NAME;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import cz.cuni.mff.d3s.been.cluster.IClusterService;
import cz.cuni.mff.d3s.been.cluster.Reaper;
import cz.cuni.mff.d3s.been.cluster.ServiceException;
import cz.cuni.mff.d3s.been.cluster.context.ClusterContext;
import cz.cuni.mff.d3s.been.core.TaskPropertyNames;
import cz.cuni.mff.d3s.been.core.protocol.messages.BaseMessage;
import cz.cuni.mff.d3s.been.core.ri.RuntimeInfo;
import cz.cuni.mff.d3s.been.detectors.Monitoring;
import cz.cuni.mff.d3s.been.mq.IMessageQueue;
import cz.cuni.mff.d3s.been.mq.MessageQueues;
import cz.cuni.mff.d3s.been.mq.MessagingException;
import cz.cuni.mff.d3s.been.swrepoclient.SwRepoClient;
import cz.cuni.mff.d3s.been.swrepoclient.SwRepoClientFactory;
/**
*
* This is the main implementation of Host Runtime.
*
* Host runtime is responsible for launching new tasks triggered by appropriate
* messages. It is also responsible for operating on tasks already running on
* the parent machine. Operations are as follows: killing tasks, allowing and
* supporting communication between tasks and the results object repository,
* and allowing logging.
*
* @author donarus
*
*/
public final class HostRuntime implements IClusterService {

    private static final Logger log = LoggerFactory.getLogger(HostRuntime.class);

    /**
     * Name of the classpath resource holding logger.py. It is also used as the
     * path, relative to the working directory, the script is extracted to.
     */
    private static final String LOGGER_RESOURCE_NAME = "scripts/logger.py";

    /**
     * Stores basic information (name, id, host, port, OS, memory, Java) about
     * this {@link HostRuntime} instance.
     */
    private final RuntimeInfo hostRuntimeInfo;

    /**
     * Factory for creating {@link SwRepoClient} instances from real-time obtained
     * IP:port
     */
    private final SwRepoClientFactory swRepoClientFactory;

    /**
     * Grants access to all instantiated cluster-dependent utils.
     */
    private final ClusterContext clusterContext;

    /**
     * Message Queues manager.
     */
    private final MessageQueues messageQueues;

    /**
     * Picks up relevant messages from Hazelcast and queues them for internal
     * processing. {@code null} until {@link #start()} creates it and again after
     * it has been stopped.
     */
    private HostRuntimeMessageListener messageListener;

    /**
     * Takes care of task's processes. {@code null} until {@link #start()}
     * creates it and again after it has been stopped.
     */
    private ProcessManager processManager;

    /**
     * Monitoring object. {@code null} until {@link #start()} creates it and
     * again after it has been stopped.
     */
    private Monitoring monitoring;

    /**
     * Creates new {@link HostRuntime} with cluster-unique id.
     *
     * @param clusterContext
     *          Grants access to all instantiated cluster-dependent utils.
     * @param swRepoClientFactory
     *          factory for creating {@link SwRepoClient} instances from real-time
     *          obtained IP and port
     * @param hostRuntimeInfo
     *          object which stores basic information about HostRuntime
     */
    HostRuntime(ClusterContext clusterContext, SwRepoClientFactory swRepoClientFactory, RuntimeInfo hostRuntimeInfo) {
        this.clusterContext = clusterContext;
        this.hostRuntimeInfo = hostRuntimeInfo;
        this.swRepoClientFactory = swRepoClientFactory;
        this.messageQueues = MessageQueues.getInstance();
    }

    /**
     * Starts this {@link HostRuntime}. Registers all listeners and registers
     * itself in cluster.
     *
     * @throws ServiceException
     *           when any of the start-up steps fails; the original exception is
     *           attached as the cause
     */
    @Override
    public void start() throws ServiceException {
        log.info("Starting Host Runtime...");
        try {
            // creates necessary files and directories
            prepareFiles(hostRuntimeInfo.getWorkingDirectory(), hostRuntimeInfo.getTasksWorkingDirectory());

            startProcessManager();

            // All listeners must be initialized before any message will be
            // received.
            startListeners();

            // Now, we can register the runtime without missing any messages.
            registerHostRuntime();

            // HR is now prepared to consume all important messages.
            startMonitoring();
        } catch (Exception e) {
            throw new ServiceException("Cannot start Host Runtime", e);
        }
        log.info("Host Runtime started.");
    }

    /**
     * Get the ID of the Host Runtime instance
     *
     * @return The Host Runtime ID
     */
    public String getId() {
        return hostRuntimeInfo.getId();
    }

    /**
     * Creates the monitoring object, attaches the resend and persist listeners
     * to it and starts it.
     *
     * @throws RuntimeException
     *           when the sender for the action queue cannot be created
     */
    private void startMonitoring() {
        monitoring = new Monitoring(clusterContext.getProperties());
        try {
            monitoring.addListener(ResendMonitoringListener.create(MessageQueues.getInstance().createSender(ACTION_QUEUE_NAME)));
            monitoring.addListener(new PersistMonitoringListener(clusterContext, this));
        } catch (MessagingException e) {
            throw new RuntimeException("Cannot request message.", e);
        }
        monitoring.startMonitoring();
    }

    /**
     * Creates the working directory and the tasks working directory (including
     * missing parents) and extracts the logger script into the working
     * directory.
     *
     * @param workingDirName
     *          path of the Host Runtime working directory
     * @param tasksWorkingDirName
     *          path of the directory for task working directories
     * @throws IOException
     *           when one of the directories cannot be created
     */
    private void prepareFiles(String workingDirName, String tasksWorkingDirName) throws IOException {
        Path workingDir = Paths.get(workingDirName).toAbsolutePath();
        Files.createDirectories(workingDir);

        Path tasksWorkingDir = Paths.get(tasksWorkingDirName).toAbsolutePath();
        Files.createDirectories(tasksWorkingDir);

        extractLogger(workingDir);
    }

    /**
     * Extracts logger and exports logger property setting.
     *
     * This is a best-effort operation: when the resource is missing or cannot be
     * written, an error is logged and the {@link TaskPropertyNames#LOGGER}
     * system property is left untouched.
     *
     * @param workingDir
     *          where to put the logger
     */
    private void extractLogger(Path workingDir) {
        // try-with-resources guarantees the classpath stream is closed on every
        // path; a null resource is legal here and is simply not closed.
        try (InputStream input = HostRuntime.class.getClassLoader().getResourceAsStream(LOGGER_RESOURCE_NAME)) {
            if (input == null) {
                // getResourceAsStream reports a missing resource by returning null;
                // handle it like any other extraction failure instead of letting
                // Files.copy fail with a NullPointerException.
                String msg = String.format(
                        "Cannot extract %s: resource not found. Native task logging will not work",
                        LOGGER_RESOURCE_NAME);
                log.error(msg);
                return;
            }

            Path scriptDir = workingDir.resolve("scripts");
            Path resourcePath = workingDir.resolve(LOGGER_RESOURCE_NAME);

            Files.createDirectories(scriptDir);
            // Files.copy refuses to overwrite, so drop a stale copy first.
            Files.deleteIfExists(resourcePath);
            Files.copy(input, resourcePath);

            System.setProperty(TaskPropertyNames.LOGGER, resourcePath.toString());
        } catch (IOException e) {
            String msg = String.format("Cannot extract %s. Native task logging will not work", LOGGER_RESOURCE_NAME);
            log.error(msg, e);
        }
    }

    /**
     * Causes clean Host Runtime shutdown.
     *
     * Components that were never started (for example because {@link #start()}
     * failed part-way) or that have already been stopped are skipped, so this
     * method can be called safely in those situations.
     */
    @Override
    public void stop() {
        log.info("Stopping Host Runtime...");
        if (monitoring != null) {
            monitoring.stopMonitoring();
            monitoring = null;
        }
        unregisterHostRuntime();
        stopListeners();
        stopProcessManager();
        log.info("Host Runtime stopped.");
    }

    /**
     * Creates a {@link Reaper} whose reaping action is to {@link #stop()} this
     * Host Runtime.
     *
     * @return a new reaper bound to this instance
     */
    @Override
    public Reaper createReaper() {
        return new Reaper() {
            @Override
            protected void reap() throws InterruptedException {
                HostRuntime.this.stop();
            }
        };
    }

    /**
     * Starts process manager.
     *
     * @throws ServiceException
     *           when the action queue cannot be created or bound
     */
    private void startProcessManager() throws ServiceException {
        try {
            IMessageQueue<BaseMessage> queue = messageQueues.createInprocQueue(ACTION_QUEUE_NAME);
            queue.getReceiver(); //binds receiver

            processManager = new ProcessManager(clusterContext, swRepoClientFactory, hostRuntimeInfo);
            processManager.start();
        } catch (MessagingException e) {
            String msg = String.format("Cannot start %s queue", ACTION_QUEUE_NAME);
            throw new ServiceException(msg, e);
        }
    }

    /**
     * Stops process manager and terminates the action queue. Does nothing when
     * there is no process manager to stop.
     */
    private void stopProcessManager() {
        if (processManager == null) {
            // never started or already stopped
            return;
        }

        log.debug("Stopping process manager...");
        processManager.stop();
        processManager = null;

        try {
            messageQueues.terminate(ACTION_QUEUE_NAME);
        } catch (MessagingException e) {
            String msg = String.format("Cannot terminate %s", ACTION_QUEUE_NAME);
            log.error(msg, e);
        }
        log.debug("Process manager stopped.");
    }

    /**
     * Stops all cluster listeners. Does nothing when there is no listener to
     * stop.
     */
    private void stopListeners() {
        if (messageListener == null) {
            // never started or already stopped
            return;
        }
        messageListener.stop();
        messageListener = null;
    }

    /**
     * Registers all needed cluster listeners. Requires the process manager to
     * exist already, because the listener is created with its node id.
     */
    private void startListeners() throws ServiceException {
        messageListener = new HostRuntimeMessageListener(clusterContext, processManager.getNodeId());
        messageListener.start();
    }

    /**
     * Stores {@link RuntimeInfo} (created in constructor) in cluster.
     */
    private void registerHostRuntime() {
        clusterContext.getRuntimes().storeRuntimeInfo(hostRuntimeInfo);
    }

    /**
     * Removes {@link RuntimeInfo} (created in constructor) from cluster.
     */
    private void unregisterHostRuntime() {
        try {
            clusterContext.getRuntimes().removeRuntimeInfo(hostRuntimeInfo.getId());
        } catch (IllegalStateException e) {
            // an attempt is made to unregister on a cluster instance that is no longer active
            // this happens when Hazelcast shutdown hook snags runtime control before BEEN shutdown hooks
            log.warn("Failed to unhook HostRuntime from the cluster. HostRuntime data is likely to linger.", e);
        }
    }
}