package automately.core.services.job;

import automately.core.data.Job;
import automately.core.data.User;
import automately.core.file.VirtualFileService;
import automately.core.services.container.ContainerService;
import automately.core.services.core.AutomatelyService;
import automately.core.services.http.ClusteredHttpServer;
import automately.core.services.job.execution.ExecutionContext;
import automately.core.services.job.execution.ExecutionContextFactory;
import automately.core.services.job.execution.factories.js.NativeJSContextFactory;
import com.hazelcast.client.config.ClientConfig;
import com.hazelcast.client.config.ClientNetworkConfig;
import com.hazelcast.config.NetworkConfig;
import com.hazelcast.core.ICountDownLatch;
import com.hazelcast.core.LifecycleEvent;
import com.hazelcast.core.LifecycleListener;
import io.jsync.Handler;
import io.jsync.app.ClusterApp;
import io.jsync.app.core.Cluster;
import io.jsync.app.core.Config;
import io.jsync.app.core.Logger;
import io.jsync.buffer.Buffer;
import io.jsync.eventbus.Message;
import io.jsync.json.DecodeException;
import io.jsync.json.JsonArray;
import io.jsync.json.JsonObject;
import io.jsync.json.impl.Base64;

import java.lang.reflect.Constructor;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import static automately.core.services.job.JobServer.updateStatus;

/**
 * JobRunner is a standalone process that joins the cluster as a "jobrunner"
 * Hazelcast client, loads an ExecutionContextFactory, executes a single job
 * (optionally waiting for a queued job), stores the results, and then leaves
 * the cluster and exits.
 */
public class JobRunner extends ClusterApp {

    private static Job currentJob;
    private static Runnable initHandler = null;
    private static JsonArray nodeList = null;

    public static void main(String[] args) {
        try {
            // We need to ensure that this process shuts down whenever the node
            // that spawned it shuts down.
            // Expected arguments:
            //   args[0] - id of the node that spawned this runner
            //   args[1] - Base64 encoded JSON array of cluster node addresses
            //   args[2] - path to the cluster configuration file
            //   args[3] - job token, or "await_<token>" for a queued job
            //   args[4] - (optional) ExecutionContextFactory class name
            //   args[5] - (optional) queued job wait timeout in minutes
            String nodeId = args[0];

            // TODO utilize the nodeId...

            nodeList = new JsonArray();
            try {
                nodeList = new JsonArray(new Buffer(Base64.decode(args[1])).toString());
            } catch (DecodeException e) {
                // Fall back to an empty node list if the argument cannot be decoded.
            }

            String configPath = args[2];

            System.setProperty("execution.jobId", args[3]);

            if (args.length > 4) {
                System.setProperty("execution.factory", args[4]);
            } else {
                System.setProperty("execution.factory", NativeJSContextFactory.class.getCanonicalName());
            }

            if (args.length > 5) {
                System.setProperty("execution.queue.timeout", args[5]);
            }

            ClusterApp.initialize(new JobRunner(),
                    "--join",
                    "--cluster-role", "jobrunner",
                    "--hazelcast-mode", "client",
                    "--disable-saving",
                    "--config", configPath);

            if (initHandler != null) {
                initHandler.run();
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    public static Job currentJob() {
        return currentJob;
    }

    @Override
    protected void prepareConfig(Config config) {
        // An async pool size of 4 should be an effective number for a single-job runner.
        config.rawConfig().putObject("cluster",
                config.rawConfig().getObject("cluster", new JsonObject()).putNumber("async_pool_size", 4));
    }

    @Override
    protected void prepareCluster(Cluster cluster) {
        try {
            cluster.manager().addClientConfigHandler(config -> {
                ClientNetworkConfig networkConfig = config.getNetworkConfig();
                for (Object node : nodeList) {
                    if (node instanceof String) {
                        networkConfig.addAddress((String) node);
                    }
                }
                config.setNetworkConfig(networkConfig);
            });

            cluster.addService(new VirtualFileService());
            cluster.addService(new ClusteredHttpServer());
            cluster.addService(new ContainerService());
            cluster.addService(new JobServer());

            // Let's add a service that handles the script execution.
            cluster.addService(new AutomatelyService() {

                private String executionAddress = "";

                /**
                 * Ensures the current job gets updated in the cluster and stored.
                 */
                private void updateJob() {
                    if (currentJob != null) {
                        cluster().data().persistentMap("jobs").set(currentJob.token(), currentJob);
                    }
                }

                @Override
                public String name() {
                    return "JobRunnerService";
                }

                @Override
                public void start(Cluster cluster) {
                    // We run the job within its own thread so we don't block start().
                    new Thread(new Runnable() {
                        @Override
                        public void run() {
                            Thread.currentThread().setName("jobrunner-main-thread");

                            Logger logger = cluster.logger();

                            try {
                                logger.info("Running the JobRunner service...");

                                ClassLoader classLoader = getClass().getClassLoader();

                                String contextFactory = System.getProperty("execution.factory",
                                        NativeJSContextFactory.class.getCanonicalName());

                                logger.debug("Attempting to load the context factory from " + contextFactory);

                                Class<?> clazz = classLoader.loadClass(contextFactory);

                                // Only proceed if the loaded class is actually an ExecutionContextFactory.
                                if (clazz != null && ExecutionContextFactory.class.isAssignableFrom(clazz)) {
                                    Constructor<?> clu = clazz.getConstructor();
                                    ExecutionContextFactory factory = (ExecutionContextFactory) clu.newInstance();
                                    factory.initialize(cluster);

                                    logger.debug("Loaded the context factory from " + contextFactory);

                                    String executionJobId = System.getProperty("execution.jobId");

                                    // This means we can go ahead and wait for a job to be received.
                                    if (executionJobId.startsWith("await_")) {
                                        String queuedJobId = executionJobId.replaceFirst("await_", "");

                                        ICountDownLatch runnerAwaitLatch = cluster.hazelcast()
                                                .getCountDownLatch("_jobrunner_await_" + queuedJobId);
                                        runnerAwaitLatch.trySetCount(1);

                                        ICountDownLatch runnerContinueLatch = cluster.hazelcast()
                                                .getCountDownLatch("_jobrunner_awaitcont_" + queuedJobId);

                                        // Allow any waiting code to continue.
                                        while (runnerContinueLatch.getCount() > 0) {
                                            runnerContinueLatch.countDown();
                                        }

                                        logger.info("Putting JobRunner into queued mode.");

                                        try {
                                            logger.debug("Waiting for queued job " + queuedJobId + " CountDownLatch to reach 0...");

                                            int queueTimeout = Integer.parseInt(System.getProperty("execution.queue.timeout", "60"));

                                            logger.info("Queued job " + queuedJobId + " await timeout set to " + queueTimeout + " minute(s).");

                                            boolean waitResult = runnerAwaitLatch.await(queueTimeout, TimeUnit.MINUTES);

                                            while (!waitResult && runnerAwaitLatch.getCount() > 0) {
                                                logger.debug("Timeout reached for queued job " + queuedJobId
                                                        + ". Waiting another " + queueTimeout + " minute(s)...");
                                                waitResult = runnerAwaitLatch.await(queueTimeout, TimeUnit.MINUTES);
                                            }

                                            logger.debug("Finished waiting for the CountDownLatch to reach 0. JobId is " + queuedJobId);

                                            System.setProperty("execution.jobId", queuedJobId);
                                        } catch (Exception ignored) {
                                            logger.warn("The queued job caught an exception while waiting to start.");
                                            initHandler = () -> {
                                                try {
                                                    logger.warn("Shutting down because an await exception occurred.");
                                                    cluster().leave();
                                                } catch (Exception e) {
                                                    e.printStackTrace();
                                                }
                                                System.exit(1);
                                            };
                                            return;
                                        }
                                    }

                                    String jobToken = System.getProperty("execution.jobId");

                                    currentJob = jobs().get(jobToken);

                                    if (currentJob == null) {
                                        logger.error("The job " + jobToken + " was not found.");
                                        initHandler = () -> {
                                            try {
                                                logger.warn("Shutting down because the job " + jobToken + " does not exist.");
                                                cluster().leave();
                                            } catch (Exception e) {
                                                e.printStackTrace();
                                            }
                                            System.exit(1);
                                        };
                                        return;
                                    }

                                    // The ExecutionSecurityPolicy would restrict what the executing
                                    // script has access to; it is currently disabled.
                                    /*
                                    Policy.setPolicy(new ExecutionSecurityPolicy());
                                    System.setSecurityManager(new SecurityManager());
                                    */

                                    User user = users().get(currentJob.userToken);

                                    updateStatus(currentJob, "running");

                                    executionAddress = "job.server." + currentJob.token() + ".execution";

                                    CountDownLatch jobAwaitLatch = new CountDownLatch(1);

                                    Thread[] executionThread = new Thread[1];

                                    // The killHandler is an attempt to gracefully stop
                                    // script execution and shut down the cluster.
                                    Handler<Message> killHandler = new Handler<Message>() {
                                        @Override
                                        public void handle(Message event) {
                                            if (!(event.body() instanceof String)) return;

                                            String method = (String) event.body();

                                            if (method.equals("kill") || method.equals("error") || method.equals("halt")) {
                                                logger.info("Execution event received \"" + method + "\"...");

                                                // We must unregister the handler first so it does not continue running.
                                                cluster().eventBus().unregisterHandler(executionAddress, this);

                                                logger.info("Killing the job \"" + currentJob.token() + "\"...");

                                                try {
                                                    if (executionThread[0] != null) {
                                                        // TODO verify this is working..
                                                        // Trigger the "shutdown" handler; the script must implement this properly.
                                                        cluster().eventBus().publish("job.server." + currentJob.token() + ".execution", "shutdown");

                                                        cluster().async().setTimer(4500, event12 -> {
                                                            // executionThread[0].interrupt(); This may not be needed
                                                            while (jobAwaitLatch.getCount() > 0) {
                                                                jobAwaitLatch.countDown();
                                                            }
                                                        });
                                                    } else {
                                                        logger.warn("Forcing shutdown...");
                                                        try {
                                                            cluster.leave();
                                                        } catch (Exception ignored) {
                                                            // There can be errors while attempting to leave the cluster.
                                                        }
                                                        System.exit(1);
                                                    }
                                                } catch (Exception ignored) {
                                                    // We can ignore this.
                                                }
                                            }
                                        }
                                    };

                                    // This exists so we can actually attempt to stop things gracefully.
                                    cluster().eventBus().registerHandler(executionAddress, killHandler);

                                    currentJob.config.putValue("_jobToken", currentJob.token());

                                    JsonObject mergedResults = new JsonObject();
                                    mergedResults.putBoolean("success", true);

                                    executionThread[0] = new Thread(() -> {
                                        try {
                                            ExecutionContext context = factory.create(user, currentJob.config);

                                            boolean broadcastPrintStream = currentJob.config.getBoolean("_broadcastPrintStream", false);

                                            // Broadcast console output when requested, since it may be cluster related.
                                            context.setConsoleHandler(event -> {
                                                if (broadcastPrintStream) {
                                                    cluster.eventBus().publish("private.job." + currentJob.token() + ".printStream",
                                                            new Buffer().appendBytes(event.getBytes()));
                                                }
                                            }).setExceptionHandler(t -> {
                                                t.printStackTrace();

                                                logger.info("Exception handler triggered.");

                                                JsonObject error = new JsonObject();
                                                error.putString("code", "Error");
                                                error.putString("message", t.toString());

                                                mergedResults.putObject("error", error);
                                                mergedResults.putBoolean("success", false);
                                                mergedResults.mergeIn(currentJob.results);

                                                // This is safe since the error didn't happen on the execution thread.
                                                currentJob.results = mergedResults;
                                                updateJob();
                                            }).setUncaughtExceptionHandler(e -> {
                                                e.printStackTrace();

                                                // This is more for errors that are not thrown
                                                // in the main execution thread.
                                                logger.info("Uncaught Exception handler triggered.");

                                                JsonObject error = new JsonObject();
                                                error.putString("code", "Error");
                                                error.putString("message", e.toString());

                                                mergedResults.putObject("error", error);
                                                mergedResults.putBoolean("success", false);
                                                mergedResults.mergeIn(currentJob.results);

                                                // This is safe since the error didn't happen on the execution thread.
                                                currentJob.results = mergedResults;
                                                updateJob();

                                                // This will tell the job to stop because there was an uncaught error (async???).
                                                cluster().eventBus().publish(executionAddress, "error");
                                            }).execute();

                                            logger.info("Execution has finished...");
                                        } catch (Exception e) {
                                            // Store any errors that are thrown while creating the ExecutionContext.
                                            JsonObject error = new JsonObject();
                                            error.putString("code", "Error");
                                            error.putString("message", e.toString());

                                            mergedResults.putObject("error", error);
                                            mergedResults.putBoolean("success", false);
                                        } finally {
                                            // When we are no longer executing we can simply release the latch.
                                            jobAwaitLatch.countDown();
                                        }
                                    });

                                    executionThread[0].setName("execution-thread");

                                    long startTime = System.nanoTime();

                                    executionThread[0].start();

                                    // Wait for the execution thread to finish.
                                    try {
                                        jobAwaitLatch.await();
                                    } catch (InterruptedException ignored) {
                                    }

                                    // Ensure there are no result fields that start with "_".
                                    for (String field : currentJob.results.getFieldNames()) {
                                        if (field.startsWith("_")) {
                                            currentJob.results.removeField(field);
                                        }
                                    }

                                    long totalTime = TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - startTime);

                                    mergedResults.putNumber("timeToComplete", totalTime);
                                    mergedResults.mergeIn(currentJob.results);

                                    currentJob.results = mergedResults;

                                    updateJob();

                                    logger.info("Finished updating the job...");
                                    logger.info("Finishing job...");

                                    // Attempt to leave the cluster.
                                    cluster().leave();

                                    logger.info("The job has completed...");

                                    System.exit(0);
                                } else {
                                    cluster().logger().error("\"" + contextFactory + "\" is not a valid ExecutionContextFactory.");
                                    try {
                                        cluster.leave();
                                    } catch (Exception ignored) {
                                    }
                                    System.exit(1);
                                }
                            } catch (Exception e) {
                                cluster().logger().error(e.getMessage());
                                try {
                                    cluster().leave();
                                } catch (Exception ignored) {
                                }
                                System.exit(1);
                            }
                        }
                    }).start();
                }

                @Override
                public void stop() {
                    try {
                        if (!cluster().hazelcast().getLifecycleService().isRunning()) {
                            // We must kill the process if hazelcast is already dead.
                            cluster().logger().error("Forcefully shutting down the process since hazelcast has died.");
                            System.exit(1);
                        }
                    } catch (Exception ignored) {
                        System.exit(1);
                    }
                }
            });
        } catch (Exception e) {
            throw new RuntimeException("There was an issue while attempting to prepare the cluster.", e);
        }
    }
}