package automately.core.services.job; import automately.core.data.Job; import automately.core.data.Meta; import automately.core.data.User; import automately.core.data.UserData; import automately.core.data.comparators.JobComparator; import automately.core.data.predicates.JsonQueryPredicate; import automately.core.file.VirtualFile; import automately.core.file.VirtualFileSystem; import automately.core.services.core.AutomatelyService; import automately.core.services.job.execution.factories.hello.HelloWorldContextFactory; import automately.core.services.job.execution.factories.js.NativeJSContextFactory; import automately.core.services.job.execution.factories.v8.V8ContextFactory; import com.hazelcast.core.*; import com.hazelcast.nio.Address; import com.hazelcast.query.*; import io.jsync.Async; import io.jsync.Handler; import io.jsync.app.core.Cluster; import io.jsync.app.core.Config; import io.jsync.app.core.Logger; import io.jsync.buffer.Buffer; import io.jsync.eventbus.EventBus; import io.jsync.eventbus.Message; import io.jsync.http.HttpClient; import io.jsync.impl.Windows; import io.jsync.json.JsonArray; import io.jsync.json.JsonObject; import io.jsync.json.impl.Base64; import java.io.*; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.*; import java.util.concurrent.*; /** * JobServer handles all jobs. This is used to submit jobs to the cluster * for execution. */ public class JobServer extends AutomatelyService { // TODO Definitely complete javadocs private static boolean initialized = false; private static Cluster cluster = null; private static IMap<String, Job> jobs = null; private static ISet<String> jobsBeingExecuted = null; private static IMap<String, String> jobExecutionNodes = null; private static IMap<String, JsonObject> registeredJobServers = null; private static String defaultExecutionFactory = NativeJSContextFactory.class.getCanonicalName(); @Deprecated public static void setScriptContextFactory(String factory) { setDefaultExecutionFactory(factory); } @Deprecated public static String getScriptContextFactory() { return getDefaultExecutionFactory(); } public static void setDefaultExecutionFactory(String factory) { defaultExecutionFactory = factory; } public static String getDefaultExecutionFactory() { return defaultExecutionFactory; } public static boolean initialized(){ return initialized; } private static void checkInitialized(){ if(!initialized){ throw new RuntimeException("The JobServer has not been initialized yet!"); } } /** * isStale is used to check if an Automately Job is stale in the cluster. It will * return false if the job is not stale. 
When a job is stale it means the cluster still reports it as active (running, queued, or processing) but no node is actually executing it anymore. * * @param job the Job you wish to check * @return returns true if the job is stale */ public static boolean isStale(Job job) { checkInitialized(); if (job == null) { throw new NullPointerException(); } ICountDownLatch globalJobFinishLatch = cluster.hazelcast().getCountDownLatch(job.token() + "_job_finish_latch"); String status = job.status; // Return false since the job pretty much has already been handled if (status.equals("complete") || status.equals("halted") || status.equals("stopping") || status.equals("timeout")) { return false; } final boolean[] isStale = {false}; // Newest method for checking for stale jobs // We are checking to see if there is a lock on the job within the cluster // If the job is not being executed then there will be no lock, so if it is locked // the job is not stale ILock executionLock = cluster.hazelcast().getLock("_job_lock_execution_" + job.token()); if(!executionLock.isLocked()){ isStale[0] = true; } // If the job is running, queued, processing we should definitely check if it is stale if (status.equals("running") || status.equals("queued") || status.equals("processing")) { // This means the job is being executed and a node is handling it. // So we should check if the node still exists if(jobsBeingExecuted.contains(job.token()) && jobExecutionNodes.containsKey(job.token())){ String nodeId = jobExecutionNodes.get(job.token()); // The node that was executing it is no longer registered, so the job is stale if(!registeredJobServers.containsKey(nodeId)){ isStale[0] = true; } } else if (!jobsBeingExecuted.contains(job.token())) { isStale[0] = true; } } if(isStale[0]){ job.status = "complete"; JsonObject error = new JsonObject(); error.putString("message", "The job has gone stale. It is no longer being executed."); error.putString("code", "Stale Job"); job.results.putObject("error", error); job.results.putBoolean("success", false); jobs.set(job.token(), job); if (globalJobFinishLatch.trySetCount(1)) { while (globalJobFinishLatch.getCount() > 0){ globalJobFinishLatch.countDown(); } } jobsBeingExecuted.remove(job.token()); jobExecutionNodes.remove(job.token()); } return isStale[0]; } /** * getJob allows you to retrieve a User's Job from the Cluster via * its token. * * @param user the User you wish to retrieve the Job from * @param jobToken the token for the Job you are attempting to find * @return returns the Job if it was found or null if it wasn't */ public static Job getJob(User user, String jobToken) { checkInitialized(); EntryObject e = new PredicateBuilder().getEntryObject(); Predicate predicate = e.get("userToken").equal(user.token()) .and(e.get("token").equal(jobToken)); for (Job job : jobs.values(predicate)) { if (job.token().equals(jobToken)) { return job; } } return null; } /** * getJobs retrieves a Collection<Job> for every Job belonging * to the specified User. * * @param user the User you wish to retrieve the Jobs for * @return returns a Collection<Job> for the User */ public static Collection<Job> getJobs(User user) { checkInitialized(); EntryObject e = new PredicateBuilder().getEntryObject(); Predicate predicate = e.get("userToken").equal(user.token()); return jobs.values(predicate); } /** * getJobs retrieves a Collection<Job> for every Job belonging * to the specified User. This returns 10 results by default.
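* <p>For example, assuming {@code user} is an existing User, {@code JobServer.getJobs(user, 0)} would return the first page of up to 10 jobs.</p>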
* * @param user the User you wish to retrieve the Jobs for * @param page the page index starting from 0 you are looking for * @return returns a Collection<Job> for the User */ public static Collection<Job> getJobs(User user, int page) { return getJobs(user, page, 10); } /** * getJobs retrieves a Collection<Job> for every Job belonging * to the specified User. * * @param user the User you wish to retrieve the Jobs for * @param page the page index starting from 0 you are looking for * @param count the number of results to return * @return returns a Collection<Job> for the User */ public static Collection<Job> getJobs(User user, int page, int count) { checkInitialized(); EntryObject e = new PredicateBuilder().getEntryObject(); com.hazelcast.query.Predicate userJobsPredicate = e.get("userToken").equal(user.token()); if (page < 0) { page = 0; } if (count < 0) { count = 10; } // Max Count is always 100 if (count > 100) count = 100; // This predicate uses the previous one and then sorts the jobs by date // IMPORTANT apparently this can't be a lambda PagingPredicate pagingPredicate = new PagingPredicate(userJobsPredicate, new JobComparator(), count); Collection<Job> values = jobs.values(pagingPredicate); if (page > pagingPredicate.getPage()) { while (page > pagingPredicate.getPage()) { pagingPredicate.nextPage(); } values = jobs.values(pagingPredicate); } return values; } /** * getRunningJobs will return a Collection<Job> containing all * of the "normal" running jobs for the user. * * @param user the User you wish to retrieve the Collection<Job> for * @return the Collection<Job> you are retrieving */ public static Collection<Job> getRunningJobs(User user) { checkInitialized(); EntryObject e = new PredicateBuilder().getEntryObject(); Predicate predicate = e.get("userToken").equal(user.token()) .and(e.get("service").equal(false)) .and(e.get("status").equal("running")); // We only return running non-service jobs return jobs.values(predicate); } /** * getRunningServices will return a Collection<Job> containing all * of the running service jobs for the user. * * @param user the User you wish to retrieve the Collection<Job> for * @return the Collection<Job> you are retrieving */ public static Collection<Job> getRunningServices(User user) { checkInitialized(); EntryObject e = new PredicateBuilder().getEntryObject(); Predicate predicate = e.get("userToken").equal(user.token()) .and(e.get("service").equal(true)) .and(e.get("status").equal("running")); return jobs.values(predicate); } /** * getService is used to retrieve a service Job via its serviceName. * * @param user the User you wish to retrieve the Job for * @param serviceName the serviceName for the Job you wish to retrieve * @return the Job you wish to retrieve or null if it doesn't exist */ public static Job getService(User user, String serviceName) { checkInitialized(); EntryObject e = new PredicateBuilder().getEntryObject(); Predicate predicate = e.get("userToken").equal(user.token()) .and(e.get("service").equal(true)) .and(e.get("status").equal("running")) .and(e.get("serviceName").equal(serviceName)); // We only want to get running services Collection<Job> values = jobs.values(predicate); if (values.iterator().hasNext()) { return values.iterator().next(); } return null; } /** * publishEvent is used to publish events on the internal JobServer such as * halt, error, etc.
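* <p>For example, {@code JobServer.publishEvent(job, "halt")} would publish a halt event on the {@code job.server.<token>.events} address for the given job.</p>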
* * @param job the Job you are publishing the event for * @param event the event you are publishing */ public static void publishEvent(Job job, String event) { checkInitialized(); String jobEventIdentifier = "job.server." + job.token() + ".events"; EventBus eventBus = cluster.eventBus(); eventBus.publish(jobEventIdentifier, event.trim()); } /** * updateStatus allows you to simply update a Job's status, for example from * error to complete. * * @param job the Job you are setting the status for * @param status the status you are setting */ public static void updateStatus(Job job, String status) { checkInitialized(); status = status.trim().toLowerCase(); cluster.logger().info("Updating Job status for the job " + job.token() + ": " + status); job.status = status; job.updated = new Date(); jobs.set(job.token(), job); publishEvent(job, status); } private Logger logger; private Async async; private EventBus eventBus; private ExecutorService jobExecutorService; private String nodeId = ""; private Handler<Message> jobEventBusHandler = null; private long staleJobTimer = -1; private String defaultJobLogPath = "./fs/logs/"; private int maxJvmSize = 512; private int minJvmSize = 16; private IMap<String, String> enabledExecutionFactories; private long queuedJobCleanupTimer = -1; private int maxQueuedJobs = 5; private int minQueuedJobs = 1; private boolean enableQueuedJobs = false; // This will store nodeIds private IMap<String, String> queuedJobMap; private IMap<String, String> tmpQueuedJobMap; private Map<String, Process> queuedJobs; private Process initJobRunnerProcess(Job job, int minJvmSize, int maxJvmSize) throws IOException { return initJobRunnerProcess(job, minJvmSize, maxJvmSize, defaultExecutionFactory, false, 60 * 24); } private Process initJobRunnerProcess(Job job, int minJvmSize, int maxJvmSize, String executionFactory) throws IOException { return initJobRunnerProcess(job, minJvmSize, maxJvmSize, executionFactory, false, 60 * 24); } private Process initJobRunnerProcess(Job job, int minJvmSize, int maxJvmSize, String executionFactory, boolean queued) throws IOException { return initJobRunnerProcess(job, minJvmSize, maxJvmSize, executionFactory, queued, 60 * 24); } private Process initJobRunnerProcess(Job job, int minJvmSize, int maxJvmSize, String executionFactory, boolean queued, long awaitTimeout) throws IOException { // Is this a new status??
job.status = "waiting"; // Each Script must be ran by the // script runner process String javaHome = System.getProperty("java.home"); String javaBin = javaHome + File.separator + "bin" + File.separator + "java"; String classpath = System.getProperty("java.class.path"); String className = JobRunner.class.getCanonicalName(); String clusterHost = "127.0.0.1:5271"; try { Address address = cluster.hazelcast().getCluster().getLocalMember().getAddress(); clusterHost = address.getHost() + ":" + address.getPort(); } catch (Exception ignored) { } JsonArray nodeList = new JsonArray(); nodeList.add(clusterHost); cluster.hazelcast().getCluster().getMembers().forEach(member -> { Address memberAddress = member.getAddress(); String address = memberAddress.getHost() + ":" + memberAddress.getPort(); if(!nodeList.contains(address)){ nodeList.add(address); } }); String nodeListStr = Base64.encodeBytes(nodeList.encode(true).getBytes(), Base64.DONT_BREAK_LINES); String jobToken = job.token(); String[] args; String configPath = cluster().config().getConfigPath(); if (queued) { jobToken = "await_" + jobToken; args = new String[]{ javaBin, "-Xms" + minJvmSize + "m", "-Xmx" + maxJvmSize + "m", "-cp", classpath, className, cluster().manager().nodeId(), nodeListStr, configPath, jobToken, executionFactory, String.valueOf(awaitTimeout) }; } else { args = new String[]{ javaBin, "-Xms" + minJvmSize + "m", "-Xmx" + maxJvmSize + "m", "-cp", classpath, className, cluster().manager().nodeId(), nodeListStr, configPath, jobToken, executionFactory }; } ProcessBuilder builder = new ProcessBuilder(args); Path logFile = Paths.get(defaultJobLogPath + job.token() + ".log"); Path logFolder = Paths.get(defaultJobLogPath); if (!Files.exists(logFolder)) { Files.createDirectories(logFolder); } if (!Files.exists(logFile)) { Files.createFile(logFile); } builder.redirectError(logFile.toAbsolutePath().toFile()); logger.info("Starting process for the job \"" + job.token() + "\"..."); return builder.start(); } private void initQueuedJobs() { initQueuedJobs(minJvmSize, maxJvmSize, 60 * 24, maxQueuedJobs); } private void initQueuedJobs(int count) { initQueuedJobs(minJvmSize, maxJvmSize, 60 * 24, count); } private void initQueuedJobs(int minJvmSize, int count) { initQueuedJobs(minJvmSize, maxJvmSize, 60 * 24, count); } private void initQueuedJobs(int minJvmSize, int maxJvmSize, int count) { initQueuedJobs(minJvmSize, maxJvmSize, 60 * 24, count); } private void initQueuedJobs(int minJvmSize, int maxJvmSize, int awaitTimeout, int count) { if (!enableQueuedJobs) { return; } if (queuedJobs == null) { queuedJobs = new ConcurrentHashMap<>(); // Every 60 Seconds seems like a decent time to attempt to cleanup jobs queuedJobCleanupTimer = async.setPeriodic(1000 * 60, event -> queuedJobs.forEach((s, process) -> { if(!process.isAlive()){ queuedJobs.remove(s); tmpQueuedJobMap.remove(s); queuedJobMap.remove(s); } })); } // Add a job if the queued job size is less than the minimum // Do not add a job if the queued size is greater than the maximum while (minQueuedJobs > queuedJobs.size() || (maxQueuedJobs > queuedJobs.size() && count > 0)) { count--; Job tmpJob = new Job(); try { // We are initializing the default ExecutionContextFactory since these jobs are queued Process process = initJobRunnerProcess(tmpJob, minJvmSize, maxJvmSize, defaultExecutionFactory, true, awaitTimeout); queuedJobs.put(tmpJob.token(), process); // This will allow queued jobs to work // from jobs submitted from another job queuedJobMap.put(tmpJob.token(), this.nodeId); } catch 
(IOException e) { e.printStackTrace(); } } } private void gracefullyStopQueuedJob(String jobToken){ if(tmpQueuedJobMap.containsKey(jobToken)){ // We cannot block the main event loop so we run the timeout // in its own thread new Thread(() -> { Thread.currentThread().setName(jobToken + "_queued_timeout"); HazelcastInstance hz = cluster.hazelcast(); // We need to remove this job and tell the process to stop // since we reached a timeout ICountDownLatch runnerAwaitLatch = hz.getCountDownLatch("_jobrunner_await_" + jobToken); if(runnerAwaitLatch.getCount() == 0){ // This will help ensure the JobRunner is in fact ready, since it is possible it has not finished starting yet ICountDownLatch awaitContinueLatch = hz.getCountDownLatch("_jobrunner_awaitcont_" + jobToken); awaitContinueLatch.trySetCount(1); try { awaitContinueLatch.await(15, TimeUnit.SECONDS); } catch (InterruptedException ignored) { } } while (runnerAwaitLatch.getCount() > 0) { runnerAwaitLatch.countDown(); } }).start(); } } public Job getQueuedJob(){ return getQueuedJob(false, false); } public Job getQueuedJob(boolean random, boolean localOnly) { if (queuedJobMap.size() > 0) { List<String> queuedJobIds; if(localOnly && (cluster().config().isRole("job") || cluster().config().isAll())){ queuedJobIds = new ArrayList<>(queuedJobMap.keySet(Predicates.equal("toString", this.nodeId))); } else { queuedJobIds = new ArrayList<>(queuedJobMap.keySet()); } if(random){ // Let's attempt to get a random job Collections.shuffle(queuedJobIds); } Iterator<String> iterator = queuedJobIds.iterator(); while (iterator.hasNext()) { String jobId = iterator.next(); Job tmpJob = new Job(); tmpJob.loadJson(new JsonObject().putString("token", jobId)); String nodeId = queuedJobMap.remove(tmpJob.token()); if (!registeredJobServers.containsKey(nodeId)) { continue; } // Let's store the serverId tmpQueuedJobMap.put(tmpJob.token(), nodeId); // Let's go ahead and set a timer that will ensure // the retrieved queued job will shut down if it's not used. // This will ensure there aren't any rogue jobs. async.setTimer(60 * 1000, event -> { try { if(!jobs().containsKey(tmpJob.token())){ gracefullyStopQueuedJob(tmpJob.token()); } } catch (Exception ignored){ } }); return tmpJob; } } return null; } @Override public void start(Cluster owner) { cluster = owner; this.logger = cluster.logger(); this.async = cluster.async(); this.eventBus = cluster.eventBus(); Config config = cluster.config(); // We use this so we can queue up jobs that don't get processed due to load jobsBeingExecuted = cluster.data().getSet("jobs.executing"); jobExecutionNodes = cluster.data().getMap("jobs.executing.nodes"); registeredJobServers = cluster.data().getMap("job.server.nodes"); queuedJobMap = cluster.data().getMap("jobs.queued"); tmpQueuedJobMap = cluster.data().getMap("jobs.queued.tmp"); enabledExecutionFactories = cluster.data().getMap("jobs.execution.factories"); jobs = jobs(); initialized = true; IMap<String, Job> registeredServices = cluster.data().persistentMap("job.server.user.services"); // If we are not a job server or our role isn't configured for all // then we do not need to continue.
if ((!config.isRole("job") && !config.isAll()) || cluster().manager().clientMode()) return; JsonObject jobServerConfig = coreConfig().getObject("job", new JsonObject()); if (!jobServerConfig.containsField("max_jobs")) { jobServerConfig.putNumber("max_jobs", 50); } if (!jobServerConfig.containsField("max_queued_jobs")) { jobServerConfig.putNumber("max_queued_jobs", 5); } if (!jobServerConfig.containsField("min_queued_jobs")) { jobServerConfig.putNumber("min_queued_jobs", 2); } if (!jobServerConfig.containsField("enable_queued_jobs")) { jobServerConfig.putBoolean("enable_queued_jobs", true); } // Let's set some default execution factories.. if (!jobServerConfig.containsField("execution_factories")) { JsonArray defaultExecutionFactories = new JsonArray(); // Let's go ahead and add the default execution factory JsonObject defaultFactoryConf = new JsonObject(); defaultFactoryConf.putString("platformId", "default"); defaultFactoryConf.putString("executionFactory", defaultExecutionFactory); // Default is always enabled - maybe this should be changed //defaultFactoryConf.putBoolean("enabled", true); // Let's go ahead and add the default execution factory JsonObject helloWorldFactoryConf = new JsonObject(); helloWorldFactoryConf.putString("platformId", "hello"); helloWorldFactoryConf.putString("executionFactory", HelloWorldContextFactory.class.getCanonicalName()); helloWorldFactoryConf.putBoolean("enabled", true); // Let's go ahead and add the default execution factory JsonObject v8FactoryConf = new JsonObject(); v8FactoryConf.putString("platformId", "v8"); v8FactoryConf.putString("executionFactory", V8ContextFactory.class.getCanonicalName()); v8FactoryConf.putBoolean("enabled", false); defaultExecutionFactories.add(defaultFactoryConf); jobServerConfig.putArray("execution_factories", defaultExecutionFactories); } JsonArray executionFactories = jobServerConfig.getArray("execution_factories", new JsonArray()); for (Object factoryConf : executionFactories) { if(factoryConf instanceof JsonObject){ JsonObject jsonFactoryConf = (JsonObject) factoryConf; String platformId = jsonFactoryConf.getString("platformId", ""); String executionFactory = jsonFactoryConf.getString("executionFactory", ""); // We can skip this since it doesn't look like an execution factory if(platformId.isEmpty() || executionFactory.isEmpty()){ continue; } if(jsonFactoryConf.getBoolean("enabled", false)){ enabledExecutionFactories.put(platformId, executionFactory); } else { // We need to remove it because all nodes // need the same configuration when it comes to this enabledExecutionFactories.remove(platformId); } } } // We need to ensure the default execution factory is always enabled if(!enabledExecutionFactories.containsKey("default")){ enabledExecutionFactories.set("default", defaultExecutionFactory); } coreConfig().putObject("job", jobServerConfig); config.save(); // END configuration settings. 
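// For reference, the "job" section of the core config written above would end up looking
// roughly like the following sketch (values shown are the defaults applied when the fields are missing):
// {
//   "max_jobs": 50,
//   "max_queued_jobs": 5,
//   "min_queued_jobs": 2,
//   "enable_queued_jobs": true,
//   "execution_factories": [
//     { "platformId": "default", "executionFactory": "automately.core.services.job.execution.factories.js.NativeJSContextFactory" }
//   ]
// }
// Each execution_factories entry carries a "platformId", an "executionFactory" class name, and an optional
// "enabled" flag; only enabled entries (plus "default") end up in enabledExecutionFactories.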
// This value is used by _serverConfig String jobServerType = jobServerConfig.getString("server_type", "all"); logger.info("Server type is \"" + jobServerType + "\""); int maxJobs = jobServerConfig.getInteger("max_jobs"); this.nodeId = cluster.manager().nodeId(); // We need to store this information temporarily so we can save it in registeredJobServers jobServerConfig.putBoolean("clientMode", cluster.manager().clientMode()); jobServerConfig.putString("nodeId", this.nodeId); registeredJobServers.set(this.nodeId, jobServerConfig); jobExecutorService = Executors.newFixedThreadPool(maxJobs + 5); // Size the pool at maxJobs plus 5 extra threads as a safety margin // Job handler - this is an event bus handler that actually handles our job execution jobEventBusHandler = (Message event) -> { if (event.body() != null) { if (event.body() instanceof String && jobs().containsKey(event.body().toString())) { jobExecutorService.submit(new Runnable() { @Override public void run() { // Here we will handle the actual processing of the job. Job job = jobs().get(event.body().toString()); updateStatus(job, "processing"); // Create an ICountDownLatch so we can let the cluster know that we are not finished running this job. ICountDownLatch globalJobFinishLatch = cluster.hazelcast().getCountDownLatch(job.token() + "_job_finish_latch"); globalJobFinishLatch.trySetCount(1); // Store the current job token so other nodes can know that this job is being executed with a simple check jobsBeingExecuted.add(job.token()); // Store the current node handling the execution of this job jobExecutionNodes.set(job.token(), cluster().manager().nodeId()); // Begin timeout handling - This ensures jobs are not running forever // By default all jobs are timed out at 15 minutes unless they are a service job long defaultTimeout = TimeUnit.MINUTES.toMillis(15); if (job.service) { defaultTimeout = 0; // Service jobs do not have a timeout } String executionAddr = "job.server." + job.token() + ".execution"; long timeoutTimer = 0; // If the defaultTimeout is set to 0 then we will not cause timeouts.. this could be a very dangerous feature. Use at own risk if (defaultTimeout > 0) { timeoutTimer = async.setTimer(defaultTimeout, aLong -> eventBus.publish(executionAddr, "timeout")); } // if script is null we will pull script data from job.. Job completedJob; // This will help let us know that the job is being handled. ILock executionLock = cluster.hazelcast().getLock("_job_lock_execution_" + job.token()); executionLock.lock(); // We store the process in an array so the execution handler // can access it Process[] process = new Process[1]; if (tmpQueuedJobMap.containsKey(job.token()) && queuedJobs.containsKey(job.token())) { process[0] = queuedJobs.get(job.token()); queuedJobs.remove(job.token()); tmpQueuedJobMap.remove(job.token()); // Let's go ahead and initialize some new queued jobs if (queuedJobs.size() < minQueuedJobs) { // We run this in its own thread so we don't block the async event loop new Thread(() -> { Thread.currentThread().setName("init-queued-jobs"); // Let's try to start more queued jobs in its place initQueuedJobs(5); }).start(); } } Buffer consoleBuffer = new Buffer(); // Let's allow the console buffer to be retrieved.. Handler<Message> jobPrintStreamHandler = message -> { if (!(message.body() instanceof String)) return; String method = (String) message.body(); if (method.equals("retrieve")) { message.reply(consoleBuffer); } }; // We need a simple way to retrieve the console eventBus.registerHandler("job.server."
+ job.token() + ".printStreamBuffer", jobPrintStreamHandler); try { final long finalTimeoutTimer = timeoutTimer; // This is used so we can tell the job script execution to error, stop, timeout, or halt // Begin handler for job execution control eventBus.registerHandler(executionAddr, new Handler<Message>() { @Override public void handle(io.jsync.eventbus.Message message) { if (!(message.body() instanceof String)) return; String method = (String) message.body(); logger.info("Execution event received \"" + method + "\"..."); if (method.equals("halt") || method.equals("stop") || method.equals("timeout") || method.equals("error")) { // Re-read the latest copy of the job so we do not update the status of the job improperly Job latestJob = jobs().get(job.token()); // IMPORTANT - Execution was halted, which means something outside our scope stopped it, // so we update the status accordingly and then shut the runner process down below switch (method) { case "error": // Handle async error // We don't need to do anything break; case "stop": updateStatus(latestJob, "stopping"); break; case "timeout": updateStatus(latestJob, "timeout"); break; } Handler<Message> self = this; new Thread(() -> { try { // This will go ahead and tell the JobRunner to gracefully terminate eventBus.publish(executionAddr, "kill"); // We are going to tell the final shutdown to wait at least // 30 seconds before we forcibly destroy it process[0].waitFor(30, TimeUnit.SECONDS); } catch (InterruptedException ignored) { } finally { process[0].destroyForcibly(); forceKillJob(latestJob); eventBus.unregisterHandler(executionAddr, self); } }).start(); } else if (method.equals("cancel_timeout")) { async.cancelTimer(finalTimeoutTimer); logger.info("Canceling timeout for job " + job.token()); } } }); if (process[0] == null) { logger.info("Starting process for the job " + job.token()); // Default should always be enabled process[0] = initJobRunnerProcess(job, minJvmSize, maxJvmSize, enabledExecutionFactories.get(job.config.getString("platform", "default"))); } else { if (process[0].isAlive()) { logger.info("Continuing process for the job " + job.token()); // Do nothing ICountDownLatch runnerAwaitLatch = cluster.hazelcast().getCountDownLatch("_jobrunner_await_" + job.token()); if(runnerAwaitLatch.getCount() == 0){ // This will help ensure the JobRunner is in fact ready, since it is possible // that it's not ICountDownLatch awaitContinueLatch = cluster.hazelcast().getCountDownLatch("_jobrunner_awaitcont_" + job.token()); awaitContinueLatch.trySetCount(1); // This means that the JobRunner hasn't finished starting. awaitContinueLatch.await(60, TimeUnit.SECONDS); } while (runnerAwaitLatch.getCount() > 0) { runnerAwaitLatch.countDown(); } } else { process[0] = initJobRunnerProcess(job, minJvmSize, maxJvmSize); } } // This is a way of keeping the console output // stored in memory in case we have to kill the process InputStream consoleStream = process[0].getInputStream(); // Let's ensure we read the console stream.
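// The process output is accumulated in consoleBuffer so it can be served by the printStreamBuffer handler registered above and stored in the job results even if the process has to be killed.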
new Thread(() -> { try { InputStreamReader isr = new InputStreamReader(consoleStream); BufferedReader br = new BufferedReader(isr); int c; while ((c = br.read()) != -1) { if (cluster().config().isDebug()) { System.out.print(((char) c)); } consoleBuffer.appendByte((byte) c); } } catch (IOException ignored) { } }).run(); // End handler for job execution control process[0].waitFor(); } catch (IOException | InterruptedException e) { logger.info("The job " + job.token() + " was interrupted."); } finally { completedJob = jobs().get(job.token()); // Let's store the output from the consoleBuffer object. // This makes it easier to have actual console results completedJob.results.putString("output", consoleBuffer.toString()); // Let's tell the job to kill itself eventBus.publish(executionAddr, "kill"); if (process[0] != null) { try { process[0].waitFor(30, TimeUnit.SECONDS); } catch (Exception e) { e.printStackTrace(); } logger.info("Destroying process for the job " + job.token()); process[0].destroyForcibly(); forceKillJob(completedJob); } eventBus.unregisterHandler("job.server." + job.token() + ".printStreamBuffer", jobPrintStreamHandler); } final Job finalJob = completedJob; if (finalJob.results.containsField("error")) { cluster.eventBus().publish("private.job." + job.token() + ".printStream", new Buffer(finalJob.results.getObject("error", new JsonObject()).getString("message", "error") + "\n")); } // Timeout must be canceled.. // Cancel the timeout timer so we don't do weird stuff.. async.cancelTimer(timeoutTimer); // We want to remove all reserved variables // For some reason field names causes it to stay open for (String key : finalJob.config.toMap().keySet()) { if (key.startsWith("_")) { finalJob.config.removeField(key); } } // Ensure it gets stored updateStatus(finalJob, "complete"); jobs().set(finalJob.token(), finalJob); jobsBeingExecuted.remove(finalJob.token()); jobExecutionNodes.remove(finalJob.token()); if (finalJob.config.containsField("callbackUrl")) { async.runOnContext(event -> { try { HttpClient httpClient = async.createHttpClient(); JsonObject formatted = new JsonObject(); formatted.putString("token", finalJob.token()); formatted.putValue("created", finalJob.created); formatted.putValue("updated", finalJob.updated); formatted.putString("status", finalJob.status); JsonObject formattedResults = new JsonObject(); formattedResults.putBoolean("success", finalJob.results.getBoolean("success", false)); if (finalJob.results.containsField("error")) { formattedResults.putObject("error", finalJob.results.getObject("error")); } formatted.putObject("results", formattedResults); String postbackUrl = finalJob.config.getString("callbackUrl"); httpClient.post(postbackUrl, event12 -> logger.debug("callbackUrl Response Received: " + event12 + " (" + postbackUrl + ")")).putHeader("Content-Type", "application/json") .putHeader("User-Agent", "Automately-Job-Callback") .end(formatted.encode()); } catch (Exception ignored) { } }); } while (globalJobFinishLatch.getCount() > 0) { globalJobFinishLatch.countDown(); } // This is a way to tell any waiting handlers that the job is indeed finished eventBus.publish("job.server." + job.token() + ".finished", "finished"); } }); } } }; // We register a handler so we have a place that receives events for jobs cluster.eventBus().registerHandler("job.server." 
+ this.nodeId, jobEventBusHandler); maxJvmSize = jobServerConfig.getInteger("max_jvm_size", 512); minJvmSize = jobServerConfig.getInteger("min_jvm_size", 16); logger.info("Max jobs set to " + maxJobs); maxQueuedJobs = jobServerConfig.getInteger("max_queued_jobs"); minQueuedJobs = jobServerConfig.getInteger("min_queued_jobs"); logger.info("Maximum queued jobs set to " + maxQueuedJobs); logger.info("Minimum queued jobs set to " + minQueuedJobs); enableQueuedJobs = jobServerConfig.getBoolean("enable_queued_jobs", true); if (enableQueuedJobs) { initQueuedJobs(maxQueuedJobs); } // This exists so we can ensure that all the data for the dataBus is pre-loaded for // anything accessing it such as the DataBusObject cluster.data().persistentMap("dataBus"); if (!cluster.hazelcast().getPartitionService().isClusterSafe()) { // Wait for the local member's partitions to be safe before we start anything // If the cluster is big it may take up to 10 minutes for it to be ready cluster.hazelcast().getPartitionService().forceLocalMemberToBeSafe(10, TimeUnit.MINUTES); } // TODO improve startup scripts // Startup Scripts are called right before any other job gets started when the JobServer first starts. // This allows you to have scripts running on the server that can be handling many things JsonArray scriptsToStart = jobServerConfig.getArray("startup_scripts", new JsonArray()); // Begin Startup Scripts for (Object value : scriptsToStart) { if (value instanceof String && value.toString().split(":").length > 1) { String newVal = (String) value; String user = newVal.split(":")[0]; String script = newVal.split(":")[1]; User mUser = UserData.getUserByUsername(user); if (mUser != null) { // TODO replace with UserFileSystem usage if (VirtualFileSystem.containsUserFile(mUser, script)) { VirtualFile file = VirtualFileSystem.getUserFile(mUser, script); JsonObject scriptConfig = new JsonObject(); logger.info("Attempting to start a job for the startup script " + script); scriptConfig.putString("scriptPath", file.pathAlias); scriptConfig.putString("scriptData", VirtualFileSystem.readFileData(file).toString()); Job newJob = new Job(); newJob.config = new JsonObject().putObject("script", scriptConfig); // we make sure service is false because the script will handle itself if it is a service newJob.service = false; newJob.serviceConfig = new JsonObject(); newJob.serviceName = ""; // Make it empty by default newJob.userToken = mUser.token(); try { newJob = submit(newJob); logger.info("Started new startup job " + newJob.token() + " for the script " + script); } catch (Exception e) { logger.error("Failed to start new startup job " + newJob.token() + " for the script " + script); } } else { logger.error("Failed to start \"" + newVal + "\". The file " + script + " does not exist."); } } else { logger.error("Failed to start \"" + newVal + "\". The user " + user + " does not exist."); } } } // End Startup Scripts CountDownLatch waitLatch = new CountDownLatch(1); Timer startupTimer = new Timer(); startupTimer.schedule(new TimerTask() { @Override public void run() { waitLatch.countDown(); } }, 15000); try { waitLatch.await(2, TimeUnit.MINUTES); } catch (InterruptedException e) { logger.warn("Interrupted while waiting for the startup script timer to finish."); } if (jobServerConfig.getBoolean("autostart_services", true)) { // Begin the startup of all registered services.
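// Each persisted service job below is cloned and resubmitted unless a non-stale job with the same userToken and serviceName is already running, queued, or processing.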
for (Job job : registeredServices.values()) { // We can go ahead and clone the job and then submit it Job newJob = new Job(); newJob.config = job.config; newJob.service = false; // We set this to false because services will call initService newJob.serviceConfig = job.serviceConfig; newJob.serviceName = job.serviceName; newJob.userToken = job.userToken; // Ensure that we do not start up a service when there has already been a job started for one. Collection<Job> existingServices = jobs().values(Predicates.and(Predicates.equal("userToken", newJob.userToken), Predicates.equal("serviceName", newJob.serviceName), Predicates.or(Predicates.equal("status", "running"), Predicates.equal("status", "queued"), Predicates.equal("status", "processing") ))); if (!existingServices.isEmpty()) { boolean alreadyRunning = true; for (Job existing : existingServices) { if (isStale(existing)) { alreadyRunning = false; logger.debug("The job " + existing.token() + " went stale."); // This will attempt to kill it just in case cluster.eventBus().publish("job.server." + existing.token() + ".execution", "kill"); // Just to tell other things waiting to finish it cluster.eventBus().publish("job.server." + existing.token() + ".finished", "finished"); // This code is a last resort method of shutting down the job. forceKillJob(existing); } else { // Set this back to true alreadyRunning = true; } } if (alreadyRunning) { logger.error("Failed to start new service job " + newJob.token() + " for the service " + newJob.serviceName + " for the user " + newJob.userToken + " because a service has already been started."); continue; } } try { submit(newJob); logger.debug("Started new service job " + newJob.token() + " for the service " + newJob.serviceName + " for the user " + newJob.userToken); } catch (Exception e) { logger.error("Failed to start new service job " + newJob.token() + " for the service " + newJob.serviceName + " for the user " + newJob.userToken); } } } if (!cluster.manager().clientMode()) { ExecutorService staleExecutor = Executors.newSingleThreadExecutor(); Runnable staleJobHandler = () -> { logger.debug("Processing old jobs."); for (Job job : jobs().values()) { if (isStale(job)) { logger.debug("The job " + job.token() + " went stale."); cluster.eventBus().publish("job.server." + job.token() + ".execution", "kill"); // Just to tell other things waiting to finish it cluster.eventBus().publish("job.server." + job.token() + ".finished", "finished"); // Last resort method to kill the job forceKillJob(job); } else if (isJobExpired(job, 14)) { logger.debug("Removing the job " + job.token() + " because it has expired. (over 14 days old)"); jobs().remove(job.token()); } else if (isJobExpired(job, 5)) { logger.info("Scrubbing the job " + job.token() + " because it is over 5 days old."); try { if (job.results != null && job.results.containsField("output")) { job.results.putString("output", "Output Scrubbed"); } job.config = new JsonObject(); job.updated = new Date(); jobs().set(job.token(), job); } catch (Exception e) { e.printStackTrace(); } } } }; // Run the check on a separate executor so we never block the main event loop staleJobTimer = async.setPeriodic(TimeUnit.MINUTES.toMillis(30), event -> staleExecutor.execute(staleJobHandler)); // Schedule the first check shortly after startup async.setTimer(15000, event -> staleExecutor.execute(staleJobHandler)); } else { logger.warn("Not checking for stale jobs since we are in client mode."); } } /** * This method is used to submit a job to the cluster.
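* <p>Illustrative usage (a sketch; {@code jobServer}, {@code user}, and {@code scriptConfig} are assumed to already exist):</p>
* <pre>{@code
* Job job = new Job();
* job.userToken = user.token();
* job.config = new JsonObject().putObject("script", scriptConfig);
* job.service = false;
* Job submitted = jobServer.submit(job);
* }</pre>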
* * @param job the Job you wish to send to the Cluster * @return returns a new Job after it has been submitted returns null if it failed */ public Job submit(final Job job) { if (job == null) { throw new NullPointerException("Your job cannot be null."); } if (job.userToken == null || job.userToken.isEmpty()) { throw new NullPointerException("Your job's userToken cannot be null or empty."); } if (job.service && job.serviceConfig == null) { throw new IllegalArgumentException("Cannot start a new service job with an empty service config"); } if (registeredJobServers.size() < 1) { throw new RuntimeException("Cannot submit a job when there are no registered job servers."); } // retrieve it from enabledExecutionFactories String platformId = job.config.getString("platform", "default"); // Here we will go ahead and try to retrieve // a queued job to replace the given Job. If one // is found then the token will be updated. if ((!tmpQueuedJobMap.containsKey(job.token()) && !queuedJobMap.containsKey(job.token()) && queuedJobs != null && !queuedJobs.containsKey(job.token())) && platformId.equals("default")){ Job queuedJob = getQueuedJob(); if (queuedJob != null) { // This will ensure that the tmpJob's token will be copied job.loadJson(new JsonObject().putString("token", queuedJob.token())); } } if(!platformId.equals("default") && !enabledExecutionFactories.containsKey(platformId)){ throw new RuntimeException(platformId + " is an invalid or disabled platformId!"); } // We set the job as queued so other people know that // the job is going to be processed in the cluster. job.status = "queued"; // We must store this job inside the cluster // so we can access it across multiple nodes. jobs().set(job.token(), job); if (!jobsBeingExecuted.contains(job.token())) { final ILock handleLock = cluster().hazelcast().getLock("_job_lock_" + job.token()); if (!handleLock.isLocked()) { try { // We must get a lock for at least 5 minutes so we // don't handle the job multiple times in the server if (handleLock.tryLock()) { User jobUser = UserData.getUserByToken(job.userToken); if (jobUser != null) { if (job.service) { Meta maxServiceJobs = UserData.getMeta(jobUser, "max_service_jobs"); if (maxServiceJobs != null) { if (maxServiceJobs.value instanceof Number) { Number max = (Number) maxServiceJobs.value; // Check for jobs owned by the user that are not lite jobs but are service and are running EntryObject e = new PredicateBuilder().getEntryObject(); Predicate p = e.get("userToken").equal(jobUser.token()) .and(e.get("service").equal(true)) .and(e.get("status").equal("running")); if (jobs().values(p).size() > max.intValue()) { JsonObject newResults = new JsonObject(); newResults.putBoolean("success", false); JsonObject error = new JsonObject(); error.putString("code", "Quota Reached"); error.putString("message", "You have reached your maximum amount of service jobs you can run at the same time."); newResults.putObject("error", error); job.status = "quota_reached"; job.results = newResults; // Let's attempt to stop a queued job if it exists. 
gracefullyStopQueuedJob(job.token()); jobs().set(job.token(), job); return job; } } } } else { // Check for the Maximum Concurrent Allowed Jobs Per User Meta maxConcurrentJobs = UserData.getMeta(jobUser, "max_jobs"); if (maxConcurrentJobs != null) { if (maxConcurrentJobs.value instanceof Number) { Number max = (Number) maxConcurrentJobs.value; // Check for jobs owned by the user that are not lite jobs and are not service and are running EntryObject e = new PredicateBuilder().getEntryObject(); Predicate p = e.get("userToken").equal(jobUser.token()) .and(e.get("service").equal(false)) .and(e.get("status").equal("running")); if (jobs().values(p).size() > max.intValue()) { JsonObject newResults = new JsonObject(); newResults.putBoolean("success", false); JsonObject error = new JsonObject(); error.putString("code", "Quota Reached"); error.putString("message", "You have reached your maximum amount of jobs you can run at the same time."); newResults.putObject("error", error); job.status = "quota_reached"; job.results = newResults; // Let's attempt to stop a queued job if it exists. gracefullyStopQueuedJob(job.token()); jobs().set(job.token(), job); return job; } } } } String jobServerToUse = null; JsonObject jobConfig = job.config; if (tmpQueuedJobMap.containsKey(job.token())) { // We retrieve the server from here to utilize queued jobs jobServerToUse = tmpQueuedJobMap.get(job.token()); logger.info("Submitting the queued job to the server \"" + jobServerToUse + "\""); } else { // This code cannot be used if hazelcast is in client mode for jCluster if (coreConfig().getObject("job", new JsonObject()).getBoolean("execute_on_least_jobs", true)) { JsonObject leastMemberConfig = null; Set<String> keys; if (jobConfig.containsField("_serverConfig")) { keys = registeredJobServers.keySet(new JsonQueryPredicate(jobConfig.getObject("_server_config", new JsonObject()))); } else { keys = registeredJobServers.keySet(); } for (String nodeId : keys) { JsonObject memberConfig = registeredJobServers.get(nodeId); if (leastMemberConfig != null) { int memberSize = jobExecutionNodes.values(Predicates.equal("toString", nodeId)).size(); int leastMemberSize = jobExecutionNodes.values(Predicates.equal("toString", leastMemberConfig.getString("nodeId"))).size(); if (memberSize < leastMemberSize) { leastMemberConfig = memberConfig; } } else { leastMemberConfig = memberConfig; } } if (leastMemberConfig != null) { jobServerToUse = leastMemberConfig.getString("nodeId"); } } } if (jobServerToUse == null) { // Let's choose a server from random now since we haven't // detected one we should use Set<String> keys; if (jobConfig.containsField("_serverConfig")) { keys = registeredJobServers.keySet(new JsonQueryPredicate(jobConfig.getObject("_server_config", new JsonObject()))); } else { keys = registeredJobServers.keySet(); } List<String> nList = new ArrayList<>(keys); Collections.shuffle(nList); jobServerToUse = nList.iterator().next(); } // Now let's actually execute this job by publishing it to the cluster. String serverId = "job.server." 
+ jobServerToUse; logger.info("Submitting the job " + job.token() + " to \"" + serverId + "\""); cluster.eventBus().publish(serverId, job.token()); return job; } } } catch (Exception e) { e.printStackTrace(); } finally { handleLock.unlock(); } return job; } } // We return null if the job cannot be submitted for some reason return null; } private void forceKillJob(Job job) { if (!Windows.isWindows()) { try { Runtime.getRuntime().exec("kill -9 `ps -eo pid,args --cols=10000 | awk '/" + job.token() + "/ && $1 != PROCINFO[\"pid\"] { print $1 }'` &> /dev/null"); } catch (Exception ignored) { } } } /** * This is a simple utility to check if a job has expired. * * @param job the Job you wish to check * @param days the number of days after which the job is considered expired * @return returns true if the job has expired */ private boolean isJobExpired(Job job, int days) { if (job == null) { throw new NullPointerException(); } String status = job.status; // This means we are already processing it. if (status.equals("running") || status.equals("queued") || status.equals("processing")) { return false; } long howManyDays = TimeUnit.MILLISECONDS.toDays(((new Date())).getTime() - job.updated.getTime()); return howManyDays >= days; } @Override public void stop() { if (!nodeId.isEmpty() && jobEventBusHandler != null) { logger.info("Shutting down the JobServer for the node " + nodeId); // Unregister the handler that receives job events for this node cluster.eventBus().unregisterHandler("job.server." + nodeId, jobEventBusHandler); registeredJobServers.remove(this.nodeId); // We need to ensure we stop the processes for all of the queued jobs if (queuedJobs != null) { for (Map.Entry<String, Process> queuedJob : queuedJobs.entrySet()) { try { Process process = queuedJob.getValue(); logger.info("Stopping process for the queued job " + queuedJob.getKey() + "..."); queuedJobs.remove(queuedJob.getKey()); queuedJobMap.remove(queuedJob.getKey()); tmpQueuedJobMap.remove(queuedJob.getKey()); process.destroyForcibly(); } catch (Exception ignored) { } } } if(queuedJobCleanupTimer > -1){ async.cancelTimer(queuedJobCleanupTimer); } if (staleJobTimer > -1) { async.cancelTimer(staleJobTimer); } Collection<String> handlingJobs = jobExecutionNodes.keySet(Predicates.equal("toString", nodeId)); logger.debug("There are " + handlingJobs.size() + " jobs being handled by the node " + this.nodeId); // This latch helps us speed up the shutdown CountDownLatch processLatch = new CountDownLatch(handlingJobs.size()); ExecutorService shutdownService = Executors.newCachedThreadPool(); // Let's automatically handle jobs for this node for (String jobToken : handlingJobs) { logger.debug("Attempting to clean up the job " + jobToken); Job job = jobs().get(jobToken); if (job != null) { // We can submit the shutdown to the shutdown service // so we can process more than one job at a time. shutdownService.submit(() -> { try { // We can tell whatever handler to let the job finish. ICountDownLatch globalJobFinishLatch = cluster.hazelcast().getCountDownLatch(job.token() + "_job_finish_latch"); // This will tell the migration handler to start only if there is a jobserver available if(jobExecutionNodes.size() > 0){ cluster.eventBus().publish("job.server." + job.token() + ".execution", "migrate"); } if (job.service) { logger.info("Attempting to migrate the job " + job.token() + "..."); ICountDownLatch serviceReadyLatch = cluster.hazelcast().getCountDownLatch(job.userToken + "_" + job.serviceName + "_service_ready_latch"); try { // Let's go ahead and attempt to set the count giving // the latch some time to wait.
This is safe because if // the service doesn't get migrated it will still be stopped serviceReadyLatch.trySetCount(5); serviceReadyLatch.await(15, TimeUnit.SECONDS); } catch (Exception ignored) { } } // Let's wait up to 5 seconds for the job // to handle its own migration. try { globalJobFinishLatch.await(5, TimeUnit.SECONDS); } catch (InterruptedException ignored) { } // We are going to send a direct hook to the job to tell it to halt cluster.eventBus().publish("job.server." + job.token() + ".execution", "stop"); cluster.eventBus().publish("job.server." + job.token() + ".execution", "kill"); logger.debug("Waiting for the job " + job.token() + " to finish."); try { globalJobFinishLatch.await(60, TimeUnit.SECONDS); } catch (InterruptedException e) { e.printStackTrace(); } jobExecutionNodes.remove(jobToken); jobsBeingExecuted.remove(jobToken); } catch (Exception e){ e.printStackTrace(); } finally { processLatch.countDown(); } }); } } try { if(!processLatch.await(1, TimeUnit.MINUTES)){ // TODO Change Note logger.warn("Failed to shut down local jobs properly!"); } jobExecutorService.shutdownNow(); } catch (InterruptedException e) { e.printStackTrace(); } } } @Override public String name() { return getClass().getCanonicalName(); } }