/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package se.kth.karamel.backend.machines;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
import se.kth.karamel.backend.running.model.MachineRuntime;
import se.kth.karamel.backend.running.model.tasks.Task;
import se.kth.karamel.common.util.Settings;
import se.kth.karamel.common.util.SshKeyPair;
import se.kth.karamel.common.exception.KaramelException;

/**
 * Keeps one {@link SshMachine} per machine id, runs each of them on a fixed-size thread pool,
 * pings all of them every {@code Settings.SSH_PING_INTERVAL} from {@link #run()}, and routes
 * {@link Task} operations to the machine named by {@code task.getMachineId()}.
 *
 * <p>
 * The machine registry is a concurrent map and the {@code stopping}/{@code paused} flags are
 * volatile because {@link #run()} executes on its own thread while the other methods are
 * presumably invoked from different threads (NOTE(review): confirm against the callers).
 *
 * @author kamal
 */
public class MachinesMonitor implements TaskSubmitter, Runnable {

  private static final Logger logger = Logger.getLogger(MachinesMonitor.class);
  private final String clusterName;
  // Concurrent map: run() iterates it while addMachines() may be inserting from another thread.
  private final Map<String, SshMachine> machines = new ConcurrentHashMap<>();
  private volatile boolean paused = false;
  ExecutorService executor;
  private final SshKeyPair keyPair;
  // Volatile so that the polling loop in run() observes a stop request made by another thread.
  private volatile boolean stopping = false;

  /**
   * Creates a monitor backed by a fixed thread pool with one thread per expected machine.
   *
   * @param clusterName name of the cluster, used in log messages only
   * @param numMachines size of the thread pool that runs the {@link SshMachine}s
   * @param keyPair key pair whose public key, private key and passphrase are handed to every
   * {@link SshMachine} created by {@link #addMachines(List)}
   */
  public MachinesMonitor(String clusterName, int numMachines, SshKeyPair keyPair) {
    this.keyPair = keyPair;
    this.clusterName = clusterName;
    executor = Executors.newFixedThreadPool(numMachines);
  }

  /**
   * Propagates the stopping flag to every registered machine and then to this monitor. Once the
   * flag is {@code true}, {@link #run()} leaves its loop and shuts the thread pool down.
   *
   * @param stopping the new value of the flag
   */
  public void setStopping(boolean stopping) {
    for (SshMachine sshMachine : machines.values()) {
      // Forward the requested value; the previous code always passed true here.
      sshMachine.setStopping(stopping);
    }
    this.stopping = stopping;
  }

  /**
   * Finds a registered machine by the public IP of its machine entity.
   *
   * @param publicIp the public IP to look for
   * @return the first machine whose entity reports that public IP, or {@code null} when there is
   * no such machine or {@code publicIp} is {@code null}
   */
  public SshMachine getMachine(String publicIp) {
    if (publicIp == null) {
      return null;
    }
    for (SshMachine sshMachine : machines.values()) {
      // Compare from the non-null side so a machine without a public IP cannot cause an NPE.
      if (publicIp.equals(sshMachine.getMachineEntity().getPublicIp())) {
        return sshMachine;
      }
    }
    return null;
  }

  /**
   * Wraps every given machine entity in an {@link SshMachine}, registers it under the entity's id
   * and starts it on the thread pool.
   *
   * @param machineEntities the machine entities to register and start
   */
  public void addMachines(List<MachineRuntime> machineEntities) {
    for (MachineRuntime machineEntity : machineEntities) {
      SshMachine sshMachine = new SshMachine(machineEntity, keyPair.getPublicKey(),
          keyPair.getPrivateKey(), keyPair.getPassphrase());
      machines.put(machineEntity.getId(), sshMachine);
      executor.execute(sshMachine);
    }
  }

  /**
   * Resumes all registered machines; does nothing unless the monitor is currently paused.
   */
  public void resume() {
    if (paused) {
      logger.info("Sending resume signal to all machines");
      for (SshMachine sshMachine : machines.values()) {
        sshMachine.resume();
      }
      paused = false;
    }
  }

  /**
   * Pauses all registered machines; does nothing if the monitor is already paused.
   */
  public void pause() {
    if (!paused) {
      logger.info("Sending pause signal to all machines");
      for (SshMachine sshMachine : machines.values()) {
        sshMachine.pause();
      }
      paused = true;
    }
  }

  /**
   * Pings every registered machine, sleeps for {@code Settings.SSH_PING_INTERVAL} and repeats
   * until the stopping flag is set. The thread pool is shut down whenever the loop ends, whether
   * the stop request was noticed through an interrupt or through the loop condition.
   */
  @Override
  public void run() {
    logger.info(String.format("Machines-Monitor started for '%s' d'-'", clusterName));
    while (!stopping) {
      try {
        for (SshMachine machine : machines.values()) {
          machine.ping();
        }
        try {
          Thread.sleep(Settings.SSH_PING_INTERVAL);
        } catch (InterruptedException ex) {
          // An interrupt together with the stopping flag is the normal way to end the loop;
          // without the flag it is unexpected, so report it and keep monitoring.
          if (!stopping) {
            logger.error("Got interupted without having recived the stopping signal..", ex);
          }
        }
      } catch (Exception ex) {
        // A failing ping must not end the monitor; log it and continue with the next round.
        logger.error("", ex);
      }
    }
    shutdownExecutor();
    logger.info(String.format("Machines-Monitor stoped for '%s' d'-'", clusterName));
  }

  /**
   * Interrupts the pool's worker threads and waits up to one minute for them to finish.
   */
  private void shutdownExecutor() {
    logger.error("Terminating machines threadpool");
    executor.shutdownNow();
    try {
      if (executor.awaitTermination(1, TimeUnit.MINUTES)) {
        logger.info("Machines threadpool terminated");
      } else {
        logger.warn("Machines threadpool did not terminate within the timeout");
      }
    } catch (InterruptedException ex) {
      // Restore the interrupt status instead of swallowing it.
      Thread.currentThread().interrupt();
    }
  }

  /**
   * Resolves the machine a task belongs to.
   *
   * @param task the task whose {@code getMachineId()} names the machine
   * @return the registered machine, never {@code null}
   * @throws KaramelException if no machine is registered under the task's machine id
   */
  private SshMachine requireMachine(Task task) throws KaramelException {
    String machineName = task.getMachineId();
    // ConcurrentHashMap rejects null keys, so treat a null id as "not registered".
    SshMachine machine = (machineName == null) ? null : machines.get(machineName);
    if (machine == null) {
      throw new KaramelException(String.format("Machine '%s' does not exist in manager", machineName));
    }
    return machine;
  }

  /**
   * Enqueues the task on the machine it belongs to.
   *
   * @param task the task to enqueue
   * @throws KaramelException if the task's machine is not registered
   */
  @Override
  public void submitTask(Task task) throws KaramelException {
    logger.debug(String.format("Recieved '%s' from DAG", task.toString()));
    SshMachine machine = requireMachine(task);
    machine.enqueue(task);
    // TODO - check if there is a return value....
  }

  /**
   * Disconnects every registered machine.
   *
   * @throws KaramelException declared by the original signature; kept for callers
   */
  public void disconnect() throws KaramelException {
    for (SshMachine machine : machines.values()) {
      machine.disconnect();
    }
  }

  /**
   * Adds the task to the task list of its own machine entity.
   *
   * @param task the task that is about to start
   * @throws KaramelException declared by the interface method
   */
  @Override
  public void prepareToStart(Task task) throws KaramelException {
    MachineRuntime machine = task.getMachine();
    machine.addTask(task);
  }

  /**
   * Removes the task from its SSH machine and from its machine entity.
   *
   * @param task the task to remove
   * @throws KaramelException if the task's machine is not registered
   */
  @Override
  public void terminate(Task task) throws KaramelException {
    logger.debug(String.format("Recieved '%s' from DAG to remove", task.toString()));
    SshMachine sshMachine = requireMachine(task);
    sshMachine.remove(task);
    MachineRuntime machine = task.getMachine();
    machine.removeTask(task);
  }

  /**
   * Asks the task's machine to kill the session of the task.
   *
   * @param task the task whose session should be killed
   * @throws KaramelException if the task's machine is not registered
   */
  @Override
  public void killMe(Task task) throws KaramelException {
    SshMachine machine = requireMachine(task);
    machine.killTaskSession(task);
  }

  /**
   * Asks the task's machine to retry the failed task.
   *
   * @param task the failed task to retry
   * @throws KaramelException if the task's machine is not registered
   */
  @Override
  public void retryMe(Task task) throws KaramelException {
    SshMachine machine = requireMachine(task);
    machine.retryFailedTask(task);
  }

  /**
   * Asks the task's machine to skip the failed task.
   *
   * @param task the failed task to skip
   * @throws KaramelException if the task's machine is not registered
   */
  @Override
  public void skipMe(Task task) throws KaramelException {
    SshMachine machine = requireMachine(task);
    machine.skipFailedTask(task);
  }
}