package se.kth.karamel.backend.launcher.nova; import com.google.common.base.Predicate; import com.google.common.base.Predicates; import com.google.common.collect.FluentIterable; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.log4j.Logger; import org.jclouds.ContextBuilder; import org.jclouds.compute.RunNodesException; import org.jclouds.compute.domain.NodeMetadata; import org.jclouds.compute.domain.TemplateBuilder; import org.jclouds.compute.options.TemplateOptions; import org.jclouds.http.HttpResponseException; import org.jclouds.net.domain.IpProtocol; import org.jclouds.openstack.nova.v2_0.compute.options.NovaTemplateOptions; import org.jclouds.openstack.nova.v2_0.domain.Ingress; import org.jclouds.openstack.nova.v2_0.domain.KeyPair; import org.jclouds.openstack.nova.v2_0.domain.SecurityGroup; import org.jclouds.openstack.nova.v2_0.extensions.SecurityGroupApi; import org.jclouds.rest.AuthorizationException; import se.kth.karamel.backend.converter.UserClusterDataExtractor; import se.kth.karamel.backend.launcher.Launcher; import se.kth.karamel.backend.running.model.ClusterRuntime; import se.kth.karamel.backend.running.model.GroupRuntime; import se.kth.karamel.backend.running.model.MachineRuntime; import se.kth.karamel.common.clusterdef.Nova; import se.kth.karamel.common.clusterdef.Provider; import se.kth.karamel.common.clusterdef.json.JsonCluster; import se.kth.karamel.common.clusterdef.json.JsonGroup; import se.kth.karamel.common.exception.InvalidNovaCredentialsException; import se.kth.karamel.common.exception.KaramelException; import se.kth.karamel.common.util.Confs; import se.kth.karamel.common.util.NovaCredentials; import se.kth.karamel.common.util.Settings; import se.kth.karamel.common.util.SshKeyPair; import se.kth.karamel.common.util.settings.NovaSetting; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; /** * Created by Alberto on 2015-05-16. */ public final class NovaLauncher extends Launcher{ private static final Logger logger = Logger.getLogger(NovaLauncher.class); private static boolean TESTING = true; private final NovaContext novaContext; private final SshKeyPair sshKeyPair; private Set<String> keys = new HashSet<>(); public NovaLauncher(NovaContext novaContext, SshKeyPair sshKeyPair) throws KaramelException { if (novaContext == null) { throw new KaramelException("Register your valid credentials first :-| "); } else if (sshKeyPair == null) { throw new KaramelException("Choose your ssh keypair first :-| "); } else { this.novaContext = novaContext; this.sshKeyPair = sshKeyPair; logger.info(String.format("Account-Name='%s'", novaContext.getNovaCredentials().getAccountName())); logger.info(String.format("Public-key='%s'", sshKeyPair.getPublicKeyPath())); logger.info(String.format("Private-key='%s'", sshKeyPair.getPrivateKeyPath())); } } public static NovaContext validateCredentials(NovaCredentials novaCredentials, ContextBuilder builder) throws InvalidNovaCredentialsException { try { NovaContext context = new NovaContext(novaCredentials, builder); SecurityGroupApi securityGroupApi = context.getSecurityGroupApi(); securityGroupApi.list(); return context; } catch (AuthorizationException e) { throw new InvalidNovaCredentialsException("account-name:" + novaCredentials.getAccountName(), e); } } public static NovaCredentials readCredentials(Confs confs) { String accountId = confs.getProperty(NovaSetting.NOVA_ACCOUNT_ID_KEY.getParameter()); String accessKey = confs.getProperty(NovaSetting.NOVA_ACCESSKEY_KEY.getParameter()); String endpoint = confs.getProperty(NovaSetting.NOVA_ACCOUNT_ENDPOINT.getParameter()); String novaRegion = confs.getProperty(NovaSetting.NOVA_REGION.getParameter()); String novaNetworkId = confs.getProperty(NovaSetting.NOVA_NETWORKID.getParameter()); NovaCredentials novaCredentials = null; if (accountId != null && !accountId.isEmpty() && accessKey != null && !accessKey.isEmpty() && endpoint != null && !endpoint.isEmpty() && novaRegion != null && !novaRegion.isEmpty()) { novaCredentials = new NovaCredentials(); novaCredentials.setAccountName(accountId); novaCredentials.setAccountPass(accessKey); novaCredentials.setEndpoint(endpoint); novaCredentials.setRegion(novaRegion); novaCredentials.setNetworkId(novaNetworkId); } return novaCredentials; } public String createSecurityGroup(String clusterName, String groupName, Nova nova, Set<String> ports) { String securityGroupUniqueName = NovaSetting.NOVA_UNIQUE_GROUP_NAME(clusterName, groupName); logger.info(String.format("Creating security group '%s' ...", securityGroupUniqueName)); SecurityGroupApi client = novaContext.getSecurityGroupApi(); String groupId; //TODO Do we have something similar to VPC EC2 in Nova? SecurityGroup created = client.createWithDescription(securityGroupUniqueName, NovaSetting .NOVA_UNIQUE_GROUP_DESCRIPTION(clusterName, groupName)); //Get id of the security group groupId = created.getId(); //Go over the ips if (!TESTING) { for (String port : ports) { Integer portNumber; IpProtocol ipProtocol; if (port.contains("/")) { String[] s = port.split("/"); portNumber = Integer.valueOf(s[0]); ipProtocol = IpProtocol.valueOf(s[1]); } else { portNumber = Integer.valueOf(port); ipProtocol = IpProtocol.TCP; } Ingress ingress = Ingress.builder() .fromPort(portNumber) .toPort(portNumber) .ipProtocol(ipProtocol) .build(); client.createRuleAllowingCidrBlock(groupId, ingress, "0.0.0.0/0"); logger.info(String.format("Ports became open for '%s'", securityGroupUniqueName)); } } else { Ingress ingress = Ingress.builder() .fromPort(1) .toPort(65535) .ipProtocol(IpProtocol.TCP) .build(); client.createRuleAllowingCidrBlock(groupId, ingress, "0.0.0.0/0"); logger.info(String.format("Ports became open for '%s'", securityGroupUniqueName)); } logger.info(String.format("Security group '%s' was created :)", securityGroupUniqueName)); return groupId; } public boolean uploadSshPublicKey(String keyPairName, Nova nova, boolean removeOld) { boolean uploadSuccesful; FluentIterable<KeyPair> keyPairs = novaContext.getKeyPairApi().list(); if (keyPairs.isEmpty()) { logger.info(String.format("New keypair '%s' is being uploaded to Nova OpenStack", keyPairName)); novaContext.getKeyPairApi().createWithPublicKey(keyPairName, sshKeyPair.getPublicKey()); uploadSuccesful = true; } else if (removeOld) { logger.info(String.format("Removing the old keypair '%s' and uploading the new one ...", keyPairName)); boolean deleteSuccesful = novaContext.getKeyPairApi().delete(keyPairName); KeyPair pair = novaContext.getKeyPairApi().createWithPublicKey(keyPairName, sshKeyPair.getPublicKey()); uploadSuccesful = deleteSuccesful && pair != null; } else { uploadSuccesful = false; } return uploadSuccesful; } public boolean cleanupFailedNodes(Map<NodeMetadata, Throwable> failedNodes) { boolean success; if (failedNodes.size() > 0) { Set<String> lostIds = Sets.newLinkedHashSet(); for (Map.Entry<NodeMetadata, Throwable> lostNode : failedNodes.entrySet()) { lostIds.add(lostNode.getKey().getId()); } int numberOfNodesToDelete = lostIds.size(); logger.info(String.format("Destroying failed nodes with ids: %s", lostIds.toString())); Set<? extends NodeMetadata> destroyedNodes = novaContext.getComputeService().destroyNodesMatching( Predicates.in(failedNodes.keySet())); lostIds.clear(); for (NodeMetadata destroyed : destroyedNodes) { lostIds.add(destroyed.getId()); } logger.info("Failed nodes destroyed ;)"); int numberOfNodesSuccesfullyDeleted = lostIds.size(); success = numberOfNodesSuccesfullyDeleted == numberOfNodesToDelete; } else { success = true; } return success; } @Override public void cleanup(JsonCluster definition, ClusterRuntime runtime) throws KaramelException { runtime.resolveFailures(); List<GroupRuntime> groups = runtime.getGroups(); Set<String> allNovaVms = new HashSet<>(); Set<String> allNovaVmsIds = new HashSet<>(); Map<String, String> groupRegion = new HashMap<>(); for (GroupRuntime group : groups) { group.getCluster().resolveFailures(); Provider provider = UserClusterDataExtractor.getGroupProvider(definition, group.getName()); if (provider instanceof Nova) { for (MachineRuntime machine : group.getMachines()) { if (machine.getVmId() != null) { allNovaVmsIds.add(machine.getVmId()); } } JsonGroup jg = UserClusterDataExtractor.findGroup(definition, group.getName()); List<String> vmNames = NovaSetting.NOVA_UNIQUE_VM_NAMES(group.getCluster().getName(), group.getName(), jg.getSize()); allNovaVms.addAll(vmNames); groupRegion.put(group.getName(), novaContext.getNovaCredentials().getRegion()); } } cleanup(definition.getName(), allNovaVmsIds, allNovaVms, groupRegion); } public void cleanup(String clusterName, Set<String> vmIds, Set<String> vmNames, Map<String, String> groupRegion) throws KaramelException { Set<String> groupNames = new HashSet<>(); for (Map.Entry<String, String> gp : groupRegion.entrySet()) { groupNames.add(NovaSetting.NOVA_UNIQUE_GROUP_NAME(clusterName, gp.getKey())); } logger.info(String.format("Killing following machines with names: \n %s \nor inside group names %s \nor with ids: " + "%s", vmNames.toString(), groupNames, vmIds)); logger.info(String.format("Killing all machines in groups: %s", groupNames.toString())); novaContext.getComputeService().destroyNodesMatching(withPredicate(vmIds, vmNames, groupNames)); logger.info(String.format("All machines destroyed in all the security groups. :) ")); for (Map.Entry<String, String> gp : groupRegion.entrySet()) { String uniqueGroupName = NovaSetting.NOVA_UNIQUE_GROUP_NAME(clusterName, gp.getKey()); for (SecurityGroup secgroup : novaContext.getSecurityGroupApi().list()) { //TODO find the real name of the jclouds groups in openstack if (secgroup.getName().startsWith("jclouds-" + uniqueGroupName) || secgroup.getName().equals(uniqueGroupName)) { logger.info(String.format("Destroying security group '%s' ...", secgroup.getName())); boolean retry = false; int count = 0; do { count++; try { logger.info(String.format("#%d Destroying security group '%s' ...", count, secgroup.getName())); novaContext.getSecurityGroupApi().delete(secgroup.getId()); } catch (IllegalStateException ex) { logger.info(String.format("Hurry up Nova!! terminate machines!! '%s', will retry in %d ms :@", uniqueGroupName, NovaSetting.NOVA_RETRY_INTERVAL.getParameter())); retry = true; try { Thread.currentThread().sleep(Long.parseLong(NovaSetting.NOVA_RETRY_INTERVAL.getParameter())); } catch (InterruptedException ex1) { logger.error("", ex1); } } } while (retry); logger.info(String.format("The security group '%s' destroyed ^-^", secgroup.getName())); } } } } @Override public String forkGroup(JsonCluster definition, ClusterRuntime runtime, String name) throws KaramelException { JsonGroup jg = UserClusterDataExtractor.findGroup(definition,name); Provider provider = UserClusterDataExtractor.getGroupProvider(definition,name); Nova nova = (Nova) provider; Set<String> ports = new HashSet<>(); ports.addAll(Settings.AWS_VM_PORTS_DEFAULT); String groupId = createSecurityGroup(definition.getName(), jg.getName(), nova, ports); return groupId; } @Override public List<MachineRuntime> forkMachines(JsonCluster definition, ClusterRuntime runtime, String name) throws KaramelException { Nova nova = (Nova) UserClusterDataExtractor.getGroupProvider(definition,name); JsonGroup definedGroup = UserClusterDataExtractor.findGroup(definition, name); GroupRuntime groupRuntime = UserClusterDataExtractor.findGroup(runtime,name); Set<String> groupIds = new HashSet<>(); groupIds.add(groupRuntime.getId()); String keypairName = NovaSetting.NOVA_KEYPAIR_NAME(runtime.getName(), novaContext.getNovaCredentials().getRegion()); if(!keys.contains(keypairName)) { uploadSshPublicKey(keypairName,nova,true); keys.add(keypairName); } return requestNodes(keypairName,groupRuntime,groupIds,Integer.valueOf(definedGroup.getSize()),nova); } private List<MachineRuntime> requestNodes(String keypairName, GroupRuntime groupRuntime, Set<String> groupIds, Integer totalSize, Nova nova) throws KaramelException { String uniqueGroupName = NovaSetting.NOVA_UNIQUE_GROUP_NAME(groupRuntime.getCluster().getName(), groupRuntime.getName()); List<String> allVmNames = NovaSetting.NOVA_UNIQUE_VM_NAMES(groupRuntime.getCluster().getName(), groupRuntime.getName(), totalSize.intValue()); logger.info(String.format("Start forking %d machine(s) for '%s' ...", totalSize, uniqueGroupName)); boolean succeed = false; int tries = 0; Set<NodeMetadata> successfulNodes = Sets.newLinkedHashSet(); List<String> unforkedVmNames = new ArrayList<>(); List<String> toBeForkedVmNames; unforkedVmNames.addAll(allVmNames); Map<NodeMetadata, Throwable> failedNodes = Maps.newHashMap(); while (!succeed && tries < Settings.AWS_RETRY_MAX) { int requestSize = totalSize - successfulNodes.size(); int maxForkRequests = Integer.parseInt(NovaSetting.NOVA_MAX_FORK_VMS_PER_REQUEST.getParameter()); if (requestSize > maxForkRequests) { requestSize = maxForkRequests; toBeForkedVmNames = unforkedVmNames.subList(0, maxForkRequests); } else { toBeForkedVmNames = unforkedVmNames; } TemplateBuilder template = novaContext.getComputeService().templateBuilder(); TemplateOptions templateOptions = novaContext.getComputeService().templateOptions().securityGroups(groupIds); logger.info("novaContext.getNovaCredentials().getNetworkId() = " + novaContext.getNovaCredentials().getNetworkId()); NovaTemplateOptions options = templateOptions.as(NovaTemplateOptions.class) .keyPairName(keypairName) .autoAssignFloatingIp(true) .nodeNames(toBeForkedVmNames) .networks("d5465024-4d06-44b2-acba-43c1363762fd"); // .networks(novaContext.getNovaCredentials().getNetworkId()); template.options(options); template.os64Bit(true); template.hardwareId(novaContext.getNovaCredentials().getRegion()+"/"+nova.getFlavor()); template.imageId(novaContext.getNovaCredentials().getRegion()+"/"+nova.getImage()); template.locationId(novaContext.getNovaCredentials().getRegion()); tries++; Set<NodeMetadata> succ = new HashSet<>(); try { logger.info(String.format("Forking %d machine(s) for '%s', so far(succeeded:%d, failed:%d, total:%d)", requestSize, uniqueGroupName, successfulNodes.size(), failedNodes.size(), totalSize)); succ.addAll(novaContext.getComputeService().createNodesInGroup( uniqueGroupName, requestSize, template.build())); } catch (RunNodesException ex) { addSuccessAndLostNodes(ex, succ, failedNodes); } catch (HttpResponseException e) { //Need error handling on the different possible logger.error("", e); } catch (IllegalStateException ex) { logger.error("", ex); logger.info(String.format("#%d Hurry up Nova!! I want machines for %s, will ask you again in %d ms :@", tries, uniqueGroupName, NovaSetting.NOVA_RETRY_INTERVAL), ex); } unforkedVmNames = findLeftVmNames(succ, unforkedVmNames); successfulNodes.addAll(succ); if (successfulNodes.size() < totalSize) { try { succeed = false; logger.info(String.format("So far we got %d successful-machine(s) and %d failed-machine(s) out of %d " + "original-number for '%s'. Failed nodes will be killed later.", successfulNodes.size(), failedNodes.size(), totalSize, uniqueGroupName)); Thread.currentThread().sleep(Settings.AWS_RETRY_INTERVAL); } catch (InterruptedException ex1) { logger.error("", ex1); } } else { succeed = true; logger.info(String.format("Cool!! we got all %d machine(s) for '%s' |;-) we have %d failed-machines to kill " + "before we go on..", totalSize, uniqueGroupName, failedNodes.size())); if (failedNodes.size() > 0) { cleanupFailedNodes(failedNodes); } List<MachineRuntime> machines = new ArrayList<>(); for (NodeMetadata node : successfulNodes) { if (node != null) { MachineRuntime machine = new MachineRuntime(groupRuntime); ArrayList<String> privateIps = new ArrayList(); ArrayList<String> publicIps = new ArrayList(); privateIps.addAll(node.getPrivateAddresses()); publicIps.addAll(node.getPublicAddresses()); machine.setVmId(node.getId()); machine.setName(node.getName()); machine.setPrivateIp(privateIps.get(0)); //TODO fix this, for now we set ip to the same private if not accessible String publicIp = publicIps.isEmpty()?privateIps.get(0):publicIps.get(0); machine.setPublicIp(publicIp); machine.setSshPort(node.getLoginPort()); machine.setSshUser(node.getCredentials().getUser()); machines.add(machine); } } return machines; } } throw new KaramelException(String.format("Couldn't fork machines for group'%s'", groupRuntime.getName())); } private void addSuccessAndLostNodes(RunNodesException rnex, Set<NodeMetadata> successfulNodes, Map<NodeMetadata, Throwable> lostNodes) { // workaround https://code.google.com/p/jclouds/issues/detail?id=923 // by ensuring that any nodes in the "NodeErrors" do not get considered // successful Set<? extends NodeMetadata> reportedSuccessfulNodes = rnex.getSuccessfulNodes(); Map<? extends NodeMetadata, ? extends Throwable> errorNodesMap = rnex.getNodeErrors(); Set<? extends NodeMetadata> errorNodes = errorNodesMap.keySet(); // "actual" successful nodes are ones that don't appear in the errorNodes successfulNodes.addAll(Sets.difference(reportedSuccessfulNodes, errorNodes)); lostNodes.putAll(errorNodesMap); } private List<String> findLeftVmNames(Set<? extends NodeMetadata> successfulNodes, List<String> vmNames) { List<String> leftVmNames = new ArrayList<>(); leftVmNames.addAll(vmNames); int unnamedVms = 0; for (NodeMetadata nodeMetadata : successfulNodes) { String nodeName = nodeMetadata.getName(); if (leftVmNames.contains(nodeName)) { leftVmNames.remove(nodeName); } else { unnamedVms++; } } for (int i = 0; i < unnamedVms; i++) { if (leftVmNames.size() > 0) { logger.debug(String.format("Taking %s as one of the unnamed vms.", leftVmNames.get(0))); leftVmNames.remove(0); } } return leftVmNames; } public static Predicate<NodeMetadata> withPredicate(final Set<String> ids, final Set<String> names, final Set<String> groupNames) { return new Predicate<NodeMetadata>() { @Override public boolean apply(NodeMetadata nodeMetadata) { String id = nodeMetadata.getId(); String name = nodeMetadata.getName(); String group = nodeMetadata.getGroup(); return ((id != null && ids.contains(id)) || (name != null && names.contains(name) || (group != null && groupNames.contains(group)))); } @Override public String toString() { return "machines predicate"; } }; } }