package com.sequenceiq.cloudbreak.orchestrator.swarm;
import static com.github.dockerjava.api.model.RestartPolicy.alwaysRestart;
import static com.sequenceiq.cloudbreak.common.type.OrchestratorConstants.SWARM;
import static java.lang.String.format;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.springframework.util.StringUtils;
import com.github.dockerjava.api.DockerClient;
import com.github.dockerjava.api.command.CreateContainerCmd;
import com.github.dockerjava.api.model.Bind;
import com.github.dockerjava.api.model.ExposedPort;
import com.github.dockerjava.api.model.Link;
import com.github.dockerjava.api.model.Ports;
import com.github.dockerjava.core.DockerClientBuilder;
import com.github.dockerjava.core.DockerClientConfig;
import com.github.dockerjava.jaxrs.DockerCmdExecFactoryImpl;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.sequenceiq.cloudbreak.orchestrator.OrchestratorBootstrap;
import com.sequenceiq.cloudbreak.orchestrator.OrchestratorBootstrapRunner;
import com.sequenceiq.cloudbreak.orchestrator.container.SimpleContainerOrchestrator;
import com.sequenceiq.cloudbreak.orchestrator.exception.CloudbreakOrchestratorCancelledException;
import com.sequenceiq.cloudbreak.orchestrator.exception.CloudbreakOrchestratorException;
import com.sequenceiq.cloudbreak.orchestrator.exception.CloudbreakOrchestratorFailedException;
import com.sequenceiq.cloudbreak.orchestrator.model.ContainerConfig;
import com.sequenceiq.cloudbreak.orchestrator.model.ContainerConstraint;
import com.sequenceiq.cloudbreak.orchestrator.model.ContainerInfo;
import com.sequenceiq.cloudbreak.orchestrator.model.GatewayConfig;
import com.sequenceiq.cloudbreak.orchestrator.model.Node;
import com.sequenceiq.cloudbreak.orchestrator.model.OrchestrationCredential;
import com.sequenceiq.cloudbreak.orchestrator.model.port.TcpPortBinding;
import com.sequenceiq.cloudbreak.orchestrator.state.ExitCriteria;
import com.sequenceiq.cloudbreak.orchestrator.state.ExitCriteriaModel;
import com.sequenceiq.cloudbreak.orchestrator.swarm.builder.BindsBuilder;
import com.sequenceiq.cloudbreak.orchestrator.swarm.containers.MunchausenBootstrap;
import com.sequenceiq.cloudbreak.orchestrator.swarm.containers.SwarmOrchestratorBootstrap;
import com.sequenceiq.cloudbreak.orchestrator.swarm.containers.SwarmOrchestratorDeletion;
/**
 * Container orchestrator implementation backed by Docker Swarm.
 *
 * <p>The Swarm cluster itself is bootstrapped by running the Munchausen tool as a container on the gateway,
 * after which containers are created and removed through the Swarm manager API exposed under
 * {@code <gatewayUrl>/swarm}. The plain Docker daemon of the gateway is reached under {@code <gatewayUrl>/docker}.
 */
@Component
public class SwarmContainerOrchestrator extends SimpleContainerOrchestrator {

    private static final Logger LOGGER = LoggerFactory.getLogger(SwarmContainerOrchestrator.class);

    /** Read timeout passed to {@code DockerCmdExecFactoryImpl#withReadTimeout} (presumably milliseconds - confirm). */
    private static final int READ_TIMEOUT = 180_000;

    /** Value of the {@code --wait} argument handed to Munchausen. */
    private static final String MUNCHAUSEN_WAIT = "3600";

    /** Upper bound reported by {@link #getMaxBootstrapNodes()}. */
    private static final int MAX_IP_FOR_ONE_REQUEST = 600;

    /** Separator between the key and the value of a container environment variable. */
    private static final String ENV_KEY_VALUE_SEPARATOR = "=";

    @Value("${cb.docker.container.ambari.agent:}")
    private String ambariAgent;

    @Value("${cb.docker.container.ambari.server:}")
    private String ambariServer;

    @Value("${cb.docker.container.ambari.db:}")
    private String postgresDockerImageName;

    /**
     * Bootstraps a Swarm based container orchestration cluster with a Consul discovery backend with the Munchausen tool.
     *
     * @param gatewayConfig     Config used to access the gateway instance
     * @param config            Name and version of the Munchausen image to run
     * @param nodes             Nodes that must be added to the Swarm cluster
     * @param consulServerCount Number of Consul servers in the cluster
     * @param exitCriteriaModel Model handed to the bootstrap runner together with the exit criteria
     * @throws CloudbreakOrchestratorException if the bootstrap was cancelled or failed; any unexpected exception is
     *                                         wrapped into a {@link CloudbreakOrchestratorFailedException}
     */
    @Override
    public void bootstrap(GatewayConfig gatewayConfig, ContainerConfig config, Set<Node> nodes, int consulServerCount,
            ExitCriteriaModel exitCriteriaModel) throws CloudbreakOrchestratorException {
        try {
            String gatewayAddress = gatewayConfig.getConnectionAddress();
            // NOTE(review): this is null when no node has the gateway address as its public IP - confirm callers guarantee a match.
            String privateGatewayIp = getPrivateGatewayIp(gatewayAddress, nodes);
            Set<String> allPrivateAddresses = getPrivateAddresses(nodes);
            Set<String> nonGatewayAddresses = getPrivateAddresses(getNodesWithoutGateway(gatewayAddress, nodes));
            Set<String> consulServers = selectConsulServers(privateGatewayIp, nonGatewayAddresses, consulServerCount);
            Set<String> dockerInventory = prepareDockerAddressInventory(allPrivateAddresses);
            String[] cmd = {"--debug", "bootstrap", "--wait", MUNCHAUSEN_WAIT, "--consulServers",
                    concatToString(consulServers), concatToString(dockerInventory)};
            runner(munchausenBootstrap(gatewayConfig, imageName(config), cmd),
                    getExitCriteria(), exitCriteriaModel, MDC.getCopyOfContextMap()).call();
        } catch (CloudbreakOrchestratorCancelledException | CloudbreakOrchestratorFailedException coe) {
            // Orchestrator exceptions already carry the right semantics, pass them through untouched.
            throw coe;
        } catch (Exception ex) {
            throw new CloudbreakOrchestratorFailedException(ex);
        }
    }

    /**
     * Adds new nodes to an already running Swarm cluster by running Munchausen with the {@code add} command.
     *
     * @param gatewayConfig     Config used to access the gateway instance; its private address is used as the Consul join address
     * @param config            Name and version of the Munchausen image to run
     * @param nodes             Nodes that must be added to the Swarm cluster
     * @param exitCriteriaModel Model handed to the bootstrap runner together with the exit criteria
     * @throws CloudbreakOrchestratorException if the operation was cancelled or failed; any unexpected exception is
     *                                         wrapped into a {@link CloudbreakOrchestratorFailedException}
     */
    @Override
    public void bootstrapNewNodes(GatewayConfig gatewayConfig, ContainerConfig config, Set<Node> nodes,
            ExitCriteriaModel exitCriteriaModel) throws CloudbreakOrchestratorException {
        try {
            Set<String> dockerInventory = prepareDockerAddressInventory(getPrivateAddresses(nodes));
            String[] cmd = {"--debug", "add", "--wait", MUNCHAUSEN_WAIT, "--join",
                    getConsulJoinIp(gatewayConfig.getPrivateAddress()), concatToString(dockerInventory)};
            runner(munchausenNewNodeBootstrap(gatewayConfig, imageName(config), cmd),
                    getExitCriteria(), exitCriteriaModel, MDC.getCopyOfContextMap()).call();
        } catch (CloudbreakOrchestratorCancelledException | CloudbreakOrchestratorFailedException coe) {
            throw coe;
        } catch (Exception ex) {
            throw new CloudbreakOrchestratorFailedException(ex);
        }
    }

    /**
     * Intentionally a no-op: this orchestrator performs no validation of the API endpoint.
     */
    @Override
    public void validateApiEndpoint(OrchestrationCredential cred) throws CloudbreakOrchestratorException {
        // nothing to validate
    }

    /**
     * Creates one container per host listed in the constraint, submitting the creations to the parallel runner
     * and waiting for all of them to finish.
     *
     * <p>If anything fails, the containers registered so far are deleted on a best-effort basis and the
     * original failure is reported; a failure of the cleanup itself is logged and attached as a suppressed exception.
     *
     * @param config            Name and version of the image to start
     * @param cred              Credential used to reach the Swarm manager
     * @param constraint        Hosts, name, environment, binds, ports and links of the containers
     * @param exitCriteriaModel Model handed to the bootstrap runners together with the exit criteria
     * @return descriptors of the started containers, one per host
     * @throws CloudbreakOrchestratorException if any of the containers could not be started
     */
    @Override
    public List<ContainerInfo> runContainer(ContainerConfig config, OrchestrationCredential cred, ContainerConstraint constraint,
            ExitCriteriaModel exitCriteriaModel) throws CloudbreakOrchestratorException {
        List<ContainerInfo> containerInfos = new ArrayList<>();
        String image = imageName(config);
        try {
            List<Future<Boolean>> futures = new ArrayList<>();
            int index = 0;
            for (String fqdn : constraint.getHosts()) {
                // Swarm node constraints work with the short host name, i.e. the first label of the FQDN.
                String nodeName = fqdn.split("\\.")[0];
                DockerClient dockerApiClient = swarmClient(cred);
                String name = createSwarmContainerName(constraint, index++);
                CreateContainerCmd createCmd = decorateCreateContainerCmd(image, constraint, nodeName, dockerApiClient, name);
                OrchestratorBootstrap bootstrap = new SwarmOrchestratorBootstrap(dockerApiClient, nodeName, createCmd);
                Callable<Boolean> runner = runner(bootstrap, getExitCriteria(), exitCriteriaModel, MDC.getCopyOfContextMap());
                futures.add(getParallelOrchestratorComponentRunner().submit(runner));
                containerInfos.add(new ContainerInfo(name, name, fqdn, image));
            }
            for (Future<Boolean> future : futures) {
                future.get();
            }
            return containerInfos;
        } catch (Exception ex) {
            try {
                deleteContainer(containerInfos, cred);
            } catch (Exception cleanupEx) {
                // Do not let a failed cleanup hide the reason why the containers could not be started.
                LOGGER.warn("Failed to clean up containers after an unsuccessful start: {}", cleanupEx.getMessage());
                ex.addSuppressed(cleanupEx);
            }
            throw new CloudbreakOrchestratorFailedException(ex);
        }
    }

    /**
     * Intentionally a no-op: starting existing containers is not implemented by this orchestrator.
     */
    @Override
    public void startContainer(List<ContainerInfo> info, OrchestrationCredential cred) {
        // not supported
    }

    /**
     * Intentionally a no-op: stopping containers is not implemented by this orchestrator.
     */
    @Override
    public void stopContainer(List<ContainerInfo> info, OrchestrationCredential cred) {
        // not supported
    }

    /**
     * Removes the given containers, submitting the removals to the parallel runner and waiting for all of them.
     *
     * @param containerInfo Containers to remove; the short host name is derived from each container's host
     * @param cred          Credential used to reach the Swarm manager
     * @throws CloudbreakOrchestratorException if any removal fails; the message lists all requested containers
     */
    @Override
    public void deleteContainer(List<ContainerInfo> containerInfo, OrchestrationCredential cred) throws CloudbreakOrchestratorException {
        try {
            DockerClient dockerApiClient = swarmClient(cred);
            List<Future<Boolean>> futures = new ArrayList<>();
            for (ContainerInfo info : containerInfo) {
                String hostName = info.getHost().split("\\.")[0];
                SwarmOrchestratorDeletion containerRemover = new SwarmOrchestratorDeletion(dockerApiClient, hostName, info.getName());
                Callable<Boolean> runner = runner(containerRemover, getExitCriteria(), null, MDC.getCopyOfContextMap());
                futures.add(getParallelOrchestratorComponentRunner().submit(runner));
            }
            for (Future<Boolean> future : futures) {
                future.get();
            }
        } catch (Exception ex) {
            String msg = String.format("Failed to delete containers: '%s'.", Arrays.toString(containerInfo.toArray()));
            throw new CloudbreakOrchestratorFailedException(msg, ex);
        }
    }

    /**
     * Returns the private addresses of the given nodes that are not (yet) registered in the Swarm manager.
     *
     * @param gatewayConfig Config used to access the gateway instance
     * @param nodes         Nodes expected to be part of the cluster
     * @return private IPs of the nodes that are not reported as available; all of them if the manager cannot be queried
     */
    @Override
    public List<String> getMissingNodes(GatewayConfig gatewayConfig, Set<Node> nodes) {
        Set<String> missingNodes = getPrivateAddresses(nodes);
        LOGGER.info("Checking if Swarm manager is available and if the agents are registered.");
        try {
            List<String> allAvailableNodes = getAvailableNodes(gatewayConfig, nodes);
            // Logged before the removal, so the second number is the total number of expected nodes.
            LOGGER.info("Available swarm nodes: {}/{}", allAvailableNodes.size(), missingNodes.size());
            missingNodes.removeAll(allAvailableNodes);
        } catch (Exception t) {
            LOGGER.info("Cannot connect to Swarm manager, maybe it hasn't started yet: {}", t.getMessage());
        }
        return Lists.newArrayList(missingNodes);
    }

    /**
     * Returns the private addresses of the given nodes that show up in the driver status of the Swarm manager.
     *
     * <p>Never throws: if the manager cannot be reached, the addresses collected so far (possibly none) are returned.
     *
     * @param gatewayConfig Config used to access the gateway instance
     * @param nodes         Nodes expected to be part of the cluster
     * @return private IPs of the nodes found in the driver status list
     */
    @Override
    public List<String> getAvailableNodes(GatewayConfig gatewayConfig, Set<Node> nodes) {
        LOGGER.info("Checking if Swarm manager is available and if the agents are registered.");
        List<String> privateAddresses = new ArrayList<>();
        try {
            DockerClientConfig swarmClientConfig = getSwarmClientConfig(gatewayConfig.getGatewayUrl(), gatewayConfig.getCertificateDir());
            DockerClient swarmManagerClient = DockerClientBuilder.getInstance(swarmClientConfig)
                    .withDockerCmdExecFactory(new DockerCmdExecFactoryImpl())
                    .build();
            List<Object> driverStatus = swarmManagerClient.infoCmd().exec().getDriverStatuses();
            LOGGER.debug("Swarm manager is available, checking registered agents.");
            for (Object element : driverStatus) {
                try {
                    // Each status entry is expected to be a list whose second item looks like "<address>:<port>".
                    List<?> statusEntry = (List<?>) element;
                    String address = ((String) statusEntry.get(1)).split(":")[0];
                    for (Node node : nodes) {
                        if (address.equals(node.getPrivateIp())) {
                            privateAddresses.add(node.getPrivateIp());
                            break;
                        }
                    }
                } catch (Exception e) {
                    // A malformed entry must not prevent the remaining entries from being processed.
                    LOGGER.warn("Docker info returned an unexpected element: {}", element, e);
                }
            }
            return privateAddresses;
        } catch (Exception e) {
            String defaultErrorMessage = "502 Bad Gateway";
            // The message may be null (e.g. for a NullPointerException), so it must not be dereferenced blindly.
            String message = e.getMessage();
            String errorMessage = message != null && message.contains(defaultErrorMessage) ? defaultErrorMessage : message;
            LOGGER.warn("Cannot connect to Swarm manager, maybe it hasn't started yet: {}", errorMessage);
            return privateAddresses;
        }
    }

    /**
     * Checks whether the Docker daemon behind the gateway answers an info request.
     *
     * @param gatewayConfig Config used to access the gateway instance
     * @return {@code true} if the info command succeeded, {@code false} on any failure
     */
    @Override
    public boolean isBootstrapApiAvailable(GatewayConfig gatewayConfig) {
        LOGGER.info("Checking if docker daemon is available.");
        try {
            DockerClient dockerApiClient = DockerClientBuilder.getInstance(getDockerClientConfig(gatewayConfig))
                    .withDockerCmdExecFactory(new DockerCmdExecFactoryImpl())
                    .build();
            dockerApiClient.infoCmd().exec();
            return true;
        } catch (Exception ex) {
            LOGGER.warn("Docker api not available: {}", ex.getMessage());
            return false;
        }
    }

    /**
     * @return the value of {@code MAX_IP_FOR_ONE_REQUEST}
     */
    @Override
    public int getMaxBootstrapNodes() {
        return MAX_IP_FOR_ONE_REQUEST;
    }

    /**
     * @return the {@code SWARM} orchestrator constant
     */
    @Override
    public String name() {
        return SWARM;
    }

    /**
     * Builds the container name: the plain constraint name for a single host, otherwise the name suffixed with the index.
     */
    private String createSwarmContainerName(ContainerConstraint constraint, int index) {
        String name = constraint.getContainerName().getName();
        if (constraint.getHosts().size() > 1) {
            name = String.format("%s-%s", name, index);
        }
        return name;
    }

    /**
     * Assembles the create-container command from the constraint: name, restart policy, privileged mode and
     * environment are always set; command, network mode, volume binds and port binding only when present.
     * Links are always applied, even when there are none.
     */
    private CreateContainerCmd decorateCreateContainerCmd(String image, ContainerConstraint constraint, String hostname,
            DockerClient dockerApiClient, String name) {
        CreateContainerCmd createCmd = dockerApiClient.createContainerCmd(image)
                .withName(name)
                .withRestartPolicy(alwaysRestart())
                .withPrivileged(true)
                .withEnv(createEnv(constraint, hostname));

        String[] cmd = constraint.getCmd();
        if (cmd != null && cmd.length > 0) {
            createCmd.withCmd(cmd);
        }

        if (!StringUtils.isEmpty(constraint.getNetworkMode())) {
            createCmd.withNetworkMode(constraint.getNetworkMode());
        }

        Bind[] binds = createVolumeBinds(constraint);
        if (binds.length > 0) {
            createCmd.withBinds(binds);
        }

        TcpPortBinding portBinding = constraint.getTcpPortBinding();
        if (portBinding != null) {
            Ports ports = new Ports(ExposedPort.tcp(portBinding.getExposedPort()),
                    new Ports.Binding(portBinding.getHostIp(), portBinding.getHostPort()));
            createCmd.withPortBindings(ports);
        }

        List<Link> links = new ArrayList<>();
        for (Entry<String, String> entry : constraint.getLinks().entrySet()) {
            links.add(new Link(entry.getKey(), entry.getValue()));
        }
        createCmd.withLinks(links.toArray(new Link[links.size()]));
        return createCmd;
    }

    /**
     * Converts the constraint's environment map to {@code KEY=VALUE} entries and appends the
     * {@code constraint:node==<hostname>} entry that pins the container to the given node.
     */
    private String[] createEnv(ContainerConstraint constraint, String hostname) {
        List<String> env = new ArrayList<>();
        for (Entry<String, String> envEntry : constraint.getEnv().entrySet()) {
            env.add(envEntry.getKey() + ENV_KEY_VALUE_SEPARATOR + envEntry.getValue());
        }
        env.add(format("constraint:node==%s", hostname));
        return env.toArray(new String[env.size()]);
    }

    /**
     * Builds the volume binds of the constraint; an entry with an empty container path is added with the host path only.
     */
    private Bind[] createVolumeBinds(ContainerConstraint constraint) {
        BindsBuilder bindsBuilder = new BindsBuilder();
        for (Entry<String, String> entry : constraint.getVolumeBinds().entrySet()) {
            String hostPath = entry.getKey();
            String containerPath = entry.getValue();
            if (StringUtils.isEmpty(containerPath)) {
                bindsBuilder.add(hostPath);
            } else {
                bindsBuilder.add(hostPath, containerPath);
            }
        }
        return bindsBuilder.build();
    }

    /**
     * Selects the Consul servers: the gateway is always included, followed by as many of the other addresses as
     * needed to reach the effective server count.
     *
     * <p>The requested count is used as-is when it does not exceed the number of non-gateway addresses plus one;
     * otherwise the number of non-gateway addresses is used.
     * NOTE(review): the fallback yields {@code size()} servers in total rather than {@code size() + 1} - confirm this is intended.
     */
    private Set<String> selectConsulServers(String gatewayAddress, Set<String> privateAddresses, int consulServerCount) {
        List<String> privateAddressList = new ArrayList<>(privateAddresses);
        int consulServers = consulServerCount <= privateAddressList.size() + 1 ? consulServerCount : privateAddressList.size();
        Set<String> result = new HashSet<>();
        result.add(gatewayAddress);
        for (int i = 0; i < consulServers - 1; i++) {
            result.add(privateAddressList.get(i));
        }
        return result;
    }

    /**
     * Joins the items with commas; an empty collection yields an empty string.
     */
    private String concatToString(Collection<String> items) {
        return String.join(",", items);
    }

    /**
     * Collects the private IPs of the given nodes.
     */
    private Set<String> getPrivateAddresses(Collection<Node> nodes) {
        Set<String> privateAddresses = new HashSet<>();
        for (Node node : nodes) {
            privateAddresses.add(node.getPrivateIp());
        }
        return privateAddresses;
    }

    /**
     * Returns the private IP of the first node whose public IP equals the gateway address, or {@code null} if there is none.
     */
    private String getPrivateGatewayIp(String gatewayAddress, Collection<Node> nodes) {
        for (Node node : nodes) {
            if (node.getPublicIp() != null && node.getPublicIp().equals(gatewayAddress)) {
                return node.getPrivateIp();
            }
        }
        return null;
    }

    /**
     * Returns every node whose public IP is missing or differs from the gateway address.
     */
    private Set<Node> getNodesWithoutGateway(String gatewayAddress, Collection<Node> nodes) {
        Set<Node> coreNodes = new HashSet<>();
        for (Node node : nodes) {
            if (node.getPublicIp() == null || !node.getPublicIp().equals(gatewayAddress)) {
                coreNodes.add(node);
            }
        }
        return coreNodes;
    }

    /**
     * Appends the {@code :2376} port suffix to every address.
     */
    @VisibleForTesting
    Set<String> prepareDockerAddressInventory(Collection<String> nodeAddresses) {
        Set<String> nodeResult = new HashSet<>();
        for (String nodeAddress : nodeAddresses) {
            nodeResult.add(String.format("%s:2376", nodeAddress));
        }
        return nodeResult;
    }

    /**
     * Client configuration pointing at the Swarm manager endpoint ({@code <gatewayUrl>/swarm}).
     */
    private DockerClientConfig getSwarmClientConfig(String gatewayUrl, String certificateDir) {
        return DockerClientConfig.createDefaultConfigBuilder()
                .withDockerCertPath(certificateDir)
                .withVersion("1.18")
                .withUri(gatewayUrl + "/swarm")
                .build();
    }

    /**
     * Client configuration pointing at the Docker daemon endpoint of the gateway ({@code <gatewayUrl>/docker}).
     */
    private DockerClientConfig getDockerClientConfig(GatewayConfig gatewayConfig) {
        return DockerClientConfig.createDefaultConfigBuilder()
                .withDockerCertPath(gatewayConfig.getCertificateDir())
                .withVersion("1.18")
                .withUri(gatewayConfig.getGatewayUrl() + "/docker")
                .build();
    }

    /**
     * Creates a client for the gateway's Docker daemon with the configured read timeout.
     */
    @VisibleForTesting
    DockerClient dockerClient(GatewayConfig gatewayConfig) {
        return DockerClientBuilder.getInstance(getDockerClientConfig(gatewayConfig))
                .withDockerCmdExecFactory(new DockerCmdExecFactoryImpl().withReadTimeout(READ_TIMEOUT))
                .build();
    }

    /**
     * Creates a client for the Swarm manager behind the gateway with the configured read timeout.
     */
    @VisibleForTesting
    DockerClient swarmClient(GatewayConfig gatewayConfig) {
        return DockerClientBuilder.getInstance(getSwarmClientConfig(gatewayConfig.getGatewayUrl(), gatewayConfig.getCertificateDir()))
                .withDockerCmdExecFactory(new DockerCmdExecFactoryImpl().withReadTimeout(READ_TIMEOUT))
                .build();
    }

    /**
     * Creates a client for the Swarm manager described by the credential: the API endpoint is reached over
     * {@code https} and the certificate directory is read from the {@code certificateDir} property.
     */
    DockerClient swarmClient(OrchestrationCredential cred) {
        String gatewayUrl = "https://" + cred.getApiEndpoint();
        String certificateDir = (String) cred.getProperties().get("certificateDir");
        return DockerClientBuilder.getInstance(getSwarmClientConfig(gatewayUrl, certificateDir))
                .withDockerCmdExecFactory(new DockerCmdExecFactoryImpl().withReadTimeout(READ_TIMEOUT))
                .build();
    }

    /**
     * Creates the Munchausen bootstrap that runs against the gateway's Docker daemon.
     */
    @VisibleForTesting
    MunchausenBootstrap munchausenBootstrap(GatewayConfig gatewayConfig, String imageName, String[] cmd) {
        return new MunchausenBootstrap(dockerClient(gatewayConfig), imageName, cmd);
    }

    /**
     * Creates the Munchausen bootstrap that runs against the Swarm manager; used when adding new nodes.
     */
    @VisibleForTesting
    MunchausenBootstrap munchausenNewNodeBootstrap(GatewayConfig gatewayConfig, String imageName, String[] cmd) {
        return new MunchausenBootstrap(swarmClient(gatewayConfig), imageName, cmd);
    }

    /**
     * Wraps the bootstrap into an {@link OrchestratorBootstrapRunner} together with the exit criteria and the MDC context.
     */
    @VisibleForTesting
    public Callable<Boolean> runner(OrchestratorBootstrap bootstrap, ExitCriteria exitCriteria, ExitCriteriaModel exitCriteriaModel,
            Map<String, String> mdcMap) {
        return new OrchestratorBootstrapRunner(bootstrap, exitCriteria, exitCriteriaModel, mdcMap);
    }

    /**
     * Formats the Consul join address as {@code consul://<privateIp>:8500}.
     */
    private String getConsulJoinIp(String privateIp) {
        return String.format("consul://%s:8500", privateIp);
    }

    /**
     * Formats the image reference as {@code <name>:<version>}.
     */
    private String imageName(ContainerConfig containerConfig) {
        return containerConfig.getName() + ":" + containerConfig.getVersion();
    }

    /**
     * @param name explicitly requested image name, if any
     * @return the requested name, or the value configured under {@code cb.docker.container.ambari.server}
     */
    @Override
    public String ambariServerContainer(Optional<String> name) {
        return name.orElse(ambariServer);
    }

    /**
     * @param name explicitly requested image name, if any
     * @return the requested name, or the value configured under {@code cb.docker.container.ambari.agent}
     */
    @Override
    public String ambariClientContainer(Optional<String> name) {
        return name.orElse(ambariAgent);
    }

    /**
     * @param name explicitly requested image name, if any
     * @return the requested name, or the value configured under {@code cb.docker.container.ambari.db}
     */
    @Override
    public String ambariDbContainer(Optional<String> name) {
        return name.orElse(postgresDockerImageName);
    }
}