/*******************************************************************************
* Copyright (c) 2012-2017 Codenvy, S.A.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Codenvy, S.A. - initial API and implementation
*******************************************************************************/
package org.eclipse.che.api.environment.server;
import com.google.common.annotations.VisibleForTesting;
import org.eclipse.che.api.agent.server.AgentRegistry;
import org.eclipse.che.api.agent.server.exception.AgentException;
import org.eclipse.che.api.agent.shared.model.impl.AgentImpl;
import org.eclipse.che.api.agent.shared.model.impl.AgentKeyImpl;
import org.eclipse.che.api.core.ApiException;
import org.eclipse.che.api.core.ConflictException;
import org.eclipse.che.api.core.NotFoundException;
import org.eclipse.che.api.core.ServerException;
import org.eclipse.che.api.core.model.machine.Machine;
import org.eclipse.che.api.core.model.machine.MachineConfig;
import org.eclipse.che.api.core.model.machine.MachineLogMessage;
import org.eclipse.che.api.core.model.machine.MachineSource;
import org.eclipse.che.api.core.model.machine.MachineStatus;
import org.eclipse.che.api.core.model.machine.ServerConf;
import org.eclipse.che.api.core.model.workspace.Environment;
import org.eclipse.che.api.core.model.workspace.ExtendedMachine;
import org.eclipse.che.api.core.model.workspace.ServerConf2;
import org.eclipse.che.api.core.notification.EventService;
import org.eclipse.che.api.core.notification.EventSubscriber;
import org.eclipse.che.api.core.util.AbstractLineConsumer;
import org.eclipse.che.api.core.util.LineConsumer;
import org.eclipse.che.api.core.util.MessageConsumer;
import org.eclipse.che.api.core.util.lineconsumer.ConcurrentCompositeLineConsumer;
import org.eclipse.che.api.core.util.lineconsumer.ConcurrentFileLineConsumer;
import org.eclipse.che.api.environment.server.exception.EnvironmentException;
import org.eclipse.che.api.environment.server.exception.EnvironmentNotRunningException;
import org.eclipse.che.api.environment.server.exception.EnvironmentStartInterruptedException;
import org.eclipse.che.api.environment.server.model.CheServiceBuildContextImpl;
import org.eclipse.che.api.environment.server.model.CheServiceImpl;
import org.eclipse.che.api.environment.server.model.CheServicesEnvironmentImpl;
import org.eclipse.che.api.machine.server.MachineInstanceProviders;
import org.eclipse.che.api.machine.server.event.InstanceStateEvent;
import org.eclipse.che.api.machine.server.exception.MachineException;
import org.eclipse.che.api.machine.server.exception.SourceNotFoundException;
import org.eclipse.che.api.machine.server.model.impl.MachineConfigImpl;
import org.eclipse.che.api.machine.server.model.impl.MachineImpl;
import org.eclipse.che.api.machine.server.model.impl.MachineLimitsImpl;
import org.eclipse.che.api.machine.server.model.impl.MachineLogMessageImpl;
import org.eclipse.che.api.machine.server.model.impl.MachineSourceImpl;
import org.eclipse.che.api.machine.server.model.impl.ServerConfImpl;
import org.eclipse.che.api.machine.server.model.impl.SnapshotImpl;
import org.eclipse.che.api.machine.server.spi.Instance;
import org.eclipse.che.api.machine.server.spi.InstanceProvider;
import org.eclipse.che.api.machine.server.spi.SnapshotDao;
import org.eclipse.che.api.machine.server.util.RecipeDownloader;
import org.eclipse.che.api.machine.shared.dto.event.MachineStatusEvent;
import org.eclipse.che.api.workspace.server.WorkspaceSharedPool;
import org.eclipse.che.api.workspace.server.model.impl.EnvironmentImpl;
import org.eclipse.che.api.workspace.server.model.impl.ExtendedMachineImpl;
import org.eclipse.che.commons.annotation.Nullable;
import org.eclipse.che.commons.env.EnvironmentContext;
import org.eclipse.che.commons.lang.IoUtil;
import org.eclipse.che.commons.lang.NameGenerator;
import org.eclipse.che.commons.lang.Size;
import org.eclipse.che.commons.lang.concurrent.StripedLocks;
import org.eclipse.che.commons.lang.concurrent.Unlocker;
import org.slf4j.Logger;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.inject.Inject;
import javax.inject.Named;
import javax.inject.Singleton;
import java.io.File;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.regex.Pattern;
import static java.lang.String.format;
import static java.util.Collections.emptyList;
import static java.util.stream.Collectors.toList;
import static org.eclipse.che.api.machine.server.event.InstanceStateEvent.Type.DIE;
import static org.eclipse.che.api.machine.server.event.InstanceStateEvent.Type.OOM;
import static org.eclipse.che.api.workspace.shared.Utils.getDevMachineName;
import static org.eclipse.che.dto.server.DtoFactory.newDto;
import static org.slf4j.LoggerFactory.getLogger;
/**
* Facade for implementation specific operations with environment runtimes.
*
* @author Alexander Garagatyi
* @author Yevhenii Voevodin
*/
@Singleton
public class CheEnvironmentEngine {
private static final NoOpStartedHandler NO_OP_HANDLER = new NoOpStartedHandler();
private static final Logger LOG = getLogger(CheEnvironmentEngine.class);
private final Map<String, EnvironmentHolder> environments;
private final StripedLocks stripedLocks;
private final File machineLogsDir;
private final MachineInstanceProviders machineInstanceProviders;
private final long defaultMachineMemorySizeBytes;
private final SnapshotDao snapshotDao;
private final EventService eventService;
private final EnvironmentParser environmentParser;
private final DefaultServicesStartStrategy startStrategy;
private final MachineInstanceProvider machineProvider;
private final InfrastructureProvisioner infrastructureProvisioner;
private final RecipeDownloader recipeDownloader;
private final Pattern recipeApiPattern;
private final ContainerNameGenerator containerNameGenerator;
private final AgentRegistry agentRegistry;
private final WorkspaceSharedPool sharedPool;
private volatile boolean isPreDestroyInvoked;
@Inject
public CheEnvironmentEngine(SnapshotDao snapshotDao,
MachineInstanceProviders machineInstanceProviders,
@Named("che.workspace.logs") String machineLogsDir,
@Named("che.workspace.default_memory_mb") int defaultMachineMemorySizeMB,
EventService eventService,
EnvironmentParser environmentParser,
DefaultServicesStartStrategy startStrategy,
MachineInstanceProvider machineProvider,
InfrastructureProvisioner infrastructureProvisioner,
@Named("che.api") String apiEndpoint,
RecipeDownloader recipeDownloader,
ContainerNameGenerator containerNameGenerator,
AgentRegistry agentRegistry,
WorkspaceSharedPool sharedPool) {
this.snapshotDao = snapshotDao;
this.eventService = eventService;
this.environmentParser = environmentParser;
this.startStrategy = startStrategy;
this.machineProvider = machineProvider;
this.infrastructureProvisioner = infrastructureProvisioner;
this.recipeDownloader = recipeDownloader;
this.agentRegistry = agentRegistry;
this.sharedPool = sharedPool;
this.environments = new ConcurrentHashMap<>();
this.machineInstanceProviders = machineInstanceProviders;
this.machineLogsDir = new File(machineLogsDir);
this.defaultMachineMemorySizeBytes = Size.parseSize(defaultMachineMemorySizeMB + "MB");
// 16 - experimental value for stripes count, it comes from default hash map size
this.stripedLocks = new StripedLocks(16);
this.recipeApiPattern = Pattern.compile("(^https?" +
apiEndpoint.substring(apiEndpoint.indexOf(":")) +
"/recipe/.*$)|(^/recipe/.*$)");
this.containerNameGenerator = containerNameGenerator;
}
/**
* Returns all machines from environment of specific workspace.
*
* @param workspaceId
* ID of workspace that owns environment machines
* @return list of machines
* @throws EnvironmentNotRunningException
* if environment is not running
*/
public List<Instance> getMachines(String workspaceId) throws EnvironmentNotRunningException {
EnvironmentHolder environment;
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.readLock(workspaceId)) {
environment = environments.get(workspaceId);
if (environment == null) {
throw new EnvironmentNotRunningException("Environment with ID '" + workspaceId + "' is not found");
}
return new ArrayList<>(environment.machines);
}
}
/**
* Returns specific machine from environment of specific workspace.
*
* @param workspaceId
* ID of workspace that owns environment machines
* @param machineId
* ID of requested machine
* @return requested machine
* @throws EnvironmentNotRunningException
* if environment is not running
* @throws NotFoundException
* if machine is not found in the environment
*/
public Instance getMachine(String workspaceId, String machineId) throws NotFoundException {
EnvironmentHolder environment;
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.readLock(workspaceId)) {
environment = environments.get(workspaceId);
}
if (environment == null) {
throw new EnvironmentNotRunningException("Environment with ID '" + workspaceId + "' is not found");
}
return environment.machines.stream()
.filter(instance -> instance.getId().equals(machineId))
.findAny()
.orElseThrow(() -> new NotFoundException(
format("Machine with ID '%s' is not found in the environment of workspace '%s'",
machineId, workspaceId)));
}
/**
* Starts provided environment.
*
* <p>Environment starts if and only all machines in environment definition start successfully.<br/>
* Otherwise exception is thrown by this method.<br/>
* It is not defined whether environment start fails right after first failure or in the end of the process.<br/>
* Starting order of machines is not guarantied. Machines can start sequentially or in parallel.
*
* @param workspaceId
* ID of workspace that owns provided environment
* @param envName
* name of environment
* @param env
* environment to start
* @param recover
* whether machines from environment should be recovered or not
* @param messageConsumer
* consumer of log messages from machines in the environment
* @param startedHandler
* handler for started machines
* @return list of running machines of this environment
* @throws ServerException
* if other error occurs
*/
public List<Instance> start(String workspaceId,
String envName,
Environment env,
boolean recover,
MessageConsumer<MachineLogMessage> messageConsumer,
MachineStartedHandler startedHandler) throws ServerException,
EnvironmentException,
AgentException,
ConflictException {
EnvironmentImpl environment = new EnvironmentImpl(env);
// TODO move to machines provider
// add random chars to ensure that old environments that weren't removed by some reason won't prevent start
String networkId = NameGenerator.generate(workspaceId + "_", 16);
String namespace = EnvironmentContext.getCurrent().getSubject().getUserName();
initializeEnvironment(namespace,
workspaceId,
envName,
environment,
networkId,
messageConsumer);
String devMachineName = getDevMachineName(environment);
if (devMachineName == null) {
throw new ServerException("Agent 'org.eclipse.che.ws-agent' is not found in any of environment machines");
}
startEnvironmentQueue(namespace,
workspaceId,
devMachineName,
networkId,
recover,
startedHandler);
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.writeLock(workspaceId)) {
EnvironmentHolder environmentHolder = environments.get(workspaceId);
// possible only if environment was stopped during its start
if (environmentHolder == null) {
throw new ServerException("Environment start was interrupted by environment stopping");
}
environmentHolder.status = EnvStatus.RUNNING;
// prevent list modification
return new ArrayList<>(environmentHolder.machines);
}
}
/**
* Starts workspace environment.
*
* @see #start(String, String, Environment, boolean, MessageConsumer, MachineStartedHandler)
*/
public List<Instance> start(String workspaceId,
String envName,
Environment env,
boolean recover,
MessageConsumer<MachineLogMessage> messageConsumer) throws ServerException,
ConflictException,
AgentException,
EnvironmentException {
return start(workspaceId, envName, env, recover, messageConsumer, NO_OP_HANDLER);
}
/**
* Stops running environment of specified workspace.
*
* @param workspaceId
* ID of workspace that owns environment
* @throws EnvironmentNotRunningException
* when environment is not running
* @throws ServerException
* if other error occurs
*/
public void stop(String workspaceId) throws EnvironmentNotRunningException,
ServerException {
List<Instance> machinesCopy;
EnvironmentHolder environmentHolder;
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.readLock(workspaceId)) {
environmentHolder = environments.get(workspaceId);
if (environmentHolder == null || environmentHolder.status != EnvStatus.RUNNING) {
throw new EnvironmentNotRunningException(
format("Stop of not running environment of workspace with ID '%s' is not allowed.",
workspaceId));
}
List<Instance> machines = environmentHolder.machines;
if (machines != null && !machines.isEmpty()) {
machinesCopy = new ArrayList<>(machines);
} else {
machinesCopy = emptyList();
}
}
// long operation - perform out of lock
destroyEnvironment(environmentHolder.networkId, machinesCopy);
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.writeLock(workspaceId)) {
environments.remove(workspaceId);
}
}
/**
* Starts machine in running environment.
*
* @param workspaceId
* ID of workspace that owns environment in which machine should be started
* @param machineConfig
* configuration of machine that should be started
* @return running machine
* @throws EnvironmentNotRunningException
* if environment is not running
* @throws NotFoundException
* if provider of machine implementation is not found
* @throws ConflictException
* if machine with the same name already exists in the environment
* @throws ServerException
* if any other error occurs
*/
public Instance startMachine(String workspaceId,
MachineConfig machineConfig,
List<String> agents) throws ServerException,
NotFoundException,
ConflictException,
EnvironmentException {
MachineConfig machineConfigCopy = new MachineConfigImpl(machineConfig);
EnvironmentHolder environmentHolder;
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.readLock(workspaceId)) {
environmentHolder = environments.get(workspaceId);
if (environmentHolder == null || environmentHolder.status != EnvStatus.RUNNING) {
throw new EnvironmentNotRunningException(format("Environment '%s' is not running", workspaceId));
}
for (Instance machine : environmentHolder.machines) {
if (machine.getConfig().getName().equals(machineConfigCopy.getName())) {
throw new ConflictException(
format("Machine with name '%s' already exists in environment of workspace '%s'",
machineConfigCopy.getName(), workspaceId));
}
}
}
final String creator = EnvironmentContext.getCurrent().getSubject().getUserId();
final String namespace = EnvironmentContext.getCurrent().getSubject().getUserName();
MachineImpl machine = MachineImpl.builder()
.setConfig(machineConfig)
.setWorkspaceId(workspaceId)
.setStatus(MachineStatus.CREATING)
.setEnvName(environmentHolder.name)
.setOwner(creator)
.build();
MachineStarter machineStarter;
if ("docker".equals(machineConfig.getType())) {
// needed to reuse startInstance method and
// create machine instances by different implementation-specific providers
CheServiceImpl service = machineConfigToService(machineConfig);
normalize(namespace,
workspaceId,
machineConfig.getName(),
service);
machine.setId(service.getId());
machineStarter = (machineLogger, machineSource) -> {
CheServiceImpl serviceWithNormalizedSource = normalizeServiceSource(service, machineSource);
normalize(namespace,
workspaceId,
machineConfig.getName(),
serviceWithNormalizedSource);
infrastructureProvisioner.provision(new ExtendedMachineImpl().withAgents(agents),
serviceWithNormalizedSource);
return machineProvider.startService(namespace,
workspaceId,
environmentHolder.name,
machineConfig.getName(),
machineConfig.isDev(),
environmentHolder.networkId,
serviceWithNormalizedSource,
machineLogger);
};
} else {
try {
InstanceProvider provider = machineInstanceProviders.getProvider(machineConfig.getType());
machine.setId(generateMachineId());
addAgentsProvidedServers(machine, agents);
machineStarter = (machineLogger, machineSource) -> {
Machine machineWithNormalizedSource = normalizeMachineSource(machine, machineSource);
return provider.createInstance(machineWithNormalizedSource, machineLogger);
};
} catch (NotFoundException e) {
throw new NotFoundException(format("Provider of machine type '%s' not found", machineConfig.getType()));
}
}
return startInstance(false,
environmentHolder.logger,
machine,
machineStarter);
}
/**
* Stops machine in running environment.
*
* @param workspaceId
* ID of workspace of environment that owns machine
* @param machineId
* ID of machine that should be stopped
* @throws NotFoundException
* if machine in not found in environment
* @throws EnvironmentNotRunningException
* if environment is not running
* @throws ConflictException
* if stop of dev machine is requested
* @throws ServerException
* if other error occurs
*/
public void stopMachine(String workspaceId, String machineId) throws NotFoundException,
ServerException,
ConflictException {
Instance targetMachine = null;
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.writeLock(workspaceId)) {
EnvironmentHolder environmentHolder = environments.get(workspaceId);
if (environmentHolder == null || environmentHolder.status != EnvStatus.RUNNING) {
throw new EnvironmentNotRunningException(format("Environment '%s' is not running", workspaceId));
}
for (Instance machine : environmentHolder.machines) {
if (machine.getId().equals(machineId)) {
if (machine.getConfig().isDev()) {
throw new ConflictException(
"Stop of dev machine is not allowed. Please, stop whole environment");
}
targetMachine = machine;
break;
}
}
environmentHolder.machines.remove(targetMachine);
}
if (targetMachine == null) {
throw new NotFoundException(format("Machine with ID '%s' is not found in environment of workspace '%s'",
machineId, workspaceId));
}
// out of lock to prevent blocking by potentially long-running method
destroyMachine(targetMachine);
}
/**
* Saves machine into snapshot.
*
* @param workspaceId
* ID of workspace that owns environment
* @param machineId
* ID of machine to save
* @return snapshot
* @throws EnvironmentNotRunningException
* if environment of machine is not running
* @throws NotFoundException
* if machine is not running
* @throws ServerException
* if another error occurs
*/
public SnapshotImpl saveSnapshot(String workspaceId,
String machineId) throws ServerException,
NotFoundException {
EnvironmentHolder environmentHolder;
SnapshotImpl snapshot = null;
Instance instance = null;
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.readLock(workspaceId)) {
environmentHolder = environments.get(workspaceId);
if (environmentHolder == null || environmentHolder.status != EnvStatus.RUNNING) {
throw new EnvironmentNotRunningException(format("Environment '%s' is not running", workspaceId));
}
for (Instance machine : environmentHolder.machines) {
if (machine.getId().equals(machineId)) {
instance = machine;
snapshot = SnapshotImpl.builder()
.generateId()
.setType(machine.getConfig().getType())
.setWorkspaceId(machine.getWorkspaceId())
.setDescription(machine.getEnvName())
.setDev(machine.getConfig().isDev())
.setEnvName(machine.getEnvName())
.setMachineName(machine.getConfig().getName())
.useCurrentCreationDate()
.build();
}
}
}
if (instance == null) {
throw new NotFoundException(format("Machine with id '%s' is not found in environment of workspace '%s'",
machineId, workspaceId));
}
try {
MachineSource machineSource = instance.saveToSnapshot();
snapshot.setMachineSource(new MachineSourceImpl(machineSource));
return snapshot;
} catch (ServerException e) {
try {
instance.getLogger().writeLine("Snapshot storing failed. " + e.getLocalizedMessage());
} catch (IOException ignore) {
}
throw e;
}
}
/**
* Removes snapshot of machine.
*
* @param snapshot
* description of snapshot that should be removed
* @throws NotFoundException
* if snapshot is not found
* @throws ServerException
* if error occurs on snapshot removal
*/
public void removeSnapshot(SnapshotImpl snapshot) throws ServerException, NotFoundException {
final String instanceType = snapshot.getType();
final InstanceProvider instanceProvider = machineInstanceProviders.getProvider(instanceType);
instanceProvider.removeInstanceSnapshot(snapshot.getMachineSource());
}
private void initializeEnvironment(String namespace,
String workspaceId,
String envName,
EnvironmentImpl envConfig,
String networkId,
MessageConsumer<MachineLogMessage> messageConsumer)
throws ServerException,
ConflictException,
EnvironmentException {
CheServicesEnvironmentImpl internalEnv = environmentParser.parse(envConfig);
internalEnv.setWorkspaceId(workspaceId);
infrastructureProvisioner.provision(envConfig, internalEnv);
normalize(namespace,
workspaceId,
internalEnv);
List<String> servicesOrder = startStrategy.order(internalEnv);
normalizeNames(internalEnv);
EnvironmentHolder environmentHolder = new EnvironmentHolder(servicesOrder,
internalEnv,
envConfig,
messageConsumer,
EnvStatus.STARTING,
envName,
networkId);
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.writeLock(workspaceId)) {
if (environments.putIfAbsent(workspaceId, environmentHolder) != null) {
throw new ConflictException(format("Environment of workspace '%s' already exists", workspaceId));
}
}
}
private void addAgentsProvidedServers(MachineImpl machine, List<String> agentKeys) throws ServerException {
for (String agentKey : agentKeys) {
try {
AgentImpl agent = new AgentImpl(agentRegistry.getAgent(AgentKeyImpl.parse(agentKey)));
for (Map.Entry<String, ? extends ServerConf2> entry : agent.getServers().entrySet()) {
String ref = entry.getKey();
ServerConf2 conf2 = entry.getValue();
ServerConfImpl conf = new ServerConfImpl(ref,
conf2.getPort(),
conf2.getProtocol(),
conf2.getProperties().get("path"));
machine.getConfig().getServers().add(conf);
}
} catch (AgentException e) {
throw new ServerException(e);
}
}
}
private void normalize(String namespace,
String workspaceId,
CheServicesEnvironmentImpl environment) throws ServerException {
Map<String, CheServiceImpl> services = environment.getServices();
for (Map.Entry<String, CheServiceImpl> serviceEntry : services.entrySet()) {
normalize(namespace,
workspaceId,
serviceEntry.getKey(),
serviceEntry.getValue());
}
}
/**
* Sets specific names for this environment instance where it is required.
*
* @param environment
* environment in which names will be normalized
*/
private void normalizeNames(CheServicesEnvironmentImpl environment) {
Map<String, CheServiceImpl> services = environment.getServices();
for (Map.Entry<String, CheServiceImpl> serviceEntry : services.entrySet()) {
CheServiceImpl service = serviceEntry.getValue();
normalizeVolumesFrom(service, services);
normalizeLinks(service, services);
}
}
// replace machines names in volumes_from with containers IDs
private void normalizeVolumesFrom(CheServiceImpl service, Map<String, CheServiceImpl> services) {
if (service.getVolumesFrom() != null) {
service.setVolumesFrom(service.getVolumesFrom()
.stream()
.map(serviceName -> services.get(serviceName).getContainerName())
.collect(toList()));
}
}
/**
* Replaces linked to this service's name with container name which represents the service in links section.
* The problem is that a user writes names of other services in links section in compose file.
* But actually links are constraints and their values should be names of containers (not services) to be linked.
* <br/>
* For example: serviceDB:serviceDbAlias -> container_1234:serviceDbAlias <br/>
* If alias is omitted then service name will be used.
*
* @param serviceToNormalizeLinks
* service which links will be normalized
* @param services
* all services in environment
*/
@VisibleForTesting
void normalizeLinks(CheServiceImpl serviceToNormalizeLinks, Map<String, CheServiceImpl> services) {
serviceToNormalizeLinks.setLinks(
serviceToNormalizeLinks.getLinks()
.stream()
.map(link -> {
// a link has format: 'name:alias' or 'name'
String serviceNameAndAliasToLink[] = link.split(":", 2);
String serviceName = serviceNameAndAliasToLink[0];
String serviceAlias = (serviceNameAndAliasToLink.length > 1) ?
serviceNameAndAliasToLink[1] : null;
CheServiceImpl serviceLinkTo = services.get(serviceName);
if (serviceLinkTo != null) {
String containerNameLinkTo = serviceLinkTo.getContainerName();
return (serviceAlias == null) ?
containerNameLinkTo :
containerNameLinkTo + ':' + serviceAlias;
} else {
// should never happens. Errors like this should be filtered by CheEnvironmentValidator
throw new IllegalArgumentException("Attempt to link non existing service " + serviceName +
" to " + serviceToNormalizeLinks + " service.");
}
}).collect(toList()));
}
private void normalize(String namespace,
String workspaceId,
String machineName,
CheServiceImpl service) throws ServerException {
// set default mem limit for service if it is not set
if (service.getMemLimit() == null || service.getMemLimit() == 0) {
service.setMemLimit(defaultMachineMemorySizeBytes);
}
// download dockerfile if it is hosted by API to avoid problems with unauthorized requests from docker daemon
if (service.getBuild() != null &&
service.getBuild().getContext() != null &&
recipeApiPattern.matcher(service.getBuild().getContext()).matches()) {
String recipeContent = recipeDownloader.getRecipe(service.getBuild().getContext());
service.getBuild().setDockerfileContent(recipeContent);
service.getBuild().setContext(null);
service.getBuild().setDockerfilePath(null);
}
if (service.getId() == null) {
service.setId(generateMachineId());
}
service.setContainerName(containerNameGenerator.generateContainerName(workspaceId,
service.getId(),
namespace,
machineName));
}
/**
* Starts all machine from machine queue of environment.
*/
private void startEnvironmentQueue(String namespace,
String workspaceId,
String devMachineName,
String networkId,
boolean recover,
MachineStartedHandler startedHandler)
throws ServerException,
AgentException,
EnvironmentException {
// Starting all machines in environment one by one by getting configs
// from the corresponding starting queue.
// Config will be null only if there are no machines left in the queue
String envName;
MessageConsumer<MachineLogMessage> envLogger;
String creator = EnvironmentContext.getCurrent().getSubject().getUserId();
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.readLock(workspaceId)) {
EnvironmentHolder environmentHolder = environments.get(workspaceId);
if (environmentHolder == null) {
throw new ServerException("Environment start is interrupted.");
}
envName = environmentHolder.name;
envLogger = environmentHolder.logger;
}
try {
machineProvider.createNetwork(networkId);
String machineName = queuePeekOrFail(workspaceId);
while (machineName != null) {
boolean isDev = devMachineName.equals(machineName);
// Environment start is failed when any machine start is failed, so if any error
// occurs during machine creation then environment start fail is reported and
// start resources such as queue and descriptor must be cleaned up
CheServiceImpl service;
@Nullable ExtendedMachine extendedMachine;
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.readLock(workspaceId)) {
EnvironmentHolder environmentHolder = environments.get(workspaceId);
if (environmentHolder == null) {
throw new EnvironmentStartInterruptedException(workspaceId, envName);
}
service = environmentHolder.environment.getServices().get(machineName);
extendedMachine = environmentHolder.environmentConfig.getMachines().get(machineName);
}
// should not happen
if (service == null) {
LOG.error("Start of machine with name {} in workspace {} failed. Machine not found in start queue",
machineName, workspaceId);
throw new ServerException(
format("Environment of workspace with ID '%s' failed due to internal error", workspaceId));
}
final String finalMachineName = machineName;
// needed to reuse startInstance method and
// create machine instances by different implementation-specific providers
MachineStarter machineStarter = (machineLogger, machineSource) -> {
CheServiceImpl serviceWithNormalizedSource = normalizeServiceSource(service, machineSource);
return machineProvider.startService(namespace,
workspaceId,
envName,
finalMachineName,
isDev,
networkId,
serviceWithNormalizedSource,
machineLogger);
};
MachineImpl machine =
MachineImpl.builder()
.setConfig(MachineConfigImpl.builder()
.setDev(isDev)
.setLimits(new MachineLimitsImpl(
bytesToMB(service.getMemLimit())))
.setType("docker")
.setName(machineName)
.setEnvVariables(service.getEnvironment())
.build())
.setId(service.getId())
.setWorkspaceId(workspaceId)
.setStatus(MachineStatus.CREATING)
.setEnvName(envName)
.setOwner(creator)
.build();
checkInterruption(workspaceId, envName);
Instance instance = startInstance(recover,
envLogger,
machine,
machineStarter);
checkInterruption(workspaceId, envName);
startedHandler.started(instance, extendedMachine);
checkInterruption(workspaceId, envName);
// Machine destroying is an expensive operation which must be
// performed outside of the lock, this section checks if
// the environment wasn't stopped while it is starting and sets
// polled flag to true if the environment wasn't stopped.
// Also polls the proceeded machine configuration from the queue
boolean queuePolled = false;
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.writeLock(workspaceId)) {
ensurePreDestroyIsNotExecuted();
EnvironmentHolder environmentHolder = environments.get(workspaceId);
if (environmentHolder != null) {
final Queue<String> queue = environmentHolder.startQueue;
if (queue != null) {
queue.poll();
queuePolled = true;
}
}
}
// If machine config is not polled from the queue
// then environment was stopped and newly created machine
// must be destroyed
if (!queuePolled) {
try {
eventService.publish(newDto(MachineStatusEvent.class)
.withEventType(MachineStatusEvent.EventType.DESTROYING)
.withDev(isDev)
.withMachineName(machineName)
.withMachineId(instance.getId())
.withWorkspaceId(workspaceId));
instance.destroy();
removeMachine(workspaceId, instance.getId());
eventService.publish(newDto(MachineStatusEvent.class)
.withEventType(MachineStatusEvent.EventType.DESTROYED)
.withDev(isDev)
.withMachineName(machineName)
.withMachineId(instance.getId())
.withWorkspaceId(workspaceId));
} catch (MachineException e) {
LOG.error(e.getLocalizedMessage(), e);
}
throw new ServerException("Workspace '" + workspaceId +
"' start interrupted. Workspace stopped before all its machines started");
}
machineName = queuePeekOrFail(workspaceId);
}
} catch (Exception e) {
boolean interrupted = Thread.interrupted();
EnvironmentHolder env;
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.writeLock(workspaceId)) {
env = environments.remove(workspaceId);
}
try {
destroyEnvironment(env.networkId, env.machines);
} catch (Exception remEx) {
LOG.error(remEx.getLocalizedMessage(), remEx);
}
if (interrupted) {
throw new EnvironmentStartInterruptedException(workspaceId, envName);
}
try {
throw e;
} catch (ServerException | EnvironmentException | AgentException rethrow) {
throw rethrow;
} catch (Exception wrap) {
throw new ServerException(wrap.getMessage(), wrap);
}
}
}
private void checkInterruption(String workspaceId, String envName) throws EnvironmentStartInterruptedException {
if (Thread.interrupted()) {
throw new EnvironmentStartInterruptedException(workspaceId, envName);
}
}
private Instance startInstance(boolean recover,
MessageConsumer<MachineLogMessage> environmentLogger,
MachineImpl machine,
MachineStarter machineStarter)
throws ServerException,
EnvironmentException {
LineConsumer machineLogger = null;
Instance instance = null;
try {
addMachine(machine);
eventService.publish(newDto(MachineStatusEvent.class)
.withEventType(MachineStatusEvent.EventType.CREATING)
.withDev(machine.getConfig().isDev())
.withMachineName(machine.getConfig().getName())
.withMachineId(machine.getId())
.withWorkspaceId(machine.getWorkspaceId()));
machineLogger = getMachineLogger(environmentLogger,
machine.getId(),
machine.getConfig().getName());
MachineImpl originMachine = new MachineImpl(machine);
try {
MachineSourceImpl machineSource = null;
if (recover) {
try {
SnapshotImpl snapshot = snapshotDao.getSnapshot(machine.getWorkspaceId(),
machine.getEnvName(),
machine.getConfig().getName());
machineSource = snapshot.getMachineSource();
// Snapshot image location has SHA-256 digest which needs to be removed,
// otherwise it will be pulled without tag and cause problems
String imageName = machineSource.getLocation();
if (imageName.contains("@sha256:")) {
machineSource.setLocation(imageName.substring(0, imageName.indexOf('@')));
}
} catch (NotFoundException e) {
try {
machineLogger.writeLine("Failed to boot machine from snapshot: snapshot not found. " +
"Machine will be created from origin source.");
} catch (IOException ignore) { }
}
}
instance = machineStarter.startMachine(machineLogger, machineSource);
} catch (SourceNotFoundException e) {
if (recover) {
LOG.error("Image of snapshot for machine " + machine.getConfig().getName() +
" not found. " + "Machine will be created from origin source.");
machine = originMachine;
instance = machineStarter.startMachine(machineLogger, null);
} else {
throw e;
}
}
replaceMachine(instance);
eventService.publish(newDto(MachineStatusEvent.class)
.withEventType(MachineStatusEvent.EventType.RUNNING)
.withDev(machine.getConfig().isDev())
.withMachineName(machine.getConfig().getName())
.withMachineId(instance.getId())
.withWorkspaceId(machine.getWorkspaceId()));
return instance;
} catch (ApiException | RuntimeException e) {
boolean interrupted = Thread.interrupted();
removeMachine(machine.getWorkspaceId(), machine.getId());
if (instance != null) {
try {
instance.destroy();
} catch (Exception destroyingExc) {
LOG.error(destroyingExc.getLocalizedMessage(), destroyingExc);
}
}
if (machineLogger != null) {
try {
machineLogger.writeLine("[ERROR] " + e.getLocalizedMessage());
} catch (IOException ioEx) {
LOG.error(ioEx.getLocalizedMessage(), ioEx);
}
try {
machineLogger.close();
} catch (IOException ioEx) {
LOG.error(ioEx.getLocalizedMessage(), ioEx);
}
}
eventService.publish(newDto(MachineStatusEvent.class)
.withEventType(MachineStatusEvent.EventType.ERROR)
.withDev(machine.getConfig().isDev())
.withMachineName(machine.getConfig().getName())
.withMachineId(machine.getId())
.withWorkspaceId(machine.getWorkspaceId()));
if (interrupted) {
Thread.currentThread().interrupt();
}
throw new ServerException(e.getLocalizedMessage(), e);
}
}
private interface MachineStarter {
Instance startMachine(LineConsumer machineLogger,
MachineSource machineSource) throws ServerException,
NotFoundException,
EnvironmentException;
}
private CheServiceImpl normalizeServiceSource(CheServiceImpl service,
MachineSource machineSource)
throws ServerException {
CheServiceImpl serviceWithNormalizedSource = service;
if (machineSource != null) {
serviceWithNormalizedSource = new CheServiceImpl(service);
if ("image".equals(machineSource.getType())) {
serviceWithNormalizedSource.setBuild(null);
serviceWithNormalizedSource.setImage(machineSource.getLocation());
} else {
// dockerfile
serviceWithNormalizedSource.setImage(null);
if (machineSource.getContent() != null) {
serviceWithNormalizedSource.setBuild(new CheServiceBuildContextImpl(null,
null,
machineSource.getContent(),
null));
} else {
serviceWithNormalizedSource.setBuild(new CheServiceBuildContextImpl(machineSource.getLocation(),
null,
null,
null));
}
}
}
return serviceWithNormalizedSource;
}
private Machine normalizeMachineSource(MachineImpl machine, MachineSource machineSource) {
Machine machineWithNormalizedSource = machine;
if (machineSource != null) {
machineWithNormalizedSource = MachineImpl.builder()
.fromMachine(machine)
.setConfig(MachineConfigImpl.builder()
.fromConfig(machine.getConfig())
.setSource(machineSource)
.build())
.build();
}
return machineWithNormalizedSource;
}
private void addMachine(MachineImpl machine) throws ServerException {
Instance instance = new NoOpMachineInstance(machine);
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.writeLock(machine.getWorkspaceId())) {
ensurePreDestroyIsNotExecuted();
EnvironmentHolder environmentHolder = environments.get(machine.getWorkspaceId());
if (environmentHolder != null && environmentHolder.status != EnvStatus.STOPPING) {
environmentHolder.machines.add(instance);
} else {
throw new ServerException(
format("Can't add machine into environment. Environment of workspace '%s' is missing",
machine.getWorkspaceId()));
}
}
}
private int bytesToMB(long bytes) {
return (int)Size.parseSizeToMegabytes(Long.toString(bytes) + "b");
}
private void removeMachine(String workspaceId,
String machineId) {
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.writeLock(workspaceId)) {
EnvironmentHolder environmentHolder = environments.get(workspaceId);
if (environmentHolder != null) {
for (Instance machine : environmentHolder.machines) {
if (machine.getId().equals(machineId)) {
environmentHolder.machines.remove(machine);
return;
}
}
}
}
}
private void replaceMachine(Instance machine) throws ServerException {
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.writeLock(machine.getWorkspaceId())) {
ensurePreDestroyIsNotExecuted();
EnvironmentHolder environmentHolder = environments.get(machine.getWorkspaceId());
if (environmentHolder != null) {
for (int i = 0; i < environmentHolder.machines.size(); i++) {
if (environmentHolder.machines.get(i).getId().equals(machine.getId())) {
environmentHolder.machines.set(i, machine);
return;
}
}
}
}
// if this area is reachable then environment/machine is not found and machine should be stopped
try {
machine.destroy();
} catch (MachineException e) {
LOG.error(e.getLocalizedMessage(), e);
}
// should not happen
throw new ServerException(format(
"Machine with ID '%s' and name '%s' has been stopped because its configuration is not found in the environment of workspace '%s'",
machine.getId(), machine.getConfig().getName(), machine.getWorkspaceId()));
}
/**
* Gets head config from the queue associated with the given {@code workspaceId}.
*
* <p>Note that this method won't actually poll the queue.
*
* <p>Fails if environment start was interrupted by stop(queue doesn't exist).
*
* @return machine config which is in the queue head, or null
* if there are no machine configs left
* @throws ServerException
* if queue doesn't exist which means that {@link #stop(String)} executed
* before all the machines started
* @throws ServerException
* if pre destroy has been invoked before peek config retrieved
*/
private String queuePeekOrFail(String workspaceId) throws ServerException {
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.readLock(workspaceId)) {
ensurePreDestroyIsNotExecuted();
EnvironmentHolder environmentHolder = environments.get(workspaceId);
if (environmentHolder == null || environmentHolder.startQueue == null) {
throw new ServerException("Workspace " + workspaceId +
" start interrupted. Workspace was stopped before all its machines were started");
}
return environmentHolder.startQueue.peek();
}
}
/**
* Destroys provided machines and associated network.
*/
private void destroyEnvironment(String networkId,
List<Instance> machines) {
for (Instance machine : machines) {
try {
destroyMachine(machine);
} catch (RuntimeException | MachineException ex) {
LOG.error(format("Could not destroy machine '%s' of workspace '%s'",
machine.getId(),
machine.getWorkspaceId()),
ex);
}
}
try {
machineProvider.destroyNetwork(networkId);
} catch (RuntimeException | ServerException netExc) {
LOG.error(netExc.getLocalizedMessage(), netExc);
}
}
private void destroyMachine(Instance machine) throws MachineException {
eventService.publish(newDto(MachineStatusEvent.class)
.withEventType(MachineStatusEvent.EventType.DESTROYING)
.withDev(machine.getConfig().isDev())
.withMachineName(machine.getConfig().getName())
.withMachineId(machine.getId())
.withWorkspaceId(machine.getWorkspaceId()));
machine.destroy();
eventService.publish(newDto(MachineStatusEvent.class)
.withEventType(MachineStatusEvent.EventType.DESTROYED)
.withDev(machine.getConfig().isDev())
.withMachineName(machine.getConfig().getName())
.withMachineId(machine.getId())
.withWorkspaceId(machine.getWorkspaceId()));
}
@SuppressWarnings("unused")
@VisibleForTesting
@PostConstruct
void init() {
if (!(machineLogsDir.exists() || machineLogsDir.mkdirs())) {
throw new IllegalStateException("Unable create directory " + machineLogsDir.getAbsolutePath());
}
eventService.subscribe(new MachineCleaner());
}
/**
* Removes all descriptors from the in-memory storage, while
* {@link MachineProcessManager#cleanup()} is responsible for machines destroying.
*/
@PreDestroy
@VisibleForTesting
@SuppressWarnings("unused")
void cleanup() {
isPreDestroyInvoked = true;
final java.io.File[] files = machineLogsDir.listFiles();
if (files != null && files.length > 0) {
for (java.io.File f : files) {
if (!IoUtil.deleteRecursive(f)) {
LOG.warn("Failed delete {}", f);
}
}
}
}
private LineConsumer getMachineLogger(MessageConsumer<MachineLogMessage> environmentLogger,
String machineId,
String machineName) throws ServerException {
createMachineLogsDir(machineId);
LineConsumer lineConsumer = new AbstractLineConsumer() {
@Override
public void writeLine(String line) throws IOException {
environmentLogger.consume(new MachineLogMessageImpl(machineName, line));
}
};
try {
return new ConcurrentCompositeLineConsumer(new ConcurrentFileLineConsumer(getMachineLogsFile(machineId)),
lineConsumer);
} catch (IOException e) {
throw new MachineException(format("Unable create log file '%s' for machine '%s'.",
e.getLocalizedMessage(),
machineId));
}
}
private void createMachineLogsDir(String machineId) throws MachineException {
File dir = new File(machineLogsDir, machineId);
if (!dir.exists() && !dir.mkdirs()) {
throw new MachineException("Can't create folder for the logs of machine");
}
}
private File getMachineLogsFile(String machineId) {
return new File(new File(machineLogsDir, machineId), "machineId.logs");
}
@VisibleForTesting
String generateMachineId() {
return NameGenerator.generate("machine", 16);
}
private void ensurePreDestroyIsNotExecuted() throws ServerException {
if (isPreDestroyInvoked) {
throw new ServerException("Could not perform operation because application server is stopping");
}
}
private CheServiceImpl machineConfigToService(MachineConfig machineConfig) throws ServerException {
CheServiceImpl service = new CheServiceImpl();
service.setMemLimit(machineConfig.getLimits().getRam() * 1024L * 1024L);
service.setEnvironment(machineConfig.getEnvVariables());
if ("image".equals(machineConfig.getSource().getType())) {
service.setImage(machineConfig.getSource().getLocation());
} else {
if (machineConfig.getSource().getContent() != null) {
throw new ServerException(
"Additional machine creation from dockerfile content is not supported anymore. " +
"Please use dockerfile location instead");
} else {
service.setBuild(new CheServiceBuildContextImpl(machineConfig.getSource().getLocation(),
null,
null,
null));
}
}
List<? extends ServerConf> servers = machineConfig.getServers();
if (servers != null) {
List<String> expose = new ArrayList<>();
for (ServerConf server : servers) {
expose.add(server.getPort());
}
service.setExpose(expose);
}
return service;
}
// Removes machine from environment but doesn't stop it.
@VisibleForTesting
@Nullable
Instance removeMachineFromEnvironment(String workspaceId, String machineId) {
try (@SuppressWarnings("unused") Unlocker u = stripedLocks.writeLock(workspaceId)) {
EnvironmentHolder environmentHolder = environments.get(workspaceId);
if (environmentHolder == null || environmentHolder.status != EnvStatus.RUNNING) {
// should not happen
return null;
}
for (Instance machine : environmentHolder.machines) {
if (machine.getId().equals(machineId)) {
environmentHolder.machines.remove(machine);
return machine;
}
}
return null;
}
}
private enum EnvStatus {
STARTING,
RUNNING,
STOPPING
}
private static class EnvironmentHolder {
final Queue<String> startQueue;
final CheServicesEnvironmentImpl environment;
final MessageConsumer<MachineLogMessage> logger;
final String name;
final String networkId;
final Environment environmentConfig;
List<Instance> machines;
EnvStatus status;
EnvironmentHolder(List<String> startQueue,
CheServicesEnvironmentImpl environment,
Environment environmentConfig,
MessageConsumer<MachineLogMessage> envLogger,
EnvStatus envStatus,
String name,
String networkId) {
this.startQueue = new ArrayDeque<>(startQueue);
this.machines = new CopyOnWriteArrayList<>();
this.logger = envLogger;
this.status = envStatus;
this.name = name;
this.environment = environment;
this.networkId = networkId;
this.environmentConfig = environmentConfig;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof EnvironmentHolder)) return false;
EnvironmentHolder that = (EnvironmentHolder)o;
return Objects.equals(startQueue, that.startQueue) &&
Objects.equals(machines, that.machines) &&
status == that.status &&
Objects.equals(logger, that.logger) &&
Objects.equals(name, that.name) &&
Objects.equals(environment, that.environment) &&
Objects.equals(environmentConfig, that.environmentConfig);
}
@Override
public int hashCode() {
return Objects.hash(startQueue, machines, status, logger, name, environmentConfig, environment);
}
}
// cleanup machine if event about instance failure comes
private class MachineCleaner implements EventSubscriber<InstanceStateEvent> {
@Override
public void onEvent(InstanceStateEvent event) {
String machineId = event.getMachineId();
String workspaceId = event.getWorkspaceId();
InstanceStateEvent.Type eventType = event.getType();
// cleanup machine if event about instance failure comes
if ((eventType == OOM) || (eventType == DIE)) {
sharedPool.execute(() -> {
Instance instance = removeMachineFromEnvironment(workspaceId, machineId);
if (instance == null) {
// should not happen
return;
}
String message = "Machine is destroyed";
if (eventType == OOM) {
message = message +
". The processes in this machine need more RAM. This machine started with " +
instance.getConfig().getLimits().getRam() +
"MB. Create a new machine configuration that allocates additional RAM or increase " +
"the workspace RAM limit in the user dashboard.";
}
MachineStatusEvent destroyedEvent = newDto(MachineStatusEvent.class)
.withEventType(MachineStatusEvent.EventType.DESTROYED)
.withDev(instance.getConfig().isDev())
.withMachineId(machineId)
.withWorkspaceId(workspaceId)
.withMachineName(instance.getConfig().getName())
.withError(message);
try {
instance.getLogger().writeLine(message);
} catch (IOException ignore) {}
try {
instance.destroy();
} catch (MachineException e) {
LOG.warn("Destroying of machine {} in workspace {} where container was unexpectedly stopped failed. Error: {}, {}",
machineId, workspaceId, e.getLocalizedMessage());
}
eventService.publish(destroyedEvent);
});
}
}
}
private static class NoOpStartedHandler implements MachineStartedHandler {
@Override
public void started(Instance machine, ExtendedMachine extendedMachine) throws ServerException {}
}
}