/*******************************************************************************
* Copyright (c) 2012-2017 Codenvy, S.A.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Codenvy, S.A. - initial API and implementation
*******************************************************************************/
package org.eclipse.che.api.workspace.server;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.eclipse.che.api.agent.server.AgentRegistry;
import org.eclipse.che.api.agent.server.exception.AgentException;
import org.eclipse.che.api.agent.server.impl.AgentSorter;
import org.eclipse.che.api.agent.server.launcher.AgentLauncher;
import org.eclipse.che.api.agent.server.launcher.AgentLauncherFactory;
import org.eclipse.che.api.agent.shared.model.Agent;
import org.eclipse.che.api.agent.shared.model.AgentKey;
import org.eclipse.che.api.core.ConflictException;
import org.eclipse.che.api.core.NotFoundException;
import org.eclipse.che.api.core.ServerException;
import org.eclipse.che.api.core.model.machine.MachineConfig;
import org.eclipse.che.api.core.model.workspace.Environment;
import org.eclipse.che.api.core.model.workspace.ExtendedMachine;
import org.eclipse.che.api.core.model.workspace.Workspace;
import org.eclipse.che.api.core.model.workspace.WorkspaceStatus;
import org.eclipse.che.api.core.notification.EventService;
import org.eclipse.che.api.core.util.WebsocketMessageConsumer;
import org.eclipse.che.api.environment.server.CheEnvironmentEngine;
import org.eclipse.che.api.environment.server.MachineStartedHandler;
import org.eclipse.che.api.environment.server.exception.EnvironmentException;
import org.eclipse.che.api.environment.server.exception.EnvironmentNotRunningException;
import org.eclipse.che.api.environment.server.exception.EnvironmentStartInterruptedException;
import org.eclipse.che.api.machine.server.exception.SnapshotException;
import org.eclipse.che.api.machine.server.model.impl.MachineConfigImpl;
import org.eclipse.che.api.machine.server.model.impl.MachineImpl;
import org.eclipse.che.api.machine.server.model.impl.SnapshotImpl;
import org.eclipse.che.api.machine.server.spi.Instance;
import org.eclipse.che.api.machine.server.spi.SnapshotDao;
import org.eclipse.che.api.workspace.server.model.impl.EnvironmentImpl;
import org.eclipse.che.api.workspace.server.model.impl.WorkspaceImpl;
import org.eclipse.che.api.workspace.server.model.impl.WorkspaceRuntimeImpl;
import org.eclipse.che.api.workspace.shared.dto.event.WorkspaceStatusEvent;
import org.eclipse.che.api.workspace.shared.dto.event.WorkspaceStatusEvent.EventType;
import org.eclipse.che.commons.annotation.Nullable;
import org.eclipse.che.commons.lang.concurrent.StripedLocks;
import org.eclipse.che.commons.lang.concurrent.Unlocker;
import org.eclipse.che.dto.server.DtoFactory;
import org.slf4j.Logger;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
import static java.lang.String.format;
import static java.util.Comparator.comparing;
import static java.util.Objects.requireNonNull;
import static org.eclipse.che.api.core.model.workspace.WorkspaceStatus.RUNNING;
import static org.eclipse.che.api.core.model.workspace.WorkspaceStatus.SNAPSHOTTING;
import static org.eclipse.che.api.core.model.workspace.WorkspaceStatus.STARTING;
import static org.eclipse.che.api.machine.shared.Constants.ENVIRONMENT_OUTPUT_CHANNEL_TEMPLATE;
import static org.slf4j.LoggerFactory.getLogger;
/**
* Defines an internal API for managing {@link WorkspaceRuntimeImpl} instances.
*
* <p>This component implements {@link WorkspaceStatus} contract.
*
* <p>The implementation is thread-safe and guarded by
* eagerly initialized readwrite locks produced by {@link StripedLocks}.
* The component doesn't expose any api for client-side locking.
* All the instances produced by this component are copies of the real data.
*
* <p>The component doesn't check whether the incoming objects are in an application-valid state.
* This means that when {@link #startAsync(Workspace, String, boolean)} is called,
* the {@code Workspace} argument is expected to be an application-valid object which contains
* all the data required for performing the start.
*
* @author Yevhenii Voevodin
* @author Alexander Garagatyi
*/
@Singleton
public class WorkspaceRuntimes {
/** Logger shared by all the instances of this class. */
private static final Logger LOG = getLogger(WorkspaceRuntimes.class);
/** Runtime states by workspace id, a workspace without an entry is reported as {@code STOPPED}. */
private final ConcurrentMap<String, RuntimeState> states;
/** Receives all the workspace status events published by this component. */
private final EventService eventsService;
/** Read/write locks acquired per workspace id around accesses to {@link #states}. */
private final StripedLocks locks;
/** Engine which actually starts/stops environments and machines and creates snapshots. */
private final CheEnvironmentEngine envEngine;
/** Defines the order in which agents are launched, see {@link #launchAgents}. */
private final AgentSorter agentSorter;
/** Finds an agent launcher by agent id and machine type, see {@link #launchAgents}. */
private final AgentLauncherFactory launcherFactory;
/** Resolves agent keys to agents, see {@link #launchAgents}. */
private final AgentRegistry agentRegistry;
/** Stores snapshots metadata, used when new snapshots replace the old ones. */
private final SnapshotDao snapshotDao;
/** Pool which executes workspace start tasks and asynchronous snapshot tasks. */
private final WorkspaceSharedPool sharedPool;
/** Becomes true once {@link #shutdown()} is called, checked by {@link #checkIsNotTerminated}. */
private final AtomicBoolean isShutdown = new AtomicBoolean(false);
/** Becomes true once {@link #refuseWorkspacesStart()} is called, checked by {@link #startAsync}. */
private final AtomicBoolean isStartRefused = new AtomicBoolean(false);
/** Handler passed to the environment engine each time an environment is started. */
private final MachineAgentsLauncher machineAgentsLauncher = new MachineAgentsLauncher();
/**
 * Creates the component backed by a fresh, empty storage of runtime states.
 * This is the constructor picked up by the dependency injection container.
 */
@Inject
public WorkspaceRuntimes(EventService eventsService,
                         CheEnvironmentEngine envEngine,
                         AgentSorter agentSorter,
                         AgentLauncherFactory launcherFactory,
                         AgentRegistry agentRegistry,
                         SnapshotDao snapshotDao,
                         WorkspaceSharedPool sharedPool) {
    this(eventsService, envEngine, agentSorter, launcherFactory, agentRegistry, snapshotDao, sharedPool,
         new ConcurrentHashMap<String, RuntimeState>());
}
/**
 * Creates the component using the given map as the storage of runtime states,
 * which allows the caller to supply (and observe) the storage.
 */
public WorkspaceRuntimes(EventService eventsService,
                         CheEnvironmentEngine envEngine,
                         AgentSorter agentSorter,
                         AgentLauncherFactory launcherFactory,
                         AgentRegistry agentRegistry,
                         SnapshotDao snapshotDao,
                         WorkspaceSharedPool sharedPool,
                         ConcurrentMap<String, RuntimeState> states) {
    // in-memory storage and its guards
    this.states = states;
    // 16 - experimental value for stripes count, it comes from default hash map size
    this.locks = new StripedLocks(16);

    // collaborators
    this.sharedPool = sharedPool;
    this.snapshotDao = snapshotDao;
    this.agentRegistry = agentRegistry;
    this.launcherFactory = launcherFactory;
    this.agentSorter = agentSorter;
    this.envEngine = envEngine;
    this.eventsService = eventsService;
}
/**
* Asynchronously starts the environment of the workspace.
* Before submitting the start task checks whether all conditions
* are met and throws appropriate exceptions if not, so
* there is no way to start the same workspace twice.
*
* <p>The state of the workspace is registered with {@link WorkspaceStatus#STARTING}
* status and the start task is submitted to the shared pool while the write lock is held,
* then the {@code STOPPED -> STARTING} event is published synchronously and
* only after that the start task is allowed to proceed.
*
* <p>Note that cancellation of the resulting future won't
* interrupt workspace start, call {@link #stop(String)} directly instead.
*
* <p>If the starting process is interrupted, let's say within a call
* to {@link #stop(String)} method, the resulting future will
* be exceptionally completed(eventually) with an instance of
* {@link EnvironmentStartInterruptedException}. Note that clients
* don't have to cleanup runtime resources, the component
* will do necessary cleanup when interrupted.
*
* <p>Implementation notes:
* if the thread which executes the task is interrupted, then the
* task is also eventually(depends on the environment engine implementation)
* interrupted as if {@link #stop(String)} is called directly.
* That helps to shutdown gracefully when thread pool is asked
* to {@link ExecutorService#shutdownNow()} and also reduces
* shutdown time when there are starting workspaces.
*
* @param workspace
* workspace containing target environment
* @param envName
* the name of the environment to start
* @param recover
* whether to recover from the snapshot
* @return completable future describing the instance of running environment
* @throws ConflictException
* when the workspace already has a runtime state (its status is not {@code STOPPED})
* @throws ConflictException
* when workspaces start is refused, i.e. {@link #refuseWorkspacesStart()} was called
* @throws ServerException
* when the component is being shut down or any other error occurs
* @throws IllegalArgumentException
* when the workspace doesn't contain the environment
* @throws NullPointerException
* when either {@code workspace} or {@code envName} is null
*/
public CompletableFuture<WorkspaceRuntimeImpl> startAsync(Workspace workspace,
String envName,
boolean recover) throws ConflictException, ServerException {
requireNonNull(workspace, "Non-null workspace required");
requireNonNull(envName, "Non-null environment name required");
// copyEnv is defined elsewhere in this class, presumably it is the one
// rejecting an unknown environment name with IllegalArgumentException - TODO confirm
EnvironmentImpl environment = copyEnv(workspace, envName);
String workspaceId = workspace.getId();
CompletableFuture<WorkspaceRuntimeImpl> cmpFuture;
StartTask startTask;
// checks and state registration are performed under the same write lock,
// so concurrent start requests for the same workspace can't both pass the checks
try (@SuppressWarnings("unused") Unlocker u = locks.writeLock(workspaceId)) {
checkIsNotTerminated("start the workspace");
if (isStartRefused.get()) {
throw new ConflictException(format("Start of the workspace '%s' is rejected by the system, " +
"no more workspaces are allowed to start",
workspace.getConfig().getName()));
}
// any existing state means that the workspace is not STOPPED
RuntimeState state = states.get(workspaceId);
if (state != null) {
throw new ConflictException(format("Could not start workspace '%s' because its status is '%s'",
workspace.getConfig().getName(),
state.status));
}
startTask = new StartTask(workspaceId,
envName,
environment,
recover,
cmpFuture = new CompletableFuture<>());
// the task is submitted right away, but StartTask#call waits on its
// allowStartLatch before starting the environment
states.put(workspaceId, new RuntimeState(WorkspaceStatus.STARTING,
envName,
startTask,
sharedPool.submit(startTask)));
}
// publish event synchronously as the task may not be executed by
// executors service(due to legal cancellation), clients still have
// to receive STOPPED -> STARTING event
eventsService.publish(DtoFactory.newDto(WorkspaceStatusEvent.class)
.withWorkspaceId(workspaceId)
.withStatus(WorkspaceStatus.STARTING)
.withEventType(EventType.STARTING)
.withPrevStatus(WorkspaceStatus.STOPPED));
// so the start thread is free to go and start the environment
startTask.unlockStart();
return cmpFuture;
}
/**
 * Returns a descriptor of the workspace runtime: the name of the active
 * environment along with the machines reported by the environment engine.
 *
 * @param workspaceId
 *         the id of the workspace to get its runtime
 * @return descriptor which describes current state of the workspace runtime
 * @throws NotFoundException
 *         when there is no runtime state for the given {@code workspaceId}
 * @throws ServerException
 *         if any error occurs while getting machines runtime information
 * @throws NullPointerException
 *         when {@code workspaceId} is null
 */
public WorkspaceRuntimeImpl getRuntime(String workspaceId) throws NotFoundException, ServerException {
    requireNonNull(workspaceId, "Required non-null workspace id");
    // only the environment name is needed from the state, read it under the read lock
    final String activeEnv;
    try (@SuppressWarnings("unused") Unlocker ignored = locks.readLock(workspaceId)) {
        activeEnv = getExistingState(workspaceId).envName;
    }
    // machines are fetched outside of the lock
    return new WorkspaceRuntimeImpl(activeEnv, envEngine.getMachines(workspaceId));
}
/**
 * Returns the current status of the workspace.
 *
 * @param workspaceId
 *         ID of requested workspace
 * @return the status held by the runtime state of the workspace, or
 *         {@link WorkspaceStatus#STOPPED} if there is no runtime state
 * @throws NullPointerException
 *         when {@code workspaceId} is null
 */
public WorkspaceStatus getStatus(String workspaceId) {
    requireNonNull(workspaceId, "Required non-null workspace id");
    try (@SuppressWarnings("unused") Unlocker ignored = locks.readLock(workspaceId)) {
        final RuntimeState current = states.get(workspaceId);
        return current != null ? current.status : WorkspaceStatus.STOPPED;
    }
}
/**
 * Injects runtime information such as status and {@link WorkspaceRuntimeImpl}
 * into the workspace object, if the workspace doesn't have runtime sets the
 * status to {@link WorkspaceStatus#STOPPED}.
 *
 * <p>The injection is best-effort: if machines of the workspace can't be
 * fetched from the environment engine, the runtime is injected with
 * an empty list of machines and the failure is logged at debug level.
 *
 * @param workspace
 *         the workspace to inject runtime into
 * @throws NullPointerException
 *         when {@code workspace} is null
 */
public void injectRuntime(WorkspaceImpl workspace) {
    requireNonNull(workspace, "Required non-null workspace");
    final String workspaceId = workspace.getId();

    // take a snapshot of the state, so it can be used outside of the lock
    RuntimeState state = null;
    try (@SuppressWarnings("unused") Unlocker u = locks.readLock(workspaceId)) {
        final RuntimeState actual = states.get(workspaceId);
        if (actual != null) {
            state = new RuntimeState(actual);
        }
    }

    if (state == null) {
        workspace.setStatus(WorkspaceStatus.STOPPED);
        return;
    }

    workspace.setStatus(state.status);
    try {
        workspace.setRuntime(new WorkspaceRuntimeImpl(state.envName, envEngine.getMachines(workspaceId)));
    } catch (Exception x) {
        // deliberately not propagated, but don't lose the cause completely;
        // debug level is used as the failure may be a normal situation (e.g. environment is not started yet)
        LOG.debug(format("Couldn't get machines of the workspace '%s', runtime is injected without machines",
                         workspaceId),
                  x);
        workspace.setRuntime(new WorkspaceRuntimeImpl(state.envName, Collections.emptyList()));
    }
}
/**
 * Tells whether this component holds a runtime state for the workspace,
 * in other words whether the workspace status differs from
 * {@link WorkspaceStatus#STOPPED}.
 *
 * @param workspaceId
 *         workspace identifier to perform check
 * @return true if workspace status is different from {@link WorkspaceStatus#STOPPED}
 */
public boolean hasRuntime(String workspaceId) {
    final boolean tracked = states.containsKey(workspaceId);
    return tracked;
}
/**
* Stops the workspace runtime.
*
* <p>Stops environment in an implementation specific way.
* During the stop of the workspace its runtime is accessible with {@link WorkspaceStatus#STOPPING stopping} status.
* Workspace may be stopped only if its status is either {@link WorkspaceStatus#RUNNING}
* or {@link WorkspaceStatus#STARTING}.
*
* <p>If the workspace is {@code RUNNING} the environment is stopped synchronously.
* If the workspace is {@code STARTING} the start future is cancelled(with interruption), then
* either the start task was never executed and this method removes the state and publishes stop events itself,
* or the method waits until the start task finishes.
*
* @param workspaceId
* identifier of workspace which should be stopped
* @throws NotFoundException
* when workspace with specified identifier is not running
* @throws ServerException
* when the component is being shut down, the calling thread is interrupted while
* waiting for the start task, or any other error occurs during workspace stopping
* @throws ConflictException
* when workspace status is neither {@link WorkspaceStatus#RUNNING} nor {@link WorkspaceStatus#STARTING}
* @throws EnvironmentException
* declared by the signature and by the helpers this method calls
* @throws NullPointerException
* when {@code workspaceId} is null
* @see CheEnvironmentEngine#stop(String)
* @see WorkspaceStatus#STOPPING
*/
public void stop(String workspaceId) throws NotFoundException,
ServerException,
ConflictException,
EnvironmentException {
requireNonNull(workspaceId, "Required not-null workspace id");
RuntimeState prevState;
try (@SuppressWarnings("unused") Unlocker u = locks.writeLock(workspaceId)) {
checkIsNotTerminated("stop the workspace");
RuntimeState state = getExistingState(workspaceId);
if (state.status != WorkspaceStatus.RUNNING && state.status != WorkspaceStatus.STARTING) {
throw new ConflictException(format("Couldn't stop the workspace '%s' because its status is '%s'. " +
"Workspace can be stopped only if it is 'RUNNING' or 'STARTING'",
workspaceId,
state.status));
}
// remember the state as it was before the status switch,
// the copy is used outside of the lock to decide how to stop
prevState = new RuntimeState(state);
state.status = WorkspaceStatus.STOPPING;
}
// workspace is running, stop normally
if (prevState.status == WorkspaceStatus.RUNNING) {
stopEnvironmentAndPublishEvents(workspaceId, WorkspaceStatus.RUNNING);
return;
}
// interrupt workspace start thread
prevState.startFuture.cancel(true);
// if task wasn't called by executor service, then
// no real machines were started but, the clients still
// have to be notified about the workspace shut down
StartTask startTask = prevState.startTask;
// markAsUsed() is also the first thing StartTask#call does, so only one side wins;
// if it is won here the task throws CancellationException should the executor still run it
if (startTask.markAsUsed()) {
removeStateAndPublishStopEvents(workspaceId);
// earlyComplete() is defined in StartTask, presumably it completes the future
// returned by startAsync - TODO confirm
prevState.startTask.earlyComplete();
return;
}
// otherwise stop will be triggered by the start task, wait for it to finish
try {
startTask.await();
} catch (EnvironmentStartInterruptedException ignored) {
// environment start successfully interrupted
} catch (InterruptedException x) {
// restore the flag so that callers can observe the interruption
Thread.currentThread().interrupt();
throw new ServerException("Interrupted while waiting for start task cancellation", x);
}
}
/**
* Starts machine in running workspace.
*
* <p>The workspace must be {@link WorkspaceStatus#RUNNING} before the machine is started
* and must still be {@code RUNNING} after the machine is started and its agents are launched,
* otherwise the newly started machine is stopped and {@link ConflictException} is thrown.
*
* @param workspaceId
* ID of workspace that owns machine
* @param machineConfig
* config of machine that should be started
* @return running machine
* @throws NotFoundException
* if the workspace doesn't have a runtime
* @throws ConflictException
* if environment is not running or conflicting machine already exists in the environment
* @throws ConflictException
* if environment was stopped during start of machine
* @throws AgentException
* if launching of an agent fails
* @throws EnvironmentException
* declared by the signature, presumably thrown by the environment engine - TODO confirm
* @throws ServerException
* if the component is being shut down or any other error occurs
*/
public Instance startMachine(String workspaceId,
MachineConfig machineConfig) throws ServerException,
ConflictException,
NotFoundException,
AgentException,
EnvironmentException {
// fail fast if the workspace is not RUNNING, the result is not needed
try (@SuppressWarnings("unused") Unlocker u = locks.readLock(workspaceId)) {
getRunningState(workspaceId);
}
// Copy constructor makes deep copy of objects graph
// which means that original values won't affect the values in used further in this class
MachineConfigImpl machineConfigCopy = new MachineConfigImpl(machineConfig);
// agents launched on every machine started by this method
List<String> agents = Arrays.asList("org.eclipse.che.exec", "org.eclipse.che.terminal");
Instance instance = envEngine.startMachine(workspaceId, machineConfigCopy, agents);
// NOTE(review): if launchAgents throws, the started instance is not stopped here -
// confirm whether the engine/caller is expected to clean it up
launchAgents(instance, agents);
// the machine was started outside of the lock, re-check that the workspace is still RUNNING
try (@SuppressWarnings("unused") Unlocker u = locks.writeLock(workspaceId)) {
checkIsNotTerminated("start the machine");
RuntimeState workspaceState = states.get(workspaceId);
if (workspaceState == null || workspaceState.status != RUNNING) {
// best-effort cleanup, the failure is only logged as ConflictException is thrown anyway
try {
envEngine.stopMachine(workspaceId, instance.getId());
} catch (NotFoundException | ServerException | ConflictException e) {
LOG.error(e.getLocalizedMessage(), e);
}
throw new ConflictException(format("Environment of workspace '%s' was stopped during start of machine",
workspaceId));
}
}
return instance;
}
/**
 * Creates a snapshot of the given workspace in the calling thread.
 * The workspace must be {@link WorkspaceStatus#RUNNING}, its status is switched
 * to {@link WorkspaceStatus#SNAPSHOTTING} before the snapshot is created.
 *
 * <p>Publishes {@link EventType#SNAPSHOT_CREATING}, {@link EventType#SNAPSHOT_CREATED},
 * {@link EventType#SNAPSHOT_CREATION_ERROR} like defined by {@link EventType}.
 *
 * @param workspaceId
 *         the id of workspace to create snapshot
 * @throws NotFoundException
 *         when workspace doesn't have a runtime
 * @throws ConflictException
 *         when workspace status is different from {@link WorkspaceStatus#RUNNING}
 * @throws ServerException
 *         when any other error occurs
 */
public void snapshot(String workspaceId) throws NotFoundException, ConflictException, ServerException {
    // RUNNING -> SNAPSHOTTING, fails if the workspace is not RUNNING
    try (@SuppressWarnings("unused") Unlocker ignored = locks.writeLock(workspaceId)) {
        final RuntimeState running = getRunningState(workspaceId);
        running.status = SNAPSHOTTING;
    }
    snapshotAndUpdateStatus(workspaceId);
}
/**
 * Creates a snapshot of the given workspace in the shared pool.
 * The switch of the workspace status to {@link WorkspaceStatus#SNAPSHOTTING}
 * is still done in the calling thread, so the method throws
 * right away when the workspace can't be snapshotted.
 *
 * @return future of the snapshotting task, failures of the task are logged and rethrown by the task
 * @see #snapshot(String)
 */
public Future<Void> snapshotAsync(String workspaceId) throws NotFoundException, ConflictException {
    // RUNNING -> SNAPSHOTTING, fails if the workspace is not RUNNING
    try (@SuppressWarnings("unused") Unlocker ignored = locks.writeLock(workspaceId)) {
        final RuntimeState running = getRunningState(workspaceId);
        running.status = SNAPSHOTTING;
    }
    return sharedPool.submit(() -> {
        try {
            snapshotAndUpdateStatus(workspaceId);
            return null;
        } catch (Exception failure) {
            LOG.error(format("Couldn't create a snapshot of workspace '%s'", workspaceId), failure);
            throw failure;
        }
    });
}
/**
 * Asks the environment engine to remove binaries of the snapshot.
 *
 * @param snapshot
 *         snapshot that will be removed
 * @return true if binaries are successfully removed,
 *         false if the engine reports that binaries are not found
 * @throws ServerException
 *         if any error occurs during binaries removal
 * @see CheEnvironmentEngine#removeSnapshot(SnapshotImpl)
 */
public boolean removeBinaries(SnapshotImpl snapshot) throws ServerException {
    try {
        envEngine.removeSnapshot(snapshot);
        return true;
    } catch (NotFoundException notFound) {
        // nothing to remove
        return false;
    }
}
/**
 * Removes binaries of every given snapshot. A failure to remove binaries
 * of one snapshot is logged and doesn't prevent removal of the others.
 *
 * @param snapshots
 *         the list of snapshots to remove binaries
 */
public void removeBinaries(Collection<? extends SnapshotImpl> snapshots) {
    for (SnapshotImpl snapshot : snapshots) {
        try {
            final boolean removed = removeBinaries(snapshot);
            if (removed) {
                continue;
            }
            LOG.warn("An attempt to remove binaries of the snapshot '{}' while there are no binaries", snapshot.getId());
        } catch (ServerException x) {
            LOG.error(format("Couldn't remove snapshot '%s', workspace id '%s'", snapshot.getId(), snapshot.getWorkspaceId()), x);
        }
    }
}
/**
 * Stops a machine of a running environment.
 *
 * @param workspaceId
 *         ID of workspace that owns environment
 * @param machineId
 *         ID of machine that should be stopped
 * @throws NotFoundException
 *         if machine is not found in the environment
 *         or workspace doesn't have a runtime
 * @throws ConflictException
 *         if environment is not running
 * @throws ConflictException
 *         if machine is dev and its stop is forbidden
 * @throws ServerException
 *         if any other error occurs
 */
public void stopMachine(String workspaceId, String machineId) throws NotFoundException,
                                                                     ServerException,
                                                                     ConflictException {
    // the state itself is not needed, the call only verifies that the workspace is RUNNING
    try (@SuppressWarnings("unused") Unlocker ignored = locks.readLock(workspaceId)) {
        getRunningState(workspaceId);
    }
    // the machine is stopped outside of the lock
    envEngine.stopMachine(workspaceId, machineId);
}
/**
 * Looks up the machine {@link Instance} in the environment engine
 * by the workspace and machine identifiers.
 *
 * @param workspaceId
 *         ID of workspace that owns machine
 * @param machineId
 *         ID of requested machine
 * @return requested machine
 * @throws NotFoundException
 *         if environment or machine is not running
 */
public Instance getMachine(String workspaceId, String machineId) throws NotFoundException {
    final Instance machine = envEngine.getMachine(workspaceId, machineId);
    return machine;
}
/**
 * Returns identifiers of the workspaces which have a runtime state
 * at the moment of the method execution, i.e. those which are not stopped.
 * The result is a new set detached from the internal storage.
 *
 * @return workspaces identifiers for those workspaces that are running(not stopped),
 *         or an empty set if there is no a single running workspace
 */
public Set<String> getRuntimesIds() {
    final Set<String> ids = new HashSet<>();
    ids.addAll(states.keySet());
    return ids;
}
/**
 * Tells whether at least one workspace has a runtime state, which
 * means that its status is different from {@link WorkspaceStatus#STOPPED}.
 *
 * @return true if there is at least one such workspace, otherwise false
 */
public boolean isAnyRunning() {
    final boolean noRuntimes = states.isEmpty();
    return !noRuntimes;
}
/**
 * Forbids starting of workspaces: once called, {@link #startAsync}
 * always throws an appropriate exception. Workspaces which are already
 * running are not affected and keep running unless stopped directly.
 *
 * @return true if the caller is the one who refused the start,
 *         false if the start had already been refused
 */
public boolean refuseWorkspacesStart() {
    final boolean refusedByCaller = isStartRefused.compareAndSet(false, true);
    return refusedByCaller;
}
/**
* Terminates workspace runtimes service, so no more workspaces are allowed to start
* or to be stopped directly, all the running workspaces are going to be stopped,
* all the starting tasks will be eventually interrupted.
*
* <p>All the runtime states are removed while the lock returned by
* {@code locks.writeAllLock()} is held, then environments of those workspaces
* whose status was not {@link WorkspaceStatus#STOPPING} are stopped in a dedicated thread pool.
* The method waits up to 30 seconds for the pool to finish, then asks the pool to
* interrupt its threads and waits up to 60 more seconds.
*
* @throws IllegalStateException
* if component shutdown is already called
* @throws InterruptedException
* declared by the signature; an interruption which occurs while waiting for the pool
* is handled in place: the pool is shut down immediately and the interrupt flag is restored
*/
public void shutdown() throws InterruptedException {
// only the first caller performs the shutdown
if (!isShutdown.compareAndSet(false, true)) {
throw new IllegalStateException("Workspace runtimes service shutdown has been already called");
}
List<String> idsToStop;
try (@SuppressWarnings("unused") Unlocker u = locks.writeAllLock()) {
// workspaces in STOPPING status are skipped, presumably because
// their stop is already in progress elsewhere - TODO confirm
idsToStop = states.entrySet()
.stream()
.filter(e -> e.getValue().status != WorkspaceStatus.STOPPING)
.map(Map.Entry::getKey)
.collect(Collectors.toList());
states.clear();
}
if (!idsToStop.isEmpty()) {
LOG.info("Shutdown running environments, environments to stop: '{}'", idsToStop.size());
// non-daemon threads are used for stopping the environments
ExecutorService executor =
Executors.newFixedThreadPool(2 * Runtime.getRuntime().availableProcessors(),
new ThreadFactoryBuilder().setNameFormat("StopEnvironmentsPool-%d")
.setDaemon(false)
.build());
for (String id : idsToStop) {
executor.execute(() -> {
try {
envEngine.stop(id);
} catch (EnvironmentNotRunningException ignored) {
// might be already stopped
} catch (Exception x) {
LOG.error(x.getMessage(), x);
}
});
}
// no new tasks are accepted, wait for the submitted ones
executor.shutdown();
try {
if (!executor.awaitTermination(30, TimeUnit.SECONDS)) {
executor.shutdownNow();
if (!executor.awaitTermination(60, TimeUnit.SECONDS)) {
LOG.error("Unable to stop runtimes termination pool");
}
}
} catch (InterruptedException e) {
executor.shutdownNow();
Thread.currentThread().interrupt();
}
}
}
/**
 * Ensures that {@link #shutdown()} hasn't been called yet.
 *
 * @param operation
 *         description of the operation being performed, used in the error message
 * @throws ServerException
 *         when the component is shut down
 */
private void checkIsNotTerminated(String operation) throws ServerException {
    if (!isShutdown.get()) {
        return;
    }
    throw new ServerException(format("Could not %s because workspaces service is being terminated", operation));
}
/**
 * Returns the runtime state of the workspace (the stored instance, not a copy).
 * The method doesn't lock, callers acquire the lock for the workspace themselves.
 *
 * @throws NotFoundException
 *         when there is no runtime state for the workspace
 */
private RuntimeState getExistingState(String workspaceId) throws NotFoundException {
    final RuntimeState existing = states.get(workspaceId);
    if (existing != null) {
        return existing;
    }
    throw new NotFoundException(format("Workspace with id '%s' is not running", workspaceId));
}
/**
 * Returns the runtime state of the workspace if its status is {@code RUNNING}.
 *
 * @throws NotFoundException
 *         when there is no runtime state for the workspace
 * @throws ConflictException
 *         when the status of the workspace is different from {@code RUNNING}
 */
private RuntimeState getRunningState(String workspaceId) throws NotFoundException, ConflictException {
    final RuntimeState existing = getExistingState(workspaceId);
    if (existing.status == RUNNING) {
        return existing;
    }
    throw new ConflictException(format("Workspace with id '%s' is not 'RUNNING', it's status is '%s'",
                                       workspaceId,
                                       existing.status));
}
/**
 * Launches the given agents on the machine in the order defined by the agent sorter.
 * An agent is skipped when the current thread is found interrupted at its turn.
 *
 * @param instance
 *         the machine to launch agents on
 * @param agents
 *         agents to launch
 */
protected void launchAgents(Instance instance, List<String> agents) throws ServerException, AgentException {
    for (AgentKey key : agentSorter.sort(agents)) {
        if (Thread.currentThread().isInterrupted()) {
            // the flag is only inspected, not cleared
            continue;
        }
        LOG.info("Launching '{}' agent at workspace {}", key.getId(), instance.getWorkspaceId());
        final Agent agent = agentRegistry.getAgent(key);
        final AgentLauncher agentLauncher = launcherFactory.find(key.getId(), instance.getConfig().getType());
        agentLauncher.launch(instance, agent);
    }
}
/**
* Starts the environment publishing all the necessary events.
* Respects task interruption & stops the workspace if starting task is cancelled.
*
* <p>Outcomes:
* <ul>
* <li>engine start is interrupted: the state is removed, {@code STOPPING} and {@code STOPPED}
* events are published and the interruption exception is rethrown;</li>
* <li>engine start fails: the state is removed, an {@code ERROR} event
* ({@code STARTING -> STOPPED}) is published and the exception is rethrown;</li>
* <li>engine start succeeds but the status is no longer {@code STARTING} or the thread
* is interrupted: the environment is stopped and
* {@link EnvironmentStartInterruptedException} is thrown;</li>
* <li>otherwise the status becomes {@code RUNNING} and the {@code RUNNING} event is published.</li>
* </ul>
*
* @param environment
* the environment to start
* @param workspaceId
* the id of the workspace which owns the environment
* @param envName
* the name of the environment
* @param recover
* whether to recover from the snapshot
*/
private void startEnvironmentAndPublishEvents(EnvironmentImpl environment,
String workspaceId,
String envName,
boolean recover) throws ServerException,
EnvironmentException,
ConflictException,
AgentException {
try {
envEngine.start(workspaceId,
envName,
environment,
recover,
new WebsocketMessageConsumer<>(format(ENVIRONMENT_OUTPUT_CHANNEL_TEMPLATE, workspaceId)),
machineAgentsLauncher);
} catch (EnvironmentStartInterruptedException x) {
// environment start was interrupted, it's either shutdown or direct stop
// in the case of shutdown make sure the status is correct,
// otherwise workspace is already stopping
compareAndSetStatus(workspaceId, WorkspaceStatus.STARTING, WorkspaceStatus.STOPPING);
removeStateAndPublishStopEvents(workspaceId);
throw x;
} catch (EnvironmentException | ServerException | ConflictException | AgentException x) {
// environment can't be started for some reason, STARTING -> STOPPED
removeState(workspaceId);
eventsService.publish(DtoFactory.newDto(WorkspaceStatusEvent.class)
.withWorkspaceId(workspaceId)
.withEventType(EventType.ERROR)
.withPrevStatus(WorkspaceStatus.STARTING)
.withStatus(WorkspaceStatus.STOPPED)
.withError("Start of environment '" + envName + "' failed. Error: " + x.getMessage()));
throw x;
}
// disallow direct start cancellation, STARTING -> RUNNING
WorkspaceStatus prevStatus;
try (@SuppressWarnings("unused") Unlocker u = locks.writeLock(workspaceId)) {
checkIsNotTerminated("finish workspace start");
// NOTE(review): the state is dereferenced without a null check, this relies on
// the state being removed only by the paths which prevent reaching this point - confirm
RuntimeState state = states.get(workspaceId);
prevStatus = state.status;
if (state.status == WorkspaceStatus.STARTING) {
state.status = WorkspaceStatus.RUNNING;
// start related objects are not needed once the workspace is running
state.startTask = null;
state.startFuture = null;
}
}
// either current thread is interrupted right after status update,
// or stop is called directly, anyway stop the environment
if (Thread.interrupted() || prevStatus != WorkspaceStatus.STARTING) {
try {
stopEnvironmentAndPublishEvents(workspaceId, WorkspaceStatus.STARTING);
} catch (Exception x) {
// the failure is only logged, the start is reported as interrupted in any case
LOG.error("Couldn't stop the environment '{}' of the workspace '{}'. Error: {}",
envName,
workspaceId,
x.getMessage());
}
throw new EnvironmentStartInterruptedException(workspaceId, envName);
}
// normally started, notify clients
eventsService.publish(DtoFactory.newDto(WorkspaceStatusEvent.class)
.withWorkspaceId(workspaceId)
.withStatus(WorkspaceStatus.RUNNING)
.withEventType(EventType.RUNNING)
.withPrevStatus(WorkspaceStatus.STARTING));
}
/**
 * Publishes the {@code STOPPING} event (with {@code STARTING} as the previous status),
 * removes the runtime state and then publishes the {@code STOPPED} event.
 */
private void removeStateAndPublishStopEvents(String workspaceId) {
    final WorkspaceStatusEvent stopping = DtoFactory.newDto(WorkspaceStatusEvent.class)
                                                    .withWorkspaceId(workspaceId)
                                                    .withPrevStatus(STARTING)
                                                    .withStatus(WorkspaceStatus.STOPPING)
                                                    .withEventType(EventType.STOPPING);
    eventsService.publish(stopping);

    removeState(workspaceId);

    final WorkspaceStatusEvent stopped = DtoFactory.newDto(WorkspaceStatusEvent.class)
                                                   .withWorkspaceId(workspaceId)
                                                   .withPrevStatus(WorkspaceStatus.STOPPING)
                                                   .withEventType(EventType.STOPPED)
                                                   .withStatus(WorkspaceStatus.STOPPED);
    eventsService.publish(stopped);
}
/**
 * Stops the environment of the workspace publishing all the necessary events:
 * {@code STOPPING} before the state is removed and the engine is asked to stop,
 * then either {@code STOPPED} or, if the engine fails, {@code ERROR}.
 *
 * @param workspaceId
 *         the id of the workspace to stop
 * @param prevStatus
 *         the status reported as previous one by the {@code STOPPING} event
 * @throws ServerException
 *         when the engine fails to stop the environment, failures of
 *         other types are wrapped into {@code ServerException}
 */
private void stopEnvironmentAndPublishEvents(String workspaceId,
                                             WorkspaceStatus prevStatus) throws ServerException,
                                                                                EnvironmentException {
    final WorkspaceStatusEvent stopping = DtoFactory.newDto(WorkspaceStatusEvent.class)
                                                    .withWorkspaceId(workspaceId)
                                                    .withPrevStatus(prevStatus)
                                                    .withStatus(WorkspaceStatus.STOPPING)
                                                    .withEventType(EventType.STOPPING);
    eventsService.publish(stopping);

    removeState(workspaceId);

    try {
        envEngine.stop(workspaceId);
    } catch (Exception x) {
        eventsService.publish(DtoFactory.newDto(WorkspaceStatusEvent.class)
                                        .withWorkspaceId(workspaceId)
                                        .withPrevStatus(WorkspaceStatus.STOPPING)
                                        .withEventType(EventType.ERROR)
                                        .withError(x.getMessage())
                                        .withStatus(WorkspaceStatus.STOPPED));
        // server exceptions are propagated as is, anything else is wrapped
        if (x instanceof ServerException) {
            throw (ServerException)x;
        }
        throw new ServerException(x.getMessage(), x);
    }

    final WorkspaceStatusEvent stopped = DtoFactory.newDto(WorkspaceStatusEvent.class)
                                                   .withWorkspaceId(workspaceId)
                                                   .withPrevStatus(WorkspaceStatus.STOPPING)
                                                   .withEventType(EventType.STOPPED)
                                                   .withStatus(WorkspaceStatus.STOPPED);
    eventsService.publish(stopped);
}
/**
 * Under the write lock of the workspace, changes its status to {@code to},
 * but only if the workspace has a runtime state and the current status is {@code from}.
 *
 * @return true if the status was changed, false otherwise
 * @throws ServerException
 *         when the component is shut down
 */
private boolean compareAndSetStatus(String id, WorkspaceStatus from, WorkspaceStatus to) throws ServerException {
    try (@SuppressWarnings("unused") Unlocker ignored = locks.writeLock(id)) {
        checkIsNotTerminated(format("change status from '%s' to '%s' for the workspace '%s'", from, to, id));
        final RuntimeState current = states.get(id);
        if (current == null || current.status != from) {
            return false;
        }
        current.status = to;
        return true;
    }
}
/**
 * Acquires the write lock of the workspace and removes
 * its runtime state from the in-memory storage, if there is any.
 */
private void removeState(String workspaceId) {
    try (@SuppressWarnings("unused") Unlocker ignored = locks.writeLock(workspaceId)) {
        states.remove(workspaceId);
    }
}
/**
 * Creates a snapshot and changes status SNAPSHOTTING -> RUNNING.
 *
 * <p>Publishes {@code SNAPSHOT_CREATING} first, then snapshots the machines of the
 * workspace (the dev machine goes first) and replaces the snapshots metadata.
 * A failure to snapshot a non-dev machine is only logged. A failure to get the runtime,
 * to snapshot the dev machine or to replace the metadata reverts the status to
 * {@code RUNNING}, publishes {@code SNAPSHOT_CREATION_ERROR} and is rethrown.
 * On success {@code SNAPSHOT_CREATED} is published.
 *
 * @param workspaceId
 *         the id of the workspace to snapshot, its status is expected to be SNAPSHOTTING already
 * @throws NotFoundException
 *         when the workspace doesn't have a runtime or the dev machine is not found
 * @throws ServerException
 *         when any other error occurs
 */
private void snapshotAndUpdateStatus(String workspaceId) throws NotFoundException,
                                                                ConflictException,
                                                                ServerException {
    eventsService.publish(DtoFactory.newDto(WorkspaceStatusEvent.class)
                                    .withWorkspaceId(workspaceId)
                                    .withStatus(WorkspaceStatus.SNAPSHOTTING)
                                    .withEventType(EventType.SNAPSHOT_CREATING)
                                    .withPrevStatus(WorkspaceStatus.RUNNING));

    WorkspaceRuntimeImpl runtime;
    try {
        runtime = getRuntime(workspaceId);
    } catch (NotFoundException | ServerException x) {
        // SNAPSHOT_CREATING is already published and the status is SNAPSHOTTING,
        // don't leave the workspace stuck in that status
        revertSnapshottingAndPublishError(workspaceId, x.getMessage());
        throw x;
    }

    // dev machine goes first, so its failure aborts snapshotting before anything else is created
    List<MachineImpl> machines = runtime.getMachines();
    machines.sort(comparing(m -> !m.getConfig().isDev(), Boolean::compare));

    LOG.info("Creating snapshot of workspace '{}', machines to snapshot: '{}'", workspaceId, machines.size());
    List<SnapshotImpl> newSnapshots = new ArrayList<>(machines.size());
    for (MachineImpl machine : machines) {
        try {
            newSnapshots.add(envEngine.saveSnapshot(workspaceId, machine.getId()));
        } catch (ServerException | NotFoundException x) {
            if (machine.getConfig().isDev()) {
                revertSnapshottingAndPublishError(workspaceId, x.getMessage());
                throw x;
            }
            // non-dev machine snapshot is optional, keep the cause in the log
            LOG.warn(format("Couldn't create snapshot of machine '%s:%s' in workspace '%s'",
                            machine.getEnvName(),
                            machine.getConfig().getName(),
                            workspaceId),
                     x);
        }
    }

    LOG.info("Saving new snapshots metadata, workspace id '{}'", workspaceId);
    try {
        List<SnapshotImpl> removed = snapshotDao.replaceSnapshots(workspaceId,
                                                                  runtime.getActiveEnv(),
                                                                  newSnapshots);
        if (!removed.isEmpty()) {
            LOG.info("Removing old snapshots binaries, workspace id '{}', snapshots to remove '{}'", workspaceId, removed.size());
            removeBinaries(removed);
        }
    } catch (SnapshotException x) {
        LOG.error(format("Couldn't remove existing snapshots metadata for workspace '%s'", workspaceId), x);
        LOG.info("Removing newly created snapshots, workspace id '{}', snapshots to remove '{}'", workspaceId, newSnapshots.size());
        // metadata wasn't saved, so binaries of the new snapshots are orphans
        removeBinaries(newSnapshots);
        revertSnapshottingAndPublishError(workspaceId, x.getMessage());
        throw x;
    }

    compareAndSetStatus(workspaceId, WorkspaceStatus.SNAPSHOTTING, WorkspaceStatus.RUNNING);
    eventsService.publish(DtoFactory.newDto(WorkspaceStatusEvent.class)
                                    .withStatus(WorkspaceStatus.RUNNING)
                                    .withWorkspaceId(workspaceId)
                                    .withEventType(EventType.SNAPSHOT_CREATED)
                                    .withPrevStatus(WorkspaceStatus.SNAPSHOTTING));
}

/** Changes status SNAPSHOTTING -> RUNNING and publishes the snapshot creation error event. */
private void revertSnapshottingAndPublishError(String workspaceId, String error) throws ServerException {
    compareAndSetStatus(workspaceId, WorkspaceStatus.SNAPSHOTTING, WorkspaceStatus.RUNNING);
    eventsService.publish(DtoFactory.newDto(WorkspaceStatusEvent.class)
                                    .withWorkspaceId(workspaceId)
                                    .withStatus(WorkspaceStatus.RUNNING)
                                    .withEventType(EventType.SNAPSHOT_CREATION_ERROR)
                                    .withPrevStatus(WorkspaceStatus.SNAPSHOTTING)
                                    .withError(error));
}
/**
 * Mutable holder of the runtime information kept while the workspace is not stopped:
 * the status, the name of the active environment and the objects related to the start.
 */
@VisibleForTesting
static class RuntimeState {
    WorkspaceStatus                status;
    String                         envName;
    StartTask                      startTask;
    Future<WorkspaceRuntimeImpl>   startFuture;

    /** Creates a state from the given values. */
    RuntimeState(WorkspaceStatus status,
                 String envName,
                 StartTask startTask,
                 Future<WorkspaceRuntimeImpl> startFuture) {
        this.status = status;
        this.envName = envName;
        this.startTask = startTask;
        this.startFuture = startFuture;
    }

    /** Creates a shallow copy of the given state. */
    RuntimeState(RuntimeState state) {
        this(state.status, state.envName, state.startTask, state.startFuture);
    }
}
/**
 * Callable which starts the environment of a workspace and reports the result
 * three ways: as the value/exception of {@link #call()}, through the
 * completable future passed to the constructor and through {@link #await()}.
 *
 * <p>The task can be "used" only once, see {@link #markAsUsed()}. The party which
 * marks the task as used either executes it ({@link #call()}) or finishes
 * it without execution ({@link #earlyComplete()}). Execution does not begin
 * until {@link #unlockStart()} is called.
 */
@VisibleForTesting
class StartTask implements Callable<WorkspaceRuntimeImpl> {

    final String                                  workspaceId;
    final String                                  envName;
    final EnvironmentImpl                         environment;
    final boolean                                 recover;
    /** Completed with the runtime on success or exceptionally on failure. */
    final CompletableFuture<WorkspaceRuntimeImpl> cmpFuture;
    /** Becomes true once somebody claimed this task, never reset. */
    final AtomicBoolean                           used;
    /** Released by {@link #unlockStart()}, {@link #call()} waits on it before starting. */
    final CountDownLatch                          allowStartLatch;
    /** Released when the task is finished, either by {@link #call()} or by {@link #earlyComplete()}. */
    final CountDownLatch                          completionLatch;

    /** The failure of this task, written by the finishing thread and read by {@link #await()}. */
    volatile Exception exception;

    StartTask(String workspaceId,
              String envName,
              EnvironmentImpl environment,
              boolean recover,
              CompletableFuture<WorkspaceRuntimeImpl> cmpFuture) {
        this.workspaceId = workspaceId;
        this.envName = envName;
        this.environment = environment;
        this.recover = recover;
        this.cmpFuture = cmpFuture;
        this.used = new AtomicBoolean(false);
        this.completionLatch = new CountDownLatch(1);
        this.allowStartLatch = new CountDownLatch(1);
    }

    /**
     * Waits until the start is allowed, then starts the environment
     * and completes the future with the resulting runtime.
     *
     * @return the runtime of the started workspace
     * @throws CancellationException
     *         when the task was already marked as used by another party
     * @throws Exception
     *         any failure occurred while waiting for the start permission or while starting,
     *         the same failure is stored for {@link #await()} and set to the completable future
     */
    @Override
    public WorkspaceRuntimeImpl call() throws Exception {
        if (!markAsUsed()) {
            throw new CancellationException(format("Start of the workspace '%s' was cancelled", workspaceId));
        }
        try {
            // The wait is inside the try block on purpose: if this thread is interrupted
            // before the start is unlocked, the completion latch must still be released
            // and the future completed, otherwise threads blocked in await() hang forever.
            allowStartLatch.await();
            startEnvironmentAndPublishEvents(environment, workspaceId, envName, recover);
            WorkspaceRuntimeImpl runtime = getRuntime(workspaceId);
            cmpFuture.complete(runtime);
            return runtime;
        } catch (IllegalStateException illegalStateEx) {
            // during shutdown the failure is reported as an interrupted start,
            // otherwise it is reported as a regular server error
            if (isShutdown.get()) {
                exception = new EnvironmentStartInterruptedException(workspaceId, envName);
            } else {
                exception = new ServerException(illegalStateEx.getMessage(), illegalStateEx);
            }
            cmpFuture.completeExceptionally(exception);
            throw exception;
        } catch (Exception occurred) {
            exception = occurred;
            cmpFuture.completeExceptionally(occurred);
            throw occurred;
        } finally {
            completionLatch.countDown();
        }
    }

    /**
     * Awaits this task to complete, rethrows exceptions occurred during the invocation.
     *
     * <p>Exceptions of types other than the declared ones
     * are wrapped into {@link ServerException} with the original as a cause.
     *
     * @throws InterruptedException
     *         when the waiting thread is interrupted
     */
    void await() throws InterruptedException,
                        ServerException,
                        ConflictException,
                        EnvironmentException {
        completionLatch.await();
        if (exception != null) {
            try {
                throw exception;
            } catch (ServerException | EnvironmentException | ConflictException rethrow) {
                throw rethrow;
            } catch (Exception x) {
                throw new ServerException(x.getMessage(), x);
            }
        }
    }

    /**
     * Completes corresponding completable future exceptionally
     * with {@link EnvironmentStartInterruptedException}
     * and releases the threads waiting in {@link #await()}.
     */
    void earlyComplete() {
        exception = new EnvironmentStartInterruptedException(workspaceId, envName);
        cmpFuture.completeExceptionally(exception);
        completionLatch.countDown();
    }

    /**
     * Marks this task as used, returns true only if it was unused before.
     */
    boolean markAsUsed() {
        return used.compareAndSet(false, true);
    }

    /**
     * Allows start of this task.
     * The task caller will wait until this method is called.
     */
    void unlockStart() {
        allowStartLatch.countDown();
    }
}
/**
 * {@link MachineStartedHandler} which passes the agents
 * of the started machine to {@code launchAgents}.
 */
private class MachineAgentsLauncher implements MachineStartedHandler {

    /**
     * Calls {@code launchAgents} with the given machine and the agents of {@code extendedMachine},
     * does nothing when {@code extendedMachine} is null.
     */
    @Override
    public void started(Instance machine, @Nullable ExtendedMachine extendedMachine)
            throws ServerException, AgentException {
        if (extendedMachine == null) {
            // no machine description -> no agents to deal with
            return;
        }
        launchAgents(machine, extendedMachine.getAgents());
    }
}
/**
 * Finds the environment with the given name in the workspace configuration
 * and returns a new {@link EnvironmentImpl} constructed from it.
 *
 * @param workspace
 *         the workspace whose configuration is searched
 * @param envName
 *         the name of the environment to look up
 * @return new {@code EnvironmentImpl} instance based on the found environment
 * @throws IllegalArgumentException
 *         when the workspace configuration has no environment with such name
 */
private static EnvironmentImpl copyEnv(Workspace workspace, String envName) {
    final Environment found = workspace.getConfig().getEnvironments().get(envName);
    if (found != null) {
        return new EnvironmentImpl(found);
    }
    throw new IllegalArgumentException(format("Workspace '%s' doesn't contain environment '%s'",
                                              workspace.getId(),
                                              envName));
}
}