package io.cattle.platform.process.instance;
import static io.cattle.platform.core.model.tables.InstanceHostMapTable.*;
import io.cattle.platform.allocator.service.AllocatorService;
import io.cattle.platform.archaius.util.ArchaiusUtil;
import io.cattle.platform.async.utils.ResourceTimeoutException;
import io.cattle.platform.async.utils.TimeoutException;
import io.cattle.platform.core.addon.InstanceHealthCheck;
import io.cattle.platform.core.constants.AgentConstants;
import io.cattle.platform.core.constants.CommonStatesConstants;
import io.cattle.platform.core.constants.InstanceConstants;
import io.cattle.platform.core.constants.InstanceLinkConstants;
import io.cattle.platform.core.dao.GenericMapDao;
import io.cattle.platform.core.dao.InstanceDao;
import io.cattle.platform.core.dao.IpAddressDao;
import io.cattle.platform.core.dao.ServiceDao;
import io.cattle.platform.core.model.Agent;
import io.cattle.platform.core.model.Host;
import io.cattle.platform.core.model.Instance;
import io.cattle.platform.core.model.InstanceHostMap;
import io.cattle.platform.core.model.InstanceLink;
import io.cattle.platform.core.model.IpAddress;
import io.cattle.platform.core.model.Nic;
import io.cattle.platform.core.model.Port;
import io.cattle.platform.core.model.Volume;
import io.cattle.platform.core.util.SystemLabels;
import io.cattle.platform.docker.constants.DockerInstanceConstants;
import io.cattle.platform.engine.handler.HandlerResult;
import io.cattle.platform.engine.process.ProcessInstance;
import io.cattle.platform.engine.process.ProcessState;
import io.cattle.platform.json.JsonMapper;
import io.cattle.platform.object.resource.ResourceMonitor;
import io.cattle.platform.object.resource.ResourcePredicate;
import io.cattle.platform.object.util.DataAccessor;
import io.cattle.platform.process.base.AbstractDefaultProcessHandler;
import io.cattle.platform.process.common.util.ProcessUtils;
import io.cattle.platform.process.containerevent.ContainerEventCreate;
import io.cattle.platform.process.progress.ProcessProgress;
import io.cattle.platform.util.exception.ExecutionException;
import io.cattle.platform.util.exception.ResourceExhaustionException;
import io.cattle.platform.util.type.CollectionUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import javax.inject.Inject;
import javax.inject.Named;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.netflix.config.DynamicIntProperty;
@Named
public class InstanceStart extends AbstractDefaultProcessHandler {
// Default number of compute attempts, read from the "instance.compute.tries" property;
// used by getMaxComputeTries when the instance carries no positive per-instance override.
private static final DynamicIntProperty COMPUTE_TRIES = ArchaiusUtil.getInt("instance.compute.tries");
// States in which a volumesFrom/networkFrom dependency is treated as removed.
private static final List<String> REMOVED_STATES = Arrays.asList(CommonStatesConstants.REMOVED, CommonStatesConstants.REMOVING,
CommonStatesConstants.PURGED, CommonStatesConstants.PURGING);
// States that abort the wait on a dependency (checked inside the dependency predicate).
private static final List<String> ERROR_STATES = Arrays.asList(InstanceConstants.STATE_ERROR, InstanceConstants.STATE_ERRORING);
// States in which a dependency counts as "not running" for non-service, non-start-once instances.
private static final List<String> STOPPED_STATES = Arrays.asList(InstanceConstants.STATE_STOPPED, InstanceConstants.STATE_STOPPING);
// States that satisfy the dependency wait for a start-once dependency.
private static final List<String> START_ONCE_STATES = Arrays.asList(InstanceConstants.STATE_STOPPED, InstanceConstants.STATE_STOPPING,
InstanceConstants.STATE_RUNNING);
// States a deployment-unit sibling must leave before this instance proceeds.
private static final List<String> UNALLOCATED_WAIT_STATES = Arrays.asList(CommonStatesConstants.REQUESTED, CommonStatesConstants.CREATING);
private static final Logger log = LoggerFactory.getLogger(InstanceStart.class);
// Used to deserialize the instance health check field in handleReconnecting.
@Inject
JsonMapper jsonMapper;
@Inject
InstanceDao instanceDao;
// The next three collaborators carry no field-level @Inject; they are injected through
// the annotated setters at the bottom of this class.
GenericMapDao mapDao;
IpAddressDao ipAddressDao;
ProcessProgress progress;
// Used to block on other instances reaching a given condition.
@Inject
ResourceMonitor resourceMonitor;
@Inject
ServiceDao serviceDao;
@Inject
AllocatorService allocatorService;
/**
 * Drives the start of an instance through its phases, reporting a progress
 * checkpoint before each one: dependency wait, deployment-unit wait, scheduling,
 * networking, storage, compute ("Starting") and post-network port activation.
 * Finally records the primary IP address in the result data.
 *
 * <p>Every {@link ExecutionException} raised by a phase is routed through
 * {@link #handleStartError}, which schedules a stop of the instance and rethrows
 * the exception, with two exceptions to that rule: a dependency failure on a
 * service instance is converted into a {@link ResourceTimeoutException} for the
 * first nine attempts, and a compute failure is retried in place until
 * {@link #getMaxComputeTries} is reached.
 *
 * @param state   process state; its resource is the {@link Instance} being started
 * @param process the running process instance (not used by this method)
 * @return a result whose data map may contain the primary IP address field
 * @throws ExecutionException if any phase fails permanently
 * @throws TimeoutException   rethrown after {@link #handleReconnecting} has run
 */
@Override
public HandlerResult handle(ProcessState state, ProcessInstance process) {
final Instance instance = (Instance) state.getResource();
// The result wraps this map, so entries added later (the primary IP) are visible through it.
Map<String, Object> resultData = new ConcurrentHashMap<String, Object>();
HandlerResult result = new HandlerResult(resultData);
// NOTE(review): six weights are passed here but seven checkpoints are reported below
// (the last being "Post-network") -- confirm this is what ProcessProgress expects.
progress.init(state, 16, 16, 16, 16, 20, 16);
try {
try {
progress.checkPoint("Waiting for dependencies");
// Wait until the volumesFrom/networkFrom containers have started.
waitForDependenciesStart(instance);
progress.checkPoint("Waiting for deployment unit instances to create");
// Wait until all containers in the deployment unit have left the creating state.
waitForDeploymentUnitCreate(instance);
} catch (ExecutionException e) {
log.error("Failed [{} {}] for instance [{}]", e.getMessage(), e.getTransitioningMessage(), instance.getId());
// The attempt counter lives in the process state data.
int count = incrementDepTry(state);
if (serviceDao.isServiceInstance(instance) && count < 10) {
// NOTE(review): this is thrown inside the outer try; if ResourceTimeoutException is a
// TimeoutException subtype it also passes through handleReconnecting below -- confirm.
throw new ResourceTimeoutException(instance, e.getMessage());
}
return handleStartError(state, instance, e);
}
try {
progress.checkPoint("Scheduling");
allocate(instance);
allocatorService.ensureResourcesReservedForStart(instance);
} catch (ExecutionException e) {
log.info("Failed to {} for instance [{}]", progress.getCurrentCheckpoint(), instance.getId());
return handleStartError(state, instance, e);
}
try {
progress.checkPoint("Networking");
network(instance, state);
activatePorts(instance, state);
instanceDao.clearCacheInstanceData(instance.getId());
progress.checkPoint("Storage");
storage(instance, state);
} catch (ResourceExhaustionException e) {
// Same handling as the generic case below; only the log level differs (info vs. error).
log.info("Failed to {} for instance [{}]", progress.getCurrentCheckpoint(), instance.getId());
return handleStartError(state, instance, e);
} catch (ExecutionException e) {
log.error("Failed to {} for instance [{}]", progress.getCurrentCheckpoint(), instance.getId());
return handleStartError(state, instance, e);
}
progress.checkPoint("Starting");
// Retry compute in place until it succeeds or the try budget is used up.
while (true) {
try {
compute(instance, state);
break;
} catch (ExecutionException e) {
int tryCount = incrementComputeTry(state);
int maxCount = getMaxComputeTries(instance);
log.error("Failed [{}/{}] to {} for instance [{}]", tryCount, maxCount, progress.getCurrentCheckpoint(), instance.getId());
if (tryCount >= maxCount) {
return handleStartError(state, instance, e);
}
}
}
} catch (TimeoutException e) {
// May schedule a stop when all of the instance's host agents are reconnecting/disconnected.
handleReconnecting(state, instance);
throw e;
}
try {
progress.checkPoint("Post-network");
// Ports are activated a second time once compute has completed.
activatePorts(instance, state);
} catch (ExecutionException e) {
log.error("Failed to {} for instance [{}]", progress.getCurrentCheckpoint(), instance.getId());
return handleStartError(state, instance, e);
}
assignPrimaryIpAddress(instance, resultData);
instanceDao.clearCacheInstanceData(instance.getId());
return result;
}
/**
 * Waits until the instances of the same deployment unit that are still in the
 * requested/creating states have left them, so that instance.create has run for
 * all of them. Does nothing when the instance has no deployment unit UUID.
 *
 * @param instance the instance being started
 * @throws ExecutionException if a sibling is still requested/creating after 15 seconds
 */
protected void waitForDeploymentUnitCreate(Instance instance) {
    String deploymentUnitUuid = instance.getDeploymentUnitUuid();
    if (StringUtils.isEmpty(deploymentUnitUuid)) {
        return;
    }

    // Collect the siblings that still have to get past creation.
    List<Instance> pending = new ArrayList<>();
    for (Instance sibling : instanceDao.findUnallocatedInstanceByDeploymentUnitUuid(instance.getAccountId(), deploymentUnitUuid)) {
        if (UNALLOCATED_WAIT_STATES.contains(sibling.getState())) {
            pending.add(sibling);
        }
    }

    // Satisfied as soon as the watched instance is no longer requested/creating.
    ResourcePredicate<Instance> pastCreating = new ResourcePredicate<Instance>() {
        @Override
        public boolean evaluate(Instance obj) {
            return !UNALLOCATED_WAIT_STATES.contains(obj.getState());
        }

        @Override
        public String getMessage() {
            return "starting state";
        }
    };

    // Each sibling gets its own 15 second budget.
    Long timeoutMillis = 15000L;
    for (Instance sibling : pending) {
        try {
            resourceMonitor.waitFor(sibling, timeoutMillis, pastCreating);
        } catch (TimeoutException timedOut) {
            throw new ExecutionException("Dependencies readiness error", "instance hasn't gotten passed creating state", instance.getId());
        }
    }
}
/**
 * Waits for the instances this one depends on (its volumesFrom instances plus the
 * optional network container) to become usable.
 *
 * <p>Fails immediately when a dependency is missing or in a removed state, or when
 * it is stopped/stopping while neither the dependency is start-once nor the
 * starting instance is a service instance. Otherwise each dependency is waited on
 * for up to 30 seconds.
 *
 * @param instance the instance being started
 * @throws ExecutionException if a dependency is removed, not running, or the wait
 *                            ends with a {@link TimeoutException}
 */
protected void waitForDependenciesStart(Instance instance) {
    List<Long> dependencyIds = DataAccessor.fieldLongList(instance, DockerInstanceConstants.FIELD_VOLUMES_FROM);
    Long networkFromId = instance.getNetworkContainerId();
    if (networkFromId != null) {
        dependencyIds.add(networkFromId);
    }

    List<Instance> dependencies = new ArrayList<>();
    for (Long dependencyId : dependencyIds) {
        Instance dependency = objectManager.loadResource(Instance.class, dependencyId);

        String kind;
        if (networkFromId != null && networkFromId.equals(dependencyId)) {
            kind = "networkFrom";
        } else {
            kind = "volumeFrom";
        }

        // These are soft references, so data cleanup may have removed the target: it can be null.
        if (dependency == null || REMOVED_STATES.contains(dependency.getState())) {
            throw new ExecutionException("Dependencies readiness error", kind + " instance is removed", instance.getId());
        }

        boolean mustBeRunning = !isStartOnce(dependency) && !serviceDao.isServiceInstance(instance);
        if (mustBeRunning && STOPPED_STATES.contains(dependency.getState())) {
            throw new ExecutionException("Dependencies readiness error", kind + " instance is not running",
                    instance.getId());
        }

        dependencies.add(dependency);
    }

    ResourcePredicate<Instance> dependencyReady = new ResourcePredicate<Instance>() {
        @Override
        public boolean evaluate(Instance obj) {
            // Removal and error states abort the wait instead of merely failing the check.
            if (obj.getRemoved() != null) {
                throw new TimeoutException("Instance is removed");
            }
            if (ERROR_STATES.contains(obj.getState())) {
                throw new TimeoutException("Instance encountered an error");
            }

            if (isStartOnce(obj)) {
                return START_ONCE_STATES.contains(obj.getState());
            }

            // Otherwise the dependency is ready once it has an active, non-removed host mapping.
            InstanceHostMap activeMapping =
                    objectManager.findAny(InstanceHostMap.class, INSTANCE_HOST_MAP.INSTANCE_ID, obj.getId(), INSTANCE_HOST_MAP.STATE,
                            CommonStatesConstants.ACTIVE, INSTANCE_HOST_MAP.REMOVED, null);
            return activeMapping != null;
        }

        @Override
        public String getMessage() {
            return "created state";
        }
    };

    // Each dependency gets its own 30 second budget.
    Long timeoutMillis = 30000L;
    for (Instance dependency : dependencies) {
        try {
            resourceMonitor.waitFor(dependency, timeoutMillis, dependencyReady);
        } catch (TimeoutException timedOut) {
            throw new ExecutionException("Dependencies readiness error", "instance is not running", instance.getId());
        }
    }
}
/**
 * Tells whether the instance carries the start-once system label with a true value.
 *
 * <p>The label value is converted with {@code toString()} before parsing, so a
 * value that was stored as a {@link Boolean} (or any other non-String object) is
 * handled instead of failing with a {@link ClassCastException}. String values are
 * interpreted exactly as before: only "true" (ignoring case) yields {@code true}.
 *
 * @param instance the instance whose labels are inspected
 * @return {@code true} if the start-once label is present and parses as true;
 *         {@code false} if it is absent or has any other value
 */
protected boolean isStartOnce(Instance instance) {
    Map<String, Object> labels = DataAccessor.fieldMap(instance, InstanceConstants.FIELD_LABELS);
    // Single lookup; the label map is typed <String, Object>, so do not assume a String value.
    Object startOnce = labels.get(SystemLabels.LABEL_SERVICE_CONTAINER_START_ONCE);
    if (startOnce == null) {
        return false;
    }
    return Boolean.parseBoolean(startOnce.toString());
}
/**
 * Schedules a stop (with the remove option set) for the instance when every one of
 * its non-removed host mappings points at a host whose agent is reconnecting or
 * disconnected, and the instance either has a health check configured or has never
 * been running. With no host mappings nothing is scheduled.
 *
 * @param state    the current process state (not used by this method)
 * @param instance the instance whose start timed out
 */
protected void handleReconnecting(ProcessState state, Instance instance) {
    InstanceHealthCheck healthCheck = DataAccessor.field(instance,
            InstanceConstants.FIELD_HEALTH_CHECK, jsonMapper, InstanceHealthCheck.class);

    boolean agentsUnreachable = false;
    for (InstanceHostMap hostMap : mapDao.findNonRemoved(InstanceHostMap.class, Instance.class, instance.getId())) {
        Host host = objectManager.loadResource(Host.class, hostMap.getHostId());
        Agent agent = null;
        if (host != null) {
            agent = objectManager.loadResource(Agent.class, host.getAgentId());
        }

        // A missing agent compares as "reachable", exactly like an agent in any other state.
        String agentState = agent == null ? null : agent.getState();
        agentsUnreachable = AgentConstants.STATE_RECONNECTING.equals(agentState)
                || AgentConstants.STATE_DISCONNECTED.equals(agentState);
        if (!agentsUnreachable) {
            // One reachable (or missing) agent is enough to leave the instance alone.
            break;
        }
    }

    if (!agentsUnreachable) {
        return;
    }
    if (healthCheck == null && instance.getFirstRunning() != null) {
        return;
    }

    getObjectProcessManager().scheduleProcessInstance(InstanceConstants.PROCESS_STOP, instance,
            CollectionUtils.asMap(InstanceConstants.REMOVE_OPTION, true));
}
/**
 * Stores the instance's primary IP address in {@code resultData}.
 *
 * <p>The NIC with the lowest device number decides the primary address. When a NIC
 * that lowers the minimum has no primary address, the first of its mapped IP
 * addresses (if any) is remembered as a fallback; a fallback remembered earlier is
 * kept otherwise. Nothing is stored when no address is found.
 *
 * @param instance   the instance whose NICs are inspected
 * @param resultData map that receives the primary IP address field
 */
protected void assignPrimaryIpAddress(Instance instance, Map<String, Object> resultData) {
    int lowestDeviceNumber = Integer.MAX_VALUE;
    IpAddress primary = null;
    IpAddress fallback = null;

    for (Nic nic : getObjectManager().children(instance, Nic.class)) {
        int deviceNumber = nic.getDeviceNumber().intValue();
        if (deviceNumber >= lowestDeviceNumber) {
            continue;
        }

        lowestDeviceNumber = deviceNumber;
        primary = ipAddressDao.getPrimaryIpAddress(nic);
        if (primary != null) {
            continue;
        }

        List<IpAddress> mapped = getObjectManager().mappedChildren(nic, IpAddress.class);
        if (!mapped.isEmpty()) {
            fallback = mapped.get(0);
        }
    }

    IpAddress chosen = primary != null ? primary : fallback;
    String address = chosen == null ? null : chosen.getAddress();
    if (address != null) {
        resultData.put(InstanceConstants.FIELD_PRIMARY_IP_ADDRESS, address);
    }
}
/**
 * Returns how many compute attempts are allowed for the instance.
 *
 * @param instance the instance being started
 * @return the positive "computeTries" value stored in the instance data under this
 *         class's scope, or the {@code COMPUTE_TRIES} property value when that is
 *         absent or not positive
 */
protected int getMaxComputeTries(Instance instance) {
    Integer override = DataAccessor.fromDataFieldOf(instance)
            .withScope(InstanceStart.class)
            .withKey("computeTries")
            .as(Integer.class);

    if (override == null || override <= 0) {
        return COMPUTE_TRIES.get();
    }
    return override;
}
/**
 * Schedules a stop of the instance after a failed start and rethrows the failure.
 *
 * <p>For a create-triggered start that is not a native Docker start, the stop is
 * scheduled with the error flag set and chained from the stop process to the error
 * process; otherwise a plain stop with no data is scheduled.
 *
 * @param state    the current process state; its resource is attached to {@code e}
 * @param instance the instance that failed to start
 * @param e        the failure to rethrow
 * @return never returns normally; the return type only lets callers write
 *         {@code return handleStartError(...)}
 * @throws ExecutionException always ({@code e} itself)
 */
protected HandlerResult handleStartError(ProcessState state, Instance instance, ExecutionException e) {
    boolean chainToError = InstanceCreate.isCreateStart(state) && !ContainerEventCreate.isNativeDockerStart(state);

    if (!chainToError) {
        getObjectProcessManager().scheduleProcessInstance(InstanceConstants.PROCESS_STOP, instance, null);
    } else {
        HashMap<String, Object> stopData = new HashMap<String, Object>();
        stopData.put(InstanceConstants.PROCESS_DATA_ERROR, true);
        getObjectProcessManager().scheduleProcessInstance(InstanceConstants.PROCESS_STOP, instance,
                ProcessUtils.chainInData(stopData, InstanceConstants.PROCESS_STOP,
                        InstanceConstants.PROCESS_ERROR));
    }

    e.setResources(state.getResource());
    throw e;
}
/**
 * Increments the dependency-wait attempt counter kept in the process state data
 * under this class's scope and the key "depTry".
 *
 * @param state the process state holding the counter
 * @return the counter value after the increment (1 on the first call)
 */
protected int incrementDepTry(ProcessState state) {
    DataAccessor counter = DataAccessor.fromMap(state.getData())
            .withScope(InstanceStart.class)
            .withKey("depTry");

    Integer previous = counter.as(Integer.class);
    Integer depTry = (previous == null ? 0 : previous) + 1;
    counter.set(depTry);
    return depTry;
}
/**
 * Increments the compute attempt counter kept in the process state data under this
 * class's scope and the key "computeTry".
 *
 * @param state the process state holding the counter
 * @return the counter value after the increment (1 on the first call)
 */
protected int incrementComputeTry(ProcessState state) {
    DataAccessor counter = DataAccessor.fromMap(state.getData())
            .withScope(InstanceStart.class)
            .withKey("computeTry");

    Integer previous = counter.as(Integer.class);
    Integer attempt = (previous == null ? 0 : previous) + 1;
    counter.set(attempt);
    return attempt;
}
/**
 * Hands the instance to the inherited {@code execute} helper under the process
 * name "instance.allocate", with no additional data.
 *
 * @param instance the instance to allocate
 */
protected void allocate(Instance instance) {
    final String allocateProcess = "instance.allocate";
    execute(allocateProcess, instance, null);
}
/**
 * Passes every {@link Volume} child of the instance to {@code activate}, together
 * with the process state data.
 *
 * @param instance the instance whose volumes are activated
 * @param state    the current process state
 */
protected void storage(Instance instance, ProcessState state) {
    for (Volume attached : getObjectManager().children(instance, Volume.class)) {
        activate(attached, state.getData());
    }
}
/**
 * Passes every non-removed host mapping of the instance to {@code activate},
 * together with the process state data.
 *
 * @param instance the instance being started
 * @param state    the current process state
 */
protected void compute(Instance instance, ProcessState state) {
    List<? extends InstanceHostMap> hostMaps = mapDao.findNonRemoved(InstanceHostMap.class, Instance.class, instance.getId());
    for (InstanceHostMap hostMap : hostMaps) {
        activate(hostMap, state.getData());
    }
}
/**
 * Activates the instance's NICs first, then those of its instance links (looked up
 * by the link's instance id field) that have not been removed.
 *
 * @param instance the instance being started
 * @param state    the current process state, whose data is passed to {@code activate}
 */
protected void network(Instance instance, ProcessState state) {
    for (Nic networkInterface : getObjectManager().children(instance, Nic.class)) {
        activate(networkInterface, state.getData());
    }

    for (InstanceLink instanceLink : getObjectManager().children(instance, InstanceLink.class, InstanceLinkConstants.FIELD_INSTANCE_ID)) {
        if (instanceLink.getRemoved() != null) {
            continue;
        }
        activate(instanceLink, state.getData());
    }
}
/**
 * Calls {@code createThenActivate} for every port of the instance that has no
 * removal timestamp and whose state is neither removed nor removing (compared
 * ignoring case).
 *
 * @param instance the instance whose ports are processed
 * @param state    the current process state, whose data is passed along
 */
protected void activatePorts(Instance instance, ProcessState state) {
    for (Port candidate : getObjectManager().children(instance, Port.class)) {
        // Ports can be removed while the instance is still present (an LB instance is an example).
        if (candidate.getRemoved() != null) {
            continue;
        }

        String portState = candidate.getState();
        boolean goingAway = portState.equalsIgnoreCase(CommonStatesConstants.REMOVED)
                || portState.equalsIgnoreCase(CommonStatesConstants.REMOVING);
        if (goingAway) {
            continue;
        }

        createThenActivate(candidate, state.getData());
    }
}
/**
 * @return the map DAO used to look up the instance's host mappings
 */
public GenericMapDao getMapDao() {
    return this.mapDao;
}
/**
 * Injection point for the map DAO.
 *
 * @param mapDao the DAO to use for host-mapping lookups
 */
@Inject
public void setMapDao(final GenericMapDao mapDao) {
    this.mapDao = mapDao;
}
/**
 * @return the DAO used to resolve a NIC's primary IP address
 */
public IpAddressDao getIpAddressDao() {
    return this.ipAddressDao;
}
/**
 * Injection point for the IP address DAO.
 *
 * @param ipAddressDao the DAO to use for primary IP address lookups
 */
@Inject
public void setIpAddressDao(final IpAddressDao ipAddressDao) {
    this.ipAddressDao = ipAddressDao;
}
/**
 * @return the progress tracker used to report start checkpoints
 */
public ProcessProgress getProgress() {
    return this.progress;
}
/**
 * Injection point for the progress tracker.
 *
 * @param progress the tracker to use for reporting start checkpoints
 */
@Inject
public void setProgress(final ProcessProgress progress) {
    this.progress = progress;
}
}