package org.zstack.compute.host; import org.springframework.beans.factory.annotation.Autowired; import org.zstack.core.CoreGlobalProperty; import org.zstack.core.Platform; import org.zstack.core.cloudbus.*; import org.zstack.core.componentloader.PluginRegistry; import org.zstack.core.config.GlobalConfig; import org.zstack.core.config.GlobalConfigUpdateExtensionPoint; import org.zstack.core.db.DatabaseFacade; import org.zstack.core.db.DbEntityLister; import org.zstack.core.db.SimpleQuery; import org.zstack.core.db.SimpleQuery.Op; import org.zstack.core.defer.Deferred; import org.zstack.core.errorcode.ErrorFacade; import org.zstack.core.thread.AsyncThread; import org.zstack.core.thread.SyncThread; import org.zstack.core.workflow.FlowChainBuilder; import org.zstack.header.AbstractService; import org.zstack.header.allocator.HostCpuOverProvisioningManager; import org.zstack.header.cluster.ClusterVO; import org.zstack.header.cluster.ClusterVO_; import org.zstack.header.core.Completion; import org.zstack.header.core.ReturnValueCompletion; import org.zstack.header.core.workflow.*; import org.zstack.header.errorcode.ErrorCode; import org.zstack.header.exception.CloudRuntimeException; import org.zstack.header.host.*; import org.zstack.header.managementnode.ManagementNodeChangeListener; import org.zstack.header.managementnode.ManagementNodeReadyExtensionPoint; import org.zstack.header.message.APIMessage; import org.zstack.header.message.Message; import org.zstack.header.message.MessageReply; import org.zstack.header.message.NeedReplyMessage; import org.zstack.search.GetQuery; import org.zstack.search.SearchQuery; import org.zstack.tag.TagManager; import org.zstack.utils.Bucket; import org.zstack.utils.CollectionUtils; import org.zstack.utils.ObjectUtils; import org.zstack.utils.Utils; import org.zstack.utils.function.ForEachFunction; import org.zstack.utils.logging.CLogger; import javax.persistence.Tuple; import java.util.*; import static org.zstack.core.Platform.operr; public class HostManagerImpl extends AbstractService implements HostManager, ManagementNodeChangeListener, ManagementNodeReadyExtensionPoint { private static final CLogger logger = Utils.getLogger(HostManagerImpl.class); @Autowired private CloudBus bus; @Autowired private DatabaseFacade dbf; @Autowired private PluginRegistry pluginRgty; @Autowired private DbEntityLister dl; @Autowired private ResourceDestinationMaker destMaker; @Autowired private ErrorFacade errf; @Autowired private HostExtensionPointEmitter extEmitter; @Autowired protected HostTracker tracker; @Autowired private TagManager tagMgr; @Autowired private HostCpuOverProvisioningManager cpuRatioMgr; private Map<Class, HostBaseExtensionFactory> hostBaseExtensionFactories = new HashMap<>(); private Map<String, HypervisorFactory> hypervisorFactories = Collections.synchronizedMap(new HashMap<String, HypervisorFactory>()); private static final Set<Class> allowedMessageAfterSoftDeletion = new HashSet<Class>(); static { allowedMessageAfterSoftDeletion.add(HostDeletionMsg.class); } private void handleApiMessage(APIMessage msg) { if (msg instanceof APIAddHostMsg) { handle((APIAddHostMsg) msg); } else if (msg instanceof APIListHostMsg) { handle((APIListHostMsg) msg); } else if (msg instanceof APISearchHostMsg) { handle((APISearchHostMsg) msg); } else if (msg instanceof APIGetHostMsg) { handle((APIGetHostMsg) msg); } else if (msg instanceof APIGetHypervisorTypesMsg) { handle((APIGetHypervisorTypesMsg) msg); } else if (msg instanceof HostMessage) { HostMessage hmsg = (HostMessage) msg; passThrough(hmsg); } else { bus.dealWithUnknownMessage(msg); } } private void handle(APIGetHypervisorTypesMsg msg) { APIGetHypervisorTypesReply reply = new APIGetHypervisorTypesReply(); List<String> res = new ArrayList<String>(); res.addAll(HypervisorType.getAllTypeNames()); reply.setHypervisorTypes(res); bus.reply(msg, reply); } private void handle(APIGetHostMsg msg) { GetQuery q = new GetQuery(); String res = q.getAsString(msg, HostInventory.class); APIGetHostReply reply = new APIGetHostReply(); reply.setInventory(res); bus.reply(msg, reply); } private void handle(APISearchHostMsg msg) { SearchQuery<HostInventory> query = SearchQuery.create(msg, HostInventory.class); String content = query.listAsString(); APISearchHostReply reply = new APISearchHostReply(); reply.setContent(content); bus.reply(msg, reply); } private void handle(APIListHostMsg msg) { List<HostVO> vos = dl.listByApiMessage(msg, HostVO.class); List<HostInventory> invs = HostInventory.valueOf(vos); APIListHostReply reply = new APIListHostReply(); reply.setInventories(invs); bus.reply(msg, reply); } private void passThrough(HostMessage msg) { HostVO vo = dbf.findByUuid(msg.getHostUuid(), HostVO.class); if (vo == null && allowedMessageAfterSoftDeletion.contains(msg.getClass())) { HostEO eo = dbf.findByUuid(msg.getHostUuid(), HostEO.class); vo = ObjectUtils.newAndCopy(eo, HostVO.class); } if (vo == null) { String err = "Cannot find host: " + msg.getHostUuid() + ", it may have been deleted"; bus.replyErrorByMessageType((Message) msg, err); return; } HypervisorFactory factory = this.getHypervisorFactory(HypervisorType.valueOf(vo.getHypervisorType())); Host host = factory.getHost(vo); host.handleMessage((Message) msg); } @Override @MessageSafe public void handleMessage(Message msg) { if (msg instanceof APIMessage) { handleApiMessage((APIMessage) msg); } else { handleLocalMessage(msg); } } private void handleLocalMessage(Message msg) { if (msg instanceof HostMessage) { passThrough((HostMessage) msg); } else if (msg instanceof AddHostMsg){ handle((AddHostMsg) msg); } else { bus.dealWithUnknownMessage(msg); } } private AddHostMsg getAddHostMsg(AddHostMessage msg) { if (msg instanceof AddHostMsg) { return (AddHostMsg) msg; } else if (msg instanceof APIAddHostMsg) { return AddHostMsg.valueOf((APIAddHostMsg) msg); } throw new CloudRuntimeException("unexpected addHost message: " + msg); } private void doAddHost(final AddHostMessage msg, ReturnValueCompletion<HostInventory > completion) { final ClusterVO cluster = findClusterByUuid(msg.getClusterUuid()); final HostVO hvo = new HostVO(); if (msg.getResourceUuid() != null) { hvo.setUuid(msg.getResourceUuid()); } else { hvo.setUuid(Platform.getUuid()); } hvo.setClusterUuid(cluster.getUuid()); hvo.setZoneUuid(cluster.getZoneUuid()); hvo.setName(msg.getName()); hvo.setDescription(msg.getDescription()); hvo.setHypervisorType(cluster.getHypervisorType()); hvo.setManagementIp(msg.getManagementIp()); hvo.setStatus(HostStatus.Connecting); hvo.setState(HostState.Enabled); final HypervisorFactory factory = getHypervisorFactory(HypervisorType.valueOf(cluster.getHypervisorType())); final HostVO vo = factory.createHost(hvo, msg); final AddHostMsg amsg = getAddHostMsg(msg); if (msg instanceof APIAddHostMsg) { tagMgr.createTagsFromAPICreateMessage((APIAddHostMsg)msg, vo.getUuid(), HostVO.class.getSimpleName()); } FlowChain chain = FlowChainBuilder.newSimpleFlowChain(); final HostInventory inv = HostInventory.valueOf(vo); chain.setName(String.format("add-host-%s", vo.getUuid())); chain.then(new NoRollbackFlow() { String __name__ = "call-before-add-host-extension"; private void callPlugins(final Iterator<HostAddExtensionPoint> it, final FlowTrigger trigger) { if (!it.hasNext()) { trigger.next(); return; } HostAddExtensionPoint ext = it.next(); ext.beforeAddHost(inv, new Completion(trigger) { @Override public void success() { callPlugins(it, trigger); } @Override public void fail(ErrorCode errorCode) { trigger.fail(errorCode); } }); } @Override public void run(final FlowTrigger trigger, Map data) { List<HostAddExtensionPoint> exts = pluginRgty.getExtensionList(HostAddExtensionPoint.class); callPlugins(exts.iterator(), trigger); } }).then(new NoRollbackFlow() { String __name__ = "send-connect-host-message"; @Override public void run(final FlowTrigger trigger, Map data) { ConnectHostMsg connectMsg = new ConnectHostMsg(vo.getUuid()); connectMsg.setNewAdd(true); connectMsg.setStartPingTaskOnFailure(false); bus.makeTargetServiceIdByResourceUuid(connectMsg, HostConstant.SERVICE_ID, hvo.getUuid()); bus.send(connectMsg, new CloudBusCallBack(trigger) { @Override public void run(MessageReply reply) { if (reply.isSuccess()) { trigger.next(); } else { trigger.fail(reply.getError()); } } }); } }).then(new NoRollbackFlow() { String __name__ = "check-host-os-version"; @Override public void run(FlowTrigger trigger, Map data) { String distro = HostSystemTags.OS_DISTRIBUTION.getTokenByResourceUuid(vo.getUuid(), HostSystemTags.OS_DISTRIBUTION_TOKEN); String release = HostSystemTags.OS_RELEASE.getTokenByResourceUuid(vo.getUuid(), HostSystemTags.OS_RELEASE_TOKEN); String version = HostSystemTags.OS_VERSION.getTokenByResourceUuid(vo.getUuid(), HostSystemTags.OS_VERSION_TOKEN); if (distro == null && release == null && version == null) { trigger.fail(operr("after connecting, host[name:%s, ip:%s] returns a null os version", vo.getName(), vo.getManagementIp())); return; } SimpleQuery<HostVO> q = dbf.createQuery(HostVO.class); q.select(HostVO_.uuid); q.add(HostVO_.clusterUuid, Op.EQ, vo.getClusterUuid()); q.add(HostVO_.uuid, Op.NOT_EQ, vo.getUuid()); q.add(HostVO_.status, Op.NOT_EQ, HostStatus.Connecting); q.setLimit(1); List<String> huuids = q.listValue(); if (huuids.isEmpty()) { // this the first host in cluster trigger.next(); return; } String otherHostUuid = huuids.get(0); String cdistro = HostSystemTags.OS_DISTRIBUTION.getTokenByResourceUuid(otherHostUuid, HostSystemTags.OS_DISTRIBUTION_TOKEN); String crelease = HostSystemTags.OS_RELEASE.getTokenByResourceUuid(otherHostUuid, HostSystemTags.OS_RELEASE_TOKEN); String cversion = HostSystemTags.OS_VERSION.getTokenByResourceUuid(otherHostUuid, HostSystemTags.OS_VERSION_TOKEN); if (cdistro == null && crelease == null && cversion == null) { // this the first host in cluster trigger.next(); return; } if (version.contains(".")) { version = version.split("\\.")[0]; } if (cversion.contains(".")) { cversion = cversion.split("\\.")[0]; } String mineVersion = String.format("%s;%s;%s", distro, release, version); String currentVersion = String.format("%s;%s;%s", cdistro, crelease, cversion); if (!mineVersion.equals(currentVersion)) { trigger.fail(operr("cluster[uuid:%s] already has host with os version[%s], but new added host[name:%s ip:%s] has host os version[%s]", vo.getClusterUuid(), currentVersion, vo.getName(), vo.getManagementIp(), mineVersion)); return; } trigger.next(); } }).then(new NoRollbackFlow() { String __name__ = "call-after-add-host-extension"; @Override public void run(final FlowTrigger trigger, Map data) { extEmitter.afterAddHost(inv, new Completion(trigger) { @Override public void success() { trigger.next(); } @Override public void fail(ErrorCode errorCode) { trigger.fail(errorCode); } }); } }).done(new FlowDoneHandler(amsg) { @Override public void handle(Map data) { HostVO nvo = dbf.reload(vo); HostInventory inv = factory.getHostInventory(nvo.getUuid()); inv.setStatus(HostStatus.Connected.toString()); completion.success(inv); logger.debug(String.format("successfully added host[name:%s, hypervisor:%s, uuid:%s]", vo.getName(), vo.getHypervisorType(), vo.getUuid())); } }).error(new FlowErrorHandler(amsg) { @Override public void handle(ErrorCode errCode, Map data) { // delete host totally through the database, so other tables // refer to the host table will clean up themselves HostVO nvo = dbf.reload(vo); dbf.remove(nvo); dbf.eoCleanup(HostVO.class); HostInventory inv = HostInventory.valueOf(nvo); CollectionUtils.safeForEach(pluginRgty.getExtensionList(FailToAddHostExtensionPoint.class), new ForEachFunction<FailToAddHostExtensionPoint>() { @Override public void run(FailToAddHostExtensionPoint ext) { ext.failedToAddHost(inv, msg); } }); completion.fail(errf.instantiateErrorCode(HostErrors.UNABLE_TO_ADD_HOST, errCode)); } }).start(); } @Deferred private void handle(final AddHostMsg msg) { final AddHostReply reply = new AddHostReply(); doAddHost(msg, new ReturnValueCompletion<HostInventory>(msg) { @Override public void success(HostInventory returnValue) { reply.setInventory(returnValue); bus.reply(msg, reply); } @Override public void fail(ErrorCode errorCode) { reply.setError(errorCode); bus.reply(msg, reply); } }); } @Deferred private void handle(final APIAddHostMsg msg) { final APIAddHostEvent evt = new APIAddHostEvent(msg.getId()); doAddHost(msg, new ReturnValueCompletion<HostInventory>(msg) { @Override public void success(HostInventory inventory) { evt.setInventory(inventory); bus.publish(evt); } @Override public void fail(ErrorCode errorCode) { evt.setError(errorCode); bus.publish(evt); } }); } private ClusterVO findClusterByUuid(String uuid) { SimpleQuery<ClusterVO> query = dbf.createQuery(ClusterVO.class); query.add(ClusterVO_.uuid, Op.EQ, uuid); return query.find(); } @Override public String getId() { return bus.makeLocalServiceId(HostConstant.SERVICE_ID); } private void populateExtensions() { for (HypervisorFactory f : pluginRgty.getExtensionList(HypervisorFactory.class)) { HypervisorFactory old = hypervisorFactories.get(f.getHypervisorType().toString()); if (old != null) { throw new CloudRuntimeException(String.format("duplicate HypervisorFactory[%s, %s] for hypervisor type[%s]", old.getClass().getName(), f.getClass().getName(), f.getHypervisorType())); } hypervisorFactories.put(f.getHypervisorType().toString(), f); } for (HostBaseExtensionFactory ext : pluginRgty.getExtensionList(HostBaseExtensionFactory.class)) { for (Class clz : ext.getMessageClasses()) { HostBaseExtensionFactory old = hostBaseExtensionFactories.get(clz); if (old != null) { throw new CloudRuntimeException(String.format("duplicate HostBaseExtensionFactory[%s, %s] for the" + " message[%s]", old.getClass(), ext.getClass(), clz)); } hostBaseExtensionFactories.put(clz, ext); } } } @Override public boolean start() { setupGlobalConfig(); populateExtensions(); return true; } private void setupGlobalConfig() { HostGlobalConfig.HOST_CPU_OVER_PROVISIONING_RATIO.installLocalUpdateExtension(new GlobalConfigUpdateExtensionPoint() { @Override public void updateGlobalConfig(GlobalConfig oldConfig, GlobalConfig newConfig) { cpuRatioMgr.setGlobalRatio(newConfig.value(Integer.class)); } }); } @Override public boolean stop() { return true; } @Override public void nodeJoin(String nodeId) { } @Override @SyncThread public void nodeLeft(String nodeId) { logger.debug(String.format("Management node[uuid:%s] left, node[uuid:%s] starts to take over hosts", nodeId, Platform.getManagementServerId())); loadHost(); } @Override public void iAmDead(String nodeId) { } private Bucket getHostManagedByUs() { int qun = 10000; long amount = dbf.count(HostVO.class); int times = (int) (amount / qun) + (amount % qun != 0 ? 1 : 0); List<String> connected = new ArrayList<String>(); List<String> disconnected = new ArrayList<String>(); int start = 0; for (int i = 0; i < times; i++) { SimpleQuery<HostVO> q = dbf.createQuery(HostVO.class); q.select(HostVO_.uuid, HostVO_.status); q.setLimit(qun); q.setStart(start); List<Tuple> lst = q.listTuple(); start += qun; for (Tuple t : lst) { String huuid = t.get(0, String.class); if (!destMaker.isManagedByUs(huuid)) { continue; } HostStatus state = t.get(1, HostStatus.class); if (state == HostStatus.Connected) { connected.add(huuid); } else { // for Disconnected and Connecting, treat as Disconnected disconnected.add(huuid); } } } return Bucket.newBucket(connected, disconnected); } private void loadHost() { Bucket hosts = getHostManagedByUs(); List<String> connected = hosts.get(0); List<String> disconnected = hosts.get(1); List<String> hostsToLoad = new ArrayList<String>(); if (CoreGlobalProperty.UNIT_TEST_ON) { hostsToLoad.addAll(connected); hostsToLoad.addAll(disconnected); } else { if (HostGlobalConfig.RECONNECT_ALL_ON_BOOT.value(Boolean.class)) { hostsToLoad.addAll(connected); hostsToLoad.addAll(disconnected); } else { hostsToLoad.addAll(disconnected); tracker.trackHost(connected); } } if (hostsToLoad.isEmpty()) { return; } String serviceId = bus.makeLocalServiceId(HostConstant.SERVICE_ID); final List<ConnectHostMsg> msgs = new ArrayList<ConnectHostMsg>(hostsToLoad.size()); for (String uuid : hostsToLoad) { ConnectHostMsg connectMsg = new ConnectHostMsg(uuid); connectMsg.setNewAdd(false); connectMsg.setServiceId(serviceId); connectMsg.setStartPingTaskOnFailure(true); msgs.add(connectMsg); } bus.send(msgs, HostGlobalConfig.HOST_LOAD_PARALLELISM_DEGREE.value(Integer.class), new CloudBusSteppingCallback(null) { @Override public void run(NeedReplyMessage msg, MessageReply reply) { ConnectHostMsg cmsg = (ConnectHostMsg) msg; if (!reply.isSuccess()) { logger.warn(String.format("failed to load host[uuid:%s], %s", cmsg.getHostUuid(), reply.getError())); } else { logger.debug(String.format("host[uuid:%s] load successfully", cmsg.getHostUuid())); } } }); } @Override public void iJoin(String nodeId) { } public HypervisorFactory getHypervisorFactory(HypervisorType type) { HypervisorFactory factory = hypervisorFactories.get(type.toString()); if (factory == null) { throw new CloudRuntimeException("No factory for hypervisor: " + type + " found, check your HypervisorManager.xml"); } return factory; } @Override @AsyncThread public void managementNodeReady() { logger.debug(String.format("Management node[uuid:%s] joins, start loading host...", Platform.getManagementServerId())); loadHost(); } @Override public HostBaseExtensionFactory getHostBaseExtensionFactory(Message msg) { return hostBaseExtensionFactories.get(msg.getClass()); } }