/***************************************************************************
* Copyright (c) 2012-2015 VMware, Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
***************************************************************************/
package com.vmware.bdd.service.job;
import com.vmware.aurora.vc.VcCache;
import com.vmware.aurora.vc.VcVirtualMachine;
import com.vmware.bdd.apitypes.ClusterCreate;
import com.vmware.bdd.apitypes.NodeStatus;
import com.vmware.bdd.entity.NodeEntity;
import com.vmware.bdd.exception.BddException;
import com.vmware.bdd.exception.ClusteringServiceException;
import com.vmware.bdd.manager.ClusterConfigManager;
import com.vmware.bdd.manager.SoftwareManagerCollector;
import com.vmware.bdd.manager.intf.IClusterEntityManager;
import com.vmware.bdd.placement.entity.BaseNode;
import com.vmware.bdd.service.IClusteringService;
import com.vmware.bdd.software.mgmt.plugin.intf.SoftwareManager;
import com.vmware.bdd.software.mgmt.plugin.model.ClusterBlueprint;
import com.vmware.bdd.software.mgmt.plugin.monitor.ClusterReportQueue;
import com.vmware.bdd.utils.CommonUtil;
import com.vmware.bdd.utils.Constants;
import com.vmware.bdd.utils.JobUtils;
import com.vmware.bdd.utils.VcVmUtil;
import org.springframework.batch.core.scope.context.ChunkContext;
import org.springframework.batch.repeat.RepeatStatus;
import org.springframework.beans.factory.annotation.Autowired;
import java.util.*;
public class ExpandClusterRemoveBadNodeStep extends TrackableTasklet {
private IClusteringService clusteringService;
private ClusterConfigManager configMgr;
private SoftwareManagerCollector softwareMgrs;
@Autowired
public void setSoftwareMgrs(SoftwareManagerCollector softwareMgrs) {
this.softwareMgrs = softwareMgrs;
}
@Override
public RepeatStatus executeStep(ChunkContext chunkContext,
JobExecutionStatusHolder jobExecutionStatusHolder) throws Exception {
String clusterName =
getJobParameters(chunkContext).getString(
JobConstants.CLUSTER_NAME_JOB_PARAM);
String nodeGroupNameList =
TrackableTasklet.getJobParameters(chunkContext).getString(
JobConstants.NEW_NODE_GROUP_LIST_JOB_PARAM);
List<String> nodeGroupNames = new ArrayList<String>();
Map<String, Set<String>> occupiedIps = new HashMap<String, Set<String>>();
for (String nodeGroupName : nodeGroupNameList.split(",")){
nodeGroupNames.add(nodeGroupName);
}
ClusterCreate clusterSpec = configMgr.getClusterConfig(clusterName);
List<BaseNode> existingNodes = JobUtils.getExistingNodes(
clusterSpec, getClusterEntityMgr());
List<BaseNode> deletedNodes = new ArrayList<BaseNode>();
for (String groupName: nodeGroupNames) {
long newInstanceNum = clusterSpec.getNodeGroup(groupName).getInstanceNum();
removeExcessiveOrWrongStatusNodes(existingNodes,
deletedNodes, groupName, newInstanceNum);
JobUtils.removeNonExistNodes(existingNodes, occupiedIps);
}
StatusUpdater statusUpdator = new DefaultStatusUpdater(
jobExecutionStatusHolder, getJobExecutionId(chunkContext));
deleteServices(getClusterEntityMgr(),
softwareMgrs.getSoftwareManagerByClusterName(clusterName),
deletedNodes);
boolean deleted = false;
try {
deleted = clusteringService.syncDeleteVMs(deletedNodes,
statusUpdator, false);
} catch (BddException e) {
String errMsg = "Failed to remove bad nodes for expanding cluster " + clusterName + ": " + e.getMessage();
JobUtils.recordErrorInClusterOperation(chunkContext, errMsg);
if (!JobUtils.getJobParameterForceClusterOperation(chunkContext)) {
throw e;
}
}
putIntoJobExecutionContext(chunkContext,
JobConstants.CLUSTER_EXISTING_NODES_JOB_PARAM, existingNodes);
putIntoJobExecutionContext(chunkContext,
JobConstants.CLUSTER_SPEC_JOB_PARAM, clusterSpec);
putIntoJobExecutionContext(chunkContext,
JobConstants.CLUSTER_USED_IP_JOB_PARAM, occupiedIps);
putIntoJobExecutionContext(chunkContext,
JobConstants.CLUSTER_DELETED_NODES_JOB_PARAM, deletedNodes);
putIntoJobExecutionContext(chunkContext,
JobConstants.CLUSTER_DELETE_VM_OPERATION_SUCCESS, deleted);
return RepeatStatus.FINISHED;
}
private void removeExcessiveOrWrongStatusNodes(List<BaseNode> existingNodes,
List<BaseNode> deletedNodes, String groupName,
long newInstanceNum) {
for(BaseNode node : existingNodes) {
if (node.getGroupName().equals(groupName)) {
long index = CommonUtil.getVmIndex(node.getVmName());
if (index >= newInstanceNum) {
deletedNodes.add(node);
continue;
}
if (node.getVmMobId() == null) {
deletedNodes.add(node);
continue;
}
VcVirtualMachine vm = VcCache.getIgnoreMissing(node.getVmMobId());
Set<String> ips = VcVmUtil.getAllIpAddresses(vm, node.getNics().keySet(), false);
if (vm == null
|| (!vm.isPoweredOn())
|| ips.contains(Constants.NULL_IPV4_ADDRESS)) {
deletedNodes.add(node);
continue;
}
}
}
existingNodes.removeAll(deletedNodes);
}
public IClusteringService getClusteringService() {
return clusteringService;
}
public void setClusteringService(IClusteringService clusteringService) {
this.clusteringService = clusteringService;
}
public ClusterConfigManager getConfigMgr() {
return configMgr;
}
public void setConfigMgr(ClusterConfigManager configMgr) {
this.configMgr = configMgr;
}
public static void deleteServices(IClusterEntityManager clusterEntityMgr,
SoftwareManager softMgr, List<BaseNode> toBeDeleted) {
if (toBeDeleted.isEmpty()) {
return;
}
ClusterBlueprint blueprint =
clusterEntityMgr.toClusterBluePrint(toBeDeleted.get(0)
.getClusterName());
ClusterReportQueue queue = new ClusterReportQueue();
List<String> nodeNames = new ArrayList<>();
for (BaseNode node : toBeDeleted) {
if (node.getVmMobId() != null) {
nodeNames.add(node.getVmName());
}
}
try {
softMgr.onDeleteNodes(blueprint, nodeNames);
} catch (Exception e) {
logger.error("Failed to delete services on bad nodes: " + nodeNames);
}
}
}