/*************************************************************************** * Copyright (c) 2012-2015 VMware, Inc. All Rights Reserved. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ***************************************************************************/ package com.vmware.bdd.service.job; import com.vmware.aurora.util.AuAssert; import com.vmware.bdd.apitypes.NodeStatus; import com.vmware.bdd.entity.NodeEntity; import com.vmware.bdd.utils.CommonUtil; import org.apache.log4j.Logger; import org.springframework.batch.core.ExitStatus; import org.springframework.batch.core.JobExecution; import com.vmware.bdd.entity.ClusterEntity; import com.vmware.bdd.entity.NodeGroupEntity; import java.util.ArrayList; import java.util.List; public class ResizeClusterJobExecutionListener extends ClusterJobExecutionListener { private static final Logger logger = Logger .getLogger(ResizeClusterJobExecutionListener.class); public void afterJob(JobExecution je) { super.afterJob(je); Boolean success = TrackableTasklet.getFromJobExecutionContext( je.getExecutionContext(), JobConstants.CLUSTER_OPERATION_SUCCESS, Boolean.class); final String clusterName = getJobParameters(je).getString(JobConstants.CLUSTER_NAME_JOB_PARAM); final String groupName = getJobParameters(je).getString(JobConstants.GROUP_NAME_JOB_PARAM); if (success == null || success) { success = (je.getExitStatus().equals(ExitStatus.COMPLETED)); } final Long oldInstanceNum = getJobParameters(je).getLong( JobConstants.GROUP_INSTANCE_OLD_NUMBER_JOB_PARAM, 0); if (!success || hasNotReadyNodes(je)) { // TODO: need to discuss whether to check the software provision status // || hasSoftwareBootstapFailedNodes(clusterName, groupName, oldInstanceNum)) { logger.warn("resize cluster failed, revert to the original defined instance number " + oldInstanceNum.intValue()); updateDefinedInstanceNum(clusterName, groupName, oldInstanceNum); } } //verify if there is not vm ready nodes by 2 steps, first step is to verify vm creation //second step is verify vm status == VM_READY. Those two parameters are put into job context in //CreateClusterVMStep and ClusterUpdateDataStep. As the vm status depends on vm creation, so you have //to verify VMs' creation, then verify VMs status private boolean hasNotReadyNodes(JobExecution je) { boolean createVMSuccess = TrackableTasklet.getFromJobExecutionContext( je.getExecutionContext(), JobConstants.CLUSTER_CREATE_VM_OPERATION_SUCCESS, Boolean.class); if (!createVMSuccess) { logger.info("Some VMs are not created successfully in cluster scale out"); return true; } boolean allNewVMsAreVMReady = TrackableTasklet.getFromJobExecutionContext( je.getExecutionContext(), JobConstants.VERIFY_NODE_STATUS_RESULT_PARAM, Boolean.class); if (!allNewVMsAreVMReady) { logger.info("Some VMs are not VM_READY in cluster scale out"); return true; } return false; } private boolean hasSoftwareBootstapFailedNodes(String cluster, String nodegroup, Long oldInstanceNum) { List<NodeEntity> nodes = getClusterEntityMgr().findAllNodes(cluster, nodegroup); ArrayList<String> serviceFailedNodeNames = new ArrayList<>(); for (NodeEntity node: nodes) { long index = CommonUtil.getVmIndex(node.getVmName()); if (index < oldInstanceNum) { // do not verify existing nodes from last successful deployment continue; } //for cloudera/ambari cluster scale out operation, the status is correct //for ironfan deployed cluster, the NodeStatus might be not accurate sometimes //But in this case, as we judge whether to rollback db by VM creation/VM_READY/SERVICE_READY, //there will be no problem here if (node.getStatus().ordinal() < NodeStatus.SERVICE_READY.ordinal()) { serviceFailedNodeNames.add(node.getVmName()); } } if (serviceFailedNodeNames.isEmpty()) { return false; } else { logger.info(" The following VMs " + serviceFailedNodeNames.toString() + " are not SERVICE_READY"); return true; } } private void updateDefinedInstanceNum(String clusterName, String groupName, Long instanceNum) { int intNum = instanceNum.intValue(); AuAssert.check(intNum > 0, String.format("The instance number %s should be larger than 0", intNum)); ClusterEntity cluster = getClusterEntityMgr().findByName(clusterName); NodeGroupEntity groupEntity = getClusterEntityMgr().findByName(cluster, groupName); logger.info("Set cluster " + clusterName + " group " + groupName + " instance number from " + groupEntity.getDefineInstanceNum() + " to " + intNum); groupEntity.setDefineInstanceNum(intNum); getClusterEntityMgr().update(groupEntity); } }