/*************************************************************************** * Copyright (c) 2012-2014 VMware, Inc. All Rights Reserved. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ***************************************************************************/ package com.vmware.bdd.manager; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.concurrent.TimeoutException; import org.apache.log4j.Logger; import org.springframework.batch.core.BatchStatus; import org.springframework.batch.core.Job; import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.JobExecutionListener; import org.springframework.batch.core.JobParameter; import org.springframework.batch.core.JobParameters; import org.springframework.batch.core.StepExecutionListener; import org.springframework.batch.core.configuration.JobFactory; import org.springframework.batch.core.configuration.JobRegistry; import org.springframework.batch.core.configuration.support.ReferenceJobFactory; import org.springframework.batch.core.explore.JobExplorer; import org.springframework.batch.core.job.SimpleJob; import org.springframework.batch.core.launch.JobLauncher; import org.springframework.batch.core.launch.JobOperator; import org.springframework.batch.core.launch.NoSuchJobException; import org.springframework.batch.core.repository.JobRepository; import org.springframework.batch.core.step.job.JobParametersExtractor; import org.springframework.beans.factory.annotation.Autowired; import com.vmware.bdd.apitypes.ClusterRead; import com.vmware.bdd.apitypes.ClusterStatus; import com.vmware.bdd.apitypes.TaskRead; import com.vmware.bdd.apitypes.TaskRead.Status; import com.vmware.bdd.apitypes.TaskRead.Type; import com.vmware.bdd.exception.BddException; import com.vmware.bdd.exception.TaskException; import com.vmware.bdd.manager.intf.IClusterEntityManager; import com.vmware.bdd.service.job.JobConstants; import com.vmware.bdd.service.job.JobExecutionStatusHolder; import com.vmware.bdd.service.job.NodeOperationStatus; import com.vmware.bdd.service.job.SimpleStepExecutionListener; import com.vmware.bdd.service.job.SubJobStep; import com.vmware.bdd.service.job.TrackableTasklet; import com.vmware.bdd.utils.JobUtils; public class JobManager { static final Logger logger = Logger.getLogger(JobManager.class); private JobRepository jobRepository; private JobLauncher jobLauncher; private JobExplorer jobExplorer; private JobOperator jobOperator; private JobRegistry jobRegistry; private JobExecutionStatusHolder jobExecutionStatusHolder; private JobExecutionStatusHolder mainJobExecutionStatusHolder; private JobParametersExtractor jobParametersExtractor; private JobExecutionListener mainJobExecutionListener; @Autowired private IClusterEntityManager clusterEntityMgr; /** * Run a new job * * @param jobName * job name * @param param * job parameters * @return jobExecution id * @throws Exception */ public long runJob(String jobName, JobParameters param) throws Exception { // TODO handle errors Job job = jobRegistry.getJob(jobName); return jobLauncher.run(job, param).getId(); } /** * Run Spring Batch job with sub job. The number of sub jobs is size of * "jobParamtersList". Every element in the "jobParametersList" will be the * job parameters for one sub job. * * @param jobName * sub job name * @param jobParametersList * List of job parameters for the sub job * @param clusterName * cluster name * @param sucessStatus * the status to be set on the cluster if job success * @param failStatus * the status to be set on the cluster if job fail * @return job execution id * @throws Exception */ public long runSubJobForNodes(String jobName, List<JobParameters> jobParametersList, String clusterName, ClusterStatus successStatus, ClusterStatus failStatus) throws Exception { return createAndLaunchJobWithSubJob(clusterName, jobName, jobParametersList, successStatus, failStatus); } /** * Run Spring Batch job with sub job. * * @param jobName * the Spring Batch job name * @param param * job parameters * @param subJobName * sub job name * @param sucessStatus * the status to be set on the cluster if job success * @param failStatus * the status to be set on the cluster if job fail * @return job exection id * @throws Exception */ public long runJobWithSubJob(String jobName, JobParameters param, String subJobName, ClusterStatus successStatus, ClusterStatus failStatus) throws Exception { logger.debug("::runJobWithSubJob: " + jobName + ", subJobName: " + subJobName); long result = Long.MIN_VALUE; JobParameter clusterNameParameter = param.getParameters().get(JobConstants.CLUSTER_NAME_JOB_PARAM); String clusterName = (String) clusterNameParameter.getValue(); Job preparingJob = jobRegistry.getJob(jobName); JobExecution preparingJobExecution = jobLauncher.run(preparingJob, param); int subJobNumber = 0; waitJobExecution(preparingJobExecution.getId(), Long.MAX_VALUE); if (preparingJobExecution.getStatus() == BatchStatus.COMPLETED) { subJobNumber = preparingJobExecution.getExecutionContext().getInt( (JobConstants.SUB_JOB_NUMBER)); if (subJobNumber > 0) { logger.debug("sub job number: " + subJobNumber); List<JobParameters> subJobParametersList = new ArrayList<JobParameters>(); for (int i = 0; i < subJobNumber; i++) { JobParameters subJobParameters = (JobParameters) preparingJobExecution .getExecutionContext() .get(JobConstants.SUB_JOB_PARAMETERS_KEY_PREFIX + i); subJobParametersList.add(subJobParameters); } result = createAndLaunchJobWithSubJob(clusterName, subJobName, subJobParametersList, successStatus, failStatus); } } if (result == Long.MIN_VALUE) { logger.warn("Failure in preparing sub jobs"); throw TaskException.EXECUTION_FAILED("failed to prepare sub jobs."); } return result; } /** * Create new Spring Batch job to execute sub jobs. The sub job name is * "subJobName", the number of sub jobs is size of "subJobParameters". One * element in the "subJobParameters" will be the JobParameters of sub job. * * @param clusterName * cluster name * @param subJobName * sub job name * @param subJobParameters * sub job parameter * @param sucessStatus * the status to be set on the cluster if job success * @param failStatus * the status to be set on the cluster if job fail * @return job execution id * @throws Exception */ private synchronized long createAndLaunchJobWithSubJob(String clusterName, String subJobName, List<JobParameters> subJobParameters, ClusterStatus successStatus, ClusterStatus failStatus) throws Exception { SimpleJob mainJob = new SimpleJob("composed-job-" + clusterName + "-" + subJobName + "-" + System.nanoTime()); //SimpleJob mainJob = new SimpleJob("composed-job-" + clusterName + "-" + subJobName); StepExecutionListener[] jobStepListeners = createJobStepListener(); Map<String, JobParameter> mainJobParams = new TreeMap<String, JobParameter>(); mainJobParams.put(JobConstants.TIMESTAMP_JOB_PARAM, new JobParameter( new Date())); mainJobParams.put(JobConstants.CLUSTER_NAME_JOB_PARAM, new JobParameter( clusterName)); mainJobParams.put(JobConstants.CLUSTER_SUCCESS_STATUS_JOB_PARAM, new JobParameter(successStatus.name())); mainJobParams.put(JobConstants.CLUSTER_FAILURE_STATUS_JOB_PARAM, new JobParameter(failStatus.name())); //enable sub job indicator to for job progress query mainJobParams.put(JobConstants.SUB_JOB_ENABLED, new JobParameter(1l)); Job subJob = jobRegistry.getJob(subJobName); for (int stepNumber = 0, j = subJobParameters.size(); stepNumber < j; stepNumber++) { SubJobStep subJobStep = new SubJobStep(); subJobStep.setName(subJobName + "-subJobStep-" + stepNumber); subJobStep.setJob(subJob); subJobStep.setJobParametersExtractor(jobParametersExtractor); subJobStep.setJobExecutionStatusHolder(jobExecutionStatusHolder); subJobStep .setMainJobExecutionStatusHolder(mainJobExecutionStatusHolder); subJobStep.setJobLauncher(jobLauncher); subJobStep.setJobRepository(jobRepository); subJobStep.setStepExecutionListeners(jobStepListeners); subJobStep.afterPropertiesSet(); mainJob.addStep(subJobStep); logger.debug("added sub job step: " + subJobStep.getName()); int subJobParametersNumber = subJobParameters.get(stepNumber).getParameters().keySet().size(); mainJobParams.put(JobConstants.SUB_JOB_PARAMETERS_NUMBER + stepNumber, new JobParameter((long) subJobParametersNumber)); int count = 0; for (String key : subJobParameters.get(stepNumber).getParameters() .keySet()) { int index = count++; mainJobParams.put( JobUtils.getSubJobParameterPrefixKey(stepNumber, index), new JobParameter(key)); mainJobParams.put( JobUtils.getSubJobParameterPrefixValue(stepNumber, index), subJobParameters.get(stepNumber).getParameters().get(key)); } } mainJob .setJobExecutionListeners(new JobExecutionListener[] { mainJobExecutionListener }); mainJob.setJobRepository(jobRepository); mainJob.afterPropertiesSet(); JobFactory jobFactory = new ReferenceJobFactory(mainJob); jobRegistry.register(jobFactory); logger.info("registered job: " + mainJob.getName()); JobParameters mainJobParameters = new JobParameters(mainJobParams); JobExecution mainJobExecution = jobLauncher.run(mainJob, mainJobParameters); logger.info("launched main job: " + mainJob.getName()); return mainJobExecution.getId(); } private StepExecutionListener[] createJobStepListener() { SimpleStepExecutionListener jobStepListener = new SimpleStepExecutionListener(); jobStepListener.setJobRegistry(jobRegistry); jobStepListener.setJobExecutionStatusHolder(mainJobExecutionStatusHolder); return new SimpleStepExecutionListener[] { jobStepListener }; } /** * Try to stop a jobExecution * * @param jobExecutionId * jobExecution Id * @return true if the message was successfully sent (does not guarantee that * the job has stopped) * @throws Exception */ public boolean stopJobExecution(long jobExecutionId) throws Exception { // TODO handle errors return jobOperator.stop(jobExecutionId); } /** * Restart a jobExecution * * @param jobExecutionId * old jobExecution id * @return new jobExecution id * @throws Exception */ public long restartJobExecution(long jobExecutionId) throws Exception { // TODO handle errors return jobOperator.restart(jobExecutionId); } /** * Get job execution status * * @param jobExecutionId * @return job status * @throws NoSuchJobException */ public TaskRead getJobExecutionStatus(long jobExecutionId) { JobExecution jobExecution = jobExplorer.getJobExecution(jobExecutionId); if (jobExecution == null) { throw BddException.NOT_FOUND("Task", Long.toString(jobExecutionId)); } TaskRead jobStatus = new TaskRead(); jobStatus.setId(jobExecutionId); //identify VHM jobs String jobName = jobExecution.getJobInstance().getJobName(); if (jobName.equals(JobConstants.SET_MANUAL_ELASTICITY_JOB_NAME)) { jobStatus.setType(Type.VHM); } else if (jobName.equals(JobConstants.DELETE_CLUSTER_JOB_NAME)) { jobStatus.setType(Type.DELETE); } else if (jobName.contains(JobConstants.SHRINK_CLUSTER_JOB_NAME)) { jobStatus.setType(Type.SHRINK); } JobParameters jobParameters = jobExecution.getJobInstance().getJobParameters(); String clusterName = jobParameters.getString(JobConstants.CLUSTER_NAME_JOB_PARAM); jobStatus.setTarget(clusterName); long subJobEnabled = jobParameters.getLong(JobConstants.SUB_JOB_ENABLED); if (subJobEnabled != 1) { jobStatus.setProgress(jobExecutionStatusHolder .getCurrentProgress(jobExecutionId)); } else { jobStatus.setProgress(mainJobExecutionStatusHolder .getCurrentProgress(jobExecutionId)); } Status status = null; switch (jobExecution.getStatus()) { case ABANDONED: status = Status.ABANDONED; break; case COMPLETED: status = Status.COMPLETED; break; case FAILED: status = Status.FAILED; break; case STARTED: status = Status.STARTED; break; case STARTING: status = Status.STARTING; break; case STOPPED: status = Status.STOPPED; break; case STOPPING: status = Status.STOPPING; break; case UNKNOWN: default: status = Status.UNKNOWN; } jobStatus.setStatus(status); if (subJobEnabled == 1) { List<NodeOperationStatus> succeedNodes = (ArrayList<NodeOperationStatus>) jobExecution.getExecutionContext() .get(JobConstants.SUB_JOB_NODES_SUCCEED); List<NodeOperationStatus> failNodes = (ArrayList<NodeOperationStatus>) jobExecution.getExecutionContext() .get(JobConstants.SUB_JOB_NODES_FAIL); if (succeedNodes != null) { jobStatus.setSucceedNodes(convert(succeedNodes)); } if (failNodes != null) { jobStatus.setFailNodes(convert(failNodes)); } } if (status.equals(Status.FAILED) && subJobEnabled != 1) { String workDir = TrackableTasklet.getFromJobExecutionContext( jobExecution.getExecutionContext(), JobConstants.CURRENT_COMMAND_WORK_DIR, String.class); String errorMessage = TrackableTasklet.getFromJobExecutionContext( jobExecution.getExecutionContext(), JobConstants.CURRENT_ERROR_MESSAGE, String.class); jobStatus.setErrorMessage(errorMessage); jobStatus.setWorkDir(workDir); logger.error("mark task as failed: " + errorMessage); } return jobStatus; } private List<TaskRead.NodeStatus> convert(List<NodeOperationStatus> subJobStatus) { List<TaskRead.NodeStatus> result = new ArrayList<TaskRead.NodeStatus>(); for (NodeOperationStatus status : subJobStatus) { TaskRead.NodeStatus nodeStatus = new TaskRead.NodeStatus(status.getNodeName(), status.isSucceed(), status.getErrorMessage()); result.add(nodeStatus); } return result; } /** * the latest_task_id attribute of a cluster entity records the latest job id * the cluster executes * * @return */ public List<TaskRead> getLatestTaskForExistedClusters() { List<Long> taskIds = clusterEntityMgr.getLatestTaskIds(); List<TaskRead> taskReads = new ArrayList<TaskRead>(taskIds.size()); for (Long id : taskIds) { if (id == null) continue; TaskRead task = getJobExecutionStatus(id); if (task.getType() == null) { task.setType(Type.INNER); } if (task.getStatus() == TaskRead.Status.COMPLETED) { task.setProgress(1.0); } taskReads.add(task); } return taskReads; } /** * Wait for job execution to finish. * * @param jobExecutionId * @param timeoutSec * @return job result status * @throws TimeoutException */ public TaskRead waitJobExecution(long jobExecutionId, long timeoutMs) throws TimeoutException { long start = System.currentTimeMillis(); while (true) { TaskRead tr = getJobExecutionStatus(jobExecutionId); Status status = tr.getStatus(); if (Status.ABANDONED.equals(status) || Status.COMPLETED.equals(status) || Status.FAILED.equals(status) || Status.STOPPED.equals(status)) { return tr; } long now = System.currentTimeMillis(); if (now - start >= timeoutMs) { throw new TimeoutException("wait for job finish timeout"); } try { Thread.sleep(3000); } catch (InterruptedException e) { } } } public JobRepository getJobRepository() { return jobRepository; } public void setJobRepository(JobRepository jobRepository) { this.jobRepository = jobRepository; } public JobLauncher getJobLauncher() { return jobLauncher; } public void setJobLauncher(JobLauncher jobLauncher) { this.jobLauncher = jobLauncher; } public JobExplorer getJobExplorer() { return jobExplorer; } public void setJobExplorer(JobExplorer jobExplorer) { this.jobExplorer = jobExplorer; } public JobOperator getJobOperator() { return jobOperator; } /** * @return the jobParametersExtractor */ public JobParametersExtractor getJobParametersExtractor() { return jobParametersExtractor; } /** * @param jobParametersExtractor * the jobParametersExtractor to set */ public void setJobParametersExtractor( JobParametersExtractor jobParametersExtractor) { this.jobParametersExtractor = jobParametersExtractor; } public void setJobOperator(JobOperator jobOperator) { this.jobOperator = jobOperator; } public JobRegistry getJobRegistry() { return jobRegistry; } public void setJobRegistry(JobRegistry jobRegistry) { this.jobRegistry = jobRegistry; } public JobExecutionStatusHolder getJobExecutionStatusHolder() { return jobExecutionStatusHolder; } public void setJobExecutionStatusHolder( JobExecutionStatusHolder jobExecutionStatusHolder) { this.jobExecutionStatusHolder = jobExecutionStatusHolder; } /** * @return the mainJobExecutionStatusHolder */ public JobExecutionStatusHolder getMainJobExecutionStatusHolder() { return mainJobExecutionStatusHolder; } /** * @param mainJobExecutionStatusHolder * the mainJobExecutionStatusHolder to set */ public void setMainJobExecutionStatusHolder( JobExecutionStatusHolder subJobExecutionStatusHolder) { this.mainJobExecutionStatusHolder = subJobExecutionStatusHolder; } public IClusterEntityManager getClusterEntityMgr() { return clusterEntityMgr; } public void setClusterEntityMgr(IClusterEntityManager clusterEntityMgr) { this.clusterEntityMgr = clusterEntityMgr; } /** * @return the mainJobExecutionListener */ public JobExecutionListener getMainJobExecutionListener() { return mainJobExecutionListener; } /** * @param mainJobExecutionListener * the mainJobExecutionListener to set */ public void setMainJobExecutionListener( JobExecutionListener mainJobExecutionListener) { this.mainJobExecutionListener = mainJobExecutionListener; } }