/** * Copyright 2016 benjobs * <p> * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * <p> * http://www.apache.org/licenses/LICENSE-2.0 * <p> * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.opencron.server.service; import org.opencron.common.exception.PingException; import org.opencron.common.job.Action; import org.opencron.common.job.Request; import org.opencron.common.job.Response; import org.opencron.common.utils.ParamsMap; import org.opencron.server.domain.Record; import org.opencron.server.domain.Agent; import org.opencron.server.domain.User; import org.opencron.server.job.OpencronCaller; import org.opencron.server.job.OpencronMonitor; import org.opencron.server.vo.JobVo; import com.mysql.jdbc.PacketTooBigException; import org.quartz.Job; import org.quartz.JobExecutionContext; import org.quartz.JobExecutionException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import java.util.*; import java.util.concurrent.LinkedBlockingQueue; import static org.opencron.common.job.Opencron.*; @Service public class ExecuteService implements Job { private Logger logger = LoggerFactory.getLogger(getClass()); @Autowired private RecordService recordService; @Autowired private JobService jobService; @Autowired private NoticeService noticeService; @Autowired private OpencronCaller opencronCaller; @Autowired private AgentService agentService; @Autowired private UserService userService; private Map<Long, Integer> reExecuteThreadMap = new HashMap<Long, Integer>(0); private static final String PACKETTOOBIG_ERROR = "在向MySQL数据库插入数据量过多,需要设定max_allowed_packet"; @Override public void execute(JobExecutionContext jobExecutionContext) throws JobExecutionException { String key = jobExecutionContext.getJobDetail().getKey().getName(); JobVo jobVo = (JobVo) jobExecutionContext.getJobDetail().getJobDataMap().get(key); try { ExecuteService executeService = (ExecuteService) jobExecutionContext.getJobDetail().getJobDataMap().get("jobBean"); boolean success = executeService.executeJob(jobVo); this.loggerInfo("[opencron] job:{} at {}:{},execute:{}", jobVo, success ? "successful" : "failed"); } catch (Exception e) { logger.error(e.getLocalizedMessage(), e); } } /** * 基本方式执行任务,按任务类型区分 */ public boolean executeJob(final JobVo job) { JobType jobType = JobType.getJobType(job.getJobType()); switch (jobType) { case SINGLETON: return executeSingleJob(job, job.getUserId());//单一任务 case FLOW: return executeFlowJob(job);//流程任务 default: return false; } } /** * 单一任务执行过程 */ private boolean executeSingleJob(JobVo job, Long userId) { if (!checkJobPermission(job.getAgentId(), userId)) return false; Record record = new Record(job); record.setJobType(JobType.SINGLETON.getCode());//单一任务 try { //执行前先保存 record = recordService.merge(record); //执行前先检测一次通信是否正常 checkPing(job, record); Response response = responseToRecord(job, record); recordService.merge(record); if (!response.isSuccess()) { //当前的单一任务只运行一次未设置重跑. if (job.getRedo() == 0 || job.getRunCount() == 0) { noticeService.notice(job, null); } this.loggerInfo("execute failed:jobName:{} at ip:{},port:{},info:{}", job, record.getMessage()); return false; } else { this.loggerInfo("execute successful:jobName:{} at ip:{},port:{}", job, null); } } catch (PacketTooBigException e) { noticeService.notice(job, PACKETTOOBIG_ERROR); this.loggerError("execute failed:jobName:%s at ip:%s,port:%d,info:%s", job, PACKETTOOBIG_ERROR, e); } catch (Exception e) { if (job.getRedo() == 0 || job.getRunCount() == 0) { noticeService.notice(job, null); } this.loggerError("execute failed:jobName:%s at ip:%s,port:%d,info:%s", job, e.getMessage(), e); } return record.getSuccess().equals(ResultStatus.SUCCESSFUL.getStatus()); } /** * 流程任务 按流程任务处理方式区分 */ private boolean executeFlowJob(JobVo job) { if (!checkJobPermission(job.getAgentId(), job.getUserId())) return false; final long groupId = System.nanoTime() + Math.abs(new Random().nextInt());//分配一个流程组Id final Queue<JobVo> jobQueue = new LinkedBlockingQueue<JobVo>(); jobQueue.add(job); jobQueue.addAll(job.getChildren()); RunModel runModel = RunModel.getRunModel(job.getRunModel()); switch (runModel) { case SEQUENCE: return executeSequenceJob(groupId, jobQueue);//串行任务 case SAMETIME: return executeSameTimeJob(groupId, jobQueue);//并行任务 default: return false; } } /** * 串行任务处理方式 */ private boolean executeSequenceJob(long groupId, Queue<JobVo> jobQueue) { for (JobVo jobVo : jobQueue) { if (!doFlowJob(jobVo, groupId)) { return false; } } return true; } /** * 并行任务处理方式 */ private boolean executeSameTimeJob(final long groupId, final Queue<JobVo> jobQueue) { final List<Boolean> result = new ArrayList<Boolean>(0); Thread jobThread = new Thread(new Runnable() { @Override public void run() { for (final JobVo jobVo : jobQueue) { //如果子任务是并行(则启动多线程,所有子任务同时执行) Thread thread = new Thread(new Runnable() { public void run() { result.add(doFlowJob(jobVo, groupId)); } }); thread.start(); } } }); jobThread.start(); //确保所有的现场执行作业都全部执行完毕,拿到返回的执行结果。检查并行任务中有是否失败的... try { jobThread.join(); } catch (InterruptedException e) { logger.error("[opencron] job rumModel with SAMETIME error:{}", e.getMessage()); } return !result.contains(false); } /** * 流程任务(通用)执行过程 */ private boolean doFlowJob(JobVo job, long groupId) { Record record = new Record(job); record.setGroupId(groupId);//组Id record.setJobType(JobType.FLOW.getCode());//流程任务 record.setFlowNum(job.getFlowNum()); boolean success = true; try { //执行前先保存 record = recordService.merge(record); //执行前先检测一次通信是否正常 checkPing(job, record); Response result = responseToRecord(job, record); if (!result.isSuccess()) { recordService.merge(record); //被kill,直接退出 if (StatusCode.KILL.getValue().equals(result.getExitCode())) { recordService.flowJobDone(record); } else { success = false; } return false; } else { //当前任务是流程任务的最后一个任务,则整个任务运行完毕 if (job.getLastChild()) { recordService.merge(record); recordService.flowJobDone(record); } else { //当前任务非流程任务最后一个子任务,全部流程任务为运行中... record.setStatus(RunStatus.RUNNING.getStatus()); recordService.merge(record); } return true; } } catch (PingException e) { recordService.flowJobDone(record);//通信失败,流程任务挂起. return false; } catch (Exception e) { if (e instanceof PacketTooBigException) { record.setMessage(this.loggerError("execute failed(flow job):jobName:%s at ip:%s,port:%d,info:", job, PACKETTOOBIG_ERROR, e)); } else { record.setMessage(this.loggerError("execute failed(flow job):jobName:%s at ip:%s,port:%d,info:%s", job, e.getMessage(), e)); } record.setSuccess(ResultStatus.FAILED.getStatus());//程序调用失败 record.setReturnCode(StatusCode.ERROR_EXEC.getValue()); record.setEndTime(new Date()); recordService.merge(record); success = false; return false; } finally { //流程任务的重跑靠自身维护... if (!success) { Record red = recordService.get(record.getRecordId()); if (job.getRedo() == 1 && job.getRunCount() > 0) { int index = 0; boolean flag; do { flag = reExecuteJob(red, job, JobType.FLOW); ++index; } while (!flag && index < job.getRunCount()); //重跑到截止次数还是失败,则发送通知,记录最终运行结果 if (!flag) { noticeService.notice(job, null); recordService.flowJobDone(record); } } else { noticeService.notice(job, null); recordService.flowJobDone(record); } } } } /** * 多执行器同时 现场执行过程 */ public void batchExecuteJob(final Long userId, String command, String agentIds) { final Queue<JobVo> jobQueue = new LinkedBlockingQueue<JobVo>(); String[] arrayIds = agentIds.split(";"); for (String agentId : arrayIds) { Agent agent = agentService.getAgent(Long.parseLong(agentId)); JobVo jobVo = new JobVo(userId, command, agent); jobQueue.add(jobVo); } Thread jobThread = new Thread(new Runnable() { @Override public void run() { for (final JobVo jobVo : jobQueue) { //如果批量现场执行(则启动多线程,所有任务同时执行) Thread thread = new Thread(new Runnable() { public void run() { executeSingleJob(jobVo, userId); } }); thread.start(); } } }); jobThread.start(); } /** * 失败任务的重执行过程 */ public boolean reExecuteJob(final Record parentRecord, JobVo job, JobType jobType) { if (parentRecord.getRedoCount().equals(reExecuteThreadMap.get(parentRecord.getRecordId()))) { return false; } else { reExecuteThreadMap.put(parentRecord.getRecordId(), parentRecord.getRedoCount()); } parentRecord.setStatus(RunStatus.RERUNNING.getStatus()); Record record = new Record(job); try { recordService.merge(parentRecord); /** * 当前重新执行的新纪录 */ job.setExecType(ExecType.RERUN.getStatus()); record.setParentId(parentRecord.getRecordId()); record.setGroupId(parentRecord.getGroupId()); record.setJobType(jobType.getCode()); parentRecord.setRedoCount(parentRecord.getRedoCount() + 1);//运行次数 record.setRedoCount(parentRecord.getRedoCount()); record = recordService.merge(record); //执行前先检测一次通信是否正常 checkPing(job, record); Response result = responseToRecord(job, record); //当前重跑任务成功,则父记录执行完毕 if (result.isSuccess()) { parentRecord.setStatus(RunStatus.RERUNDONE.getStatus()); //重跑的某一个子任务被Kill,则整个重跑计划结束 } else if (StatusCode.KILL.getValue().equals(result.getExitCode())) { parentRecord.setStatus(RunStatus.RERUNDONE.getStatus()); } else { //已经重跑到最后一次了,还是失败了,则认为整个重跑任务失败,发送通知 if (parentRecord.getRunCount().equals(parentRecord.getRedoCount())) { noticeService.notice(job, null); } parentRecord.setStatus(RunStatus.RERUNUNDONE.getStatus()); } this.loggerInfo("execute successful:jobName:{} at ip:{},port:{}", job, null); } catch (Exception e) { if (e instanceof PacketTooBigException) { noticeService.notice(job, PACKETTOOBIG_ERROR); errorExec(record, this.loggerError("execute failed:jobName:%s at ip:%s,port:%d,info:%s", job, PACKETTOOBIG_ERROR, e)); } noticeService.notice(job, e.getMessage()); errorExec(record, this.loggerError("execute failed:jobName:%s at ip:%s,port:%d,info:%s", job, e.getMessage(), e)); } finally { //如果已经到了任务重跑的截至次数直接更新为已重跑完成 if (parentRecord.getRunCount().equals(parentRecord.getRedoCount())) { parentRecord.setStatus(RunStatus.RERUNDONE.getStatus()); } try { recordService.merge(record); recordService.merge(parentRecord); } catch (Exception e) { if (e instanceof PacketTooBigException) { record.setMessage(this.loggerError("execute failed(flow job):jobName:%s at ip:%s,port:%d,info:" + PACKETTOOBIG_ERROR, job, e.getMessage(), e)); } else { record.setMessage(this.loggerError("execute failed(flow job):jobName:%s at ip:%s,port:%d,info:%s", job, e.getMessage(), e)); } } } return record.getSuccess().equals(ResultStatus.SUCCESSFUL.getStatus()); } /** * 终止任务过程 */ public boolean killJob(Record record) { final Queue<Record> recordQueue = new LinkedBlockingQueue<Record>(); //单一任务 if (JobType.SINGLETON.getCode().equals(record.getJobType())) { recordQueue.add(record); } else if (JobType.FLOW.getCode().equals(record.getJobType())) { //流程任务 recordQueue.addAll(recordService.getRunningFlowJob(record.getRecordId())); } final List<Boolean> result = new ArrayList<Boolean>(0); Thread jobThread = new Thread(new Runnable() { @Override public void run() { for (final Record cord : recordQueue) { //如果kill并行任务(则启动多线程,所有任务同时kill) Thread thread = new Thread(new Runnable() { public void run() { //临时的改成停止中... cord.setStatus(RunStatus.STOPPING.getStatus());//停止中 cord.setSuccess(ResultStatus.KILLED.getStatus());//被杀. JobVo job = null; try { recordService.merge(cord); job = jobService.getJobVoById(cord.getJobId()); //向远程机器发送kill指令 opencronCaller.asyncCall(Request.request(job.getIp(), job.getPort(), Action.KILL, job.getPassword()).putParam("pid", cord.getPid()), job.getAgent()); cord.setStatus(RunStatus.STOPED.getStatus()); cord.setEndTime(new Date()); recordService.merge(cord); loggerInfo("killed successful :jobName:{} at ip:{},port:{},pid:{}", job, cord.getPid()); } catch (Exception e) { if (e instanceof PacketTooBigException) { noticeService.notice(job, PACKETTOOBIG_ERROR); loggerError("killed error:jobName:%s at ip:%s,port:%d,pid:%s", job, cord.getPid() + " failed info: " + PACKETTOOBIG_ERROR, e); } noticeService.notice(job, null); loggerError("killed error:jobName:%s at ip:%s,port:%d,pid:%s", job, cord.getPid() + " failed info: " + e.getMessage(), e); result.add(false); } } }); thread.start(); } } }); jobThread.start(); //确保所有的kill任务都执行完毕,拿到返回的执行结果。检查kill任务中有是否失败的... try { jobThread.join(); } catch (InterruptedException e) { logger.error("[opencron] kill job with error:{}", e.getMessage()); } return !result.contains(false); } /** * 向执行器发送请求,并封装响应结果 */ private Response responseToRecord(final JobVo job, final Record record) throws Exception { Response response = opencronCaller.asyncCall(Request.request(job.getIp(), job.getPort(), Action.EXECUTE, job.getPassword()) .putParam("command", job.getCommand()).putParam("pid", record.getPid()).putParam("timeout", job.getTimeout() + ""), job.getAgent()); logger.info("[opencron]:execute response:{}", response.toString()); record.setReturnCode(response.getExitCode()); record.setMessage(response.getMessage()); record.setSuccess(response.isSuccess() ? ResultStatus.SUCCESSFUL.getStatus() : ResultStatus.FAILED.getStatus()); if (StatusCode.KILL.getValue().equals(response.getExitCode())) { record.setStatus(RunStatus.STOPED.getStatus()); record.setSuccess(ResultStatus.KILLED.getStatus());//被kill任务失败 } else if (StatusCode.TIME_OUT.getValue().equals(response.getExitCode())) { record.setStatus(RunStatus.STOPED.getStatus()); record.setSuccess(ResultStatus.TIMEOUT.getStatus());//超时... } else { record.setStatus(RunStatus.DONE.getStatus()); } record.setStartTime(new Date(response.getStartTime())); record.setEndTime(new Date(response.getEndTime())); return response; } /** * 调用失败后的处理 */ private void errorExec(Record record, String errorInfo) { record.setSuccess(ResultStatus.FAILED.getStatus());//程序调用失败 record.setStatus(RunStatus.DONE.getStatus());//已完成 record.setReturnCode(StatusCode.ERROR_EXEC.getValue()); record.setEndTime(new Date()); record.setMessage(errorInfo); recordService.merge(record); } /** * 任务执行前 检测通信 */ private void checkPing(JobVo job, Record record) throws PingException { boolean ping = ping(job.getAgent()); if ( ! ping ) { record.setStatus(RunStatus.DONE.getStatus());//已完成 record.setReturnCode(StatusCode.ERROR_PING.getValue()); String format = "can't to communicate with agent:%s(%s:%d),execute job:%s failed"; String content = String.format(format, job.getAgentName(), job.getIp(), job.getPort(), job.getJobName()); record.setMessage(content); record.setSuccess(ResultStatus.FAILED.getStatus()); record.setEndTime(new Date()); recordService.merge(record); throw new PingException(content); } } public boolean ping(Agent agent) { try { Response response = opencronCaller.asyncCall(Request.request(agent.getIp(), agent.getPort(), Action.PING, agent.getPassword()).putParam("serverPort", OpencronMonitor.port + ""), agent); return response!=null && response.isSuccess(); } catch (Exception e) { logger.error("[opencron]ping failed,host:{},port:{}", agent.getIp(), agent.getPort()); return false; } } public String guid(Agent agent) { try { Response response = opencronCaller.asyncCall(Request.request(agent.getIp(), agent.getPort(), Action.GUID,agent.getPassword()), agent); return response.getMessage(); } catch (Exception e) { logger.error("[opencron]getguid failed,host:{},port:{}", agent.getIp(), agent.getPort()); return null; } } public String path(Agent agent) { try { Response response = opencronCaller.asyncCall(Request.request(agent.getIp(), agent.getPort(), Action.PATH,null), agent); return response.getMessage(); } catch (Exception e) { logger.error("[opencron]ping failed,host:{},port:{}", agent.getIp(), agent.getPort()); return null; } } /** * 修改密码 */ public boolean password(Agent agent, final String newPassword) { boolean ping = false; try { Response response = opencronCaller.asyncCall(Request.request(agent.getIp(), agent.getPort(), Action.PASSWORD, agent.getPassword()) .putParam("newPassword", newPassword), agent); ping = response.isSuccess(); } catch (Exception e) { e.printStackTrace(); } return ping; } /** * 监测执行器运行状态 */ public Response monitor(Agent agent) throws Exception { return opencronCaller.asyncCall( Request.request(agent.getIp(), agent.getPort(), Action.MONITOR, agent.getPassword()) .setParams(ParamsMap.instance().fill("connType", ConnType.getByType(agent.getProxy()).getName())), agent); } /** * 校验任务执行权限 */ private boolean checkJobPermission(Long jobAgentId, Long userId) { if (userId == null) return false; User user = userService.getUserById(userId); //超级管理员拥有所有执行器的权限 if (user != null && user.getRoleId() == 999) return true; String agentIds = userService.getUserById(userId).getAgentIds(); agentIds = "," + agentIds + ","; String thisAgentId = "," + jobAgentId + ","; return agentIds.contains(thisAgentId); } private void loggerInfo(String str, JobVo job, String message) { if (message != null) { logger.info(str, job.getJobName(), job.getIp(), job.getPort(), message); } else { logger.info(str, job.getJobName(), job.getIp(), job.getPort()); } } private String loggerError(String str, JobVo job, String message, Exception e) { String errorInfo = String.format(str, job.getJobName(), job.getIp(), job.getPort(), message); logger.error(errorInfo, e); return errorInfo; } }