package org.ovirt.engine.core.bll.gluster; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.TimeUnit; import javax.inject.Inject; import javax.inject.Singleton; import org.apache.commons.lang.StringUtils; import org.ovirt.engine.core.bll.gluster.tasks.GlusterTaskUtils; import org.ovirt.engine.core.bll.gluster.tasks.GlusterTasksService; import org.ovirt.engine.core.bll.interfaces.BackendInternal; import org.ovirt.engine.core.bll.job.ExecutionHandler; import org.ovirt.engine.core.common.AuditLogType; import org.ovirt.engine.core.common.VdcObjectType; import org.ovirt.engine.core.common.action.AddInternalJobParameters; import org.ovirt.engine.core.common.action.AddStepParameters; import org.ovirt.engine.core.common.action.VdcActionType; import org.ovirt.engine.core.common.action.VdcReturnValueBase; import org.ovirt.engine.core.common.asynctasks.gluster.GlusterAsyncTask; import org.ovirt.engine.core.common.asynctasks.gluster.GlusterTaskType; import org.ovirt.engine.core.common.businessentities.Cluster; import org.ovirt.engine.core.common.businessentities.VdsStatic; import org.ovirt.engine.core.common.businessentities.gluster.GlusterBrickEntity; import org.ovirt.engine.core.common.businessentities.gluster.GlusterStatus; import org.ovirt.engine.core.common.businessentities.gluster.GlusterVolumeEntity; import org.ovirt.engine.core.common.config.Config; import org.ovirt.engine.core.common.config.ConfigValues; import org.ovirt.engine.core.common.constants.gluster.GlusterConstants; import org.ovirt.engine.core.common.errors.EngineError; import org.ovirt.engine.core.common.errors.EngineException; import org.ovirt.engine.core.common.job.ExternalSystemType; import org.ovirt.engine.core.common.job.JobExecutionStatus; import org.ovirt.engine.core.common.job.Step; import org.ovirt.engine.core.common.job.StepEnum; import org.ovirt.engine.core.compat.Guid; import org.ovirt.engine.core.dal.job.ExecutionMessageDirector; import org.ovirt.engine.core.dao.gluster.GlusterDBUtils; import org.ovirt.engine.core.utils.threadpool.ThreadPoolUtil; import org.ovirt.engine.core.utils.timer.OnTimerMethodAnnotation; import org.ovirt.engine.core.utils.transaction.TransactionSupport; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @Singleton public class GlusterTasksSyncJob extends GlusterJob { private static final Logger log = LoggerFactory.getLogger(GlusterTasksSyncJob.class); @Inject private BackendInternal backendInternal; @Inject private ExecutionHandler executionHandler; @Inject private GlusterTasksService provider; @Inject private GlusterTaskUtils glusterTaskUtils; @Inject private GlusterDBUtils glusterDBUtils; @Override public Collection<GlusterJobSchedulingDetails> getSchedulingDetails() { return Collections.singleton(new GlusterJobSchedulingDetails( "gluster_async_task_poll_event", getRefreshRate(ConfigValues.GlusterRefreshRateTasks))); } @OnTimerMethodAnnotation("gluster_async_task_poll_event") public void updateGlusterAsyncTasks() { log.debug("Refreshing gluster tasks list"); List<Cluster> clusters = clusterDao.getAll(); Map<Guid, Set<Guid>> tasksFromClustersMap = new HashMap<>(); for (Cluster cluster : clusters) { if (!cluster.supportsGlusterService()) { continue; } try { Map<Guid, GlusterAsyncTask> runningTasks = provider.getTaskListForCluster(cluster.getId()); if (runningTasks != null) { updateTasksInCluster(cluster, runningTasks); tasksFromClustersMap.put(cluster.getId(), runningTasks.keySet()); } } catch (EngineException e) { log.error("Error updating tasks from CLI", e); } } cleanUpOrphanTasks(tasksFromClustersMap); } private void updateTasksInCluster(final Cluster cluster, final Map<Guid, GlusterAsyncTask> runningTasks) { for (Entry<Guid, GlusterAsyncTask> entry : runningTasks.entrySet()) { Guid taskId = entry.getKey(); final GlusterAsyncTask task = entry.getValue(); List<Step> steps = stepDao.getStepsByExternalId(taskId); if (steps.isEmpty()) { createJobForTaskFromCLI(cluster, task); } glusterTaskUtils.updateSteps(cluster, task, steps); } } private void createJobForTaskFromCLI(final Cluster cluster, final GlusterAsyncTask task) { ThreadPoolUtil.execute(() -> TransactionSupport.executeInNewTransaction(() -> { try { createJobToMonitor(cluster, task); } catch (EngineException e) { log.error("Error creating job for task from CLI", e); } return null; })); } private void createJobToMonitor(Cluster cluster, GlusterAsyncTask task) { if (!isTaskToBeMonitored(task)) { return; //there's no need to monitor jobs that are failed or completed } StepEnum step = task.getType().getStep(); VdcActionType actionType; switch (step) { case REBALANCING_VOLUME: actionType = VdcActionType.StartRebalanceGlusterVolume; break; case REMOVING_BRICKS: actionType = VdcActionType.StartRemoveGlusterVolumeBricks; break; default: actionType = VdcActionType.Unknown; } String volumeName = task.getTaskParameters().getVolumeName(); GlusterVolumeEntity vol = volumeDao.getByName(cluster.getId(), volumeName); if (vol == null) { log.info("Volume '{}' does not exist yet for task detected from CLI '{}', not adding to engine", volumeName, task); return; } Guid jobId = addJob(cluster, task, actionType, vol); Guid execStepId = addExecutingStep(jobId); Guid asyncStepId = addAsyncTaskStep(cluster, task, step, execStepId); Step asyncStep = stepDao.get(asyncStepId); executionHandler.updateStepExternalId(asyncStep, task.getTaskId(), ExternalSystemType.GLUSTER); updateVolumeBricksAndLock(cluster, task, vol); } private boolean isTaskToBeMonitored(GlusterAsyncTask task) { return task.getStatus() == JobExecutionStatus.STARTED || task.getType() == GlusterTaskType.REMOVE_BRICK; } private Guid addAsyncTaskStep(Cluster cluster, GlusterAsyncTask task, StepEnum step, Guid execStepId) { VdcReturnValueBase result; result = backendInternal.runInternalAction(VdcActionType.AddInternalStep, new AddStepParameters(execStepId, glusterTaskUtils.getTaskMessage(cluster, step, task), step)); if (!result.getSucceeded()) { //log and return throw new EngineException(result.getFault().getError()); } return result.getActionReturnValue(); } private Guid addExecutingStep(Guid jobId) { VdcReturnValueBase result; result = backendInternal.runInternalAction(VdcActionType.AddInternalStep, new AddStepParameters(jobId, ExecutionMessageDirector.resolveStepMessage(StepEnum.EXECUTING, null), StepEnum.EXECUTING)); if (!result.getSucceeded()) { //log and return throw new EngineException(result.getFault().getError()); } return result.getActionReturnValue(); } private Guid addJob(Cluster cluster, GlusterAsyncTask task, VdcActionType actionType, final GlusterVolumeEntity vol) { VdcReturnValueBase result = backendInternal.runInternalAction(VdcActionType.AddInternalJob, new AddInternalJobParameters(ExecutionMessageDirector.resolveJobMessage(actionType, glusterTaskUtils.getMessageMap(cluster, task)), actionType, true, VdcObjectType.GlusterVolume, vol.getId()) ); if (!result.getSucceeded()) { //log and return throw new EngineException(result.getFault().getError()); } return result.getActionReturnValue(); } private void updateVolumeBricksAndLock(Cluster cluster, GlusterAsyncTask task, final GlusterVolumeEntity vol) { try { //acquire lock on volume acquireLock(vol.getId()); //update volume with task id volumeDao.updateVolumeTask(vol.getId(), task.getTaskId()); if (GlusterTaskType.REMOVE_BRICK == task.getType()) { //update bricks associated with task id String[] bricks = task.getTaskParameters().getBricks(); if (bricks != null) { List<GlusterBrickEntity> brickEntities = new ArrayList<>(); for (String brick: bricks) { String[] brickParts = brick.split(":", -1); String hostnameOrIp = brickParts[0]; String brickDir = brickParts[1]; GlusterBrickEntity brickEntity = new GlusterBrickEntity(); VdsStatic server = glusterDBUtils.getServer(cluster.getId(), hostnameOrIp); if (server == null) { log.warn("Could not find server '{}' in cluster '{}'", hostnameOrIp, cluster.getId()); } else { brickEntity.setServerId(server.getId()); brickEntity.setBrickDirectory(brickDir); brickEntity.setAsyncTask(new GlusterAsyncTask()); brickEntity.getAsyncTask().setTaskId(task.getTaskId()); brickEntities.add(brickEntity); } } brickDao.updateAllBrickTasksByHostIdBrickDirInBatch(brickEntities); } } logTaskStartedFromCLI(cluster, task, vol); } catch (Exception e) { log.error("Exception", e); // Release the lock only if there is any exception, // otherwise the lock will be released once the task is completed releaseLock(vol.getId()); throw new EngineException(EngineError.GeneralException, e.getMessage()); } } private void logTaskStartedFromCLI(Cluster cluster, GlusterAsyncTask task, GlusterVolumeEntity vol) { Map<String, String> values = new HashMap<>(); AuditLogType logType; switch (task.getType()) { case REBALANCE: logType = AuditLogType.GLUSTER_VOLUME_REBALANCE_START_DETECTED_FROM_CLI; break; case REMOVE_BRICK: logType = AuditLogType.START_REMOVING_GLUSTER_VOLUME_BRICKS_DETECTED_FROM_CLI; values.put(GlusterConstants.BRICK, StringUtils.join(task.getTaskParameters().getBricks(), ',')); break; default: logType = AuditLogType.UNASSIGNED; break; } logUtil.logAuditMessage(cluster.getId(), vol, null, logType, values); } private void logTaskStoppedFromCLI(Step step, GlusterVolumeEntity vol) { AuditLogType logType; switch (step.getStepType()) { case REBALANCING_VOLUME: logType = AuditLogType.GLUSTER_VOLUME_REBALANCE_NOT_FOUND_FROM_CLI; break; case REMOVING_BRICKS: logType = AuditLogType.REMOVE_GLUSTER_VOLUME_BRICKS_NOT_FOUND_FROM_CLI; break; default: logType = AuditLogType.UNASSIGNED; break; } logUtil.logAuditMessage(vol.getClusterId(), vol, null, logType, null); } /** * This method cleans the tasks in DB which the gluster CLI is no longer * aware of. * @param runningTasksInClusterMap - map of cluster id - task list in cluster */ private void cleanUpOrphanTasks(Map<Guid, Set<Guid>> runningTasksInClusterMap) { // if map is empty, no tasks from clusters fetched. so return if (runningTasksInClusterMap.isEmpty()) { log.debug("Clean up of tasks has been skipped"); return; } //Populate the list of tasks that need to be monitored from database List<Guid> taskListInDB = provider.getMonitoredTaskIDsInDB(); if (taskListInDB == null || taskListInDB.isEmpty()) { return; } Set<Guid> allRunningTasksInCluster = new HashSet<>(); for (Set<Guid> taskSet: runningTasksInClusterMap.values()) { if (taskSet != null) { allRunningTasksInCluster.addAll(taskSet); } } //if task is in DB but not in running task list final Set<Guid> tasksNotRunning = new HashSet<>(taskListInDB); tasksNotRunning.removeAll(allRunningTasksInCluster); log.debug("Tasks to be cleaned up in db '{}'", tasksNotRunning); for (Guid taskId: tasksNotRunning) { GlusterVolumeEntity vol= volumeDao.getVolumeByGlusterTask(taskId); if (vol != null && (vol.getStatus() != GlusterStatus.UP || !runningTasksInClusterMap.keySet() .contains(vol.getClusterId()))) { // the volume is not UP. Hence gluster may not have been able to return tasks for the volume // also handling the case where gluster was not able to return any tasks from this cluster - the keyset will not // contain the cluster id in such case continue; } //Volume is up, but gluster does not know of task //will mark job ended with status unknown. List<Step> steps = stepDao.getStepsByExternalId(taskId); Map<String, String> values = new HashMap<>(); values.put(GlusterConstants.CLUSTER, vol == null ? "" :vol.getClusterName()); values.put(GlusterConstants.VOLUME, vol == null ? "" : vol.getName()); values.put(GlusterConstants.JOB_STATUS, JobExecutionStatus.UNKNOWN.toString()); values.put(GlusterConstants.JOB_INFO, " "); for (Step step: steps) { if (TimeUnit.MILLISECONDS.toMinutes(System.currentTimeMillis() - step.getStartTime().getTime()) < getMininumWaitInMins()) { //This task has been recently created. We will give it 10 mins before clearing it. continue; } step.markStepEnded(JobExecutionStatus.UNKNOWN); step.setStatus(JobExecutionStatus.UNKNOWN); step.setDescription(ExecutionMessageDirector.resolveStepMessage(step.getStepType(), values)); glusterTaskUtils.endStepJob(step); if (vol != null) { logTaskStoppedFromCLI(step, vol); } } glusterTaskUtils.releaseVolumeLock(taskId); } } private static Integer getMininumWaitInMins() { return Config.<Integer> getValue(ConfigValues.GlusterTaskMinWaitForCleanupInMins); } }