/*
 * Copyright (c) 2015 EMC Corporation
 * All Rights Reserved
 */
package com.emc.storageos.db.task;

import java.net.URI;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

import org.apache.commons.lang.StringUtils;
import org.apache.curator.framework.recipes.locks.InterProcessLock;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.emc.storageos.coordinator.client.service.CoordinatorClient;
import com.emc.storageos.db.client.DbClient;
import com.emc.storageos.db.client.constraint.AggregatedConstraint;
import com.emc.storageos.db.client.constraint.AggregationQueryResultList;
import com.emc.storageos.db.client.constraint.Constraint;
import com.emc.storageos.db.client.model.DataObject;
import com.emc.storageos.db.client.model.Task;
import com.emc.storageos.db.client.model.TenantOrg;
import com.emc.storageos.services.util.NamedScheduledThreadPoolExecutor;
import com.google.common.collect.Lists;
import com.netflix.astyanax.util.TimeUUIDUtils;

/**
 * Deletes tasks that have completed and are more than {@link #TASK_TTL_MINS_PROPERTY} minutes old,
 * at periodic intervals.
 */
public class TaskScrubberExecutor {
    private static final Logger log = LoggerFactory.getLogger(TaskScrubberExecutor.class);

    private static final String TASK_TTL_MINS_PROPERTY = "task_ttl";
    private static final String TASK_CLEAN_INTERVAL_PROPERTY = "task_clean_interval";
    private static final int TASK_CLEAN_INITIAL_DELAY = 10;

    private static final long MIN_TO_MICROSECS = 60 * 1000 * 1000; // minutes to microseconds
    private static final long MILLIS_TO_MICROSECS = 1000; // milliseconds to microseconds
    private static final long MINIMUM_PERIOD_MINS = 60;

    private static final int DELETE_BATCH_SIZE = 100;
    // Cap on deletions per scrubber run; see deleteOldCompletedTasksForTenant for the rationale
    private static final int MAXIMUM_TASK_TO_DELETE = 100 * DELETE_BATCH_SIZE;
    private static final int MAXIMUM_TASK_IN_ONE_QUERY = 100 * DELETE_BATCH_SIZE;

    private static final String SYSTEM_TENANT_ID = "urn:storageos:TenantOrg:system:";

    private static final String TASK_SCRUBBER_LOCK = "task_scrubber_lock";
    private static final int TASK_SCRUBBER_LOCK_TIMEOUT = 3 * 60; // 3 minute timeout to acquire the lock

    private ScheduledExecutorService _executor = new NamedScheduledThreadPoolExecutor("TaskScrubber", 1);
    private DbClient dbClient;
    private CoordinatorClient coordinator;
    private InterProcessLock lock = null;

    public void start() {
        _executor.scheduleWithFixedDelay(new Runnable() {
            @Override
            public void run() {
                try {
                    deleteOldTasks();
                } catch (Exception e) {
                    log.error("Exception thrown while running task scrubber executor", e);
                }
            }
        }, TASK_CLEAN_INITIAL_DELAY, getConfigProperty(TASK_CLEAN_INTERVAL_PROPERTY, MINIMUM_PERIOD_MINS), TimeUnit.MINUTES);
        log.info("Started Task Scrubber to run every {} minutes",
                getConfigProperty(TASK_CLEAN_INTERVAL_PROPERTY, MINIMUM_PERIOD_MINS));
    }

    public DbClient getDbClient() {
        return dbClient;
    }

    public void setDbClient(DbClient dbClient) {
        this.dbClient = dbClient;
    }

    public CoordinatorClient getCoordinator() {
        return coordinator;
    }

    public void setCoordinator(CoordinatorClient coordinator) {
        this.coordinator = coordinator;
    }

    private void deleteOldTasks() {
        boolean lockAcquired = false;
        try {
            // acquire a distributed lock so clean up is only done on one node
            lockAcquired = acquireLock();
            if (lockAcquired) {
                log.info("Looking for completed tasks older than {} minutes",
                        getConfigProperty(TASK_TTL_MINS_PROPERTY, MINIMUM_PERIOD_MINS));
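                // Compute the deletion cutoff: take the current time from a freshly generated
                // time-based UUID (microsecond precision), subtract the configured TTL, and
                // convert to a Calendar; tasks created before this marker may be deleted.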
                long taskLifetimeMicroSeconds = getConfigProperty(TASK_TTL_MINS_PROPERTY, MINIMUM_PERIOD_MINS) * MIN_TO_MICROSECS;
                long currentTimeMicroseconds = TimeUUIDUtils.getMicrosTimeFromUUID(TimeUUIDUtils.getUniqueTimeUUIDinMicros());
                long startTimeMicroSec = currentTimeMicroseconds - taskLifetimeMicroSeconds;
                Calendar startTimeMarker = Calendar.getInstance();
                startTimeMarker.setTimeInMillis(startTimeMicroSec / MILLIS_TO_MICROSECS);

                int tasksDeleted = 0;
                List<URI> tenantIds = getTenantIds();
                for (URI tenantId : tenantIds) {
                    tasksDeleted += deleteOldCompletedTasksForTenant(tenantId, startTimeMarker);
                    if (tasksDeleted >= MAXIMUM_TASK_TO_DELETE) {
                        break;
                    }
                }
                log.info("Finished deleting completed tasks; deleted {} tasks", tasksDeleted);
            }
        } finally {
            if (lockAcquired) {
                releaseLock();
            }
        }
    }

    /**
     * gets the list of all tenant ids
     *
     * @return the ids of all tenants, including the system tenant
     */
    private List<URI> getTenantIds() {
        log.debug("getting a list of all tenants");
        List<URI> tenantIds = new ArrayList<URI>();
        Iterator<URI> tenantItr = dbClient.queryByType(TenantOrg.class, true).iterator();
        while (tenantItr.hasNext()) {
            tenantIds.add(tenantItr.next());
        }
        tenantIds.add(URI.create(SYSTEM_TENANT_ID));
        log.debug("found {} tenants (including system tenant)", tenantIds.size());
        return tenantIds;
    }

    /**
     * deletes old completed tasks for a tenant
     *
     * @param tenantId tenant to delete tasks for
     * @param startTimeMarker time that determines which tasks are old
     * @return the number of tasks deleted
     */
    private int deleteOldCompletedTasksForTenant(URI tenantId, Calendar startTimeMarker) {
        log.debug("deleting completed tasks for tenant {}", tenantId);
        int tasksDeleted = 0;
        Map<String, List<URI>> batchedIds = findTasksForTenantNotPending(tenantId);
        for (Entry<String, List<URI>> entry : batchedIds.entrySet()) {
            String batch = entry.getKey();
            List<URI> ids = entry.getValue();
            log.debug("processing batch {} with {} completed tasks for tenant {}", batch, ids.size(), tenantId);
            try {
                Iterator<Task> tasks = dbClient.queryIterativeObjects(Task.class, ids, true);
                List<Task> toBeDeleted = Lists.newArrayList();
                while (tasks.hasNext()) {
                    Task task = tasks.next();
                    // skip tasks that are newer than the start time marker
                    if (task == null || (task.getCreationTime() != null && task.getCreationTime().after(startTimeMarker))) {
                        continue;
                    }
                    if (!task.isPending()) {
                        tasksDeleted++;
                        toBeDeleted.add(task);
                    }
                    if (toBeDeleted.size() >= DELETE_BATCH_SIZE) {
                        removeTasks(toBeDeleted, tenantId);
                        toBeDeleted.clear();
                    }
                    // Don't delete more than MAXIMUM_TASK_TO_DELETE tasks in one iteration.
                    // Prior to 3.6, a bug may have caused old completed tasks to not be cleaned up.
                    // Having a maximum number of tasks to delete per iteration prevents the
                    // system from being overloaded while catching up; we want to avoid customers
                    // experiencing poor performance just after upgrading to 3.6 because they may
                    // have many tasks that can now be cleaned up.
                    if (tasksDeleted >= MAXIMUM_TASK_TO_DELETE) {
                        break;
                    }
                }
                if (!toBeDeleted.isEmpty()) {
                    removeTasks(toBeDeleted, tenantId);
                }
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }
        }
        log.debug("done deleting completed tasks for tenant {}", tenantId);
        return tasksDeleted;
    }

    /**
     * removes tasks from the db with logging
     *
     * @param toBeDeleted the tasks to delete
     * @param tenantId the tenant the tasks belong to
     */
    private void removeTasks(List<Task> toBeDeleted, URI tenantId) {
        log.info("Deleting {} Tasks for tenant {}", toBeDeleted.size(), tenantId);
        dbClient.removeObject(toBeDeleted.toArray(new DataObject[toBeDeleted.size()]));
    }

    /**
     * returns non-pending task ids for a tenant
     *
     * @param tenantId the tenant to find tasks for
     * @return a map of task ids in batches of {@link #MAXIMUM_TASK_IN_ONE_QUERY} (10k) each
     */
    private Map<String, List<URI>> findTasksForTenantNotPending(URI tenantId) {
        log.debug("searching for completed tasks for tenant {}", tenantId);
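        // Query the aggregated "taskStatus" index for this tenant so that task ids and
        // statuses can be examined without loading full Task objects; non-pending ids are
        // collected below into batches keyed by batch number.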
        Constraint constraint = AggregatedConstraint.Factory.getAggregationConstraint(Task.class, "tenant",
                tenantId.toString(), "taskStatus");
        AggregationQueryResultList queryResults = new AggregationQueryResultList();
        dbClient.queryByConstraint(constraint, queryResults);
        Iterator<AggregationQueryResultList.AggregatedEntry> it = queryResults.iterator();
        Map<String, List<URI>> notPendingTasks = new HashMap<String, List<URI>>();
        int batch = 0;
        int count = 0;
        int totalCount = 0; // only used for logging
        while (it.hasNext()) {
            AggregationQueryResultList.AggregatedEntry entry = it.next();
            if (!entry.getValue().equals(Task.Status.pending.name())) {
                if (notPendingTasks.get(Integer.toString(batch)) == null) {
                    notPendingTasks.put(Integer.toString(batch), new ArrayList<URI>());
                }
                notPendingTasks.get(Integer.toString(batch)).add(entry.getId());
                totalCount++;
                count++;
                if (count >= MAXIMUM_TASK_IN_ONE_QUERY) {
                    batch++;
                    count = 0;
                }
            }
        }
        log.debug("found {} completed tasks for tenant {}", totalCount, tenantId);
        return notPendingTasks;
    }

    /**
     * Acquires a distributed lock to ensure no other node performs task scrubbing concurrently;
     * waits up to {@link #TASK_SCRUBBER_LOCK_TIMEOUT} seconds to acquire the lock.
     *
     * @return true if the lock was successfully acquired
     */
    private boolean acquireLock() {
        boolean acquired = false;
        try {
            log.info("Attempting to acquire distributed lock {}", TASK_SCRUBBER_LOCK);
            lock = coordinator.getLock(TASK_SCRUBBER_LOCK);
            acquired = lock.acquire(TASK_SCRUBBER_LOCK_TIMEOUT, TimeUnit.SECONDS);
            if (acquired) {
                log.info("Successfully acquired distributed lock {}", TASK_SCRUBBER_LOCK);
            } else {
                log.info("Failed to acquire distributed lock {}", TASK_SCRUBBER_LOCK);
            }
        } catch (Exception e) {
            log.error("Exception while attempting to acquire distributed lock {}", TASK_SCRUBBER_LOCK, e);
        }
        return acquired;
    }

    /**
     * releases the distributed task scrubber lock
     */
    private void releaseLock() {
        log.info("Attempting to release distributed lock {}", TASK_SCRUBBER_LOCK);
        try {
            lock.release();
            log.info("Successfully released distributed lock {}", TASK_SCRUBBER_LOCK);
        } catch (Exception e) {
            log.error("Failed to release distributed lock {}", TASK_SCRUBBER_LOCK, e);
        }
    }

    /**
     * reads a numeric configuration property, never returning less than the given minimum
     *
     * @param propertyName the configuration property to read
     * @param minimumValue the floor (and default) value for the property
     * @return the configured value, or minimumValue if the property is missing, blank, invalid, or too small
     */
    private long getConfigProperty(String propertyName, long minimumValue) {
        String value = coordinator.getPropertyInfo().getProperty(propertyName);
        if (StringUtils.isNotBlank(value)) {
            try {
                return Math.max(Long.parseLong(value), minimumValue);
            } catch (Exception e) {
                log.error("Configuration property " + propertyName + " is not a valid number; using minimum value of "
                        + minimumValue, e);
                return minimumValue;
            }
        } else {
            log.error("Configuration property " + propertyName + " not found; using minimum value of " + minimumValue);
            return minimumValue;
        }
    }
}