package org.dcache.pool.migration;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.concurrent.GuardedBy;

import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import diskCacheV111.util.CacheException;
import diskCacheV111.util.FileNotInCacheException;
import diskCacheV111.util.PnfsId;
import diskCacheV111.vehicles.PoolManagerPoolInformation;

import dmg.cells.nucleus.DelayedReply;

import org.dcache.pool.repository.AbstractStateChangeListener;
import org.dcache.pool.repository.CacheEntry;
import org.dcache.pool.repository.EntryChangeEvent;
import org.dcache.pool.repository.IllegalTransitionException;
import org.dcache.pool.repository.ReplicaState;
import org.dcache.pool.repository.Repository;
import org.dcache.pool.repository.StateChangeEvent;
import org.dcache.pool.repository.StickyChangeEvent;
import org.dcache.pool.repository.StickyRecord;
import org.dcache.util.FireAndForgetTask;
import org.dcache.util.expression.Expression;

import static com.google.common.base.Preconditions.checkState;

/**
 * Encapsulates a job as defined by a user command.
 *
 * A job is a collection of tasks, each task transferring a single
 * replica. The tasks are scheduled by the job. Whereas a job defines
 * a bulk operation, a task encapsulates the transfer of a single
 * replica.
 *
 * Jobs monitor the local repository for changes. If a replica changes
 * state before it is transferred, and the replica no longer passes the
 * selection criteria of the job, then it will not be transferred. If
 * it is in the process of being transferred, then the transfer is
 * cancelled. If the transfer has already completed, then nothing
 * happens.
 *
 * Jobs can be defined as permanent. A permanent job will monitor the
 * repository for state changes. Should a replica be added or change
 * state in such a way that it passes the selection criteria of the
 * job, then it is added to the transfer queue of the job. A permanent
 * job does not terminate, even if its transfer queue becomes
 * empty. Permanent jobs are saved to the pool setup file and restored
 * on pool start.
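 *
 * A minimal usage sketch (illustrative only; construction of the
 * MigrationContext and JobDefinition collaborators is omitted, and in
 * dCache jobs are normally created and driven by the migration module
 * on behalf of a user command):
 *
 * <pre>{@code
 * Job job = new Job(context, definition);
 * job.start();              // NEW -> INITIALIZING -> RUNNING
 * job.setConcurrency(5);    // allow up to five concurrent transfers
 * job.suspend();            // stop scheduling new tasks
 * job.resume();
 * job.cancel(true);         // cancel, forcibly stopping running tasks
 * }</pre>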
 *
 * Jobs can be in any of the following states:
 *
 *   NEW           Job has not been started yet
 *   INITIALIZING  Initial scan of repository
 *   RUNNING       Job runs (schedules new tasks)
 *   SLEEPING      A task failed; no tasks are scheduled for 10 seconds
 *   PAUSED        Pause expression evaluates to true; no tasks are
 *                 scheduled for 10 seconds
 *   STOPPING      Stop expression evaluates to true; waiting for tasks
 *                 to stop
 *   SUSPENDED     Job suspended by user; no tasks are scheduled
 *   CANCELLING    Job cancelled by user; waiting for tasks to stop
 *   CANCELLED     Job cancelled by user; no tasks are running
 *   FINISHED      Job completed
 *   FAILED        Job failed
 */
public class Job
    extends AbstractStateChangeListener
    implements TaskCompletionHandler
{
    enum State {
        NEW, INITIALIZING, RUNNING, SLEEPING, PAUSED, SUSPENDED,
        STOPPING, CANCELLING, CANCELLED, FINISHED, FAILED
    }

    private static final Logger _log = LoggerFactory.getLogger(Job.class);

    private final Set<PnfsId> _queued = new LinkedHashSet<>();
    private final Map<PnfsId, Long> _sizes = new HashMap<>();
    private final Map<PnfsId, Task> _running = new HashMap<>();
    private final BlockingQueue<Error> _errors = new ArrayBlockingQueue<>(15);
    private final Map<PoolMigrationJobCancelMessage, DelayedReply> _cancelRequests =
        new HashMap<>();

    private final JobStatistics _statistics = new JobStatistics();
    private final MigrationContext _context;
    private final JobDefinition _definition;
    private final TaskParameters _taskParameters;
    private final String _pinPrefix;

    private final Lock _lock = new ReentrantLock(true);

    private volatile State _state;
    private int _concurrency;
    private Future<?> _refreshTask;

    public Job(MigrationContext context, JobDefinition definition)
    {
        _context = context;
        _definition = definition;
        _concurrency = 1;
        _state = State.NEW;

        _taskParameters = new TaskParameters(context.getPoolStub(),
                                             context.getPnfsStub(),
                                             context.getPinManagerStub(),
                                             context.getExecutor(),
                                             definition.selectionStrategy,
                                             definition.poolList,
                                             definition.isEager,
                                             definition.isMetaOnly,
                                             definition.computeChecksumOnUpdate,
                                             definition.forceSourceMode,
                                             definition.maintainAtime,
                                             definition.replicas);

        _pinPrefix = context.getPinManagerStub()
            .getDestinationPath().getDestinationAddress().getCellName();
    }

    public void start()
    {
        _lock.lock();
        try {
            checkState(_state == State.NEW);
            _state = State.INITIALIZING;

            long refreshPeriod = _definition.refreshPeriod;
            ScheduledExecutorService executor = _context.getExecutor();

            _refreshTask = executor.scheduleWithFixedDelay(
                new FireAndForgetTask(() -> {
                    _definition.sourceList.refresh();
                    _definition.poolList.refresh();
                }), 0, refreshPeriod, TimeUnit.MILLISECONDS);

            executor.submit(new FireAndForgetTask(() -> {
                try {
                    _context.getRepository().addListener(Job.this);
                    populate();
                    _lock.lock();
                    try {
                        if (getState() == State.INITIALIZING) {
                            setState(State.RUNNING);
                        }
                    } finally {
                        _lock.unlock();
                    }
                } catch (InterruptedException e) {
                    _log.error("Migration job was interrupted");
                } finally {
                    _lock.lock();
                    try {
                        switch (getState()) {
                        case INITIALIZING:
                            setState(State.FAILED);
                            break;
                        case CANCELLING:
                            schedule();
                            break;
                        }
                    } finally {
                        _lock.unlock();
                    }
                }
            }));
        } finally {
            _lock.unlock();
        }
    }

    public JobDefinition getDefinition()
    {
        return _definition;
    }

    public int getConcurrency()
    {
        _lock.lock();
        try {
            return _concurrency;
        } finally {
            _lock.unlock();
        }
    }

    public void setConcurrency(int concurrency)
    {
        _lock.lock();
        try {
            _concurrency = concurrency;
            schedule();
        } finally {
            _lock.unlock();
        }
    }

    public void addError(Error error)
    {
        _lock.lock();
        try {
            while (!_errors.offer(error)) {
                _errors.poll();
            }
        } finally {
            _lock.unlock();
        }
    }
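    // Note: _errors is a bounded queue (15 entries); addError above evicts
    // the oldest entry to make room, so only the most recent errors are
    // retained for the "Most recent errors" listing in getInfo below.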
    /** Adds status information about the job to <code>pw</code>. */
    public void getInfo(PrintWriter pw)
    {
        _lock.lock();
        try {
            long total = _statistics.getTotal();
            long completed = _statistics.getTransferred();
            pw.println("State      : " + _state);
            pw.println("Queued     : " + _queued.size());
            pw.println("Attempts   : " + _statistics.getAttempts());
            pw.println("Targets    : " + _definition.poolList);

            if (total > 0) {
                switch (getState()) {
                case NEW:
                    break;
                case RUNNING:
                case SUSPENDED:
                case CANCELLING:
                case STOPPING:
                case SLEEPING:
                case PAUSED:
                    pw.println("Completed  : "
                               + _statistics.getCompleted() + " files; "
                               + _statistics.getTransferred() + " bytes; "
                               + (100 * completed / total) + "%");
                    pw.println("Total      : " + total + " bytes");
                    break;
                case INITIALIZING:
                case FINISHED:
                    pw.println("Completed  : "
                               + _statistics.getCompleted() + " files; "
                               + _statistics.getTransferred() + " bytes");
                    pw.println("Total      : " + total + " bytes");
                    break;
                case CANCELLED:
                case FAILED:
                    pw.println("Completed  : "
                               + _statistics.getCompleted() + " files; "
                               + _statistics.getTransferred() + " bytes");
                    break;
                }
            }

            pw.println("Concurrency: " + _concurrency);
            pw.println("Running tasks:");

            List<Task> tasks = new ArrayList<>(_running.values());
            Collections.sort(tasks, (t1, t2) -> Long.compare(t1.getId(), t2.getId()));
            for (Task task : tasks) {
                task.getInfo(pw);
            }

            if (!_errors.isEmpty()) {
                pw.println("Most recent errors:");
                for (Error error : _errors) {
                    pw.println(error);
                }
            }
        } finally {
            _lock.unlock();
        }
    }

    /**
     * Scans the repository for files and adds corresponding tasks to
     * the job.
     */
    private void populate()
        throws InterruptedException
    {
        try {
            Repository repository = _context.getRepository();
            Iterable<PnfsId> files = repository;

            if (_definition.comparator != null) {
                List<PnfsId> all = new ArrayList<>();
                for (PnfsId pnfsId : files) {
                    all.add(pnfsId);
                }
                Comparator<PnfsId> order =
                    new CacheEntryOrder(repository, _definition.comparator);
                Collections.sort(all, order);
                files = all;
            }

            for (PnfsId pnfsId : files) {
                try {
                    _lock.lock();
                    try {
                        if (_state != State.INITIALIZING) {
                            break;
                        }
                        CacheEntry entry = repository.getEntry(pnfsId);
                        if (accept(entry)) {
                            add(entry);
                        }
                    } finally {
                        _lock.unlock();
                    }
                } catch (FileNotInCacheException e) {
                    // File was removed before we got to it - not a problem.
                } catch (CacheException e) {
                    _log.error("Failed to load entry: " + e.getMessage());
                }
            }
        } catch (IllegalStateException e) {
            // This means the repository was not initialized yet. Not a big
            // problem, since we will be notified about each entry during
            // initialization.
        }
    }

    /**
     * Cancels a job. All running tasks are cancelled.
     */
    public void cancel(boolean force)
    {
        _lock.lock();
        try {
            if (_state == State.CANCELLED || _state == State.FAILED
                || _state == State.FINISHED) {
                for (Map.Entry<PoolMigrationJobCancelMessage, DelayedReply> entry :
                         _cancelRequests.entrySet()) {
                    entry.getValue().reply(entry.getKey());
                }
            } else if (_state != State.INITIALIZING && _running.isEmpty()) {
                setState(State.CANCELLED);
            } else {
                setState(State.CANCELLING);
                if (force) {
                    for (Task task : _running.values()) {
                        task.cancel();
                    }
                }
            }
        } finally {
            _lock.unlock();
        }
    }
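    // Cancellation is asynchronous: cancel() merely initiates the
    // transition. Pending PoolMigrationJobCancelMessage requests are
    // answered directly above for jobs that have already terminated, and
    // otherwise from setState once the job reaches CANCELLED, FINISHED or
    // FAILED.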
    /**
     * Similar to cancel(false), but the job will eventually end up in
     * FINISHED rather than CANCELLED.
     */
    @GuardedBy("_lock")
    private void stop()
    {
        if (_state != State.RUNNING && _state != State.SUSPENDED
            && _state != State.STOPPING && _state != State.SLEEPING
            && _state != State.PAUSED) {
            throw new IllegalStateException("The job cannot be stopped in its present state");
        }
        if (_running.isEmpty()) {
            setState(State.FINISHED);
        } else {
            setState(State.STOPPING);
        }
    }

    /**
     * Pauses a job. Pause is similar to suspend, but it will
     * periodically reevaluate the pause predicate and automatically
     * resume the job when the predicate evaluates to false.
     */
    @GuardedBy("_lock")
    private void pause()
    {
        if (_state != State.RUNNING && _state != State.SUSPENDED
            && _state != State.SLEEPING && _state != State.PAUSED) {
            throw new IllegalStateException("The job cannot be paused in its present state");
        }
        setState(State.PAUSED);
    }

    /**
     * Suspends a job. No new tasks are scheduled.
     */
    public void suspend()
    {
        _lock.lock();
        try {
            if (_state != State.RUNNING && _state != State.SUSPENDED
                && _state != State.SLEEPING && _state != State.PAUSED) {
                throw new IllegalStateException("Cannot suspend a job that does not run");
            }
            setState(State.SUSPENDED);
        } finally {
            _lock.unlock();
        }
    }

    /**
     * Resumes a previously suspended job.
     */
    public void resume()
    {
        _lock.lock();
        try {
            if (_state != State.SUSPENDED) {
                throw new IllegalStateException("Cannot resume a job that is not suspended");
            }
            setState(State.RUNNING);
        } finally {
            _lock.unlock();
        }
    }

    /** Returns the current state of the job. */
    public State getState()
    {
        return _state;
    }

    /**
     * Sets the state of the job.
     *
     * Closely coupled to the <code>schedule</code> method.
     *
     * @see #schedule()
     */
    private void setState(State state)
    {
        _lock.lock();
        try {
            if (_state != state) {
                _state = state;
                switch (_state) {
                case RUNNING:
                    schedule();
                    break;

                case SLEEPING:
                    _context.getExecutor().schedule(new FireAndForgetTask(() -> {
                        _lock.lock();
                        try {
                            if (getState() == State.SLEEPING) {
                                setState(State.RUNNING);
                            }
                        } finally {
                            _lock.unlock();
                        }
                    }), 10, TimeUnit.SECONDS);
                    break;

                case PAUSED:
                    _context.getExecutor().schedule(new FireAndForgetTask(() -> {
                        _lock.lock();
                        try {
                            if (getState() == State.PAUSED) {
                                Expression stopWhen = _definition.stopWhen;
                                if (stopWhen != null && evaluateLifetimePredicate(stopWhen)) {
                                    stop();
                                }
                                Expression pauseWhen = _definition.pauseWhen;
                                if (!evaluateLifetimePredicate(pauseWhen)) {
                                    setState(State.RUNNING);
                                }
                            }
                        } finally {
                            _lock.unlock();
                        }
                    }), 10, TimeUnit.SECONDS);
                    break;

                case FINISHED:
                case CANCELLED:
                case FAILED:
                    _queued.clear();
                    _sizes.clear();
                    _context.getRepository().removeListener(this);
                    _refreshTask.cancel(false);

                    for (Map.Entry<PoolMigrationJobCancelMessage, DelayedReply> entry :
                             _cancelRequests.entrySet()) {
                        entry.getValue().reply(entry.getKey());
                    }
                    _cancelRequests.clear();
                    break;
                }
            }
        } finally {
            _lock.unlock();
        }
    }
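    /*
     * Informal summary of the life cycle driven by setState and schedule
     * (derived from the code in this class; see the two methods for the
     * exact rules):
     *
     *   NEW -> INITIALIZING -> RUNNING
     *   RUNNING <-> SLEEPING, PAUSED, SUSPENDED
     *   RUNNING -> STOPPING -> FINISHED
     *   any non-terminal state -> CANCELLING -> CANCELLED
     *   INITIALIZING, RUNNING -> FAILED on unrecoverable errors
     */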
    /**
     * Schedules jobs, depending on the current state and available
     * resources.
     *
     * Closely coupled to the <code>setState</code> method.
     *
     * @see #setState(State)
     */
    @GuardedBy("_lock")
    private void schedule()
    {
        if (_state == State.CANCELLING && _running.isEmpty()) {
            setState(State.CANCELLED);
        } else if (_state != State.INITIALIZING && _state != State.NEW
                   && !_definition.isPermanent
                   && _queued.isEmpty() && _running.isEmpty()) {
            setState(State.FINISHED);
        } else if (_state == State.STOPPING && _running.isEmpty()) {
            setState(State.FINISHED);
        } else if (_state == State.RUNNING
                   && (!_definition.sourceList.isValid()
                       || !_definition.poolList.isValid())) {
            setState(State.SLEEPING);
        } else if (_state == State.RUNNING) {
            Iterator<PnfsId> i = _queued.iterator();
            while ((_running.size() < _concurrency) && i.hasNext()) {
                Expression stopWhen = _definition.stopWhen;
                if (stopWhen != null && evaluateLifetimePredicate(stopWhen)) {
                    stop();
                    break;
                }
                Expression pauseWhen = _definition.pauseWhen;
                if (pauseWhen != null && evaluateLifetimePredicate(pauseWhen)) {
                    pause();
                    break;
                }

                PnfsId pnfsId = i.next();
                if (!_context.lock(pnfsId)) {
                    addError(new Error(0, pnfsId, "File is locked"));
                    continue;
                }

                try {
                    i.remove();
                    Repository repository = _context.getRepository();
                    CacheEntry entry = repository.getEntry(pnfsId);
                    Task task = new Task(_taskParameters,
                                         this,
                                         _context.getPoolName(),
                                         entry.getPnfsId(),
                                         getTargetState(entry),
                                         getTargetStickyRecords(entry),
                                         getPins(entry),
                                         entry.getFileAttributes(),
                                         entry.getLastAccessTime());
                    _running.put(pnfsId, task);
                    _statistics.addAttempt();
                    task.run();
                } catch (FileNotInCacheException e) {
                    _sizes.remove(pnfsId);
                } catch (CacheException e) {
                    _log.error("Migration job failed to read entry: " + e.getMessage());
                    setState(State.FAILED);
                    break;
                } catch (InterruptedException e) {
                    _log.error("Migration job was interrupted: " + e.getMessage());
                    setState(State.FAILED);
                    break;
                } finally {
                    if (!_running.containsKey(pnfsId)) {
                        _context.unlock(pnfsId);
                    }
                }
            }

            if (_running.isEmpty()) {
                if (!_definition.isPermanent && _queued.isEmpty()) {
                    setState(State.FINISHED);
                } else {
                    setState(State.SLEEPING);
                }
            }
        }
    }

    private ReplicaState getTargetState(CacheEntry entry)
    {
        switch (_definition.targetMode.state) {
        case SAME:
            return entry.getState();
        case CACHED:
            return ReplicaState.CACHED;
        case PRECIOUS:
            return ReplicaState.PRECIOUS;
        default:
            throw new IllegalStateException("Unsupported target mode");
        }
    }

    private List<StickyRecord> getPins(CacheEntry entry)
    {
        if (!_definition.mustMovePins) {
            return Collections.emptyList();
        }

        List<StickyRecord> pins = new ArrayList<>();
        for (StickyRecord record : entry.getStickyRecords()) {
            if (isPin(record)) {
                pins.add(record);
            }
        }
        return pins;
    }

    private List<StickyRecord> getTargetStickyRecords(CacheEntry entry)
    {
        List<StickyRecord> result = new ArrayList<>();
        if (_definition.targetMode.state == CacheEntryMode.State.SAME) {
            for (StickyRecord record : entry.getStickyRecords()) {
                if (!isPin(record)) {
                    result.add(record);
                }
            }
        }
        result.addAll(_definition.targetMode.stickyRecords);
        return result;
    }

    /**
     * Returns true if and only if <code>entry</code> is accepted by
     * all filters.
     */
    private boolean accept(CacheEntry entry)
    {
        for (CacheEntryFilter filter : _definition.filters) {
            if (!filter.accept(entry)) {
                return false;
            }
        }
        return true;
    }

    /** Adds a new task to the job. */
    @GuardedBy("_lock")
    private void add(CacheEntry entry)
    {
        PnfsId pnfsId = entry.getPnfsId();
        if (!_queued.contains(pnfsId) && !_running.containsKey(pnfsId)) {
            long size = entry.getReplicaSize();
            _queued.add(pnfsId);
            _sizes.put(pnfsId, size);
            _statistics.addToTotal(size);
            schedule();
        }
    }
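    /*
     * The helper and repository callbacks below keep the job in sync with
     * the local repository: a deleted replica is dropped from the job
     * (cancelling its task if one is running), a replica that stops matching
     * the job's filters is dropped from the queue unless it is already being
     * transferred, and for permanent jobs a replica that starts matching the
     * filters is queued.
     */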
    /** Removes a task from the job. */
    @GuardedBy("_lock")
    private void remove(PnfsId pnfsId)
    {
        Task task = _running.get(pnfsId);
        if (task != null) {
            task.cancel();
        } else if (_queued.remove(pnfsId)) {
            _sizes.remove(pnfsId);
        }
    }

    /** Callback from repository. */
    @Override
    public void stateChanged(StateChangeEvent event)
    {
        PnfsId pnfsId = event.getPnfsId();
        if (event.getNewState() == ReplicaState.REMOVED) {
            _lock.lock();
            try {
                remove(pnfsId);
            } finally {
                _lock.unlock();
            }
        } else {
            // We don't call entryChanged because during repository
            // initialization stateChanged is called and we want to
            // add the file to the job even if the state didn't change.
            CacheEntry entry = event.getNewEntry();
            if (!accept(entry)) {
                _lock.lock();
                try {
                    if (!_running.containsKey(pnfsId)) {
                        remove(pnfsId);
                    }
                } finally {
                    _lock.unlock();
                }
            } else if (_definition.isPermanent) {
                _lock.lock();
                try {
                    add(entry);
                } finally {
                    _lock.unlock();
                }
            }
        }
    }

    @Override
    public void accessTimeChanged(EntryChangeEvent event)
    {
        entryChanged(event);
    }

    @Override
    public void stickyChanged(StickyChangeEvent event)
    {
        entryChanged(event);
    }

    private void entryChanged(EntryChangeEvent event)
    {
        PnfsId pnfsId = event.getPnfsId();
        CacheEntry entry = event.getNewEntry();
        if (!accept(entry)) {
            _lock.lock();
            try {
                if (!_running.containsKey(pnfsId)) {
                    remove(pnfsId);
                }
            } finally {
                _lock.unlock();
            }
        } else if (_definition.isPermanent && !accept(event.getOldEntry())) {
            _lock.lock();
            try {
                add(entry);
            } finally {
                _lock.unlock();
            }
        }
    }

    /** Callback from task: Task is dead, remove it. */
    @Override
    public void taskCancelled(Task task)
    {
        _lock.lock();
        try {
            PnfsId pnfsId = task.getPnfsId();
            _running.remove(pnfsId);
            _sizes.remove(pnfsId);
            _context.unlock(pnfsId);
            schedule();
        } finally {
            _lock.unlock();
        }
    }

    /** Callback from task: Task failed, reschedule it. */
    @Override
    public void taskFailed(Task task, int rc, String msg)
    {
        _lock.lock();
        try {
            PnfsId pnfsId = task.getPnfsId();
            if (task == _running.remove(pnfsId)) {
                _queued.add(pnfsId);
                _context.unlock(pnfsId);
            }

            if (_state == State.RUNNING) {
                setState(State.SLEEPING);
            } else {
                schedule();
            }

            addError(new Error(task.getId(), pnfsId, msg));
        } finally {
            _lock.unlock();
        }
    }

    /** Callback from task: Task failed permanently, remove it. */
    @Override
    public void taskFailedPermanently(Task task, int rc, String msg)
    {
        _lock.lock();
        try {
            PnfsId pnfsId = task.getPnfsId();
            _running.remove(pnfsId);
            _sizes.remove(pnfsId);
            _context.unlock(pnfsId);
            schedule();

            addError(new Error(task.getId(), pnfsId, msg));
        } finally {
            _lock.unlock();
        }
    }

    /** Callback from task: Task is done, remove it. */
    @Override
    public void taskCompleted(Task task)
    {
        _lock.lock();
        try {
            PnfsId pnfsId = task.getPnfsId();
            applySourceMode(pnfsId);
            _running.remove(pnfsId);
            _context.unlock(pnfsId);
            _statistics.addCompleted(_sizes.remove(pnfsId));
            schedule();
        } finally {
            _lock.unlock();
        }
    }

    public Object messageArrived(PoolMigrationJobCancelMessage message)
    {
        DelayedReply reply = new DelayedReply();
        _lock.lock();
        try {
            _cancelRequests.put(message, reply);
            cancel(message.isForced());
        } finally {
            _lock.unlock();
        }
        return reply;
    }

    /** Message handler. Delegates to the proper task. */
    public void messageArrived(PoolMigrationCopyFinishedMessage message)
    {
        Task task;
        _lock.lock();
        try {
            task = _running.get(message.getPnfsId());
        } finally {
            _lock.unlock();
        }
        if (task != null) {
            task.messageArrived(message);
        }
    }
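    /*
     * Source replica post-processing. After a successful transfer,
     * applySourceMode (below) adjusts the source replica according to the
     * job's source mode: SAME only applies the configured sticky records;
     * CACHED and PRECIOUS additionally change the replica state; REMOVABLE
     * clears all non-pin sticky flags that were not explicitly requested and
     * marks the replica cached; DELETE removes the replica unless it is
     * pinned, in which case it falls back to the REMOVABLE behaviour.
     */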
    /** Applies sticky flags to a file. */
    private void applySticky(PnfsId pnfsId, List<StickyRecord> records)
        throws CacheException, InterruptedException
    {
        for (StickyRecord record : records) {
            _context.getRepository().setSticky(pnfsId, record.owner(),
                                               record.expire(), true);
        }
    }

    /**
     * Returns true if and only if <code>records</code> contains an
     * entry for <code>owner</code>.
     */
    private boolean containsOwner(List<StickyRecord> records, String owner)
    {
        for (StickyRecord r : records) {
            if (r.owner().equals(owner)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns true if and only if <code>record</code> is owned by the
     * pin manager.
     */
    private boolean isPin(StickyRecord record)
    {
        return record.owner().startsWith(_pinPrefix);
    }

    /**
     * Returns true if and only if the given entry has any sticky
     * records owned by the pin manager.
     */
    private boolean isPinned(CacheEntry entry)
    {
        for (StickyRecord record : entry.getStickyRecords()) {
            if (isPin(record)) {
                return true;
            }
        }
        return false;
    }

    /** Applies the source mode update to a replica. */
    @GuardedBy("_lock")
    private void applySourceMode(PnfsId pnfsId)
    {
        try {
            CacheEntryMode mode = _definition.sourceMode;
            Repository repository = _context.getRepository();
            CacheEntry entry = repository.getEntry(pnfsId);
            switch (mode.state) {
            case SAME:
                applySticky(pnfsId, mode.stickyRecords);
                break;
            case DELETE:
                if (!isPinned(entry)) {
                    repository.setState(pnfsId, ReplicaState.REMOVED);
                    break;
                }
                // Fall through
            case REMOVABLE:
                List<StickyRecord> list = mode.stickyRecords;
                applySticky(pnfsId, list);
                for (StickyRecord record : entry.getStickyRecords()) {
                    String owner = record.owner();
                    if (!isPin(record) && !containsOwner(list, owner)) {
                        repository.setSticky(pnfsId, owner, 0, true);
                    }
                }
                repository.setState(pnfsId, ReplicaState.CACHED);
                break;
            case CACHED:
                applySticky(pnfsId, mode.stickyRecords);
                repository.setState(pnfsId, ReplicaState.CACHED);
                break;
            case PRECIOUS:
                repository.setState(pnfsId, ReplicaState.PRECIOUS);
                applySticky(pnfsId, mode.stickyRecords);
                break;
            }
        } catch (FileNotInCacheException e) {
            // File got removed before we could update it. TODO: log it
        } catch (IllegalTransitionException e) {
            // File is likely about to be removed. TODO: log it
        } catch (CacheException e) {
            _log.error("Migration job failed to update source mode: " + e.getMessage());
            setState(State.FAILED);
        } catch (InterruptedException e) {
            _log.error("Migration job was interrupted");
            setState(State.FAILED);
        }
    }

    public boolean evaluateLifetimePredicate(Expression expression)
    {
        List<PoolManagerPoolInformation> sourceInformation =
            _definition.sourceList.getPools();
        if (sourceInformation.isEmpty()) {
            throw new RuntimeException("Bug detected: Source pool information was unavailable");
        }

        SymbolTable symbols = new SymbolTable();
        symbols.put(MigrationModule.CONSTANT_SOURCE, sourceInformation.get(0));
        symbols.put(MigrationModule.CONSTANT_QUEUE_FILES, _queued.size());
        symbols.put(MigrationModule.CONSTANT_QUEUE_BYTES,
                    _statistics.getTotal() - _statistics.getCompleted());
        symbols.put(MigrationModule.CONSTANT_TARGETS,
                    _definition.poolList.getPools().size());
        return expression.evaluateBoolean(symbols);
    }

    protected static class Error
    {
        private final long _id;
        private final long _time;
        private final PnfsId _pnfsId;
        private final String _error;

        public Error(long id, PnfsId pnfsId, String error)
        {
            _id = id;
            _time = System.currentTimeMillis();
            _pnfsId = pnfsId;
            _error = error;
        }

        @Override
        public String toString()
        {
            return String.format("%tT [%d] %s: %s", _time, _id, _pnfsId, _error);
        }
    }
}