package org.dcache.pool.migration;
import statemap.TransitionUndefinedException;
import java.io.PrintWriter;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayDeque;
import java.util.Collection;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;
import diskCacheV111.util.PnfsId;
import diskCacheV111.vehicles.Message;
import diskCacheV111.vehicles.PnfsGetCacheLocationsMessage;
import diskCacheV111.vehicles.PoolManagerPoolInformation;
import dmg.cells.nucleus.CellPath;
import org.dcache.cells.AbstractMessageCallback;
import org.dcache.cells.CellStub;
import org.dcache.namespace.FileAttribute;
import org.dcache.pool.repository.ReplicaState;
import org.dcache.pool.repository.StickyRecord;
import org.dcache.services.pinmanager1.PinManagerMovePinMessage;
import org.dcache.util.FireAndForgetTask;
import org.dcache.util.ReflectionUtils;
import org.dcache.vehicles.FileAttributes;
import static com.google.common.base.Preconditions.checkState;
import static java.util.stream.Collectors.toCollection;
import static java.util.stream.Collectors.toList;
/**
* Encapsulates the migration of a single replica of a migration job.
*
* A task encapsulates the logic for migrating a replica according to the conditions
* defined by a migration job. The high level logic is defined in the Task state
* machine generated with SMC, see Task.sm.
*
* This class mostly contains utility methods and auxiliary state used by the
* state machine.
*/
public class Task
{
private static final AtomicInteger _counter = new AtomicInteger();
private final TaskContext _fsm;
private final TaskCompletionHandler _callbackHandler;
private final String _source;
private final long _id;
private final UUID _uuid;
private final TaskParameters _parameters;
private final PnfsId _pnfsId;
private final ReplicaState _targetState;
private final List<StickyRecord> _targetStickyRecords;
private final List<StickyRecord> _pinsToMove;
private final FileAttributes _fileAttributes;
private final long _atime;
private ScheduledFuture<?> _timerTask;
private Deque<String> _locations = new ArrayDeque<>(0);
private final Set<String> _replicas = new HashSet<>();
private CellPath _target;
public Task(TaskParameters parameters,
TaskCompletionHandler callbackHandler,
String source,
PnfsId pnfsId,
ReplicaState targetState,
List<StickyRecord> targetStickyRecords,
List<StickyRecord> pinsToMove,
FileAttributes fileAttributes,
long atime)
{
_parameters = parameters;
_pnfsId = pnfsId;
_targetState = targetState;
_targetStickyRecords = targetStickyRecords;
_pinsToMove = pinsToMove;
_fileAttributes = fileAttributes;
_id = _counter.getAndIncrement();
_uuid = UUID.randomUUID();
_fsm = new TaskContext(this);
_callbackHandler = callbackHandler;
_source = source;
_atime = atime;
}
public boolean getMustMovePins()
{
return !_pinsToMove.isEmpty();
}
public long getId()
{
return _id;
}
public PnfsId getPnfsId()
{
return _pnfsId;
}
/** Time in milliseconds between pings. */
public long getPingPeriod()
{
return _parameters.pool.getTimeoutInMillis() * 2;
}
/**
* Time in milliseconds before we fail the task if we loose the
* cell connection.
*/
public long getNoResponseTimeout()
{
return _parameters.pool.getTimeoutInMillis() * 2;
}
/**
* Time in milliseconds before we fail the task if we do not get
* CopyFinished.
*/
public long getTaskDeadTimeout()
{
return _parameters.pool.getTimeoutInMillis();
}
/**
* Eager tasks copy files if attempts to update existing copies
* timeout or fail due to communication problems. Other tasks fail
* in this situation.
*/
public boolean isEager()
{
return _parameters.isEager;
}
/**
* Meta only jobs only upgrade existing replicas - they never copy replicas. If
* no or not enough existing replicas exist, the task fails permanently.
*/
public boolean isMetaOnly()
{
return _parameters.isMetaOnly;
}
/**
* Returns the current target pool, if any.
*/
synchronized String getTarget()
{
return _target == null ? "" : _target.toSmallString();
}
/**
* Returns a pool from the pool list using the pool selection
* strategy.
*/
private CellPath selectPool()
throws NoSuchElementException
{
List<PoolManagerPoolInformation> pools =
_parameters.poolList.getPools().stream()
.filter(pool -> !_replicas.contains(pool.getName()))
.collect(toList());
PoolManagerPoolInformation pool = _parameters.selectionStrategy.select(pools);
if (pool == null) {
if (pools.isEmpty()) {
throw new NoSuchElementException("No pools available.");
}
throw new NoSuchElementException("All target pools are full.");
}
return new CellPath(pool.getName());
}
/** Adds status information about the task to <code>pw</code>. */
synchronized void getInfo(PrintWriter pw)
{
if (_target != null) {
pw.println(String.format("[%d] %s: %s -> %s",
_id,
_pnfsId,
_fsm.getState(),
_target.toSmallString()));
} else {
pw.println(String.format("[%d] %s: %s",
_id,
_pnfsId,
_fsm.getState()));
}
}
/** Message handler - ignores messages with the wrong ID */
public synchronized void
messageArrived(PoolMigrationCopyFinishedMessage message)
{
if (_uuid.equals(message.getUUID())) {
_replicas.add(message.getPool());
_fsm.messageArrived(message);
}
}
/** FSM Action */
synchronized void queryLocations()
{
CellStub.addCallback(_parameters.pnfs.send(new PnfsGetCacheLocationsMessage(getPnfsId())),
new Callback<PnfsGetCacheLocationsMessage>("query_")
{
@Override
public void success(PnfsGetCacheLocationsMessage msg)
{
setLocations(msg.getCacheLocations());
super.success(msg);
}
}, _parameters.executor);
}
/**
* Sets the list of pools on which a copy of the replica is known
* to exist.
*/
private synchronized void setLocations(Collection<String> locations)
{
Stream<String> pools = _parameters.poolList.getPools().stream().map(PoolManagerPoolInformation::getName);
if (!_parameters.isEager) {
pools = Stream.concat(pools, _parameters.poolList.getOfflinePools().stream());
}
_locations = pools.filter(locations::contains).collect(toCollection(ArrayDeque::new));
}
/**
* Returns true iff there are more target pools with copies of the
* replica.
*/
synchronized boolean hasMoreLocations()
{
return !_locations.isEmpty();
}
/**
* Returns true iff the more replicas are needed to satisfy the requirements
* of the migration job.
*/
synchronized boolean needsMoreReplicas()
{
return _replicas.size() < _parameters.replicas;
}
/** FSM Action */
synchronized void updateExistingReplica()
{
assert !_locations.isEmpty();
initiateCopy(new CellPath(_locations.removeFirst()));
}
/** FSM Action */
synchronized void initiateCopy()
{
checkState(!isMetaOnly());
try {
initiateCopy(selectPool());
} catch (NoSuchElementException e) {
_target = null;
_parameters.executor.execute(new FireAndForgetTask(() -> {
synchronized (Task.this) {
_fsm.copy_nopools();
}
}));
}
}
/**
* Ask <code>target</code> to copy the file.
*/
private synchronized void
initiateCopy(CellPath target)
{
_target = target;
PoolMigrationCopyReplicaMessage copyReplicaMessage =
new PoolMigrationCopyReplicaMessage(_uuid,
_source,
_fileAttributes,
_targetState,
_targetStickyRecords,
_parameters.computeChecksumOnUpdate,
_parameters.forceSourceMode,
_parameters.maintainAtime ? _atime : null,
_parameters.isMetaOnly);
CellStub.addCallback(_parameters.pool.send(_target, copyReplicaMessage),
new Callback<>("copy_"), _parameters.executor);
}
/** FSM Action */
synchronized void cancelCopy()
{
CellStub.addCallback(_parameters.pool.send(_target,
new PoolMigrationCancelMessage(_uuid,
_source,
getPnfsId())),
new Callback<>("cancel_"), _parameters.executor);
}
/** FSM Action */
synchronized void movePin()
{
Callback<PinManagerMovePinMessage> callback = new Callback<>("move_");
String target = _target.getDestinationAddress().getCellName();
PinManagerMovePinMessage message =
new PinManagerMovePinMessage(getPnfsId(), _pinsToMove, _source, target);
CellStub.addCallback(_parameters.pinManager.send(message), callback, _parameters.executor);
}
/** FSM Action */
void notifyCancelled()
{
_parameters.executor.execute(new FireAndForgetTask(() -> _callbackHandler.taskCancelled(Task.this)));
}
/** FSM Action */
void fail(int rc, String message)
{
_parameters.executor.execute(new FireAndForgetTask(() -> _callbackHandler.taskFailed(Task.this, rc, message)));
}
/** FSM Action */
void failPermanently(int rc, String message)
{
_parameters.executor.execute(new FireAndForgetTask(
() -> _callbackHandler.taskFailedPermanently(Task.this, rc, message)));
}
/** FSM Action */
void notifyCompleted()
{
_parameters.executor.execute(new FireAndForgetTask(() -> _callbackHandler.taskCompleted(Task.this)));
}
/** FSM Action */
synchronized void startTimer(long delay)
{
Runnable task =
() -> {
synchronized (Task.this) {
if (_timerTask != null) {
_fsm.timer();
_timerTask = null;
}
}
};
_timerTask =
_parameters.executor.schedule(new FireAndForgetTask(task),
delay, TimeUnit.MILLISECONDS);
}
/** FSM Action */
synchronized void stopTimer()
{
if (_timerTask != null) {
_timerTask.cancel(false);
_timerTask = null;
}
}
/** FSM Action */
synchronized void ping()
{
CellStub.addCallback(_parameters.pool.send(_target,
new PoolMigrationPingMessage(_uuid,
_source,
getPnfsId())),
new Callback<>("ping_"),
_parameters.executor);
}
/**
* Starts the task.
*/
public synchronized void run()
{
if (_fileAttributes.isDefined(FileAttribute.LOCATIONS)) {
setLocations(_fileAttributes.getLocations());
_fsm.startWithLocations();
} else {
_fsm.startWithoutLocations();
}
}
/**
* Cancels the task, if not already completed. This will trigger a
* notification (postponed).
*/
public synchronized void cancel()
{
_fsm.cancel();
}
/**
* Helper class implementing the MessageCallback interface,
* forwarding all messages as events to the state machine. Events
* are forwarded via an executor to guarantee asynchronous delivery
* (SMC state machines do not allow transitions to be triggered
* from within transitions).
*/
class Callback<T extends Message> extends AbstractMessageCallback<T>
{
private final String _prefix;
public Callback()
{
_prefix = "";
}
public Callback(String prefix)
{
_prefix = prefix;
}
protected void transition(String name, final Object... arguments)
{
try {
Class<?>[] parameterTypes = new Class[arguments.length];
for (int i = 0; i < arguments.length; i++) {
parameterTypes[i] = arguments[i].getClass();
}
final Method m =
ReflectionUtils.resolve(_fsm.getClass(), _prefix + name,
parameterTypes);
if (m != null) {
try {
synchronized (Task.this) {
m.invoke(_fsm, arguments);
}
} catch (IllegalAccessException | InvocationTargetException e) {
/* We are not allowed to call this
* method. Better escalate it.
*/
throw new RuntimeException("Bug detected", e);
} catch (TransitionUndefinedException e) {
throw new RuntimeException("State machine is incomplete", e);
}
}
} catch (Throwable e) {
Thread thisThread = Thread.currentThread();
Thread.UncaughtExceptionHandler ueh = thisThread.getUncaughtExceptionHandler();
ueh.uncaughtException( thisThread, e);
}
}
@Override
public void success(T message)
{
transition("success");
}
@Override
public void failure(int rc, Object cause)
{
transition("failure", rc, cause);
}
@Override
public void timeout(String error)
{
transition("timeout");
}
@Override
public void noroute(CellPath path)
{
transition("noroute");
}
}
}