package org.dcache.pinmanager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Required;
import org.springframework.transaction.annotation.Transactional;
import javax.security.auth.Subject;
import java.io.IOException;
import java.util.Date;
import java.util.EnumSet;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.Executor;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.regex.PatternSyntaxException;
import diskCacheV111.poolManager.RequestContainerV5;
import diskCacheV111.util.CacheException;
import diskCacheV111.util.CheckStagePermission;
import diskCacheV111.util.FileNotOnlineCacheException;
import diskCacheV111.util.PnfsId;
import diskCacheV111.vehicles.PoolMgrSelectReadPoolMsg;
import diskCacheV111.vehicles.PoolSetStickyMessage;
import dmg.cells.nucleus.CellAddressCore;
import dmg.cells.nucleus.CellPath;
import org.dcache.cells.AbstractMessageCallback;
import dmg.cells.nucleus.CellMessageReceiver;
import org.dcache.cells.CellStub;
import org.dcache.cells.MessageReply;
import org.dcache.namespace.FileAttribute;
import org.dcache.pinmanager.model.Pin;
import org.dcache.poolmanager.PoolInfo;
import org.dcache.poolmanager.PoolMonitor;
import org.dcache.poolmanager.PoolSelector;
import org.dcache.poolmanager.SelectedPool;
import org.dcache.vehicles.PnfsGetFileAttributes;
import static java.util.concurrent.TimeUnit.*;
import static org.dcache.pinmanager.model.Pin.State.*;
import static org.springframework.transaction.annotation.Isolation.REPEATABLE_READ;
/**
* Processes pin requests.
*
* A pin request goes through several steps to pin a file on a pool:
*
* - Create DB entry in state PINNING
* - Optionally read the name space entry
* - Select a read pool (which may involve staging)
* - Update DB entry with the pool name
* - Create sticky flag on pool
* - Update DB entry to state PINNED
*
* If during any step the entry is no longer in PINNING then the
* operation is aborted.
*
* If a DB error occurs it is considered fatal and the pinning
* operation is not completed. The DB entry will stay in PINNING until
* either explicitly unpinned or it expires.
*
* Database operations are blocking. Communication with PoolManager
* and pools is asynchronous.
*/
public class PinRequestProcessor
implements CellMessageReceiver
{
/**
* Logger of this class.
*/
private static final Logger _log =
LoggerFactory.getLogger(PinRequestProcessor.class);
/**
* The delay, in milliseconds, we use after a pin request failed and
* before retrying the request.
*/
private static final long RETRY_DELAY = SECONDS.toMillis(30);
/**
* The delay, in milliseconds, we use after transient failures that
* should be retried immediately. The small delay prevents tight
* retry loops.
*/
private static final long SMALL_DELAY = MILLISECONDS.toMillis(10);
/**
* Safety margin, in milliseconds, added to the lifetime of the
* sticky bit to account for clock drift.
*/
private static final long CLOCK_DRIFT_MARGIN = MINUTES.toMillis(30);
/**
* Executor on which delayed retries of pin requests are scheduled.
*/
private ScheduledExecutorService _scheduledExecutor;
/**
* Executor handed to CellStub.addCallback for running the reply
* callbacks.
*/
private Executor _executor;
/**
* Data access object through which pin records are read, created,
* updated and deleted.
*/
private PinDao _dao;
/**
* Stub used to send PoolSetStickyMessage to pools.
*/
private CellStub _poolStub;
/**
* Stub used to send PnfsGetFileAttributes requests.
*/
private CellStub _pnfsStub;
/**
* Stub used to send PoolMgrSelectReadPoolMsg requests.
*/
private CellStub _poolManagerStub;
/**
* Consulted to decide whether a pin request may trigger staging.
*/
private CheckStagePermission _checkStagePermission;
/**
* Upper limit on the pin lifetime, expressed in _maxLifetimeUnit.
* The limit is only enforced when the value is greater than -1.
*/
private long _maxLifetime;
/**
* Time unit of _maxLifetime.
*/
private TimeUnit _maxLifetimeUnit;
/**
* Source of the PoolSelector used to pick a pool for pinning.
*/
private PoolMonitor _poolMonitor;
/**
 * Injects the executor on which delayed retries are scheduled.
 */
@Required
public void setScheduledExecutor(ScheduledExecutorService executor)
{
    this._scheduledExecutor = executor;
}

/**
 * Injects the executor on which message callbacks are run.
 */
@Required
public void setExecutor(Executor executor)
{
    this._executor = executor;
}

/**
 * Injects the DAO used to access pin records.
 */
@Required
public void setDao(PinDao dao)
{
    this._dao = dao;
}

/**
 * Injects the stub used for talking to pools.
 */
@Required
public void setPoolStub(CellStub stub)
{
    this._poolStub = stub;
}

/**
 * Injects the stub used for name space lookups.
 */
@Required
public void setPnfsStub(CellStub stub)
{
    this._pnfsStub = stub;
}

/**
 * Injects the stub used for talking to pool manager.
 */
@Required
public void setPoolManagerStub(CellStub stub)
{
    this._poolManagerStub = stub;
}

/**
 * Injects the checker that decides whether staging is permitted.
 */
@Required
public void setStagePermission(CheckStagePermission checker)
{
    this._checkStagePermission = checker;
}

/**
 * Sets the maximum pin lifetime, expressed in the unit given to
 * {@link #setMaxLifetimeUnit}. The limit is only enforced when the
 * value is greater than -1.
 */
@Required
public void setMaxLifetime(long maxLifetime)
{
    this._maxLifetime = maxLifetime;
}

/**
 * Injects the pool monitor from which pool selectors are obtained.
 */
@Required
public void setPoolMonitor(PoolMonitor poolMonitor)
{
    this._poolMonitor = poolMonitor;
}

/**
 * Returns the maximum pin lifetime as configured.
 */
public long getMaxLifetime()
{
    return this._maxLifetime;
}

/**
 * Sets the time unit in which the maximum pin lifetime is expressed.
 */
@Required
public void setMaxLifetimeUnit(TimeUnit unit)
{
    this._maxLifetimeUnit = unit;
}

/**
 * Returns the time unit in which the maximum pin lifetime is expressed.
 */
public TimeUnit getMaxLifetimeUnit()
{
    return this._maxLifetimeUnit;
}
/**
 * Caps the lifetime carried by the request at the configured maximum.
 *
 * Nothing is changed unless the configured maximum is greater than
 * -1. A requested lifetime of -1 is replaced by the maximum; any
 * other value is replaced by the smaller of the two.
 *
 * @param message the pin request whose lifetime is adjusted in place
 */
private void enforceLifetimeLimit(PinManagerPinMessage message)
{
    if (_maxLifetime <= -1) {
        return;
    }

    long limit = _maxLifetimeUnit.toMillis(_maxLifetime);
    long requested = message.getLifetime();
    long effective = (requested == -1) ? limit : Math.min(limit, requested);
    message.setLifetime(effective);
}
/**
 * Entry point for pin requests.
 *
 * Applies the lifetime limit, creates the pin task and kicks off
 * either a name space lookup (when attributes needed for pool
 * selection are missing) or the pool selection itself.
 *
 * @param message the pin request
 * @return the reply object associated with the request
 * @throws CacheException if starting the pin operation fails
 */
public MessageReply<PinManagerPinMessage>
    messageArrived(PinManagerPinMessage message)
    throws CacheException
{
    MessageReply<PinManagerPinMessage> reply = new MessageReply<>();

    enforceLifetimeLimit(message);

    PinTask task = createTask(message, reply);
    if (task == null) {
        /* No task was created, so there is nothing further to
         * start.
         */
        return reply;
    }

    boolean hasRequiredAttributes =
            task.getFileAttributes().isDefined(PoolMgrSelectReadPoolMsg.getRequiredAttributes());
    if (hasRequiredAttributes) {
        selectReadPool(task);
    } else {
        rereadNameSpaceEntry(task);
    }
    return reply;
}
/**
 * Determines which pool manager request states are allowed for the
 * task.
 *
 * @param task the pin task to check
 * @return all states if the stage permission check grants staging;
 *         all states except staging if it denies staging or if the
 *         check itself fails
 */
protected EnumSet<RequestContainerV5.RequestState>
    checkStaging(PinTask task)
{
    boolean mayStage;
    try {
        Subject subject = task.getSubject();
        mayStage = _checkStagePermission.canPerformStaging(subject,
                                                           task.getFileAttributes(),
                                                           task.getProtocolInfo());
    } catch (PatternSyntaxException | IOException ex) {
        _log.error("Failed to check stage permission: " + ex);
        return RequestContainerV5.allStatesExceptStage;
    }

    if (mayStage) {
        return RequestContainerV5.allStates;
    }
    return RequestContainerV5.allStatesExceptStage;
}
/**
 * Restarts the pin operation from the name space lookup after the
 * given delay, or fails the task with a timeout if it is not valid
 * in {@code delay} milliseconds.
 *
 * @param task the pin task to retry
 * @param delay the delay in milliseconds before retrying
 */
private void retry(final PinTask task, long delay)
{
    if (task.isValidIn(delay)) {
        Runnable attempt = () -> {
            try {
                rereadNameSpaceEntry(task);
            } catch (CacheException e) {
                fail(task, e.getRc(), e.getMessage());
            } catch (RuntimeException e) {
                fail(task, CacheException.UNEXPECTED_SYSTEM_EXCEPTION, e.toString());
            }
        };
        _scheduledExecutor.schedule(attempt, delay, MILLISECONDS);
        return;
    }

    fail(task, CacheException.TIMEOUT, "Pin request TTL exceeded");
}
/**
 * Fails the task with the given return code and message and then
 * cleans up the pin record.
 *
 * Runtime exceptions raised while doing so are logged and not
 * propagated.
 *
 * @param task the pin task to fail
 * @param rc the return code to report
 * @param error the error message to report
 */
private void fail(PinTask task, int rc, String error)
{
    try {
        task.fail(rc, error);
        clearPin(task);
    } catch (RuntimeException e) {
        /* A runtime exception at this point is unexpected, so log
         * it together with its stack trace rather than only its
         * string form.
         */
        _log.error("Failure while aborting pin request: " + e, e);
    }
}
/**
 * Asynchronously reads the name space entry of the file and, on
 * success, proceeds with selecting a read pool.
 *
 * The lookup requests the attributes already defined on the task
 * plus those required by pool manager.
 *
 * @param task the pin task being processed
 * @throws CacheException if the task timeout cannot be refreshed
 */
private void rereadNameSpaceEntry(final PinTask task)
    throws CacheException
{
    /* Ensure that task is still valid and stays valid for the
     * duration of the name space lookup.
     */
    refreshTimeout(task, getExpirationTimeForNameSpaceLookup());

    /* We allow the set of provided attributes to be incomplete
     * and thus add attributes required by pool manager.
     */
    Set<FileAttribute> attributes = EnumSet.noneOf(FileAttribute.class);
    attributes.addAll(task.getFileAttributes().getDefinedAttributes());
    attributes.addAll(PoolMgrSelectReadPoolMsg.getRequiredAttributes());

    CellStub.addCallback(
        _pnfsStub.send(new PnfsGetFileAttributes(task.getPnfsId(), attributes)),
        new AbstractMessageCallback<PnfsGetFileAttributes>()
        {
            @Override
            public void success(PnfsGetFileAttributes msg)
            {
                try {
                    task.setFileAttributes(msg.getFileAttributes());

                    /* Ensure that task is still valid
                     * and stays valid for the duration
                     * of the pool selection.
                     */
                    refreshTimeout(task, getExpirationTimeForPoolSelection());

                    selectReadPool(task);
                } catch (CacheException e) {
                    fail(task, e.getRc(), e.getMessage());
                } catch (RuntimeException e) {
                    fail(task, CacheException.UNEXPECTED_SYSTEM_EXCEPTION, e.toString());
                }
            }

            @Override
            public void failure(int rc, Object error)
            {
                /* Guard against a reply without an error
                 * object: an exception thrown here would
                 * leave the task neither failed nor retried.
                 */
                String reason =
                    (error == null) ? "Name space lookup failed" : error.toString();
                fail(task, rc, reason);
            }

            @Override
            public void noroute(CellPath path)
            {
                /* PnfsManager is unreachable. We
                 * expect this to be a transient
                 * problem and retry in a moment.
                 */
                retry(task, RETRY_DELAY);
            }

            @Override
            public void timeout(String error)
            {
                /* PnfsManager did not respond. We
                 * expect this to be a transient
                 * problem and retry in a moment.
                 */
                retry(task, SMALL_DELAY);
            }
        }, _executor);
}
/**
 * Picks a pool for the pin through the pool monitor and proceeds to
 * set the sticky flag on it. If the selection reports that the file
 * is not online, the request is handed to pool manager instead.
 *
 * @param task the pin task being processed
 * @throws CacheException if pool selection or the DB update fails
 */
private void selectReadPool(final PinTask task)
    throws CacheException
{
    try {
        SelectedPool selected =
                _poolMonitor
                        .getPoolSelector(task.getFileAttributes(), task.getProtocolInfo(), null)
                        .selectPinPool();
        String poolName = selected.name();

        /* Record the pool before asking it to set the flag. */
        setPool(task, poolName);
        setStickyFlag(task, poolName, selected.address());
    } catch (FileNotOnlineCacheException e) {
        askPoolManager(task);
    }
}
/**
 * Asynchronously asks pool manager to select a read pool for the
 * file and, on success, proceeds with setting the sticky flag on
 * the selected pool.
 *
 * Whether staging is allowed is decided by {@code checkStaging}.
 *
 * @param task the pin task being processed
 */
private void askPoolManager(final PinTask task)
{
    PoolMgrSelectReadPoolMsg msg =
        new PoolMgrSelectReadPoolMsg(task.getFileAttributes(),
                                     task.getProtocolInfo(),
                                     task.getReadPoolSelectionContext(),
                                     checkStaging(task));
    msg.setSubject(task.getSubject());

    CellStub.addCallback(_poolManagerStub.send(msg),
        new AbstractMessageCallback<PoolMgrSelectReadPoolMsg>()
        {
            @Override
            public void success(PoolMgrSelectReadPoolMsg msg)
            {
                try {
                    /* Pool manager expects us
                     * to keep some state
                     * between retries.
                     */
                    task.setReadPoolSelectionContext(msg.getContext());

                    /* Store the pool name in
                     * the DB so we know what to
                     * clean up if something
                     * fails.
                     */
                    String poolName = msg.getPoolName();
                    CellAddressCore poolAddress = msg.getPoolAddress();
                    task.getFileAttributes().getLocations().add(poolName);
                    setPool(task, poolName);

                    setStickyFlag(task, poolName, poolAddress);
                } catch (CacheException e) {
                    fail(task, e.getRc(), e.getMessage());
                } catch (RuntimeException e) {
                    fail(task, CacheException.UNEXPECTED_SYSTEM_EXCEPTION, e.toString());
                }
            }

            @Override
            public void failure(int rc, Object error)
            {
                /* Pool manager expects us to
                 * keep some state between
                 * retries.
                 */
                task.setReadPoolSelectionContext(getReply().getContext());

                switch (rc) {
                case CacheException.OUT_OF_DATE:
                    /* Pool manager asked for a
                     * refresh of the request.
                     * Retry right away.
                     */
                    retry(task, 0);
                    break;
                case CacheException.FILE_NOT_IN_REPOSITORY:
                case CacheException.PERMISSION_DENIED:
                    /* Guard against a reply without an
                     * error object: an exception thrown
                     * here would leave the task neither
                     * failed nor retried.
                     */
                    fail(task, rc,
                         (error == null) ? "Pool selection failed" : error.toString());
                    break;
                default:
                    /* Ideally we would delegate the retry to the door,
                     * but for the time being the retry is dealt with
                     * by pin manager.
                     */
                    retry(task, RETRY_DELAY);
                    break;
                }
            }

            @Override
            public void noroute(CellPath path)
            {
                /* Pool manager is
                 * unreachable. We expect this
                 * to be transient and retry in
                 * a moment.
                 */
                retry(task, RETRY_DELAY);
            }

            @Override
            public void timeout(String message)
            {
                /* Pool manager did not
                 * respond. We expect this to be
                 * transient and retry in a
                 * moment.
                 */
                retry(task, SMALL_DELAY);
            }
        }, _executor);
}
/**
 * Asynchronously asks the pool to set the sticky flag for the file
 * and, on success, moves the pin to PINNED and completes the task.
 *
 * @param task the pin task being processed
 * @param poolName the name of the pool on which to set the flag
 * @param poolAddress the address to which the request is sent
 */
private void setStickyFlag(final PinTask task, final String poolName, CellAddressCore poolAddress)
{
    /* The pin lifetime should be from the moment the file is
     * actually pinned. Due to staging and pool to pool transfers
     * this may be much later than when the pin was requested.
     */
    Date pinExpiration = task.freezeExpirationTime();

    /* To allow for some drift in clocks we add a safety margin to
     * the lifetime of the sticky bit. Without a pin expiration
     * time, -1 is passed on as the expiration of the flag.
     */
    long poolExpiration =
        (pinExpiration == null) ? -1 : pinExpiration.getTime() + CLOCK_DRIFT_MARGIN;

    PoolSetStickyMessage msg =
        new PoolSetStickyMessage(poolName,
                                 task.getPnfsId(),
                                 true,
                                 task.getSticky(),
                                 poolExpiration);
    CellStub.addCallback(_poolStub.send(new CellPath(poolAddress), msg),
        new AbstractMessageCallback<PoolSetStickyMessage>()
        {
            @Override
            public void success(PoolSetStickyMessage msg)
            {
                try {
                    setToPinned(task);
                    task.success();
                } catch (CacheException e) {
                    fail(task, e.getRc(), e.getMessage());
                } catch (RuntimeException e) {
                    fail(task, CacheException.UNEXPECTED_SYSTEM_EXCEPTION, e.toString());
                }
            }

            @Override
            public void failure(int rc, Object error)
            {
                switch (rc) {
                case CacheException.POOL_DISABLED:
                    /* Pool manager had outdated
                     * information about the pool. Give
                     * it a chance to be updated and
                     * then retry.
                     */
                    retry(task, RETRY_DELAY);
                    break;
                case CacheException.FILE_NOT_IN_REPOSITORY:
                    /* Pnfs manager had stale location
                     * information. The pool clears
                     * this information as a result of
                     * this error, so we retry in a
                     * moment.
                     */
                    retry(task, SMALL_DELAY);
                    break;
                default:
                    /* Guard against a reply without an
                     * error object: an exception thrown
                     * here would leave the task neither
                     * failed nor retried.
                     */
                    fail(task, rc,
                         (error == null) ? "Failed to set sticky flag" : error.toString());
                    break;
                }
            }

            @Override
            public void noroute(CellPath path)
            {
                /* The pool must have gone down. Give
                 * pool manager a moment to notice this
                 * and then retry.
                 */
                retry(task, RETRY_DELAY);
            }

            @Override
            public void timeout(String error)
            {
                /* No response from pool. Typically this is
                 * because the pool is overloaded.
                 */
                fail(task, CacheException.TIMEOUT, error);
            }
        }, _executor);
}
/**
 * Returns the point in time until which a pin record must stay
 * valid to cover a name space lookup: now plus twice the sum of the
 * name space stub timeout and the retry delay.
 */
private Date getExpirationTimeForNameSpaceLookup()
{
    long start = System.currentTimeMillis();
    long margin = 2 * (_pnfsStub.getTimeoutInMillis() + RETRY_DELAY);
    return new Date(start + margin);
}
/**
 * Returns the point in time until which a pin record must stay
 * valid to cover pool selection: now plus twice the sum of the pool
 * manager stub timeout and the retry delay.
 */
private Date getExpirationTimeForPoolSelection()
{
    long start = System.currentTimeMillis();
    long margin = 2 * (_poolManagerStub.getTimeoutInMillis() + RETRY_DELAY);
    return new Date(start + margin);
}
/**
 * Returns the point in time until which a pin record must stay
 * valid to cover setting the sticky flag: now plus twice the pool
 * stub timeout.
 */
private Date getExpirationTimeForSettingFlag()
{
    long start = System.currentTimeMillis();
    long margin = 2 * _poolStub.getTimeoutInMillis();
    return new Date(start + margin);
}
/**
 * Creates the DB record in state PINNING and the task for a pin
 * request.
 *
 * If the request carries a request id for which a pin already
 * exists, the request is treated as a resubmission: a pin that is
 * already PINNED is returned to the requester right away, any other
 * pin is moved to UNPINNING and detached from the request id before
 * a new record is created.
 *
 * @param message the pin request
 * @param reply the reply object to complete
 * @return the new task, or null if the request was answered from an
 *         existing pin
 */
@Transactional(isolation = REPEATABLE_READ)
protected PinTask createTask(PinManagerPinMessage message,
                             MessageReply<PinManagerPinMessage> reply)
{
    PnfsId pnfsId = message.getFileAttributes().getPnfsId();

    if (message.getRequestId() != null) {
        Pin previous =
                _dao.get(_dao.where().pnfsId(pnfsId).requestId(message.getRequestId()));
        if (previous != null && previous.getState() == PINNED) {
            /* Resubmission of a request that already completed:
             * reuse the existing pin.
             */
            message.setPin(previous);
            reply.reply(message);
            return null;
        }
        if (previous != null) {
            /* Resubmission of a request that did not complete:
             * abort the previous pin and start over.
             */
            _dao.update(previous, _dao.set().state(UNPINNING).requestId(null));
        }
    }

    Pin created = _dao.create(
            _dao.set()
                .subject(message.getSubject())
                .state(PINNING)
                .pnfsId(pnfsId)
                .requestId(message.getRequestId())
                .sticky("PinManager-" + UUID.randomUUID().toString())
                .expirationTime(getExpirationTimeForPoolSelection()));
    return new PinTask(message, reply, created);
}
/**
 * Applies the update to the task's pin record, provided the record
 * still matches the task's pin id and sticky name and is in state
 * PINNING, and stores the updated pin on the task.
 *
 * @param task the pin task whose record is updated
 * @param update the changes to apply
 * @throws CacheException if the update returns no record
 */
private void updateTask(PinTask task, PinDao.PinUpdate update) throws CacheException
{
    Pin updated = _dao.update(
            _dao.where().id(task.getPinId()).sticky(task.getSticky()).state(PINNING),
            update);
    if (updated != null) {
        task.setPin(updated);
        return;
    }
    throw new CacheException("Operation was aborted");
}
/**
 * Sets the expiration time of the task's pin record to the given
 * date.
 *
 * @param task the pin task whose record is updated
 * @param date the new expiration time
 * @throws CacheException if the record could not be updated
 */
@Transactional(isolation=REPEATABLE_READ)
protected void refreshTimeout(PinTask task, Date date)
    throws CacheException
{
    PinDao.PinUpdate change = _dao.set().expirationTime(date);
    updateTask(task, change);
}
/**
 * Records the selected pool in the task's pin record and extends
 * the expiration time to cover setting the sticky flag.
 *
 * @param task the pin task whose record is updated
 * @param pool the name of the selected pool
 * @throws CacheException if the record could not be updated
 */
@Transactional(isolation=REPEATABLE_READ)
protected void setPool(PinTask task, String pool)
    throws CacheException
{
    PinDao.PinUpdate change =
            _dao.set().expirationTime(getExpirationTimeForSettingFlag()).pool(pool);
    updateTask(task, change);
}
/**
 * Moves the task's pin record to state PINNED and sets its
 * expiration time to that of the task.
 *
 * @param task the pin task whose record is updated
 * @throws CacheException if the record could not be updated
 */
@Transactional(isolation=REPEATABLE_READ)
protected void setToPinned(PinTask task)
    throws CacheException
{
    PinDao.PinUpdate change =
            _dao.set().expirationTime(task.getExpirationTime()).state(PINNED);
    updateTask(task, change);
}
/**
 * Removes the pin record of a failed task.
 *
 * If a pool was already recorded for the task, the record is
 * replaced by a fresh one in state UNPINNING; otherwise the record
 * is simply deleted.
 *
 * @param task the pin task whose record is cleaned up
 */
@Transactional
protected void clearPin(PinTask task)
{
    if (task.getPool() != null) {
        /* If the pin record expired or the pin was explicitly
         * unpinned, then the unpin processor may already have
         * submitted a request to the pool to clear the sticky
         * flag. Although out of order delivery of messages is
         * unlikely, if it would happen then we have a race
         * between the set sticky and clear sticky messages. To
         * cover this case we delete the old record and create a
         * fresh one in UNPINNING.
         *
         * The fresh record must carry the pool and the name of
         * the sticky flag; without them the record would not
         * identify which flag on which pool is to be cleared.
         */
        _dao.delete(task.getPin());
        _dao.create(_dao.set()
                        .subject(task.getSubject())
                        .pnfsId(task.getPnfsId())
                        .pool(task.getPool())
                        .sticky(task.getSticky())
                        .state(UNPINNING));
    } else {
        /* We didn't create a sticky flag yet, so there is no
         * reason to keep the record. It will expire by itself,
         * but we delete the record now to avoid that we get
         * tickets from admins wondering why they have records
         * staying in PINNING.
         */
        _dao.delete(task.getPin());
    }
}
}