package org.dcache.services.hsmcleaner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.function.Consumer;
import diskCacheV111.vehicles.PoolRemoveFilesFromHSMMessage;
import dmg.cells.nucleus.CellMessageReceiver;
import dmg.cells.nucleus.CellPath;
import org.dcache.cells.CellStub;
import org.dcache.util.Args;
/**
* This class encapsulates the interaction with pools.
*
* At the abstract level it provides a method for submitting file
* deletions. Notification of success or failure is provided
* asynchronously via two sinks.
*
* To reduce the load on pools, files are deleted in batches. For each
* HSM, at most one request is sent at a time. The class defines an
* upper limit on the size of a request.
*/
public class RequestTracker implements CellMessageReceiver
{
    private static final Logger _log =
        LoggerFactory.getLogger(RequestTracker.class);

    /**
     * Timer task representing one outstanding delete request.
     *
     * When the task fires, the request is reported as timed out to
     * the enclosing tracker. The task passes itself along so that the
     * tracker can tell whether it still is the request currently
     * registered for the HSM.
     */
    class Timeout extends TimerTask
    {
        final String _hsm;
        final String _pool;

        Timeout(String hsm, String pool)
        {
            _hsm = hsm;
            _pool = pool;
        }

        @Override
        public void run()
        {
            timeout(this);
        }

        public String getPool()
        {
            return _pool;
        }
    }

    /**
     * CellStub used for sending messages to pools.
     */
    private CellStub _poolStub;

    /**
     * Timeout for delete request, keyed by HSM instance name.
     *
     * For each HSM, we have at most one outstanding remove request.
     */
    private final Map<String,Timeout> _poolRequests =
        new HashMap<>();

    /**
     * A simple queue of locations to delete, grouped by HSM.
     *
     * The main purpose is to allow bulk removal of files, thus not
     * spamming the pools with a large number of small delete
     * requests. For each HSM, there will be at most one outstanding
     * remove request; new entries during that period will be queued.
     *
     * A location stays in the queue until a pool has reported on it
     * or until it has been reported as failed because no pool was
     * available.
     */
    private final Map<String,Set<URI>> _locationsToDelete =
        new HashMap<>();

    /**
     * Locations that could not be deleted are pushed to this sink.
     */
    private Consumer<URI> _failureSink;

    /**
     * Locations that were deleted are pushed to this sink.
     */
    private Consumer<URI> _successSink;

    /**
     * Maximum number of files to include in a single request.
     */
    private int _maxFilesPerRequest = 100;

    /**
     * Timeout in milliseconds for delete requests sent to pools.
     */
    private long _timeout = 60000;

    /**
     * Timer used for implementing timeouts.
     */
    private final Timer _timer = new Timer("Request tracker timeout");

    /**
     * Pools currently available.
     */
    private PoolInformationBase _pools;

    /**
     * Sets the CellStub for communicating with pools.
     */
    public synchronized void setPoolStub(CellStub stub)
    {
        _poolStub = stub;
    }

    /**
     * Set PoolInformationBase from which the request tracker learns
     * about available pools.
     */
    public synchronized void setPoolInformationBase(PoolInformationBase pools)
    {
        _pools = pools;
    }

    /**
     * Set maximum number of files to include in a single request.
     *
     * @param value the limit; must be at least 1
     * @throws IllegalArgumentException if value is smaller than 1. A
     *         limit of zero would produce empty requests that never
     *         drain the queue, and a negative limit cannot be used
     *         to size a request at all.
     */
    public synchronized void setMaxFilesPerRequest(int value)
    {
        if (value < 1) {
            throw new IllegalArgumentException(
                "Maximum number of files per request must be at least 1: " + value);
        }
        _maxFilesPerRequest = value;
    }

    /**
     * Returns maximum number of files to include in a single request.
     */
    public synchronized int getMaxFilesPerRequest()
    {
        return _maxFilesPerRequest;
    }

    /**
     * Set timeout in milliseconds for delete requests sent to pools.
     *
     * @param timeout the timeout in milliseconds; must not be negative
     * @throws IllegalArgumentException if timeout is negative. The
     *         timer rejects negative delays, so accepting the value
     *         here would make a later submit fail instead.
     */
    public synchronized void setTimeout(long timeout)
    {
        if (timeout < 0) {
            throw new IllegalArgumentException(
                "Timeout must not be negative: " + timeout);
        }
        _timeout = timeout;
    }

    /**
     * Returns timeout in milliseconds for delete requests sent to
     * pools.
     */
    public synchronized long getTimeout()
    {
        return _timeout;
    }

    /**
     * Sets the sink to which success to delete a file is reported.
     */
    public synchronized void setSuccessSink(Consumer<URI> sink)
    {
        _successSink = sink;
    }

    /**
     * Sets the sink to which failure to delete a file is reported.
     */
    public synchronized void setFailureSink(Consumer<URI> sink)
    {
        _failureSink = sink;
    }

    /**
     * Submits a request to delete a file.
     *
     * The request may not be submitted right away. It may be queued
     * and submitted together with other requests. The authority part
     * of the URI is used as the name of the HSM instance.
     *
     * @param location the URI of the file to delete
     */
    public synchronized void submit(URI location)
    {
        String hsm = location.getAuthority();
        _locationsToDelete
            .computeIfAbsent(hsm, key -> new HashSet<>())
            .add(location);
        flush(hsm);
    }

    /**
     * Submits requests queued for a given HSM.
     *
     * Does nothing if nothing is queued for the HSM or if a request
     * for the HSM is still outstanding. If no pool is available for
     * the HSM, every queued location is reported to the failure sink
     * and removed from the queue.
     *
     * @param hsm the name of an HSM instance
     */
    private synchronized void flush(String hsm)
    {
        Set<URI> queued = _locationsToDelete.get(hsm);
        if (queued == null || queued.isEmpty()) {
            return;
        }

        if (_poolRequests.containsKey(hsm)) {
            return;
        }

        PoolInformation pool = _pools.getPoolWithHSM(hsm);
        if (pool == null) {
            /* If there is no available pool, then we report failure on
             * all files.
             */
            _log.warn("No pools attached to {} are available", hsm);
            Iterator<URI> i = queued.iterator();
            while (i.hasNext()) {
                URI location = i.next();
                assert location.getAuthority().equals(hsm);
                _failureSink.accept(location);
                i.remove();
            }
            return;
        }

        /* To avoid excessively large requests, we limit the number
         * of files per request. The request always gets its own
         * snapshot of the locations: the queue keeps changing while
         * the request is outstanding, and it is not visible from
         * here whether the message copies the collection it is
         * given.
         */
        Collection<URI> batch =
            new ArrayList<>(Math.min(queued.size(), _maxFilesPerRequest));
        Iterator<URI> iterator = queued.iterator();
        while (batch.size() < _maxFilesPerRequest && iterator.hasNext()) {
            batch.add(iterator.next());
        }

        String name = pool.getName();
        PoolRemoveFilesFromHSMMessage message =
            new PoolRemoveFilesFromHSMMessage(name, hsm, batch);
        _poolStub.notify(new CellPath(name), message);

        Timeout timeout = new Timeout(hsm, name);
        _timer.schedule(timeout, _timeout);
        _poolRequests.put(hsm, timeout);
    }

    /**
     * Called when a request to a pool has timed out. We remove the
     * pool from our list of known pools and resubmit the request.
     *
     * One may worry that in case of problems we end up resubmitting
     * the same requests over and over. A timeout will however only
     * happen if either the pool crashed or in case of a bug in the
     * pool. In the first case we will end up trying another pool. In
     * the second case, we should simply fix the bug in the pool.
     *
     * A timer task that has already started running cannot be
     * cancelled. Such a task may therefore get here after the reply
     * to its request was processed and a new request for the same HSM
     * was registered. In that case the timeout is stale and must be
     * ignored; otherwise it would unregister the new request and
     * drop a pool that did answer.
     *
     * @param expired the timer task of the request that timed out
     */
    private synchronized void timeout(Timeout expired)
    {
        String hsm = expired._hsm;
        String pool = expired._pool;

        if (_poolRequests.get(hsm) != expired) {
            _log.debug("Ignoring stale timeout for HSM {} attached to {}",
                       hsm, pool);
            return;
        }

        _log.error("Timeout deleting files on HSM {} attached to {}",
                   hsm, pool);
        _poolRequests.remove(hsm);
        _pools.remove(pool);
        flush(hsm);
    }

    /**
     * Message handler for responses from pools.
     *
     * Locations reported by the pool are removed from the queue and
     * pushed to the success or failure sink. Locations that are not
     * in the queue are skipped silently. Afterwards the timeout of
     * the outstanding request for the HSM is cancelled and the next
     * batch is submitted.
     */
    public synchronized void messageArrived(PoolRemoveFilesFromHSMMessage msg)
    {
        /* In case of failure we rely on the timeout to invalidate the
         * entries.
         */
        if (msg.getReturnCode() != 0) {
            /* Deliberately concatenated: the logger treats a trailing
             * Throwable argument as the exception to log rather than
             * as a value for the placeholder.
             */
            _log.error("Received failure from pool: " + msg.getErrorObject());
            return;
        }

        String hsm = msg.getHsm();
        Collection<URI> locations = _locationsToDelete.get(hsm);
        Collection<URI> success = msg.getSucceeded();
        Collection<URI> failures = msg.getFailed();

        if (locations == null) {
            /* Seems we got a reply for something this instance did
             * not request. We log this as a warning, but otherwise
             * ignore it.
             */
            _log.warn("Received confirmation from a pool, for an action this cleaner did not request.");
            return;
        }

        if (!failures.isEmpty()) {
            _log.warn("Failed to delete {} files from HSM {}. Will try again later.",
                      failures.size(), hsm);
        }

        for (URI location : success) {
            assert location.getAuthority().equals(hsm);
            if (locations.remove(location)) {
                _successSink.accept(location);
            }
        }

        for (URI location : failures) {
            assert location.getAuthority().equals(hsm);
            if (locations.remove(location)) {
                _failureSink.accept(location);
            }
        }

        Timeout timeout = _poolRequests.remove(hsm);
        if (timeout != null) {
            timeout.cancel();
        }

        flush(hsm);
    }

    public static final String hh_requests_ls = "[hsm] # Lists delete requests";

    /**
     * Lists queued delete requests.
     *
     * Without an argument, prints one line per HSM instance with the
     * number of queued files and, if a request is outstanding, the
     * pool it was sent to. With an HSM instance name as argument,
     * prints the queued locations of that HSM, one per line.
     *
     * @param args the command arguments; the optional first argument
     *             is the name of an HSM instance
     * @return the listing
     */
    public synchronized String ac_requests_ls_$_0_1(Args args)
    {
        StringBuilder sb = new StringBuilder();
        if (args.argc() == 0) {
            sb.append(String.format("%-15s %s %s\n",
                                    "HSM Instance", "Files", "Pool"));
            for (Map.Entry<String,Set<URI>> e: _locationsToDelete.entrySet()) {
                Timeout timeout = _poolRequests.get(e.getKey());
                if (timeout == null) {
                    sb.append(String.format("%-15s %5d\n",
                                            e.getKey(),
                                            e.getValue().size()));
                } else {
                    sb.append(String.format("%-15s %5d %s\n",
                                            e.getKey(),
                                            e.getValue().size(),
                                            timeout.getPool()));
                }
            }
        } else {
            String hsm = args.argv(0);
            Collection<URI> locations = _locationsToDelete.get(hsm);
            if (locations != null) {
                for (URI location: locations) {
                    sb.append(location).append('\n');
                }
            }
        }
        return sb.toString();
    }

    /**
     * Cancels the timer used for request timeouts.
     */
    public void shutdown()
    {
        _timer.cancel();
    }
}