package org.dcache.pool.repository.v5; import com.google.common.base.Throwables; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.concurrent.GuardedBy; import java.io.PrintWriter; import java.util.Collection; import java.util.Collections; import java.util.EnumSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import diskCacheV111.util.CacheException; import diskCacheV111.util.DiskErrorCacheException; import diskCacheV111.util.DiskSpace; import diskCacheV111.util.FileCorruptedCacheException; import diskCacheV111.util.FileInCacheException; import diskCacheV111.util.FileNotInCacheException; import diskCacheV111.util.LockedCacheException; import diskCacheV111.util.PnfsHandler; import diskCacheV111.util.PnfsId; import diskCacheV111.vehicles.PnfsAddCacheLocationMessage; import dmg.cells.nucleus.CellAddressCore; import dmg.cells.nucleus.CellCommandListener; import dmg.cells.nucleus.CellIdentityAware; import dmg.cells.nucleus.CellInfoProvider; import dmg.cells.nucleus.CellSetupProvider; import dmg.util.command.Argument; import dmg.util.command.Command; import org.dcache.pool.FaultAction; import org.dcache.pool.FaultEvent; import org.dcache.pool.FaultListener; import org.dcache.pool.repository.Account; import org.dcache.pool.repository.Allocator; import org.dcache.pool.repository.CacheEntry; import org.dcache.pool.repository.DuplicateEntryException; import org.dcache.pool.repository.EntryChangeEvent; import org.dcache.pool.repository.IllegalTransitionException; import org.dcache.pool.repository.ReplicaDescriptor; import org.dcache.pool.repository.ReplicaRecord; import org.dcache.pool.repository.ReplicaState; import org.dcache.pool.repository.ReplicaStore; import org.dcache.pool.repository.ReplicaStoreCache; import org.dcache.pool.repository.Repository; import org.dcache.pool.repository.SpaceRecord; import org.dcache.pool.repository.SpaceSweeperPolicy; import org.dcache.pool.repository.StateChangeEvent; import org.dcache.pool.repository.StateChangeListener; import org.dcache.pool.repository.StickyChangeEvent; import org.dcache.pool.repository.StickyRecord; import org.dcache.util.CacheExceptionFactory; import org.dcache.vehicles.FileAttributes; import static com.google.common.base.Preconditions.*; import static org.dcache.namespace.FileAttribute.PNFSID; import static org.dcache.namespace.FileAttribute.STORAGEINFO; import static org.dcache.pool.repository.ReplicaState.*; import static org.dcache.util.ByteUnit.GiB; /** * Implementation of Repository interface. * * Allows openEntry, getEntry, getState and setSticky to be called * before the load method finishes. Other methods of the Repository * interface will fail until load has completed. */ public class ReplicaRepository implements Repository, CellCommandListener, CellSetupProvider, CellInfoProvider, CellIdentityAware { /* Implementation note * ------------------- * * The following order must be observed when synchronizing: * * - this * - _stateLock * - entries (only one) * - _account * */ private static final Logger LOGGER = LoggerFactory.getLogger(ReplicaRepository.class); /** * Time in millisecs added to each sticky expiration task. We * schedule the task later than the expiration time to account for * small clock shifts. */ public static final long EXPIRATION_CLOCKSHIFT_EXTRA_TIME = 1000L; public static final long DEFAULT_GAP = GiB.toBytes(4L); private final List<FaultListener> _faultListeners = new CopyOnWriteArrayList<>(); private final StateChangeListeners _stateChangeListeners = new StateChangeListeners(); /** * Sticky bit expiration tasks. */ private final Map<PnfsId,ScheduledFuture<?>> _tasks = new ConcurrentHashMap<>(); /** * Collection of removable entries. */ private final Set<PnfsId> _removable = Collections.newSetFromMap(new ConcurrentHashMap<>()); /** Executor for periodic tasks. */ @GuardedBy("_stateLock") private ScheduledExecutorService _executor; /** * Meta data about files in the pool. */ @GuardedBy("_stateLock") private ReplicaStore _store; @GuardedBy("_stateLock") private String _poolName; /** * Current state of the repository. */ enum State { UNINITIALIZED, INITIALIZED, LOADING, OPEN, FAILED, CLOSED } @GuardedBy("_stateLock") private State _state = State.UNINITIALIZED; /** * Lock for the field changes. */ private final ReadWriteLock _stateLock = new ReentrantReadWriteLock(); /** * Initialization progress between 0 and 1. */ private volatile float _initializationProgress; /** * Shared repository account object for tracking space. */ @GuardedBy("_stateLock") private Account _account; /** * Allocator used for when allocating space for new entries. */ @GuardedBy("_stateLock") private Allocator _allocator; /** * Policy defining which files may be garbage collected. */ @GuardedBy("_stateLock") private SpaceSweeperPolicy _sweeper; @GuardedBy("_stateLock") private PnfsHandler _pnfs; @GuardedBy("_stateLock") private boolean _volatile; /** * Pool size configured through the 'max disk space' command. */ @GuardedBy("_stateLock") private DiskSpace _runtimeMaxSize = DiskSpace.UNSPECIFIED; /** * Pool size configured in the configuration files. */ @GuardedBy("_stateLock") private DiskSpace _staticMaxSize = DiskSpace.UNSPECIFIED; /** * Pool size gap to report to pool manager. */ @GuardedBy("_stateLock") private DiskSpace _gap = DiskSpace.UNSPECIFIED; /** * Throws an IllegalStateException if the repository has been * initialized. */ @GuardedBy("_stateLock") private void checkUninitialized() { if (_state != State.UNINITIALIZED) { throw new IllegalStateException("Operation not allowed after initialization"); } } /** * Throws an IllegalStateException if the repository is not open. */ @GuardedBy("_stateLock") private void checkOpen() { State state = _state; if (state != State.OPEN) { throw new IllegalStateException("Operation not allowed while repository is in state " + state); } } /** * Throws an IllegalStateException if the repository is not in * either INITIALIZED, LOADING or OPEN. */ @GuardedBy("_stateLock") private void checkInitialized() { State state = _state; if (state != State.INITIALIZED && state != State.LOADING && state != State.OPEN) { throw new IllegalStateException("Operation not allowed while repository is in state " + state); } } @Override public void setCellAddress(CellAddressCore address) { _stateLock.readLock().lock(); try { checkUninitialized(); _poolName = address.getCellName(); } finally { _stateLock.readLock().unlock(); } } /** * Get pool name to which repository belongs. * @return pool name. */ public String getPoolName() { _stateLock.readLock().lock(); try { return _poolName; } finally { _stateLock.readLock().unlock(); } } /** * The executor is used for periodic background checks and sticky * flag expiration. */ public void setExecutor(ScheduledExecutorService executor) { _stateLock.readLock().lock(); try { checkUninitialized(); _executor = executor; } finally { _stateLock.readLock().unlock(); } } /** * Sets the handler for talking to the PNFS manager. */ public void setPnfsHandler(PnfsHandler pnfs) { _stateLock.readLock().lock(); try { checkUninitialized(); _pnfs = pnfs; } finally { _stateLock.readLock().unlock(); } } public boolean getVolatile() { _stateLock.readLock().lock(); try { return _volatile; } finally { _stateLock.readLock().unlock(); } } /** * Sets whether pool is volatile. On volatile pools * ClearCacheLocation messages are flagged to trigger deletion of * the namespace entry when the last known replica is deleted. */ public void setVolatile(boolean value) { _stateLock.readLock().lock(); try { checkUninitialized(); _volatile = value; } finally { _stateLock.readLock().unlock(); } } /** * The account keeps track of available space. */ public void setAccount(Account account) { _stateLock.readLock().lock(); try { checkUninitialized(); _account = account; } finally { _stateLock.readLock().unlock(); } } /** * The allocator implements an allocation policy. */ public void setAllocator(Allocator allocator) { _stateLock.readLock().lock(); try { checkUninitialized(); _allocator = allocator; } finally { _stateLock.readLock().unlock(); } } public void setReplicaStore(ReplicaStore store) { _stateLock.readLock().lock(); try { checkUninitialized(); _store = new ReplicaStoreCache(store, new StateChangeListener() { @Override public void stateChanged(StateChangeEvent event) { if (event.getOldState() != NEW || event.getNewState() != REMOVED) { if (event.getOldState() == NEW) { long size = event.getNewEntry().getReplicaSize(); /* Usually space has to be allocated before writing the * data to disk, however during pool startup we are notified * about "new" files that already consume space, so we * adjust the allocation here. */ if (size > 0) { _account.growTotalAndUsed(size); } scheduleExpirationTask(event.getNewEntry()); } updateRemovable(event.getNewEntry()); if (event.getOldState() != PRECIOUS && event.getNewState() == PRECIOUS) { _account.adjustPrecious(event.getNewEntry().getReplicaSize()); } else if (event.getOldState() == PRECIOUS && event.getNewState() != PRECIOUS) { _account.adjustPrecious(-event.getOldEntry().getReplicaSize()); } _stateChangeListeners.stateChanged(event); } PnfsId id = event.getPnfsId(); switch (event.getNewState()) { case REMOVED: if (event.getOldState() != NEW) { LOGGER.info("remove entry for: {}", id); } _pnfs.clearCacheLocation(id, _volatile); ScheduledFuture<?> oldTask = _tasks.remove(id); if (oldTask != null) { oldTask.cancel(false); } break; case DESTROYED: /* It is essential to free after we removed the file: This is the opposite * of what happens during allocation, in which we allocate before writing * to disk. We rely on never having anything on disk that we haven't accounted * for in the Account object. */ _account.free(event.getOldEntry().getReplicaSize()); break; } } @Override public void accessTimeChanged(EntryChangeEvent event) { updateRemovable(event.getNewEntry()); _stateChangeListeners.accessTimeChanged(event); } @Override public void stickyChanged(StickyChangeEvent event) { updateRemovable(event.getNewEntry()); _stateChangeListeners.stickyChanged(event); scheduleExpirationTask(event.getNewEntry()); } }, new FaultListener() { @Override public void faultOccurred(FaultEvent event) { for (FaultListener listener : _faultListeners) { listener.faultOccurred(event); } } }); } finally { _stateLock.readLock().unlock(); } } public void setSpaceSweeperPolicy(SpaceSweeperPolicy sweeper) { _stateLock.readLock().lock(); try { checkUninitialized(); _sweeper = sweeper; } finally { _stateLock.readLock().unlock(); } } public void setMaxDiskSpaceString(String size) { setMaxDiskSpace(size.isEmpty() ? DiskSpace.UNSPECIFIED : new DiskSpace(size)); } public void setMaxDiskSpace(DiskSpace size) { _stateLock.writeLock().lock(); try { _staticMaxSize = size; if (_state == State.OPEN) { updateAccountSize(); } } finally { _stateLock.writeLock().unlock(); } } public State getState() { _stateLock.readLock().lock(); try { return _state; } finally { _stateLock.readLock().unlock(); } } @Override public void init() throws IllegalStateException, CacheException { checkState(_pnfs != null, "Pnfs handler must be set."); checkState(_account != null, "Account must be set."); checkState(_allocator != null, "Allocator must be set."); if (!compareAndSetState(State.UNINITIALIZED, State.INITIALIZED)) { throw new IllegalStateException("Can only initialize uninitialized repository."); } } private boolean compareAndSetState(State expected, State state) { _stateLock.writeLock().lock(); try { if (_state != expected) { return false; } _state = state; return true; } finally { _stateLock.writeLock().unlock(); } } @Override public void load() throws CacheException, IllegalStateException, InterruptedException { if (!compareAndSetState(State.INITIALIZED, State.LOADING)) { throw new IllegalStateException("Can only load repository after initialization and only once."); } try { LOGGER.warn("Reading inventory from {}.", _store); _store.init(); Collection<PnfsId> ids = _store.index(); int fileCount = ids.size(); LOGGER.info("Checking meta data for {} files.", fileCount); int cnt = 0; for (PnfsId id: ids) { ReplicaRecord entry = readReplicaRecord(id); if (entry != null) { ReplicaState state = entry.getState(); LOGGER.debug("{} {}", id, state); } _initializationProgress = ((float) cnt) / fileCount; cnt++; // Lazily check if repository was closed if (_state != State.LOADING) { throw new IllegalStateException("Repository was closed during loading."); } } _stateLock.writeLock().lock(); try { updateAccountSize(); if (!compareAndSetState(State.LOADING, State.OPEN)) { throw new IllegalStateException("Repository was closed during loading."); } } finally { _stateLock.writeLock().unlock(); } } finally { compareAndSetState(State.LOADING, State.FAILED); } LOGGER.info("Done generating inventory."); } @Override public Iterator<PnfsId> iterator() { _stateLock.readLock().lock(); try { checkOpen(); try { return Collections.unmodifiableCollection(_store.index()).iterator(); } catch (CacheException e) { throw new RuntimeException(e); } } finally { _stateLock.readLock().unlock(); } } @Override public ReplicaDescriptor createEntry(FileAttributes fileAttributes, ReplicaState transferState, ReplicaState targetState, List<StickyRecord> stickyRecords, Set<OpenFlags> flags) throws CacheException { if (!fileAttributes.isDefined(EnumSet.of(PNFSID, STORAGEINFO))) { throw new IllegalArgumentException("PNFSID and STORAGEINFO are required, only got " + fileAttributes.getDefinedAttributes()); } if (stickyRecords == null) { throw new IllegalArgumentException("List of sticky records must not be null"); } PnfsId id = fileAttributes.getPnfsId(); _stateLock.readLock().lock(); try { checkOpen(); switch (transferState) { case FROM_CLIENT: case FROM_STORE: case FROM_POOL: break; default: throw new IllegalArgumentException("Invalid initial state"); } switch (targetState) { case PRECIOUS: case CACHED: break; default: throw new IllegalArgumentException("Invalid target state"); } LOGGER.info("Creating new entry for {}", id); ReplicaRecord entry = _store.create(id, flags); return entry.update(r -> { r.setFileAttributes(fileAttributes); r.setState(transferState); return new WriteHandleImpl( this, _allocator, _pnfs, entry, fileAttributes, targetState, stickyRecords); }); } catch (DuplicateEntryException e) { /* Somebody got the idea that we don't have the file, so we make * sure to register it. */ _pnfs.notify(new PnfsAddCacheLocationMessage(id, getPoolName())); throw new FileInCacheException("Entry already exists: " + id); } finally { _stateLock.readLock().unlock(); } } @Override public ReplicaDescriptor openEntry(PnfsId id, Set<OpenFlags> flags) throws CacheException, InterruptedException { _stateLock.readLock().lock(); try { checkInitialized(); FileAttributes fileAttributes; ReplicaRecord entry = getReplicaRecord(id); synchronized (entry) { switch (entry.getState()) { case NEW: case FROM_CLIENT: case FROM_STORE: case FROM_POOL: throw new LockedCacheException("File is incomplete"); case BROKEN: throw new FileCorruptedCacheException("File is broken"); case REMOVED: case DESTROYED: throw new LockedCacheException("File has been removed"); case PRECIOUS: case CACHED: break; } fileAttributes = entry.getFileAttributes(); if (!flags.contains(OpenFlags.NOATIME)) { entry.setLastAccessTime(System.currentTimeMillis()); } entry.incrementLinkCount(); } return new ReadHandleImpl(_pnfs, entry, fileAttributes); } catch (FileNotInCacheException e) { /* Somebody got the idea that we have the file, so we make * sure to remove any stray pointers. */ try { ReplicaRecord entry = _store.create(id, EnumSet.noneOf(OpenFlags.class)); entry.update(r -> r.setState(REMOVED)); } catch (DuplicateEntryException concurrentCreation) { return openEntry(id, flags); } catch (CacheException | RuntimeException f) { e.addSuppressed(f); } throw e; } finally { _stateLock.readLock().unlock(); } } @Override public CacheEntry getEntry(PnfsId id) throws CacheException, InterruptedException { _stateLock.readLock().lock(); try { checkInitialized(); ReplicaRecord entry = getReplicaRecord(id); synchronized (entry) { if (entry.getState() == NEW) { throw new FileNotInCacheException("File is incomplete"); } return new CacheEntryImpl(entry); } } finally { _stateLock.readLock().unlock(); } } @Override public void setSticky(PnfsId id, String owner, long expire, boolean overwrite) throws IllegalArgumentException, CacheException, InterruptedException { checkNotNull(id); checkNotNull(owner); checkArgument(expire >= -1, "Expiration time must be -1 or non-negative"); _stateLock.readLock().lock(); try { checkInitialized(); ReplicaRecord entry; try { entry = getReplicaRecord(id); } catch (FileNotInCacheException e) { /* Attempts to set a sticky bit on a missing file may * indicate a stale registration in the name space. */ try { entry = _store.create(id, EnumSet.noneOf(OpenFlags.class)); entry.update(r -> r.setState(REMOVED)); } catch (DuplicateEntryException concurrentCreation) { setSticky(id, owner, expire, overwrite); return; } catch (CacheException | RuntimeException f) { e.addSuppressed(f); } throw e; } entry.update(r -> { switch (r.getState()) { case NEW: case FROM_CLIENT: case FROM_STORE: case FROM_POOL: throw new LockedCacheException("File is incomplete"); case REMOVED: case DESTROYED: throw new LockedCacheException("File has been removed"); case BROKEN: case PRECIOUS: case CACHED: break; } return r.setSticky(owner, expire, overwrite); }); } finally { _stateLock.readLock().unlock(); } } @Override public SpaceRecord getSpaceRecord() { _stateLock.readLock().lock(); try { SpaceRecord space = _account.getSpaceRecord(); long lru = (System.currentTimeMillis() - _sweeper.getLru()) / 1000L; long gap = _gap.orElse(Math.min(space.getTotalSpace() / 4, DEFAULT_GAP)); return new SpaceRecord(space.getTotalSpace(), space.getFreeSpace(), space.getPreciousSpace(), space.getRemovableSpace(), lru, gap); } finally { _stateLock.readLock().unlock(); } } @Override public void setState(PnfsId id, ReplicaState state) throws IllegalArgumentException, InterruptedException, CacheException { if (id == null) { throw new IllegalArgumentException("id is null"); } _stateLock.readLock().lock(); try { checkOpen(); try { ReplicaRecord entry = getReplicaRecord(id); entry.update(r -> { ReplicaState source = r.getState(); switch (source) { case NEW: case REMOVED: case DESTROYED: if (state == ReplicaState.REMOVED) { /* File doesn't exist or is already * deleted. That's all we care about. */ return null; } break; case PRECIOUS: case CACHED: case BROKEN: switch (state) { case REMOVED: case CACHED: case PRECIOUS: case BROKEN: return r.setState(state); default: break; } default: break; } throw new IllegalTransitionException(id, source, state); }); } catch (FileNotInCacheException e) { /* File disappeared before we could change the * state. That's okay if we wanted to remove it, otherwise * not. */ if (state != REMOVED) { throw new IllegalTransitionException(id, NEW, state); } } } finally { _stateLock.readLock().unlock(); } } /** * If set to true, then state change listeners are notified * synchronously. In this case listeners must not acquire any * locks or call back into the repository, as there is otherwise a * risk that the component will deadlock. Synchronous notification * is mainly provided for testing purposes. */ public void setSynchronousNotification(boolean value) { _stateChangeListeners.setSynchronousNotification(value); } @Override public void addListener(StateChangeListener listener) { _stateChangeListeners.add(listener); } @Override public void removeListener(StateChangeListener listener) { _stateChangeListeners.remove(listener); } @Override public void addFaultListener(FaultListener listener) { _faultListeners.add(listener); } @Override public void removeFaultListener(FaultListener listener) { _faultListeners.remove(listener); } @Override public ReplicaState getState(PnfsId id) throws CacheException, InterruptedException { _stateLock.readLock().lock(); try { checkInitialized(); try { return getReplicaRecord(id).getState(); } catch (FileNotInCacheException e) { return NEW; } } finally { _stateLock.readLock().unlock(); } } @Override public void getInfo(PrintWriter pw) { _stateLock.readLock().lock(); try { State state = _state; pw.append("State : ").append(state.toString()); if (state == State.LOADING) { pw.append(" (").append(String.valueOf((int) (_initializationProgress * 100))).append("% done)"); } pw.println(); try { if (state == State.OPEN || state == State.LOADING || state == State.INITIALIZED) { pw.println("Files : " + _store.index().size()); } } catch (CacheException e) { pw.println("Files : " + e.getMessage()); } SpaceRecord space = getSpaceRecord(); long total = space.getTotalSpace(); long used = total - space.getFreeSpace(); long precious = space.getPreciousSpace(); long fsFree = _store.getFreeSpace(); long fsTotal = _store.getTotalSpace(); long gap = space.getGap(); pw.println("Disk space"); pw.println(" Total : " + DiskSpace.toUnitString(total)); pw.println(" Used : " + used + " [" + (((float) used) / ((float) total)) + "]"); pw.println(" Free : " + (total - used) + " Gap : " + gap); pw.println(" Precious : " + precious + " [" + (((float) precious) / ((float) total)) + "]"); pw.println(" Removable: " + space.getRemovableSpace() + " [" + (((float) space.getRemovableSpace()) / ((float) total)) + "]"); pw.println("File system"); pw.println(" Size : " + fsTotal); pw.println(" Free : " + fsFree + " [" + (((float) fsFree) / fsTotal) + "]"); pw.println("Limits for maximum disk space"); pw.println(" File system : " + (fsFree + used)); pw.println(" Statically configured: " + _staticMaxSize); pw.println(" Runtime configured : " + _runtimeMaxSize); } finally { _stateLock.readLock().unlock(); } } public void shutdown() { _stateLock.writeLock().lock(); try { _stateChangeListeners.stop(); _state = State.CLOSED; _store.close(); } finally { _stateLock.writeLock().unlock(); } } @GuardedBy("getReplicaRecord(entry.getPnfsid())") protected void updateRemovable(CacheEntry entry) { PnfsId id = entry.getPnfsId(); if (_sweeper.isRemovable(entry)) { if (_removable.add(id)) { _account.adjustRemovable(entry.getReplicaSize()); } } else { if (_removable.remove(id)) { _account.adjustRemovable(-entry.getReplicaSize()); } } } /** * @throw FileNotInCacheException in case file is not in * repository */ private ReplicaRecord getReplicaRecord(PnfsId pnfsId) throws CacheException, InterruptedException { ReplicaRecord entry = _store.get(pnfsId); if (entry == null) { throw new FileNotInCacheException("Entry not in repository : " + pnfsId); } return entry; } /** * Reads an entry from the meta data store. Retries indefinitely * in case of timeouts. */ private ReplicaRecord readReplicaRecord(PnfsId id) throws CacheException, InterruptedException { /* In case of communication problems with the pool, there is * no point in failing - the pool would be dead if we did. It * is reasonable to expect that the PNFS manager is started at * some point and hence we just keep trying. */ while (!Thread.interrupted()) { try { return _store.get(id); } catch (CacheException e) { if (e.getRc() != CacheException.TIMEOUT) { throw CacheExceptionFactory.exceptionOf(e.getRc(), "Failed to read meta data for " + id + ": " + e.getMessage(), e); } } Thread.sleep(1000); } throw new InterruptedException(); } /** * Schedules an expiration task for a sticky entry. */ @GuardedBy("entry") private void scheduleExpirationTask(CacheEntry entry) { /* Cancel previous task. */ PnfsId pnfsId = entry.getPnfsId(); ScheduledFuture<?> future = _tasks.remove(pnfsId); if (future != null) { future.cancel(false); } /* Find next sticky flag to expire. */ long expire = Long.MAX_VALUE; for (StickyRecord record: entry.getStickyRecords()) { if (record.expire() > -1) { expire = Math.min(expire, record.expire()); } } /* Schedule a new task. Notice that we schedule an expiration * task even if expire is in the past. This guarantees that we * also remove records that already have expired. */ if (expire != Long.MAX_VALUE) { ExpirationTask task = new ExpirationTask(entry.getPnfsId()); future = _executor.schedule(task, expire - System.currentTimeMillis() + EXPIRATION_CLOCKSHIFT_EXTRA_TIME, TimeUnit.MILLISECONDS); _tasks.put(pnfsId, future); } } /** * Reports a fault to all fault listeners. */ void fail(FaultAction action, String message) { FaultEvent event = new FaultEvent("repository", action, message, null); for (FaultListener listener : _faultListeners) { listener.faultOccurred(event); } } /** * Runnable for removing expired sticky flags. */ class ExpirationTask implements Runnable { private final PnfsId _id; public ExpirationTask(PnfsId id) { _id = id; } @Override public void run() { try { _tasks.remove(_id); ReplicaRecord entry = _store.get(_id); if (entry != null) { Collection<StickyRecord> removed = entry.removeExpiredStickyFlags(); if (removed.isEmpty()) { /* If for some reason we didn't expire anything, we reschedule * the expiration to be on the safe side (could be a timing * issue). */ synchronized (entry) { scheduleExpirationTask(new CacheEntryImpl(entry)); } } } } catch (DiskErrorCacheException ignored) { // MetaDataCache will already have disabled the pool if this happens } catch (CacheException e) { // This ought to be a transient error, so reschedule LOGGER.warn("Failed to clear sticky flags for {}: {}", _id, e.getMessage()); ScheduledFuture<?> future = _executor.schedule(this, EXPIRATION_CLOCKSHIFT_EXTRA_TIME, TimeUnit.MILLISECONDS); _tasks.put(_id, future); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } } @AffectsSetup @Command(name = "set max diskspace", hint = "set size of pool", description = "Sets the maximum disk space to be used by this pool. Overrides " + "whatever maximum was defined in the configuration file. The value " + "will be saved to the pool setup file if the save command is " + "executed.") class SetMaxDiskspaceCommand implements Callable<String> { @Argument(valueSpec = "-|BYTES[k|m|g|t]", usage = "Disk space in bytes, kibibytes, mebibytes, gibibytes, or tebibytes. If " + "- is specified, then the pool will return to the size configured in " + "the configuration file, or no maximum if such a size is not defined.") DiskSpace size; @Override public String call() throws IllegalArgumentException { _stateLock.writeLock().lock(); try { _runtimeMaxSize = size; if (_state == State.OPEN) { updateAccountSize(); } } finally { _stateLock.writeLock().unlock(); } return ""; } } @AffectsSetup @Command(name = "set gap", hint = "set minimum free space target", description = "New transfers will not be assigned to a pool once it has less free space than the " + "gap. This is to ensure that there is a reasonable chance for ongoing transfers to " + "complete. To prevent that writes will fail due to lack of space, the gap should be " + "in the order of the expected largest file size multiplied by the largest number of " + "concurrent writes expected to a pool, although a smaller value will often do.\n\n" + "It is not an error for a pool to have less free space than the gap.") class SetGapCommand implements Callable<String> { @Argument(valueSpec = "BYTES[k|m|g|t]", required = false, usage = "The gap in bytes, kibibytes, mebibytes, gibibytes or tebibytes. If not specified the " + "default is the smaller of 4 GiB or 25% of the pool size.") DiskSpace gap = DiskSpace.UNSPECIFIED; @Override public String call() throws Exception { _stateLock.writeLock().lock(); try { _gap = gap; } finally { _stateLock.writeLock().unlock(); } return "Gap set to " + gap; } } @Override public void printSetup(PrintWriter pw) { DiskSpace runtimeMaxSize; DiskSpace gap; _stateLock.readLock().lock(); try { runtimeMaxSize = _runtimeMaxSize; gap = _gap; } finally { _stateLock.readLock().unlock(); } if (runtimeMaxSize.isSpecified()) { pw.println("set max diskspace " + runtimeMaxSize); } if (gap.isSpecified()) { pw.println("set gap " + gap); } } private DiskSpace getConfiguredMaxSize() { _stateLock.readLock().lock(); try { return _runtimeMaxSize.orElse(_staticMaxSize); } finally { _stateLock.readLock().unlock(); } } private long getFileSystemMaxSize() { _stateLock.readLock().lock(); try { return _store.getFreeSpace() + _account.getUsed(); } finally { _stateLock.readLock().unlock(); } } private boolean isTotalSpaceReported() { _stateLock.readLock().lock(); try { return _store.getTotalSpace() > 0; } finally { _stateLock.readLock().unlock(); } } /** * Updates the total size of the Account based on the configured * limits and the available disk space. * * Notice that if the configured limits are larger than the file * system or if there are no configured limits, then the size is * going to be an overapproximation based on the current amount of * used space and the amount of free space on disk. This is so * because the Account object does not accurately track space that * has been reserved but not yet written to disk. In this case the * periodic health check will adjust the pool size when a more * accurate limit can be determined. */ @GuardedBy("_stateLock") private void updateAccountSize() { Account account = _account; synchronized (account) { DiskSpace configuredPoolSize = getConfiguredMaxSize(); long maxPoolSize = getFileSystemMaxSize(); long used = account.getUsed(); if (!isTotalSpaceReported()) { LOGGER.warn("Java reported the file system size as 0. This typically happens on Solaris with a 32-bit JVM. Please use a 64-bit JVM."); if (!configuredPoolSize.isSpecified()) { throw new IllegalStateException("Failed to determine file system size. A pool size must be configured."); } } long newSize; if (configuredPoolSize.isLargerThan(maxPoolSize)) { LOGGER.warn("Configured pool size ({}) is larger than what is available on disk ({}).", configuredPoolSize, maxPoolSize); } else if (configuredPoolSize.isLessThan(used)) { LOGGER.warn("Configured pool size ({}) is less than what is used already ({}).", configuredPoolSize, used); } newSize = Math.max(used, configuredPoolSize.orElse(maxPoolSize)); if (newSize != account.getTotal()) { LOGGER.info("Adjusting pool size to {}", newSize); account.setTotal(newSize); } } } }