package org.dcache.pool.repository.v5;
import org.apache.log4j.NDC;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.dcache.alarms.AlarmMarkerFactory;
import org.dcache.alarms.PredefinedAlarm;
import org.dcache.pool.FaultAction;
import org.dcache.pool.repository.Account;
import org.dcache.pool.repository.ReplicaStore;
import org.dcache.pool.repository.SpaceRecord;
/**
 * Periodic health check of a pool repository.
 *
 * <p>Each invocation of {@link #run()} performs, while the repository is
 * open, an I/O check of the replica store, a sanity check of the space
 * accounting and a comparison of the account's free space against the free
 * space reported for the replica store. Independently of the repository
 * state, an optional external health check command is executed and its exit
 * code is used to decide whether to fail the repository.
 */
class CheckHealthTask implements Runnable
{
    private static final Logger LOGGER = LoggerFactory.getLogger(CheckHealthTask.class);

    /**
     * Time in milliseconds that has to pass after the account last freed
     * space before the free space of the file system is compared with the
     * free space of the account.
     */
    public static final int GRACE_PERIOD_ON_FREE = 60_000;

    private ReplicaRepository _repository;

    /**
     * Shared repository account object for tracking space.
     */
    private Account _account;

    /**
     * Meta data about files in the pool.
     */
    private ReplicaStore _replicaStore;

    /**
     * Command and arguments to execute periodically to check the health of
     * the file system, disk array, host, etc. An empty array means that no
     * command is executed.
     */
    private String[] _commands = {};

    /**
     * Sets the repository whose state is inspected and which is failed when
     * a check does not pass.
     */
    public void setRepository(ReplicaRepository repository)
    {
        _repository = repository;
    }

    /**
     * Sets the account used for the space accounting checks.
     */
    public void setAccount(Account account)
    {
        _account = account;
    }

    /**
     * Sets the replica store used for the I/O test and for querying the
     * free and total space.
     */
    public void setReplicaStore(ReplicaStore store)
    {
        _replicaStore = store;
    }

    /**
     * Sets the external health check command.
     *
     * @param s command line; it is split into white space separated words
     *          by {@link Scanner}, which honours quotes and backslash
     *          escapes. A string without any words disables the command.
     */
    public void setCommand(String s)
    {
        _commands = new Scanner(s).scan();
    }

    /**
     * Runs the internal checks if the repository is open and afterwards the
     * external health check command, if one is configured.
     */
    @Override
    public void run()
    {
        switch (_repository.getState()) {
        case UNINITIALIZED:
        case INITIALIZED:
        case LOADING:
        case FAILED:
        case CLOSED:
            /* Internal checks are only performed on an open repository. */
            break;

        case OPEN:
            if (!_replicaStore.isOk()) {
                _repository.fail(FaultAction.DISABLED, "I/O test failed");
            }

            if (!checkSpaceAccounting()) {
                LOGGER.error("Marking pool read-only due to accounting errors. This is a bug. Please report it to support@dcache.org.");
                _repository.fail(FaultAction.READONLY, "Accounting errors detected");
            }

            adjustFreeSpace();
            break;
        }

        checkHealthCommand();
    }

    /**
     * Executes the configured health check command, if any, and acts on its
     * exit code: 0 is success, 1 fails the repository with
     * {@link FaultAction#READONLY} and any other code fails it with
     * {@link FaultAction#DISABLED}. The combined standard output and
     * standard error of the command is logged.
     */
    private void checkHealthCommand()
    {
        if (_commands.length > 0) {
            NDC.push("health-check");
            try {
                ProcessBuilder builder = new ProcessBuilder(_commands);
                /* Merge stderr into stdout so that a single reader drains
                 * all output of the command.
                 */
                builder.redirectErrorStream(true);
                Process process = builder.start();
                try {
                    StringBuilder output = new StringBuilder();
                    try (InputStream in = process.getInputStream()) {
                        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
                        String line = reader.readLine();
                        while (line != null) {
                            output.append(line).append('\n');
                            line = reader.readLine();
                        }
                    }

                    int code = process.waitFor();
                    switch (code) {
                    case 0:
                        if (output.length() > 0) {
                            LOGGER.debug("{}", output);
                        }
                        break;

                    case 1:
                        _repository.fail(FaultAction.READONLY, "Health check command failed with exit code 1");
                        if (output.length() > 0) {
                            LOGGER.warn("{}", output);
                        }
                        /* Must not fall through: exit code 1 only makes the
                         * pool read-only, it does not disable it.
                         */
                        break;

                    default:
                        _repository.fail(FaultAction.DISABLED, "Health check command failed with exit code " + code);
                        if (output.length() > 0) {
                            LOGGER.warn("{}", output);
                        }
                        break;
                    }
                } catch (InterruptedException e) {
                    LOGGER.debug("Health check command was interrupted");
                    process.destroy();
                    /* Restore the interrupt status so that the code running
                     * this task can observe the interruption.
                     */
                    Thread.currentThread().interrupt();
                }
            } catch (IOException e) {
                LOGGER.error("Failed to launch health check command '{}': {}",
                             Arrays.toString(_commands), e.getMessage());
            } finally {
                NDC.pop();
            }
        }
    }

    /**
     * Checks that none of the values of the account's space record, nor the
     * derived used space, is negative.
     *
     * @return false if a negative value was found, true otherwise
     */
    private boolean checkSpaceAccounting()
    {
        SpaceRecord record = _account.getSpaceRecord();
        long removable = record.getRemovableSpace();
        long total = record.getTotalSpace();
        long free = record.getFreeSpace();
        long precious = record.getPreciousSpace();
        long used = total - free;

        if (removable < 0) {
            LOGGER.error("Removable space is negative.");
            return false;
        }

        if (total < 0) {
            LOGGER.error("Repository size is negative.");
            return false;
        }

        if (free < 0) {
            LOGGER.error("Free space is negative.");
            return false;
        }

        if (precious < 0) {
            LOGGER.error("Precious space is negative.");
            return false;
        }

        if (used < 0) {
            LOGGER.error("Used space is negative.");
            return false;
        }

        /* The following check cannot be made consistently, since we
         * do not retrieve these values atomically. Therefore we log
         * the error, but do not return false.
         */
        if (precious + removable > used) {
            LOGGER.warn("Used space is less than the sum of precious and removable space (this may be a temporary problem - if it persists then please report it to support@dcache.org).");
        }

        return true;
    }

    /**
     * Compares the space reported for the replica store with the space
     * recorded in the account. A warning is logged if the file system is
     * smaller than the account's total, and the account's total is reduced
     * if the file system has less free space than the account claims.
     */
    private void adjustFreeSpace()
    {
        /* At any time the file system must have at least as much free
         * space as shows in the account. Thus invariantly
         *
         *      _replicaStore.getFreeSpace >= _account.getFree
         *
         * Taking the monitor lock on the account object prevents
         * anybody else from allocating space from the account. Hence
         * throughout the period we have the lock, the file system
         * must have at least as much free space as the account.
         */
        Account account = _account;
        synchronized (account) {
            /* It is not uncommon that file systems free space asynchronously
             * from file deletion. Thus after we delete a file, it may take a
             * while before the free space is reported as such by the operating
             * system. To compensate, we suppress this check for a grace period
             * after the last delete, i.e. the check is only made once the last
             * free is older than the grace period.
             */
            if (account.getTimeOfLastFree() < System.currentTimeMillis() - GRACE_PERIOD_ON_FREE) {
                long free = _replicaStore.getFreeSpace();
                long total = _replicaStore.getTotalSpace();

                if (total == 0) {
                    LOGGER.debug("Java reported file system size as 0. Skipping file system size check.");
                    return;
                }

                if (total < account.getTotal()) {
                    LOGGER.warn(AlarmMarkerFactory.getMarker(
                                        PredefinedAlarm.POOL_SIZE, _repository.getPoolName()),
                                "The file system containing the data files "
                                + "appears to be smaller {} than the configured "
                                + "pool size {}.",
                                String.format("(%,d bytes)", total),
                                String.format("(%,d bytes)", account.getTotal()));
                }

                if (free < account.getFree()) {
                    /* Shrink the pool by the amount of free space that the
                     * account claims but the file system does not have.
                     */
                    long newSize =
                            account.getTotal() - (account.getFree() - free);
                    LOGGER.warn(AlarmMarkerFactory.getMarker(
                                        PredefinedAlarm.POOL_FREE_SPACE, _repository.getPoolName()),
                                "The file system containing the data files "
                                + "appears to have less free space {} than "
                                + "expected {}; reducing the pool size to {} "
                                + "to compensate. Notice that this does not leave "
                                + "any space for the meta data. If such data is "
                                + "stored on the same file system, then it is "
                                + "paramount that the pool size is reconfigured "
                                + "to leave enough space for the meta data.",
                                String.format("(%,d bytes)", free),
                                String.format("(%,d bytes)", account.getFree()),
                                String.format("%,d bytes", newSize));
                    account.setTotal(newSize);
                }
            }
        }
    }

    /**
     * Scanner for parsing strings of white space separated
     * words. Characters may be escaped with a backslash and character
     * sequences may be quoted.
     *
     * <p>A quoted string that is not terminated before the end of the input
     * extends to the end of the input, and a trailing backslash is dropped.
     */
    static class Scanner
    {
        private final CharSequence _line;
        private int _position;

        public Scanner(CharSequence line)
        {
            _line = line;
        }

        /**
         * Returns the character at the current position without consuming
         * it, or the NUL character at the end of the input.
         */
        private char peek()
        {
            return isEof() ? (char) 0 : _line.charAt(_position);
        }

        /**
         * Returns the character at the current position and advances the
         * position by one.
         */
        private char readChar()
        {
            char c = peek();
            _position++;
            return c;
        }

        private boolean isEof()
        {
            return (_position >= _line.length());
        }

        private boolean isWhitespace()
        {
            return Character.isWhitespace(peek());
        }

        /**
         * Skips white space. Stops at the end of the input, since the NUL
         * character returned by peek is not white space.
         */
        private void scanWhitespace()
        {
            while (isWhitespace()) {
                readChar();
            }
        }

        /**
         * Splits the input into words.
         *
         * @return the unescaped and unquoted words in order of appearance;
         *         empty if the input contains nothing but white space
         */
        public String[] scan()
        {
            List<String> arguments = new ArrayList<>();
            scanWhitespace();
            while (!isEof()) {
                arguments.add(scanWord());
                scanWhitespace();
            }
            return arguments.toArray(new String[arguments.size()]);
        }

        /**
         * Scans the next word. A word is a sequence of non-white
         * space characters and escaped or quoted white space
         * characters. The unescaped and unquoted word is returned.
         */
        private String scanWord()
        {
            StringBuilder word = new StringBuilder();
            while (!isEof() && !isWhitespace()) {
                scanWordElement(word);
            }
            return word.toString();
        }

        /**
         * Scans the next element of a word. Elements of a word are
         * non-white space characters, escaped characters and quoted
         * strings. The unescaped and unquoted element is added to word.
         */
        private void scanWordElement(StringBuilder word)
        {
            if (!isEof() && !isWhitespace()) {
                switch (peek()) {
                case '\'':
                    scanSingleQuotedString(word);
                    break;
                case '"':
                    scanDoubleQuotedString(word);
                    break;
                case '\\':
                    scanEscapedCharacter(word);
                    break;
                default:
                    word.append(readChar());
                    break;
                }
            }
        }

        /**
         * Scans a single quoted string. Escaped characters are not
         * recognized. The unquoted string is added to word.
         */
        private void scanSingleQuotedString(StringBuilder word)
        {
            if (readChar() != '\'') {
                throw new IllegalStateException("Parse failure");
            }

            while (!isEof()) {
                char c = readChar();
                switch (c) {
                case '\'':
                    return;
                default:
                    word.append(c);
                    break;
                }
            }
        }

        /**
         * Scans a double quoted string. Escaped characters are
         * recognized. The unquoted and unescaped string is added to
         * word.
         */
        private void scanDoubleQuotedString(StringBuilder word)
        {
            if (readChar() != '"') {
                throw new IllegalStateException("Parse failure");
            }

            while (!isEof()) {
                switch (peek()) {
                case '\\':
                    scanEscapedCharacter(word);
                    break;
                case '"':
                    readChar();
                    return;
                default:
                    word.append(readChar());
                    break;
                }
            }
        }

        /**
         * Scans a backslash escaped character. The escaped character
         * without the escape symbol is added to word.
         */
        private void scanEscapedCharacter(StringBuilder word)
        {
            if (readChar() != '\\') {
                throw new IllegalStateException("Parse failure");
            }

            if (!isEof()) {
                word.append(readChar());
            }
        }
    }
}