/*
* Copyright (C) 2006-2016 DLR, Germany
*
* All rights reserved
*
* http://www.rcenvironment.de/
*/
package de.rcenvironment.core.monitoring.system.internal;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.hyperic.sigar.Humidor;
import org.hyperic.sigar.ProcState;
import org.osgi.framework.BundleContext;
import de.rcenvironment.core.communication.api.CommunicationService;
import de.rcenvironment.core.communication.common.ResolvableNodeId;
import de.rcenvironment.core.monitoring.common.spi.PeriodicMonitoringDataContributor;
import de.rcenvironment.core.monitoring.system.api.LocalSystemMonitoringAggregationService;
import de.rcenvironment.core.monitoring.system.api.OperatingSystemException;
import de.rcenvironment.core.monitoring.system.api.RemotableSystemMonitoringService;
import de.rcenvironment.core.monitoring.system.api.SystemMonitoringConstants;
import de.rcenvironment.core.monitoring.system.api.SystemMonitoringDataService;
import de.rcenvironment.core.monitoring.system.api.model.FullSystemAndProcessDataSnapshot;
import de.rcenvironment.core.monitoring.system.api.model.ProcessInformation;
import de.rcenvironment.core.monitoring.system.api.model.SystemLoadInformation;
import de.rcenvironment.core.utils.common.StringUtils;
import de.rcenvironment.core.utils.common.rpc.RemoteOperationException;
import de.rcenvironment.core.utils.common.security.AllowRemoteAccess;
import de.rcenvironment.toolkit.modules.concurrency.api.AsyncTaskService;
import de.rcenvironment.toolkit.modules.concurrency.api.ConcurrencyUtilsFactory;
import de.rcenvironment.toolkit.modules.concurrency.api.TaskDescription;
import de.rcenvironment.toolkit.modules.objectbindings.api.ObjectBindingsService;
import de.rcenvironment.toolkit.utils.common.DefaultTimeSource;
/**
* Aggregates low-level system monitoring data to higher-level data structures, with internal caching where appropriate.
*
* @author David Scholz (original "snapshot" code)
* @author Robert Mischke
*/
public class SystemMonitoringAggregationServiceImpl implements RemotableSystemMonitoringService, LocalSystemMonitoringAggregationService {
/**
* Maximum age (in milliseconds) up to which a cached full snapshot is still handed out instead of creating a new one.
*/
private static final int COMPLETE_SNAPSHOT_CACHE_LIFETIME_MSEC = 2000;
/**
* Buffer size handed to the {@link SystemLoadInformationCollector} on creation.
*/
private static final int SYSTEM_LOAD_INFORMATION_COLLECTION_BUFFER_SIZE = 30;
/**
* The low-level service to fetch system data from.
*/
private SystemMonitoringDataService systemDataService;
/**
* Used to register periodic background tasks.
*/
private AsyncTaskService asyncTaskService;
/**
* Used to register itself as a {@link PeriodicMonitoringDataContributor}.
*/
private ObjectBindingsService objectBindingsService;
// description of provided data sources
private Map<String, String> topicIdToDescriptionMap = new HashMap<>();
// PID of the parent of the current process; 0 means "not determined yet"
private long selfLauncherPid = 0;
// process state of the launcher process, fetched once on activation; null if unavailable
private ProcState selfLauncherProcState = null;
// PID of the current (Java) process; 0 means "not determined yet"
private long selfJavaPid = 0;
// process state of the current (Java) process, fetched once on activation; null if unavailable
private ProcState selfJavaProcState = null;
// most recently created full snapshot, together with its creation time (System.currentTimeMillis())
private FullSystemAndProcessDataSnapshot cachedFullSnapshot;
private long cachedFullSnapshotTimestamp = 0;
// note: the logger must be bound to this class so its messages appear under the correct log category
private final Log log = LogFactory.getLog(SystemMonitoringAggregationServiceImpl.class);
// handle of the periodic collector task; null while no collector is scheduled
private ScheduledFuture<?> systemLoadCollectorFuture;
// periodically executed collector that buffers system load samples; null while not activated
private SystemLoadInformationCollector systemLoadInformationCollector;
// TODO remove?
@SuppressWarnings("unused")
private ConcurrencyUtilsFactory concurrencyUtilsFactory;
// used to obtain remotable service proxies for other nodes
private CommunicationService communicationService;
/**
* Lifecycle method (presumably invoked by the OSGi framework on component activation): determines the local process
* ids and states, registers this service as a {@link PeriodicMonitoringDataContributor}, and starts the periodic
* system load collector.
* <p>
* A failure to determine the local process ids is logged but does not abort activation anymore. Otherwise the load
* collector and its scheduling handle would stay <code>null</code>, which made {@link #getSystemLoadInformation(Integer)}
* and {@link #deactivate(BundleContext)} fail with a {@link NullPointerException}. Such a failure is now treated like a
* failure to fetch the process states: no monitoring topic is registered, but the rest of the service is set up.
*
* @param bundleContext the bundle context; not used by this method
*/
protected void activate(BundleContext bundleContext) {
    Objects.requireNonNull(systemDataService);
    Objects.requireNonNull(objectBindingsService);
    Objects.requireNonNull(asyncTaskService);

    boolean selfPidsAvailable = false;
    try {
        initializeSelfPidsIfNecessary();
        selfPidsAvailable = true;
    } catch (OperatingSystemException e) {
        log.error("Failed to get the process IDs of the local instance; "
            + "a new attempt will be made when actual monitoring data is requested: " + e.toString());
    }

    // only query the process states if the PIDs are actually known
    if (selfPidsAvailable) {
        try {
            selfLauncherProcState = systemDataService.fetchProcessState(selfLauncherPid);
            selfJavaProcState = systemDataService.fetchProcessState(selfJavaPid);
            topicIdToDescriptionMap.put(SystemMonitoringConstants.PERIODIC_MONITORING_TOPIC_SIMPLE_SYSTEM_INFO,
                "Logs basic system monitoring data (total CPU and RAM usage)");
            // note: PERIODIC_MONITORING_TOPIC_DETAILED_SYSTEM_INFO is currently not registered as a topic
        } catch (OperatingSystemException e) {
            log.error("Failed to initialize some system monitoring data: " + e.toString());
        }
    }

    objectBindingsService.addBinding(PeriodicMonitoringDataContributor.class, setUpPeriodicMonitoringDataContributorAdapter(), this);

    systemLoadInformationCollector =
        new SystemLoadInformationCollector(systemDataService, SYSTEM_LOAD_INFORMATION_COLLECTION_BUFFER_SIZE, new DefaultTimeSource(),
            MINIMUM_TIME_DELTA_TO_ACCEPT_BETWEEN_UPDATES, MAXIMUM_TIME_DELTA_TO_ACCEPT_BEFORE_STARTING_OVER);
    systemLoadCollectorFuture = asyncTaskService.scheduleAtFixedRate(systemLoadInformationCollector,
        SYSTEM_LOAD_INFORMATION_COLLECTION_INTERVAL_MSEC);
    log.debug("System load collector initialized");
}
/**
* Lifecycle counterpart of the activation method: stops the periodic system load collection and removes all object
* bindings registered by this instance.
* <p>
* The scheduling handle is checked for <code>null</code> because activation may have ended before the collector was
* scheduled; without the check, the resulting {@link NullPointerException} would also prevent the bindings cleanup.
*
* @param bundleContext the bundle context; not used by this method
*/
protected void deactivate(BundleContext bundleContext) {
    if (systemLoadCollectorFuture != null) {
        systemLoadCollectorFuture.cancel(false); // short-running; no need to interrupt
        systemLoadCollectorFuture = null;
    }
    systemLoadInformationCollector = null;
    objectBindingsService.removeAllBindingsOfOwner(this);
}
/**
* Dependency injection hook; stores the {@link ObjectBindingsService} to use.
*
* @param newInstance the service instance to store
*/
protected void bindObjectBindingsService(ObjectBindingsService newInstance) {
    this.objectBindingsService = newInstance;
}
/**
* Dependency injection hook; stores the low-level {@link SystemMonitoringDataService} to fetch system data from.
*
* @param newInstance the service instance to store
*/
protected void bindSystemMonitoringDataService(SystemMonitoringDataService newInstance) {
    systemDataService = newInstance;
}
/**
* Dependency injection hook; stores the {@link AsyncTaskService} used for background and scheduled tasks.
*
* @param newInstance the service instance to store
*/
protected void bindAsyncTaskService(AsyncTaskService newInstance) {
    asyncTaskService = newInstance;
}
/**
* Dependency injection hook; stores the {@link ConcurrencyUtilsFactory}. The stored instance is not read anywhere in
* this class at the moment.
*
* @param newInstance the factory instance to store
*/
protected void bindConcurrencyUtilsFactory(ConcurrencyUtilsFactory newInstance) {
    concurrencyUtilsFactory = newInstance;
}
/**
* Dependency injection hook; stores the {@link CommunicationService} used to obtain remote service proxies.
*
* @param newInstance the service instance to store
*/
protected void bindCommunicationService(CommunicationService newInstance) {
    communicationService = newInstance;
}
/**
* Returns a full system and process data snapshot. A snapshot created within the cache lifetime is reused; otherwise a
* new one is created, cached, and time-stamped.
*
* @return the cached or newly created snapshot
* @throws OperatingSystemException if determining the local process ids or gathering the snapshot data fails
*/
@Override
@AllowRemoteAccess
public synchronized FullSystemAndProcessDataSnapshot getCompleteSnapshot() throws OperatingSystemException {
    // TODO review if this is really necessary; calling this over and over seems clumsy
    initializeSelfPidsIfNecessary();

    final FullSystemAndProcessDataSnapshot result;
    if (hasValidCachedFullSnapshot()) {
        result = cachedFullSnapshot;
    } else {
        result = createFullSnapshot();
        // remember the fresh snapshot and the time it was taken
        cachedFullSnapshot = result;
        cachedFullSnapshotTimestamp = System.currentTimeMillis();
    }
    return result;
}
/**
* Delegates to the system load collector to fetch the buffered system load information.
*
* @param maxSamples passed through to the collector unchanged
* @return the result provided by the collector
*/
@Override
@AllowRemoteAccess
public SystemLoadInformation getSystemLoadInformation(Integer maxSamples) {
    // no locking here: according to the original note, the collector method is synchronized internally
    final SystemLoadInformationCollector collector = systemLoadInformationCollector;
    return collector.getSystemLoadInformation(maxSamples);
}
/**
* Fetches system load information from all given nodes in parallel and returns whatever arrived once all nodes have
* responded or the time limit has elapsed.
* <p>
* One asynchronous task is started per node. Each task releases one semaphore permit when it ends, whether it
* succeeded or failed, and a delayed task releases all permits once the time limit is reached. The calling thread
* waits for <code>nodeIds.size()</code> permits, using twice the time limit as a hard fallback.
*
* @param <T> the concrete node id type
* @param nodeIds the nodes to query
* @param timeSpanMsec the time span to request data for; converted to a sample count by dividing by the local
*        collection interval
* @param timeLimitMsec the maximum time to wait for the individual responses
* @return an unmodifiable copy of the results received so far; nodes that failed or did not respond in time are absent
* @throws InterruptedException if the calling thread is interrupted while waiting
* @throws ExecutionException declared by the interface; not thrown by this implementation
* @throws TimeoutException declared by the interface; not thrown by this implementation
*/
@Override
public <T extends ResolvableNodeId> Map<T, SystemLoadInformation> collectSystemMonitoringDataWithTimeLimit(
    final Set<T> nodeIds, final int timeSpanMsec, final int timeLimitMsec)
    throws InterruptedException, ExecutionException, TimeoutException {
    final Map<T, SystemLoadInformation> concurrentResultMap = new ConcurrentHashMap<>();
    final int nodeCount = nodeIds.size();
    // note: the division relies on the assumption that all nodes use the same polling interval
    final int maxSamples = timeSpanMsec / SYSTEM_LOAD_INFORMATION_COLLECTION_INTERVAL_MSEC;
    final Semaphore finishCounter = new Semaphore(0); // not using a CDL as it does not provide a "release all" method
    for (final T nodeId : nodeIds) {
        asyncTaskService.execute(new Runnable() {

            @Override
            @TaskDescription("Fetch system load data from a single node")
            public void run() {
                try {
                    final RemotableSystemMonitoringService remotableService =
                        communicationService.getRemotableService(RemotableSystemMonitoringService.class, nodeId);
                    final SystemLoadInformation systemLoadInformation = remotableService.getSystemLoadInformation(maxSamples);
                    concurrentResultMap.put(nodeId, systemLoadInformation);
                } catch (RemoteOperationException e) {
                    log.warn("Error while fetching remote system load data: " + e.toString());
                } finally {
                    // always signal completion, even on unexpected runtime errors; otherwise the
                    // caller would needlessly wait for the timeout
                    finishCounter.release();
                }
            }
        });
    }
    // trigger standard timeout
    asyncTaskService.scheduleAfterDelay(new Runnable() {

        @Override
        @TaskDescription("Enforce time limit while waiting for system load information responses")
        public void run() {
            finishCounter.release(nodeCount);
        }
    }, timeLimitMsec);
    // use twice the individual limit as a hard fallback time limit (arbitrary); computed as long to avoid int overflow
    final long fallbackTimeLimitMsec = 2L * timeLimitMsec;
    if (!finishCounter.tryAcquire(nodeCount, fallbackTimeLimitMsec, TimeUnit.MILLISECONDS)) {
        // this should not usually happen, but is possible under high system load
        log.warn("Fallback time limit reached while waiting for individual system load data responses");
    }
    // create and return an immutable snapshot of the map; no external locking is needed (or effective) for a
    // ConcurrentHashMap, and results arriving after this point are simply not part of the copy
    return Collections.unmodifiableMap(new HashMap<>(concurrentResultMap));
}
/**
* Discards the cached full snapshot and resets its timestamp, so the next request creates a new snapshot (intended for
* tests).
*/
protected void clearFullSnapshotCache() {
    this.cachedFullSnapshotTimestamp = 0;
    this.cachedFullSnapshot = null;
}
/**
* Gathers all data for a new full snapshot from the low-level data service: total CPU and RAM usage, the child
* processes of the local Java process, and CPU/RAM usage of the launcher and Java process (each only if its process
* state is available).
*
* @return the newly created snapshot
* @throws OperatingSystemException if the low-level data service fails
*/
private FullSystemAndProcessDataSnapshot createFullSnapshot() throws OperatingSystemException {
    final double totalCpuUsage = systemDataService.getTotalCPUUsage(); // valid percentage or Double.NaN
    // do not query idle time separately, but derive it from total CPU usage for consistency
    final double idleCpu = Double.isNaN(totalCpuUsage)
        ? Double.NaN
        : SystemMonitoringUtils.ONE_HUNDRED_PERCENT_CPU_VALUE - totalCpuUsage;
    final long usedRam = systemDataService.getTotalUsedRAM();

    final List<ProcessInformation> childProcesses = systemDataService.getFullChildProcessInformation(selfJavaPid);

    final List<ProcessInformation> localProcesses = new ArrayList<>();
    if (selfLauncherProcState != null) {
        localProcesses.add(describeOwnProcess(selfLauncherPid, selfLauncherProcState));
    }
    if (selfJavaProcState != null) {
        localProcesses.add(describeOwnProcess(selfJavaPid, selfJavaProcState));
    }

    return new FullSystemAndProcessDataSnapshot(totalCpuUsage, usedRam, systemDataService.getTotalSystemRAM(),
        idleCpu, childProcesses, localProcesses);
}

/**
* Creates the {@link ProcessInformation} entry (without children) for one of the instance's own processes.
*/
private ProcessInformation describeOwnProcess(long pid, ProcState procState) throws OperatingSystemException {
    return new ProcessInformation(pid, procState.getName(), Collections.<ProcessInformation> emptyList(),
        systemDataService.getProcessCPUUsage(pid), systemDataService.getProcessRAMUsage(pid));
}
/**
* Builds a one-line summary of total CPU and RAM usage. Values are taken from the cached full snapshot while it is
* valid, and fetched from the low-level data service otherwise.
*
* @return the formatted summary, or an error text if gathering the data failed
*/
private String createSimpleSystemMonitoringSummary() {
    final double cpuUsage;
    final long totalRam;
    final long usedRam;
    try {
        if (!hasValidCachedFullSnapshot()) {
            // no usable cache; query the current values directly
            totalRam = systemDataService.getTotalSystemRAM();
            cpuUsage = systemDataService.getTotalCPUUsage();
            usedRam = systemDataService.getTotalUsedRAM();
        } else {
            cpuUsage = cachedFullSnapshot.getNodeCPUusage();
            totalRam = cachedFullSnapshot.getNodeSystemRAM();
            usedRam = cachedFullSnapshot.getNodeRAMUsage();
        }
    } catch (OperatingSystemException e) {
        return "Error gathering system data: " + e.getMessage();
    }
    final double cpuDisplayValue = cpuUsage * SystemMonitoringConstants.PERCENTAGE_TO_DISPLAY_VALUE_MULTIPLIER;
    return StringUtils.format("System CPU usage: %.2f%%, System RAM usage: %d / %d MiB", cpuDisplayValue, usedRam, totalRam);
}
/**
* Checks whether a cached full snapshot exists and was created within the configured cache lifetime.
*
* @return true if the cached snapshot can be reused
*/
private boolean hasValidCachedFullSnapshot() {
    final long oldestAcceptableTimestamp = System.currentTimeMillis() - COMPLETE_SNAPSHOT_CACHE_LIFETIME_MSEC;
    final boolean recentEnough = cachedFullSnapshotTimestamp >= oldestAcceptableTimestamp;
    return recentEnough && cachedFullSnapshot != null;
}
/**
* Returns the string representation of the full snapshot, using the cached one while it is valid and requesting a new
* one otherwise.
*
* @return the snapshot's string representation, or "&lt;error&gt;" if creating the snapshot failed
*/
private String logDetailedMonitoringData() {
    if (!hasValidCachedFullSnapshot()) {
        try {
            return getCompleteSnapshot().toString();
        } catch (OperatingSystemException e) {
            log.error(e);
            return "<error>";
        }
    }
    return cachedFullSnapshot.toString();
}
/**
* Determines the process ids of the launcher (the parent of the current process) and of the current process itself,
* unless they are already known; a value of 0 is treated as "not determined yet".
*
* @throws OperatingSystemException if the state of the current process cannot be fetched
*/
private void initializeSelfPidsIfNecessary() throws OperatingSystemException {
    if (selfLauncherPid == 0) {
        // the launcher is the parent process of the current process
        final long currentPid = Humidor.getInstance().getSigar().getPid();
        final ProcState currentProcState = systemDataService.fetchProcessState(currentPid);
        selfLauncherPid = currentProcState.getPpid();
    }
    if (selfJavaPid == 0) {
        selfJavaPid = Humidor.getInstance().getSigar().getPid();
    }
}
/**
* Creates the adapter through which this service contributes periodic monitoring output. Topic ids and descriptions
* come from the topic map; output is generated for the "simple" and "detailed" system info topics.
*
* @return the new adapter instance
*/
private PeriodicMonitoringDataContributor setUpPeriodicMonitoringDataContributorAdapter() {
    final PeriodicMonitoringDataContributor adapter = new PeriodicMonitoringDataContributor() {

        @Override
        public Collection<String> getTopicIds() {
            return topicIdToDescriptionMap.keySet();
        }

        @Override
        public String getTopicDescription(String topicId) {
            return topicIdToDescriptionMap.get(topicId);
        }

        @Override
        public void generateOutput(String topicId, List<String> collection) {
            // select the text for the requested topic first, then append it
            final String output;
            switch (topicId) {
            case SystemMonitoringConstants.PERIODIC_MONITORING_TOPIC_SIMPLE_SYSTEM_INFO:
                output = createSimpleSystemMonitoringSummary();
                break;
            case SystemMonitoringConstants.PERIODIC_MONITORING_TOPIC_DETAILED_SYSTEM_INFO:
                output = logDetailedMonitoringData();
                break;
            default:
                throw new IllegalArgumentException("There is no topic id such as: " + topicId);
            }
            collection.add(output);
        }
    };
    return adapter;
}
}