/*******************************************************************************
*
* Copyright (c) 2004-2009 Oracle Corporation.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
*
* Kohsuke Kawaguchi
*
*
*******************************************************************************/
package hudson.node_monitors;
import hudson.model.Computer;
import hudson.model.Descriptor;
import hudson.model.Hudson;
import hudson.model.ComputerSet;
import hudson.model.AdministrativeMonitor;
import hudson.triggers.Trigger;
import hudson.triggers.SafeTimerTask;
import hudson.slaves.OfflineCause;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Convenient base class for common {@link NodeMonitor} implementations where the
* "monitoring" consists of executing something periodically on every node and
* taking some action based on its result.
*
* <p> "T" represents the result of the monitoring.
*
* @author Kohsuke Kawaguchi
*/
public abstract class AbstractNodeMonitorDescriptor<T> extends Descriptor<NodeMonitor> {

    /**
     * Default interval between two scheduled monitoring passes
     * (one hour, expressed in milliseconds).
     */
    private static final long HOUR = 1000 * 60 * 60L;

    /**
     * Per-instance logger, named after the concrete descriptor class so that
     * log records can be attributed to the specific monitor.
     */
    private final Logger LOGGER = Logger.getLogger(getClass().getName());

    /**
     * Represents the last record of the update, or null if no monitoring
     * pass has completed yet.
     */
    private volatile Record record = null;

    /**
     * Represents the update activity in progress, or null if none is running.
     * Writes are guarded by the monitor of this descriptor.
     */
    private volatile Record inProgress = null;

    /**
     * Creates a descriptor whose monitoring is re-run every hour.
     */
    protected AbstractNodeMonitorDescriptor() {
        this(HOUR);
    }

    /**
     * Creates a descriptor whose monitoring is re-run at the given interval.
     *
     * @param interval delay before the first run and period between
     *                 subsequent runs, as passed to the shared trigger timer
     *                 (presumably milliseconds, like {@code HOUR} - confirm
     *                 against {@code Trigger.timer}).
     */
    protected AbstractNodeMonitorDescriptor(long interval) {
        schedule(interval);
    }

    /**
     * Creates a descriptor for the given monitor class, re-run every hour.
     *
     * @param clazz the {@link NodeMonitor} type this descriptor describes.
     */
    protected AbstractNodeMonitorDescriptor(Class<? extends NodeMonitor> clazz) {
        this(clazz, HOUR);
    }

    /**
     * Creates a descriptor for the given monitor class, re-run at the given
     * interval.
     *
     * @param clazz    the {@link NodeMonitor} type this descriptor describes.
     * @param interval delay before the first run and period between
     *                 subsequent runs, as passed to the shared trigger timer.
     */
    protected AbstractNodeMonitorDescriptor(Class<? extends NodeMonitor> clazz, long interval) {
        super(clazz);
        schedule(interval);
    }

    /**
     * Registers a recurring task on {@code Trigger.timer} that calls
     * {@link #triggerUpdate()}. The same value is used for both the initial
     * delay and the period, so the first scheduled pass happens one full
     * interval after construction.
     */
    private void schedule(long interval) {
        Trigger.timer.scheduleAtFixedRate(new SafeTimerTask() {
            public void doRun() {
                triggerUpdate();
            }
        }, interval, interval);
    }

    /**
     * Performs monitoring of the given computer object. This method is invoked
     * periodically to perform the monitoring of the computer.
     *
     * @return Application-specific value that represents the observed
     * monitoring value on the given node. This value will be returned from the
     * {@link #get(Computer)} method. If null is returned, it will be
     * interpreted as "no observed value." This is convenient way of abandoning
     * the observation on a particular computer, whereas {@link IOException} is
     * useful for indicating a hard error that needs to be corrected.
     * @throws IOException          to indicate a hard error; it is logged and
     *                              the pass continues with the next computer.
     * @throws InterruptedException if the monitoring thread was interrupted;
     *                              the whole pass is then abandoned.
     */
    protected abstract T monitor(Computer c) throws IOException, InterruptedException;

    /**
     * Obtains the monitoring result currently available, or null if no data is
     * available.
     *
     * <p> If no data is available, a background task to collect data will be
     * started.
     *
     * @param c the computer whose last observed value is requested.
     * @return the value recorded for {@code c} by the last completed pass, or
     * null if no pass has completed yet or nothing was recorded for {@code c}.
     */
    public T get(Computer c) {
        // Read the volatile field once so the null check and the lookup
        // are guaranteed to operate on the same record.
        Record latest = record;
        if (latest == null) {
            // if this is the first time, schedule the check now
            if (inProgress == null) {
                synchronized (this) {
                    if (inProgress == null) {
                        new Record().start();
                    }
                }
            }
            return null;
        }
        return latest.data.get(c);
    }

    /**
     * Is this monitor currently ignored?
     *
     * @return true if no {@link NodeMonitor} is registered for this descriptor
     * in {@link ComputerSet#getMonitors()}, or if the registered monitor
     * reports itself as ignored.
     */
    public boolean isIgnored() {
        NodeMonitor m = ComputerSet.getMonitors().get(this);
        return m == null || m.isIgnored();
    }

    /**
     * Utility method to mark the computer offline for derived classes.
     *
     * @param c  the computer to take offline.
     * @param oc the reason recorded with the offline state; may be null.
     * @return true if the node was actually taken offline by this act (as
     * opposed to us deciding not to do it, or the computer already marked
     * offline.)
     */
    protected boolean markOffline(Computer c, OfflineCause oc) {
        if (isIgnored() || c.isTemporarilyOffline()) {
            return false; // noop
        }
        c.setTemporarilyOffline(true, oc);
        // notify the admin by activating the corresponding administrative
        // monitor, if one is registered
        MonitorMarkedNodeOffline no = AdministrativeMonitor.all().get(MonitorMarkedNodeOffline.class);
        if (no != null) {
            no.active = true;
        }
        return true;
    }

    /**
     * Marks the computer offline without recording a cause.
     *
     * @deprecated as of 1.320 Use {@link #markOffline(Computer, OfflineCause)}
     * to specify the cause.
     */
    @Deprecated
    protected boolean markOffline(Computer c) {
        return markOffline(c, null);
    }

    /**
     * Starts a new monitoring pass immediately. If a previous pass is still
     * registered as in progress, it is interrupted by the new one.
     *
     * @return the thread that has been started to perform the pass.
     * @see NodeMonitor#triggerUpdate()
     */
    /*package*/ Thread triggerUpdate() {
        Record t = new Record();
        t.start();
        return t;
    }

    /**
     * Thread that monitors nodes, as well as the data structure to record the
     * result.
     */
    private final class Record extends Thread {

        /**
         * Last computed monitoring result. Only written by this thread while
         * it runs; readers see it after it has been published via
         * {@code record}.
         */
        private final Map<Computer, T> data = new HashMap<Computer, T>();

        /**
         * Creates the thread and registers it as the pass in progress,
         * interrupting any pass that is still registered.
         */
        public Record() {
            super("Monitoring thread for " + getDisplayName() + " started on " + new Date());
            synchronized (AbstractNodeMonitorDescriptor.this) {
                if (inProgress != null) {
                    // maybe it got stuck?
                    LOGGER.warning("Previous " + getDisplayName() + " monitoring activity still in progress. Interrupting");
                    inProgress.interrupt();
                }
                inProgress = this;
            }
        }

        /**
         * Monitors every computer in turn and, if the pass ran to completion
         * and has not been superseded, publishes itself as the latest record.
         */
        @Override
        public void run() {
            long startTime = System.currentTimeMillis();
            String oldName = getName();
            boolean completed = false;
            try {
                for (Computer c : Hudson.getInstance().getComputers()) {
                    try {
                        setName("Monitoring " + c.getDisplayName() + " for " + getDisplayName());
                        if (c.getChannel() == null) {
                            // no channel to the node: record "no observed value"
                            data.put(c, null);
                        } else {
                            data.put(c, monitor(c));
                        }
                    } catch (IOException e) {
                        LOGGER.log(Level.WARNING, "Failed to monitor " + c.getDisplayName() + " for " + getDisplayName(), e);
                    } catch (RuntimeException e) {
                        // A misbehaving monitor on one node must not kill the
                        // whole pass and leave inProgress pointing at a dead thread.
                        LOGGER.log(Level.WARNING, "Failed to monitor " + c.getDisplayName() + " for " + getDisplayName(), e);
                    } catch (InterruptedException e) {
                        // We were interrupted (typically by a newer pass that
                        // replaced us). Stop here instead of continuing with
                        // the remaining nodes and publishing partial data.
                        LOGGER.log(Level.WARNING, "Node monitoring " + c.getDisplayName() + " for " + getDisplayName() + " aborted.", e);
                        return;
                    }
                }
                completed = true;
            } finally {
                setName(oldName);
                synchronized (AbstractNodeMonitorDescriptor.this) {
                    // Only touch the shared state if we are still the current
                    // pass; a successor may already have taken over.
                    if (inProgress == this) {
                        inProgress = null;
                        if (completed) {
                            record = this;
                        }
                    }
                }
            }
            LOGGER.fine("Node monitoring " + getDisplayName() + " completed in " + (System.currentTimeMillis() - startTime) + "ms");
        }
    }
}