/*
* The MIT License
*
* Copyright (c) 2004-2009, Sun Microsystems, Inc., Kohsuke Kawaguchi
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package hudson.node_monitors;
import hudson.Util;
import hudson.model.Computer;
import hudson.model.Descriptor;
import jenkins.model.Jenkins;
import hudson.model.ComputerSet;
import hudson.model.AdministrativeMonitor;
import hudson.triggers.SafeTimerTask;
import hudson.slaves.OfflineCause;
import jenkins.util.Timer;
import javax.annotation.concurrent.GuardedBy;
import java.io.IOException;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Convenient base class for common {@link NodeMonitor} implementation
 * where the "monitoring" consists of executing something periodically on every node
 * and taking some action based on its result.
 *
 * <p>
 * Each monitoring round runs on its own {@link Record} thread. The most recently
 * completed round is published through a volatile field and served by
 * {@link #get(Computer)}; starting and tracking the round in progress is
 * synchronized on this descriptor.
 *
 * @param <T>
 *     represents the result of the monitoring.
 * @author Kohsuke Kawaguchi
 */
public abstract class AbstractNodeMonitorDescriptor<T> extends Descriptor<NodeMonitor> {
    /**
     * Creates a descriptor whose monitoring is re-triggered every hour.
     *
     * @deprecated as of 1.522
     *     Extend from {@link AbstractAsyncNodeMonitorDescriptor}
     */
    @Deprecated
    protected AbstractNodeMonitorDescriptor() {
        this(HOUR);
    }

    /**
     * Creates a descriptor whose monitoring is re-triggered at the given interval.
     *
     * @param interval
     *     time between periodic updates, in milliseconds.
     * @deprecated as of 1.522
     *     Extend from {@link AbstractAsyncNodeMonitorDescriptor}
     */
    @Deprecated
    protected AbstractNodeMonitorDescriptor(long interval) {
        schedule(interval);
    }

    /**
     * Creates a descriptor for the given monitor class, re-triggered every hour.
     *
     * @param clazz
     *     the {@link NodeMonitor} type passed on to the super constructor.
     * @deprecated as of 1.522
     *     Extend from {@link AbstractAsyncNodeMonitorDescriptor}
     */
    @Deprecated
    protected AbstractNodeMonitorDescriptor(Class<? extends NodeMonitor> clazz) {
        this(clazz, HOUR);
    }

    /**
     * Creates a descriptor for the given monitor class, re-triggered at the given interval.
     *
     * @param clazz
     *     the {@link NodeMonitor} type passed on to the super constructor.
     * @param interval
     *     time between periodic updates, in milliseconds.
     * @deprecated as of 1.522
     *     Extend from {@link AbstractAsyncNodeMonitorDescriptor}
     */
    @Deprecated
    protected AbstractNodeMonitorDescriptor(Class<? extends NodeMonitor> clazz, long interval) {
        super(clazz);
        schedule(interval);
    }

    /**
     * Registers a periodic task with {@link Timer} that calls {@link #triggerUpdate()}.
     * Both the initial delay and the period are set to {@code interval}.
     *
     * @param interval
     *     initial delay and period, in milliseconds.
     */
    private void schedule(long interval) {
        Timer.get()
            .scheduleAtFixedRate(new SafeTimerTask() {
                public void doRun() {
                    triggerUpdate();
                }
            }, interval, interval, TimeUnit.MILLISECONDS);
    }

    /**
     * Represents the last record of the update.
     *
     * Once set to non-null, never be null.
     */
    private transient volatile Record record = null;

    /**
     * Represents the update activity in progress.
     */
    @GuardedBy("this")
    private transient Record inProgress = null;

    /**
     * Represents when an update activity was last started.
     */
    @GuardedBy("this")
    private transient long inProgressStarted = Long.MIN_VALUE;

    /**
     * Performs monitoring of the given computer object.
     * This method is invoked periodically to perform the monitoring of the computer.
     *
     * @return
     *     Application-specific value that represents the observed monitoring value
     *     on the given node. This value will be returned from the {@link #get(Computer)} method.
     *     If null is returned, it will be interpreted as "no observed value." This is
     *     convenient way of abandoning the observation on a particular computer,
     *     whereas {@link IOException} is useful for indicating a hard error that needs to be
     *     corrected.
     */
    protected abstract T monitor(Computer c) throws IOException, InterruptedException;

    /**
     * Performs monitoring across the board.
     *
     * <p>
     * Computers without a channel are recorded with a {@code null} value. A computer whose
     * monitoring fails with a {@link RuntimeException} or {@link IOException} is logged and
     * left out of the result. The name of the calling thread is changed while each computer
     * is being monitored; this method does not restore it.
     *
     * @return
     *     For all the computers, report the monitored values.
     * @throws InterruptedException
     *     if monitoring of a computer was interrupted; the remaining computers are skipped.
     */
    protected Map<Computer,T> monitor() throws InterruptedException {
        Map<Computer,T> data = new HashMap<Computer,T>();
        for (Computer c : Jenkins.getInstance().getComputers()) {
            try {
                // Make thread dumps show which node this round is currently working on.
                Thread.currentThread().setName("Monitoring "+c.getDisplayName()+" for "+getDisplayName());

                if (c.getChannel() == null)
                    data.put(c, null);
                else
                    data.put(c, monitor(c));
            } catch (RuntimeException e) {
                LOGGER.log(Level.WARNING, "Failed to monitor "+c.getDisplayName()+" for "+getDisplayName(), e);
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "Failed to monitor "+c.getDisplayName()+" for "+getDisplayName(), e);
            } catch (InterruptedException e) {
                // Re-throw with a message naming the node, keeping the original as the cause.
                throw (InterruptedException)new InterruptedException("Node monitoring "+c.getDisplayName()+" for "+getDisplayName()+" aborted.").initCause(e);
            }
        }
        return data;
    }

    /**
     * Obtains the monitoring result currently available, or null if no data is available.
     *
     * <p>
     * If no data is available, a background task to collect data will be started.
     */
    public T get(Computer c) {
        // Read the volatile field exactly once so that the containsKey() check and the
        // lookup below are answered by the same Record, even if a monitoring round
        // completes and publishes a new Record in between.
        final Record r = record;
        if (r == null || !r.data.containsKey(c)) {
            // if we don't have the data, schedule the check now
            triggerUpdate();
            return null;
        }
        return r.data.get(c);
    }

    /**
     * Is the monitoring activity currently in progress?
     */
    private synchronized boolean isInProgress() {
        return inProgress != null && inProgress.isAlive();
    }

    /**
     * The timestamp that indicates when the last round of the monitoring has completed.
     *
     * @return
     *     the completion time of the last round as reported by
     *     {@link System#currentTimeMillis()}, or 0 if no round has completed yet.
     */
    public long getTimestamp() {
        final Record r = record; // single volatile read
        return r == null ? 0L : r.timestamp;
    }

    /**
     * Describes how long ago the last round of the monitoring has completed.
     *
     * @return
     *     the elapsed time since the last completed round, formatted by
     *     {@link Util#getPastTimeString(long)}, or a "no data yet" message
     *     if no round has completed.
     */
    public String getTimestampString() {
        final Record r = record; // single volatile read
        if (r == null)
            return Messages.AbstractNodeMonitorDescriptor_NoDataYet();
        return Util.getPastTimeString(System.currentTimeMillis() - r.timestamp);
    }

    /**
     * Is this monitor currently ignored?
     *
     * @return
     *     true if {@link ComputerSet#getMonitors()} has no monitor for this descriptor,
     *     or if that monitor reports itself as ignored.
     */
    public boolean isIgnored() {
        NodeMonitor m = ComputerSet.getMonitors().get(this);
        return m == null || m.isIgnored();
    }

    /**
     * Utility method to mark the computer online for derived classes.
     *
     * @return true
     *     if the node was actually taken online by this act (as opposed to us deciding not to do it,
     *     or the computer was already online.)
     */
    protected boolean markOnline(Computer c) {
        if (isIgnored() || c.isOnline()) return false; // noop
        c.setTemporarilyOffline(false, null);
        return true;
    }

    /**
     * Utility method to mark the computer offline for derived classes.
     *
     * @param oc
     *     the cause passed on to {@link Computer#setTemporarilyOffline(boolean, OfflineCause)}.
     * @return true
     *     if the node was actually taken offline by this act (as opposed to us deciding not to do it,
     *     or the computer already marked offline.)
     */
    protected boolean markOffline(Computer c, OfflineCause oc) {
        if (isIgnored() || c.isTemporarilyOffline()) return false; // noop

        c.setTemporarilyOffline(true, oc);

        // notify the admin
        MonitorMarkedNodeOffline no = AdministrativeMonitor.all().get(MonitorMarkedNodeOffline.class);
        if (no != null)
            no.active = true;
        return true;
    }

    /**
     * @deprecated as of 1.320
     *     Use {@link #markOffline(Computer, OfflineCause)} to specify the cause.
     */
    @Deprecated
    protected boolean markOffline(Computer c) {
        return markOffline(c, null);
    }

    /**
     * Starts a new monitoring round unless one is already running and still within its time out.
     *
     * <p>
     * A previous round that is no longer alive is discarded. A previous round that has been
     * running for more than {@link #getMonitoringTimeOut()} plus one second is interrupted
     * and replaced by a new one.
     *
     * @return
     *     the thread performing the round: either the one already in progress or the
     *     newly started one.
     * @see NodeMonitor#triggerUpdate()
     */
    /*package*/ synchronized Thread triggerUpdate() {
        if (inProgress != null) {
            if (!inProgress.isAlive()) {
                LOGGER.log(Level.WARNING, "Previous {0} monitoring activity died without cleaning up after itself",
                    getDisplayName());
                inProgress = null;
            } else if (System.currentTimeMillis() > inProgressStarted + getMonitoringTimeOut() + 1000) {
                // maybe it got stuck?
                LOGGER.log(Level.WARNING, "Previous {0} monitoring activity still in progress. Interrupting",
                    getDisplayName());
                inProgress.interrupt();
                inProgress = null; // we interrupted the old one so it's now dead to us.
            } else {
                // return the in progress
                return inProgress;
            }
        }
        final Record t = new Record();
        t.start();
        // only store the new thread if we started it
        inProgress = t;
        inProgressStarted = System.currentTimeMillis();
        return inProgress;
    }

    /**
     * Controls the time out of monitoring.
     *
     * @return
     *     the time out in milliseconds; 30 seconds unless overridden.
     */
    protected long getMonitoringTimeOut() {
        return TimeUnit.SECONDS.toMillis(30);
    }

    /**
     * Thread that monitors nodes, as well as the data structure to record
     * the result.
     */
    private final class Record extends Thread {
        /**
         * Last computed monitoring result.
         * Assigned in {@link #run()} before this object is stored in the enclosing
         * descriptor's volatile {@code record} field.
         */
        private /*final*/ Map<Computer,T> data = Collections.emptyMap();

        /**
         * When the round completed, as reported by {@link System#currentTimeMillis()}.
         */
        private long timestamp;

        public Record() {
            super("Monitoring thread for "+getDisplayName()+" started on "+new Date());
        }

        /**
         * Runs one monitoring round and, on success, publishes this object as the
         * latest record. Whatever the outcome, clears the enclosing descriptor's
         * in-progress marker if it still points at this thread.
         */
        @Override
        public void run() {
            try {
                long startTime = System.currentTimeMillis();
                String oldName = getName();
                try {
                    data = monitor();
                } finally {
                    // monitor() renames the current thread per node; restore the name even
                    // when the round is interrupted or fails, so that the failure is not
                    // attributed to a thread still named after the last node visited.
                    setName(oldName);
                }

                timestamp = System.currentTimeMillis();
                record = this;

                LOGGER.log(Level.FINE, "Node monitoring {0} completed in {1}ms", new Object[] {getDisplayName(), System.currentTimeMillis()-startTime});
            } catch (InterruptedException x) {
                // interrupted by new one, fine
            } catch (Throwable t) {
                LOGGER.log(Level.WARNING, "Unexpected node monitoring termination: "+getDisplayName(), t);
            } finally {
                synchronized (AbstractNodeMonitorDescriptor.this) {
                    // A newer round may already have replaced us; only clear our own marker.
                    if (inProgress == this)
                        inProgress = null;
                }
            }
        }
    }

    private static final Logger LOGGER = Logger.getLogger(AbstractNodeMonitorDescriptor.class.getName());

    /** One hour in milliseconds; the default update interval. */
    private static final long HOUR = 1000*60*60L;
}