/*
* RHQ Management Platform
* Copyright (C) 2005-2013 Red Hat, Inc.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
package org.rhq.core.pc.inventory;
import static org.rhq.core.domain.measurement.AvailabilityType.DOWN;
import static org.rhq.core.domain.measurement.AvailabilityType.UNKNOWN;
import static org.rhq.core.domain.measurement.AvailabilityType.UP;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jetbrains.annotations.Nullable;
import org.rhq.core.domain.discovery.AvailabilityReport;
import org.rhq.core.domain.measurement.Availability;
import org.rhq.core.domain.measurement.AvailabilityType;
import org.rhq.core.domain.measurement.MeasurementScheduleRequest;
import org.rhq.core.domain.resource.InventoryStatus;
import org.rhq.core.domain.resource.Resource;
import org.rhq.core.domain.resource.ResourceError;
import org.rhq.core.domain.resource.ResourceErrorType;
import org.rhq.core.pc.inventory.ResourceContainer.ResourceComponentState;
import org.rhq.core.pluginapi.availability.AvailabilityFacet;
import org.rhq.core.util.exception.ThrowableUtil;
import org.rhq.core.util.stream.StreamUtil;
/**
* Runs a periodic scan for resource availability.
*
* @author Jay Shaughnessy
* @author John Mazzitelli
* @author Ian Springer
*/
public class AvailabilityExecutor implements Runnable, Callable<AvailabilityReport> {
private static final Log LOG = LogFactory.getLog(AvailabilityExecutor.class);
protected final InventoryManager inventoryManager;
private final AtomicBoolean sendChangesOnlyReport;
private static final Random RANDOM = new Random();
// NOTE: this is probably useless. The concurrency of the availability checks is mainly guarded by the size of the
// availabilityThreadPoolExecutor in InventoryManager. While this lock object would prevent multiple avail checks
// from running concurrently even if the size of the above executor was more than 1 (which it isn't), the problem
// we'd then face would be that we use multiple instances of AvailabilityExecutor in InventoryManager:
// availabilityExecutor field but also local instances in executeAvailabilityScanImmediately() and
// getCurrentAvailability(). This means that the only thing preventing multiple availability checks from
// happening concurrently is the size of the thread pool, and this object serves little purpose in that regard.
private final Object lock = new Object();
private int scanHistorySize = 1;
private final LinkedList<Scan> scanHistory = new LinkedList<Scan>();
/**
 * Creates an executor that scans the inventory held by the given manager. The changes-only flag starts out as
 * <code>false</code>, so unless it is switched beforehand the first report built by this executor is a full one.
 *
 * @param inventoryManager the inventory manager used to look up resources and to hand over finished reports
 */
public AvailabilityExecutor(InventoryManager inventoryManager) {
    this.sendChangesOnlyReport = new AtomicBoolean();
    this.inventoryManager = inventoryManager;
}
/**
 * Builds an availability report and passes it to the inventory manager, all while holding this executor's lock.
 * Any exception raised while doing so is logged as a warning and not propagated.
 */
public void run() {
    try {
        synchronized (lock) {
            // the report is handed over as-is, even when call() produced none
            inventoryManager.handleReport(call());
        }
    } catch (Exception failure) {
        LOG.warn("Availability report collection failed", failure);
    }
}
/**
 * Builds the availability report that should be sent to the Server by scanning the inventory, starting at the
 * platform resource.
 *
 * <p>This will return <code>null</code> if the platform is not committed to inventory. Having no committed
 * inventory is rare. It happens for a brand new agent whose inventory hasn't been committed yet, or if the
 * platform and all its children have been deleted (in which case the agent should be uninstalled, or the user
 * will want to re-import the platform).</p>
 *
 * <p>Whether a full or a changes-only report is built depends on the flag controlled by
 * {@link #sendFullReportNextTime()} and {@link #sendChangesOnlyReportNextTime()}. Each full report switches the
 * executor to changes-only reports for the calls that follow. A changes-only report can be empty when nothing
 * changed.</p>
 *
 * @return the report containing all the availabilities that need to be sent to the Server, or <code>null</code>
 *         if the platform is not committed to inventory. A non-null report can be empty
 *
 * @throws Exception if failed to create and prepare the report
 */
@Nullable
public AvailabilityReport call() throws Exception {
    synchronized (lock) {
        if (InventoryStatus.COMMITTED != inventoryManager.getPlatform().getInventoryStatus()) {
            return null;
        }

        final boolean onlyChanges = sendChangesOnlyReport.get();
        final AvailabilityReport report = new AvailabilityReport(onlyChanges, inventoryManager.getAgent().getName());

        // Follow up full reports with changesOnly reports
        if (!onlyChanges) {
            sendChangesOnlyReportNextTime();
        }

        startScan(inventoryManager.getPlatform(), report, onlyChanges);
        return report;
    }
}
/**
 * This is an entry point for the recursive availability scan. I.e. usually this method is called with the platform
 * resource so that the avail scan is executed for the whole platform.
 *
 * <p>The scan root is given DOWN as its parent availability if any of its ancestors is currently known to be
 * DOWN, and UP otherwise. If the scan is aborted by an interrupt or by a runtime exception, this method returns
 * early: the scan is not added to the scan history and the report keeps whatever was added to it before the
 * failure.</p>
 *
 * <p>The interrupt status of the calling thread is never cleared by this method, regardless of the configured
 * log level. An {@link InterruptedException} raised by the scan re-asserts the interrupt status so that callers
 * (e.g. a thread pool that is shutting down) can still observe it.</p>
 *
 * @param scanRoot the resource to root the availability scan at
 * @param availabilityReport the availability report to fill
 * @param changesOnly whether to only report changes or produce a full report
 */
protected void startScan(Resource scanRoot, AvailabilityReport availabilityReport, boolean changesOnly) {
    long start = System.currentTimeMillis();
    Scan scan = new Scan(start, !changesOnly);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Scan Starting: " + new Date(start));
    }

    AvailabilityType parentAvailabilityType = findAncestorAvailability(scanRoot);
    boolean traceEnabled = LOG.isTraceEnabled();

    try {
        checkInventory(scanRoot, availabilityReport, parentAvailabilityType, false, scan, traceEnabled);
    } catch (InterruptedException e) {
        // throwing InterruptedException conventionally clears the flag; put it back for our callers
        Thread.currentThread().interrupt();
        LOG.debug("Availability check was interrupted", e);
        return;
    } catch (RuntimeException e) {
        if (LOG.isDebugEnabled()) {
            // isInterrupted() only inspects the flag; Thread.interrupted() would clear it, which made the
            // thread's interrupt status depend on whether debug logging happened to be enabled
            if (Thread.currentThread().isInterrupted()) {
                LOG.debug("Exception occurred during availability check, but this thread has been interrupted, "
                    + "so most likely the plugin container is shutting down: " + e);
            } else {
                LOG.debug("Exception occurred during availability check: " + e);
            }
        }
        return;
    }

    scan.setEndTime(System.currentTimeMillis());

    if (LOG.isDebugEnabled()) {
        LOG.debug("Scan Ended   : " + new Date(scan.getEndTime()) + " : " + scan.toString());
    }

    addScanHistory(scan);

    if (LOG.isDebugEnabled()) {
        logReportSize(availabilityReport, System.currentTimeMillis() - start);
    }
}

/**
 * Walks up from the scan root's parent and returns DOWN as soon as an ancestor with a known DOWN availability is
 * found. If no such ancestor exists the result is UP (the ancestors either truly are UP or are UNKNOWN as of now).
 */
private AvailabilityType findAncestorAvailability(Resource scanRoot) {
    for (Resource ancestor = scanRoot.getParentResource(); ancestor != null; ancestor = ancestor
        .getParentResource()) {
        Availability ancestorAvail = inventoryManager.getAvailabilityIfKnown(ancestor);
        if (ancestorAvail != null && ancestorAvail.getAvailabilityType() == DOWN) {
            return DOWN;
        }
    }
    return UP;
}

/**
 * Debug helper: serializes the report into memory to log its entry count, serialized size and build time.
 */
private void logReportSize(AvailabilityReport availabilityReport, long elapsedMillis) {
    ObjectOutputStream oos = null;
    try {
        ByteArrayOutputStream baos = new ByteArrayOutputStream(10000);
        oos = new ObjectOutputStream(baos);
        oos.writeObject(availabilityReport);
        // make sure nothing is left in the object stream's buffer before measuring
        oos.flush();
        LOG.debug("Built availability report for [" + availabilityReport.getResourceAvailability().size()
            + "] resources with a size of [" + baos.size() + "] bytes in [" + elapsedMillis + "]ms");
    } catch (IOException e) {
        LOG.debug("Failed to log the availability report details.", e);
    } finally {
        StreamUtil.safeClose(oos);
    }
}
/**
 * Checks the availability of one resource and then, recursively, of its children.
 *
 * <p>The resource (and with it its whole subtree) is skipped if it has no id, is not committed to inventory, has
 * no resource container, or its container is not synchronized. Otherwise the resource's current availability is
 * determined (taken from the parent, freshly checked, or carried over from the last known value), added to the
 * report if it changed or if the scan is a full one, and then passed on to the children as their parent
 * availability.</p>
 *
 * @param resource the resource to check
 * @param availabilityReport the report that receives the availability entries
 * @param parentAvailType the availability of the parent; if DOWN the resource is given that type without being
 *                        asked
 * @param isForced if true, the availability is checked even when the resource's scheduled time has not been
 *                 reached; it is also switched on for the children when this resource's availability changes
 *                 to UP
 * @param scan the scan being executed; provides the start time and full-report flag and collects the counters
 * @param traceEnabled whether trace messages should be logged
 *
 * @throws InterruptedException if this checking thread was interrupted
 */
protected void checkInventory(Resource resource, AvailabilityReport availabilityReport,
AvailabilityType parentAvailType, boolean isForced, Scan scan, boolean traceEnabled) throws InterruptedException {
// Only report avail for committed Resources - that's all the Server cares about.
if (resource.getId() == 0 || resource.getInventoryStatus() != InventoryStatus.COMMITTED) {
return;
}
ResourceContainer resourceContainer = this.inventoryManager.getResourceContainer(resource.getId());
// Only report avail for synchronized Resources, otherwise the Server will likely know nothing of the Resource.
if (resourceContainer == null
|| resourceContainer.getSynchronizationState() != ResourceContainer.SynchronizationState.SYNCHRONIZED) {
return;
}
// The avail proxy guarantees fast response time for an avail check
AvailabilityFacet resourceAvailabilityProxy = resourceContainer.getAvailabilityProxy();
++scan.numResources;
// See if this resource is scheduled for an avail check
boolean checkAvail = false;
boolean deferToParent = false;
long availabilityScheduleTime = resourceContainer.getAvailabilityScheduleTime();
MeasurementScheduleRequest availScheduleRequest = resourceContainer.getAvailabilitySchedule();
// if no avail check is scheduled or we're forcing the check, schedule the next check. Note that a forcedCheck
// is "off-schedule" so we need to push out the next check.
if ((0 == availabilityScheduleTime) || isForced) {
// if there is no availability schedule (platform) then just perform the avail check
// (note, platforms always return UP anyway).
if (null == availScheduleRequest) {
if (traceEnabled) {
LOG.trace("No availScheduleRequest for " + resource + ". checkAvail set to true");
}
checkAvail = true;
} else {
// if the schedule is enabled then schedule the next avail check, else just defer to the parent type
if (availScheduleRequest.isEnabled()) {
// Schedule the avail check at some time between now and (now + collectionInterval). By
// doing this random assignment for the first scheduled collection, we'll spread out the actual
// check times going forward. Do not check it on this pass (unless we're forced)
int interval = (int) availScheduleRequest.getInterval(); // intervals are short enough for safe cast
availabilityScheduleTime = scan.startTime + RANDOM.nextInt(interval + 1);
resourceContainer.setAvailabilityScheduleTime(availabilityScheduleTime);
if (traceEnabled) {
LOG.trace("Forced availabilityScheduleTime to " + new Date(availabilityScheduleTime) + " for "
+ resource);
}
++scan.numScheduledRandomly;
} else {
if (traceEnabled) {
LOG.trace("Deferred availability to parent for " + resource);
}
deferToParent = true;
}
}
} else {
// check avail if this resource scheduled time has been reached
checkAvail = scan.startTime >= availabilityScheduleTime;
if (checkAvail) {
if (traceEnabled) {
LOG.trace("Scheduled time has been reached for " + resource);
}
// push the next check out by one full interval, measured from the start of this scan
long interval = availScheduleRequest.getInterval(); // NOTE(review): assumes a schedule request exists whenever a schedule time was set - confirm
resourceContainer.setAvailabilityScheduleTime(scan.startTime + interval);
++scan.numPushedByInterval;
} else {
if (traceEnabled) {
LOG.trace("Scheduled time has not been reached for " + resource);
}
}
}
// find out what the avail was the last time we checked it. this may be null
Availability previous = this.inventoryManager.getAvailabilityIfKnown(resource);
AvailabilityType previousType = (null == previous) ? UNKNOWN : previous.getAvailabilityType();
AvailabilityType current = null;
// If the resource's parent is DOWN, the rules are that the resource and all of the parent's other
// descendants, must also be DOWN. So, there's no need to even ask the resource component
// for its current availability - its current avail is set to the parent avail type and that's that.
// Otherwise, checkAvail as needed.
if (deferToParent || (DOWN == parentAvailType)) {
current = parentAvailType;
++scan.numDeferToParent;
// For the DOWN parent case it's unclear to me whether we should push out the avail check time of
// the child. For now, we'll leave it alone and let the next check happen according to the
// schedule already established.
if (traceEnabled) {
LOG.trace("Gave parent availability " + parentAvailType + " to " + resource);
}
} else {
// regardless of whether the avail schedule is met, we still must check avail if isForce is true or if
// it's a full report and we don't yet have an avail for the resource.
if (!checkAvail && (isForced || (scan.isFull && null == previous))) {
checkAvail = true;
}
if (checkAvail) {
if (traceEnabled) {
LOG.trace("Now checking availability for " + resource);
}
try {
++scan.numGetAvailabilityCalls;
// if the component is started, ask what its current availability is as of right now;
// if it's not started, then assume it's down, and the next time we check,
// we'll see if it's started and check for real then - otherwise, keep assuming it's
// down (this is for the case when a plugin component can't start for whatever reason
// or is just slow to start)
if (resourceContainer.getResourceComponentState() == ResourceComponentState.STARTED) {
current = translate(resourceAvailabilityProxy.getAvailability(), previousType);
} else {
// try to start the component and then perform the avail check
this.inventoryManager.activateResource(resource, resourceContainer, false);
if (resourceContainer.getResourceComponentState() == ResourceComponentState.STARTED) {
current = translate(resourceAvailabilityProxy.getAvailability(), previousType);
} else {
current = DOWN;
}
}
if (traceEnabled) {
LOG.trace("Current availability is " + current + " for " + resource);
}
} catch (Throwable t) {
// any failure while activating or asking the component is reported to the server as a resource
// error and the resource is treated as DOWN
ResourceError resourceError = new ResourceError(resource, ResourceErrorType.AVAILABILITY_CHECK,
t.getLocalizedMessage(), ThrowableUtil.getStackAsString(t), System.currentTimeMillis());
this.inventoryManager.sendResourceErrorToServer(resourceError);
LOG.warn("Availability collection failed with exception on " + resource
+ ", availability will be reported as " + DOWN.name() + ", reason=" + t.getMessage());
current = DOWN;
}
} else {
// no check this pass: carry over the last known type (UNKNOWN if there is none)
current = previousType;
}
}
// Add the availability to the report if it changed from its previous state or if this is a full report.
// Update the resource container only if the avail has changed.
// Note that a current value of UNKNOWN is never treated as a change.
boolean availChanged = (UNKNOWN != current && current != previousType);
if (availChanged || scan.isFull) {
Availability availability;
if (availChanged) {
if (traceEnabled) {
LOG.trace("Availability changed for " + resource);
}
++scan.numAvailabilityChanges;
availability = this.inventoryManager.updateAvailability(resource, current);
// if the resource avail changed to UP then we must perform avail checks for all
// children, to ensure their avails are up to date. Note that if it changed to NOT UP
// then the children will just get the parent avail type and there is no avail check anyway.
if (!isForced && (UP == current)) {
if (traceEnabled) {
LOG.trace("Forcing availability check for children of " + resource);
}
isForced = true;
}
} else {
// avoid the overhead of updating the resource container, the avail type did not change
availability = new Availability(resource, current);
}
// update the report
availabilityReport.addAvailability(availability);
}
// recurse: the children see this resource's availability as their parent availability, and inherit the
// (possibly just switched on) forced flag
for (Resource child : this.inventoryManager.getContainerChildren(resource, resourceContainer)) {
checkInventory(child, availabilityReport, current, isForced, scan, traceEnabled);
}
}
/**
 * Resources must report UP, DOWN or MISSING. A reported UNKNOWN is therefore replaced by the previously set
 * availability; any other reported value is returned unchanged.
 *
 * @param current the availability just reported by the resource
 * @param previousType the availability to fall back to when <code>current</code> is UNKNOWN
 *
 * @return <code>previousType</code> if <code>current</code> is UNKNOWN, otherwise <code>current</code>
 */
private AvailabilityType translate(AvailabilityType current, AvailabilityType previousType) {
    if (current == UNKNOWN) {
        return previousType;
    }
    return current;
}
/**
 * Tells the executor that the next report it builds must be a full availability report. When trace logging is
 * enabled, the org.rhq frames of the caller's stack are logged to show who asked for it. Public-scoped so tests
 * can call this.
 */
public void sendFullReportNextTime() {
    sendChangesOnlyReport.set(false);

    if (!LOG.isTraceEnabled()) {
        return;
    }

    String requester = getSmallStackTrace(new Throwable());
    LOG.trace("\nFull report requested by: " + requester);
}
/**
 * Renders a shortened stack trace: one line per stack frame whose class name starts with "org.rhq". The first
 * frame of the stack is always left out.
 *
 * @param t the throwable whose stack is rendered; if <code>null</code>, the stack of a newly created exception
 *          is used instead
 *
 * @return the matching frames, each followed by a newline; empty if no frame matches
 */
private static String getSmallStackTrace(Throwable t) {
    StackTraceElement[] frames = (t != null) ? t.getStackTrace() : new Exception().getStackTrace();
    StringBuilder filtered = new StringBuilder();

    // start at 1: frame 0 is deliberately skipped
    for (int index = 1; index < frames.length; index++) {
        StackTraceElement frame = frames[index];
        if (!frame.getClassName().startsWith("org.rhq")) {
            continue;
        }
        filtered.append(frame).append('\n');
    }

    return filtered.toString();
}
/**
 * Tells the executor that the next report it builds should be a minimal one, i.e. a resource's availability is
 * left out if it hasn't changed from its last known state. Public-scoped so test code can call this.
 */
public void sendChangesOnlyReportNextTime() {
    sendChangesOnlyReport.set(true);
}
/**
 * Records a scan as the most recent entry of the scan history. If the history already holds as many scans as
 * the configured history size, the oldest entry is dropped first.
 *
 * @param scan the scan to record
 */
public void addScanHistory(Scan scan) {
    synchronized (scanHistory) {
        boolean atCapacity = (scanHistorySize == scanHistory.size());
        if (atCapacity) {
            scanHistory.removeLast();
        }
        // newest scans live at the head of the list
        scanHistory.addFirst(scan);
    }
}
/**
 * Returns a snapshot of the scan history, most recent scan first. The returned list is a copy, so changing it
 * does not affect the history kept by this executor.
 *
 * @return a new list holding the recorded scans
 */
public List<Scan> getScanHistory() {
    synchronized (scanHistory) {
        return new ArrayList<Scan>(scanHistory);
    }
}
/**
 * Returns the scan that was recorded last.
 *
 * @return the most recent entry of the scan history, or <code>null</code> if no scan has been recorded
 */
public Scan getMostRecentScanHistory() {
    synchronized (scanHistory) {
        if (scanHistory.isEmpty()) {
            return null;
        }
        return scanHistory.getFirst();
    }
}
/**
 * Sets how many scans are kept in the scan history. Values smaller than 1 are ignored. If the history currently
 * holds more scans than the new size allows, the oldest ones are discarded.
 *
 * @param scanHistorySize the new maximum number of scans to keep
 */
public void setScanHistorySize(int scanHistorySize) {
    synchronized (scanHistory) {
        if (scanHistorySize >= 1) {
            int surplus = scanHistory.size() - scanHistorySize;
            for (int dropped = 0; dropped < surplus; dropped++) {
                scanHistory.removeLast();
            }
            this.scanHistorySize = scanHistorySize;
        }
    }
}
/**
 * Bookkeeping for a single availability scan: when it started and ended, whether it was a full and/or forced
 * scan, and a set of counters that are incremented while the scan is running.
 */
public static class Scan {
    private final long startTime;
    private long endTime;
    private long runtime;
    private boolean isFull;
    private boolean isForced;

    // counters, incremented directly by the code that runs the scan; all start at zero
    int numResources;
    int numGetAvailabilityCalls;
    int numScheduledRandomly;
    int numPushedByInterval;
    int numAvailabilityChanges;
    int numDeferToParent;

    /**
     * @param startTime the time the scan started, in epoch millis
     * @param isFull whether the scan produces a full report
     */
    public Scan(long startTime, boolean isFull) {
        this.isFull = isFull;
        this.startTime = startTime;
    }

    public long getStartTime() {
        return this.startTime;
    }

    public long getEndTime() {
        return this.endTime;
    }

    /**
     * Sets the end time and derives the runtime (end time minus start time) from it.
     */
    public void setEndTime(long endTime) {
        this.runtime = endTime - this.startTime;
        this.endTime = endTime;
    }

    public long getRuntime() {
        return this.runtime;
    }

    public boolean isFull() {
        return this.isFull;
    }

    public boolean isForced() {
        return this.isForced;
    }

    public void setForced(boolean isForced) {
        this.isForced = isForced;
    }

    public int getNumResources() {
        return this.numResources;
    }

    public int getNumGetAvailabilityCalls() {
        return this.numGetAvailabilityCalls;
    }

    public int getNumScheduledRandomly() {
        return this.numScheduledRandomly;
    }

    public int getNumPushedByInterval() {
        return this.numPushedByInterval;
    }

    public int getNumAvailabilityChanges() {
        return this.numAvailabilityChanges;
    }

    public int getNumDeferToParent() {
        return this.numDeferToParent;
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Scan [");
        text.append("startTime=").append(startTime);
        text.append(", endTime=").append(endTime);
        text.append(", runtime=").append(runtime);
        text.append(", isFull=").append(isFull);
        text.append(", isForced=").append(isForced);
        text.append(", numResources=").append(numResources);
        text.append(", numGetAvailabilityCalls=").append(numGetAvailabilityCalls);
        text.append(", numScheduledRandomly=").append(numScheduledRandomly);
        text.append(", numPushedByInterval=").append(numPushedByInterval);
        text.append(", numAvailabilityChanges=").append(numAvailabilityChanges);
        text.append(", numDeferToParent=").append(numDeferToParent);
        return text.append("]").toString();
    }
}
}