/*
* RHQ Management Platform
* Copyright (C) 2005-2013 Red Hat, Inc.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
package org.rhq.core.pc.configuration;
import gnu.trove.map.hash.TIntLongHashMap;
import gnu.trove.set.hash.TIntHashSet;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.Callable;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.rhq.core.clientapi.agent.PluginContainerException;
import org.rhq.core.clientapi.server.configuration.ConfigurationServerService;
import org.rhq.core.domain.configuration.Configuration;
import org.rhq.core.domain.configuration.ConfigurationUtility;
import org.rhq.core.domain.configuration.definition.ConfigurationDefinition;
import org.rhq.core.domain.measurement.AvailabilityType;
import org.rhq.core.domain.resource.InventoryStatus;
import org.rhq.core.domain.resource.Resource;
import org.rhq.core.domain.resource.ResourceCategory;
import org.rhq.core.domain.resource.ResourceType;
import org.rhq.core.pc.PluginContainer;
import org.rhq.core.pc.inventory.InventoryManager;
import org.rhq.core.pc.inventory.ResourceContainer;
import org.rhq.core.pc.util.FacetLockType;
import org.rhq.core.pluginapi.configuration.ConfigurationFacet;
/**
* The original implementation had two main issues, the second being a by-product of the first. First, it did not
* chunk the work, so large inventories could generate a long configuration discovery/check with fairly significant
* usage on the agent. The scans ran infrequently, once an hour by default. Secondly, this job runs on
* a single-threaded thread pool that is also responsible for processing on-demand configuration updates (from the
* GUI or remote clients). These on-demand updates could get starved and possibly time out waiting for a discovery
* run to complete (BZ 1100300).
* To solve these issues we now do the following: we chunk config checking by "roots". A root can be the platform
* or a top-level server, so the number of roots is the number of top-level servers plus one. We then run the checker
* more often, every 5 minutes by default.
* Each run then starts operating on eligible roots, one at a time. A root is eligible if it hasn't been checked for
* rhq.agent.plugins.configuration-discovery.period-secs (this property was somewhat re-purposed but in the end is
* sort of the same: it specifies the interval between checks, but not necessarily the interval between executions of
* the checker). Each run now has a time limit, 10s by default. We check as many eligible roots as possible until we're
* done or exceed the time limit. It's not a timeout per se; we finish the resource being checked and then check our
* time. This means an on-demand update should not have to wait more than about 10s, and the agent chunks work,
* spreading out the checks.
* If we are in the middle of processing a root when we run out of time, we pick up where we left off on the next run
* (by ignoring the root's descendants that have already been checked).
* @author Greg Hinkle
* @author Jay Shaughnessy
*/
public class ConfigurationCheckExecutor implements Runnable, Callable {

    private static final Log log = LogFactory.getLog(ConfigurationCheckExecutor.class);

    /**
     * Timeout handed to the resource component proxy created for each configuration check.
     * NOTE(review): presumably milliseconds - confirm against ResourceContainer.createResourceComponentProxy.
     */
    private static final long CONFIGURATION_CHECK_TIMEOUT = 30000L;

    private final ConfigurationServerService configurationServerService;

    /** In seconds. Minimum time between two completed checks of the same root. */
    private final long checkPeriod;

    /** In seconds. Time budget of a single executor run. */
    private final long timeLimit;

    /**
     * Map of resourceId to lastCheckTime. This will include only config checking "root" resources; platform and
     * top-level server resources. A root that has never been checked maps to 0 (which is also the map's default
     * no-entry value).
     */
    private final TIntLongHashMap rootCheckTimeMap = new TIntLongHashMap();

    /**
     * Set of resourceIds already checked for the root currently being checked. If we can't finish the root in the
     * current run we use this to avoid duplicating checks. The root's id will be added to the set (regardless of
     * whether it supports a config check) if the processing is stopped due to time. This ensures we can know
     * which root the set refers to.
     */
    private final TIntHashSet rootMemberCheckedSet = new TIntHashSet();

    /**
     * @param configurationServerService the server service that is told about changed resource configurations
     * @param checkPeriod In seconds. The amount of time after a resource is checked before it again becomes eligible
     *                    for a check.
     * @param timeLimit   In seconds. After each checked resource the executor checks its elapsed runtime. If the limit
     *                    is exceeded it defers more checks to the next run of the executor.
     */
    public ConfigurationCheckExecutor(ConfigurationServerService configurationServerService, long checkPeriod,
        long timeLimit) {
        this.configurationServerService = configurationServerService;
        this.checkPeriod = checkPeriod;
        this.timeLimit = timeLimit;
    }

    /**
     * Runnable entry point; simply delegates to {@link #call()}.
     */
    public void run() {
        call();
    }

    /**
     * Performs one run of the checker: processes eligible roots, least recently checked first, until all are done
     * or the time limit is exceeded. A root that could not be finished is remembered as in-progress and continued
     * by the next run.
     *
     * @return always null
     */
    public Object call() {
        InventoryManager inventoryManager = PluginContainer.getInstance().getInventoryManager();
        Resource platform = inventoryManager.getPlatform();
        List<Resource> eligibleRoots = getEligibleRoots(platform);

        if (eligibleRoots.isEmpty()) {
            log.debug("Skipping configuration update check, no eligible roots.");
            return null;
        }

        boolean debugEnabled = log.isDebugEnabled();
        if (debugEnabled) {
            log.debug("Starting configuration update check on [" + eligibleRoots.size() + "] eligible roots...");
        }

        CountTime totalCountTime = new CountTime();
        long start = System.currentTimeMillis();
        long stopTime = start + (timeLimit * 1000);
        long wallTime = 0L;
        int rootsChecked = 0;

        // check as many roots as possible until we either finish or exceed the allotted time.
        for (Resource root : eligibleRoots) {
            // Never start a root once the time is used up. Starting it would immediately report "not completed"
            // and mark the untouched root as in-progress, which makes the next run skip the root's own check.
            if (System.currentTimeMillis() > stopTime) {
                wallTime = System.currentTimeMillis() - start;
                if (debugEnabled) {
                    log.debug("Stopping after [" + rootsChecked + "] of [" + eligibleRoots.size()
                        + "] because elapsed time [" + wallTime + "ms] >= time limit [" + timeLimit + "s]");
                }
                break;
            }

            // See if this root check was in progress when the last run completed
            if (rootMemberCheckedSet.contains(root.getId())) {
                if (debugEnabled) {
                    log.debug("Configuration update check continuing for root resource [" + root.getName() + "]");
                }
            } else {
                if (debugEnabled) {
                    log.debug("Configuration update check beginning for root resource [" + root.getName() + "]");
                }
                if (!rootMemberCheckedSet.isEmpty()) {
                    // The tracked in-progress work belongs to some other root; discard it and start fresh.
                    rootMemberCheckedSet.clear();
                    log.debug("Clearing in-progress work, previous root no longer exists.");
                }
            }

            CountTime countTime = new CountTime();
            // The time was verified above, so the root itself is always visited; only descendants can time out.
            boolean completed = checkResourceTree(inventoryManager, root, countTime, stopTime);
            ++rootsChecked;
            totalCountTime.add(countTime);

            long now = System.currentTimeMillis();
            wallTime = (now - start);

            if (completed) {
                // set the checked time so this root will not again be eligible for a while
                rootCheckTimeMap.put(root.getId(), now);
                // clear our rootMember tracking in preparation of processing another root
                rootMemberCheckedSet.clear();

                if (debugEnabled) {
                    log.debug("Configuration update check completed for root resource [" + root.getName() + "] "
                        + countTime);
                }
            } else {
                // add the root to the member set to mark this root as in-progress
                rootMemberCheckedSet.add(root.getId());

                if (debugEnabled) {
                    log.debug("Configuration update check stopped, time limit [" + timeLimit
                        + "] hit while processing root resource [" + root.getName() + "]" + countTime);
                    log.debug("Stopping after [" + rootsChecked + "] of [" + eligibleRoots.size()
                        + "] because elapsed time [" + wallTime + "ms] >= time limit [" + timeLimit + "s]");
                }

                // stop checks for this run
                break;
            }
        }

        if (debugEnabled) {
            log.debug("Configuration update check complete. Checked [" + rootsChecked + "] of [" + eligibleRoots.size()
                + "] eligible roots in [" + wallTime + "ms (" + wallTime / 1000 + "s)] wall time. " + totalCountTime);
        }

        return null;
    }

    /**
     * Determines which roots are due for a check and prunes {@link #rootCheckTimeMap} so that it holds entries for
     * exactly the current possible roots (platform plus its SERVER-category children).
     *
     * @param platform the platform resource
     * @return a list of root resources that have not been checked within the last checkPeriod. Returned in
     *         a predictable order given a two-level sort of lastCheckTime ASC, resourceId ASC.
     */
    private List<Resource> getEligibleRoots(Resource platform) {
        // the list of possible roots contains the platform and top level servers
        List<Resource> possibleRoots = new ArrayList<Resource>();
        possibleRoots.add(platform);
        for (Resource child : platform.getChildResources()) {
            if (ResourceCategory.SERVER == child.getResourceType().getCategory()) {
                possibleRoots.add(child);
            }
        }

        // a root is eligible when its last check is at least checkPeriod old
        long eligibleBefore = System.currentTimeMillis() - (checkPeriod * 1000);
        List<Resource> result = new ArrayList<Resource>(possibleRoots.size());

        // rebuild the check-time map so it has entries for only the current possible roots
        TIntLongHashMap currentRootCheckTimes = new TIntLongHashMap(possibleRoots.size());
        for (Resource r : possibleRoots) {
            // get() yields the primitive no-entry value 0 for a root that was never checked, making it eligible
            long lastCheckTime = rootCheckTimeMap.get(r.getId());
            if (lastCheckTime <= eligibleBefore) {
                result.add(r);
            }
            currentRootCheckTimes.put(r.getId(), lastCheckTime);
        }
        rootCheckTimeMap.clear();
        rootCheckTimeMap.putAll(currentRootCheckTimes);

        // sort the eligible roots such that the least recently checked are done first, using resId as a tie breaker.
        Collections.sort(result, new Comparator<Resource>() {
            public int compare(Resource o1, Resource o2) {
                long t1 = rootCheckTimeMap.get(o1.getId());
                long t2 = rootCheckTimeMap.get(o2.getId());
                if (t1 != t2) {
                    return (t1 < t2) ? -1 : 1;
                }
                int id1 = o1.getId();
                int id2 = o2.getId();
                return (id1 < id2) ? -1 : ((id1 == id2) ? 0 : 1);
            }
        });

        return result;
    }

    /**
     * Checks the given resource and, recursively, its descendants for changed resource configurations. Top-level
     * servers below the platform are not descended into; they are separate roots.
     *
     * @param inventoryManager the inventory manager used to look up containers and children
     * @param resource         the resource at which to start
     * @param countTime        accumulator for the number of checked resources and the time spent checking them
     * @param stopTime         epoch millis after which no further resource is started
     * @return true if the resource and all of its descendants were processed, false if the time ran out first
     */
    public boolean checkConfigurations(InventoryManager inventoryManager, Resource resource, CountTime countTime,
        long stopTime) {
        // if we've used up our allotted time, just stop
        if (System.currentTimeMillis() > stopTime) {
            return false;
        }
        return checkResourceTree(inventoryManager, resource, countTime, stopTime);
    }

    /**
     * Processes the given resource without a leading time check, then recurses into its children, each of which
     * is subject to the time limit.
     */
    private boolean checkResourceTree(InventoryManager inventoryManager, Resource resource, CountTime countTime,
        long stopTime) {
        ResourceContainer resourceContainer = inventoryManager.getResourceContainer(resource.getId());

        // if we've already checked this resource then just check the children
        if (!rootMemberCheckedSet.contains(resource.getId())) {
            checkResourceConfiguration(inventoryManager, resource, resourceContainer, countTime);
        }

        // recurse on any child other than a top-level server, which is treated as a separate root resource.
        boolean isPlatform = null == resource.getParentResource();
        for (Resource child : inventoryManager.getContainerChildren(resource, resourceContainer)) {
            if (isPlatform && (ResourceCategory.SERVER == child.getResourceType().getCategory())) {
                if (log.isDebugEnabled()) {
                    log.debug("Not Recursing on platform child (top-level-server [" + child.getName() + "])");
                }
                continue;
            }
            try {
                if (!checkConfigurations(inventoryManager, child, countTime, stopTime)) {
                    return false;
                }
            } catch (Exception e) {
                log.error("Failed to check Resource configuration for " + child + ".", e);
            }
        }

        return true;
    }

    /**
     * Compares the live configuration of a single resource with the last known one and, if they differ, reports
     * the live configuration to the server and persists it locally. Does nothing unless the resource is UP,
     * supports the configuration facet, is COMMITTED and has a resource configuration definition.
     */
    private void checkResourceConfiguration(InventoryManager inventoryManager, Resource resource,
        ResourceContainer resourceContainer, CountTime countTime) {
        if (resourceContainer == null || resourceContainer.getAvailability() == null
            || resourceContainer.getAvailability().getAvailabilityType() != AvailabilityType.UP) {
            return;
        }
        if (!resourceContainer.supportsFacet(ConfigurationFacet.class)) {
            return;
        }

        ConfigurationFacet resourceComponent;
        try {
            resourceComponent = resourceContainer.createResourceComponentProxy(ConfigurationFacet.class,
                FacetLockType.NONE, CONFIGURATION_CHECK_TIMEOUT, true, false, true);
        } catch (PluginContainerException e) {
            // Not expected after the supportsFacet check above; skip this resource but leave a trace.
            if (log.isDebugEnabled()) {
                log.debug("Could not obtain the configuration facet for " + resource
                    + ", skipping its configuration check.", e);
            }
            return;
        }
        if (resourceComponent == null) {
            return;
        }

        // Only check committed resources; don't bother with new, ignored or deleted resources.
        ResourceType resourceType = resource.getResourceType();
        if (resource.getInventoryStatus() != InventoryStatus.COMMITTED
            || resourceType.getResourceConfigurationDefinition() == null) {
            return;
        }

        boolean debugEnabled = log.isDebugEnabled();
        long t1 = System.currentTimeMillis();
        if (debugEnabled) {
            log.debug("Checking for updated Resource configuration for " + resource + "...");
        }

        try {
            Configuration liveConfiguration = resourceComponent.loadResourceConfiguration();

            if (liveConfiguration != null) {
                ConfigurationDefinition configurationDefinition = resourceType.getResourceConfigurationDefinition();

                // Normalize and validate the config.
                ConfigurationUtility.normalizeConfiguration(liveConfiguration, configurationDefinition, true, true);
                List<String> errorMessages = ConfigurationUtility.validateConfiguration(liveConfiguration,
                    configurationDefinition);
                for (String errorMessage : errorMessages) {
                    log.warn("Plugin Error: Invalid " + resourceType.getName()
                        + " resource configuration returned by " + resourceType.getPlugin() + " plugin - "
                        + errorMessage);
                }

                // getResourceConfiguration already falls back to the persisted file when nothing is held in memory
                Configuration original = getResourceConfiguration(inventoryManager, resource);

                if (!liveConfiguration.equals(original)) {
                    if (debugEnabled) {
                        log.debug("New configuration version detected on resource: " + resource);
                    }
                    this.configurationServerService.persistUpdatedResourceConfiguration(resource.getId(),
                        liveConfiguration);
                    boolean persisted = persistConfigurationToFile(inventoryManager, resource.getId(),
                        liveConfiguration, log);
                    if (persisted) {
                        // drop the in-memory copy; later lookups then read the persisted file
                        resource.setResourceConfiguration(null);
                    }
                }
            }
        } catch (Throwable t) {
            log.warn("An error occurred while checking for an updated Resource configuration for " + resource + ".",
                t);
        } finally {
            // regardless of whether it passes or fails, consider it checked.
            rootMemberCheckedSet.add(resource.getId());
        }

        countTime.add(1, System.currentTimeMillis() - t1);
    }

    /**
     * Returns the last known configuration of the resource: the one held by the resource itself or, if that is
     * null, the one persisted in the agent's data directory.
     *
     * @param inventoryManager the inventory manager providing the data directory
     * @param resource         the resource whose configuration is wanted
     * @return the in-memory configuration if present, otherwise whatever could be loaded from file
     */
    static public Configuration getResourceConfiguration(InventoryManager inventoryManager, Resource resource) {
        Configuration result = resource.getResourceConfiguration();
        if (null == result) {
            result = loadConfigurationFromFile(inventoryManager, resource.getId());
        }
        return result;
    }

    /**
     * Serializes the configuration to the per-resource file below the inventory manager's data directory.
     *
     * @param inventoryManager  the inventory manager providing the data directory
     * @param resourceId        id of the resource the configuration belongs to
     * @param liveConfiguration the configuration to persist
     * @param log               the log that receives a warning when persisting fails
     * @return true if the configuration was written, false otherwise
     */
    static public boolean persistConfigurationToFile(InventoryManager inventoryManager, int resourceId,
        Configuration liveConfiguration, Log log) {
        File file = getConfigurationFile(inventoryManager, resourceId);
        File dataDir = file.getParentFile();

        // mkdirs() also reports false when the directory appeared concurrently, hence the final isDirectory check
        if (!dataDir.exists() && !dataDir.mkdirs() && !dataDir.isDirectory()) {
            log.warn("Could not create data dir " + dataDir.getAbsolutePath());
            return false;
        }

        try {
            FileOutputStream fos = new FileOutputStream(file);
            try {
                ObjectOutputStream oos = new ObjectOutputStream(fos);
                oos.writeObject(liveConfiguration);
                // flushes the object stream and closes the underlying file stream; failures surface as IOException
                oos.close();
            } finally {
                try {
                    // no-op if already closed above; releases the file handle when the write failed
                    fos.close();
                } catch (IOException ignored) {
                    // the failure that led here, if any, is already propagating
                }
            }
            return true;
        } catch (IOException e) {
            log.warn("Persisting failed: " + e.getMessage(), e);
            return false;
        }
    }

    /**
     * Loads the persisted configuration of the resource.
     *
     * @return the deserialized configuration, or a new empty Configuration when the file does not exist or cannot
     *         be read
     */
    static private Configuration loadConfigurationFromFile(InventoryManager inventoryManager, int resourceId) {
        File file = getConfigurationFile(inventoryManager, resourceId);
        if (!file.exists()) {
            log.error("File " + file.getAbsolutePath() + " does not exist");
            return new Configuration();
        }

        FileInputStream fis = null;
        try {
            fis = new FileInputStream(file);
            ObjectInputStream ois = new ObjectInputStream(fis);
            return (Configuration) ois.readObject();
        } catch (IOException e) {
            log.warn("Could not read persisted configuration from " + file.getAbsolutePath(), e);
        } catch (ClassNotFoundException e) {
            log.warn("Could not deserialize persisted configuration from " + file.getAbsolutePath(), e);
        } finally {
            if (fis != null) {
                try {
                    fis.close();
                } catch (IOException ignored) {
                    // nothing useful can be done about a failed close of a stream we only read from
                }
            }
        }

        return new Configuration();
    }

    /**
     * Resolves the file holding the persisted configuration of a resource: rc/(resourceId / 1000)/resourceId
     * below the data directory. The intermediate directory keeps any one directory from holding too many files.
     */
    private static File getConfigurationFile(InventoryManager inventoryManager, int resourceId) {
        File baseDataDir = inventoryManager.getDataDirectory();
        File dataDir = new File(baseDataDir, "rc/" + String.valueOf(resourceId / 1000));
        return new File(dataDir, String.valueOf(resourceId));
    }

    /**
     * Simple accumulator of how many resources were checked and how much time (ms) the checks took.
     */
    private static class CountTime {
        private long count = 0L;
        private long time = 0L;

        private void add(long count, long time) {
            this.count += count;
            this.time += time;
        }

        private void add(CountTime countTime) {
            this.count += countTime.count;
            this.time += countTime.time;
        }

        @Override
        public String toString() {
            return "CountTime [checked resource count=" + count + ", time=" + time + "]";
        }
    }
}