/*
* RHQ Management Platform
* Copyright (C) 2005-2014 Red Hat, Inc.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
package org.rhq.enterprise.server.core;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.lang.management.ManagementFactory;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.sql.Connection;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import javax.annotation.Resource;
import javax.ejb.ConcurrencyManagement;
import javax.ejb.ConcurrencyManagementType;
import javax.ejb.EJB;
import javax.ejb.Singleton;
import javax.ejb.Timeout;
import javax.ejb.Timer;
import javax.ejb.TimerConfig;
import javax.ejb.TimerService;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;
import javax.management.Attribute;
import javax.management.MBeanServer;
import javax.management.MBeanServerInvocationHandler;
import javax.management.ObjectName;
import javax.sql.DataSource;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.quartz.CronTrigger;
import org.quartz.SchedulerException;
import org.rhq.core.db.DatabaseTypeFactory;
import org.rhq.core.domain.auth.Subject;
import org.rhq.core.domain.cloud.Server;
import org.rhq.core.domain.cloud.StorageNode;
import org.rhq.core.domain.common.ProductInfo;
import org.rhq.core.domain.criteria.ServerCriteria;
import org.rhq.core.domain.criteria.StorageNodeCriteria;
import org.rhq.core.domain.resource.Agent;
import org.rhq.core.util.ObjectNameFactory;
import org.rhq.core.util.exception.ThrowableUtil;
import org.rhq.core.util.stream.StreamUtil;
import org.rhq.enterprise.communications.ServiceContainerConfigurationConstants;
import org.rhq.enterprise.communications.util.SecurityUtil;
import org.rhq.enterprise.server.RHQConstants;
import org.rhq.enterprise.server.alert.engine.internal.AlertConditionCacheCoordinator;
import org.rhq.enterprise.server.auth.SessionManager;
import org.rhq.enterprise.server.auth.SubjectManagerLocal;
import org.rhq.enterprise.server.cloud.StorageNodeManagerLocal;
import org.rhq.enterprise.server.cloud.TopologyManagerLocal;
import org.rhq.enterprise.server.cloud.instance.CacheConsistencyManagerLocal;
import org.rhq.enterprise.server.cloud.instance.ServerManagerLocal;
import org.rhq.enterprise.server.cloud.instance.SyncEndpointAddressException;
import org.rhq.enterprise.server.core.comm.ServerCommunicationsServiceUtil;
import org.rhq.enterprise.server.core.plugin.PluginDeploymentScannerMBean;
import org.rhq.enterprise.server.naming.NamingHack;
import org.rhq.enterprise.server.plugin.pc.MasterServerPluginContainer;
import org.rhq.enterprise.server.plugin.pc.ServerPluginServiceMBean;
import org.rhq.enterprise.server.resource.ResourceTypeManagerLocal;
import org.rhq.enterprise.server.scheduler.SchedulerLocal;
import org.rhq.enterprise.server.scheduler.jobs.AsyncResourceDeleteJob;
import org.rhq.enterprise.server.scheduler.jobs.CheckForSuspectedAgentsJob;
import org.rhq.enterprise.server.scheduler.jobs.CheckForTimedOutConfigUpdatesJob;
import org.rhq.enterprise.server.scheduler.jobs.CheckForTimedOutContentRequestsJob;
import org.rhq.enterprise.server.scheduler.jobs.CheckForTimedOutOperationsJob;
import org.rhq.enterprise.server.scheduler.jobs.CloudManagerJob;
import org.rhq.enterprise.server.scheduler.jobs.DataCalcJob;
import org.rhq.enterprise.server.scheduler.jobs.DataPurgeJob;
import org.rhq.enterprise.server.scheduler.jobs.DynaGroupAutoRecalculationJob;
import org.rhq.enterprise.server.scheduler.jobs.PurgePluginsJob;
import org.rhq.enterprise.server.scheduler.jobs.PurgeResourceTypesJob;
import org.rhq.enterprise.server.scheduler.jobs.ReplicationFactorCheckJob;
import org.rhq.enterprise.server.scheduler.jobs.SavedSearchResultCountRecalculationJob;
import org.rhq.enterprise.server.scheduler.jobs.StorageClusterReadRepairJob;
import org.rhq.enterprise.server.storage.StorageClientManager;
import org.rhq.enterprise.server.system.SystemManagerLocal;
import org.rhq.enterprise.server.util.LookupUtil;
import org.rhq.enterprise.server.util.concurrent.AlertSerializer;
import org.rhq.enterprise.server.util.concurrent.AvailabilityReportSerializer;
import org.rhq.server.metrics.DateTimeService;
/**
* This startup singleton EJB performs the rest of the RHQ Server startup initialization.
* In order for it to do its work properly, we must ensure everything has been deployed and started;
* specifically, all EJBs must have been deployed and available.
*
* This bean is not meant for client consumption - it is only for startup initialization.
*
* BEAN ConcurrencyManagement is enough: the {@link #initialized} property is only modified on startup.
*/
@Singleton
@TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
@ConcurrencyManagement(ConcurrencyManagementType.BEAN)
public class StartupBean implements StartupLocal {
// Logger named after the runtime class of this bean.
private Log log = LogFactory.getLog(this.getClass());
// Set to true as the very last step of init(); volatile so the latest value is visible to callers of isInitialized().
private volatile boolean initialized = false;
// Comma-separated names of the startup phases that reported a failure; empty when none did.
private String error = "";
// Injected collaborators used by the startup steps below.
@EJB
private AgentManagerLocal agentManager;
@EJB
private CacheConsistencyManagerLocal cacheConsistencyManager;
@EJB
private TopologyManagerLocal topologyManager;
@EJB
private StorageNodeManagerLocal storageNodeManager;
@EJB
private ResourceTypeManagerLocal resourceTypeManager;
@EJB
private SchedulerLocal schedulerBean;
@EJB
private ServerManagerLocal serverManager;
@EJB
private SubjectManagerLocal subjectManager;
@EJB
private SystemManagerLocal systemManager;
@EJB
private ShutdownListener shutdownListener;
@EJB
private StorageClientManager storageClientManager;
@Resource
private TimerService timerService; // needed to schedule our plugin scanner
// Used only to open a short-lived connection for detecting the database type in initializeServer().
@Resource(name = "RHQ_DS", mappedName = RHQConstants.DATASOURCE_JNDI_NAME)
private DataSource dataSource;
/**
 * Reports whether {@link #init()} has run to completion.
 *
 * @return the current value of the {@code initialized} flag
 */
@Override
public boolean isInitialized() {
    return initialized;
}
/**
 * Returns the startup phases that recorded a failure so far.
 *
 * @return a comma-separated list of phase names, or an empty string if no failure was recorded
 */
@Override
public String getError() {
    return this.error;
}
/**
 * Hooks into the naming subsystem so that JNDI lookups can be checked against Java security permissions.
 * The actual work is delegated to {@link NamingHack#bruteForceInitialContextFactoryBuilder()}.
 */
private void secureNaming() {
    NamingHack.bruteForceInitialContextFactoryBuilder();
}
/**
* Performs the final RHQ Server initialization work that needs to take place. EJBs are available in this method.
*
* Some steps only record a failure in the error string (see {@link #getError()}) and let the startup continue;
* other steps throw and abort the startup. The initialized flag is only set to true after every step has completed.
*
* @throws RuntimeException if one of the startup steps that throws on failure could not complete
*/
@Override
public void init() throws RuntimeException {
//[BZ 1161806] Make sure the default time zone is set to UTC prior to use
@SuppressWarnings("unused")
DateTimeService temp = new DateTimeService();
// sanity checks that throw (and therefore stop the startup) when the environment is not usable
checkTempDir();
checkCluster();
secureNaming();
// stays false until the very end of this method
initialized = false;
log.info("All business tier deployments are complete - finishing the startup...");
// get singletons right now so we load the classes immediately into our classloader
AlertConditionCacheCoordinator.getInstance();
SessionManager.getInstance();
AlertSerializer.getSingleton();
AvailabilityReportSerializer.getSingleton();
// load resource facets cache
try {
resourceTypeManager.reloadResourceFacetsCache();
} catch (Throwable t) {
// not fatal: remember the failed phase and keep going
error += (error.isEmpty() ? "" : ", ") + "reloading facets cache";
log.error("Could not load ResourceFacets cache.", t);
}
//Server depends on the storage cluster availability. Since the storage client init just
//establishes connectivity with the storage cluster, then run it before the server init.
initStorageClient();
// Before starting determine the operating mode of this server and
// take any necessary initialization action. Must happen before comm startup since listeners
// may be added.
initializeServer();
// The order here is important!!!
// IF YOU WANT TO CHANGE THE ORDER YOU MUST GET THE CHANGE PEER-REVIEWED FIRST BEFORE COMMITTING IT!!!
//
// If we start the scheduler before the comm layer, what happens if a stored job needs to send a message?
// But if we start the comm layer before the scheduler, what happens if a message is received that needs
// a job scheduled for it? I think the former is more likely to happen than the latter
// (that is, a scheduled job would more likely need to send a message; as opposed to an incoming message
// causing a job to be scheduled), so that explains the ordering of the comm layer and the scheduler.
startHibernateStatistics();
initScheduler(); // make sure this is initialized before starting the plugin deployer
startPluginDeployer(); // make sure this is initialized before starting the server plugin container
startServerPluginContainer(); // before comm in case an agent wants to talk to it
upgradeRhqUserSecurityDomainIfNeeded();
startServerCommunicationServices();
startScheduler();
scheduleJobs();
//startAgentClients(); // this could be expensive if we have large number of agents so skip it and we'll create them lazily
//startEmbeddedAgent(); // this is obsolete - we no longer have an embedded agent
registerShutdownListener();
registerPluginDeploymentScannerJob();
logServerStartedMessage();
// only reached when none of the steps above threw
initialized = true;
return;
}
/**
 * Verifies that the directory named by the {@code java.io.tmpdir} system property is usable. If the
 * directory does not exist yet, an attempt is made to create it, including any missing parent directories.
 *
 * @throws RuntimeException if the directory cannot be created, is not a directory, or is not readable,
 *                          traversable and writable
 */
private void checkTempDir() {
    File tmpDir = new File(System.getProperty("java.io.tmpdir"));
    String tmpPath = tmpDir.getAbsolutePath();

    if (!tmpDir.exists()) {
        log.warn("Invalid java.io.tmpdir: [" + tmpPath + "] does not exist.");
        log.info("Creating java.io.tmpdir: [" + tmpPath + "]");
        boolean created;
        try {
            created = tmpDir.mkdirs();
        } catch (Throwable t) {
            throw new RuntimeException("Startup failed: Could not create missing java.io.tmpdir [" + tmpPath
                + "]", t);
        }
        // File.mkdirs() reports most failures through its return value rather than an exception, so it has
        // to be checked explicitly. exists() is re-checked in case something else created the directory
        // in the meantime.
        if (!created && !tmpDir.exists()) {
            throw new RuntimeException("Startup failed: Could not create missing java.io.tmpdir [" + tmpPath
                + "]");
        }
    }

    if (!tmpDir.isDirectory()) {
        throw new RuntimeException("Startup failed: java.io.tmpdir [" + tmpPath + "] is not a directory");
    }

    // a directory must be both readable and executable (traversable) to be listed and entered
    if (!tmpDir.canRead() || !tmpDir.canExecute()) {
        throw new RuntimeException("Startup failed: java.io.tmpdir [" + tmpPath + "] is not readable");
    }

    if (!tmpDir.canWrite()) {
        throw new RuntimeException("Startup failed: java.io.tmpdir [" + tmpPath + "] is not writable");
    }
}
/**
 * Ensure all Servers and StorageNodes are at the same version. This prevents startup when an
 * upgrade of all cluster members is still in progress.
 *
 * The version of this Server is taken from the implementation version of this class's package and is
 * compared against the version recorded for every Server and Storage Node returned by the topology and
 * storage node managers.
 *
 * @throws RuntimeException if a Server or Storage Node reports a different version than this Server
 *                          (the message names the offending member), or if the versions could not be
 *                          looked up or compared at all (the original failure is attached as the cause)
 */
private void checkCluster() {
    // Filled in when a mismatch is found. It is thrown after the try block so that the detailed message
    // is not hidden behind the generic wrapper used for lookup failures.
    String mismatch = null;

    try {
        Subject overlord = subjectManager.getOverlord();
        String version = this.getClass().getPackage().getImplementationVersion();

        ServerCriteria sc = new ServerCriteria();
        sc.clearPaging();
        List<Server> servers = topologyManager.findServersByCriteria(overlord, sc);
        for (Server server : servers) {
            if (!version.equals(server.getVersion())) {
                mismatch = "Startup failed: Could not start Server because not all Servers are running the same version. This Server is running version ["
                    + version
                    + "] but Server ["
                    + server.getName()
                    + "] is running version ["
                    + server.getVersion()
                    + "] Please complete the upgrade for all Servers and StorageNodes before trying to start a server.";
                break;
            }
        }

        // storage nodes only need to be examined when all servers agree
        if (mismatch == null) {
            StorageNodeCriteria snc = new StorageNodeCriteria();
            snc.clearPaging();
            List<StorageNode> storageNodes = storageNodeManager.findStorageNodesByCriteria(overlord, snc);
            for (StorageNode storageNode : storageNodes) {
                if (!version.equals(storageNode.getVersion())) {
                    mismatch = "Startup failed: Could not start Server because not all Storage Nodes are running the same version. This Server is running version ["
                        + version
                        + "] but Storage Node ["
                        + storageNode.getAddress()
                        + "] is running version ["
                        + storageNode.getVersion()
                        + "] Please complete the upgrade for all Servers and StorageNodes before trying to start a server.";
                    break;
                }
            }
        }
    } catch (Throwable t) {
        throw new RuntimeException("Startup failed: Could not validate Server or Storage Node versions", t);
    }

    if (mismatch != null) {
        throw new RuntimeException(mismatch);
    }
}
/**
 * Reads the time recorded in the shutdown time log file provided by the shutdown listener.
 * The file is deleted once it has been looked at, whether or not it could be read successfully.
 *
 * @return the numeric value stored in the file
 *
 * @throws FileNotFoundException if the file does not exist
 * @throws Exception if the file cannot be read or its content is not a valid long
 */
private long readShutdownTimeLogFile() throws Exception {
    File timeFile = shutdownListener.getShutdownTimeLogFile();
    if (!timeFile.exists()) {
        // this is probably ok, perhaps its the first time we started this server, so this exception
        // just forces the caller to use startup time instead
        throw new FileNotFoundException();
    }
    try {
        FileInputStream input = new FileInputStream(timeFile);
        String timeString = new String(StreamUtil.slurp(input));
        // Long.parseLong rejects surrounding whitespace, so tolerate a trailing newline or blanks
        return Long.parseLong(timeString.trim());
    } catch (Exception e) {
        // only include the stack trace when debugging to keep the normal log terse
        if (log.isDebugEnabled()) {
            log.warn("Failed to read the shutdown time log file", e);
        } else {
            log.warn("Failed to read the shutdown time log file: " + e.getMessage());
        }
        throw e;
    } finally {
        // since we are starting again, we want to remove the now obsolete shutdown time file
        timeFile.delete();
    }
}
/**
 * Detects the database type, makes sure this server is registered in the database and establishes
 * the server's operation mode. A failure to detect the database type is recorded in the error string
 * and logged, but does not stop the remaining steps.
 */
private void initializeServer() {
    // Ensure the class is loaded and the dbType is set for our current db
    Connection connection = null;
    try {
        connection = dataSource.getConnection();
        DatabaseTypeFactory.setDefaultDatabaseType(DatabaseTypeFactory.getDatabaseType(connection));
    } catch (Exception e) {
        String phase = "server";
        error = error.isEmpty() ? phase : error + ", " + phase;
        log.error("Could not initialize server.", e);
    } finally {
        if (connection != null) {
            try {
                connection.close();
            } catch (Exception closeFailure) {
                log.error("Failed to close temporary connection used for server initialization.", closeFailure);
            }
        }
    }

    // Ensure that this server is registered in the database.
    createDefaultServerIfNecessary();

    // immediately put the server into MM if configured to do so
    if (ServerCommunicationsServiceUtil.getService().getMaintenanceModeAtStartup()) {
        log.info("Server is configured to start up in MAINTENANCE mode.");
        Server thisServer = serverManager.getServer();
        Integer[] serverIds = { thisServer.getId() };
        Subject overlord = LookupUtil.getSubjectManager().getOverlord();
        topologyManager.updateServerManualMaintenance(overlord, serverIds, true);
    }

    // Establish the current server mode for the server. This will move the server to NORMAL
    // mode from DOWN if necessary. This can also affect comm layer behavior.
    serverManager.establishCurrentServerMode();

    // the endpoint address sync is opt-in via a system property
    String syncEndpointAddress = System.getProperty("rhq.sync.endpoint-address", "false");
    if (!"true".equals(syncEndpointAddress)) {
        return;
    }
    try {
        serverManager.syncEndpointAddress();
    } catch (SyncEndpointAddressException e) {
        log.error("Failed to sync server endpoint address.", e);
    }
}
/**
 * Creates a default {@link Server} row for this server's identity when none exists yet, which is the
 * case for developer builds that did not run the HA installer. If the "rhq.high-availability.name"
 * property is missing, the {@link ServerManagerLocal} will then return this localhost entry.
 *
 * When a row for the identity already exists (for example because the installer was run), this
 * method does nothing.
 */
private void createDefaultServerIfNecessary() {
    String identity = serverManager.getIdentity();
    if (topologyManager.getServerByName(identity) != null) {
        return; // already registered
    }

    // fall back to "localhost" when the local host name cannot be resolved
    String hostName;
    try {
        hostName = InetAddress.getLocalHost().getCanonicalHostName();
    } catch (UnknownHostException e) {
        hostName = "localhost";
    }

    Server defaultServer = new Server();
    defaultServer.setName(identity);
    defaultServer.setAddress(hostName);
    defaultServer.setPort(7080);
    defaultServer.setSecurePort(7443);
    defaultServer.setComputePower(1);
    defaultServer.setOperationMode(Server.OperationMode.INSTALLED);
    defaultServer.setVersion(this.getClass().getPackage().getImplementationVersion());
    serverManager.create(defaultServer);
    log.info("Default HA server created: " + defaultServer);
}
/**
 * Turns on hibernate statistics monitoring by asking the system manager to enable it.
 *
 * @throws RuntimeException if the statistics could not be enabled; the failed phase is also recorded
 *                          in the error string
 */
private void startHibernateStatistics() throws RuntimeException {
    log.info("Starting hibernate statistics monitoring...");
    try {
        systemManager.enableHibernateStatistics();
    } catch (Exception e) {
        String phase = "hibernate statistics";
        error = error.isEmpty() ? phase : error + ", " + phase;
        throw new RuntimeException("Cannot start hibernate statistics monitoring!", e);
    }
}
/**
 * Kicks off the plugin deployer, which persists information about all detected agent and server plugins.
 *
 * The initial plugins are scanned and registered as part of this call, so it must run before the
 * master plugin container is started; otherwise, the master PC would have no plugins to start.
 *
 * @throws RuntimeException if the deployer could not be started; the failed phase is also recorded
 *                          in the error string
 */
private void startPluginDeployer() throws RuntimeException {
    log.info("Starting the agent/server plugin deployer...");
    try {
        getPluginDeploymentScanner().startDeployment();
    } catch (Exception e) {
        String phase = "plugin deployer";
        error = error.isEmpty() ? phase : error + ", " + phase;
        throw new RuntimeException("Cannot start the agent/server plugin deployer!", e);
    }
}
/**
 * Creates the non-persistent interval timer that periodically triggers a scan for new plugins.
 * The interval is the scan period reported by the plugin deployment scanner; if that value cannot be
 * obtained or parsed, a default of five minutes is used.
 *
 * @throws RuntimeException if the timer could not be created; the failed phase is also recorded
 *                          in the error string
 */
private void registerPluginDeploymentScannerJob() throws RuntimeException {
    log.info("Creating timer to begin scanning for plugins...");
    try {
        PluginDeploymentScannerMBean scanner = getPluginDeploymentScanner();

        long scanPeriod;
        try {
            scanPeriod = Long.parseLong(scanner.getScanPeriod());
        } catch (Exception e) {
            scanPeriod = 5 * 60000L;
            log.warn("could not determine plugin scanner scan period - using: " + scanPeriod, e);
        }

        // create a non-persistent periodic timer (we'll reset it every startup) with the scan period as configured in our scanner object
        TimerConfig nonPersistent = new TimerConfig(null, false);
        timerService.createIntervalTimer(scanPeriod, scanPeriod, nonPersistent);
    } catch (Exception e) {
        String phase = "plugin scanner";
        error = error.isEmpty() ? phase : error + ", " + phase;
        throw new RuntimeException("Cannot schedule plugin scanning timer - new plugins will not be detected!", e);
    }
}
/**
 * Timer callback that asks the plugin deployment scanner to scan for and register plugins.
 * Failures are logged and never propagated.
 *
 * @param timer the timer that fired (not used)
 */
@Timeout
public void scanForPlugins(final Timer timer) {
    try {
        getPluginDeploymentScanner().scanAndRegister();
    } catch (Throwable t) {
        log.error("Plugin scan failed. Cause: " + ThrowableUtil.getAllMessages(t));
        // the stack trace is only of interest when debugging
        if (log.isDebugEnabled()) {
            log.debug("Plugin scan failure stack trace follows:", t);
        }
    }
}
/**
 * Builds a proxy for the plugin deployment scanner MBean registered in the platform MBean server.
 *
 * @return a proxy that forwards calls to the MBean named {@code PluginDeploymentScannerMBean.OBJECT_NAME}
 */
private PluginDeploymentScannerMBean getPluginDeploymentScanner() {
    MBeanServer platformServer = ManagementFactory.getPlatformMBeanServer();
    return MBeanServerInvocationHandler.newProxyInstance(platformServer, PluginDeploymentScannerMBean.OBJECT_NAME,
        PluginDeploymentScannerMBean.class, false);
}
/**
 * Asks the custom JAAS deployment service MBean to upgrade the RHQ user security domain if needed,
 * so our users can login.
 *
 * @throws RuntimeException if the MBean call fails; the failed phase is also recorded in the error string
 */
private void upgradeRhqUserSecurityDomainIfNeeded() throws RuntimeException {
    try {
        MBeanServer platformServer = ManagementFactory.getPlatformMBeanServer();
        CustomJaasDeploymentServiceMBean jaasService = MBeanServerInvocationHandler.newProxyInstance(
            platformServer, CustomJaasDeploymentServiceMBean.OBJECT_NAME, CustomJaasDeploymentServiceMBean.class,
            false);
        jaasService.upgradeRhqUserSecurityDomainIfNeeded();
    } catch (Exception e) {
        String phase = "security domain upgrade";
        error = error.isEmpty() ? phase : error + ", " + phase;
        throw new RuntimeException("Cannot upgrade JAAS login modules!", e);
    }
}
/**
 * Prepares the Quartz scheduler without starting it.
 *
 * @throws RuntimeException if the scheduler could not be initialized; the failed phase is also
 *                          recorded in the error string
 */
private void initScheduler() throws RuntimeException {
    log.info("Initializing the scheduler....");
    try {
        schedulerBean.initQuartzScheduler();
    } catch (SchedulerException e) {
        String phase = "scheduler initialization";
        error = error.isEmpty() ? phase : error + ", " + phase;
        throw new RuntimeException("Cannot initialize the scheduler!", e);
    }
}
/**
 * Initializes the storage client subsystem which is needed for reading/writing metric data.
 * When the initialization reports that storage is not running, "storage" is added to the error string.
 *
 * @return true if the storage subsystem is running
 */
private boolean initStorageClient() {
    boolean storageUp = storageClientManager.init();
    if (storageUp) {
        return true;
    }
    String phase = "storage";
    error = error.isEmpty() ? phase : error + ", " + phase;
    return false;
}
/**
 * Starts the Quartz scheduler. All EJBs are deployed by the time this runs, so jobs that fire
 * right away will find them available.
 *
 * @throws RuntimeException if the scheduler could not be started; the failed phase is also recorded
 *                          in the error string
 */
private void startScheduler() throws RuntimeException {
    log.info("Starting the scheduler...");
    try {
        schedulerBean.startQuartzScheduler();
    } catch (SchedulerException e) {
        String phase = "scheduler";
        error = error.isEmpty() ? phase : error + ", " + phase;
        throw new RuntimeException("Cannot start the scheduler!", e);
    }
}
/**
 * Initializes the server-side communications services. Once complete, agents can talk to the server.
 *
 * Before the services are started, this method waits until the server has been down for at least the
 * number of seconds given by the "rhq.server.ensure-down-time-secs" system property (70 if the
 * property is missing or not a number). If the wait is interrupted, the startup continues and the
 * thread's interrupt status is restored.
 *
 * @throws RuntimeException if the communications services could not be started; the failed phase is
 *                          also recorded in the error string
 */
private void startServerCommunicationServices() throws RuntimeException {
    // under a rare case, if the server starts up really fast as soon as it dies, any connected
    // agents will not realize the server has bounced and will not know to re-connect. When this
    // happens the server's caches will not be refreshed and bad things will happen (e.g. alerts not firing).
    // make sure we are down for a certain amount of time to ensure the agent's know the server was down.
    long ensureDownTimeSecs;
    try {
        ensureDownTimeSecs = Long.parseLong(System.getProperty("rhq.server.ensure-down-time-secs", "70"));
    } catch (Exception e) {
        ensureDownTimeSecs = 70;
    }
    long elapsed = getElapsedTimeSinceLastShutdown();
    long sleepTime = (ensureDownTimeSecs * 1000L) - elapsed;
    if (sleepTime > 0) {
        try {
            log.info("Forcing the server to wait [" + sleepTime + "]ms to ensure agents know we went down...");
            Thread.sleep(sleepTime);
        } catch (InterruptedException e) {
            // Stop waiting and carry on, but do not lose the interrupt: restore the flag so that
            // whoever interrupted this thread can still be honored further up the call stack.
            log.warn("Interrupted while waiting to ensure agents know we went down - continuing with startup.");
            Thread.currentThread().interrupt();
        }
    }
    // now start our comm layer
    log.info("Starting the server-agent communications services...");
    try {
        ServerCommunicationsServiceUtil.getService().startCommunicationServices();
        ServerCommunicationsServiceUtil
            .getService()
            .getServiceContainer()
            .addCommandListener(
                new ExternalizableStrategyCommandListener(
                    org.rhq.core.domain.server.ExternalizableStrategy.Subsystem.AGENT));
    } catch (Exception e) {
        error += (error.isEmpty() ? "" : ", ") + "communications services";
        throw new RuntimeException("Cannot start the server-side communications services.", e);
    }
}
/**
 * Seeds the agent clients cache by obtaining a client for every known agent. Obtaining a client caches
 * and starts it, so any persisted guaranteed messages that already exist can be sent right away. This
 * must only be called once the server is ready to accept messages from agents, because delivering
 * guaranteed messages might trigger the agents to send messages back to the server.
 *
 * NOTE: we don't need to do this - so far, none of the messages the server sends to the agent are marked
 * with "guaranteed delivery" (this is on purpose and a good thing) so there is nothing persisted to send.
 * Since the number of agents could be large, this cache could be huge and take some time to initialize.
 * Skipping this call speeds up start up and avoids holding clients we might never need (agents might
 * have affinity to other servers). Agent clients can be created lazily at runtime when the server
 * needs them.
 */
private void startAgentClients() {
    log.info("Starting agent clients - any persisted messages with guaranteed delivery will be sent...");
    List<Agent> knownAgents = agentManager.getAllAgents();
    if (knownAgents == null) {
        return;
    }
    for (Agent knownAgent : knownAgents) {
        agentManager.getAgentClient(knownAgent); // this caches and starts the client
    }
}
/**
 * This will make sure all jobs that need to periodically run are scheduled.
 *
 * The first group of jobs (server heartbeat, cache reloaders, session purge, storage session
 * maintenance) is scheduled through the owning managers without any error handling here. Every
 * other job is scheduled inside its own try/catch: a failure is logged and the remaining jobs are
 * still scheduled.
 *
 * @throws RuntimeException if one of the manager-owned jobs in the first group cannot be scheduled
 */
private void scheduleJobs() throws RuntimeException {
    log.info("Scheduling asynchronous jobs...");
    /*
     * All jobs need to be set as non-volatile since a volatile job in a clustered environment is effectively
     * non-volatile;
     */
    // TODO [mazz]: make all of the intervals here configurable via something like SystemManagerBean
    serverManager.scheduleServerHeartbeat();
    cacheConsistencyManager.scheduleServerCacheReloader();
    systemManager.scheduleConfigCacheReloader();
    subjectManager.scheduleSessionPurgeJob();
    storageClientManager.scheduleStorageSessionMaintenance();

    // Saved Search Result Count Recalculation Job
    try {
        // Do not check until we are up at least 1 min, and every minute thereafter.
        final long initialDelay = 1000L * 60;
        final long interval = 1000L * 60;
        schedulerBean.scheduleSimpleRepeatingJob(SavedSearchResultCountRecalculationJob.class, true, false,
            initialDelay, interval);
    } catch (Exception e) {
        log.error("Cannot schedule saved search result count recalculation job.", e);
    }

    // Async Resource Delete Job
    try {
        // Do not check until we are up at least 1 min, and every 5 minutes thereafter.
        final long initialDelay = 1000L * 60;
        final long interval = 1000L * 60 * 5;
        schedulerBean.scheduleSimpleRepeatingJob(AsyncResourceDeleteJob.class, true, false, initialDelay, interval);
    } catch (Exception e) {
        log.error("Cannot schedule asynchronous resource deletion job.", e);
    }

    // Purge Resource Types Job
    try {
        // Do not check until we are up at least 1 min, and every 5 minutes thereafter.
        final long initialDelay = 1000L * 60;
        final long interval = 1000L * 60 * 5;
        schedulerBean.scheduleSimpleRepeatingJob(PurgeResourceTypesJob.class, true, false, initialDelay, interval);
    } catch (Exception e) {
        log.error("Cannot schedule purge resource types job.", e);
    }

    // Purge Plugins Job
    try {
        // Do not check until we are up at least 1 min, and every 3 minutes thereafter.
        final long initialDelay = 1000L * 60;
        final long interval = 1000L * 60 * 3;
        schedulerBean.scheduleSimpleRepeatingJob(PurgePluginsJob.class, true, false, initialDelay, interval);
    } catch (Exception e) {
        log.error("Cannot schedule purge plugins job.", e);
    }

    // DynaGroup Auto-Recalculation Job
    try {
        // Do not check until we are up at least 1 min, and every minute thereafter.
        final long initialDelay = 1000L * 60;
        final long interval = 1000L * 60;
        schedulerBean.scheduleSimpleRepeatingJob(DynaGroupAutoRecalculationJob.class, true, false, initialDelay,
            interval);
    } catch (Exception e) {
        log.error("Cannot schedule DynaGroup auto-recalculation job.", e);
    }

    // Cluster Manager Job
    try {
        // remove any job still registered under the old class name before scheduling its replacement
        String oldJobName = "org.rhq.enterprise.server.scheduler.jobs.ClusterManagerJob";
        boolean foundAndDeleted = schedulerBean.deleteJob(oldJobName, oldJobName);
        if (foundAndDeleted) {
            log.info("Unscheduling deprecated job references for " + oldJobName + "...");
        } else {
            log.debug("No deprecated job references found for " + oldJobName + ".");
        }
        // Wait long enough to allow the Server instance jobs to start executing first.
        final long initialDelay = 1000L * 60 * 2; // 2 mins
        final long interval = 1000L * 30; // 30 secs
        schedulerBean.scheduleSimpleRepeatingJob(CloudManagerJob.class, true, false, initialDelay, interval);
    } catch (Exception e) {
        log.error("Cannot schedule cloud management job.", e);
    }

    // Suspected Agents Job
    try {
        // Do not check until we are up at least 10 mins, but check every 60 secs thereafter.
        final long initialDelay = 1000L * 60 * 10; // 10 mins
        final long interval = 1000L * 60; // 60 secs
        schedulerBean.scheduleSimpleRepeatingJob(CheckForSuspectedAgentsJob.class, true, false, initialDelay,
            interval);
    } catch (Exception e) {
        log.error("Cannot schedule suspected Agents job.", e);
    }

    // Timed Out Operations Job
    try {
        final long initialDelay = 1000L * 60 * 3; // 3 min
        final long interval = 1000L * 60 * 10; // 10 minutes
        schedulerBean.scheduleSimpleRepeatingJob(CheckForTimedOutOperationsJob.class, true, false, initialDelay,
            interval);
    } catch (Exception e) {
        log.error("Cannot schedule check-for-timed-out-operations job.", e);
    }

    // Timed Out Resource Configuration Update Requests Job
    // (NOTE: We don't need to check for timed out plugin Configuration updates, since those are executed synchronously.)
    try {
        final long initialDelay = 1000L * 60 * 4; // 4 mins
        final long interval = 1000L * 60 * 10; // 10 mins
        schedulerBean.scheduleSimpleRepeatingJob(CheckForTimedOutConfigUpdatesJob.class, true, false, initialDelay,
            interval);
    } catch (Exception e) {
        log.error("Cannot schedule check-for-timed-out-configuration-update-requests job.", e);
    }

    // Timed Out Content Requests Job
    try {
        final long initialDelay = 1000L * 60 * 5; // 5 mins
        final long interval = 1000L * 60 * 15; // 15 mins
        schedulerBean.scheduleSimpleRepeatingJob(CheckForTimedOutContentRequestsJob.class, true, false,
            initialDelay, interval);
    } catch (Exception e) {
        log.error("Cannot schedule check-for-timed-out-artifact-requests job.", e);
    }

    // Data Purge Job
    try {
        // TODO [mazz]: make the data purge job's cron string configurable via SystemManagerBean
        // For Quartz cron syntax, see: http://www.quartz-scheduler.org/documentation/quartz-2.1.x/tutorials/crontrigger
        String cronString = "0 30 * * * ?"; // every hour, on the half-hour (to offset from DataCalcJob)
        schedulerBean.scheduleSimpleCronJob(DataPurgeJob.class, true, false, cronString,
            CronTrigger.MISFIRE_INSTRUCTION_DO_NOTHING);
    } catch (Exception e) {
        log.error("Cannot schedule data purge job.", e);
    }

    // Data Calc Job
    try {
        // TODO [mazz]: make the data calc job's cron string configurable via SystemManagerBean
        // For Quartz cron syntax, see: http://www.quartz-scheduler.org/documentation/quartz-2.1.x/tutorials/crontrigger
        String cronString = "0 0 * * * ?"; // every hour, on the hour
        schedulerBean.scheduleSimpleCronJob(DataCalcJob.class, true, false, cronString,
            CronTrigger.MISFIRE_INSTRUCTION_DO_NOTHING);
    } catch (Exception e) {
        log.error("Cannot schedule data calc job.", e);
    }

    // Server Plugin Jobs
    try {
        ServerPluginServiceMBean mbean = LookupUtil.getServerPluginService();
        MasterServerPluginContainer masterPC = mbean.getMasterPluginContainer();
        masterPC.scheduleAllPluginJobs();
    } catch (Exception e) {
        log.error("Cannot schedule server plugin jobs.", e);
    }

    // Storage cluster read repair Job
    try {
        String cronString = "0 30 0 ? * SUN *"; // every sunday starting at 00:30.
        schedulerBean.scheduleSimpleCronJob(StorageClusterReadRepairJob.class, true, true, cronString, null);
    } catch (Exception e) {
        log.error("Cannot create storage cluster read repair job", e);
    }

    // Storage cluster replication factor check Job
    try {
        final long initialDelay = 1000L * 60 * 2; // 2 mins
        final long interval = 1000L * 60 * 5; // 5 mins
        schedulerBean.scheduleSimpleRepeatingJob(ReplicationFactorCheckJob.class, true, false, initialDelay,
            interval);
    } catch (Exception e) {
        log.error("Cannot schedule Storage cluster replication factor check job.", e);
    }
}
/**
 * Boots the embedded agent when, and only when, it is both installed and enabled.
 * <p>
 * The agent's bootstrap MBean is accessed purely through the generic JMX API. If its
 * "AgentEnabled" attribute cannot be read, the agent is treated as not installed and
 * this method just logs that fact. If the agent is enabled, blank entries in its
 * configuration overrides (agent bind address, server bind address, server bind port)
 * are filled in from this server's own settings, and the agent is then launched on a
 * daemon thread. A failure of the agent's own start call on that thread is logged only.
 *
 * @throws RuntimeException if the agent is installed and enabled but preparing or
 *                          launching its startup failed
 *
 * @deprecated we don't have an embedded agent anymore, leaving this in case we resurrect it
 */
@Deprecated
private void startEmbeddedAgent() throws RuntimeException {
    // EmbeddedAgentBootstrapServiceMBean is not on the classpath unless the embedded agent
    // is installed, so a typed reference cannot be used here - only the JMX API.
    final ObjectName bootstrapName = ObjectNameFactory.create("rhq:service=EmbeddedAgentBootstrap");
    final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();

    // Phase 1: is the agent installed at all? Reading the attribute fails if it is not.
    String enabledFlag;
    try {
        enabledFlag = (String) mbs.getAttribute(bootstrapName, "AgentEnabled");
    } catch (RuntimeException re) {
        throw re;
    } catch (Throwable t) {
        log.info("The embedded Agent is not installed, so it will not be started (" + t + ").");
        return;
    }

    // Phase 2: the agent is installed; from here on every failure is reported to the caller.
    try {
        if (!Boolean.parseBoolean(enabledFlag)) {
            log.debug("The embedded Agent is not enabled, so it will not be started.");
            return;
        }

        log.info("The embedded Agent is installed and enabled - it will now be started...");

        // AgentConfigurationConstants cannot be imported here, so its values are duplicated
        // as literals; they must be kept in sync if the real constants ever change.
        final String serverTransportKey = "rhq.agent.server.transport";
        final String serverBindAddressKey = "rhq.agent.server.bind-address";
        final String serverBindPortKey = "rhq.agent.server.bind-port";
        final String overridesAttribute = "ConfigurationOverrides";

        // Only the configuration overrides are consulted (not persisted preferences);
        // the settings of interest are assumed to always be present there.
        Properties overrides = (Properties) mbs.getAttribute(bootstrapName, overridesAttribute);
        String transport = overrides.getProperty(serverTransportKey);
        String configuredServerAddress = overrides.getProperty(serverBindAddressKey);
        String configuredServerPort = overrides.getProperty(serverBindPortKey);
        String configuredAgentAddress = overrides
            .getProperty(ServiceContainerConfigurationConstants.CONNECTOR_BIND_ADDRESS);

        Server thisServer = serverManager.getServer();

        // a blank agent bind address defaults to the address of the hosting server
        if (configuredAgentAddress == null || configuredAgentAddress.trim().isEmpty()) {
            overrides.setProperty(ServiceContainerConfigurationConstants.CONNECTOR_BIND_ADDRESS,
                thisServer.getAddress());
        }

        // a blank server endpoint address likewise defaults to the hosting server
        if (configuredServerAddress == null || configuredServerAddress.trim().isEmpty()) {
            overrides.setProperty(serverBindAddressKey, thisServer.getAddress());
        }

        // a blank server endpoint port defaults to the secure or plain port, per the transport
        if (configuredServerPort == null || configuredServerPort.trim().isEmpty()) {
            int port = SecurityUtil.isTransportSecure(transport) ? thisServer.getSecurePort()
                : thisServer.getPort();
            overrides.setProperty(serverBindPortKey, Integer.toString(port));
        }

        mbs.setAttribute(bootstrapName, new Attribute(overridesAttribute, overrides));

        // The start call is made on its own thread so that this startup code is not blocked.
        // Per the original author's note: the agent must send an HTTP request (to register
        // with the server) to finish starting, and the container does not accept HTTP
        // requests until startup has finished. The consequence is that the RHQ Server still
        // comes up even when its embedded agent fails to start.
        Thread starter = new Thread(new Runnable() {
            @Override
            public void run() {
                // returns only once the agent has started and registered itself
                try {
                    mbs.invoke(bootstrapName, "startAgent", new Object[0], new String[0]);
                } catch (Throwable t) {
                    log.error("Failed to start the embedded Agent - it will not be available!", t);
                }
            }
        }, "Embedded Agent Startup");
        starter.setDaemon(true);
        starter.start();
    } catch (Throwable t) {
        throw new RuntimeException("Failed to start the embedded Agent.", t);
    }
}
/**
 * Brings up the master server-side plugin container by calling
 * {@code startMasterPluginContainerWithoutSchedulingJobs()} on the server plugin service.
 *
 * @throws RuntimeException if the container cannot be started; before the exception is
 *                          thrown, "server plugin container" is appended to the
 *                          {@code error} field
 */
private void startServerPluginContainer() throws RuntimeException {
    log.info("Starting the master server plugin container...");

    try {
        LookupUtil.getServerPluginService().startMasterPluginContainerWithoutSchedulingJobs();
    } catch (Exception e) {
        // record the failed component, comma-separated from anything already recorded
        String separator = error.isEmpty() ? "" : ", ";
        error = error + separator + "server plugin container";
        throw new RuntimeException("Cannot start the master server plugin container!", e);
    }
}
/**
 * Placeholder for registering a system shutdown listener; it currently does nothing.
 * <p>
 * Earlier JBossAS versions (as of 4.0.5) exposed an MBean that sent a notification when
 * the server was shutting down. AS7 has no such notification, so orderly cleanup is done
 * by the ShutdownListener EJB singleton through its PreDestroy method instead. This method
 * is kept in case that approach proves insufficient: a {@code Runtime.addShutdownHook}
 * call that invokes the ShutdownListener could then be added here.
 *
 * @throws RuntimeException declared for a future implementation that fails to register a
 *                          listener; the current empty body never throws
 */
private void registerShutdownListener() throws RuntimeException {
    // intentionally empty - see the method documentation
}
/**
 * Computes how many milliseconds have passed since the server was last shut down.
 * When the shutdown time cannot be read, the time since the server booted is used instead.
 *
 * @return milliseconds since the last shutdown; if that is unknown, milliseconds since
 *         the server's boot time; 0 if neither can be determined
 */
private long getElapsedTimeSinceLastShutdown() throws RuntimeException {
    try {
        long lastShutdown = readShutdownTimeLogFile();
        return System.currentTimeMillis() - lastShutdown;
    } catch (Exception ignored) {
        // Nothing more is logged here: the read failure is expected to have been logged
        // already. Fall through and measure from the server's boot time instead.
    }

    try {
        Date bootTime = LookupUtil.getCoreServer().getBootTime();
        return System.currentTimeMillis() - bootTime.getTime();
    } catch (Exception alsoIgnored) {
        // neither reference point is available
        return 0;
    }
}
/**
 * Logs, at INFO level, a "Server started" line carrying the product's full name, version
 * and build number, framed above and below by a line of dashes.
 */
private void logServerStartedMessage() {
    final String rule = "--------------------------------------------------"; // 50 dashes
    ProductInfo info = systemManager.getProductInfo(subjectManager.getOverlord());

    log.info(rule);

    StringBuilder message = new StringBuilder();
    message.append(info.getFullName()).append(" ");
    message.append(info.getVersion());
    message.append(" (build ").append(info.getBuildNumber()).append(") Server started.");
    log.info(message.toString());

    log.info(rule);
}
}