/*
* RHQ Management Platform
* Copyright (C) 2005-2014 Red Hat, Inc.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
package org.rhq.enterprise.server.scheduler.jobs;
import static java.util.concurrent.TimeUnit.HOURS;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static org.rhq.core.domain.common.composite.SystemSetting.ALERT_PURGE_PERIOD;
import static org.rhq.core.domain.common.composite.SystemSetting.AVAILABILITY_PURGE_PERIOD;
import static org.rhq.core.domain.common.composite.SystemSetting.DATA_MAINTENANCE_PERIOD;
import static org.rhq.core.domain.common.composite.SystemSetting.DATA_REINDEX_NIGHTLY;
import static org.rhq.core.domain.common.composite.SystemSetting.DRIFT_FILE_PURGE_PERIOD;
import static org.rhq.core.domain.common.composite.SystemSetting.EVENT_PURGE_PERIOD;
import static org.rhq.core.domain.common.composite.SystemSetting.OPERATION_HISTORY_PURGE_PERIOD;
import static org.rhq.core.domain.common.composite.SystemSetting.PARTITION_EVENT_PURGE_PERIOD;
import static org.rhq.core.domain.common.composite.SystemSetting.RESOURCE_CONFIG_HISTORY_PURGE_PERIOD;
import static org.rhq.core.domain.common.composite.SystemSetting.RT_DATA_PURGE_PERIOD;
import static org.rhq.core.domain.common.composite.SystemSetting.TRAIT_PURGE_PERIOD;
import java.util.Calendar;
import java.util.Date;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import org.quartz.SimpleTrigger;
import org.rhq.core.domain.auth.Subject;
import org.rhq.core.domain.common.composite.SystemSettings;
import org.rhq.enterprise.server.alert.AlertConditionManagerLocal;
import org.rhq.enterprise.server.alert.AlertDefinitionManagerLocal;
import org.rhq.enterprise.server.alert.AlertNotificationManagerLocal;
import org.rhq.enterprise.server.auth.SubjectManagerLocal;
import org.rhq.enterprise.server.content.ContentManagerLocal;
import org.rhq.enterprise.server.drift.DriftManagerLocal;
import org.rhq.enterprise.server.operation.OperationManagerLocal;
import org.rhq.enterprise.server.purge.PurgeManagerLocal;
import org.rhq.enterprise.server.resource.ResourceManagerLocal;
import org.rhq.enterprise.server.scheduler.SchedulerLocal;
import org.rhq.enterprise.server.system.SystemManagerLocal;
import org.rhq.enterprise.server.util.LookupUtil;
import org.rhq.enterprise.server.util.TimingVoodoo;
/**
* This implements {@link org.quartz.StatefulJob} (as opposed to {@link org.quartz.Job}) because we do not need nor want
* this job triggered concurrently. That is, we don't want multiple data purge jobs performing the data purge at the
* same time.
*
* Note, some of the work previously performed in this job has been moved to {@link DataCalcJob}.
*/
public class DataPurgeJob extends AbstractStatefulJob {
    private static final Log LOG = LogFactory.getLog(DataPurgeJob.class);

    /** One hour in milliseconds - the rounding unit for the hourly database maintenance check. */
    private static final long HOUR = MILLISECONDS.convert(1, HOURS);

    /** Default retention period (one year, in ms) used when a purge-period system setting is absent. */
    private static final long ONE_YEAR_IN_MS = 1000L * 60 * 60 * 24 * 365;

    private final SubjectManagerLocal subjectManager;
    private final SystemManagerLocal systemManager;
    private final PurgeManagerLocal purgeManager;
    private final OperationManagerLocal operationManager;
    private final AlertDefinitionManagerLocal alertDefinitionManager;
    private final AlertConditionManagerLocal alertConditionManager;
    private final AlertNotificationManagerLocal alertNotificationManager;
    private final DriftManagerLocal driftManager;
    private final ResourceManagerLocal resourceManager;
    private final ContentManagerLocal contentManager;

    /**
     * Looks up all collaborating session beans once at job-instantiation time. Quartz creates a new
     * job instance per execution, so these lookups happen on every run.
     */
    public DataPurgeJob() {
        subjectManager = LookupUtil.getSubjectManager();
        systemManager = LookupUtil.getSystemManager();
        purgeManager = LookupUtil.getPurgeManager();
        operationManager = LookupUtil.getOperationManager();
        alertDefinitionManager = LookupUtil.getAlertDefinitionManager();
        alertConditionManager = LookupUtil.getAlertConditionManager();
        alertNotificationManager = LookupUtil.getAlertNotificationManager();
        driftManager = LookupUtil.getDriftManager();
        resourceManager = LookupUtil.getResourceManager();
        contentManager = LookupUtil.getContentManager();
    }

    /**
     * Schedules a purge job to trigger right now. This will not block - it schedules the job to trigger but immediately
     * returns. This method will ensure that no two data purge jobs will execute at the same time (Quartz will ensure
     * this since {@link DataPurgeJob} is an implementation of {@link org.quartz.StatefulJob}).
     *
     * @throws Exception if failed to schedule the data purge for immediate execution
     */
    public static void purgeNow() throws Exception {
        // there should always be a DataPurgeJob defined with a job name as the same as this class' name
        // let's trigger that job now. this ensures the job is only ever run once, never concurrently
        // note that you can't call this method again until the data purge job finished; otherwise,
        // you'll get an exception saying there is already a trigger defined - this is what we want, you
        // shouldn't ask for more than one data purge job to execute now - you have to wait for it to finish.
        SchedulerLocal scheduler = LookupUtil.getSchedulerBean();
        SimpleTrigger trigger = new SimpleTrigger("DataPurgeJobNow", DataPurgeJob.class.getName());
        trigger.setJobName(DataPurgeJob.class.getName());
        trigger.setJobGroup(DataPurgeJob.class.getName());
        scheduler.scheduleJob(trigger);
    }

    /**
     * Entry point invoked by the Quartz scheduler: runs all purge steps, then database maintenance.
     * Any failure is logged (with stack trace) and swallowed so the scheduler never sees an exception
     * and the job keeps firing on its normal schedule.
     */
    @Override
    public void executeJobCode(JobExecutionContext context) throws JobExecutionException {
        long timeStart = System.currentTimeMillis();
        LOG.info("Data Purge Job STARTING");
        try {
            Subject overlord = subjectManager.getOverlord();
            SystemSettings systemSettings = systemManager.getSystemSettings(overlord);
            purgeEverything(systemSettings);
            performDatabaseMaintenance(systemSettings);
        } catch (Exception e) {
            // pass the throwable so the stack trace is logged, consistent with the per-step handlers below
            LOG.error("Data Purge Job FAILED TO COMPLETE. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Data Purge Job FINISHED [" + duration + "]ms");
        }
    }

    /**
     * Runs every purge step in sequence. Each step isolates its own failures, so one failing purge
     * does not prevent the remaining steps (or the subsequent database maintenance) from running.
     */
    private void purgeEverything(SystemSettings systemSettings) {
        purgeCallTimeData(systemSettings);
        purgeEventData(systemSettings);
        purgeAlertData(systemSettings);
        purgeUnusedAlertDefinitions();
        purgeOrphanedAlertConditions();
        purgeOrphanedAlertNotifications();
        purgeMeasurementTraitData(systemSettings);
        purgeAvailabilityData(systemSettings);
        purgeOrphanedDriftFiles(systemSettings);
        purgeOperationHistoryData(systemSettings);
        purgeOrphanedBundleResourceDeploymentHistory();
        purgePartitionEventsData(systemSettings);
        purgeResourceConfigHistory(systemSettings);
        removeResourceErrorDuplicates();
        removeStaleAvailabilityResourceErrors();
        purgeOldPackageBits();
    }

    /**
     * Removes package bits belonging to historical deployments. Failures are caught and logged so
     * this step cannot abort the rest of the job (all sibling purge steps follow the same pattern).
     */
    private void purgeOldPackageBits() {
        long timeStart = System.currentTimeMillis();
        LOG.info("Package bits purge starting at " + new Date(timeStart));
        try {
            contentManager.removeHistoryDeploymentsBits();
        } catch (Exception e) {
            LOG.error("Failed to purge old package bits. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Purged old package bits completed in [" + duration + "]ms");
        }
    }

    /**
     * Purges measurement trait values older than the configured {@code TRAIT_PURGE_PERIOD};
     * defaults to a one-year retention when the setting is absent.
     */
    private void purgeMeasurementTraitData(SystemSettings systemSettings) {
        long timeStart = System.currentTimeMillis();
        LOG.info("Trait data purge starting at " + new Date(timeStart));
        int traitsPurged = 0;
        try {
            long threshold;
            String traitPurgeThresholdStr = systemSettings.get(TRAIT_PURGE_PERIOD);
            if (traitPurgeThresholdStr == null) {
                threshold = timeStart - ONE_YEAR_IN_MS;
                LOG.debug("No purge traits threshold found - will purge traits older than one year");
            } else {
                threshold = timeStart - Long.parseLong(traitPurgeThresholdStr);
            }
            LOG.info("Purging traits that are older than " + new Date(threshold));
            traitsPurged = purgeManager.purgeTraits(threshold);
        } catch (Exception e) {
            LOG.error("Failed to purge trait data. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Traits data purged [" + traitsPurged + "] - completed in [" + duration + "]ms");
        }
    }

    /**
     * Purges operation history records older than the configured {@code OPERATION_HISTORY_PURGE_PERIOD}.
     * A missing or non-positive setting disables this purge entirely.
     */
    private void purgeOperationHistoryData(SystemSettings systemSettings) {
        long timeStart = System.currentTimeMillis();
        int purgeCount = 0;
        try {
            String purgeThresholdStr = systemSettings.get(OPERATION_HISTORY_PURGE_PERIOD);
            long purgeThreshold = purgeThresholdStr != null ? Long.parseLong(purgeThresholdStr) : 0;
            if (purgeThreshold <= 0) {
                LOG.info("Operation History threshold set to 0, skipping purge of operation history data.");
                return;
            }
            LOG.info("Operation History data purge starting at " + new Date(timeStart));
            long threshold = timeStart - purgeThreshold;
            Date purgeBeforeTime = new Date(threshold);
            LOG.info("Purging operation history older than " + purgeBeforeTime);
            purgeCount = operationManager.purgeOperationHistory(purgeBeforeTime);
        } catch (Exception e) {
            LOG.error("Failed to purge operation history data. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Operation history data purged [" + purgeCount + "] - completed in [" + duration + "]ms");
        }
    }

    /**
     * Purges availability records older than the configured {@code AVAILABILITY_PURGE_PERIOD};
     * defaults to a one-year retention when the setting is absent.
     */
    private void purgeAvailabilityData(SystemSettings systemSettings) {
        long timeStart = System.currentTimeMillis();
        LOG.info("Availability data purge starting at " + new Date(timeStart));
        int availsPurged = 0;
        try {
            long threshold;
            String availPurgeThresholdStr = systemSettings.get(AVAILABILITY_PURGE_PERIOD);
            if (availPurgeThresholdStr == null) {
                threshold = timeStart - ONE_YEAR_IN_MS;
                LOG.debug("No purge avails threshold found - will purge availabilities older than one year");
            } else {
                threshold = timeStart - Long.parseLong(availPurgeThresholdStr);
            }
            LOG.info("Purging availabilities that are older than " + new Date(threshold));
            availsPurged = purgeManager.purgeAvailabilities(threshold);
        } catch (Exception e) {
            LOG.error("Failed to purge availability data. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Availability data purged [" + availsPurged + "] - completed in [" + duration + "]ms");
        }
    }

    /**
     * Purges calltime (response time) measurement data older than {@code RT_DATA_PURGE_PERIOD}.
     * If the setting is absent, Long.parseLong throws and the failure is logged by the catch below.
     */
    private void purgeCallTimeData(SystemSettings systemSettings) {
        long timeStart = System.currentTimeMillis();
        LOG.info("Measurement calltime data purge starting at " + new Date(timeStart));
        int calltimePurged = 0;
        try {
            long threshold = timeStart - Long.parseLong(systemSettings.get(RT_DATA_PURGE_PERIOD));
            LOG.info("Purging calltime data that is older than " + new Date(threshold));
            calltimePurged = purgeManager.purgeCallTimeData(threshold);
        } catch (Exception e) {
            LOG.error("Failed to purge calltime data. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Calltime purged [" + calltimePurged + "] - completed in [" + duration + "]ms");
        }
    }

    /**
     * Purges event data older than the configured {@code EVENT_PURGE_PERIOD}.
     */
    private void purgeEventData(SystemSettings systemSettings) {
        long timeStart = System.currentTimeMillis();
        LOG.info("Event data purge starting at " + new Date(timeStart));
        int eventsPurged = 0;
        try {
            long threshold = timeStart - Long.parseLong(systemSettings.get(EVENT_PURGE_PERIOD));
            LOG.info("Purging event data older than " + new Date(threshold));
            eventsPurged = purgeManager.purgeEventData(threshold);
        } catch (Exception e) {
            LOG.error("Failed to purge event data. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Event data purged [" + eventsPurged + "] - completed in [" + duration + "]ms");
        }
    }

    /**
     * Purges alert history older than the configured {@code ALERT_PURGE_PERIOD}.
     */
    private void purgeAlertData(SystemSettings systemSettings) {
        long timeStart = System.currentTimeMillis();
        LOG.info("Alert data purge starting at " + new Date(timeStart));
        int alertsPurged = 0;
        try {
            long threshold = timeStart - Long.parseLong(systemSettings.get(ALERT_PURGE_PERIOD));
            LOG.info("Purging alert data older than " + new Date(threshold));
            // delete alerts created in the time window [0, threshold]
            alertsPurged = purgeManager.deleteAlerts(0, threshold);
        } catch (Exception e) {
            LOG.error("Failed to purge alert data. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Alert data purged [" + alertsPurged + "] - completed in [" + duration + "]ms");
        }
    }

    /**
     * Removes alert definitions that are deleted and no longer referenced by anything.
     */
    private void purgeUnusedAlertDefinitions() {
        long timeStart = System.currentTimeMillis();
        LOG.info("Alert definition unused purge starting at " + new Date(timeStart));
        int alertDefinitionsPurged = 0;
        try {
            alertDefinitionsPurged = alertDefinitionManager.purgeUnusedAlertDefinitions();
        } catch (Exception e) {
            LOG.error("Failed to purge alert definition data. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Alert definitions purged [" + alertDefinitionsPurged + "] - completed in [" + duration + "]ms");
        }
    }

    /**
     * Removes alert conditions that no longer belong to any alert definition.
     */
    private void purgeOrphanedAlertConditions() {
        long timeStart = System.currentTimeMillis();
        LOG.info("Alert condition orphan purge starting at " + new Date(timeStart));
        int orphansPurged = 0;
        try {
            orphansPurged = alertConditionManager.purgeOrphanedAlertConditions();
        } catch (Exception e) {
            LOG.error("Failed to purge alert condition data. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Purged [" + orphansPurged + "] orphan alert conditions - completed in [" + duration + "]ms");
        }
    }

    /**
     * Removes alert notifications that no longer belong to any alert definition.
     */
    private void purgeOrphanedAlertNotifications() {
        long timeStart = System.currentTimeMillis();
        LOG.info("Alert notification orphan purge starting at " + new Date(timeStart));
        int orphansPurged = 0;
        try {
            orphansPurged = alertNotificationManager.purgeOrphanedAlertNotifications();
        } catch (Exception e) {
            LOG.error("Failed to purge alert notification data. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Purged [" + orphansPurged + "] orphan alert notifications - completed in [" + duration + "]ms");
        }
    }

    /**
     * Purges orphaned drift file content older than the configured {@code DRIFT_FILE_PURGE_PERIOD}.
     */
    private void purgeOrphanedDriftFiles(SystemSettings systemSettings) {
        long timeStart = System.currentTimeMillis();
        LOG.info("Drift file orphan purge starting at " + new Date(timeStart));
        int orphansPurged = 0;
        try {
            long threshold = timeStart - Long.parseLong(systemSettings.get(DRIFT_FILE_PURGE_PERIOD));
            LOG.info("Purging orphaned drift files older than " + new Date(threshold));
            orphansPurged = driftManager.purgeOrphanedDriftFiles(subjectManager.getOverlord(), threshold);
        } catch (Exception e) {
            LOG.error("Failed to purge orphaned drift files. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Purged [" + orphansPurged + "] orphaned drift files - completed in [" + duration + "]ms");
        }
    }

    /**
     * Removes bundle resource deployment audit messages whose parent deployments are gone.
     */
    private void purgeOrphanedBundleResourceDeploymentHistory() {
        long timeStart = System.currentTimeMillis();
        LOG.info("Orphaned bundle audit messages purge starting at " + new Date(timeStart));
        int orphansPurged = 0;
        try {
            orphansPurged = purgeManager.purgeOrphanedBundleResourceDeploymentHistory();
        } catch (Exception e) {
            LOG.error("Failed to purge orphaned bundle audit messages. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Purged [" + orphansPurged + "] orphaned bundle audit messages - completed in [" + duration
                + "]ms");
        }
    }

    /**
     * Purges partition events older than the configured {@code PARTITION_EVENT_PURGE_PERIOD}.
     * A missing or non-positive setting disables this purge entirely.
     */
    private void purgePartitionEventsData(SystemSettings systemSettings) {
        long timeStart = System.currentTimeMillis();
        LOG.info("Partition event data purge starting at " + new Date(timeStart));
        int eventsPurged = 0;
        try {
            String purgeThresholdStr = systemSettings.get(PARTITION_EVENT_PURGE_PERIOD);
            long purgeThreshold = purgeThresholdStr != null ? Long.parseLong(purgeThresholdStr) : 0;
            if (purgeThreshold <= 0) {
                LOG.info("Partition event threshold set to 0, skipping purge of partition event data.");
                return;
            }
            long deleteUpToTime = timeStart - purgeThreshold;
            LOG.info("Purging partition event data older than " + new Date(deleteUpToTime));
            eventsPurged = purgeManager.purgePartitionEvents(deleteUpToTime);
        } catch (Exception e) {
            LOG.error("Failed to purge partition event data. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Partition event data purged [" + eventsPurged + "] - completed in [" + duration + "]ms");
        }
    }

    /**
     * Purges resource configuration history older than {@code RESOURCE_CONFIG_HISTORY_PURGE_PERIOD}.
     * A missing or non-positive setting disables this purge entirely.
     */
    private void purgeResourceConfigHistory(SystemSettings systemSettings) {
        long timeStart = System.currentTimeMillis();
        LOG.info("Resource configuration history purge starting at " + new Date(timeStart));
        int configurationsPurged = 0;
        try {
            String purgeThresholdStr = systemSettings.get(RESOURCE_CONFIG_HISTORY_PURGE_PERIOD);
            long purgeThreshold = purgeThresholdStr != null ? Long.parseLong(purgeThresholdStr) : 0;
            if (purgeThreshold <= 0) {
                LOG.info("Resource configuration history threshold set to 0, "
                    + "skipping purge of resource configuration history data.");
                return;
            }
            long deleteUpToTime = timeStart - purgeThreshold;
            LOG.info("Purging resource configuration history data older than " + new Date(deleteUpToTime));
            configurationsPurged = purgeManager.purgeResourceConfigHistory(deleteUpToTime);
        } catch (Exception e) {
            LOG.error("Failed to purge resource configuration history data. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Resource configuration history purged [" + configurationsPurged + "] - completed in [" + duration
                + "]ms");
        }
    }

    /**
     * Deletes duplicated resource error records.
     */
    private void removeResourceErrorDuplicates() {
        long timeStart = System.currentTimeMillis();
        LOG.info("Resource error duplicates removal starting at " + new Date(timeStart));
        int deleted = 0;
        try {
            deleted = resourceManager.removeResourceErrorDuplicates();
        } catch (Exception e) {
            LOG.error("Failed to remove resource error duplicates.", e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Resource error duplicates removed [" + deleted + "] - completed in [" + duration + "]ms");
        }
    }

    /**
     * Deletes availability-related resource errors that are no longer relevant.
     */
    private void removeStaleAvailabilityResourceErrors() {
        long timeStart = System.currentTimeMillis();
        LOG.info("Stale availability resource errors removal starting at " + new Date(timeStart));
        int deleted = 0;
        try {
            deleted = resourceManager.removeStaleAvailabilityResourceErrors();
        } catch (Exception e) {
            LOG.error("Failed to remove stale availability resource errors.", e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Stale availability resource errors removed [" + deleted + "] - completed in [" + duration
                + "]ms");
        }
    }

    /**
     * Performs periodic database maintenance: a nightly VACUUM (plus optional reindex) during the
     * midnight hour, or an ANALYZE when the configured maintenance interval boundary is crossed.
     */
    private void performDatabaseMaintenance(SystemSettings systemSettings) {
        long timeStart = System.currentTimeMillis();
        LOG.info("Database maintenance starting at " + new Date(timeStart));
        try {
            // Once compression finishes, we need to check to see if database maintenance
            // should be performed. This is defaulted to 1 hour, so it should
            // always run unless changed by the user. This is only a safeguard,
            // as usually an ANALYZE only takes a fraction of what a full VACUUM
            // takes. VACUUM will occur every day at midnight.
            String dataMaintenance = systemSettings.get(DATA_MAINTENANCE_PERIOD);
            if (dataMaintenance == null) {
                LOG.error("No data maintenance interval found - will not perform db maintenance");
                return;
            }
            long maintInterval = Long.parseLong(dataMaintenance);

            // At midnight we always perform a VACUUM, otherwise we check to see if it is time to
            // perform normal database maintenance. (On postgres we just rebuild indices using an ANALYZE)
            Calendar cal = Calendar.getInstance();
            if (cal.get(Calendar.HOUR_OF_DAY) == 0) {
                LOG.info("Performing daily database maintenance");
                systemManager.vacuum(subjectManager.getOverlord());

                String reindexStr = systemSettings.get(DATA_REINDEX_NIGHTLY);
                // parseBoolean avoids the needless Boolean boxing of Boolean.valueOf
                boolean reindexNightly = Boolean.parseBoolean(reindexStr);
                if (reindexNightly) {
                    LOG.info("Re-indexing data tables");
                    systemManager.reindex(subjectManager.getOverlord());
                } else {
                    LOG.info("Skipping re-indexing of data tables");
                }
            } else if (TimingVoodoo.roundDownTime(timeStart, HOUR) == TimingVoodoo.roundDownTime(timeStart,
                maintInterval)) {
                // fires when the current hour boundary coincides with a maintenance-interval boundary
                LOG.info("Performing hourly database maintenance");
                systemManager.analyze(subjectManager.getOverlord());
            } else {
                LOG.debug("Not performing any database maintenance now");
            }
        } catch (Exception e) {
            LOG.error("Failed to perform database maintenance. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Database maintenance completed in [" + duration + "]ms");
        }
    }
}