/* * NOTE: This copyright does *not* cover user programs that use HQ * program services by normal system calls through the application * program interfaces provided as part of the Hyperic Plug-in Development * Kit or the Hyperic Client Development Kit - this is merely considered * normal use of the program, and does *not* fall under the heading of * "derived work". * * Copyright (C) [2004, 2005, 2006], Hyperic, Inc. * This file is part of HQ. * * HQ is free software; you can redistribute it and/or modify * it under the terms version 2 of the GNU General Public License as * published by the Free Software Foundation. This program is distributed * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A * PARTICULAR PURPOSE. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 * USA. */ package org.hyperic.hq.measurement; import java.beans.Introspector; import java.util.Date; import java.util.Properties; import javax.annotation.PostConstruct; import org.apache.commons.lang.time.DateUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.hibernate.SessionFactory; import org.hibernate.engine.SessionFactoryImplementor; import org.hyperic.hibernate.dialect.HQDialect; import org.hyperic.hq.common.SystemException; import org.hyperic.hq.common.shared.HQConstants; import org.hyperic.hq.common.shared.ServerConfigManager; import org.hyperic.hq.context.Bootstrap; import org.hyperic.hq.events.shared.AlertManager; import org.hyperic.hq.events.shared.EventLogManager; import org.hyperic.hq.ha.HAUtil; import org.hyperic.hq.measurement.shared.DataCompress; import org.hyperic.hq.measurement.shared.DataManager; import org.hyperic.hq.measurement.shared.MeasurementManager; import org.hyperic.hq.stats.ConcurrentStatsCollector; import org.hyperic.util.ConfigPropertyException; import org.hyperic.util.TimeUtil; import org.hyperic.util.timer.StopWatch; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; @Component("dataPurgeJob") public class DataPurgeJob implements Runnable { private ServerConfigManager serverConfigManager; private MeasurementManager measurementManager; private EventLogManager eventLogManager; private DataCompress dataCompress; private AlertManager alertManager; private DataManager dataManager; private ConcurrentStatsCollector concurrentStatsCollector; private long _lastAnalyze = 0l; private static final long ANALYZE_INTERVAL = Integer.parseInt(System.getProperty( "data.purge.analyze.interval", "6")) * MeasurementConstants.HOUR; private static final boolean RUN_ANALYZE_METRIC_TABLES = Boolean.parseBoolean((System.getProperty( "data.purge.analyze.metricTables", "true"))); private static final boolean RUN_ANALYZE_NON_METRIC_TABLES = Boolean.parseBoolean((System.getProperty( "data.purge.analyze.nonMetricTables", "true"))); private final Object analyzeRunningLock = new Object(); private boolean analyzeRunning = false; private final Object compressRunningLock = new Object(); private boolean compressRunning = false; private long purgeRaw, purge1h, purge6h, purge1d, purgeAlert; private int purgeTopN; private final Log log = LogFactory.getLog(DataPurgeJob.class); @Autowired public DataPurgeJob(ServerConfigManager serverConfigManager, MeasurementManager measurementManager, EventLogManager eventLogManager, DataCompress dataCompress, ConcurrentStatsCollector concurrentStatsCollector, AlertManager alertManager, DataManager dataManager) { this.serverConfigManager = serverConfigManager; this.measurementManager = measurementManager; this.eventLogManager = eventLogManager; this.dataCompress = dataCompress; this.concurrentStatsCollector = concurrentStatsCollector; this.alertManager = alertManager; this.dataManager = dataManager; } @PostConstruct public void init() { initStatsCollector(); loadPurgeDefaults(); dataCompress.createMetricDataViews(); } private void initStatsCollector() { concurrentStatsCollector.register(ConcurrentStatsCollector.METRIC_DATA_COMPRESS_TIME); concurrentStatsCollector.register(ConcurrentStatsCollector.DB_ANALYZE_TIME); concurrentStatsCollector.register(ConcurrentStatsCollector.PURGE_EVENT_LOGS_TIME); concurrentStatsCollector.register(ConcurrentStatsCollector.PURGE_MEASUREMENTS_TIME); } /** * Get the server purge configuration, loaded on startup. */ private void loadPurgeDefaults() { this.log.info("Loading default purge intervals"); Properties conf; try { conf = serverConfigManager.getConfig(); } catch (ConfigPropertyException e) { // Not gonna happen throw new SystemException(e); } String purgeRawString = conf.getProperty(HQConstants.DataPurgeRaw); String purge1hString = conf.getProperty(HQConstants.DataPurge1Hour); String purge6hString = conf.getProperty(HQConstants.DataPurge6Hour); String purge1dString = conf.getProperty(HQConstants.DataPurge1Day); String purgeAlertString = conf.getProperty(HQConstants.AlertPurge); String purgeTopNString = conf.getProperty(HQConstants.TopNPurge); try { this.purgeRaw = Long.parseLong(purgeRawString); this.purge1h = Long.parseLong(purge1hString); this.purge6h = Long.parseLong(purge6hString); this.purge1d = Long.parseLong(purge1dString); this.purgeAlert = Long.parseLong(purgeAlertString); this.purgeTopN = Integer.parseInt(purgeTopNString); } catch (NumberFormatException e) { // Shouldn't happen unless manual edit of config table throw new IllegalArgumentException("Invalid purge interval: " + e, e); } } public synchronized void run() { try { if (!HAUtil.isMasterNode()) { log.warn("Data Purge invoked on the secondary HQ node. " + "It should not be run if the node is not master. Not running"); return; } compressData(); Properties conf = null; try { conf = serverConfigManager.getConfig(); } catch (Exception e) { throw new SystemException(e); } final long now = System.currentTimeMillis(); // need to do this because of the Sun JVM bug // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=5102804 Introspector.flushCaches(); if (conf != null) { purge(conf, now); } } catch (Throwable e) { log.error(e.getMessage(), e); } } /** * Entry point into compression routine */ public void compressData() { // First check if we are already running synchronized (compressRunningLock) { if (compressRunning) { log.info("Not starting data compression. (Already running)"); return; } else { compressRunning = true; } } final long time_start = now(); try { // Announce log.info("Data compression starting at " + TimeUtil.toString(time_start)); HQDialect dialect = (HQDialect) ((SessionFactoryImplementor) Bootstrap.getBean(SessionFactory.class)).getDialect(); if (dialect.analyzeDb()) { runDBAnalyze(); concurrentStatsCollector.addStat((now() - time_start), ConcurrentStatsCollector.DB_ANALYZE_TIME); } final long start = now(); // Round down to the nearest hour. long now = TimingVoodoo.roundDownTime(System.currentTimeMillis(), MeasurementConstants.HOUR); long last; // Compress hourly data last = compressData(MeasurementConstants.HOUR, now); // Purge, ensuring we don't purge data not yet compressed. truncateMeasurementData(Math.min(now - this.purgeRaw, last)); // Purge metric problems as well purgeMetricProblems(Math.min(now - this.purgeRaw, last)); // Compress 6 hour data last = compressData(MeasurementConstants.SIX_HOUR, now); // Purge, ensuring we don't purge data not yet compressed. purgeMeasurements(MeasurementConstants.HOUR, Math.min(start - this.purge1h, last)); // Compress daily data last = compressData(MeasurementConstants.DAY, now); // Purge, ensuring we don't purge data not yet compressed. purgeMeasurements(MeasurementConstants.SIX_HOUR, Math.min(now - this.purge6h, last)); // Purge, we never store more than 1 year of data. purgeMeasurements(MeasurementConstants.DAY, now - this.purge1d); purgeAlerts(now); purgeTopNData(now); concurrentStatsCollector.addStat((now() - start), ConcurrentStatsCollector.METRIC_DATA_COMPRESS_TIME); } finally { synchronized (compressRunningLock) { compressRunning = false; } } long time_end = System.currentTimeMillis(); log.info("Data compression completed in " + ((time_end - time_start) / 1000) + " seconds."); runDBMaintenance(); } private void purgeAlerts(long now) { log.info("Purging alerts older than " + TimeUtil.toString(now - this.purgeAlert)); int alertsDeleted = -1; int totalDeleted = 0; // HQ-2731 - want to batch this 10,000 rows at a time // this avoids the session getting too large and will (hopefully) avoid transaction timeouts int maxBatch = 10000; do { alertsDeleted = alertManager.deleteAlerts(now - this.purgeAlert, maxBatch); totalDeleted += alertsDeleted; } while (alertsDeleted >= maxBatch); log.info("Done (Deleted " + totalDeleted + " alerts)"); } private void purgeTopNData(long now) { Date timeToKeep = DateUtils.addDays(new Date(now), -this.purgeTopN); log.info("Purging TopNData older than " + timeToKeep); int topNDeleted = dataManager.purgeTopNData(timeToKeep); log.info("Done (Deleted " + topNDeleted + " TopNData rows)"); } long compressData(long toInterval, long now) { long begin = dataCompress.getCompressionStartTime(toInterval, now); if (begin == 0) { //No data to compress return 0; } // Compress all the way up to now. StopWatch watch = new StopWatch(); //begin=1:00:00 AM //toInterval = 1 hour //now=2:00:00 AM //We add toInterval due to bug HQ-4497, 6 hours and 1 day aggregations were wrong //begin + toInterval <= now (2:00:00<=2:00:00) otherwise it will enter the while one hour later while (begin + toInterval <= now) { long end = begin + toInterval; log.info("Compression interval: " + TimeUtil.toString(begin) + " to " + TimeUtil.toString(end)); try { dataCompress.compressData(toInterval, now, begin, end); } catch (Exception e) { // Just log the error and continue log.debug("Exception when inserting data " + " at " + TimeUtil.toString(begin), e); } // Increment for next iteration. begin = end; } log.info("Done (" + (watch.getElapsed() / 1000) + " seconds)"); // Return the last interval that was compressed. return begin; } void truncateMeasurementData(long truncateBefore) { dataCompress.truncateMeasurementData(truncateBefore); } private final long now() { return System.currentTimeMillis(); } private void runDBAnalyze() { if (log.isDebugEnabled()) { log.debug("Run analyze metric tables: " + RUN_ANALYZE_METRIC_TABLES); log.debug("Run analyze non metric tables: " + RUN_ANALYZE_NON_METRIC_TABLES); log.debug("Analyze interval: " + ANALYZE_INTERVAL + " ms"); } // First check if we are already running long analyzeStart = System.currentTimeMillis(); synchronized (analyzeRunningLock) { if (analyzeRunning) { log.info("Not starting db analyze. (Already running)"); return; } else if (RUN_ANALYZE_METRIC_TABLES && (_lastAnalyze + ANALYZE_INTERVAL) > analyzeStart) { serverConfigManager.analyzeHqMetricTables(false); log.info("Only running analyze on current metric data table " + "since last full run was at " + TimeUtil.toString(_lastAnalyze)); return; } else { analyzeRunning = true; } } try { if (RUN_ANALYZE_METRIC_TABLES) { log.info("Performing database analyze on metric tables."); // Analyze the current and previous hq_metric_data table serverConfigManager.analyzeHqMetricTables(true); } if (RUN_ANALYZE_NON_METRIC_TABLES) { log.info("Performing database analyze on non metric tables."); // Analyze all non-metric tables serverConfigManager.analyzeNonMetricTables(); } long secs = (System.currentTimeMillis() - analyzeStart) / 1000; log.info("Completed database analyze " + secs + " secs"); } finally { synchronized (analyzeRunningLock) { analyzeRunning = false; _lastAnalyze = analyzeStart; } } } private void runDBMaintenance() { // Once compression finishes, we check to see if database maintenance // should be performed. This is defaulted to 1 hour, so it should // always run unless changed by the user. long time_start = System.currentTimeMillis(); Properties conf; try { conf = serverConfigManager.getConfig(); } catch (Exception e) { throw new SystemException(e); } String dataMaintenance = conf.getProperty(HQConstants.DataMaintenance); if (dataMaintenance == null) { // Should never happen log.error("No data maintenance interval found"); return; } long maintInterval = Long.parseLong(dataMaintenance); if (maintInterval <= 0) { log.error("Maintenance interval was specified as [" + dataMaintenance + "] -- which is invalid"); return; } } protected void purge(Properties conf, long now) { long start = now(); purgeEventLogs(conf, now); concurrentStatsCollector.addStat((now() - start), ConcurrentStatsCollector.PURGE_EVENT_LOGS_TIME); start = now(); purgeMeasurements(); concurrentStatsCollector.addStat((now() - start), ConcurrentStatsCollector.PURGE_MEASUREMENTS_TIME); } /** * Remove measurements no longer associated with a resource. */ private void purgeMeasurements() { final long start = System.currentTimeMillis(); int total = 0; try { final int batchSize = 10000; int dcount = Integer.MAX_VALUE; while (dcount > 0) { dcount = measurementManager.removeOrphanedMeasurements(batchSize); total += dcount; } } catch (Exception e) { // Do not allow errors to cause other maintenance functions to not run. log.error("Error removing measurements: " + e, e); } final long finish = System.currentTimeMillis(); log.info("Removed " + total + " measurements in " + ((finish - start) / 1000) + " seconds."); } void purgeMeasurements(long dataInterval, long purgeAfter) { long min = dataCompress.getMinTimestamp(dataInterval); // No data if (min != 0) { long interval = MeasurementConstants.HOUR; long endWindow = purgeAfter; long startWindow = endWindow - interval; // while oldest timestamp in DB is older than purgeAfter, delete // batches in one hour increments while (endWindow > min) { dataCompress.purgeMeasurements(dataInterval, startWindow, endWindow); endWindow -= interval; startWindow -= interval; } } } void purgeMetricProblems(long purgeAfter) { long min = dataCompress.getMetricProblemMinTimestamp(); // No data if (min != 0) { long interval = MeasurementConstants.HOUR; long endWindow = purgeAfter; long startWindow = endWindow - interval; // while oldest timestamp in DB is older than purgeAfter, delete // batches in one hour increments while (endWindow > min) { dataCompress.purgeMetricProblems(startWindow, endWindow); endWindow -= interval; startWindow -= interval; } } } /** * Purge Event Log data */ private void purgeEventLogs(Properties conf, long now) { String purgeEventString = conf.getProperty(HQConstants.EventLogPurge); long purgeEventLog = Long.parseLong(purgeEventString); // Purge event logs log.info("Purging event logs older than " + TimeUtil.toString(now - purgeEventLog)); try { int rowsDeleted = eventLogManager.deleteLogs(-1, now - purgeEventLog); log.info("Done (Deleted " + rowsDeleted + " event logs)"); } catch (Exception e) { log.error("Unable to delete event logs: " + e.getMessage(), e); } } }