/*
 * RHQ Management Platform
 * Copyright (C) 2005-2014 Red Hat, Inc.
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 */
package org.rhq.enterprise.server.scheduler.jobs;

import java.util.Collections;
import java.util.Date;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import org.quartz.SimpleTrigger;

import org.rhq.core.domain.auth.Subject;
import org.rhq.enterprise.server.measurement.MeasurementBaselineManagerLocal;
import org.rhq.enterprise.server.measurement.MeasurementOOBManagerLocal;
import org.rhq.enterprise.server.purge.PurgeManagerLocal;
import org.rhq.enterprise.server.scheduler.SchedulerLocal;
import org.rhq.enterprise.server.storage.StorageClientManager;
import org.rhq.enterprise.server.system.SystemManagerLocal;
import org.rhq.enterprise.server.util.LookupUtil;
import org.rhq.server.metrics.MetricsServer;
import org.rhq.server.metrics.domain.AggregateNumericMetric;

/**
 * This implements {@link org.quartz.StatefulJob} (as opposed to {@link org.quartz.Job}) because we do not need nor want
 * this job triggered concurrently. That is, we don't want multiple data calc jobs performing the data calc at the
 * same time.
 *
 * The work done in this job used to be performed as part of {@link DataPurgeJob} but now, especially since the metric
 * storage and aggregation work is performed against the cassandra storage cluster, and not the RDB, it's been split
 * away to be executed independently. Moreover, there is really no reason to wait for all of the DB maintenance and
 * unrelated data purge before performing our metric calculations. Also, see [BZ 1125439].
 *
 * @author Jay Shaughnessy
 */
public class DataCalcJob extends AbstractStatefulJob {
    private static final Log LOG = LogFactory.getLog(DataCalcJob.class);

    /**
     * Schedules a calc job to trigger right now. This will not block - it schedules the job to trigger but immediately
     * returns. This method will ensure that no two data calc jobs will execute at the same time (Quartz will ensure
     * this since {@link DataCalcJob} is an implementation of {@link org.quartz.StatefulJob}).
     *
     * @throws Exception if failed to schedule the data calc for immediate execution
     */
    public static void calcNow() throws Exception {
        // there should always be a DataCalcJob defined with a job name as the same as this class' name
        // let's trigger that job now. this ensures the job is only ever run once, never concurrently
        // note that you can't call this method again until the data calc job finished; otherwise,
        // you'll get an exception saying there is already a trigger defined - this is what we want, you
        // shouldn't ask for more than one data calc job to execute now - you have to wait for it to finish.
        SchedulerLocal scheduler = LookupUtil.getSchedulerBean();
        SimpleTrigger trigger = new SimpleTrigger("DataCalcJobNow", DataCalcJob.class.getName());
        trigger.setJobName(DataCalcJob.class.getName());
        trigger.setJobGroup(DataCalcJob.class.getName());
        scheduler.scheduleJob(trigger);
    }

    /**
     * Runs the three calculation steps in order: measurement data compression, auto-baseline calculation and
     * OOB calculation. The aggregates returned by the compression step are handed to the OOB step.
     *
     * Any exception raised by a step is logged (with its stack trace) and not rethrown, so a failure here does not
     * propagate to the caller. The total duration is always logged.
     *
     * @param context the Quartz execution context (not used by this job)
     *
     * @throws JobExecutionException declared by the overridden method; this implementation does not throw it itself
     */
    @Override
    public void executeJobCode(JobExecutionContext context) throws JobExecutionException {
        long timeStart = System.currentTimeMillis();
        LOG.info("Data Calc Job STARTING");

        try {
            Iterable<AggregateNumericMetric> oneHourAggregates = compressMeasurementData();
            calculateAutoBaselines(LookupUtil.getMeasurementBaselineManager());
            calculateOOBs(oneHourAggregates);
        } catch (Exception e) {
            // pass the exception as the throwable as well so its stack trace ends up in the log,
            // the same way the individual steps report their failures
            LOG.error("Data Calc Job FAILED TO COMPLETE. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Data Calc Job FINISHED [" + duration + "]ms");
        }
    }

    /**
     * Asks the {@link MetricsServer} obtained from the {@link StorageClientManager} to calculate its aggregates.
     *
     * @return the aggregates returned by {@link MetricsServer#calculateAggregates()}, or an empty list if the
     *         calculation failed (the failure is logged, not propagated)
     */
    private Iterable<AggregateNumericMetric> compressMeasurementData() {
        long timeStart = System.currentTimeMillis();
        LOG.info("Measurement data compression starting at " + new Date(timeStart));

        try {
            StorageClientManager storageClientManager = LookupUtil.getStorageClientManager();
            MetricsServer metricsServer = storageClientManager.getMetricsServer();
            return metricsServer.calculateAggregates();
        } catch (Exception e) {
            LOG.error("Failed to compress measurement data. Cause: " + e, e);
            // fall back to "no aggregates" so the remaining steps of the job can still run
            return Collections.emptyList();
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Measurement data compression completed in [" + duration + "]ms");
        }
    }

    /**
     * Triggers the auto-calculation of baselines. A failure is logged and swallowed so that it does not prevent
     * the rest of the job from running.
     *
     * @param measurementBaselineManager the manager that performs the baseline calculation
     */
    private void calculateAutoBaselines(MeasurementBaselineManagerLocal measurementBaselineManager) {
        long timeStart = System.currentTimeMillis();
        LOG.info("Auto-calculation of baselines starting at " + new Date(timeStart));

        try {
            measurementBaselineManager.calculateAutoBaselines();
        } catch (Exception e) {
            LOG.error("Failed to auto-calculate baselines. Cause: " + e, e);
        } finally {
            long duration = System.currentTimeMillis() - timeStart;
            LOG.info("Auto-calculation of baselines completed in [" + duration + "]ms");
        }
    }

    /**
     * Calculate the OOB values for the last hour.
     * This also removes out-dated ones due to recalculated baselines.
     *
     * Unlike the other steps of this job, this method does not catch exceptions; any failure propagates to the
     * caller.
     *
     * @param oneHourAggregates the aggregates handed to
     *                          {@link MeasurementOOBManagerLocal#computeOOBsForLastHour} for the OOB computation
     */
    public void calculateOOBs(Iterable<AggregateNumericMetric> oneHourAggregates) {
        LOG.info("Auto-calculation of OOBs starting");

        Subject overlord = LookupUtil.getSubjectManager().getOverlord();
        MeasurementOOBManagerLocal manager = LookupUtil.getOOBManager();
        PurgeManagerLocal purgeManager = LookupUtil.getPurgeManager();
        SystemManagerLocal systemManager = LookupUtil.getSystemManager();

        // purge OOBs whose baseline just got recalculated
        // For now just assume that our system is fast, so a cutoff of 30mins is ok,
        // as the calculate baseline job runs hourly
        long cutOff = System.currentTimeMillis() - (30L * 60L * 1000L);
        long timeStart = System.currentTimeMillis();

        purgeManager.removeOutdatedOOBs(cutOff);

        // clean up
        systemManager.vacuum(overlord, new String[] { "RHQ_MEASUREMENT_OOB" });

        // Now calculate the fresh OOBs
        manager.computeOOBsForLastHour(overlord, oneHourAggregates);

        long duration = System.currentTimeMillis() - timeStart;
        LOG.info("Auto-calculation of OOBs completed in [" + duration + "]ms");
    }
}