package org.rhq.server.metrics.aggregation;

import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Semaphore;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

import com.google.common.base.Stopwatch;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.joda.time.DateTime;

import org.rhq.server.metrics.AbortedException;
import org.rhq.server.metrics.DateTimeService;
import org.rhq.server.metrics.MetricsConfiguration;
import org.rhq.server.metrics.MetricsDAO;
import org.rhq.server.metrics.StorageClientThreadFactory;
import org.rhq.server.metrics.domain.AggregateNumericMetric;
import org.rhq.server.metrics.domain.IndexBucket;

/**
 * Drives the periodic roll-up of metric data: raw data is aggregated into 1 hour
 * metrics, 1 hour into 6 hour, and 6 hour into 24 hour metrics. Each pass is
 * delegated to a {@link DataAggregator} configured for the corresponding
 * {@link IndexBucket}, all sharing one worker thread pool and a {@link Semaphore}
 * that bounds the number of in-flight batches.
 *
 * @author John Sanda
 */
public class AggregationManager {

    /**
     * Orders aggregate metrics by schedule id, then by timestamp. Used by the
     * {@link ConcurrentSkipListSet} in {@link #run()} so that the returned 1 hour
     * data is deterministic and duplicates (same schedule id and timestamp) collapse.
     */
    private static final Comparator<AggregateNumericMetric> AGGREGATE_COMPARATOR =
        new Comparator<AggregateNumericMetric>() {
            @Override
            public int compare(AggregateNumericMetric left, AggregateNumericMetric right) {
                if (left.getScheduleId() == right.getScheduleId()) {
                    // Long.compare avoids the overflow pitfalls of manual subtraction.
                    return Long.compare(left.getTimestamp(), right.getTimestamp());
                }
                return Integer.compare(left.getScheduleId(), right.getScheduleId());
            }
        };

    private final Log log = LogFactory.getLog(AggregationManager.class);

    private MetricsDAO dao;

    private DateTimeService dtService;

    private ListeningExecutorService aggregationTasks;

    private MetricsConfiguration configuration;

    // Number of schedules aggregated per batch; tunable via system property.
    private int batchSize;

    // Number of batches that may be in flight concurrently; tunable via system property.
    private int parallelism;

    // Cumulative wall-clock time (ms) spent in run() since server start.
    private AtomicLong totalAggregationTime = new AtomicLong();

    private ThreadPoolExecutor threadPool;

    public AggregationManager(MetricsDAO dao, DateTimeService dtService, MetricsConfiguration configuration) {
        this.dao = dao;
        this.dtService = dtService;
        this.configuration = configuration;
        batchSize = Integer.parseInt(System.getProperty("rhq.metrics.aggregation.batch-size", "5"));
        parallelism = Integer.parseInt(System.getProperty("rhq.metrics.aggregation.parallelism", "3"));
        int numWorkers = Integer.parseInt(System.getProperty("rhq.metrics.aggregation.workers", "4"));
        // We have to have more than 1 thread, otherwise we can deadlock during aggregation task scheduling.
        // See https://bugzilla.redhat.com/show_bug.cgi?id=1084626 for details
        if (numWorkers < 2) {
            numWorkers = 2;
        }
        threadPool = new ThreadPoolExecutor(numWorkers, numWorkers, 30, TimeUnit.SECONDS,
            new LinkedBlockingQueue<Runnable>(), new StorageClientThreadFactory("AggregationTasks"));
        aggregationTasks = MoreExecutors.listeningDecorator(threadPool);
    }

    /** Stops the worker pool immediately; queued aggregation tasks are discarded. */
    public void shutdown() {
        aggregationTasks.shutdownNow();
    }

    public int getBatchSize() {
        return batchSize;
    }

    public void setBatchSize(int batchSize) {
        this.batchSize = batchSize;
    }

    public int getParallelism() {
        return parallelism;
    }

    public void setParallelism(int parallelism) {
        this.parallelism = parallelism;
    }

    public int getNumWorkers() {
        return threadPool.getMaximumPoolSize();
    }

    /**
     * Resizes the worker pool. The core and maximum sizes are updated in an order
     * that keeps {@code corePoolSize <= maximumPoolSize} at every step: on modern
     * JDKs {@link ThreadPoolExecutor#setCorePoolSize(int)} throws
     * {@link IllegalArgumentException} if the new core size exceeds the current
     * maximum, so growing the pool must raise the maximum first.
     *
     * @param numWorkers the new core and maximum pool size
     */
    public void setNumWorkers(int numWorkers) {
        log.debug("Setting aggregation worker thread count to " + numWorkers);
        if (numWorkers > threadPool.getMaximumPoolSize()) {
            threadPool.setMaximumPoolSize(numWorkers);
            threadPool.setCorePoolSize(numWorkers);
        } else {
            threadPool.setCorePoolSize(numWorkers);
            threadPool.setMaximumPoolSize(numWorkers);
        }
    }

    /**
     * @return The total aggregation time in milliseconds since server start. This property is updated after each of
     * raw, one hour, and six hour data are aggregated.
     */
    public long getTotalAggregationTime() {
        return totalAggregationTime.get();
    }

    /**
     * Runs one full aggregation pass: raw -> 1 hour, 1 hour -> 6 hour, and
     * 6 hour -> 24 hour.
     *
     * @return the 1 hour aggregates produced from raw data during this pass, ordered
     *         by schedule id then timestamp; an empty set if the pass was interrupted
     *         or aborted
     */
    public Set<AggregateNumericMetric> run() {
        log.info("Starting metrics data aggregation");
        Stopwatch stopwatch = Stopwatch.createStarted();
        Semaphore permits = new Semaphore(batchSize * parallelism);
        log.debug("Allocating " + permits.availablePermits() + " permits");
        int num1Hour = 0;
        int num6Hour = 0;
        int num24Hour = 0;
        try {
            PersistFunctions persistFunctions = new PersistFunctions(dao, dtService);
            final Set<AggregateNumericMetric> oneHourData = new ConcurrentSkipListSet<AggregateNumericMetric>(
                AGGREGATE_COMPARATOR);
            DateTime endTime = dtService.currentHour();
            DateTime end = endTime;
            // We set the start time to the retention period minus 1 hour, or 6 days and 23
            // hours ago instead of 7 days ago because if we set the start time to the full
            // 7 days, then we could end up in a situation where data has expired and
            // aggregate metric get overwritten with partial data.
            DateTime start = end.minus(configuration.getRawRetention().toPeriod().minusHours(1));
            DataAggregator rawAggregator = createRawAggregator(persistFunctions, permits);
            // Collect the 1 hour aggregates as each batch completes; the listener may be
            // invoked from worker threads, hence the concurrent set.
            rawAggregator.setBatchFinishedListener(new DataAggregator.BatchFinishedListener() {
                @Override
                public void onFinish(List<AggregateNumericMetric> metrics) {
                    oneHourData.addAll(metrics);
                }
            });
            num1Hour = rawAggregator.execute(start, end);

            end = dtService.get6HourTimeSlice(endTime);
            start = dtService.get6HourTimeSlice(endTime).minus(configuration.getRawRetention());
            num6Hour = create1HourAggregator(persistFunctions, permits).execute(start, end);

            end = dtService.get24HourTimeSlice(endTime);
            start = dtService.get24HourTimeSlice(endTime).minus(configuration.getRawRetention());
            num24Hour = create6HourAggregator(persistFunctions, permits).execute(start, end);

            return oneHourData;
        } catch (InterruptedException e) {
            // Restore the interrupted status so callers up the stack can observe it.
            Thread.currentThread().interrupt();
            log.info("There was an interrupt while waiting for aggregation to finish. Aggregation will be aborted.");
            return Collections.emptySet();
        } catch (AbortedException e) {
            log.warn("Aggregation has been aborted: " + e.getMessage());
            return Collections.emptySet();
        } finally {
            stopwatch.stop();
            totalAggregationTime.addAndGet(stopwatch.elapsed(TimeUnit.MILLISECONDS));
            log.info("Finished aggregation of {\"raw schedules\": " + num1Hour + ", \"1 hour schedules\": " + num6Hour +
                ", \"6 hour schedules\": " + num24Hour + "} in " + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms");
        }
    }

    /** Builds the aggregator that rolls raw data up into 1 hour metrics. */
    private DataAggregator createRawAggregator(PersistFunctions persistFunctions, Semaphore permits) {
        DataAggregator aggregator = new DataAggregator();
        aggregator.setAggregationTasks(aggregationTasks);
        aggregator.setBucket(IndexBucket.RAW);
        aggregator.setTimeSliceDuration(configuration.getRawTimeSliceDuration());
        aggregator.setDao(dao);
        aggregator.setPermits(permits);
        aggregator.setDateTimeService(dtService);
        aggregator.setPersistMetrics(persistFunctions.persist1HourMetrics());
        aggregator.setConfiguration(configuration);
        aggregator.setBatchSize(batchSize);
        return aggregator;
    }

    /** Builds the aggregator that rolls 1 hour data up into 6 hour metrics. */
    private DataAggregator create1HourAggregator(PersistFunctions persistFunctions, Semaphore permits) {
        DataAggregator aggregator = new DataAggregator();
        aggregator.setAggregationTasks(aggregationTasks);
        aggregator.setBucket(IndexBucket.ONE_HOUR);
        aggregator.setTimeSliceDuration(configuration.getOneHourTimeSliceDuration());
        aggregator.setDao(dao);
        aggregator.setPermits(permits);
        aggregator.setDateTimeService(dtService);
        aggregator.setPersistMetrics(persistFunctions.persist6HourMetrics());
        aggregator.setConfiguration(configuration);
        aggregator.setBatchSize(batchSize);
        return aggregator;
    }

    /** Builds the aggregator that rolls 6 hour data up into 24 hour metrics. */
    private DataAggregator create6HourAggregator(PersistFunctions persistFunctions, Semaphore permits) {
        DataAggregator aggregator = new DataAggregator();
        aggregator.setAggregationTasks(aggregationTasks);
        aggregator.setBucket(IndexBucket.SIX_HOUR);
        aggregator.setTimeSliceDuration(configuration.getSixHourTimeSliceDuration());
        aggregator.setDao(dao);
        aggregator.setPermits(permits);
        aggregator.setDateTimeService(dtService);
        aggregator.setPersistMetrics(persistFunctions.persist24HourMetrics());
        aggregator.setConfiguration(configuration);
        aggregator.setBatchSize(batchSize);
        return aggregator;
    }
}