/*
*
* RHQ Management Platform
* Copyright (C) 2005-2013 Red Hat, Inc.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation, and/or the GNU Lesser
* General Public License, version 2.1, also as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License and the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* and the GNU Lesser General Public License along with this program;
* if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
*/
package org.rhq.server.metrics;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import com.datastax.driver.core.ResultSet;
import com.google.common.base.Stopwatch;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.joda.time.DateTime;
import org.joda.time.Days;
import org.rhq.core.domain.measurement.MeasurementDataNumeric;
import org.rhq.core.domain.measurement.composite.MeasurementDataNumericHighLowComposite;
import org.rhq.core.util.exception.ThrowableUtil;
import org.rhq.server.metrics.aggregation.AggregationManager;
import org.rhq.server.metrics.domain.AggregateNumericMetric;
import org.rhq.server.metrics.domain.AggregateNumericMetricMapper;
import org.rhq.server.metrics.domain.Bucket;
import org.rhq.server.metrics.domain.IndexBucket;
import org.rhq.server.metrics.domain.RawNumericMetric;
import org.rhq.server.metrics.invalid.InvalidMetricsManager;
/**
* @author John Sanda
*/
/**
 * Coordinates storage and retrieval of numeric metric data. Incoming raw data is
 * buffered on a bounded queue and drained asynchronously by a background consumer
 * thread; queries transparently select the raw, 1hr, 6hr, or 24hr bucket based on
 * the requested time range.
 */
public class MetricsServer {

    private final Log log = LogFactory.getLog(MetricsServer.class);

    /** Upper bound (in days) accepted by {@link #setRawDataAgeLimit(int)}. */
    private static final int RAW_DATA_AGE_LIMIT_MAX = 5;

    /** Capacity of the bounded buffer between {@link #addNumericData} and the consumer thread. */
    private static final int QUEUE_SIZE = 200000;

    private DateTimeService dateTimeService = new DateTimeService();

    private MetricsDAO dao;

    private MetricsConfiguration configuration;

    // Executor on which storage-result callbacks run, off the consumer thread.
    private ListeningExecutorService tasks = MoreExecutors.listeningDecorator(Executors.newCachedThreadPool(
        new StorageClientThreadFactory("MetricsServerTasks")));

    private InvalidMetricsManager invalidMetricsManager;

    private AggregationManager aggregationManager;

    // Bounded buffer of incoming raw data; created in init(), drained by numericQueueConsumer.
    private volatile BlockingQueue<MeasurementDataNumeric> numericQueue;

    private NumericQueueConsumer numericQueueConsumer;

    // Raw data whose collection time slice is older than this many days is discarded
    // instead of stored. The system property is clamped to RAW_DATA_AGE_LIMIT_MAX;
    // the previous clamp of Math.min(3, ...) silently ignored configured values of
    // 4 or 5 even though setRawDataAgeLimit() accepts them.
    private Days rawDataAgeLimit = Days.days(Math.min(RAW_DATA_AGE_LIMIT_MAX, Integer.parseInt(
        System.getProperty("rhq.metrics.data.age-limit", "3"))));

    public void setDAO(MetricsDAO dao) {
        this.dao = dao;
    }

    public void setConfiguration(MetricsConfiguration configuration) {
        this.configuration = configuration;
    }

    public void setDateTimeService(DateTimeService dateTimeService) {
        this.dateTimeService = dateTimeService;
    }

    /** @return the raw data age limit in days */
    public int getRawDataAgeLimit() {
        return rawDataAgeLimit.getDays();
    }

    /**
     * Sets the raw data age limit.
     *
     * @param rawDataAgeLimit the new limit in days
     * @throws IllegalArgumentException if the requested limit exceeds {@link #RAW_DATA_AGE_LIMIT_MAX}
     */
    public void setRawDataAgeLimit(int rawDataAgeLimit) {
        if (rawDataAgeLimit > RAW_DATA_AGE_LIMIT_MAX) {
            throw new IllegalArgumentException("The requested limit, " + rawDataAgeLimit + ", exceeds the max age " +
                "limit of " + RAW_DATA_AGE_LIMIT_MAX);
        }
        this.rawDataAgeLimit = Days.days(rawDataAgeLimit);
    }

    public void setIndexPartitions(int indexPartitions) {
        configuration.setIndexPartitions(indexPartitions);
    }

    /**
     * Creates the aggregation and invalid-metrics managers, allocates the raw data
     * queue, and starts the consumer thread. Must be called before data is submitted
     * via {@link #addNumericData}.
     */
    public void init() {
        aggregationManager = new AggregationManager(dao, dateTimeService, configuration);
        invalidMetricsManager = new InvalidMetricsManager(dateTimeService, dao);
        numericQueue = new ArrayBlockingQueue<MeasurementDataNumeric>(QUEUE_SIZE);
        numericQueueConsumer = new NumericQueueConsumer();
        numericQueueConsumer.setName("MetricsServer numericQueueConsumer");
        numericQueueConsumer.start();
    }

    /**
     * A test hook
     */
    InvalidMetricsManager getInvalidMetricsManager() {
        return invalidMetricsManager;
    }

    public AggregationManager getAggregationManager() {
        return aggregationManager;
    }

    /**
     * Signals the consumer thread to drain and stop, waits for it to finish, then
     * shuts down the aggregation and invalid-metrics managers.
     */
    public void shutdown() {
        numericQueueConsumer.shutdown();
        try {
            numericQueueConsumer.join(); // Lets wait for it to finish before we continue..
        } catch (InterruptedException e) {
            log.warn("Potential data loss while waiting for the queue to empty, ", e);
            // Restore the interrupt status so callers higher up the stack can see it.
            Thread.currentThread().interrupt();
        }
        aggregationManager.shutdown();
        invalidMetricsManager.shutdown();
    }

    /**
     * @param scheduleId the measurement schedule to query
     * @return the most recently stored raw metric for the schedule
     */
    public RawNumericMetric findLatestValueForResource(int scheduleId) {
        log.debug("Querying for most recent raw metrics for [scheduleId: " + scheduleId + "]");
        return dao.findLatestRawMetric(scheduleId);
    }

    /**
     * Loads metric data for a single schedule, choosing the raw, 1hr, 6hr, or 24hr
     * bucket based on how far back {@code beginTime} reaches, and distributes the
     * results into {@code numberOfBuckets} high/low/average composites.
     *
     * @throws IllegalArgumentException if beginTime falls outside every supported range
     */
    public Iterable<MeasurementDataNumericHighLowComposite> findDataForResource(int scheduleId, long beginTime,
        long endTime, int numberOfBuckets) {
        Stopwatch stopwatch = new Stopwatch().start();
        try {
            DateTime begin = new DateTime(beginTime);
            if (dateTimeService.isInRawDataRange(begin)) {
                Iterable<RawNumericMetric> metrics = dao.findRawMetrics(scheduleId, beginTime, endTime);
                return createRawComposites(metrics, beginTime, endTime, numberOfBuckets);
            }
            List<AggregateNumericMetric> metrics = null;
            if (dateTimeService.isIn1HourDataRange(begin)) {
                metrics = dao.findAggregateMetrics(scheduleId, Bucket.ONE_HOUR, beginTime,
                    endTime);
                return createComposites(metrics, beginTime, endTime, numberOfBuckets);
            } else if (dateTimeService.isIn6HourDataRange(begin)) {
                metrics = dao.findAggregateMetrics(scheduleId, Bucket.SIX_HOUR, beginTime, endTime);
                return createComposites(metrics, beginTime, endTime, numberOfBuckets);
            } else if (dateTimeService.isIn24HourDataRange(begin)) {
                metrics = dao.findAggregateMetrics(scheduleId, Bucket.TWENTY_FOUR_HOUR, beginTime, endTime);
                return createComposites(metrics, beginTime, endTime, numberOfBuckets);
            } else {
                throw new IllegalArgumentException("beginTime[" + beginTime + "] is outside the accepted range.");
            }
        } finally {
            stopwatch.stop();
            if (log.isDebugEnabled()) {
                log.debug("Finished calculating resource summary aggregate in " +
                    stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms");
            }
        }
    }

    /**
     * Loads metric data for a group of schedules, choosing the bucket based on
     * {@code beginTime}, and distributes the results into {@code numberOfBuckets}
     * high/low/average composites.
     */
    public List<MeasurementDataNumericHighLowComposite> findDataForGroup(List<Integer> scheduleIds, long beginTime,
        long endTime, int numberOfBuckets) {
        if (log.isDebugEnabled()) {
            log.debug("Querying for metric data using parameters [scheduleIds: " + scheduleIds + ", beginTime: " +
                beginTime + ", endTime: " + endTime + ", numberOfBuckets: " + numberOfBuckets + "]");
        }
        DateTime begin = new DateTime(beginTime);
        if (dateTimeService.isInRawDataRange(begin)) {
            Iterable<RawNumericMetric> metrics = dao.findRawMetrics(scheduleIds, beginTime, endTime);
            return createRawComposites(metrics, beginTime, endTime, numberOfBuckets);
        }
        Bucket bucket = getBucket(begin);
        List<AggregateNumericMetric> metrics = loadMetrics(scheduleIds, beginTime, endTime, bucket);
        return createComposites(metrics, beginTime, endTime, numberOfBuckets);
    }

    /**
     * Computes a single min/max/avg aggregate over all data for the schedule in the
     * given time range, reading from whichever bucket covers {@code beginTime}.
     */
    public AggregateNumericMetric getSummaryAggregate(int scheduleId, long beginTime, long endTime) {
        Stopwatch stopwatch = new Stopwatch().start();
        try {
            DateTime begin = new DateTime(beginTime);
            if (dateTimeService.isInRawDataRange(begin)) {
                Iterable<RawNumericMetric> metrics = dao.findRawMetrics(scheduleId, beginTime, endTime);
                return calculateAggregatedRaw(metrics, beginTime);
            }
            Bucket bucket = getBucket(begin);
            List<AggregateNumericMetric> metrics = dao.findAggregateMetrics(scheduleId, bucket, beginTime, endTime);
            return calculateAggregate(metrics, beginTime, bucket);
        } finally {
            stopwatch.stop();
            if (log.isDebugEnabled()) {
                log.debug("Finished calculating resource summary aggregate for [scheduleId: " + scheduleId +
                    ", beginTime: " + beginTime + ", endTime: " + endTime + "] in " +
                    stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms");
            }
        }
    }

    /**
     * Asynchronous variant of {@link #getSummaryAggregate(int, long, long)}; the
     * aggregate is computed in a transform applied to the storage query future.
     */
    public ListenableFuture<AggregateNumericMetric> getSummaryAggregateAsync(int scheduleId, long beginTime,
        long endTime) {
        long start = System.currentTimeMillis();
        try {
            if (log.isDebugEnabled()) {
                log.debug("Calculating resource summary aggregate (async) for [scheduleId: " + scheduleId +
                    ", beginTime: " + beginTime + ", endTime: " + endTime + "]");
            }
            DateTime begin = new DateTime(beginTime);
            StorageResultSetFuture queryFuture;
            if (dateTimeService.isInRawDataRange(begin)) {
                queryFuture = dao.findRawMetricsAsync(scheduleId, beginTime, endTime);
                return Futures.transform(queryFuture, new ComputeRawAggregate(beginTime));
            }
            Bucket bucket = getBucket(begin);
            queryFuture = dao.findAggregateMetricsAsync(scheduleId, bucket, beginTime, endTime);
            return Futures.transform(queryFuture, new ComputeAggregate(beginTime, bucket));
        } finally {
            // NOTE: this only measures the time to issue the query, not to complete it.
            long end = System.currentTimeMillis();
            if (log.isDebugEnabled()) {
                log.debug("Finished calculating resource summary aggregate (async) in " + (end - start) + " ms");
            }
        }
    }

    /**
     * Computes a single min/max/avg aggregate over all data for the group of schedules
     * in the given time range, reading from whichever bucket covers {@code beginTime}.
     */
    public AggregateNumericMetric getSummaryAggregate(List<Integer> scheduleIds, long beginTime, long endTime) {
        Stopwatch stopwatch = new Stopwatch().start();
        try {
            DateTime begin = new DateTime(beginTime);
            if (dateTimeService.isInRawDataRange(begin)) {
                Iterable<RawNumericMetric> metrics = dao.findRawMetrics(scheduleIds, beginTime, endTime);
                return calculateAggregatedRaw(metrics, beginTime);
            }
            Bucket bucket = getBucket(begin);
            List<AggregateNumericMetric> metrics = loadMetrics(scheduleIds, beginTime, endTime, bucket);
            return calculateAggregate(metrics, beginTime, bucket);
        } finally {
            stopwatch.stop();
            if (log.isDebugEnabled()) {
                log.debug("Finished calculating group summary aggregate for [scheduleIds: " + scheduleIds +
                    ", beginTime: " + beginTime + ", endTime: " + endTime + "] in " +
                    stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms");
            }
        }
    }

    /**
     * Issues one async query per schedule and collects the mapped results. Failed
     * individual queries are tolerated by successfulAsList; any error while collecting
     * is logged and an empty list returned (best effort, preserved behavior).
     */
    private List<AggregateNumericMetric> loadMetrics(List<Integer> scheduleIds, long begin, long end, Bucket bucket) {
        List<StorageResultSetFuture> futures = new ArrayList<StorageResultSetFuture>(scheduleIds.size());
        for (Integer scheduleId : scheduleIds) {
            futures.add(dao.findAggregateMetricsAsync(scheduleId, bucket, begin, end));
        }
        ListenableFuture<List<ResultSet>> resultSetsFuture = Futures.successfulAsList(futures);
        try {
            List<ResultSet> resultSets = resultSetsFuture.get();
            AggregateNumericMetricMapper mapper = new AggregateNumericMetricMapper();
            List<AggregateNumericMetric> metrics = new ArrayList<AggregateNumericMetric>();
            for (ResultSet resultSet : resultSets) {
                metrics.addAll(mapper.mapAll(resultSet));
            }
            return metrics;
        } catch (Exception e) {
            log.warn("There was an error while fetching " + bucket + " data for {scheduleIds: " + scheduleIds +
                ", beginTime: " + begin + ", endTime: " + end + "}", e);
            return Collections.emptyList();
        }
    }

    /**
     * Maps a begin time to the aggregate bucket that covers it.
     *
     * @throws IllegalArgumentException if the time falls outside every aggregate range
     */
    protected Bucket getBucket(DateTime begin) {
        Bucket bucket;
        if (dateTimeService.isIn1HourDataRange(begin)) {
            bucket = Bucket.ONE_HOUR;
        } else if (dateTimeService.isIn6HourDataRange(begin)) {
            bucket = Bucket.SIX_HOUR;
        } else if (dateTimeService.isIn24HourDataRange(begin)) {
            bucket = Bucket.TWENTY_FOUR_HOUR;
        } else {
            throw new IllegalArgumentException("beginTime[" + begin.getMillis() + "] is outside the accepted range.");
        }
        return bucket;
    }

    // Distributes raw metrics into evenly sized time buckets; each raw value serves as
    // its own min/max/avg.
    private List<MeasurementDataNumericHighLowComposite> createRawComposites(Iterable<RawNumericMetric> metrics,
        long beginTime, long endTime, int numberOfBuckets) {
        Buckets buckets = new Buckets(beginTime, endTime, numberOfBuckets);
        for (RawNumericMetric metric : metrics) {
            buckets.insert(metric.getTimestamp(), metric.getValue(), metric.getValue(), metric.getValue());
        }
        List<MeasurementDataNumericHighLowComposite> data = new ArrayList<MeasurementDataNumericHighLowComposite>();
        for (int i = 0; i < buckets.getNumDataPoints(); ++i) {
            Buckets.Bucket bucket = buckets.get(i);
            // Composite constructor order is (timestamp, value/avg, high, low).
            data.add(new MeasurementDataNumericHighLowComposite(bucket.getStartTime(), bucket.getAvg(),
                bucket.getMax(), bucket.getMin()));
        }
        return data;
    }

    // Distributes aggregate metrics into evenly sized time buckets. Invalid metrics are
    // excluded from the results and handed to InvalidMetricsManager for recomputation.
    private List<MeasurementDataNumericHighLowComposite> createComposites(Iterable<AggregateNumericMetric> metrics,
        long beginTime, long endTime, int numberOfBuckets) {
        Buckets buckets = new Buckets(beginTime, endTime, numberOfBuckets);
        for (AggregateNumericMetric metric : metrics) {
            if (invalidMetricsManager.isInvalidMetric(metric)) {
                log.warn("The " + metric.getBucket() + " metric " + metric + " is invalid. It will be excluded from " +
                    "the results sent to the client and we will attempt to recompute the metric.");
                invalidMetricsManager.submit(metric);
            } else {
                buckets.insert(metric.getTimestamp(), metric.getAvg(), metric.getMin(), metric.getMax());
            }
        }
        List<MeasurementDataNumericHighLowComposite> data = new ArrayList<MeasurementDataNumericHighLowComposite>();
        for (int i = 0; i < buckets.getNumDataPoints(); ++i) {
            Buckets.Bucket bucket = buckets.get(i);
            data.add(new MeasurementDataNumericHighLowComposite(bucket.getStartTime(), bucket.getAvg(),
                bucket.getMax(), bucket.getMin()));
        }
        return data;
    }

    /**
     * Buffers the given raw data for asynchronous storage.
     *
     * @param dataSet the raw data to store; null elements are skipped
     * @param callback invoked with null on success, or with a RuntimeException when the
     *                 queue is full or an individual insert times out
     */
    public synchronized void addNumericData(final Set<MeasurementDataNumeric> dataSet, final FutureCallback<Void> callback) {
        if(numericQueue.remainingCapacity() > dataSet.size()) {
            for(final MeasurementDataNumeric data : dataSet) {
                try {
                    // offer() returns false when the one-second timeout elapses; ignoring
                    // that return (as before) silently dropped the datum while still
                    // reporting success to the callback.
                    if(data != null && !numericQueue.offer(data, 1, TimeUnit.SECONDS)) {
                        callback.onFailure(new RuntimeException(
                            "The queue insert timed out after one second, can't finish the queue loading."));
                        return;
                    }
                } catch (InterruptedException e) {
                    // Modify the exception to include sane message telling us even the buffer is full..
                    // and actually do something with it in MeasurementDataManagerBean ..
                    callback.onFailure(new RuntimeException("The queue insert timed out after one second, can't finish the queue loading.", e));
                    return;
                }
            }
            callback.onSuccess(null);
        } else {
            callback.onFailure(new RuntimeException("The server is overloaded, queue is full."));
        }
    }

    /**
     * Computes and stores aggregates for all buckets that are ready to be aggregated.
     * This includes raw, 1hr, 6hr, and 24hr data.
     *
     * @return One hour aggregates. That is, any raw data that has been rolled up into
     * one hour aggregates. The one hour aggregates are returned because they are needed
     * for subsequently computing baselines.
     */
    public Iterable<AggregateNumericMetric> calculateAggregates() {
        return aggregationManager.run();
    }

    // Folds raw metrics into a single min/max/avg aggregate stamped with the given
    // timestamp. Returns NaN min/max when the input is empty.
    private AggregateNumericMetric calculateAggregatedRaw(Iterable<RawNumericMetric> rawMetrics, long timestamp) {
        double min = Double.NaN;
        double max = min;
        int count = 0;
        ArithmeticMeanCalculator mean = new ArithmeticMeanCalculator();
        double value;
        for (RawNumericMetric metric : rawMetrics) {
            value = metric.getValue();
            if (count == 0) {
                // Seed min/max from the first value; NaN comparisons below would
                // otherwise never update them.
                min = value;
                max = min;
            }
            if (value < min) {
                min = value;
            } else if (value > max) {
                max = value;
            }
            mean.add(value);
            ++count;
        }
        // We let the caller handle setting the schedule id because in some cases we do
        // not care about it.
        return new AggregateNumericMetric(0, Bucket.ONE_HOUR, mean.getArithmeticMean(), min, max, timestamp);
    }

    // Folds pre-aggregated metrics into a single min/max/avg aggregate. Note the avg
    // is a mean of per-bucket means, i.e. unweighted by sample count.
    private AggregateNumericMetric calculateAggregate(Iterable<AggregateNumericMetric> metrics, long timestamp,
        Bucket bucket) {
        double min = Double.NaN;
        double max = min;
        int count = 0;
        ArithmeticMeanCalculator mean = new ArithmeticMeanCalculator();
        for (AggregateNumericMetric metric : metrics) {
            if (count == 0) {
                min = metric.getMin();
                max = metric.getMax();
            }
            if (metric.getMin() < min) {
                min = metric.getMin();
            }
            if (metric.getMax() > max) {
                max = metric.getMax();
            }
            mean.add(metric.getAvg());
            ++count;
        }
        // We let the caller handle setting the schedule id because in some cases we do
        // not care about it.
        return new AggregateNumericMetric(0, bucket, mean.getArithmeticMean(), min, max, timestamp);
    }

    /** @return how many more raw data points the buffer can accept right now */
    public int getQueueAvailableCapacity() {
        return this.numericQueue.remainingCapacity();
    }

    /**
     * Background thread that drains {@link #numericQueue}, discarding data older than
     * the raw data age limit and writing the rest to storage along with an index
     * update. On a failed insert the datum is re-queued for retry; only when the
     * re-queue itself fails (queue full) is the error logged.
     */
    private class NumericQueueConsumer extends Thread {
        // Sentinel compared by reference identity in run() to request shutdown.
        private final MeasurementDataNumeric KILL_SIGNAL = new MeasurementDataNumeric(Long.MIN_VALUE, Integer.MIN_VALUE, Double.valueOf(Double.MIN_VALUE));

        @Override
        public void run() {
            while(true) {
                final MeasurementDataNumeric data;
                try {
                    data = numericQueue.take();
                    if(data == KILL_SIGNAL) {
                        return;
                    }
                } catch (InterruptedException e) {
                    // We still keep getting interrupted.. we really need to die
                    return;
                }
                DateTime collectionTimeSlice = dateTimeService.getTimeSlice(new DateTime(data.getTimestamp()),
                    configuration.getRawTimeSliceDuration());
                Days days = Days.daysBetween(collectionTimeSlice, dateTimeService.now());
                if (days.isGreaterThan(rawDataAgeLimit)) {
                    log.info(data + " is older than the raw data age limit of " + rawDataAgeLimit.getDays() +
                        " days. It will not be stored.");
                } else {
                    StorageResultSetFuture rawFuture = dao.insertRawData(data);
                    StorageResultSetFuture indexFuture = dao.updateIndex(IndexBucket.RAW, collectionTimeSlice.getMillis(),
                        data.getScheduleId());
                    ListenableFuture<List<ResultSet>> insertsFuture = Futures.successfulAsList(rawFuture, indexFuture);
                    Futures.addCallback(insertsFuture, new FutureCallback<List<ResultSet>>() {
                        @Override
                        public void onSuccess(List<ResultSet> result) {
                        }
                        @Override
                        public void onFailure(Throwable t) {
                            // Re-queue for retry; log only if the queue has no room left.
                            boolean offerSuccess = numericQueue.offer(data);
                            if(!offerSuccess) {
                                if (log.isDebugEnabled()) {
                                    log.debug("An error occurred while inserting raw data", ThrowableUtil.getRootCause(t));
                                } else {
                                    log.warn("An error occurred while inserting raw data: " + ThrowableUtil.getRootMessage(t)
                                        + ", data was: " + data);
                                }
                            }
                        }
                    }, tasks);
                }
            }
        }

        /**
         * Push KILL_SIGNAL to the processing queue
         */
        public void shutdown() {
            try {
                numericQueue.put(KILL_SIGNAL);
            } catch (InterruptedException e) {
                log.warn("Could not push kill signal to the numericQueue. Possible data loss.");
                Thread.currentThread().interrupt();
            }
        }
    }
}