package org.rhq.cassandra.schema;
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
import com.datastax.driver.core.BoundStatement;
import com.datastax.driver.core.PreparedStatement;
import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.ResultSetFuture;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;
import com.google.common.collect.ImmutableSet;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.RateLimiter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.joda.time.DateTime;
import org.joda.time.Days;
import org.joda.time.Duration;
import org.joda.time.Period;
import org.rhq.core.util.exception.ThrowableUtil;
import org.rhq.core.util.jdbc.JDBCUtil;
/**
* This is a custom step that performs data migration for users upgrading to RHQ 4.11 from either 4.9 or 4.10.
* Prior to 4.11 we had the numeric data tables and the metrics_index table, which served as a custom index. 4.11
* adds two new tables, metrics_cache and metrics_cache_index, and drops the metrics_index table. This class
* performs several tasks:
*
* <ul>
* <li>Migrates data from metrics_index to metrics_cache_index</li>
* <li>Deletes rows in metrics_index for migrated data</li>
* <li>Deactivates the metrics cache (if necessary)</li>
* <li>Drops the metrics_index table</li>
* </ul>
*
* A row in metrics_index is deleted only after its data has been successfully migrated into metrics_cache_index,
* and at the end of the process metrics_index is dropped only if there were no errors. If any inserts into
* metrics_cache_index fail, an exception is thrown, which causes the installation/upgrade to fail. The upgrade
* can then be retried so that any remaining data in metrics_index gets migrated.
*
* @author John Sanda
*/
public class PopulateCacheIndex implements Step {
private static final Log log = LogFactory.getLog(PopulateCacheIndex.class);
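// This step writes every cache index entry to partition 0.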
private static final int CACHE_INDEX_PARTITION = 0;
private static final String INDEX_TABLE = "metrics_index";
private static final String CACHE_INDEX_TABLE = "metrics_cache_index";
private static enum Bucket {
RAW("raw_metrics"),
ONE_HOUR("one_hour_metrics"),
SIX_HOUR("six_hour_metrics"),
TWENTY_FOUR_HOUR("twenty_four_hour_metrics");
private String text;
private Bucket(String text) {
this.text = text;
}
public String text() {
return text;
}
}
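/**
* Invoked when an async update to metrics_cache_index completes. On success it chains an async
* delete of the corresponding metrics_index row; on failure it records the error so that the
* upgrade step fails (and can be retried) once all updates have settled.
*/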
private class CacheIndexUpdatedCallback implements FutureCallback<ResultSet> {
private Bucket bucket;
private int scheduleId;
private Date time;
private CountDownLatch updatesFinished;
public CacheIndexUpdatedCallback(Bucket bucket, int scheduleId, Date time, CountDownLatch updatesFinished) {
this.bucket = bucket;
this.scheduleId = scheduleId;
this.time = time;
this.updatesFinished = updatesFinished;
}
@Override
public void onSuccess(ResultSet result) {
permits.acquire();
BoundStatement statement = deleteIndexEntry.bind(bucket.text(), time, scheduleId);
ResultSetFuture future = session.executeAsync(statement);
Futures.addCallback(future, new IndexUpdatedCallback(bucket, scheduleId, updatesFinished), tasks);
}
@Override
public void onFailure(Throwable t) {
log.warn("Failed to update cache index for {bucket: " + bucket.text() + ", scheduleId: " + scheduleId +
"}: ", ThrowableUtil.getRootCause(t));
failedUpdates.incrementAndGet();
updatesFinished.countDown();
}
}
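/**
* Invoked when the chained delete against metrics_index completes. Delete failures are only
* logged since metrics_index is dropped at the end of a successful run anyway.
*/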
private class IndexUpdatedCallback implements FutureCallback<ResultSet> {
private Bucket bucket;
private int scheduleId;
private CountDownLatch updatesFinished;
public IndexUpdatedCallback(Bucket bucket, int scheduleId, CountDownLatch updatesFinished) {
this.bucket = bucket;
this.scheduleId = scheduleId;
this.updatesFinished = updatesFinished;
}
@Override
public void onSuccess(ResultSet result) {
updatesFinished.countDown();
}
@Override
public void onFailure(Throwable t) {
// If we fail to delete a row in metrics_index, we can just log it and keep going because at the end of the
// process we will drop the table assuming there are no errors updating metrics_cache_index.
log.info("Failed to delete {bucket: " + bucket.text() + ", scheduleId: " + scheduleId + "} from " +
INDEX_TABLE + ": " + ThrowableUtil.getRootMessage(t));
updatesFinished.countDown();
}
}
private Session session;
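// Number of schedules per cache block; overridable with the rhq.metrics.cache.block-size system property.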
private int cacheBlockSize = Integer.parseInt(System.getProperty("rhq.metrics.cache.block-size", "5"));
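// Throttles async statement execution to at most 20,000 permits per second.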
private RateLimiter permits = RateLimiter.create(20000);
private PreparedStatement updateCacheIndex;
private PreparedStatement findIndexTimeSlice;
private PreparedStatement findIndexEntries;
private PreparedStatement deleteIndexEntry;
private AtomicInteger failedUpdates = new AtomicInteger();
private ListeningExecutorService tasks;
private DBConnectionFactory dbConnectionFactory;
@Override
public void setSession(Session session) {
this.session = session;
}
@Override
public void bind(Properties properties) {
dbConnectionFactory = (DBConnectionFactory) properties.get(SchemaManager.RELATIONAL_DB_CONNECTION_FACTORY_PROP);
}
@Override
public void execute() {
// dbConnectionFactory can be null in test environments which is fine because we start tests with a brand
// new schema and cluster. In this case, we do not need to do anything since it is not an upgrade scenario.
if (dbConnectionFactory == null) {
log.info("The relational database connection factory is not set. No data migration necessary");
} else {
tasks = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(3, new SchemaUpdateThreadFactory()));
initPreparedStatements();
Date mostRecent1HourTimeSlice = findMostRecentRawDataSinceLastShutdown();
try {
if (mostRecent1HourTimeSlice == null) {
log.info("The " + CACHE_INDEX_TABLE + " table will not be updated. No raw data was found.");
} else {
log.debug("The most recent hour with raw data is " + mostRecent1HourTimeSlice);
Date mostRecent6HourTimeSlice = get6HourTimeSlice(mostRecent1HourTimeSlice).toDate();
Date mostRecent24HourTimeSlice = get24HourTimeSlice(mostRecent1HourTimeSlice).toDate();
Date day = mostRecent24HourTimeSlice;
updateCacheIndex(fetchRawIndexEntries(mostRecent1HourTimeSlice), Bucket.RAW, day,
current1HourTimeSlice().toDate());
updateCacheIndex(fetch1HourIndexEntries(mostRecent6HourTimeSlice), Bucket.ONE_HOUR, day,
mostRecent6HourTimeSlice);
updateCacheIndex(fetch6HourIndexEntries(mostRecent24HourTimeSlice), Bucket.SIX_HOUR, day,
mostRecent24HourTimeSlice);
if (failedUpdates.get() > 0) {
throw new RuntimeException("Cannot complete upgrade step due to previous errors. There were " +
failedUpdates.get() + " failed updates.");
}
deactivateCacheIfNecessary(mostRecent24HourTimeSlice);
}
} catch (InterruptedException e) {
throw new RuntimeException("The " + CACHE_INDEX_TABLE + " updates have not completed due to an " +
"interrupt. The schema upgrade will have to be run again to complete the updates.", e);
}
}
dropIndex();
}
private void initPreparedStatements() {
findIndexEntries = session.prepare("SELECT schedule_id FROM rhq." + INDEX_TABLE + " WHERE bucket = ? AND time = ?");
updateCacheIndex = session.prepare(
"UPDATE rhq." + CACHE_INDEX_TABLE + " " +
"SET schedule_ids = schedule_ids + ? " +
"WHERE bucket = ? AND day = ? AND partition = ? AND collection_time_slice = ? AND " +
" start_schedule_id = ? AND insert_time_slice = ?");
findIndexTimeSlice = session.prepare("SELECT time FROM rhq." + INDEX_TABLE + " WHERE bucket = ? AND time = ?");
deleteIndexEntry = session.prepare(
"DELETE FROM rhq." + INDEX_TABLE + " WHERE bucket = ? AND time = ? AND schedule_id = ?");
}
private void updateCacheIndex(ResultSet resultSet, Bucket bucket, Date day, Date timeSlice)
throws InterruptedException {
List<Row> rows = resultSet.all();
CountDownLatch updatesFinished = new CountDownLatch(rows.size());
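// Each row contributes one latch count. Every callback path, success or failure, eventually
// counts down, so await() returns only after all updates and chained deletes have settled.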
log.info("Preparing to update " + CACHE_INDEX_TABLE + " for " + rows.size() + " schedules from the " +
bucket.text() + " bucket");
// We need collectionTimeSlice != insertTimeSlice to make sure that data is pulled
// from the historical tables during aggregation. The METRICS_CACHE_ACTIVATION_TIME
// sys config property is set to the start of the next day to indicate that the
// cache table should not be used until then. There is an edge case though that can
// occur if the data for which updates are being made does not get aggregated until
// after METRICS_CACHE_ACTIVATION_TIME has passed. This could happen if the server
// is shut down for a while after upgrading. We therefore need to use a different
// value for the insertTimeSlice column to ensure the data gets pulled from the
// historical tables during aggregation.
Date insertTimeSlice = new Date(timeSlice.getTime() + 100);
for (Row row : rows) {
permits.acquire();
int scheduleId = row.getInt(0);
BoundStatement statement = updateCacheIndex.bind(ImmutableSet.of(scheduleId), bucket.text(), day,
CACHE_INDEX_PARTITION, timeSlice, startId(scheduleId), insertTimeSlice);
ResultSetFuture future = session.executeAsync(statement);
Futures.addCallback(future, new CacheIndexUpdatedCallback(bucket, scheduleId, timeSlice, updatesFinished),
tasks);
}
updatesFinished.await();
log.info("Finished updating " + CACHE_INDEX_TABLE + " for " + bucket.text() + " bucket");
}
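/**
* Walks backwards hour by hour from the current 1-hour time slice looking for an index entry
* that indicates raw data awaiting aggregation. The search is bounded at seven days back, which
* presumably corresponds to the raw data retention period.
*/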
private Date findMostRecentRawDataSinceLastShutdown() {
log.info("Searching for most recent hour having raw data");
DateTime previousHour = current1HourTimeSlice();
DateTime oldestRawTime = previousHour.minus(Days.days(7));
ResultSet resultSet = getIndexTimeSlice(Bucket.ONE_HOUR, previousHour);
Row row = resultSet.one();
while (row == null && previousHour.compareTo(oldestRawTime) > 0) {
previousHour = previousHour.minusHours(1);
resultSet = getIndexTimeSlice(Bucket.ONE_HOUR, previousHour);
row = resultSet.one();
}
if (row == null) {
log.info("No data found in metrics_index table");
return null;
} else {
Date date = row.getDate(0);
log.info("The latest hour with raw data is " + date);
return date;
}
}
private ResultSet getIndexTimeSlice(Bucket bucket, DateTime time) {
BoundStatement statement = findIndexTimeSlice.bind(bucket.text(), time.toDate());
return session.execute(statement);
}
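// Rows in metrics_index are keyed by the destination bucket of aggregation, hence the apparent
// bucket mismatch in the following fetch methods: schedules with raw data appear under
// one_hour_metrics, 1-hour data under six_hour_metrics, and 6-hour data under
// twenty_four_hour_metrics.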
private ResultSet fetchRawIndexEntries(Date timeSlice) {
return queryMetricsIndex(Bucket.ONE_HOUR, timeSlice);
}
private ResultSet fetch1HourIndexEntries(Date timeSlice) {
return queryMetricsIndex(Bucket.SIX_HOUR, timeSlice);
}
private ResultSet fetch6HourIndexEntries(Date timeSlice) {
return queryMetricsIndex(Bucket.TWENTY_FOUR_HOUR, timeSlice);
}
private ResultSet queryMetricsIndex(Bucket bucket, Date timeSlice) {
BoundStatement statement = findIndexEntries.bind(bucket.text(), timeSlice);
return session.execute(statement);
}
private DateTime current1HourTimeSlice() {
return getTimeSlice(DateTime.now(), Duration.standardHours(1));
}
private DateTime get6HourTimeSlice(Date date) {
return getTimeSlice(new DateTime(date.getTime()), Duration.standardHours(6));
}
private DateTime get24HourTimeSlice(Date date) {
return getTimeSlice(new DateTime(date.getTime()), Duration.standardHours(24));
}
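/**
* Floors dt to the start of its time slice for the given duration. For example, with a six hour
* duration, 14:37 floors to 12:00 since hour 14 falls in the [12:00, 18:00) slice.
*/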
private DateTime getTimeSlice(DateTime dt, Duration duration) {
Period p = duration.toPeriod();
if (p.getYears() != 0) {
return dt.yearOfEra().roundFloorCopy().minusYears(dt.getYearOfEra() % p.getYears());
} else if (p.getMonths() != 0) {
return dt.monthOfYear().roundFloorCopy().minusMonths((dt.getMonthOfYear() - 1) % p.getMonths());
} else if (p.getWeeks() != 0) {
return dt.weekOfWeekyear().roundFloorCopy().minusWeeks((dt.getWeekOfWeekyear() - 1) % p.getWeeks());
} else if (p.getDays() != 0) {
return dt.dayOfMonth().roundFloorCopy().minusDays((dt.getDayOfMonth() - 1) % p.getDays());
} else if (p.getHours() != 0) {
return dt.hourOfDay().roundFloorCopy().minusHours(dt.getHourOfDay() % p.getHours());
} else if (p.getMinutes() != 0) {
return dt.minuteOfHour().roundFloorCopy().minusMinutes(dt.getMinuteOfHour() % p.getMinutes());
} else if (p.getSeconds() != 0) {
return dt.secondOfMinute().roundFloorCopy().minusSeconds(dt.getSecondOfMinute() % p.getSeconds());
}
return dt.millisOfSecond().roundCeilingCopy().minusMillis(dt.getMillisOfSecond() % p.getMillis());
}
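/**
* Returns the first schedule id of the cache block that contains scheduleId. With the default
* block size of 5, schedule id 123 maps to start id 120.
*/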
private int startId(int scheduleId) {
return (scheduleId / cacheBlockSize) * cacheBlockSize;
}
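/**
* If the most recent raw data falls within the current 24-hour time slice, pushes the
* METRICS_CACHE_ACTIVATION_TIME sys config property out to the start of the next day so that the
* cache table is not used during aggregation until then. No update is needed when the most
* recent data is from an earlier day.
*/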
private void deactivateCacheIfNecessary(Date mostRecent24HourTimeSlice) {
Connection connection = null;
Statement statement = null;
try {
DateTime current24HourTimeSlice = get24HourTimeSlice(new Date());
if (current24HourTimeSlice.isAfter(mostRecent24HourTimeSlice.getTime())) {
log.info("The metrics cache will not be deactivated since the most recent raw data is from before today - "
+ mostRecent24HourTimeSlice);
} else {
DateTime next24HourTimeSlice = current24HourTimeSlice.plusDays(1);
log.info("The metrics cache will be come active at " + next24HourTimeSlice);
connection = dbConnectionFactory.newConnection();
statement = connection.createStatement();
statement.executeUpdate("UPDATE rhq_system_config SET property_value = '" +
next24HourTimeSlice.getMillis() + "' WHERE property_key = 'METRICS_CACHE_ACTIVATION_TIME'");
}
} catch (SQLException e) {
throw new RuntimeException("Failed to deactivate metrics cache", e);
} finally {
JDBCUtil.safeClose(statement);
JDBCUtil.safeClose(connection);
}
}
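// Reached only when migration completed without errors (or no migration was needed), at which
// point it is safe to drop metrics_index.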
private void dropIndex() {
session.execute("DROP TABLE rhq." + INDEX_TABLE);
}
}