/*
* Copyright (c) 2013 EMC Corporation
* All Rights Reserved
*/
package com.emc.storageos.db.server.impl;
import com.emc.storageos.services.util.JmxServerWrapper;
import com.emc.storageos.services.util.TimeUtils;
import org.apache.cassandra.service.StorageService;
import org.apache.cassandra.service.ActiveRepairService;
import org.apache.cassandra.service.StorageServiceMBean;
import org.apache.cassandra.dht.Range;
import org.apache.cassandra.dht.Token;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.management.Notification;
import javax.management.NotificationListener;
import javax.management.remote.JMXConnectionNotification;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Collection;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
/**
* Class handles running repair job and listening for messages related modeled
* after org.apache.cassandra.tools.NodeProbe.RepairRunner
*/
public class RepairJobRunner implements NotificationListener, AutoCloseable {
public static interface ProgressNotificationListener {
public void onStartToken(String token, int progress);
}
private static final Logger _log = LoggerFactory
.getLogger(RepairJobRunner.class);
private final SimpleDateFormat format = new SimpleDateFormat(
"yyyy-MM-dd HH:mm:ss,SSS");
private static final int MIN_MINUTE_FOR_REPAIR_TIME_IN_LOG = 5;
final private Lock lock = new ReentrantLock();
final private Condition finished = lock.newCondition();
private boolean repairRangeDone = false;
/**
* Flag to indicate the job is successful or failed
*/
private boolean _success = true;
/**
* Total number of repair sessions to be executed
*/
private int _totalRepairSessions = 0;
/**
* Number of completed repair sessions
*/
private int _completedRepairSessions = 0;
/**
* Max wait time in minutes for each range repair. Terminate the job
*/
private int _maxWaitInMinutes = 90;
/**
* Start execution time of the job. For time estimation of remaining repair
* sessions.
*/
private long _startTimeInMillis = 0L;
/**
* Executor to schedule job monitor thread
*/
private ScheduledExecutorService _exe;
/**
* Flag to indicate if current job is aborted
*/
private boolean _aborted = false;
/**
* Token that is successfully repaired
*/
private String _lastToken = null;
private ProgressNotificationListener listener;
private JmxServerWrapper jmxServer;
private StorageServiceMBean svcProxy;
private String keySpaceName;
private String clusterStateDigest;
/**
*
* @param svcProxy
* Reference to Cassandra JMX bean
* @param keySpaceName
* ViPR table name
* @param exe
* @param listener
* @param startToken
*/
public RepairJobRunner(JmxServerWrapper jmxServer, StorageServiceMBean svcProxy, String keySpaceName, ScheduledExecutorService exe,
ProgressNotificationListener listener, String startToken, String clusterStateDigest) {
this.jmxServer = jmxServer;
this.svcProxy = svcProxy;
this.keySpaceName = keySpaceName;
_exe = exe;
_lastToken = startToken;
this.listener = listener;
this.clusterStateDigest = clusterStateDigest;
}
public static class StringTokenRange {
public final String begin;
public final String end;
public StringTokenRange(String begin, String end) {
this.begin = begin;
this.end = end;
}
}
/**
* Return sorted primary ranges on local node
*/
public static List<StringTokenRange> getLocalRanges(String keyspace) {
Collection<Range<Token>> ranges = StorageService.instance.getLocalRanges(keyspace);
ArrayList<Range<Token>> sortedRanges = new ArrayList<>();
sortedRanges.addAll(ranges);
Collections.sort(sortedRanges);
ArrayList<StringTokenRange> result = new ArrayList<>();
Iterator<Range<Token>> iter = sortedRanges.iterator();
while (iter.hasNext()) {
Range<Token> range = iter.next();
List<String> startAndEnd = range.asList();
if (startAndEnd.size() != 2) {
_log.error("Illegal local primary range found {}. Stop the db repair",
range);
return null;
}
result.add(new StringTokenRange(startAndEnd.get(0), startAndEnd.get(1)));
}
return result;
}
private static int indexOfRange(List<StringTokenRange> ranges, String token) {
for (int i = 0; i < ranges.size(); i++) {
StringTokenRange range = ranges.get(i);
if (range.end.equals(token)) {
return i;
}
}
return -1;
}
/**
* Execute DB repair job for local primary ranges on DHT ring. If _lastToken is
* not null, the repair job starts from _lastToken
*
* It is supposed to execute this method on all nodes of the cluster so that
* full DHT ring is repaired.
*
* @return True for success. Otherwise failure
* @throws IOException
* @throws InterruptedException
*/
public boolean runRepair() throws IOException, InterruptedException {
_startTimeInMillis = System.currentTimeMillis();
List<StringTokenRange> localRanges = getLocalRanges(keySpaceName);
if (localRanges == null) {
_success = false;
return false;
}
_totalRepairSessions = localRanges.size();
if (_totalRepairSessions == 0) {
_log.info("Nothing to repair for keyspace {}", keySpaceName);
return _success;
}
_log.info("Run repair job for {}. Total # local ranges {}",
this.keySpaceName, _totalRepairSessions);
// Find start token, in case the token is no longer in ring, we have to start from beginning
_completedRepairSessions = _lastToken == null ? 0 : indexOfRange(localRanges, _lastToken);
if (_completedRepairSessions == -1) {
_log.error("Recorded last working range \"{}\" is not found, starting from beginning", _lastToken);
_completedRepairSessions = 0;
} else {
_log.info("Last token is {}, progress is {}%, starting repair from token #{}",
new Object[] { _lastToken, getProgress(), _completedRepairSessions });
}
ScheduledFuture<?> jobMonitorHandle = startMonitor(svcProxy);
lock.lock();
try {
_aborted = false;
_success = true;
while (_completedRepairSessions < _totalRepairSessions) {
String currentDigest = DbRepairRunnable.getClusterStateDigest();
if (!clusterStateDigest.equals(currentDigest)) {
_log.error("Cluster state changed from {} to {}, repair failed", clusterStateDigest, currentDigest);
_success = false;
break;
}
StringTokenRange range = localRanges.get(_completedRepairSessions);
repairRange(range);
if (!_success) {
_log.error("Fail to repair range {} {}. Stopping the job", range.begin, range.end);
break;
}
_lastToken = range.end;
_completedRepairSessions++;
_log.info("{} repair sessions finished. Current progress {}%", _completedRepairSessions, getProgress());
}
} finally {
lock.unlock();
jobMonitorHandle.cancel(false);
_log.info("Stopped repair job monitor");
}
// Reset lastToken after a successful full repair of local primary ranges
if (_success) {
_lastToken = null;
}
long repairMillis = System.currentTimeMillis() - _startTimeInMillis;
_log.info("Db repair consumes {} ",repairMillis > MIN_MINUTE_FOR_REPAIR_TIME_IN_LOG * TimeUtils.MINUTES ?
repairMillis / TimeUtils.MINUTES + " minutes" : repairMillis / TimeUtils.SECONDS + " seconds");
return _success;
}
private void repairRange(StringTokenRange range) throws InterruptedException {
listener.onStartToken(range.end, getProgress());
repairRangeDone = false;
int cmd = svcProxy.forceRepairRangeAsync(range.begin, range.end, keySpaceName, false, false, false);
_log.info("Wait for repairing this range to be done cmd={}", cmd);
if (cmd > 0) {
while (!repairRangeDone) {
finished.await();
}
}
_log.info("Repair this range is done success={}", _success);
}
/**
* Start background task to monitor job progress. If job could not move
* ahead for _maxWaitInMinutes, the job is thought as hanging and we force
* to abort the whole repair.
*
* @param svcProxy
* Reference to Cassandra JMX bean
* @return Future object
*/
protected ScheduledFuture<?> startMonitor(final StorageServiceMBean svcProxy) {
ScheduledFuture<?> jobMonitorHandle = _exe.scheduleAtFixedRate(
new Runnable() {
long _lastProgress = 0L;
long _lastCheckMillis = 0L;
@Override
public void run() {
int progress = getProgress();
long currentMillis = System.currentTimeMillis();
_log.info("Monitor repair job progress {} last progress {} ",
progress, _lastProgress);
if (_lastCheckMillis == 0L) {
_lastCheckMillis = currentMillis;
}
if (progress == _lastProgress) {
long delta = (currentMillis - _lastCheckMillis) / 60000;
if (delta > _maxWaitInMinutes) {
_log.info("Repair job hangs for {} minutes. Abort it", delta);
svcProxy.forceTerminateAllRepairSessions();
_aborted = true;
}
} else {
_lastProgress = progress;
_lastCheckMillis = currentMillis;
}
}
}, 1, 1, TimeUnit.MINUTES);
return jobMonitorHandle;
}
/**
* Get execution percentage of db repair job
*
* @return 0 - 100 to indicate a job is running. -1 to indicate job not
* started
*/
public int getProgress() {
if (_totalRepairSessions > 0) {
return _completedRepairSessions * 100 / _totalRepairSessions;
}
return -1;
}
/**
* Get job start time in milliseconds since epoc.
*
* @return
*/
public long getStartTimeInMillis() {
return this._startTimeInMillis;
}
/**
* Handle DB repair request notification from Cassandra JMX bean
*/
public void handleNotification(Notification notification, Object handback) {
lock.lock();
try {
_log.info("Notification type: {}", notification.getType(), notification.getMessage());
if ("repair".equals(notification.getType())) {
int[] status = (int[]) notification.getUserData();
if (status.length == 2) {
_log.info("Repair notification [{}] {}", format.format(notification.getTimeStamp()),
notification.getMessage());
_log.info("status: {} {}", status[0], status[1]);
// repair status is int array with [0] = cmd number, [1] = status
if (status[1] == ActiveRepairService.Status.SESSION_FAILED.ordinal()) {
_log.info("Repair cmd={} failed", status[0]);
_success = false;
repairRangeDone = true;
finished.signal();
} else if (status[1] == ActiveRepairService.Status.FINISHED.ordinal() ||
(_aborted && status[1] == ActiveRepairService.Status.SESSION_SUCCESS.ordinal())) {
_log.info("Repair cmd={} finished", status[0]);
if (_aborted) {
_success = false;
}
repairRangeDone = true;
finished.signal();
}
} else {
_log.error("Unexpected notification: status.length {}", status.length);
}
} else if (JMXConnectionNotification.NOTIFS_LOST.equals(notification.getType())) {
_log.error("[{}] Lost notification. You should check server log for repair status of keyspace {}",
format.format(notification.getTimeStamp()),
keySpaceName);
} else if (JMXConnectionNotification.FAILED.equals(notification.getType())
|| JMXConnectionNotification.CLOSED.equals(notification.getType())) {
_log.error("JMX connection closed. You should check server log for repair status of keyspace {}"
+ "(Subsequent keyspaces are not going to be repaired).",
keySpaceName);
repairRangeDone = true;
finished.signal();
}
}finally {
lock.unlock();
}
}
@Override
public void close() throws Exception {
_log.info("remove listener");
svcProxy.removeNotificationListener(this);
jmxServer.removeConnectionNotificationListener(this);
}
}