package org.apache.fullmatix.mysql.tools;
import java.io.OutputStreamWriter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.fullmatix.mysql.MasterStatus;
import org.apache.fullmatix.mysql.MasterStatus.MasterStatusAttribute;
import org.apache.fullmatix.mysql.MySQLAdmin;
import org.apache.fullmatix.mysql.MySQLConstants;
import org.apache.helix.HelixAdmin;
import org.apache.helix.manager.zk.ZKHelixAdmin;
import org.apache.helix.model.ExternalView;
import org.apache.helix.model.IdealState;
import org.apache.helix.model.InstanceConfig;
import org.apache.log4j.ConsoleAppender;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
/**
 * Failure-injection tool for the MySQL master/slave resource of a Helix cluster.
 *
 * <p>Each iteration lets the {@link LoadGenerator} run for a while, picks a random slice,
 * disables the instance that currently holds the MASTER state for that slice, waits for another
 * instance to become master, re-enables the old master and finally checks that every replica of
 * the slice reports the same executed GTID set. Iterations repeat until a check fails or no
 * master can be found.
 */
public class ChaosMonkey {
  private static final Logger LOG = Logger.getLogger(ChaosMonkey.class);

  /** How long the load generator is left running before and after a failover. */
  private static final long LOAD_PERIOD_MS = 10 * 1000L;

  /** Interval between external-view polls while waiting for a new master. */
  private static final long MASTER_POLL_INTERVAL_MS = 1000L;

  /** Upper bound on the wait for a new master, so a stuck failover cannot hang the tool. */
  private static final long MASTER_FAILOVER_TIMEOUT_MS = 5 * 60 * 1000L;

  /** Time given to the re-enabled old master to rejoin and catch up before validation. */
  private static final long CATCH_UP_PERIOD_MS = 10 * 1000L;

  private final String _zkAddress;
  private final String _clusterName;
  private final LoadGenerator _generator;
  private final HelixAdmin _admin;

  /**
   * Creates a chaos monkey for the given cluster.
   *
   * @param zkAddress ZooKeeper address handed to the load generator and the Helix admin client
   * @param clusterName name of the Helix cluster to operate on
   */
  public ChaosMonkey(String zkAddress, String clusterName) {
    _zkAddress = zkAddress;
    _clusterName = clusterName;
    _generator = new LoadGenerator(zkAddress, clusterName);
    _admin = new ZKHelixAdmin(_zkAddress);
  }

  /**
   * Runs failure-injection iterations until the cluster is found unhealthy or a slice has no
   * master. This method only returns after such a failure.
   *
   * @throws IllegalStateException if the master/slave resource has no ideal state or no
   *         partitions
   * @throws Exception if the thread is interrupted or a cluster operation fails
   */
  public void run() throws Exception {
    LOG.info("Starting load generator");
    String resourceName = MySQLConstants.MASTER_SLAVE_RESOURCE_NAME;
    IdealState idealState = _admin.getResourceIdealState(_clusterName, resourceName);
    if (idealState == null) {
      throw new IllegalStateException(
          "No ideal state found for resource " + resourceName + " in cluster " + _clusterName);
    }
    int numSlices = idealState.getNumPartitions();
    if (numSlices <= 0) {
      throw new IllegalStateException(
          "Resource " + resourceName + " reports " + numSlices + " partitions; nothing to test");
    }

    new Thread(_generator).start();
    _generator.pause();

    boolean isHealthy = true;
    int iter = 0;
    while (isHealthy) {
      LOG.info("Starting iteration:" + iter++);
      LOG.info("Un-Pausing LoadGenerator");
      _generator.unpause();
      // generate load for a while before injecting the failure
      Thread.sleep(LOAD_PERIOD_MS);

      // Math.random() is in [0, 1), so this picks uniformly from 0 .. numSlices - 1.
      int randomSlice = (int) (Math.random() * numSlices);
      LOG.info("Randomly selected sliceNum:" + randomSlice + " to simulate failure");
      String currentMaster = getCurrentMaster(randomSlice);
      LOG.info("Current master for sliceNum:" + randomSlice + " is " + currentMaster);
      if (currentMaster == null) {
        LOG.error("No master available for slice:" + randomSlice);
        break;
      }
      isHealthy = failOverAndValidate(randomSlice, currentMaster);
    }
  }

  /**
   * Disables {@code oldMaster}, waits for a different master of the slice, lets writes run against
   * it, re-enables {@code oldMaster} and validates the slice. The old master is re-enabled even if
   * the iteration is aborted by an exception or a failover timeout.
   *
   * @return {@code true} if a new master appeared in time and validation passed
   */
  private boolean failOverAndValidate(int sliceNumber, String oldMaster) throws Exception {
    LOG.info("Pausing LoadGenerator");
    _generator.pause();
    LOG.info("Disabling the current master:" + oldMaster);
    _admin.enableInstance(_clusterName, oldMaster, false);
    boolean reEnabled = false;
    try {
      LOG.info("Waiting for new master:");
      String newMaster = waitForNewMaster(sliceNumber, oldMaster);
      if (newMaster == null) {
        LOG.error("No new master for slice:" + sliceNumber + " after "
            + MASTER_FAILOVER_TIMEOUT_MS + " ms");
        return false;
      }
      LOG.info("New master:" + newMaster);
      // start writes on new master
      _generator.unpause();
      Thread.sleep(LOAD_PERIOD_MS);

      LOG.info("Enabling the old master:" + oldMaster);
      _admin.enableInstance(_clusterName, oldMaster, true);
      reEnabled = true;
      _generator.pause();
      // wait for it to become slave and catch up
      Thread.sleep(CATCH_UP_PERIOD_MS);
      LOG.info("Validating cluster health");
      return validate(sliceNumber);
    } finally {
      if (!reEnabled) {
        // Never leave the instance disabled behind us when the iteration did not complete.
        try {
          LOG.info("Re-enabling the old master after an aborted iteration:" + oldMaster);
          _admin.enableInstance(_clusterName, oldMaster, true);
        } catch (RuntimeException e) {
          // Log instead of rethrowing so the original failure is not masked.
          LOG.error("Failed to re-enable instance " + oldMaster, e);
        }
      }
    }
  }

  /**
   * Polls the external view until an instance other than {@code oldMaster} is master of the slice.
   *
   * @return the new master, or {@code null} if none appeared within the failover timeout
   */
  private String waitForNewMaster(int sliceNumber, String oldMaster) throws InterruptedException {
    long deadlineNanos = System.nanoTime() + MASTER_FAILOVER_TIMEOUT_MS * 1000000L;
    do {
      Thread.sleep(MASTER_POLL_INTERVAL_MS);
      String candidate = getCurrentMaster(sliceNumber);
      if (candidate != null && !candidate.equals(oldMaster)) {
        return candidate;
      }
    } while (System.nanoTime() - deadlineNanos < 0);
    return null;
  }

  /**
   * Checks that every instance listed in the ideal state for the given slice reports the same
   * executed GTID set.
   *
   * @param sliceNumber index of the slice whose replicas are compared
   * @return {@code true} if all replicas report the same executed GTID set; {@code false} if they
   *         differ or if the information needed for the comparison is unavailable
   */
  private boolean validate(int sliceNumber) {
    IdealState idealstate =
        _admin.getResourceIdealState(_clusterName, MySQLConstants.MASTER_SLAVE_RESOURCE_NAME);
    String partitionName = MySQLConstants.MASTER_SLAVE_RESOURCE_NAME + "_" + sliceNumber;
    Map<String, String> instanceStateMap =
        idealstate == null ? null : idealstate.getInstanceStateMap(partitionName);
    if (instanceStateMap == null) {
      LOG.error("Cluster is unhealthy: no ideal-state instance map for partition "
          + partitionName);
      return false;
    }

    // Collect all statuses first so the reads happen as close together in time as possible.
    Map<String, MasterStatus> masterStatusMap = new HashMap<String, MasterStatus>();
    for (String instance : instanceStateMap.keySet()) {
      InstanceConfig instanceConfig = _admin.getInstanceConfig(_clusterName, instance);
      MySQLAdmin admin = new MySQLAdmin(instanceConfig);
      try {
        masterStatusMap.put(instance, admin.getMasterStatus());
      } finally {
        // Release the connection even when the status query fails.
        admin.close();
      }
    }

    // String equality is transitive, so comparing every replica with the first one is
    // equivalent to comparing all pairs.
    String referenceInstance = null;
    String referenceGtidSet = null;
    for (String instance : instanceStateMap.keySet()) {
      MasterStatus masterStatus = masterStatusMap.get(instance);
      String gtidSet = masterStatus == null
          ? null
          : masterStatus.getString(MasterStatusAttribute.Executed_Gtid_Set);
      if (gtidSet == null) {
        LOG.error("Cluster is unhealthy: could not read the gtid set of " + instance);
        return false;
      }
      if (referenceInstance == null) {
        referenceInstance = instance;
        referenceGtidSet = gtidSet;
      } else if (!referenceGtidSet.equals(gtidSet)) {
        LOG.error("Cluster is unhealthy: gtid set of " + referenceInstance + ":"
            + referenceGtidSet + " does not match the gtid set of " + instance + ":" + gtidSet);
        return false;
      }
    }
    return true;
  }

  /**
   * Command-line entry point.
   *
   * @param args {@code args[0]} is the ZooKeeper address, {@code args[1]} the cluster name
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      System.err.println("Usage: ChaosMonkey <zkAddress> <clusterName>");
      System.exit(2);
    }
    String zkAddress = args[0];
    String clusterName = args[1];
    ChaosMonkey monkey = new ChaosMonkey(zkAddress, clusterName);
    monkey.run();
    // run() only returns after a failed health check or a missing master, hence the non-zero
    // status. Exiting explicitly also stops the non-daemon load generator thread run() started.
    System.exit(1);
  }

  /**
   * Looks up the instance whose external-view state for the given slice is MASTER.
   *
   * @param randomSlice index of the slice to inspect
   * @return the name of the current master, or {@code null} if the external view or the slice's
   *         state map is missing, or if no instance is in the MASTER state
   */
  private String getCurrentMaster(int randomSlice) {
    String resourceName = MySQLConstants.MASTER_SLAVE_RESOURCE_NAME;
    ExternalView resourceExternalView = _admin.getResourceExternalView(_clusterName, resourceName);
    if (resourceExternalView == null) {
      return null;
    }
    Map<String, String> instanceStateMap =
        resourceExternalView.getStateMap(resourceName + "_" + randomSlice);
    if (instanceStateMap == null) {
      return null;
    }
    for (Map.Entry<String, String> entry : instanceStateMap.entrySet()) {
      // Constant-first comparison tolerates a null state value.
      if ("MASTER".equalsIgnoreCase(entry.getValue())) {
        return entry.getKey();
      }
    }
    return null;
  }
}