package org.radargun;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import org.radargun.config.Cluster;
import org.radargun.config.Configuration;
import org.radargun.config.InitHelper;
import org.radargun.config.MasterConfig;
import org.radargun.config.ReporterConfiguration;
import org.radargun.logging.Log;
import org.radargun.logging.LogFactory;
import org.radargun.reporting.Report;
import org.radargun.reporting.Reporter;
import org.radargun.reporting.ReporterHelper;
import org.radargun.reporting.Timeline;
import org.radargun.stages.control.RepeatStage;
import org.radargun.state.MasterListener;
import org.radargun.state.MasterState;
import org.radargun.utils.TimeService;
import org.radargun.utils.Utils;

/**
 * This is the master that coordinates the {@link Slave}s in order to run the benchmark.
 *
 * @author Mircea Markus <Mircea.Markus@jboss.com>
 */
public class Master {

   private static Log log = LogFactory.getLog(Master.class);

   private final MasterConfig masterConfig;
   private final MasterState state;
   private final ArrayList<Report> reports = new ArrayList<>();
   private int returnCode;
   private boolean exitFlag = false;
   private RemoteSlaveConnection connection;

   public Master(MasterConfig masterConfig) {
      this.masterConfig = masterConfig;
      state = new MasterState(masterConfig);
      Runtime.getRuntime().addShutdownHook(new ShutDownHook("Master process"));
   }

   public MasterConfig getMasterConfig() {
      return masterConfig;
   }

   public void run() throws Exception {
      try {
         connection = new RemoteSlaveConnection(masterConfig.getMaxClusterSize(), masterConfig.getHost(), masterConfig.getPort());
         connection.establish();
         connection.receiveSlaveAddresses();
         state.setMaxClusterSize(masterConfig.getMaxClusterSize());
         // create the reporters now so that we fail early in case of misconfiguration
         ArrayList<Reporter> reporters = new ArrayList<>();
         for (ReporterConfiguration reporterConfiguration : masterConfig.getReporters()) {
            for (ReporterConfiguration.Report report : reporterConfiguration.getReports()) {
               reporters.add(ReporterHelper.createReporter(reporterConfiguration.type, report.getProperties()));
            }
         }
         long benchmarkStart = TimeService.currentTimeMillis();
         for (Configuration configuration : masterConfig.getConfigurations()) {
            log.info("Started benchmarking configuration '" + configuration.name + "'");
            state.setConfigName(configuration.name);
            for (MasterListener listener : state.getListeners()) {
               listener.beforeConfiguration();
            }
            long configStart = TimeService.currentTimeMillis();
            for (Cluster cluster : masterConfig.getClusters()) {
               int clusterSize = cluster.getSize();
               log.info("Starting scenario on " + cluster);
               connection.sendCluster(cluster);
               connection.sendSlaveAddresses();
               connection.sendConfiguration(configuration);
               // the slaves are restarted here, therefore the cluster, slave addresses
               // and configuration have to be sent to them again
               connection.restartSlaves(clusterSize);
               connection.sendCluster(cluster);
               connection.sendSlaveAddresses();
               connection.sendConfiguration(configuration);
               connection.sendScenario(masterConfig.getScenario(), clusterSize);
               state.setCluster(cluster);
               state.setReport(new Report(configuration, cluster));
               for (MasterListener listener : state.getListeners()) {
                  listener.beforeCluster();
               }
               long clusterStart = TimeService.currentTimeMillis();
               int stageCount = masterConfig.getScenario().getStageCount();
               // These two stages are inserted at the end of the scenario in this order during parsing
               int scenarioDestroyId = stageCount - 2;
               int scenarioCleanupId = stageCount - 1;
               try {
                  try {
                     // ScenarioDestroy and ScenarioCleanup are special stages; the nested finally blocks ensure they always run
                     int nextStageId = 0;
                     do {
                        nextStageId = executeStage(configuration, cluster, nextStageId);
                     } while (nextStageId >= 0 && nextStageId < scenarioDestroyId);
                  } finally {
                     // run ScenarioDestroy
                     executeStage(configuration, cluster, scenarioDestroyId);
                  }
               } finally {
                  // run ScenarioCleanup
                  executeStage(configuration, cluster, scenarioCleanupId);
               }
               log.info("Finished scenario on " + cluster + " in " + Utils.getMillisDurationString(TimeService.currentTimeMillis() - clusterStart));
               for (MasterListener listener : state.getListeners()) {
                  listener.afterCluster();
               }
               state.getReport().addTimelines(connection.receiveTimelines(clusterSize));
               reports.add(state.getReport());
               if (exitFlag) {
                  break;
               }
            }
            log.info("Finished benchmarking configuration '" + configuration.name + "' in " + Utils.getMillisDurationString(TimeService.currentTimeMillis() - configStart));
            for (MasterListener listener : state.getListeners()) {
               listener.afterConfiguration();
            }
            if (exitFlag) {
               log.info("Exiting whole benchmark");
               break;
            }
         }
         log.info("Executed all benchmarks in " + Utils.getMillisDurationString(TimeService.currentTimeMillis() - benchmarkStart) + ", reporting...");
         for (Reporter reporter : reporters) {
            try {
               log.info("Running reporter " + reporter);
               reporter.run(masterConfig, Collections.unmodifiableList(reports));
            } catch (Exception e) {
               log.error("Error in reporter " + reporter, e);
               returnCode = 127;
            } finally {
               InitHelper.destroy(reporter);
            }
         }
         String reportersMessage = reporters.isEmpty() ? "No reporters have been specified." : "All reporters have been executed, exiting.";
         log.info(reportersMessage);
      } catch (Throwable e) {
         log.error("Exception in Master.run: ", e);
         returnCode = 127;
      } finally {
         if (connection != null) {
            connection.release();
         }
         ShutDownHook.exit(returnCode);
      }
   }

   private int executeStage(Configuration configuration, Cluster cluster, int stageId) {
      Stage stage = masterConfig.getScenario().getStage(stageId, state, getCurrentExtras(masterConfig, configuration, cluster), state.getReport());
      InitHelper.init(stage);
      StageResult result;
      try {
         if (stage instanceof MasterStage) {
            result = executeMasterStage((MasterStage) stage);
         } else if (stage instanceof DistStage) {
            result = executeDistStage(stageId, (DistStage) stage);
         } else {
            log.error("Stage '" + stage.getName() + "' is neither master nor distributed");
            return -1;
         }
      } finally {
         InitHelper.destroy(stage);
      }
      if (result == StageResult.SUCCESS) {
         return stageId + 1;
      } else if (result == StageResult.FAIL || result == StageResult.EXIT) {
         // a non-zero return code identifies the failing configuration (1-based index)
         returnCode = masterConfig.getConfigurations().indexOf(configuration) + 1;
         if (result == StageResult.EXIT) {
            exitFlag = true;
         }
         return -1;
      } else if (result == StageResult.BREAK || result == StageResult.CONTINUE) {
         // BREAK/CONTINUE jump to the end/begin label of the innermost repeat
         Stack<String> repeatNames = (Stack<String>) state.get(RepeatStage.REPEAT_NAMES);
         String nextLabel;
         if (repeatNames == null || repeatNames.isEmpty()) {
            log.warn("BREAK or CONTINUE used outside of any repeat.");
            return -1;
         } else if (result == StageResult.BREAK) {
            nextLabel = Utils.concat(".", "repeat", repeatNames.peek(), "end");
         } else if (result == StageResult.CONTINUE) {
            nextLabel = Utils.concat(".", "repeat", repeatNames.peek(), "begin");
         } else throw new IllegalStateException();
         int nextStageId = masterConfig.getScenario().getLabel(nextLabel);
         if (nextStageId < 0) {
            log.error("No label '" + nextLabel + "' defined");
         }
         return nextStageId;
      } else {
IllegalStateException("Result does not match to any type."); } } private Map<String, String> getCurrentExtras(MasterConfig masterConfig, Configuration configuration, Cluster cluster) { Map<String, String> extras = new HashMap<String, String>(); extras.put(Properties.PROPERTY_CONFIG_NAME, configuration.name); extras.put(Properties.PROPERTY_CLUSTER_SIZE, String.valueOf(cluster.getSize())); extras.put(Properties.PROPERTY_CLUSTER_MAX_SIZE, String.valueOf(masterConfig.getMaxClusterSize())); // we have to define these properties because distributed stages are resolved on master as well extras.put(Properties.PROPERTY_PLUGIN_NAME, "__no-plugin"); extras.put(Properties.PROPERTY_GROUP_NAME, "__master"); extras.put(Properties.PROPERTY_GROUP_SIZE, "0"); for (Cluster.Group group : cluster.getGroups()) { extras.put(Properties.PROPERTY_GROUP_PREFIX + group.name + Properties.PROPERTY_SIZE_SUFFIX, String.valueOf(group.size)); } extras.put(Properties.PROPERTY_SLAVE_INDEX, "-1"); extras.put(Properties.PROPERTY_PROCESS_ID, String.valueOf(Utils.getProcessID())); return extras; } private StageResult executeMasterStage(MasterStage stage) { stage.init(state); if (log.isDebugEnabled()) log.debug("Starting master stage " + stage.getName() + ". Details:" + stage); else log.info("Starting master stage " + stage.getName() + "."); long start = TimeService.currentTimeMillis(), end = start; try { StageResult result = stage.execute(); end = TimeService.currentTimeMillis(); if (result.isError()) { log.error("Execution of master stage " + stage.getName() + " failed."); } else { log.info("Finished master stage " + stage.getName()); } return result; } catch (Exception e) { end = TimeService.currentTimeMillis(); log.error("Caught exception", e); return StageResult.FAIL; } finally { state.getTimeline().addEvent(Stage.STAGE, new Timeline.IntervalEvent(start, stage.getName(), end - start)); } } private StageResult executeDistStage(int stageId, DistStage stage) { if (log.isDebugEnabled()) log.debug("Starting distributed stage " + stage.getName() + ". Details:" + stage); else log.info("Starting distributed stage " + stage.getName() + "."); int numSlaves = state.getClusterSize(); Map<String, Object> masterData; try { stage.initOnMaster(state); masterData = stage.createMasterData(); } catch (Exception e) { log.error("Failed to initialize stage", e); return StageResult.EXIT; } List<DistStageAck> responses = null; try { responses = connection.runStage(stageId, masterData, numSlaves); } catch (IOException e) { log.error("Error when communicating to slaves"); return StageResult.EXIT; } if (responses.size() > 1) { Collections.sort(responses, new Comparator<DistStageAck>() { @Override public int compare(DistStageAck o1, DistStageAck o2) { int thisVal = o1.getSlaveIndex(); int anotherVal = o2.getSlaveIndex(); return (thisVal < anotherVal ? -1 : (thisVal == anotherVal ? 0 : 1)); } }); } StageResult result; try { result = stage.processAckOnMaster(responses); } catch (Exception e) { log.error("Processing acks on master failed", e); return StageResult.EXIT; } if (result.isError()) { log.error("Execution of distributed stage " + stage.getName() + " failed"); } else { log.info("Finished distributed stage " + stage.getName() + "."); } return result; } }