package uk.ac.imperial.lsds.seepmaster.query; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import uk.ac.imperial.lsds.seep.api.DataReference; import uk.ac.imperial.lsds.seep.api.RuntimeEvent; import uk.ac.imperial.lsds.seep.api.SeepChooseTask; import uk.ac.imperial.lsds.seep.api.operator.DownstreamConnection; import uk.ac.imperial.lsds.seep.api.operator.Operator; import uk.ac.imperial.lsds.seep.api.operator.SeepLogicalOperator; import uk.ac.imperial.lsds.seep.api.operator.SeepLogicalQuery; import uk.ac.imperial.lsds.seep.api.operator.UpstreamConnection; import uk.ac.imperial.lsds.seep.api.operator.sinks.MarkerSink; import uk.ac.imperial.lsds.seep.api.operator.sinks.Sink; import uk.ac.imperial.lsds.seep.api.operator.sources.Source; import uk.ac.imperial.lsds.seep.api.state.DistributedMutableState; import uk.ac.imperial.lsds.seep.comm.Comm; import uk.ac.imperial.lsds.seep.comm.Connection; import uk.ac.imperial.lsds.seep.comm.protocol.ProtocolCommandFactory; import uk.ac.imperial.lsds.seep.comm.protocol.SeepCommand; import uk.ac.imperial.lsds.seep.comm.protocol.StageStatusCommand; import uk.ac.imperial.lsds.seep.comm.serialization.KryoFactory; import uk.ac.imperial.lsds.seep.core.DatasetMetadataPackage; import uk.ac.imperial.lsds.seep.errors.NotImplementedException; import uk.ac.imperial.lsds.seep.scheduler.ScheduleDescription; import uk.ac.imperial.lsds.seep.scheduler.Stage; import uk.ac.imperial.lsds.seep.scheduler.StageType; import uk.ac.imperial.lsds.seep.util.Utils; import uk.ac.imperial.lsds.seepmaster.LifecycleManager; import uk.ac.imperial.lsds.seepmaster.MasterConfig; import uk.ac.imperial.lsds.seepmaster.infrastructure.master.ExecutionUnit; import uk.ac.imperial.lsds.seepmaster.infrastructure.master.InfrastructureManager; import uk.ac.imperial.lsds.seepmaster.scheduler.ScheduleManager; import uk.ac.imperial.lsds.seepmaster.scheduler.ScheduleTracker; import uk.ac.imperial.lsds.seepmaster.scheduler.SchedulerEngineWorker; import uk.ac.imperial.lsds.seepmaster.scheduler.loadbalancing.LoadBalancingStrategyType; import uk.ac.imperial.lsds.seepmaster.scheduler.schedulingstrategy.SchedulingStrategyType; import com.esotericsoftware.kryo.Kryo; public class ScheduledQueryManager implements QueryManager, ScheduleManager { final private Logger LOG = LoggerFactory.getLogger(ScheduledQueryManager.class); private MasterConfig mc; private static ScheduledQueryManager sqm; private String pathToQueryJar; private String definitionClassName; private String[] queryArgs; private String composeMethodName; private short queryType; private InfrastructureManager inf; private Comm comm; private Kryo k; private LifecycleManager lifeManager; // Scheduler machinery private ScheduleDescription scheduleDescription; private Thread worker; private SchedulerEngineWorker seWorker; private ScheduledQueryManager(InfrastructureManager inf, Comm comm, LifecycleManager lifeManager, MasterConfig mc, short queryType){ this.mc = mc; this.inf = inf; this.comm = comm; this.lifeManager = lifeManager; this.k = KryoFactory.buildKryoForProtocolCommands(this.getClass().getClassLoader()); this.queryType = queryType; } public static ScheduledQueryManager getInstance(InfrastructureManager inf, Comm comm, LifecycleManager lifeManager, MasterConfig mc, short queryType){ if(sqm == null){ return new ScheduledQueryManager(inf, comm, lifeManager, mc, queryType); } else{ return sqm; } } /** Implement QueryManager interface **/ @Override public boolean loadQueryFromParameter(short queryType, SeepLogicalQuery slq, String pathToQueryJar, String definitionClass, String[] queryArgs, String composeMethod) { boolean allowed = lifeManager.canTransitTo(LifecycleManager.AppStatus.QUERY_SUBMITTED); if(!allowed){ LOG.error("Attempt to violate application lifecycle"); return false; } this.pathToQueryJar = pathToQueryJar; this.definitionClassName = definitionClass; this.queryArgs = queryArgs; this.composeMethodName = composeMethod; LOG.debug("Logical query loaded: {}", slq.toString()); // Create Scheduler Engine and build scheduling plan for the given query scheduleDescription = this.buildSchedulingPlanForQuery(slq); loadSchedule(scheduleDescription, pathToQueryJar, definitionClassName, queryArgs, composeMethodName); return true; } public boolean loadSchedule( ScheduleDescription scheduleDescription, String pathToQueryJar, String definitionClass, String[] queryArgs, String composeMethod) { // Create Scheduler Engine for the given schedule this.scheduleDescription = scheduleDescription; this.pathToQueryJar = pathToQueryJar; this.definitionClassName = definitionClass; this.queryArgs = queryArgs; this.composeMethodName = composeMethod; // Initialize the schedulerThread seWorker = new SchedulerEngineWorker( scheduleDescription, SchedulingStrategyType.clazz(mc.getInt(MasterConfig.SCHED_STRATEGY)), LoadBalancingStrategyType.clazz(mc.getInt(MasterConfig.SCHED_STAGE_ASSIGMENT_STRATEGY)), mc.getInt(MasterConfig.MEM_MANAGEMENT_POLICY), mc.getDouble(MasterConfig.DISK_MEM_RATIO), inf, comm, k); worker = new Thread(seWorker); LOG.info("Schedule Description:"); LOG.info(scheduleDescription.toString()); lifeManager.tryTransitTo(LifecycleManager.AppStatus.QUERY_SUBMITTED); return true; } @Override public boolean loadQueryFromFile(short queryType, String pathToQueryJar, String definitionClass, String[] queryArgs, String composeMethod) { throw new NotImplementedException("ScheduledQueryManager.loadQueryFromFile not implemented !!"); } @Override public boolean deployQueryToNodes() { boolean allowed = lifeManager.canTransitTo(LifecycleManager.AppStatus.QUERY_DEPLOYED); if(!allowed) { LOG.error("Attempt to violate application lifecycle"); return false; } // Check that there is at least one resource available if(! (inf.executionUnitsAvailable() > 0)) { LOG.warn("Cannot deploy query, not enough nodes. Available: {}", inf.executionUnitsAvailable()); return false; } // We want to be able to schedule tasks in any node in the cluster, so send to all Set<Integer> involvedEUId = new HashSet<>(); int totalEUAvailable = inf.executionUnitsAvailable(); for(int i = 0; i < totalEUAvailable; i++) { ExecutionUnit eu = inf.getExecutionUnit(); involvedEUId.add(eu.getId()); } Set<Connection> connections = inf.getConnectionsTo(involvedEUId); LOG.info("Sending query and schedule to nodes"); sendQueryToNodes(queryType, connections, definitionClassName, queryArgs, composeMethodName); sendScheduleToNodes(connections); LOG.info("Sending query and schedule to nodes...OK {}"); LOG.info("Prepare scheduler engine..."); // Get the input info for the first stages seWorker.prepareForStart(connections); LOG.info("Prepare scheduler engine...OK"); lifeManager.tryTransitTo(LifecycleManager.AppStatus.QUERY_DEPLOYED); return true; } @Override public boolean startQuery() { LOG.info("Staring Scheduler"); worker.start(); return true; } @Override public boolean stopQuery() { LOG.info("Stop scheduling"); try { worker.join(); } catch (InterruptedException e) { e.printStackTrace(); } return true; } // FIXME: this code is repeated in materialisedQueryManager. please refactor // FIXME: in particular, consider moving this to MasterWorkerAPIImplementation (that already handles a comm and k) private void sendQueryToNodes(short queryType, Set<Connection> connections, String definitionClassName, String[] queryArgs, String composeMethodName) { // Send data file to nodes byte[] queryFile = Utils.readDataFromFile(pathToQueryJar); LOG.info("Sending query file of size: {} bytes", queryFile.length); SeepCommand code = ProtocolCommandFactory.buildCodeCommand(queryType, queryFile, definitionClassName, queryArgs, composeMethodName); comm.send_object_sync(code, connections, k); LOG.info("Sending query file...DONE!"); } private boolean sendScheduleToNodes(Set<Connection> connections){ LOG.info("Sending Schedule Deploy Command"); // Send physical query to all nodes SeepCommand scheduleDeploy = ProtocolCommandFactory.buildScheduleDeployCommand(scheduleDescription); boolean success = comm.send_object_sync(scheduleDeploy, connections, k); return success; } public ScheduleDescription buildSchedulingPlanForQuery(SeepLogicalQuery slq) { Set<Integer> opsAlreadyInSchedule = new HashSet<>(); // Start building from sink SeepLogicalOperator op = (SeepLogicalOperator) slq.getSink(); // Recursive method, with opsAlreadyInSchedule to detect already incorporated stages Set<Stage> stages = new HashSet<>(); int stageId = 0; buildScheduleFromStage(null, op, opsAlreadyInSchedule, slq, stages, stageId); ScheduleDescription sd = new ScheduleDescription(stages, slq.getAllOperators()); return sd; } private void buildScheduleFromStage(Stage parent, SeepLogicalOperator slo, Set<Integer> opsAlreadyInSchedule, SeepLogicalQuery slq, Set<Stage> stages, int stageId) { // Check whether this op has already been incorporated to a stage and abort if so int opId = slo.getOperatorId(); if(opsAlreadyInSchedule.contains(opId)) { // Create dependency with the stage governing opId in this case and return Stage dependency = stageResponsibleFor(opId, stages); parent.dependsOn(dependency); return; } // Create new stage and dependency with parent Stage stage = new Stage(stageId); if(parent != null) { // To jump the first case parent.dependsOn(stage); } stage = createStageFromLogicalOperator(stage, opsAlreadyInSchedule, slo); stages.add(stage); StageType type = stage.getStageType(); // If we hit a source or unique stage, then configure Input and finish if(type.equals(StageType.SOURCE_STAGE) || type.equals(StageType.UNIQUE_STAGE)) { return; } // Update slo after stage creation slo = (SeepLogicalOperator) slq.getOperatorWithId(stage.getIdOfOperatorBoundingStage()); // If multiple input explore for each if(stage.hasMultipleInput()) { for(UpstreamConnection uc : slo.upstreamConnections()) { SeepLogicalOperator upstreamOp = (SeepLogicalOperator) uc.getUpstreamOperator(); stageId = stages.size(); buildScheduleFromStage(stage, upstreamOp, opsAlreadyInSchedule, slq, stages, stageId); } // If not explore the previous op } else { SeepLogicalOperator upstreamOp = (SeepLogicalOperator)slo.upstreamConnections().get(0).getUpstreamOperator(); stageId++; buildScheduleFromStage(stage, upstreamOp, opsAlreadyInSchedule, slq, stages, stageId); } } private Stage stageResponsibleFor(int opId, Set<Stage> currentStages) { for(Stage s : currentStages) { if(s.responsibleFor(opId)) { return s; } } return null; } private Stage createStageFromLogicalOperator(Stage stage, Set<Integer> opsAlreadyInSchedule, SeepLogicalOperator slo) { StageType type = null; boolean containsSinkOperator = false; boolean containsSourceOperator = false; boolean isChooseOperator = false; boolean finishesStage = false; do { // get opId of current op int opId = slo.getOperatorId(); // Add opId to stage stage.add(opId); opsAlreadyInSchedule.add(opId); if (isSink(slo)) containsSinkOperator = true; if (isSource(slo)) containsSourceOperator = true; // FIXME: no need to reason about stateful ops here. only whether they need or not shuffle // Check if it terminates stage // has partitioned state? if(slo.isStateful()) { if(slo.getState().getDMS().equals(DistributedMutableState.PARTITIONED)) { stage.setHasPartitionedState(); finishesStage = true; } } // has multiple inputs? if(slo.upstreamConnections().size() > 1) { stage.setRequiresMultipleInput(); finishesStage = true; } // is a choose operator? if(slo.getSeepTask() instanceof SeepChooseTask) { isChooseOperator = true; finishesStage = true; } // is any of my upstream a choose operator? for(UpstreamConnection uc : slo.upstreamConnections()) { if(uc.getUpstreamOperator() != null) { if(uc.getUpstreamOperator().getSeepTask() instanceof SeepChooseTask) { finishesStage = true; } } } // is source operator? if(containsSourceOperator) { finishesStage = true; } // if not source op, then... else { // has upstream downstreams other than me? Operator op = slo.upstreamConnections().get(0).getUpstreamOperator(); if(op == null || op.downstreamConnections().size() > 1) { finishesStage = true; } } // Get next operator if(!finishesStage){ slo = (SeepLogicalOperator)slo.upstreamConnections().get(0).getUpstreamOperator(); } } while(!finishesStage); // Set stage type if(containsSourceOperator && containsSinkOperator) { type = StageType.UNIQUE_STAGE; } else if(containsSourceOperator) { type = StageType.SOURCE_STAGE; } else if(containsSinkOperator) { type = StageType.SINK_STAGE; } else if(isChooseOperator) { type = StageType.CHOOSE_STAGE; } else { type = StageType.INTERMEDIATE_STAGE; } stage.setStageType(type); return stage; } private boolean isSink(SeepLogicalOperator slo) { for(DownstreamConnection dc : slo.downstreamConnections()) { if (dc.getDownstreamOperator() instanceof MarkerSink) { return true; } } if(slo.getSeepTask() instanceof Sink) { return true; } return false; } private boolean isSource(SeepLogicalOperator slo) { // Source if the op is a Source itself, or if its unique upstream is null. // Null indicates that it's a tagging operator if(slo.getSeepTask() instanceof Source || slo.upstreamConnections().get(0).getUpstreamOperator() == null) { return true; } return false; } /** Implement ScheduleManager interface **/ @Override public void notifyStageStatus(StageStatusCommand ssc) { int stageId = ssc.getStageId(); int euId = ssc.getEuId(); Map<Integer, Set<DataReference>> results = ssc.getResultDataReference(); StageStatusCommand.Status status = ssc.getStatus(); List<RuntimeEvent> runtimeEvents = ssc.getRuntimeEvents(); DatasetMetadataPackage managedDatasets = ssc.getManagedDatasets(); seWorker.newStageStatus(stageId, euId, results, status, runtimeEvents, managedDatasets); } /** Methods to facilitate testing **/ public void __initializeEverything(){ seWorker.prepareForStart(null); } public ScheduleTracker __tracker_for_test(){ return seWorker.__tracker_for_testing(); } public Stage __get_next_stage_to_schedule_fot_test(){ return seWorker.__next_stage_scheduler(); } public void __reset_schedule() { seWorker.__reset_schedule(); } }