// Copyright 2016 Twitter. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.twitter.heron.scheduler.slurm;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import com.twitter.heron.common.basics.SysUtils;
import com.twitter.heron.proto.scheduler.Scheduler;
import com.twitter.heron.scheduler.utils.Runtime;
import com.twitter.heron.scheduler.utils.SchedulerUtils;
import com.twitter.heron.spi.common.Config;
import com.twitter.heron.spi.common.Context;
import com.twitter.heron.spi.packing.PackingPlan;
import com.twitter.heron.spi.scheduler.IScheduler;
/**
* Schedules a Heron topology in a HPC cluster using the Slurm Scheduler.
* Uses sbatch command to allocate the resources and srun to run the heron processes.
* Then uses scancel to cancel the running job.s
*/
public class SlurmScheduler implements IScheduler {
private static final Logger LOG = Logger.getLogger(SlurmScheduler.class.getName());
private Config config;
private Config runtime;
private SlurmController controller;
private String workingDirectory;
public SlurmScheduler() {
}
public SlurmScheduler(String workingDirectory) {
this.workingDirectory = workingDirectory;
}
@Override
public void initialize(Config mConfig, Config mRuntime) {
this.config = mConfig;
this.runtime = mRuntime;
this.controller = getController();
// get the topology working directory
this.workingDirectory = SlurmContext.workingDirectory(config);
}
/**
* Get a SlurmControl basing on the config and runtime
*
* @return SlurmController
*/
protected SlurmController getController() {
return new SlurmController(Context.verbose(config));
}
@Override
public void close() {
// Nothing to do here
}
@Override
public boolean onSchedule(PackingPlan packing) {
if (packing == null || packing.getContainers().isEmpty()) {
LOG.log(Level.SEVERE, "No container requested. Can't schedule");
return false;
}
LOG.info("Launching topology in Slurm scheduler");
long containers = Runtime.numContainers(runtime);
boolean jobCreated = controller.createJob(getHeronSlurmPath(),
SlurmContext.executorBinary(this.config),
getExecutorCommand(packing),
this.workingDirectory, containers);
if (!jobCreated) {
LOG.log(Level.SEVERE, "Failed to create job");
} else {
LOG.log(Level.FINE, "Job created successfully");
}
return jobCreated;
}
@Override
public List<String> getJobLinks() {
return new ArrayList<>();
}
@Override
public boolean onKill(Scheduler.KillTopologyRequest request) {
// get the slurm id
String file = getJobIdFilePath();
return controller.killJob(file);
}
@Override
public boolean onRestart(Scheduler.RestartTopologyRequest request) {
return true;
}
@Override
public boolean onUpdate(Scheduler.UpdateTopologyRequest request) {
LOG.severe("Topology onUpdate not implemented by this scheduler.");
return false;
}
protected String getJobIdFilePath() {
return new File(workingDirectory, SlurmContext.jobIdFile(config)).getPath();
}
protected String getHeronSlurmPath() {
return new File(Context.heronConf(config), SlurmContext.slurmShellScript(config)).getPath();
}
protected String[] getExecutorCommand(PackingPlan packing) {
List<String> freePorts = new ArrayList<>(SchedulerUtils.PORTS_REQUIRED_FOR_EXECUTOR);
for (int i = 0; i < SchedulerUtils.PORTS_REQUIRED_FOR_EXECUTOR; i++) {
freePorts.add(Integer.toString(SysUtils.getFreePort()));
}
String[] executorCmd = SchedulerUtils.executorCommandArgs(this.config, this.runtime, freePorts);
LOG.log(Level.FINE, "Executor command line: ", Arrays.toString(executorCmd));
return executorCmd;
}
}