/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.minion;
import javax.annotation.Nonnull;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.function.Function;
import com.addthis.basis.util.LessBytes;
import com.addthis.basis.util.LessFiles;
import com.addthis.basis.util.LessStrings;
import com.addthis.basis.util.SimpleExec;
import com.addthis.codec.annotations.FieldConfig;
import com.addthis.codec.codables.Codable;
import com.addthis.codec.json.CodecJSON;
import com.addthis.hydra.job.*;
import com.addthis.hydra.job.backup.DailyBackup;
import com.addthis.hydra.job.backup.GoldBackup;
import com.addthis.hydra.job.backup.HourlyBackup;
import com.addthis.hydra.job.backup.MonthlyBackup;
import com.addthis.hydra.job.backup.ScheduledBackupType;
import com.addthis.hydra.job.backup.WeeklyBackup;
import com.addthis.hydra.job.mq.CommandTaskKick;
import com.addthis.hydra.job.mq.CommandTaskNew;
import com.addthis.hydra.job.mq.JobKey;
import com.addthis.hydra.job.mq.ReplicaTarget;
import com.addthis.hydra.job.mq.StatusTaskBackup;
import com.addthis.hydra.job.mq.StatusTaskBegin;
import com.addthis.hydra.job.mq.StatusTaskEnd;
import com.addthis.hydra.job.mq.StatusTaskPort;
import com.addthis.hydra.job.mq.StatusTaskReplica;
import com.addthis.hydra.job.mq.StatusTaskReplicate;
import com.addthis.hydra.job.mq.StatusTaskRevert;
import com.addthis.hydra.task.run.TaskExitState;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.yammer.metrics.core.TimerContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@JsonAutoDetect(getterVisibility = JsonAutoDetect.Visibility.NONE,
isGetterVisibility = JsonAutoDetect.Visibility.NONE,
setterVisibility = JsonAutoDetect.Visibility.NONE)
@JsonIgnoreProperties("retries")
public class JobTask implements Codable {
// Shared SLF4J logger for all JobTask instances.
private static final Logger log = LoggerFactory.getLogger(JobTask.class);
// ---- @FieldConfig fields ----
// save() JSON-encodes this object into config/job.state and restoreTaskState() decodes it back.
// Presumably only the annotated fields below take part in that round trip -- TODO confirm
// against the codec's @FieldConfig semantics.
// Job id; together with node it forms the JobKey (see getJobKey()).
@FieldConfig(required = true) String id;
// Task (node) number within the job.
@FieldConfig(required = true) Integer node;
// Incremented by exec() each time a run script is written.
@FieldConfig(required = true) int runCount;
// NOTE(review): not read or written in the visible part of this file -- purpose to be confirmed.
@FieldConfig(required = true) long runTime;
// Node count for the job, copied from the kick message in exec().
@FieldConfig Integer nodeCount;
// Most recent kick message accepted by exec().
@FieldConfig CommandTaskKick kick;
// Millis timestamp set when exec() writes a run script; recoverWorkItem() treats > 0 as "was active".
@FieldConfig long startTime;
// Forced to true by restoreTaskState().
@FieldConfig boolean monitored = true;
// File count and byte total last computed by updateFileStats(); reported in StatusTaskEnd.
@FieldConfig long fileCount;
@FieldConfig long fileBytes;
// Deletion flag exposed through setDeleted()/isDeleted().
@FieldConfig volatile boolean deleted;
// Copied from the kick message in exec() and handed to RunTaskWorkItem.
@FieldConfig boolean autoRetry;
// Reset to false by exec(); forwarded to Spawn in StatusTaskEnd.
@FieldConfig boolean wasQueued;
// Millis timestamps set when execReplicate()/execBackup() start real work; reset by recoverWorkItem().
@FieldConfig long replicateStartTime;
@FieldConfig long backupStartTime;
// Rebalance endpoints; presumably written by setRebalanceSource/Target (defined outside this view).
@FieldConfig String rebalanceSource;
@FieldConfig String rebalanceTarget;
// Extra replica targets that clearFailureReplicas() merges into replicas.
@FieldConfig volatile ReplicaTarget[] failureRecoveryReplicas;
// Replica targets; overwritten from the kick message in exec().
@FieldConfig volatile ReplicaTarget[] replicas;
// ---- Fields without @FieldConfig ----
// Minion instance supplied to the constructor; used for messaging, locks and shared state.
Minion minion;
// Process handle set via setProcess(); destroyed by interruptProcess().
Process process;
// Thread running the current run/replicate/backup work item.
Thread workItemThread;
// Task root directory recorded by restoreTaskState().
File taskRoot;
// Script, marker and pid files; assigned in initializeFileVariables(), exec(), execReplicate()
// and execBackup().
File jobRun;
File replicateSH;
File replicateRun;
File backupSH;
File backupRun;
File jobDone;
File replicateDone;
File backupDone;
File jobStopped;
// The task's "live" directory.
File jobDir;
// Log directory under jobDir, and the log.out / log.err entries inside it.
File logDir;
File logOut;
File logErr;
File jobPid;
File replicatePid;
File backupPid;
File jobPort;
// Port reported by sendPort(); reset to null by exec() before a new run.
Integer port;
/**
 * Creates a task that uses the given minion for messaging, locking and shared state.
 *
 * @param minion the minion instance this task will call into
 */
public JobTask(Minion minion) {
    this.minion = minion;
}
/**
 * Updates the deletion flag of this task.
 *
 * @param deleted the new flag value
 */
public void setDeleted(boolean deleted) {
    this.deleted = deleted;
}
/**
 * Builds a fresh key from this task's job id and node number.
 *
 * @return a new {@code JobKey} for (id, node)
 */
public JobKey getJobKey() {
    JobKey key = new JobKey(id, node);
    return key;
}
/**
 * @return the stored kick message, or null if none has been stored
 */
public CommandTaskKick getKick() {
    return this.kick;
}
/**
 * @return the recorded run start timestamp
 */
public long getStartTime() {
    return this.startTime;
}
/**
 * Records the run start timestamp.
 *
 * @param startTime the new value
 */
public void setStartTime(long startTime) {
    this.startTime = startTime;
}
/**
 * @return the recorded replicate start timestamp
 */
public long getReplicateStartTime() {
    return this.replicateStartTime;
}
/**
 * Records the replicate start timestamp.
 *
 * @param replicateStartTime the new value
 */
public void setReplicateStartTime(long replicateStartTime) {
    this.replicateStartTime = replicateStartTime;
}
/**
 * @return the recorded backup start timestamp
 */
public long getBackupStartTime() {
    return this.backupStartTime;
}
/**
 * Records the backup start timestamp.
 *
 * @param backupStartTime the new value
 */
public void setBackupStartTime(long backupStartTime) {
    this.backupStartTime = backupStartTime;
}
/**
 * Stores the process handle that {@link #interruptProcess()} will act on.
 *
 * @param p the process handle, may be null
 */
public void setProcess(Process p) {
    process = p;
}
/**
 * Destroys the stored process, if one has been set; otherwise does nothing.
 */
public void interruptProcess() {
    if (this.process == null) {
        return;
    }
    this.process.destroy();
}
/**
 * @return the current auto-retry flag
 */
public boolean getAutoRetry() {
    return this.autoRetry;
}
/**
 * Updates the auto-retry flag.
 *
 * @param autoRetry the new flag value
 */
public void setAutoRetry(boolean autoRetry) {
    this.autoRetry = autoRetry;
}
/**
 * Appends every failure-recovery replica to the main replica array, then resets the
 * failure-recovery array to empty. A null array on either side is treated as empty.
 */
public void clearFailureReplicas() {
    List<ReplicaTarget> merged = new ArrayList<>();
    if (replicas != null) {
        merged.addAll(Arrays.asList(replicas));
    }
    if (failureRecoveryReplicas != null) {
        merged.addAll(Arrays.asList(failureRecoveryReplicas));
    }
    replicas = merged.toArray(new ReplicaTarget[merged.size()]);
    failureRecoveryReplicas = new ReplicaTarget[]{};
}
/**
 * @return the current deletion flag
 */
public boolean isDeleted() {
    return this.deleted;
}
/**
 * Wraps the given work item in a new (unstarted) thread and stores it, or clears the
 * stored thread when the argument is null.
 *
 * @param workItemThread the work item to wrap, or null to clear
 */
public void setWorkItemThread(MinionWorkItem workItemThread) {
    if (workItemThread == null) {
        this.workItemThread = null;
    } else {
        this.workItemThread = new Thread(workItemThread);
    }
}
/**
 * Persists this task as JSON into {@code job.state} inside the task's config directory.
 * <p>
 * This is best-effort: any failure is logged and swallowed so that callers are never
 * interrupted by a state-write problem.
 */
public void save() {
    try {
        File stateFile = new File(getConfigDir(), "job.state");
        LessFiles.write(stateFile, LessBytes.toBytes(CodecJSON.encodeString(this)), false);
    } catch (Exception e) {
        // Name the task so the failure can be traced; the original logged an empty message.
        log.warn("[task.save] failed to write job.state for {}", getName(), e);
    }
}
/**
 * Recomputes file statistics for the live directory, writes them to {@code job.stats}
 * in the config directory (best-effort), and caches the count and byte totals on this task.
 * <p>
 * The elapsed time is reported to {@code minion.fileStatsTimer}. The timer context is
 * stopped in a {@code finally} block so it is released even if the scan throws.
 */
public void updateFileStats() {
    final TimerContext updateTimer = minion.fileStatsTimer.time();
    try {
        FileStats stats = new FileStats();
        stats.update(jobDir);
        try {
            LessFiles.write(new File(getConfigDir(), "job.stats"),
                            LessBytes.toBytes(CodecJSON.encodeString(stats)), false);
        } catch (Exception e) {
            // Failing to persist the stats must not prevent the in-memory values from updating.
            log.warn("[task.stats] failed to write job.stats for {}", getName(), e);
        }
        fileCount = stats.count;
        fileBytes = stats.bytes;
    } finally {
        updateTimer.stop();
    }
}
/**
 * Adds this task's name to the minion's active task keys while holding
 * {@code Minion.capacityLock}.
 */
public void allocate() {
    Minion.capacityLock.lock();
    try {
        String taskName = getName();
        minion.activeTaskKeys.add(taskName);
    } finally {
        Minion.capacityLock.unlock();
    }
}
/**
 * Removes this task's name from the minion's active task keys while holding
 * {@code Minion.capacityLock}.
 */
public void deallocate() {
    Minion.capacityLock.lock();
    try {
        String taskName = getName();
        minion.activeTaskKeys.remove(taskName);
    } finally {
        Minion.capacityLock.unlock();
    }
}
/**
 * Sends a {@code CommandTaskNew} control message for this task to the given host.
 *
 * @param hostUUID uuid of the host the message is addressed to
 */
public void sendNewStatusToReplicaHost(String hostUUID) {
    JobKey key = getJobKey();
    CommandTaskNew message = new CommandTaskNew(hostUUID, key.getJobUuid(), key.getNodeNumber());
    minion.sendControlMessage(message);
}
/**
 * Reports the end of a task with no rebalance source or target.
 *
 * @param exit the exit code to report
 */
public void sendEndStatus(int exit) {
    this.sendEndStatus(exit, null, null);
}
/**
 * Reports the end of a task to Spawn and then asks the minion to kick the next job.
 * <p>
 * If a readable {@code job.exit} file exists in the live directory it is decoded into the
 * exit state that accompanies the message; a decode failure is logged and the default
 * exit state is used. The task's stored rebalance source/target are cleared as part of
 * this call.
 *
 * @param exit            exit code to report
 * @param rebalanceSource rebalance source to attach to the message, may be null
 * @param rebalanceTarget rebalance target to attach to the message, may be null
 */
public void sendEndStatus(int exit, String rebalanceSource, String rebalanceTarget) {
    TaskExitState exitState = new TaskExitState();
    File jobExit = new File(jobDir, "job.exit");
    if (jobExit.exists() && jobExit.canRead()) {
        try {
            CodecJSON.INSTANCE.decode(exitState, LessFiles.read(jobExit));
        } catch (Exception ex) {
            // Route through the logger (not stderr) so the failure lands in the minion log.
            log.warn("[task.end] failed to read exit state from {} for {}", jobExit, getName(), ex);
        }
    }
    exitState.setWasStopped(wasStopped());
    StatusTaskEnd end = new StatusTaskEnd(minion.uuid, id, node, exit, fileCount, fileBytes);
    end.setRebalanceSource(rebalanceSource);
    end.setRebalanceTarget(rebalanceTarget);
    end.setWasQueued(wasQueued);
    end.setExitState(exitState);
    // The parameters shadow the fields; these calls reset the task's stored rebalance info.
    setRebalanceSource(null);
    setRebalanceTarget(null);
    Minion.tasksCompletedPerHour.mark();
    minion.sendStatusMessage(end);
    try {
        minion.kickNextJob();
    } catch (Exception e) {
        log.warn("[task.kick] exception while trying to kick next job", e);
    }
}
/**
 * Sends a {@code StatusTaskPort} message carrying this task's current port, identified by
 * the job uuid and node id of the stored kick message.
 */
public void sendPort() {
    StatusTaskPort portStatus = new StatusTaskPort(minion.uuid, kick.getJobUuid(), kick.getNodeID(), port);
    minion.sendStatusMessage(portStatus);
}
/**
 * Restores a task's state from a job/task-id root directory.
 * <p>
 * The directory name is taken as the node number and its parent's name as the job id.
 * A {@code replica} directory, if present, is renamed to {@code live}. When the minion's
 * "live everywhere" marker file does not exist (first startup), the live directory is
 * marked complete and the done-marker files are created so the task starts from a known
 * idle state. If {@code config/job.state} exists it is decoded into this object and
 * verified against the directory-derived job id and node. Finally any in-flight work
 * is recovered via {@link #recoverWorkItem()}.
 *
 * @param taskDir the task root directory ({@code <root>/<jobId>/<node>})
 * @return true if the task was restored; false if there is no live directory, the saved
 *         state cannot be decoded, or the saved state does not match the directory
 * @throws IOException if a directory or marker file cannot be created
 * @throws NumberFormatException if the directory name is not an integer
 */
boolean restoreTaskState(File taskDir) throws IOException {
    taskRoot = taskDir;
    File liveDir = new File(taskDir, "live");
    File replicaDir = new File(taskDir, "replica");
    File configDir = new File(taskDir, "config");
    LessFiles.initDirectory(configDir);
    String jobID = taskDir.getParentFile().getName();
    String nodeID = taskDir.getName();
    String taskPath = jobID + "/" + nodeID;
    if (replicaDir.isDirectory()) {
        // Cut over replica to live. renameTo reports failure only through its return value,
        // so surface it instead of silently continuing with whatever "live" already holds.
        if (!replicaDir.renameTo(liveDir)) {
            log.warn("[restore] {} failed to rename {} to {}", taskPath, replicaDir, liveDir);
        }
    } else if (liveDir.exists() && !minion.liveEverywhereMarkerFile.exists()) {
        // On first startup, mark any existing "live" directory as complete.
        new File(liveDir, "replicate.complete").createNewFile();
    }
    if (!liveDir.isDirectory()) {
        log.warn("[restore] {} has no live or replica directories", taskPath);
        return false;
    }
    int expectedNode = Integer.parseInt(nodeID);
    id = jobID;
    node = expectedNode;
    initializeFileVariables();
    if (!minion.liveEverywhereMarkerFile.exists()) {
        // On first startup, make sure to get to known idle state
        jobDone.createNewFile();
        backupDone.createNewFile();
        replicateDone.createNewFile();
    }
    File jobState = new File(configDir, "job.state");
    if (jobState.exists()) {
        try {
            CodecJSON.decodeString(this, LessBytes.toString(LessFiles.read(jobState)));
        } catch (Exception e) {
            log.warn("[restore] {} failed to decode {}", taskPath, jobState, e);
            return false;
        }
    }
    // Decoding may have overwritten id/node; a null node is a mismatch, not an NPE on unboxing.
    if (node == null || expectedNode != node) {
        log.warn("[restore] {} mismatch with node # {}", taskPath, node);
        return false;
    }
    if (!jobID.equals(id)) {
        log.warn("[restore] {} mismatch with node id {}", taskPath, id);
        return false;
    }
    monitored = true;
    recoverWorkItem();
    return true;
}
/**
 * Re-attaches to work that was in flight when the minion went down, based on the saved
 * start timestamps. Depending on which timestamps are set, the task is resumed as
 * running, replicating or backing up (all in non-execute mode); if a timestamp is set
 * but none of those cases applies, the timestamps are cleared and an end status with
 * exit code 0 is sent. Any exception is logged and swallowed.
 */
private void recoverWorkItem() {
    try {
        final boolean noReplicateStamp = (replicateStartTime == 0);
        final boolean noBackupStamp = (backupStartTime == 0);
        final boolean onlyRunStamp = (startTime > 0) && noReplicateStamp && noBackupStamp;
        final boolean replicateWithoutBackup = (replicateStartTime > 0) && noBackupStamp;
        if (onlyRunStamp) {
            log.warn("[restore] {} as running", getName());
            exec(this.kick, false);
            return;
        }
        if (replicateWithoutBackup) {
            log.warn("[restore] {} as replicating", getName());
            execReplicate(null, null, false, false, false);
            return;
        }
        if (isBackingUp()) {
            log.warn("[restore] {} as backing up", getName());
            execBackup(null, null, false);
            return;
        }
        if ((startTime > 0) || (replicateStartTime > 0) || (backupStartTime > 0)) {
            // Something was active before the downtime and has since finished; tell Spawn.
            log.warn("[restore] {} as previously active; now finished", getName());
            startTime = 0;
            replicateStartTime = 0;
            backupStartTime = 0;
            sendEndStatus(0);
        }
    } catch (Exception ex) {
        log.warn("WARNING: failed to restore state for {}", getName(), ex);
    }
}
/**
 * Points the task's file handles at their locations under the live directory
 * (logs, port file) and the config directory (pid and done markers).
 * No files are created or touched.
 */
void initializeFileVariables() {
    jobDir = getLiveDir();
    final File configDir = getConfigDir();

    // Entries located under the live directory.
    logDir = new File(jobDir, "log");
    logOut = new File(logDir, "log.out");
    logErr = new File(logDir, "log.err");
    jobPort = new File(jobDir, "job.port");

    // Entries located under the config directory.
    jobPid = new File(configDir, "job.pid");
    replicatePid = new File(configDir, "replicate.pid");
    backupPid = new File(configDir, "backup.pid");
    jobDone = new File(configDir, "job.done");
    replicateDone = new File(configDir, "replicate.done");
    backupDone = new File(configDir, "backup.done");
}
/**
 * @return true if the live directory contains a {@code replicate.complete} marker
 */
boolean isComplete() {
    return new File(getLiveDir(), "replicate.complete").exists();
}
/**
 * Rejects replica targets that point back at this minion.
 *
 * @param replica the candidate target (must not be null)
 * @return false, after logging a warning, if the target's host uuid equals this minion's
 *         uuid; true otherwise
 */
private boolean shouldExecuteReplica(ReplicaTarget replica) {
    boolean targetsSelf = replica.getHostUuid().equals(minion.uuid);
    if (!targetsSelf) {
        return true;
    }
    log.warn("Host: {} received a replication target of itself, this is NOT allowed for {}", minion.uuid,
             getName());
    return false;
}
/**
 * Builds the shell commands that replicate this task to one replica target and sends a
 * {@code StatusTaskReplica} message for that target.
 * <p>
 * The first command creates the remote target directory. The second is a multi-line
 * script: either an rsync of the whole task directory followed by touching the
 * completion markers of the local complete backups and of live (all-backups mode), or
 * delete-marker / rsync-live / touch-marker (live-only mode).
 *
 * @param replica             the replication target
 * @param replicateAllBackups whether to copy the whole task directory rather than only live
 * @return null if {@code replica} is null or targets this host; otherwise the command
 *         list, which may be incomplete if an exception was logged while assembling it
 * @throws IOException declared by the signature
 */
private List<String> assembleReplicateCommandAndInformSpawn(ReplicaTarget replica, boolean replicateAllBackups) throws IOException {
    List<String> commands = new ArrayList<>();
    if (replica == null) {
        return null;
    }
    if (!shouldExecuteReplica(replica)) {
        return null;
    }
    try {
        String target = ProcessUtils.getTaskBaseDir(replica.getBaseDir(), id, node);
        if (!replicateAllBackups) {
            target += "/live";
        }
        String userAT = replica.getUserAT();
        String mkTarget = Minion.remoteConnectMethod + " " + userAT + " mkdir -p " + target + "/";
        log.warn("[replicate] {} to {}:{}", getJobKey(), userAT, target);
        log.debug(" --> {}", mkTarget);
        int kickRunCount = (kick == null) ? 0 : kick.getRunCount();
        StatusTaskReplica status =
                new StatusTaskReplica(replica.getHostUuid(), id, node, kickRunCount, System.currentTimeMillis());
        minion.sendStatusMessage(status);
        commands.add(mkTarget);
        if (!replicateAllBackups) {
            String clearMarker = createDeleteCommand(false, userAT, target + "/replicate.complete");
            String syncLive = createRsyncCommand(userAT, jobDir.getAbsolutePath() + "/", target);
            String setMarker = createTouchCommand(false, userAT, target + "/replicate.complete", false);
            commands.add(clearMarker + "\n" + syncLive + "\n" + setMarker);
        } else {
            StringBuilder script = new StringBuilder();
            script.append(createRsyncCommand(userAT, jobDir.getParentFile().getAbsolutePath() + "/", target));
            for (String backup : findLocalBackups(true)) {
                if (!backup.startsWith(ScheduledBackupType.getBackupPrefix())) {
                    // Skip anything without the backup prefix (e.g. gold): it will not
                    // exist on the remote host after the rsync.
                    continue;
                }
                // The backup may be deleted concurrently, so this touch is fail-safe:
                // it logs a message instead of failing the whole command.
                String touchBackup = createTouchCommand(false, userAT, target + "/" + backup + "/backup.complete", true);
                script.append("\n").append(touchBackup);
            }
            String touchLive = createTouchCommand(false, userAT, target + "/live/replicate.complete", false);
            script.append("\n").append(touchLive);
            commands.add(script.toString());
        }
    } catch (Exception ex) {
        log.warn("failed to replicate {} to {}", this.getJobKey(), replica.getHost(), ex);
    }
    return commands;
}
/**
 * Builds the backup commands for one host (local, or a remote replica) across all
 * scheduled backup types, then writes minion state.
 * <p>
 * For each type whose backup limit is positive and which reports that a new backup is
 * due, a copy command is returned and, if the type has a symlink name, a symlink command
 * is appended to {@code symlinkCommands}. Delete commands for backups that exceed the
 * limit are appended to {@code deleteCommands}.
 * <p>
 * The list of existing backups is fetched once and reused for every type: this method
 * only assembles command strings, so the listing cannot change between iterations, and
 * for a remote host each listing costs a remote shell round trip.
 *
 * @param local          true for this host, false to target {@code replica}
 * @param replica        the remote target; only dereferenced when {@code local} is false
 * @param symlinkCommands output list receiving symlink commands
 * @param deleteCommands  output list receiving delete commands
 * @param time           timestamp used to generate the new backup names
 * @return the copy commands that create the new backups
 */
private List<String> assembleBackupCommandsForHost(boolean local, ReplicaTarget replica, List<String> symlinkCommands, List<String> deleteCommands, long time) {
    List<String> copyCommands = new ArrayList<>();
    // Fetched lazily on the first iteration so that no listing happens when there are no types.
    String[] existingBackups = null;
    for (ScheduledBackupType type : ScheduledBackupType.getBackupTypes().values()) {
        if (existingBackups == null) {
            existingBackups = local ? findLocalBackups(false) : findRemoteBackups(false, replica);
        }
        // Hand each type its own copy so a callee that reorders the array cannot affect later types.
        String[] allBackups = existingBackups.clone();
        String backupName = type.generateNameForTime(time, true);
        String symlinkName = type.getSymlinkName();
        String userAT = local ? null : replica.getUserAT();
        String source = "live";
        String path = local ? jobDir.getParentFile().getAbsolutePath() : ProcessUtils.getTaskBaseDir(
                replica.getBaseDir(),
                id, node);
        int maxNumBackups = getMaxNumBackupsForType(type);
        if (maxNumBackups > 0 && type.shouldMakeNewBackup(allBackups)) {
            String backupCMD = createBackupCommand(local, userAT, path, source, backupName);
            copyCommands.add(backupCMD);
            if (symlinkName != null) {
                symlinkCommands.add(createSymlinkCommand(local, userAT, path, backupName, symlinkName));
            }
            maxNumBackups -= 1; // Diminish the max number by one, because we're about to add a new one
        }
        List<String> backupsToDelete = type.oldBackupsToDelete(allBackups, allBackups, maxNumBackups);
        for (String oldBackup : backupsToDelete) {
            if (MinionTaskDeleter.shouldDeleteBackup(oldBackup, type)) {
                deleteCommands.add(createDeleteCommand(local, userAT, path + "/" + oldBackup));
            }
        }
    }
    minion.writeState();
    return copyCommands;
}
/**
 * Builds a retry-wrapped rsync command line that copies {@code source} to
 * {@code userAT:target} over the minion's remote connect method. A bandwidth limit
 * option is included only when {@code Minion.copyBandwidthLimit} is positive.
 *
 * @param userAT the {@code user@host} part of the destination
 * @param source the local source path
 * @param target the remote destination path
 * @return the command string
 * @throws Exception declared by the signature
 */
private String createRsyncCommand(String userAT, String source, String target) throws Exception {
    String bandwidthOption = Minion.copyBandwidthLimit > 0 ? " --bwlimit " + Minion.copyBandwidthLimit : "";
    String rsyncOptions = " -Hqa --exclude config --exclude gold --exclude replicate.complete --exclude backup.complete --delete-after";
    String remoteShellOption = " -e \\'" + Minion.remoteConnectMethod + "\\' ";
    String destination = userAT + ":" + target;
    return "retry " + Minion.rsyncCommand + bandwidthOption + rsyncOptions + remoteShellOption + source + " " + destination;
}
/**
 * Builds the command chain that creates one backup: delete any existing target
 * directory, copy the source directory to it, then touch its {@code backup.complete}
 * marker. The three steps are joined with {@code &&}.
 *
 * @param local   whether the commands run on this host
 * @param userAT  the {@code user@host} for remote execution (unused when local)
 * @param baseDir directory containing both the source and the new backup
 * @param source  name of the directory to copy from
 * @param name    name of the backup directory to create
 * @return the combined command string
 */
private String createBackupCommand(boolean local, String userAT, String baseDir, String source, String name) {
    String sourceDir = baseDir + "/" + source;
    String targetDir = baseDir + "/" + name;
    log.warn("[backup] executing backup from {} to {}", sourceDir, targetDir);
    String removeOld = createDeleteCommand(local, userAT, targetDir);
    String copy = createCopyCommand(local, userAT, sourceDir, targetDir);
    String markComplete = createTouchCommand(local, userAT, targetDir + "/backup.complete", false);
    return String.join(" && ", removeOld, copy, markComplete);
}
/**
 * Builds a retry-wrapped command that points the symlink {@code baseDir/name} at
 * {@code source}: a non-symlink at the link path is removed first, then a temporary
 * link is created and moved over the final link path.
 *
 * @param local   whether the command runs on this host
 * @param userAt  the {@code user@host} for remote execution (unused when local)
 * @param baseDir directory in which the link lives
 * @param source  the link target
 * @param name    the link name
 * @return the wrapped command string
 */
private String createSymlinkCommand(boolean local, String userAt, String baseDir, String source, String name) {
    String linkDir = baseDir + "/" + name;
    String tmpName = linkDir + "_tmp";
    String removeNonLink = "if [ ! -L " + linkDir + " ]; then rm -rf " + linkDir + " ; fi";
    String makeTmpLink = MacUtils.lncmd + " -nsf " + source + " " + tmpName;
    String swapIntoPlace = MacUtils.mvcmd + " -Tf " + tmpName + " " + linkDir;
    return wrapCommandWithRetries(local, userAt, removeNonLink + " && " + makeTmpLink + " && " + swapIntoPlace);
}
/**
 * Builds a retry-wrapped recursive copy command; the {@code -l} flag is added when
 * {@code MacUtils.linkBackup} is set.
 *
 * @param local     whether the command runs on this host
 * @param userAt    the {@code user@host} for remote execution (unused when local)
 * @param sourceDir directory to copy from
 * @param targetDir directory to copy to
 * @return the wrapped command string
 */
private String createCopyCommand(boolean local, String userAt, String sourceDir, String targetDir) {
    String cpParams;
    if (MacUtils.linkBackup) {
        cpParams = " -lr ";
    } else {
        cpParams = " -r ";
    }
    String copy = MacUtils.cpcmd + cpParams + sourceDir + " " + targetDir;
    return wrapCommandWithRetries(local, userAt, copy);
}
/**
 * Builds a retry-wrapped {@code touch} command.
 *
 * @param local    whether the command runs on this host
 * @param userAT   the {@code user@host} for remote execution (unused when local)
 * @param path     the file to touch
 * @param failSafe when true, errors are discarded and a message is echoed instead of failing
 * @return the wrapped command string
 */
private String createTouchCommand(boolean local, String userAT, String path, boolean failSafe) {
    String touch = "touch " + path;
    if (failSafe) {
        touch = touch + " 2>/dev/null || echo 'Skipped deleted backup'";
    }
    return wrapCommandWithRetries(local, userAT, touch);
}
/**
 * Builds a retry-wrapped recursive, forced delete command for the given path.
 *
 * @param local   whether the command runs on this host
 * @param userAT  the {@code user@host} for remote execution (unused when local)
 * @param dirPath the path to delete
 * @return the wrapped command string
 */
private String createDeleteCommand(boolean local, String userAT, String dirPath) {
    String delete = MacUtils.rmcmd + " -rf " + dirPath;
    return wrapCommandWithRetries(local, userAT, delete);
}
/**
 * Wraps a command as the double-quoted argument of the {@code retry} shell function.
 * For remote execution the command is additionally prefixed with the remote connect
 * method and {@code userAt}, and enclosed in single quotes.
 *
 * @param local   whether the command runs on this host
 * @param userAt  the {@code user@host} for remote execution (unused when local)
 * @param command the raw command
 * @return the wrapped command string
 */
private String wrapCommandWithRetries(boolean local, String userAt, String command) {
    if (local) {
        return "retry \"" + command + "\"";
    }
    String remotePrefix = Minion.remoteConnectMethod + " " + userAt + " '";
    return "retry \"" + remotePrefix + command + "'" + "\"";
}
/**
 * Lists the sub-directories of the task directory (the parent of the live directory),
 * sorted by name.
 *
 * @param completeOnly when true, only directories containing a {@code backup.complete}
 *                     file are returned
 * @return the matching directory names; empty if the task directory cannot be listed
 */
private String[] findLocalBackups(boolean completeOnly) {
    File[] entries = jobDir.getParentFile().listFiles();
    if (entries == null) {
        return new String[]{};
    }
    return Arrays.stream(entries)
                 .filter(File::isDirectory)
                 .filter(dir -> !completeOnly || LessStrings.contains(dir.list(), "backup.complete"))
                 .map(File::getName)
                 .sorted()
                 .toArray(String[]::new);
}
/**
 * Lists backups for this task on a replica host by running a remote {@code ls}.
 * <p>
 * This is best-effort: if the listing fails for any reason the failure is logged and an
 * empty array is returned. If the thread is interrupted while waiting for the command,
 * the interrupt flag is restored before returning.
 *
 * @param completeOnly Whether to restrict to backups that contain the backup.complete file
 * @param replica      The ReplicaTarget object describing the destination for this replica
 * @return the directory names found; empty on failure
 */
private String[] findRemoteBackups(boolean completeOnly, ReplicaTarget replica) {
    try {
        String userAT = replica.getUser() + "@" + replica.getHost();
        String baseDir = ProcessUtils.getTaskBaseDir(replica.getBaseDir(), id, node);
        if (completeOnly) {
            baseDir += "/*/backup.complete";
        }
        String lsResult = execCommandReturnStdOut(
                Minion.remoteConnectMethod + " " + userAT + " " + MacUtils.lscmd + " " + baseDir);
        String[] lines = lsResult.split("\n");
        if (!completeOnly) {
            return lines;
        }
        // Each line is ".../<backup>/backup.complete"; the backup name is the second-to-last segment.
        List<String> rv = new ArrayList<>(lines.length);
        for (String line : lines) {
            String[] splitLine = line.split("/");
            if (splitLine.length > 2) {
                rv.add(splitLine[splitLine.length - 2]);
            }
        }
        return rv.toArray(new String[]{});
    } catch (Exception ex) {
        if (ex instanceof InterruptedException) {
            // Do not lose the interrupt: callers further up may need to observe it.
            Thread.currentThread().interrupt();
        }
        log.warn("[backup] failed to list remote backups for {} on {}",
                 getName(), (replica == null) ? null : replica.getHost(), ex);
        return new String[]{};
    }
}
/**
 * Runs a command line through {@code /bin/sh -c} and waits for it to finish.
 *
 * @param sshCMD the command line to run
 * @return the command's standard output if it exited with 0, otherwise the empty string
 * @throws InterruptedException if interrupted while waiting for the command
 * @throws IOException          if the command cannot be run
 */
private String execCommandReturnStdOut(String sshCMD) throws InterruptedException, IOException {
    SimpleExec finished = runCommand(new String[]{"/bin/sh", "-c", sshCMD}, null);
    return (finished.exitCode() == 0) ? finished.stdoutString() : "";
}
/**
 * Runs a command and waits for completion. The single-string form takes precedence
 * when it is non-null; otherwise the array form is used.
 *
 * @param sshCMDArray command as an argument array (used when {@code sshCMD} is null)
 * @param sshCMD      command as a single string, or null
 * @return the joined {@code SimpleExec}
 * @throws InterruptedException if interrupted while waiting for the command
 * @throws IOException          if the command cannot be run
 */
private SimpleExec runCommand(String[] sshCMDArray, String sshCMD) throws InterruptedException, IOException {
    if (sshCMD == null) {
        return new SimpleExec(sshCMDArray).join();
    }
    return new SimpleExec(sshCMD).join();
}
/**
 * Returns how many backups of the given type should be kept, read from the kick
 * parameters where applicable.
 *
 * @param type the backup type
 * @return 3 for gold backups; -1 if no kick has been received yet; the kick's configured
 *         count for hourly/daily/weekly/monthly types; 0 for any other type
 */
private int getMaxNumBackupsForType(ScheduledBackupType type) {
    if (type instanceof GoldBackup) {
        // Keep 3 gold backups around so these directories linger for query/streaming stability.
        return 3;
    }
    if (kick == null) {
        // Without a kick the limits are unknown; hold off until a task kick arrives.
        return -1;
    }
    if (type instanceof HourlyBackup) {
        return kick.getHourlyBackups();
    }
    if (type instanceof DailyBackup) {
        return kick.getDailyBackups();
    }
    if (type instanceof WeeklyBackup) {
        return kick.getWeeklyBackups();
    }
    if (type instanceof MonthlyBackup) {
        return kick.getMonthlyBackups();
    }
    // Unknown backup type
    return 0;
}
/**
 * Replaces the target directory with a copy of the given backup directory. The existing
 * target is first moved aside and queued for asynchronous deletion, then the backup is
 * copied (hard-linked when {@code MacUtils.linkBackup} is set) onto the target path.
 *
 * @param backupDir The "good" version of a task
 * @param targetDir The target directory, generally "live", which may have bad/incomplete data
 * @return True if the copy command exits with 0; false if the arguments are invalid or
 *         the copy fails
 */
public boolean promoteBackupToLive(File backupDir, File targetDir) {
    if (targetDir == null || backupDir == null || !backupDir.exists() || !backupDir.isDirectory()) {
        log.warn("[restore] invalid backup dir {}", backupDir);
        return false;
    }
    moveAndDeleteAsync(targetDir);
    // Copy the backup directory onto the target directory
    String copyFlags = MacUtils.linkBackup ? " -lrfT " : " -rfT ";
    String cpCMD = MacUtils.cpcmd + copyFlags;
    int exitCode = ProcessUtils.shell(cpCMD + backupDir + " " + targetDir + " >> /dev/null 2>&1", minion.rootDir);
    return exitCode == 0;
}
/**
 * Renames a file to a {@code BAD-<millis>} sibling and submits that path for deletion.
 * After the rename, the files behind the {@code log/log.out} and {@code log/log.err}
 * symlinks are moved back under the original location. Does nothing when the file is
 * null or does not exist.
 *
 * @param file The file to be deleted.
 * @throws RuntimeException if the rename fails
 */
private void moveAndDeleteAsync(File file) {
    if (file == null || !file.exists()) {
        return;
    }
    File tmpLocation = new File(file.getParent(), "BAD-" + System.currentTimeMillis());
    if (!file.renameTo(tmpLocation)) {
        throw new RuntimeException("Could not rename file for asynchronous deletion: " + file);
    }
    copyLogBackAsArchive(file.toPath(), tmpLocation.toPath(), Paths.get("log/log.out"));
    copyLogBackAsArchive(file.toPath(), tmpLocation.toPath(), Paths.get("log/log.err"));
    submitPathToDelete(tmpLocation.getPath());
}
/**
 * If {@code newName/logSubpath} is a symbolic link, moves the file it resolves to into
 * the corresponding log directory under {@code oldName}, appending {@code .bad} to the
 * file name. Any failure is logged and ignored.
 *
 * @param oldName    the original directory path (where the log is preserved)
 * @param newName    the renamed directory path (where the log currently lives)
 * @param logSubpath relative path of the log symlink inside the directory
 */
private void copyLogBackAsArchive(Path oldName, Path newName, Path logSubpath) {
    try {
        Path movedLogLink = newName.resolve(logSubpath);
        if (!java.nio.file.Files.isSymbolicLink(movedLogLink)) {
            return;
        }
        Path actualLogFile = movedLogLink.toRealPath();
        String archivedName = actualLogFile.getFileName().toString() + ".bad";
        Path archivedLog = oldName.resolve(logSubpath).getParent().resolve(archivedName);
        java.nio.file.Files.createDirectories(archivedLog.getParent());
        java.nio.file.Files.move(actualLogFile, archivedLog);
    } catch (Exception ex) {
        log.warn("exception while trying to preserve reverted task's old logs -- ignoring", ex);
    }
}
/**
 * Hands a path to the minion's task deleter while holding {@code minion.minionStateLock}.
 *
 * @param path the path to submit for deletion
 */
private void submitPathToDelete(String path) {
    minion.minionStateLock.lock();
    try {
        minion.minionTaskDeleter.submitPathToDelete(path);
    }
    finally {
        minion.minionStateLock.unlock();
    }
}
/**
 * Reverts the live directory to a local backup and then kicks off a replicate.
 * Runs entirely under {@code Minion.revertLock}.
 * <p>
 * The backup is selected by time when {@code revision} is negative, otherwise by
 * revision number. A revert status is sent before the backup is promoted; if promotion
 * fails an end status with {@code EXIT_REVERT_FAILURE} is sent.
 *
 * @param revision how many revisions back to go, or a negative value to select by time
 * @param time     the backup time to look up when selecting by time
 * @param type     name of the backup type to revert to
 * @return true if the backup was promoted and the follow-up replicate was started;
 *         false if the task is active, the type is unknown, no backup matches, the
 *         promotion fails, or the replicate throws
 */
public boolean revertToBackup(int revision, long time, String type) {
    Minion.revertLock.lock();
    try {
        boolean taskActive = isRunning() || isReplicating() || isBackingUp();
        if (taskActive) {
            log.warn("[revert] cannot promote backup for active task {}", getName());
            return false;
        }
        if (ScheduledBackupType.getBackupTypes().get(type) == null) {
            log.warn("[revert] unrecognized backup type {}", type);
            return false;
        }
        String backupName = (revision < 0) ? getBackupByTime(time, type) : getBackupByRevision(revision, type);
        if (backupName == null) {
            log.warn("[revert] found no backups of type {} and time {} to revert to for {}; failing",
                     type, time, getName());
            return false;
        }
        File oldBackup = new File(jobDir.getParentFile(), backupName);
        log.warn("[revert] {} from {}", getName(), oldBackup);
        minion.sendStatusMessage(new StatusTaskRevert(minion.getUUID(), id, node));
        if (!promoteBackupToLive(oldBackup, jobDir)) {
            log.warn("[revert] {} from {} failed", getName(), oldBackup);
            sendEndStatus(JobTaskErrorCode.EXIT_REVERT_FAILURE);
            return false;
        }
        try {
            execReplicate(null, null, false, true, false);
            return true;
        } catch (Exception ex) {
            log.warn("[revert] post-revert replicate of {} failed", getName(), ex);
            return false;
        }
    } finally {
        Minion.revertLock.unlock();
    }
}
/**
 * Finds the complete local backup of the given type whose name, stripped of prefix and
 * suffix, equals the stripped name generated for {@code time}.
 *
 * @param time the backup time to look for
 * @param type name of the backup type; must be a registered type
 * @return the matching backup directory name, or null (after logging) if there are no
 *         complete local backups or none matches
 */
private String getBackupByTime(long time, String type) {
    ScheduledBackupType backupType = ScheduledBackupType.getBackupTypes().get(type);
    String[] candidates = findLocalBackups(true);
    if (candidates == null || candidates.length == 0) {
        log.warn("[revert] fail, there are no local backups of type {} for {}", type, getName());
        return null;
    }
    String wantedName = backupType.stripSuffixAndPrefix(backupType.generateNameForTime(time, true));
    for (String candidate : candidates) {
        if (!backupType.isValidName(candidate)) {
            continue;
        }
        if (backupType.stripSuffixAndPrefix(candidate).equals(wantedName)) {
            return candidate;
        }
    }
    log.warn("[revert] fail, invalid backup time for {}: {}", getName(), time);
    return null;
}
/**
 * Returns all complete local backups in the order produced by
 * {@code ScheduledBackupType.sortBackupsByTime}.
 * NOTE(review): the previous comment described this order as most recent to earliest;
 * confirm against the sort implementation.
 *
 * @return A mutable list of backup names
 */
public List<String> getBackupsOrdered() {
    String[] completeBackups = findLocalBackups(true);
    List<String> ordered = new ArrayList<>(Arrays.asList(completeBackups));
    ScheduledBackupType.sortBackupsByTime(ordered);
    return ordered;
}
/**
 * Fetch the name of the backup directory for this task, n revisions back.
 * <p>
 * With type {@code "all"} every complete backup is considered, ordered by
 * {@code ScheduledBackupType.sortBackupsByTime}; otherwise only complete backups whose
 * name is valid for the given type are considered, in name order. Revisions are counted
 * from the end of that list.
 *
 * @param revision How far to go back -- 0 for latest stable version, 1 for the next oldest, etc.
 * @param type     Which backup type to use, or "all".
 * @return The name of the appropriate complete backup, if found; null if no such backup
 *         was found or the type is not recognized
 */
String getBackupByRevision(int revision, String type) {
    String[] backupsRaw = findLocalBackups(true);
    if (backupsRaw == null) {
        return null;
    }
    List<String> backups = new ArrayList<>();
    if ("all".equals(type)) {
        backups.addAll(Arrays.asList(backupsRaw));
        ScheduledBackupType.sortBackupsByTime(backups);
    } else {
        ScheduledBackupType backupType = ScheduledBackupType.getBackupTypes().get(type);
        if (backupType == null) {
            // An unregistered type name used to fail with a NullPointerException below.
            log.warn("[revert] unrecognized backup type {}", type);
            return null;
        }
        for (String backup : backupsRaw) {
            if (backupType.isValidName(backup)) {
                backups.add(backup);
            }
        }
    }
    int offset = (backups.size() - 1 - revision);
    if (revision < 0 || offset < 0 || offset >= backups.size()) {
        log.warn("[revert] fail: can't find revision={} with only {} complete backups", revision, backups.size());
        return null;
    }
    return backups.get(offset);
}
/**
 * Throws an {@code ExecException} carrying {@code msg} when {@code test} is false.
 *
 * @param test the condition that must hold
 * @param msg  the message for the exception
 * @throws Exception when the condition does not hold
 */
private void require(boolean test, String msg) throws Exception {
    require(test, msg, message -> new ExecException(message));
}
/**
 * Throws the exception produced by {@code exceptionType} for {@code msg} when
 * {@code test} is false; otherwise does nothing.
 *
 * @param test          the condition that must hold
 * @param msg           the message handed to the exception factory
 * @param exceptionType factory that builds the exception from the message
 * @throws Exception the factory's exception, when the condition does not hold
 */
private void require(boolean test, String msg, Function<String,Exception> exceptionType) throws Exception {
    if (test) {
        return;
    }
    throw exceptionType.apply(msg);
}
/**
 * Verifies that a value is either not yet set or is being set to the same value again.
 *
 * @param currentValue the value currently stored, or null if unset
 * @param newValue     the value about to be stored
 * @param valueName    label used in the error message
 * @throws IllegalArgumentException if {@code currentValue} is non-null and differs from
 *                                  {@code newValue}
 */
private void requireNewOrEqual(Object currentValue, Object newValue, String valueName) throws IllegalArgumentException {
    if (currentValue == null || currentValue.equals(newValue)) {
        return;
    }
    throw new IllegalArgumentException("value mismatch for '" + valueName + "' " + newValue + " != " + currentValue);
}
/**
 * Prepares the task's directories and file handles and starts a RunTaskWorkItem thread
 * for it.
 * <p>
 * With {@code execute == true} the kick message is validated and stored, a begin status
 * is sent, the job config (if any) is written to job.conf, and a bash wrapper script is
 * written to job.run. With {@code execute == false} (as used by recoverWorkItem) none of
 * that happens: only the directories and file handles are set up before the work item
 * is started.
 * <p>
 * NOTE(review): in execute mode the replicate.complete marker is created and
 * {@code replicas} is overwritten before the idle check runs, so a rejected kick still
 * has those side effects -- confirm this is intended.
 *
 * @param kickMessage the kick to run; only dereferenced when {@code execute} is true
 * @param execute     whether to generate a new run script, as opposed to re-attaching
 * @throws Exception if a precondition fails (task not idle, empty command, id/node/node
 *                   count mismatch, stale files not deletable) or on I/O failure
 */
public void exec(@Nonnull CommandTaskKick kickMessage, boolean execute) throws Exception {
// setup data directory
jobDir = LessFiles.initDirectory(new File(minion.rootDir, id + File.separator + node + File.separator + "live"));
File configDir = getConfigDir();
if (!configDir.exists()) {
LessFiles.initDirectory(configDir);
}
logDir = new File(jobDir, "log");
LessFiles.initDirectory(logDir);
// file handles used by the wrapper script and by the work item
replicateDone = new File(configDir, "replicate.done");
jobRun = new File(configDir, "job.run");
jobDone = new File(configDir, "job.done");
logOut = new File(logDir, "log.out");
logErr = new File(logDir, "log.err");
jobPid = new File(configDir, "job.pid");
jobPort = new File(jobDir, "job.port");
jobStopped = new File(jobDir, "job.stopped");
wasQueued = false;
if (execute) {
// mark the live directory as complete (same marker that isComplete() checks)
File replicateComplete = new File(getLiveDir(), "replicate.complete");
replicateComplete.createNewFile();
replicas = kickMessage.getReplicas();
String jobId = kickMessage.getJobUuid();
int jobNode = kickMessage.getJobKey().getNodeNumber();
log.debug("[task.exec] {}", kickMessage.getJobKey());
require(testTaskIdle(), "task is not idle, current state: " + this.getTaskState(), ExecStateException::new);
String jobCommand = kickMessage.getCommand();
require(!LessStrings.isEmpty(jobCommand), "task command is missing or empty");
// ensure we're not changing something critical on a re-spawn
int jobNodes = kickMessage.getJobNodes();
requireNewOrEqual(id, jobId, "Job ID");
requireNewOrEqual(node, jobNode, "Job Node");
requireNewOrEqual(nodeCount, jobNodes, "Job Node Count");
// store the new values
id = jobId;
node = jobNode;
nodeCount = jobNodes;
kick = kickMessage;
autoRetry = kick.getAutoRetry();
// allocate type slot if applicable
minion.sendStatusMessage(new StatusTaskBegin(minion.uuid, id, node));
// store in jobs on first run
if (runCount == 0) {
log.warn("[task.exec] first time running {}", getName());
}
String jobConfig = kickMessage.getConfig();
if (jobConfig != null) {
LessFiles.write(new File(jobDir, "job.conf"), LessBytes.toBytes(jobConfig), false);
}
// the port placeholder is always substituted with "0" here
String portString = String.valueOf(0);
// create exec command
jobCommand = jobCommand.replace("{{jobdir}}", jobDir.getPath())
.replace("{{jobid}}", jobId)
.replace("{{port}}", portString)
.replace("{{node}}", String.valueOf(jobNode))
.replace("{{nodes}}", String.valueOf(jobNodes));
// environment assignments that are prefixed to the job command in the script
String setEnvironmentPrefix = String.format(
"HYDRA_JOBDIR='%s' HYDRA_JOBID='%s' HYDRA_NODE='%s' HYDRA_NODES='%s' " +
"HYDRA_OWNER='%s' HYDRA_USERGROUP='%s' HYDRA_PORT='%s'",
jobDir.getPath(), jobId, jobNode, jobNodes,
kickMessage.getOwner(), kickMessage.getUserGroup(), portString);
log.warn("[task.exec] starting {} with autoRetry={}", jobDir.getPath(), autoRetry);
// create shell wrapper
require(minion.deleteFiles(jobPid, jobPort, jobDone, jobStopped), "failed to delete files");
port = null;
// each run logs to timestamped files; log.out / log.err are symlinks to the newest pair
String stamp = Minion.timeFormat.print(System.currentTimeMillis());
File logOutTmp = new File(logDir, "log-" + stamp + ".out");
File logErrTmp = new File(logDir, "log-" + stamp + ".err");
StringBuilder bash = new StringBuilder("#!/bin/bash\n");
// prune log files not modified for more than 30 days
bash.append("find " + logDir + " -type f -mtime +30 -exec rm {} \\;\n");
bash.append("rm -f " + logOut + " " + logErr + "\n");
bash.append("ln -s " + logOutTmp.getName() + " " + logOut + "\n");
bash.append("ln -s " + logErrTmp.getName() + " " + logErr + "\n");
// the rest runs in a backgrounded subshell whose output goes to the timestamped logs
bash.append("(\n");
bash.append("cd " + jobDir + "\n");
bash.append('(' + setEnvironmentPrefix + ' ' + jobCommand + ") &\n");
// record the job's pid in job.pid
bash.append("pid=$!\n");
bash.append("echo ${pid} > " + jobPid.getCanonicalPath() + "\n");
bash.append("exit=0\n");
String taskStartHeader = String.format("Starting job/task %s/%s on host/uuid %s/%s",
jobId, jobNode, minion.myHost, minion.getUUID());
String taskStartHeaderExtended = String.format(
" - job kicks %s - task runs (here/total) %s/%s - pid ${pid} - max time %s - cmd %s",
kick.getRunCount(), runCount, kick.getStarts(), kickMessage.getRunTime(), jobCommand);
bash.append(Minion.echoWithDate_cmd + "\"" + taskStartHeader + taskStartHeaderExtended + "\"\n");
// wait for the job and write its exit code to job.done
bash.append("wait ${pid} || exit=$?\n");
bash.append("echo ${exit} > " + jobDone.getCanonicalPath() + "\n");
bash.append(Minion.echoWithDate_cmd + "Exiting task with return value: ${exit}" + "\n");
bash.append("exit ${exit}\n");
bash.append(") >" + logOutTmp + " 2>" + logErrTmp + " &\n");
LessFiles.write(jobRun, LessBytes.toBytes(bash.toString()), false);
runCount++;
this.startTime = System.currentTimeMillis();
}
// persist state and report host status in both modes
save();
minion.sendHostStatus();
// start watcher, which will fire it up
workItemThread = new Thread(new RunTaskWorkItem(jobPid, jobRun, jobDone, this, execute, autoRetry));
workItemThread.setName("RunTask-WorkItem-" + getName());
workItemThread.start();
}
/**
 * Starts (or, with {@code execute == false}, re-attaches to) replication of this task.
 * <p>
 * If the task has neither replicas nor failure-recovery replicas, replication is skipped
 * and {@code execBackup(rebalanceSource, rebalanceTarget, true)} is called instead. In
 * execute mode, an already-active rsync for this task causes an end status with
 * {@code EXIT_REPLICATE_FAILURE} and a message appended to the error log. Otherwise the
 * replicate.run and replicate.sh scripts are written (execute mode only) and a
 * ReplicateWorkItem thread is started.
 *
 * @param rebalanceSource     stored on the task and passed to the work item
 * @param rebalanceTarget     stored on the task and passed to the work item
 * @param replicateAllBackups passed to the replicate script generator and status message
 * @param execute             whether to generate and run new scripts, as opposed to
 *                            re-attaching
 * @param wasQueued           stored on the task via setWasQueued
 * @throws Exception if the task is not idle in execute mode, or if setup fails (in which
 *                   case an end status with {@code EXIT_SCRIPT_EXEC_ERROR} is sent first)
 */
public void execReplicate(String rebalanceSource,
String rebalanceTarget,
boolean replicateAllBackups,
boolean execute,
boolean wasQueued) throws Exception {
setRebalanceSource(rebalanceSource);
setRebalanceTarget(rebalanceTarget);
setWasQueued(wasQueued);
if (log.isDebugEnabled()) {
log.debug("[task.execReplicate] {}", this.getJobKey());
}
// the idle check only applies when new work is about to be started
require(!execute || testTaskIdle(), "task is not idle");
// nothing to replicate to: go straight to the backup step
if (((replicas == null) || (replicas.length == 0))
&& ((failureRecoveryReplicas == null) || (failureRecoveryReplicas.length == 0))) {
execBackup(rebalanceSource, rebalanceTarget, true);
return;
}
// refuse to start a second rsync for the same task
if (execute && (ProcessUtils.findActiveRsync(id, node) != null)) {
String msg = "Replicate failed because an existing rsync process was found for " + getName();
log.warn("[task.execReplicate] {}", msg);
sendEndStatus(JobTaskErrorCode.EXIT_REPLICATE_FAILURE);
ProcessUtils.shell(Minion.echoWithDate_cmd + msg + " >> " + logErr.getCanonicalPath(), minion.rootDir);
return;
}
try {
jobDir = LessFiles.initDirectory(
new File(minion.rootDir, id + File.separator + node + File.separator + "live"));
log.info("[task.execReplicate] replicating {}", jobDir.getPath());
File configDir = getConfigDir();
LessFiles.initDirectory(configDir);
// create shell wrapper
replicateSH = new File(configDir, "replicate.sh");
replicateRun = new File(configDir, "replicate.run");
replicateDone = new File(configDir, "replicate.done");
replicatePid = new File(configDir, "replicate.pid");
if (execute) {
// clear stale pid/done files before writing the new scripts
require(minion.deleteFiles(replicatePid, replicateDone), "failed to delete replicate config files");
String replicateRunScript = generateRunScript(replicateSH.getCanonicalPath(), replicatePid.getCanonicalPath(), replicateDone.getCanonicalPath());
LessFiles.write(replicateRun, LessBytes.toBytes(replicateRunScript), false);
String replicateSHScript = generateReplicateSHScript(replicateAllBackups);
LessFiles.write(replicateSH, LessBytes.toBytes(replicateSHScript), false);
minion.sendStatusMessage(new StatusTaskReplicate(minion.uuid, id, node, replicateAllBackups));
replicateStartTime = System.currentTimeMillis();
save();
}
// start watcher
Runnable workItem = new ReplicateWorkItem(replicatePid, replicateRun, replicateDone, this,
rebalanceSource, rebalanceTarget, execute);
workItemThread = new Thread(workItem, "Replicate-WorkItem-" + getName());
workItemThread.start();
} catch (Exception ex) {
// report the failure to Spawn, then let the caller see the exception
sendEndStatus(JobTaskErrorCode.EXIT_SCRIPT_EXEC_ERROR);
throw ex;
}
}
/**
 * Sets up the backup script files for this task and starts a {@code BackupWorkItem} thread.
 * <p>
 * The backup file handles (sh/run/done/pid) are always pointed at the task's config directory.
 * Only when {@code execute} is true are the stale pid/done files deleted, the scripts written,
 * a {@link StatusTaskBackup} sent, the backup start time recorded and the task saved.
 *
 * @param rebalanceSource passed through to the work item
 * @param rebalanceTarget passed through to the work item
 * @param execute         whether to (re)generate the scripts; when true the task must be idle
 * @throws Exception if the idle requirement fails, or if setup fails (in which case an
 *                   {@code EXIT_SCRIPT_EXEC_ERROR} end status is sent before rethrowing)
 */
public void execBackup(String rebalanceSource, String rebalanceTarget, boolean execute) throws Exception {
    if (log.isDebugEnabled()) {
        log.debug("[task.execBackup] {}", this.getJobKey());
    }
    // The idle probe is only made when we are actually going to execute.
    boolean mayProceed = !execute || testTaskIdle();
    require(mayProceed, "task is not idle");
    try {
        log.info("[task.execBackup] backing up {}", jobDir.getPath());
        File configDir = getConfigDir();
        LessFiles.initDirectory(configDir);
        backupSH = new File(configDir, "backup.sh");
        backupRun = new File(configDir, "backup.run");
        backupDone = new File(configDir, "backup.done");
        backupPid = new File(configDir, "backup.pid");
        if (execute) {
            boolean staleFilesRemoved = minion.deleteFiles(backupPid, backupDone);
            require(staleFilesRemoved, "failed to delete backup config files");
            // Inner script first, then the wrapper that launches it and records pid/exit code.
            String innerScript = generateBackupSHScript(replicas);
            LessFiles.write(backupSH, LessBytes.toBytes(innerScript), false);
            String wrapperScript = generateRunScript(
                    backupSH.getCanonicalPath(), backupPid.getCanonicalPath(), backupDone.getCanonicalPath());
            LessFiles.write(backupRun, LessBytes.toBytes(wrapperScript), false);
            minion.sendStatusMessage(new StatusTaskBackup(minion.uuid, id, node));
            backupStartTime = System.currentTimeMillis();
            save();
        }
        Runnable watcher = new BackupWorkItem(
                backupPid, backupRun, backupDone, this, rebalanceSource, rebalanceTarget, execute);
        workItemThread = new Thread(watcher, "Backup-WorkItem-" + getName());
        workItemThread.start();
    } catch (Exception ex) {
        sendEndStatus(JobTaskErrorCode.EXIT_SCRIPT_EXEC_ERROR);
        throw ex;
    }
}
/**
 * Builds the bash preamble shared by the generated scripts: two variable assignments
 * ({@code retries}, {@code retryDelaySeconds}) taken from {@link Minion}, followed by the
 * definition of a {@code retry} shell function.
 *
 * @return the bash snippet, terminated by a newline
 */
private String makeRetryDefinition() {
    String settings = "retries=" + Minion.copyRetryLimit + "\n"
            + "retryDelaySeconds=" + Minion.copyRetryDelaySeconds + "\n";
    // The function evals its arguments until one attempt exits 0 or the retry budget is spent.
    String retryFunction = "function retry {\n" +
            "try=0; cmd=\"$@\"\n" +
            "until [ $try -ge $retries ]; do\n" +
            "\tif [ \"$try\" -ge \"1\" ]; then echo starting retry $try; sleep $retryDelaySeconds; fi\n" +
            "\ttry=$((try+1)); eval $cmd; exitCode=$?\n" +
            "\tif [ \"$exitCode\" == \"0\" ]; then return $exitCode; fi\n" +
            "done\n" +
            "echo \"Command failed after $retries retries: $cmd\"; exit $exitCode\n" +
            "}\n";
    return settings + retryFunction;
}
/**
 * Generates the contents of the replicate shell script.
 * <p>
 * Side effect: points {@code logDir} at {@code <jobDir>/log} and initializes that directory.
 *
 * @param replicateAllBackups forwarded when appending commands for the regular replicas;
 *                            failure-recovery replicas are always appended with {@code true}
 * @return the full bash script text
 * @throws IOException if the log directory cannot be initialized or a path cannot be resolved
 */
private String generateReplicateSHScript(boolean replicateAllBackups) throws IOException {
    logDir = new File(jobDir, "log");
    LessFiles.initDirectory(logDir);
    StringBuilder script = new StringBuilder("#!/bin/bash\n");
    script.append(makeRetryDefinition());
    script.append(Minion.echoWithDate_cmd + "Deleting environment lock files in preparation for replication\n");
    String liveDirPath = jobDir.getCanonicalPath();
    script.append("find " + liveDirPath + " -name je.lck -print -exec rm {} \\;\n");
    script.append("find " + liveDirPath + " -name je.info.0 -print -exec rm {} \\;\n");
    // Failure-recovery replicas come first and are always appended with the "all backups" flag set.
    appendReplicas(script, failureRecoveryReplicas, true);
    // Then the existing replicas, honoring the caller's flag.
    appendReplicas(script, replicas, replicateAllBackups);
    script.append(Minion.echoWithDate_cmd + "Finished replicating successfully\n");
    return script.toString();
}
/**
 * Appends the replicate commands for each of the given replica targets to the script.
 * <p>
 * A target is skipped when it has no host UUID, when it refers to this minion, or when no
 * commands could be assembled for it. Skipping one target does not affect the others; the
 * previous implementation returned from the whole method at the first skipped target, so every
 * replica listed after it was silently left out of the script.
 *
 * @param bash                script being built
 * @param replicas            targets to replicate to; may be null, in which case nothing is appended
 * @param replicateAllBackups forwarded to the command assembly for each target
 * @throws IOException propagated from command assembly
 */
private void appendReplicas(StringBuilder bash, ReplicaTarget[] replicas, boolean replicateAllBackups) throws IOException {
    if (replicas == null) {
        return;
    }
    for (ReplicaTarget replica : replicas) {
        if (replica.getHostUuid() == null || replica.getHostUuid().equals(minion.uuid)) {
            // Nothing to copy for an unidentified host or for ourselves; move on to the next target.
            continue;
        }
        List<String> replicateCommands = assembleReplicateCommandAndInformSpawn(replica, replicateAllBackups);
        if (replicateCommands == null || replicateCommands.isEmpty()) {
            // No commands for this target; the remaining targets must still be processed.
            continue;
        }
        String action = "replicating to " + replica.getHost() + " uuid=" + replica.getHostUuid();
        appendCommandsWithStartFinishMessages(bash, action, replicateCommands, minion.replicateCommandDelaySeconds);
    }
}
/**
 * Generates the wrapper ("run") script that launches {@code shName} in the background from the
 * job directory, writes the child's pid to {@code pidPath}, waits for it, and writes its exit
 * code to {@code donePath}. The wrapper's own output is appended to the task's log.out/log.err.
 * <p>
 * Side effect: if either log file handle is unset, both are pointed at {@code <jobDir>/log}.
 *
 * @param shName   path of the script to execute
 * @param pidPath  path of the file that receives the pid
 * @param donePath path of the file that receives the exit code
 * @return the full bash script text
 * @throws IOException if a canonical path cannot be resolved
 */
private String generateRunScript(String shName, String pidPath, String donePath) throws IOException {
    if (logOut == null || logErr == null) {
        File logRoot = new File(jobDir, "log");
        logOut = new File(logRoot, "log.out");
        logErr = new File(logRoot, "log.err");
    }
    String workDir = jobDir.getCanonicalPath();
    String stdoutPath = logOut.getCanonicalPath();
    String stderrPath = logErr.getCanonicalPath();
    return "#!/bin/bash\n"
            + "(\n"
            + "\t cd " + workDir + "\n"
            + "\t (bash " + shName + ") &\n"
            + "\t pid=$!\n"
            + "\t echo ${pid} > " + pidPath + "\n"
            + "\t exit=0\n"
            + "\t wait ${pid} || exit=$?\n"
            + "\t echo ${exit} > " + donePath + "\n"
            + "\t exit ${exit};\n"
            + ") >> " + stdoutPath + " 2>> " + stderrPath + " &";
}
/**
 * Generates the contents of the backup shell script: local backup commands, then backup
 * commands for each remote replica, then the accumulated symlink and delete commands.
 * <p>
 * Side effect: points {@code logDir} at {@code <jobDir>/log} and initializes that directory.
 *
 * @param replicas replica targets to back up on; may be null. Targets without a host UUID and
 *                 targets that refer to this minion are skipped.
 * @return the full bash script text
 * @throws IOException if the log directory cannot be initialized or a path cannot be resolved
 */
private String generateBackupSHScript(ReplicaTarget[] replicas) throws IOException {
    logDir = new File(jobDir, "log");
    LessFiles.initDirectory(logDir);
    StringBuilder script = new StringBuilder("#!/bin/bash\n");
    script.append("cd " + jobDir.getCanonicalPath() + "\n");
    script.append(makeRetryDefinition());
    // These two lists are filled in by every assembleBackupCommandsForHost call and emitted last.
    List<String> symlinkCommands = new ArrayList<>();
    List<String> deleteCommands = new ArrayList<>();
    // A single timestamp is shared by the local and all remote command sets.
    long now = System.currentTimeMillis();
    List<String> localCommands = assembleBackupCommandsForHost(true, null, symlinkCommands, deleteCommands, now);
    appendCommandsWithStartFinishMessages(script, "updating local backups", localCommands, minion.backupCommandDelaySeconds);
    ReplicaTarget[] targets = (replicas != null) ? replicas : new ReplicaTarget[0];
    for (ReplicaTarget target : targets) {
        boolean isRemoteHost = target.getHostUuid() != null && !target.getHostUuid().equals(minion.uuid);
        if (isRemoteHost) {
            String action = "updating backups on " + target.getHost() + " uuid=" + target.getHostUuid();
            List<String> remoteCommands = assembleBackupCommandsForHost(false, target, symlinkCommands, deleteCommands, now);
            appendCommandsWithStartFinishMessages(script, action, remoteCommands, minion.backupCommandDelaySeconds);
        }
    }
    appendCommandsWithStartFinishMessages(script, "updating symlinks", symlinkCommands, minion.backupCommandDelaySeconds);
    appendCommandsWithStartFinishMessages(script, "deleting old backups", deleteCommands, minion.backupCommandDelaySeconds);
    script.append(Minion.echoWithDate_cmd + "Finished backing up successfully\n");
    return script.toString();
}
/**
 * Appends a group of shell commands to the script, bracketed by dated "Started"/"Finished"
 * echo lines. Each command is suffixed with {@code && \} and a newline, so the commands and
 * the trailing "Finished" echo form one {@code &&}-chained shell statement.
 *
 * @param builder      script being built
 * @param action       human-readable description used in the start/finish messages
 * @param commands     commands to append, in order
 * @param delaySeconds if positive, a {@code sleep} of this many seconds is chained before each command
 */
private void appendCommandsWithStartFinishMessages(StringBuilder builder, String action, List<String> commands, int delaySeconds) {
    builder.append(Minion.echoWithDate_cmd + " Started " + action + " \n");
    final boolean throttled = delaySeconds > 0;
    for (String command : commands) {
        String chained = command + " && \\\n";
        if (throttled) {
            chained = "sleep " + delaySeconds + " && \\\n" + chained;
        }
        builder.append(chained);
    }
    builder.append(Minion.echoWithDate_cmd + " Finished " + action + " \n");
}
/**
 * Checks whether this task is free to start running / replicating / backing up.
 * If it is already doing one of those, the matching status message is re-sent to describe
 * the current activity and the request should be rejected by the caller.
 * If the task is idle but a work item thread is still referenced, that thread is interrupted
 * and the reference cleared.
 *
 * @return true only if the task was really idle.
 */
private boolean testTaskIdle() {
    if (isRunning()) {
        minion.sendStatusMessage(new StatusTaskBegin(minion.uuid, id, node));
        return false;
    }
    if (isReplicating()) {
        minion.sendStatusMessage(new StatusTaskReplicate(minion.uuid, id, node, false));
        return false;
    }
    if (isBackingUp()) {
        minion.sendStatusMessage(new StatusTaskBackup(minion.uuid, id, node));
        return false;
    }
    // Idle from here on; drop any leftover watcher thread.
    if (workItemThread != null) {
        log.warn("clearing workItem for idle task {}", getName());
        workItemThread.interrupt();
        workItemThread = null;
    }
    return true;
}
/**
 * Tells whether the pid read from the given pid file belongs to an active process.
 *
 * @param pidFile file to read the pid from
 * @return false if no pid could be read; otherwise the result of the active-process lookup
 */
boolean isProcessRunning(File pidFile) {
    Integer pid = ProcessUtils.getPID(pidFile);
    if (pid == null) {
        return false;
    }
    return ProcessUtils.activeProcessExistsWithPid(pid, minion.rootDir);
}
/**
 * Creates the done file when the process named by the pid file is not (or no longer) active.
 * Does nothing if either argument is null or the done file already exists. A failure to
 * create the file is logged, never thrown.
 *
 * @param pidFile  file holding the pid of the process to check
 * @param doneFile marker file to create when no such process is active
 */
protected void createDoneFileIfNoProcessRunning(File pidFile, File doneFile) {
    if (doneFile == null || pidFile == null || doneFile.exists()) {
        return;
    }
    boolean ok;
    try {
        Integer pid = ProcessUtils.getPID(pidFile);
        boolean processAlive = pid != null && ProcessUtils.activeProcessExistsWithPid(pid, minion.rootDir);
        // A live process means there is nothing to do; otherwise make sure the marker exists.
        ok = processAlive || doneFile.exists() || doneFile.createNewFile();
    } catch (IOException io) {
        ok = false;
        log.warn("[task.state.check] exception when creating done file", io);
    }
    if (!ok) {
        log.warn("[task.state.check] failed to create done file for task {} path {}", getName(), doneFile);
    }
}
/**
 * @return this task's display name, formed as {@code <id>/<node>}
 */
public String getName() {
    final String separator = "/";
    return id + separator + node;
}
/**
 * @return the job directory currently assigned to this task (may be unset)
 */
public File getJobDir() {
    return this.jobDir;
}
/**
 * Returns the task's port, reading it from the port file on first use and caching the result.
 * <p>
 * The file contents are trimmed before parsing: {@link Integer#parseInt(String)} rejects
 * surrounding whitespace, so a trailing newline in the file would otherwise make the port
 * unreadable. Any failure while reading or parsing is logged and leaves the port unset.
 *
 * @return the cached or freshly read port, or null if it is not (yet) available
 */
public Integer getPort() {
    try {
        if (port == null && jobPort.exists()) {
            String rawPort = LessBytes.toString(LessFiles.read(jobPort));
            port = Integer.parseInt(rawPort.trim());
        }
    } catch (Exception ex) {
        // Best effort: callers treat a null port as "unknown".
        log.warn("[task.getPort] unable to read port for task " + getName(), ex);
    }
    return port;
}
/**
 * Placeholder for deleting this task's data; currently does nothing.
 * TODO hookup to a job clean cmd at some point (for testing mostly)
 *
 * @return always false
 */
public boolean deleteData() {
    return false;
}
/**
 * @return true when a start time is recorded and the job's done file does not exist;
 *         false when the done file handle is unset
 */
public boolean isRunning() {
    // no checking for process here since this doesn't seem to be broken like the others
    return jobDone != null && this.startTime > 0 && !jobDone.exists();
}
/**
 * @return true when the task is not running, a replicate start time is recorded, the
 *         replicate done file does not exist, and the replicate pid refers to an active process
 */
public boolean isReplicating() {
    if (replicateDone == null || isRunning()) {
        return false;
    }
    if (replicateStartTime <= 0 || replicateDone.exists()) {
        return false;
    }
    return isProcessRunning(replicatePid);
}
/**
 * @return true when the task is neither running nor replicating, a backup start time is
 *         recorded, the backup done file does not exist, and the backup pid refers to an
 *         active process
 */
public boolean isBackingUp() {
    if (backupDone == null || isRunning() || isReplicating()) {
        return false;
    }
    if (backupStartTime <= 0 || backupDone.exists()) {
        return false;
    }
    return isProcessRunning(backupPid);
}
/**
 * Maps the task's current activity to a {@link JobTaskState}; running takes precedence over
 * replicating, which takes precedence over backing up.
 *
 * @return BUSY, REPLICATE, BACKUP, or IDLE when none of the activities is in progress
 */
public JobTaskState getTaskState() {
    if (this.isRunning()) {
        return JobTaskState.BUSY;
    }
    if (this.isReplicating()) {
        return JobTaskState.REPLICATE;
    }
    return this.isBackingUp() ? JobTaskState.BACKUP : JobTaskState.IDLE;
}
/**
 * @return a one-element array holding the pid file of the activity currently in progress
 *         (run, replicate or backup, checked in that order), or null when the task is idle
 */
public File[] getActivePidFiles() {
    final File activePid;
    if (isRunning()) {
        activePid = jobPid;
    } else if (isReplicating()) {
        activePid = replicatePid;
    } else if (isBackingUp()) {
        activePid = backupPid;
    } else {
        return null;
    }
    return new File[]{activePid};
}
/**
 * Stops whatever this task is currently doing.
 * If the task is replicating, any active rsync found for it is looked up beforehand and
 * killed with {@code kill -9} after the pid-file based stop has been issued.
 *
 * @param kill passed through to {@link #stopWait(File[], boolean)}
 * @return false when there are no active pid files; otherwise the result of the pid-file based stop
 */
public boolean stopWait(boolean kill) {
    File[] activePidFiles = getActivePidFiles();
    // Look the rsync up before stopping anything, while the task still reports as replicating.
    Integer rsyncPid = isReplicating() ? ProcessUtils.findActiveRsync(id, node) : null;
    boolean stopped = false;
    if (activePidFiles != null) {
        stopped = stopWait(activePidFiles, kill);
    }
    if (rsyncPid != null) {
        // Need to kill the rsync after the replicate script to avoid doing a retry
        ProcessUtils.shell("kill -9 " + rsyncPid, minion.rootDir);
    }
    return stopped;
}
/**
 * Sends a stop or kill signal to the process named by each of the given pid files.
 * <p>
 * When {@code kill} is true, the current activity's start time is reset first and done files
 * are created for any activity whose process is no longer alive.
 * <p>
 * A pid is never signalled when it is this minion's own pid or when its command line looks like
 * a minion/mss/mqworker process; in those cases the pid is skipped (no {@code job.stopped}
 * marker, no signal) and the method reports failure. Previously these guards only logged and
 * the signal was sent anyway. If the command line cannot be read, failure is reported but the
 * signal is still attempted, as before.
 *
 * @param pidFiles pid files of the processes to stop
 * @param kill     true to send {@code kill -3} followed by {@code kill -9}; false for a plain {@code kill}
 * @return true only if every pid was resolved, passed the safety checks and was signalled
 *         without error; false also when an unexpected exception interrupted the procedure
 */
public boolean stopWait(File[] pidFiles, boolean kill) {
    boolean result = true;
    boolean isRunning = isRunning();
    try {
        if (kill) {
            resetStartTime();
            log.warn("[stopWait] creating done files for {} if they do not exist", getName());
            // Guard against an unset done file so that a missing handle cannot abort the stop.
            File doneParent = (jobDone == null) ? null : jobDone.getParentFile();
            if (doneParent == null || !doneParent.exists()) {
                log.warn("The directory {} does not exist.", doneParent);
            } else {
                createDoneFileIfNoProcessRunning(jobPid, jobDone);
                createDoneFileIfNoProcessRunning(replicatePid, replicateDone);
                createDoneFileIfNoProcessRunning(backupPid, backupDone);
            }
        }
        for (File pidFile : pidFiles) {
            Integer pid = ProcessUtils.getPID(pidFile);
            if (pid == null) {
                // Label matches the requested mode (the two words used to be swapped).
                log.warn("{}Wait failed with null pid for {}", kill ? "kill" : "stop", getName());
                result = false;
                continue;
            }
            if (pid.equals(minion.minionPid)) {
                log.warn("[minion.kill] tried to kill my own process. pid: {}", pid);
                result = false;
                continue; // never signal ourselves
            }
            String cmd = ProcessUtils.getCmdLine(pid);
            if (cmd == null) {
                log.warn("[minion.kill] unable to read cmdline, so it seems unlikely the process is running, ret false");
                result = false;
                // Fall through: the signal is still attempted in case the process does exist.
            } else {
                log.warn("[minion.kill] about to kill pid {} with cmd line: {}", pid, cmd);
                if (cmd.contains(" minion") || cmd.contains(" mss") || cmd.contains(" mqworker")) {
                    log.warn("It looked like we are trying to kill an Important Process (TM), returning false instead");
                    result = false;
                    continue; // refuse to signal infrastructure processes
                }
            }
            if (isRunning) {
                jobStopped = new File(jobDir, "job.stopped");
                if (!jobStopped.createNewFile()) {
                    log.warn("Failed to create job.stopped file for stopped job {}", getName());
                }
            }
            if (kill) {
                log.warn("[minion.kill] killing pid:{} hard", pid);
                ProcessUtils.shell("kill -3 " + pid, minion.rootDir);
                result &= ProcessUtils.shell("kill -9 " + pid, minion.rootDir) >= 0;
            } else {
                log.warn("[minion.kill] killing pid:{} nice", pid);
                result &= ProcessUtils.shell("kill " + pid, minion.rootDir) >= 0;
            }
        }
    } catch (Exception ex) {
        log.warn("", ex);
        // An exception means at least one pid was not handled; do not report success.
        result = false;
    }
    return result;
}
/**
 * Zeroes the start time of the activity currently in progress (run, replicate or backup,
 * checked in that order; at most one is reset) and then asks the minion to write its state.
 */
private void resetStartTime() {
    boolean running = isRunning();
    boolean replicating = !running && isReplicating();
    boolean backingUp = !running && !replicating && isBackingUp();
    if (running) {
        startTime = 0;
    }
    if (replicating) {
        replicateStartTime = 0;
    }
    if (backingUp) {
        backupStartTime = 0;
    }
    minion.writeState();
}
/**
 * @return a handle to the {@code live} directory under the task root
 */
public File getLiveDir() {
    final File liveDir = new File(taskRoot, "live");
    return liveDir;
}
/**
 * @return a handle to the {@code config} directory under the task root
 */
public File getConfigDir() {
    final File configDir = new File(taskRoot, "config");
    return configDir;
}
/**
 * Reads the task's {@code job.profile} file from the job directory.
 *
 * @return the file contents, or an empty string if the file is missing or cannot be read
 */
public String profile() {
    File profileFile = new File(jobDir, "job.profile");
    if (!profileFile.exists()) {
        return "";
    }
    try {
        return LessBytes.toString(LessFiles.read(profileFile));
    } catch (IOException e) {
        log.warn("IO problem while trying to read job.profile", e);
        return "";
    }
}
/**
 * @param runTime value to store as this task's run time
 */
public void setRuntime(long runTime) {
    this.runTime = runTime;
}
/**
 * @param replicas replica targets to store for this task; the array is kept by reference
 */
public void setReplicas(ReplicaTarget[] replicas) {
    this.replicas = replicas;
}
/**
 * @param replicas failure-recovery replica targets to store; the array is kept by reference
 */
public void setFailureRecoveryReplicas(ReplicaTarget[] replicas) {
    this.failureRecoveryReplicas = replicas;
}
/**
 * @return the stored failure-recovery replica targets (the internal array, not a copy)
 */
public ReplicaTarget[] getFailureRecoveryReplicas() {
    return this.failureRecoveryReplicas;
}
/**
 * @return the stored replica targets (the internal array, not a copy)
 */
public ReplicaTarget[] getReplicas() {
    return this.replicas;
}
/**
 * Tells whether a {@code job.stopped} marker exists in the job directory. The marker handle
 * is created on first use and kept for later calls.
 *
 * @return true if the marker file exists
 */
public boolean wasStopped() {
    File marker = jobStopped;
    if (marker == null) {
        marker = new File(jobDir, "job.stopped");
        jobStopped = marker;
    }
    return marker.exists();
}
/**
 * @return the stored rebalance source
 */
public String getRebalanceSource() {
    return this.rebalanceSource;
}
/**
 * @param rebalanceSource value to store as the rebalance source
 */
public void setRebalanceSource(String rebalanceSource) {
    this.rebalanceSource = rebalanceSource;
}
/**
 * @return the stored was-queued flag
 */
public boolean wasQueued() {
    return this.wasQueued;
}
/**
 * @param wasQueued value to store as the was-queued flag
 */
public void setWasQueued(boolean wasQueued) {
    this.wasQueued = wasQueued;
}
/**
 * @return the stored rebalance target
 */
public String getRebalanceTarget() {
    return this.rebalanceTarget;
}
/**
 * @param rebalanceTarget value to store as the rebalance target
 */
public void setRebalanceTarget(String rebalanceTarget) {
    this.rebalanceTarget = rebalanceTarget;
}
/**
 * @return a short description of this task listing its id, node and job directory
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("JobTask{");
    text.append("id='").append(id).append('\'');
    text.append(", node=").append(node);
    text.append(", jobDir=").append(jobDir);
    text.append('}');
    return text.toString();
}
/**
 * Attempt to identify the task's last end status from the file system.
 * <p>
 * The done file's contents are trimmed before being inspected, so a file holding only
 * whitespace is treated like an empty one. Contents that are not a valid integer are reported
 * as {@code EXIT_SCRIPT_EXEC_ERROR} rather than letting a {@link NumberFormatException} escape
 * to the caller.
 *
 * @return An integer representing the task's last exit code: 0 if there is no done file or it
 *         is blank, the parsed exit code otherwise, or {@code EXIT_SCRIPT_EXEC_ERROR} if the
 *         file cannot be read or parsed
 */
public int findLastJobStatus() {
    if (jobDone == null || !jobDone.exists()) {
        return 0;
    }
    try {
        String jobDoneString = LessBytes.toString(LessFiles.read(jobDone));
        String exitCode = (jobDoneString == null) ? "" : jobDoneString.trim();
        if (exitCode.isEmpty()) {
            return 0;
        }
        return Integer.parseInt(exitCode);
    } catch (IOException e) {
        return JobTaskErrorCode.EXIT_SCRIPT_EXEC_ERROR;
    } catch (NumberFormatException e) {
        log.warn("[task.state.check] unparseable exit code in done file " + jobDone, e);
        return JobTaskErrorCode.EXIT_SCRIPT_EXEC_ERROR;
    }
}
}