/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.job;
import java.io.File;
import java.util.List;
import java.util.concurrent.TimeUnit;
import com.addthis.hydra.minion.JobTask;
import com.addthis.hydra.minion.MinionWorkItem;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Meter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Work item that tracks a running {@link JobTask}: it forwards start-time and stats bookkeeping to the task,
 * enforces the kick's configured run time while waiting, reports the final status, and optionally performs a
 * single revert + retry when the process exits with the JVM abort exit code.
 */
public class RunTaskWorkItem extends MinionWorkItem {

    private static final Logger log = LoggerFactory.getLogger(RunTaskWorkItem.class);

    private static final Meter autoRetryMeter = Metrics.newMeter(RunTaskWorkItem.class,
            "autoRetries", "autoRetries", TimeUnit.HOURS);

    /** Exit code that java returns for JVM errors: 128 + 6 (SIGABRT). */
    private static final int JVM_ABORT_EXIT_CODE = 134;

    /** Pause applied after each over-time check so the wait loop does not spin. */
    private static final long TIME_STOP_SLEEP_MILLIS = 200;

    /** Port reported by the task; stays {@code null} until the task first returns a non-null port. */
    private Integer port = null;

    /** Remaining number of times {@code task.stopWait(false)} may be issued for exceeding the run time. */
    private int maxStops = 2;

    /** Whether a revert + retry may be attempted after a JVM-abort exit. */
    private final boolean autoRetry;

    /**
     * @param pidFile   passed through to {@link MinionWorkItem}
     * @param runFile   passed through to {@link MinionWorkItem}
     * @param doneFile  passed through to {@link MinionWorkItem}
     * @param task      the task this work item tracks
     * @param execute   passed through to {@link MinionWorkItem}
     * @param autoRetry whether {@link #waitForProcessExit()} may revert to a backup and retry
     */
    public RunTaskWorkItem(File pidFile,
                           File runFile,
                           File doneFile,
                           JobTask task,
                           boolean execute,
                           boolean autoRetry) {
        super(pidFile, runFile, doneFile, task, execute);
        this.autoRetry = autoRetry;
    }

    /** Delegates to the task to refresh its file statistics. */
    @Override
    public void updateStats() {
        task.updateFileStats();
    }

    /** @return the start time recorded on the task */
    @Override
    public long getStartTime() {
        return task.getStartTime();
    }

    /** @param start the start time to record on the task */
    @Override
    public void setStartTime(long start) {
        task.setStartTime(start);
    }

    /**
     * Reports the outcome of the run. On a zero exit code the task's replicate step is executed while holding
     * the task's monitor; on any other exit code the end status is sent directly.
     *
     * @param exit the exit code of the task process
     * @throws Exception propagated from the task calls
     */
    @Override
    public void sendFinishStatusMessages(int exit) throws Exception {
        if (exit != 0) {
            task.sendEndStatus(exit);
            return;
        }
        synchronized (task) {
            task.execReplicate(null, null, false, true, false);
        }
    }

    /**
     * Performs the periodic work done while waiting for the task to finish: publishes the task's port the first
     * time it becomes available, and, when the kick defines a positive run time that has been exceeded, asks the
     * task to stop (at most twice over the lifetime of this work item) and then pauses briefly.
     */
    @Override
    public void executeWaitingCommands() {
        if (port == null) {
            port = task.getPort();
            if (port != null) {
                task.sendPort();
            }
        }
        long runtime = task.getKick().getRunTime();
        boolean overTime = (runtime > 0) && ((System.currentTimeMillis() - getStartTime()) > runtime);
        if (!overTime) {
            return;
        }
        log.warn("[exit.wait] time stop {} @ {}", task.getName(), runtime);
        // Only consume a stop attempt when one is actually issued, so the counter never runs below zero.
        if (maxStops > 0) {
            task.stopWait(false);
            maxStops--;
        }
        try {
            Thread.sleep(TIME_STOP_SLEEP_MILLIS);
        } catch (InterruptedException e) {
            log.warn("[exit.wait] time stop interrupted", e);
            // Catching InterruptedException clears the flag; restore it so callers can still observe the interrupt.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * If a task has autoRetry enabled, sometimes revert + retry to get around transient errors. The conditions under
     * which or the number of times that revert + retry may be performed is undefined and may vary over time. AutoRetry
     * should only be enabled on jobs where this is acceptable.
     *
     * @return the exit code of the last run that was waited on, or {@code -1} if no exit string was available
     * @throws Exception propagated from restarting the process or from the task calls
     */
    @Override
    public int waitForProcessExit() throws Exception {
        int lastExit = waitAndGetExit();
        boolean shouldRetry = autoRetry
                // Only auto retry for exit codes that java returns for JVM errors (128 + 6 (SIGABRT))
                && (lastExit == JVM_ABORT_EXIT_CODE)
                // Do not retry if the task was manually killed
                && !task.wasStopped();
        if (!shouldRetry) {
            return lastExit;
        }
        List<String> backups = task.getBackupsOrdered();
        // After failing at least once, put more info into the minion log
        log.warn("[exit.wait] attempting retry for {} due to failed exit={}", task.getName(), lastExit);
        if (backups.isEmpty()) {
            log.warn("[exit.wait] exhausted backups for {}; sending error code", task.getName());
            return lastExit;
        }
        String backupName = backups.get(0);
        // Remove the previous done file before retrying; the result is only reported in the log line below.
        boolean doneFileDeleted = doneFile.delete();
        log.warn("[exit.wait] restoring {} to {} and retrying, delete={}",
                task.getJobDir(), backupName, doneFileDeleted);
        File backupDir = new File(task.getJobDir().getParentFile(), backupName);
        if (task.promoteBackupToLive(backupDir, task.getLiveDir())) {
            autoRetryMeter.mark();
            startAndWaitForPid();
            lastExit = waitAndGetExit();
        } else {
            log.warn("cancelling retry for {} due to failed revert", task.getName());
        }
        return lastExit;
    }

    /**
     * Waits for the job.done file to exist and attempts to parse the exit code from it.
     *
     * @return the parsed exit status, or {@code -1} when the wait produced no exit string
     */
    private int waitAndGetExit() {
        String exitString = exitWait();
        if (exitString == null) {
            log.warn("{} exited with null", task.getName());
            return -1;
        }
        return getExitStatusFromString(exitString);
    }

    /**
     * Resets per-run state on the task. When the done file exists and a start time was recorded, the task's
     * runtime is first set to the done file's last-modified time minus the start time; afterwards the start
     * time is zeroed and the task's process reference is cleared.
     */
    @Override
    public void clear() {
        log.warn("[task.clear] {}", task.getName());
        if (doneFile.exists() && (getStartTime() > 0)) {
            task.setRuntime(doneFile.lastModified() - getStartTime());
        }
        setStartTime(0);
        task.setProcess(null);
    }
}