/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.minion;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import com.addthis.basis.util.LessBytes;
import com.addthis.basis.util.LessFiles;
import com.addthis.basis.util.Parameter;
import com.addthis.hydra.job.mq.CommandTaskKick;
import com.google.common.util.concurrent.Uninterruptibles;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public abstract class MinionWorkItem implements Runnable {
private static final Logger log = LoggerFactory.getLogger(MinionWorkItem.class);
protected File pidFile;
private File runFile;
protected File doneFile;
private boolean execute;
protected final JobTask task;
public MinionWorkItem(File pidFile, File runFile, File doneFile, JobTask task, boolean execute) {
this.pidFile = pidFile;
this.runFile = runFile;
this.doneFile = doneFile;
this.task = task;
this.execute = execute;
}
public abstract long getStartTime();
public abstract void setStartTime(long start);
private final int numPidFileTries = Parameter.intValue("minion.pid.file.retries", 1200);
/**
* Send out messages indicating the workitem has ended, based on the exit code
*
* @param exit The exit code: 0 for success, >0 for (possible) failure
* @throws Exception If messaging fails for whatever reason
*/
public abstract void sendFinishStatusMessages(int exit) throws Exception;
/**
* Commands to run while waiting for the workitem to finish -- for example, sending the port number to spawn
*/
public abstract void executeWaitingCommands();
/**
* Clear this workitem, blanking its start time and saving the Minion.JobTask
*/
public abstract void clear();
/**
* Update any relevant job stats (like total byte size in RunTaskWorkItem)
*/
public void updateStats() {
}
/**
* Run this workitem, including retries if appropriate
*/
@Override public void run() {
int exit = -1;
task.allocate();
boolean interrupted = false;
try {
startAndWaitForPid();
exit = waitForProcessExit();
} catch (InterruptedException ie) {
interrupted = true;
} catch (Exception e) {
log.warn("{} exception during script execution: {}", task.getName(), e, e);
} finally {
task.deallocate();
try {
if (!interrupted) {
sendFinishStatusMessages(exit);
}
} catch (Exception ex) {
log.warn("{} exception when sending exit status: {}", task.getName(), ex, ex);
}
}
log.warn("{} exited with {}", task.getName(), exit);
}
protected void startAndWaitForPid() throws IOException {
long start = System.currentTimeMillis();
if (execute) {
task.setProcess(Runtime.getRuntime().exec("sh " + runFile));
}
for (int j = 0; j < numPidFileTries && !pidFile.exists(); j++) {
// Watch for the pid file. If it still doesn't exist after some time, fail noisily.
Uninterruptibles.sleepUninterruptibly(1000, TimeUnit.MILLISECONDS);
}
if (!pidFile.exists()) {
// We must interrupt the task process so that the replicate won't suddenly kick in at a future time
task.interruptProcess();
String msg = "failed to find pid file for " + task.getName() + " at " + pidFile + " after waiting";
throw new RuntimeException(msg);
}
long waited = System.currentTimeMillis() - start;
String pid = null;
try {
pid = LessBytes.toString(LessFiles.read(pidFile)).trim();
} catch (FileNotFoundException ex) {
log.warn("{} pid file not found", task.getName());
}
if (waited > 500) {
log.warn("{} pid [{}] after {}ms", task.getName(), pid, waited);
}
log.debug("{} waiting for exit pid={}", task.getName(), pid);
}
protected int waitForProcessExit() throws Exception {
int exit = 0;
String exitString = exitWait();
if (exitString != null) {
exit = getExitStatusFromString(exitString);
} else {
log.warn("{} exited with null", task.getName());
}
return exit;
}
/**
* Wait for the workitem to finish, as indicated by the doneFile
*
* @return The exit code, assuming one was received
*/
public String exitWait() {
String name = task.getName();
CommandTaskKick kick = task.getKick();
if (kick != null) {
log.warn("[exit.wait] {} maxTime={} start={}", name, kick.getRunTime(), getStartTime());
}
try {
while (!task.isDeleted() && !doneFile.exists() && doneFile.getParentFile().exists()) {
Thread.sleep(100);
executeWaitingCommands();
task.createDoneFileIfNoProcessRunning(pidFile, doneFile);
}
Thread.sleep(100);
return LessBytes.toString(LessFiles.read(doneFile)).trim();
} catch (Exception ex) {
log.warn("", ex);
} finally {
updateStats();
clear();
removeFromTask();
}
return null;
}
private void removeFromTask() {
task.setWorkItemThread(null);
}
public int getExitStatusFromString(String exitString) {
int exit;
try {
exit = Integer.parseInt(exitString);
} catch (NumberFormatException ne) {
if (exitString == null) {
exit = 1337;
} else {
exit = 0;
}
}
// re-map 143 (exited on kill -1) to success
if (exit == 143) {
exit = 0;
}
return exit;
}
}