package net.sourceforge.seqware.pipeline.workflowV2.engine.oozie; import io.seqware.pipeline.api.WorkflowEngine; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.text.MessageFormat; import java.util.Properties; import net.sourceforge.seqware.common.module.ReturnValue; import net.sourceforge.seqware.common.util.Log; import static net.sourceforge.seqware.common.util.Rethrow.rethrow; import net.sourceforge.seqware.common.util.filetools.FileTools; import net.sourceforge.seqware.pipeline.workflowV2.AbstractWorkflowDataModel; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.oozie.client.OozieClient; import org.apache.oozie.client.OozieClientException; import org.apache.oozie.client.WorkflowAction; import org.apache.oozie.client.WorkflowJob; import org.apache.oozie.client.WorkflowJob.Status; /** * This is the implementation of the WorkflowEngine with a Oozie back-end. * * @author dyuen */ public class OozieWorkflowEngine implements WorkflowEngine { private String jobId; private AbstractWorkflowDataModel dataModel; private final boolean useSge; private final String threadsSgeParamFormat; private final String maxMemorySgeParamFormat; private final File nfsWorkDir; private final Configuration conf; private final Path hdfsWorkDir; /** * * @param objectModel * @param useSge * @param threadsSgeParamFormat * @param maxMemorySgeParamFormat * @param createDirectories * true when creating the engine to launch a job */ public OozieWorkflowEngine(AbstractWorkflowDataModel objectModel, boolean useSge, String threadsSgeParamFormat, String maxMemorySgeParamFormat, boolean createDirectories) { this.dataModel = objectModel; this.useSge = useSge; this.threadsSgeParamFormat = threadsSgeParamFormat; this.maxMemorySgeParamFormat = maxMemorySgeParamFormat; this.conf = initConf(objectModel); if (createDirectories) { this.nfsWorkDir = initNfsWorkDir(objectModel); this.hdfsWorkDir = initHdfsWorkDir(objectModel, conf, this.nfsWorkDir); } else { this.nfsWorkDir = null; this.hdfsWorkDir = null; } } public static File initNfsWorkDir(AbstractWorkflowDataModel model) { try { File nfsWorkDir = FileTools.createDirectoryWithUniqueName(new File(model.getEnv().getOOZIE_WORK_DIR()), "oozie"); nfsWorkDir.setWritable(true, false); System.out.println("Using working directory: " + nfsWorkDir.getAbsolutePath()); return nfsWorkDir; } catch (IOException e) { throw rethrow(e); } } public static Configuration initConf(AbstractWorkflowDataModel model) { Configuration conf = new Configuration(); conf.set("mapred.job.tracker", model.getEnv().getMapred_job_tracker()); if (model.getEnv().getFs_default_name() != null) conf.set("fs.default.name", model.getEnv().getFs_default_name()); if (model.getEnv().getFs_defaultFS() != null) conf.set("fs.defaultFS", model.getEnv().getFs_defaultFS()); conf.set("fs.hdfs.impl", model.getEnv().getFs_hdfs_impl()); return conf; } public static Path initHdfsWorkDir(AbstractWorkflowDataModel model, Configuration conf, File nfsWorkDir) { FileSystem fileSystem = null; try { fileSystem = FileSystem.get(conf); Path path = new Path(model.getEnv().getOOZIE_APP_ROOT() + "/" + nfsWorkDir.getName()); fileSystem.mkdirs(path); return fileSystem.getFileStatus(path).getPath(); } catch (IOException e) { throw rethrow(e); } finally { if (fileSystem != null) { try { fileSystem.close(); } catch (IOException e) { // gulp } } } } private static String seqwareJarPath(AbstractWorkflowDataModel objectModel) { return objectModel.getWorkflowBaseDir() + "/lib/seqware-distribution-" + objectModel.getTags().get("seqware_version") + "-full.jar"; } @Override public void prepareWorkflow(AbstractWorkflowDataModel objectModel) { // parse objectmodel this.dataModel = objectModel; this.populateNfsWorkDir(); this.parseDataModel(objectModel, useSge, new File(seqwareJarPath(objectModel))); this.populateHdfsWorkDir(objectModel); } @Override public ReturnValue runWorkflow() { ReturnValue ret = new ReturnValue(ReturnValue.SUCCESS); OozieClient wc = this.getOozieClient(); try { Properties propertiesConf = wc.createConfiguration(); propertiesConf.setProperty(OozieClient.APP_PATH, hdfsWorkDir.toString()); propertiesConf.setProperty("jobTracker", this.dataModel.getEnv().getOOZIE_JOBTRACKER()); propertiesConf.setProperty("nameNode", this.dataModel.getEnv().getOOZIE_NAMENODE()); propertiesConf.setProperty("queueName", this.dataModel.getEnv().getOOZIE_QUEUENAME()); jobId = wc.run(propertiesConf); Log.stdout("Submitted Oozie job: " + jobId); } catch (Exception e) { throw rethrow(e); } return ret; } @Override public ReturnValue watchWorkflow(String jobToken) { OozieClient wc = this.getOozieClient(); try { Properties localConf = wc.createConfiguration(); // localConf.setProperty(OozieClient.APP_PATH, hdfsWorkDir.toString()); localConf.setProperty("jobTracker", this.dataModel.getEnv().getOOZIE_JOBTRACKER()); localConf.setProperty("nameNode", this.dataModel.getEnv().getOOZIE_NAMENODE()); localConf.setProperty("queueName", this.dataModel.getEnv().getOOZIE_QUEUENAME()); return watchWorkflowInternal(wc, jobToken, new ReturnValue()); } catch (Exception e) { throw rethrow(e); } } private ReturnValue watchWorkflowInternal(OozieClient wc, String jobId, ReturnValue ret) throws OozieClientException, InterruptedException { Log.stdout(""); Log.stdout("Polling workflow run status every 10 seconds."); Log.stdout("Terminating this program will NOT affect the running workflow."); Thread.sleep(2 * 1000); // Ensure that we can pull the job info from oozie int maxwait = 5; while (maxwait-- > 0) { try { wc.getJobInfo(jobId); // job info available break; } catch (Exception e) { if (maxwait == 0) { Log.stdout("\nTimed out waiting for workflow job to be available."); throw rethrow(e); } else { Log.stdout("\nWorkflow job pending ..."); Thread.sleep(5 * 1000); } } } while (wc.getJobInfo(jobId).getStatus() == WorkflowJob.Status.RUNNING) { Log.stdout("\nWorkflow job running ..."); printWorkflowInfo(wc.getJobInfo(jobId)); Thread.sleep(10 * 1000); } Log.stdout("\nWorkflow job completed ..."); WorkflowJob job = wc.getJobInfo(jobId); printWorkflowInfo(job); if (job.getStatus() != Status.SUCCEEDED) { ret = new ReturnValue(ReturnValue.FAILURE); } return ret; } private void printWorkflowInfo(WorkflowJob wf) { Log.stdout("Application Path : " + wf.getAppPath()); Log.stdout("Application Name : " + wf.getAppName()); Log.stdout("Application Status : " + wf.getStatus()); Log.stdout("Application Actions:"); for (WorkflowAction action : wf.getActions()) { Log.stdout(MessageFormat.format(" Name: {0} Type: {1} Status: {2}", action.getName(), action.getType(), action.getStatus())); } } /** * copy the local dir to HDFS */ private void populateHdfsWorkDir(AbstractWorkflowDataModel objectModel) { FileSystem fileSystem = null; try { fileSystem = FileSystem.get(conf); Path pathlib = new Path(hdfsWorkDir, "lib"); fileSystem.mkdirs(pathlib); copyFromLocal(fileSystem, nfsWorkDir + "/job.properties", hdfsWorkDir); copyFromLocal(fileSystem, nfsWorkDir + "/workflow.xml", hdfsWorkDir); if (!useSge) { // copy lib copyFromLocal(fileSystem, seqwareJarPath(objectModel), pathlib); } System.out.println("Files copied to " + nfsWorkDir); } catch (RuntimeException e) { throw e; } catch (Exception e) { throw new RuntimeException(e); } finally { if (fileSystem != null) { try { fileSystem.close(); } catch (IOException e) { // gulp } } } } /** * @throws IOException * */ private void populateNfsWorkDir() { try { File file = new File(nfsWorkDir, "job.properties"); try (FileWriter fw = new FileWriter(file)) { fw.write("nameNode=" + this.dataModel.getEnv().getOOZIE_NAMENODE() + "\n"); fw.write("jobTracker=" + this.dataModel.getEnv().getOOZIE_JOBTRACKER() + "\n"); fw.write("queueName=" + this.dataModel.getEnv().getOOZIE_QUEUENAME() + "\n"); fw.write("oozie.wf.application.path=" + this.hdfsWorkDir); } File lib = new File(this.nfsWorkDir, "lib"); lib.mkdir(); } catch (IOException e) { throw rethrow(e); } } @Override public String getWorkingDirectory() { return nfsWorkDir == null ? null : nfsWorkDir.getAbsolutePath(); } /** * return a workflow.xml for hadoop * * @param objectModel * @return */ private File parseDataModel(AbstractWorkflowDataModel objectModel, boolean useSge, File seqwareJar) { File file = new File(nfsWorkDir, "workflow.xml"); // generate dax OozieWorkflowXmlGenerator daxv2 = new OozieWorkflowXmlGenerator(); daxv2.generateWorkflowXml(objectModel, file.getAbsolutePath(), this.nfsWorkDir.getAbsolutePath(), hdfsWorkDir, useSge, seqwareJar, this.threadsSgeParamFormat, this.maxMemorySgeParamFormat); return file; } public static void copyFromLocal(FileSystem fileSystem, String source, Path dstPath) { try { Path srcPath = new Path(source); // Check if the file already exists if (!(fileSystem.exists(dstPath))) { System.out.println("No such destination " + dstPath); return; } fileSystem.copyFromLocalFile(srcPath, dstPath); } catch (RuntimeException e) { throw e; } catch (Exception e) { throw new RuntimeException(e); } } @Override public String getLookupToken() { return this.jobId; } private OozieClient getOozieClient() { OozieClient oc = new OozieClient(this.dataModel.getEnv().getOOZIE_URL()); return oc; } // @Override // public String getId() { // return this.jobId; // } // // @Override // public String getStatus(String id) { // OozieClient oc = this.getOozieClient(); // try { // WorkflowJob wfJob = oc.getJobInfo(id); // if (wfJob == null) // return null; // return wfJob.getStatus().toString(); // } catch (OozieClientException e) { // e.printStackTrace(); // return null; // } // } // // @Override // /** // * get the first failed job's error message // */ // public String getStdErr(String id) { // OozieClient oc = this.getOozieClient(); // StringBuilder sb = new StringBuilder(); // try { // WorkflowJob wfJob = oc.getJobInfo(id); // // if (wfJob == null) // return null; // for (WorkflowAction action : wfJob.getActions()) { // if (action.getErrorMessage() != null) { // sb.append(MessageFormat.format(" Name: {0} Type: {1} ErrorMessage: {2}", // action.getName(), // action.getType(), action.getErrorMessage())); // sb.append("\n"); // } // } // return sb.toString(); // } catch (OozieClientException e) { // e.printStackTrace(); // return null; // } // } // // @Override // public String getStdOut(String id) { // OozieClient oc = this.getOozieClient(); // StringBuilder sb = new StringBuilder(); // try { // WorkflowJob wfJob = oc.getJobInfo(id); // // if (wfJob == null) // return null; // for (WorkflowAction action : wfJob.getActions()) { // if (action.getErrorMessage() != null) { // sb.append(MessageFormat.format(" Name: {0} Type: {1} ErrorMessage: {2}", // action.getName(), // action.getType(), action.getStatus())); // sb.append("\n"); // } // } // return sb.toString(); // } catch (OozieClientException e) { // e.printStackTrace(); // return null; // } // } // // @Override // public String getStatus() { // return this.getStatus(this.getId()); // } }