package eu.dnetlib.iis.common.report;

import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.client.OozieClientException;
import org.apache.oozie.client.WorkflowAction;
import org.apache.oozie.client.WorkflowJob;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Lists;

import eu.dnetlib.iis.common.java.PortBindings;
import eu.dnetlib.iis.common.java.Process;
import eu.dnetlib.iis.common.java.io.DataStore;
import eu.dnetlib.iis.common.java.io.FileSystemPath;
import eu.dnetlib.iis.common.java.porttype.AvroPortType;
import eu.dnetlib.iis.common.java.porttype.PortType;
import eu.dnetlib.iis.common.oozie.OozieClientFactory;
import eu.dnetlib.iis.common.schemas.ReportEntry;

/**
 * Java workflow node process that builds an execution time report.
 * <p>
 * It reports the duration of oozie actions that were executed in the specified workflow job.
 * <p>
 * The process needs the <code>oozieServiceLoc</code> and <code>jobId</code> parameters
 * to connect to oozie and fetch the workflow job actions.
 * <p>
 * Every parameter whose name starts with <code>report.</code> defines a single report entry:
 * the remainder of the parameter name is the report key and the parameter value is
 * a comma separated list of action names whose durations are summed up under that key.
 * <p>
 * The process writes the built report into an avro datastore of {@link ReportEntry}s
 * at the location specified by the output port.
 *
 * @author madryk
 */
public class OozieTimeReportGenerator implements Process {

    private static final Logger log = LoggerFactory.getLogger(OozieTimeReportGenerator.class);

    private static final String REPORT_PORT_OUT_NAME = "report";

    private static final String WORKFLOW_JOB_ID_PARAM = "jobId";

    private static final String OOZIE_SERVICE_LOC_PARAM = "oozieServiceLoc";

    private static final String REPORT_PROPERTY_PREFIX = "report.";

    private static final char ACTION_NAMES_SEPARATOR = ',';


    // intentionally not final so that it can be substituted (e.g. in tests)
    private OozieClientFactory oozieClientFactory = new OozieClientFactory();


    //------------------------ LOGIC --------------------------

    /**
     * This process does not read any input port.
     */
    @Override
    public Map<String, PortType> getInputPorts() {
        return Collections.emptyMap();
    }

    /**
     * Declares a single avro output port named <code>report</code> holding {@link ReportEntry} records.
     */
    @Override
    public Map<String, PortType> getOutputPorts() {
        return Collections.singletonMap(REPORT_PORT_OUT_NAME, new AvroPortType(ReportEntry.SCHEMA$));
    }

    /**
     * Fetches the actions of the configured workflow job, sums up the durations of the actions
     * assigned to each report key and stores the resulting entries in the output datastore.
     * Report keys with a total duration that is not greater than zero are omitted.
     *
     * @param portBindings bindings providing the location of the <code>report</code> output port
     * @param conf hadoop configuration used to obtain the file system
     * @param parameters process parameters; must contain non-blank <code>oozieServiceLoc</code>
     *      and <code>jobId</code> values
     * @throws IllegalArgumentException when a required parameter is missing or blank
     */
    @Override
    public void run(PortBindings portBindings, Configuration conf, Map<String, String> parameters) throws Exception {

        String oozieUrl = requireParameter(parameters, OOZIE_SERVICE_LOC_PARAM);
        String workflowId = requireParameter(parameters, WORKFLOW_JOB_ID_PARAM);

        List<WorkflowAction> actions = fetchWorkflowActions(oozieUrl, workflowId);

        Map<String, List<String>> reportKeysToActionNames = mapReportKeysToActionNames(parameters);

        List<ReportEntry> reportEntries = Lists.newArrayList();

        for (Map.Entry<String, List<String>> reportKeyToActionNames : reportKeysToActionNames.entrySet()) {

            long totalDuration = 0L;
            for (String actionName : reportKeyToActionNames.getValue()) {
                totalDuration += fetchActionDuration(actions, actionName);
            }

            // keys without any measured time are not worth reporting
            if (totalDuration > 0) {
                reportEntries.add(
                        ReportEntryFactory.createDurationReportEntry(reportKeyToActionNames.getKey(), totalDuration));
            }
        }

        FileSystem fs = FileSystem.get(conf);
        Path reportPath = portBindings.getOutput().get(REPORT_PORT_OUT_NAME);

        DataStore.create(reportEntries, new FileSystemPath(fs, reportPath));
    }


    //------------------------ PRIVATE --------------------------

    /**
     * Returns the value of a mandatory parameter.
     *
     * @throws IllegalArgumentException when the parameter is missing or blank
     */
    private String requireParameter(Map<String, String> parameters, String parameterName) {
        String value = parameters.get(parameterName);
        if (StringUtils.isBlank(value)) {
            throw new IllegalArgumentException("required parameter is missing or blank: " + parameterName);
        }
        return value;
    }

    /**
     * Connects to the oozie service and returns the actions of the given workflow job.
     */
    private List<WorkflowAction> fetchWorkflowActions(String oozieUrl, String workflowId) throws OozieClientException {

        OozieClient oozieClient = oozieClientFactory.createOozieClient(oozieUrl);

        WorkflowJob job = oozieClient.getJobInfo(workflowId);

        return job.getActions();
    }

    /**
     * Returns the duration in milliseconds of the first action with the given name.
     * Returns 0 (and logs a warning) when there is no such action or when the action
     * lacks a start or an end time, so that a single unfinished action
     * does not break generation of the whole report.
     */
    private long fetchActionDuration(List<WorkflowAction> actions, String actionName) {

        for (WorkflowAction action : actions) {
            if (actionName.equals(action.getName())) {
                Date startDate = action.getStartTime();
                Date endDate = action.getEndTime();

                if (startDate == null || endDate == null) {
                    log.warn("action has no start or end time, its duration is skipped: {}", actionName);
                    return 0;
                }

                return endDate.getTime() - startDate.getTime();
            }
        }

        log.warn("no action with the name has been specified or executed: {}", actionName);
        return 0;
    }

    /**
     * Builds a map of report keys to action names out of the parameters prefixed with <code>report.</code>.
     * The prefix is stripped from the parameter name to form the report key.
     */
    private Map<String, List<String>> mapReportKeysToActionNames(Map<String, String> parameters) {

        return parameters.entrySet().stream()
                .filter(property -> property.getKey().startsWith(REPORT_PROPERTY_PREFIX))
                .map(property -> Pair.of(
                        property.getKey().substring(REPORT_PROPERTY_PREFIX.length()),
                        extractActionNames(property.getValue())))
                .collect(Collectors.toMap(Pair::getLeft, Pair::getRight));
    }

    /**
     * Splits a comma separated list of action names. Surrounding whitespace is removed
     * from every name and blank names are dropped, so that a value such as <code>"a, b"</code>
     * matches the actions <code>a</code> and <code>b</code>. A null value yields an empty list.
     */
    private List<String> extractActionNames(String actionNames) {

        List<String> names = Lists.newArrayList();

        // StringUtils.split returns null for null input
        String[] tokens = StringUtils.split(actionNames, ACTION_NAMES_SEPARATOR);
        if (tokens == null) {
            return names;
        }

        for (String token : tokens) {
            String name = token.trim();
            if (!name.isEmpty()) {
                names.add(name);
            }
        }

        return names;
    }
}