/**
* Copyright 2007-2008 University Of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.isi.pegasus.planner.code.generator;
import edu.isi.pegasus.common.logging.LogFormatter;
import edu.isi.pegasus.common.logging.LogFormatterFactory;
import edu.isi.pegasus.common.logging.LogManager;
import edu.isi.pegasus.planner.classes.ADag;
import edu.isi.pegasus.planner.classes.AggregatedJob;
import edu.isi.pegasus.planner.classes.Job;
import edu.isi.pegasus.planner.classes.PegasusBag;
import edu.isi.pegasus.planner.classes.PegasusFile;
import edu.isi.pegasus.planner.classes.PlannerOptions;
import edu.isi.pegasus.planner.code.CodeGenerator;
import edu.isi.pegasus.planner.code.CodeGeneratorException;
import edu.isi.pegasus.planner.common.PegasusProperties;
import edu.isi.pegasus.planner.namespace.Dagman;
import edu.isi.pegasus.planner.namespace.Metadata;
import edu.isi.pegasus.planner.partitioner.graph.GraphNode;
import edu.isi.pegasus.planner.refiner.DeployWorkerPackage;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;
/**
* A Stampede Events Code Generator that generates events in netlogger format
* for the executable workflow. This generator generates events about
*
* <pre>
* the tasks in the abstract workflow
* the edges in the abstract workflow
* jobs in the executable workflow
* the edges in the executable workflow
* relationship about how the tasks in the abstract workflow map to jobs in the
* executable workflow.
* </pre>
*
* @author Karan Vahi
* @version $Revision$
*/
public class Stampede implements CodeGenerator {

    /**
     * The suffix to use while constructing the name of the netlogger bp file
     * to which the static events are written.
     */
    public static final String NETLOGGER_BP_FILE_SUFFIX = ".static.bp";

    /**
     * The short name of the LogFormatter implementation loaded via the
     * LogFormatterFactory.
     */
    public static final String NETLOGGER_LOG_FORMATTER_IMPLEMENTOR = "Netlogger";

    /**
     * The attribute key for workflow id.
     */
    public static final String WORKFLOW_ID_KEY = "xwf.id";

    /**
     * The event name for task info.
     */
    public static final String TASK_EVENT_NAME = "task.info";

    /**
     * The attribute key for task id.
     */
    public static final String TASK_ID_KEY = "task.id";

    /**
     * The attribute key for task type.
     */
    public static final String TYPE_KEY = "type";

    /**
     * The attribute key for type description.
     */
    public static final String TYPE_DESCRIPTION_KEY = "type_desc";

    /**
     * The attribute key for transformation.
     */
    public static final String TASK_TRANSFORMATION_KEY = "transformation";

    /**
     * The attribute key for task arguments.
     */
    public static final String ARGUMENTS_KEY = "argv";

    /**
     * The event name for task edge.
     */
    public static final String TASK_EDGE_EVENT_NAME = "task.edge";

    /**
     * The attribute key for parent task id.
     */
    public static final String PARENT_TASK_ID_KEY = "parent.task.id";

    /**
     * The attribute key for child task id.
     */
    public static final String CHILD_TASK_ID_KEY = "child.task.id";

    /**
     * The event name for a job.
     */
    public static final String JOB_EVENT_NAME = "job.info";

    /**
     * The attribute key for job id.
     */
    public static final String JOB_ID_KEY = "job.id";

    /**
     * The attribute key for the submit file.
     */
    public static final String JOB_SUBMIT_FILE_KEY = "submit_file";

    /**
     * The attribute key for whether a job is clustered or not.
     */
    public static final String JOB_CLUSTERED_KEY = "clustered";

    /**
     * The attribute key for how many times a job is retried.
     */
    public static final String JOB_MAX_RETRIES_KEY = "max_retries";

    /**
     * The attribute key for the number of tasks in the job.
     */
    public static final String JOB_TASK_COUNT_KEY = "task_count";

    /**
     * The attribute key for the executable.
     */
    public static final String JOB_EXECUTABLE_KEY = "executable";

    /**
     * The event name for job edge.
     */
    public static final String JOB_EDGE_EVENT_NAME = "job.edge";

    /**
     * The attribute key for parent job id.
     */
    public static final String PARENT_JOB_ID_KEY = "parent.job.id";

    /**
     * The attribute key for child job id.
     */
    public static final String CHILD_JOB_ID_KEY = "child.job.id";

    /**
     * The event name for task map event.
     */
    public static final String TASK_MAP_EVENT_NAME = "wf.map.task_job";

    //metadata related events

    /**
     * Marker event to indicate the start of metadata events.
     */
    public static final String WF_META_START_EVENT_NAME = "static.meta.start";

    /**
     * The event name for the event that populates to wf_meta tables.
     */
    public static final String WF_META_EVENT_NAME = "xwf.meta";

    /**
     * The event name for the event that populates to task_meta tables.
     */
    public static final String TASK_META_EVENT_NAME = "task.meta";

    /**
     * The event name for the event that populates to rc_meta tables that store
     * file metadata.
     */
    public static final String FILE_META_EVENT_NAME = "rc.meta";

    /**
     * The event name for the event that associates an LFN with the
     * wf id and the job ids.
     */
    public static final String FILE_MAP_EVENT_NAME = "wf.map.file";

    /**
     * Marker event to indicate the end of metadata events.
     */
    public static final String WF_META_END_EVENT_NAME = "static.meta.end";

    /**
     * Identifies the metadata key.
     */
    public static final String METADATA_KEY = "key";

    /**
     * Identifies the value for the metadata key.
     */
    public static final String METADATA_VALUE_KEY = "value";

    /**
     * Identifies the LFN id for the key.
     */
    public static final String LFN_ID_KEY = "lfn.id";

    /**
     * The handle to the netlogger log formatter.
     */
    private LogFormatter mLogFormatter;

    /**
     * The bag of initialization objects.
     */
    protected PegasusBag mBag;

    /**
     * The directory where all the submit files are to be generated.
     */
    protected String mSubmitFileDir;

    /**
     * The object holding all the properties pertaining to Pegasus.
     */
    protected PegasusProperties mProps;

    /**
     * The object containing the command line options specified to the planner
     * at runtime.
     */
    protected PlannerOptions mPOptions;

    /**
     * The handle to the logging object.
     */
    protected LogManager mLogger;

    /**
     * Initializes the Code Generator implementation.
     *
     * @param bag the bag of initialization objects.
     *
     * @throws CodeGeneratorException in case of any error occuring code generation.
     */
    public void initialize( PegasusBag bag ) throws CodeGeneratorException{
        mBag           = bag;
        mProps         = bag.getPegasusProperties();
        mPOptions      = bag.getPlannerOptions();
        mSubmitFileDir = mPOptions.getSubmitDirectory();
        mLogger        = bag.getLogger();
        mLogFormatter  = LogFormatterFactory.loadInstance( NETLOGGER_LOG_FORMATTER_IMPLEMENTOR );
    }

    /**
     * Generates the stampede events for the workflow in netlogger format.
     * If workflow refinement has started, events are generated for the
     * executable workflow (job.info, wf.map.task_job, job.edge); otherwise
     * events are generated for the abstract workflow (task.info, task.edge).
     *
     * @param dag the concrete workflow.
     *
     * @return the Collection of <code>File</code> objects for the files written
     *         out.
     *
     * @throws CodeGeneratorException in case of any error occuring code generation.
     */
    public Collection<File> generateCode(ADag dag) throws CodeGeneratorException {
        File f = this.getStampedeFile(dag);
        boolean generateCodeForExecutableWorkflow = dag.hasWorkflowRefinementStarted();
        String uuid = dag.getWorkflowUUID();

        PrintWriter writer = this.initializeWriter( f );
        try{
            if( generateCodeForExecutableWorkflow ){
                //events generation for executable workflow
                for( Iterator<GraphNode> it = dag.jobIterator(); it.hasNext(); ){
                    GraphNode node = it.next();
                    Job job = (Job)node.getContent();
                    generateEventsForExecutableJob( writer, dag, job );
                }

                //monte wants the task map events generated separately
                //en mass. Lets iterate again
                for( Iterator<GraphNode> it = dag.jobIterator(); it.hasNext(); ){
                    GraphNode node = it.next();
                    Job job = (Job)node.getContent();
                    generateTaskMapEvents( writer, dag, job );
                }

                //write out the edge information for the workflow
                for( Iterator<GraphNode> it = dag.jobIterator(); it.hasNext() ; ){
                    GraphNode gn = it.next();
                    //generate a job.edge event per parent -> child edge
                    for( GraphNode child : gn.getChildren() ){
                        mLogFormatter.addEvent( Stampede.JOB_EDGE_EVENT_NAME, Stampede.WORKFLOW_ID_KEY, uuid );
                        mLogFormatter.add( Stampede.PARENT_JOB_ID_KEY, gn.getID() );
                        mLogFormatter.add( Stampede.CHILD_JOB_ID_KEY, child.getID() );
                        writer.println( mLogFormatter.createLogMessage() );
                        mLogFormatter.popEvent();
                    }
                }
            }
            else{
                //events generation for abstract workflow
                for( Iterator<GraphNode> it = dag.jobIterator(); it.hasNext(); ){
                    GraphNode node = it.next();
                    Job job = (Job)node.getContent();
                    generateEventsForDAXTask( writer, dag, job );
                }

                //write out the edge information for the workflow
                for( Iterator<GraphNode> it = dag.jobIterator(); it.hasNext() ; ){
                    GraphNode parent = it.next();
                    //generate a task.edge event per parent -> child edge
                    for( GraphNode child : parent.getChildren() ){
                        mLogFormatter.addEvent( Stampede.TASK_EDGE_EVENT_NAME, Stampede.WORKFLOW_ID_KEY, uuid );
                        mLogFormatter.add( Stampede.PARENT_TASK_ID_KEY, ((Job)parent.getContent()).getLogicalID() );
                        mLogFormatter.add( Stampede.CHILD_TASK_ID_KEY, ((Job)child.getContent()).getLogicalID() );
                        writer.println( mLogFormatter.createLogMessage() );
                        mLogFormatter.popEvent();
                    }
                }

                //PM-882, PM-916 generates static metadata related events.
                //for efficiency while loading in monitord we write them
                //after all wf and task events.
                //metadata events can only be written out after site selection.
                //generateMetadataEventsForWF( dag, writer );
            }
        }
        finally{
            //ensure the writer is closed even if event generation throws
            writer.close();
        }

        Collection<File> result = new LinkedList<File>();
        result.add(f);
        return result;
    }

    /**
     * Generates stampede events corresponding to jobs/tasks in the DAX
     *
     * @param writer   the writer stream to write the events too
     * @param workflow the workflow.
     * @param job      the job for which to generate the events.
     *
     * @throws CodeGeneratorException if the job is not a compute, DAG or DAX job.
     */
    protected void generateEventsForDAXTask(PrintWriter writer, ADag workflow, Job job)
            throws CodeGeneratorException {

        String wfuuid = workflow.getWorkflowUUID();

        //sanity check
        if ( !( job.getJobType() == Job.COMPUTE_JOB ||
                job.getJobType() == Job.DAG_JOB ||
                job.getJobType() == Job.DAX_JOB ) ){
            //jobs/tasks in the dax can only be of the above types
            throw new CodeGeneratorException(
                    "Invalid Job Type for a DAX Task while generating Stampede Events of type " + job.getJobTypeDescription() +
                    " for workflow " + workflow.getAbstractWorkflowName() );
        }

        mLogFormatter.addEvent( Stampede.TASK_EVENT_NAME, Stampede.WORKFLOW_ID_KEY , wfuuid );
        mLogFormatter.add( Stampede.TASK_ID_KEY, job.getLogicalID() );
        mLogFormatter.add( Stampede.TYPE_KEY, Integer.toString( job.getJobType() ));
        mLogFormatter.add( Stampede.TYPE_DESCRIPTION_KEY, job.getJobTypeDescription() );
        mLogFormatter.add( Stampede.TASK_TRANSFORMATION_KEY, job.getCompleteTCName() );

        //only add arguments attribute if arguments are not
        //null and length > 0 . Job constructor initializes arguments to ""
        if( job.getArguments() != null && job.getArguments().length() > 0 ){
            mLogFormatter.add( Stampede.ARGUMENTS_KEY, job.getArguments() );
        }

        writer.println( mLogFormatter.createLogMessage() );
        mLogFormatter.popEvent();
    }

    /**
     * Generates stampede events corresponding to an executable job
     *
     * @param writer the writer stream to write the events too
     * @param dag    the workflow.
     * @param job    the job for which to generate the events.
     *
     * @throws CodeGeneratorException in case of any error occuring code generation.
     */
    protected void generateEventsForExecutableJob(PrintWriter writer, ADag dag, Job job)
            throws CodeGeneratorException{

        String wfuuid = dag.getWorkflowUUID();

        mLogFormatter.addEvent( Stampede.JOB_EVENT_NAME, Stampede.WORKFLOW_ID_KEY , wfuuid );
        mLogFormatter.add( Stampede.JOB_ID_KEY, job.getID() );
        mLogFormatter.add( Stampede.JOB_SUBMIT_FILE_KEY, job.getID() + ".sub" );
        mLogFormatter.add( Stampede.TYPE_KEY, Integer.toString( job.getJobType() ));
        mLogFormatter.add( Stampede.TYPE_DESCRIPTION_KEY, job.getJobTypeDescription() );
        mLogFormatter.add( Stampede.JOB_CLUSTERED_KEY, booleanToInt( job instanceof AggregatedJob ) );
        //max_retries comes from the dagman RETRY key if set, else defaults to 0
        mLogFormatter.add( Stampede.JOB_MAX_RETRIES_KEY,
                           job.dagmanVariables.containsKey( Dagman.RETRY_KEY ) ?
                                (String)job.dagmanVariables.get( Dagman.RETRY_KEY ):
                                "0" );
        mLogFormatter.add( Stampede.JOB_EXECUTABLE_KEY , job.getRemoteExecutable() );

        //only add arguments attribute if arguments are not
        //null and length > 0 . Job constructor initializes arguments to ""
        if( job.getArguments() != null && job.getArguments().length() > 0 ){
            mLogFormatter.add( Stampede.ARGUMENTS_KEY , job.getArguments() );
        }

        //determine count of tasks that map to this job
        int taskCount = getTaskCount( job );
        mLogFormatter.add( Stampede.JOB_TASK_COUNT_KEY, Integer.toString( taskCount ) );

        writer.println( mLogFormatter.createLogMessage() );
        mLogFormatter.popEvent();
    }

    /**
     * Generates the task.map events that link the jobs in the DAX with the
     * jobs in the executable workflow
     *
     * @param writer the writer stream to write the events too
     * @param dag    the workflow.
     * @param job    the job for which to generate the events.
     */
    protected void generateTaskMapEvents(PrintWriter writer, ADag dag, Job job) {
        String wfuuid = dag.getWorkflowUUID();

        //add task map events
        //only compute jobs/ dax and dag jobs have task events associated
        if( job.getJobType() == Job.COMPUTE_JOB ||
            job.getJobType() == Job.DAG_JOB ||
            job.getJobType() == Job.DAX_JOB ){

            //untar jobs created as part of worker package staging
            //are of type compute but we don't want events for them
            if( job.getLogicalID() == null || job.getLogicalID().isEmpty() ){
                //dont warn if a job is compute and transformation name is untar
                if( job.getJobType() == Job.COMPUTE_JOB &&
                    job.getCompleteTCName().equals( DeployWorkerPackage.COMPLETE_UNTAR_TRANSFORMATION_NAME ) ){
                    //dont do anything
                    return;
                }
                else{
                    //warn and return
                    mLogger.log( "No corresponding DAX task for compute job " + job.getName() ,
                                 LogManager.WARNING_MESSAGE_LEVEL );
                    return;
                }
            }

            if( job instanceof AggregatedJob ){
                //clustered job maps to multiple DAX tasks
                generateTaskMapEvents( writer, dag, (AggregatedJob)job, job.getID() );
            }
            else{
                //create a single task.map event that maps compute job
                //to the job in the DAX
                mLogFormatter.addEvent( Stampede.TASK_MAP_EVENT_NAME, Stampede.WORKFLOW_ID_KEY , wfuuid );
                mLogFormatter.add( Stampede.JOB_ID_KEY, job.getID() );
                mLogFormatter.add( Stampede.TASK_ID_KEY, job.getLogicalID() );
                writer.println( mLogFormatter.createLogMessage() );
                mLogFormatter.popEvent();
            }
        }
    }

    /**
     * Generates the task.map events that link the jobs in the DAX with the
     * jobs in the executable workflow
     *
     * @param writer    the writer stream to write the events too
     * @param dag       the workflow.
     * @param job       the clustered job for which to generate the events.
     * @param rootJobId the id of the root clustered job to associate the events with.
     */
    protected void generateTaskMapEvents(PrintWriter writer, ADag dag, AggregatedJob job, String rootJobId ) {
        String wfuuid = dag.getWorkflowUUID();

        //go through the job constituents and generate task.map events
        for( Iterator<Job> cit = job.constituentJobsIterator(); cit.hasNext(); ){
            Job constituentJob = cit.next();

            if( constituentJob instanceof AggregatedJob ){
                //PM-817 recurse in the recursive clustering case to get the mappings generated.
                this.generateTaskMapEvents(writer, dag, (AggregatedJob)constituentJob, rootJobId);
            }
            else if( constituentJob.getJobType() == Job.COMPUTE_JOB ){
                //create task.map event mapping the root clustered job id
                //to the constituent task in the DAX
                mLogFormatter.addEvent( Stampede.TASK_MAP_EVENT_NAME, Stampede.WORKFLOW_ID_KEY , wfuuid );
                mLogFormatter.add( Stampede.JOB_ID_KEY, rootJobId );
                mLogFormatter.add( Stampede.TASK_ID_KEY, constituentJob.getLogicalID() );
                writer.println( mLogFormatter.createLogMessage() );
                mLogFormatter.popEvent();
            }
            else{
                //for time being lets warn
                mLogger.log( "Constituent Job " + constituentJob.getName() + " not of type compute for clustered job " + job.getName(),
                             LogManager.WARNING_MESSAGE_LEVEL );
            }
        }
    }

    /**
     * Generates metadata events for the workflow and writes them out to the
     * stampede events file.
     *
     * @param workflow the workflow.
     *
     * @return the Collection of <code>File</code> objects for the files written
     *         out.
     *
     * @throws CodeGeneratorException in case of any error occuring code generation.
     */
    public Collection<File> generateMetadataEventsForWF( ADag workflow ) throws CodeGeneratorException {
        File f = this.getStampedeFile( workflow );

        PrintWriter writer = this.initializeWriter( f );
        try{
            this.generateMetadataEventsForWF(workflow, writer);
        }
        finally{
            //ensure the writer is closed even if event generation throws
            writer.close();
        }

        Collection<File> result = new LinkedList<File>();
        result.add(f);
        return result;
    }

    /**
     * Generates metadata events for the workflow, bracketed by the
     * static.meta.start and static.meta.end marker events.
     *
     * @param workflow the workflow.
     * @param writer   the writer stream to write the events too.
     */
    protected void generateMetadataEventsForWF(ADag workflow, PrintWriter writer) {
        String wfuuid = workflow.getWorkflowUUID();

        //static.meta.start event to indicate start of metadata events
        mLogFormatter.addEvent( Stampede.WF_META_START_EVENT_NAME, Stampede.WORKFLOW_ID_KEY, wfuuid );
        writer.println( mLogFormatter.createLogMessage() );
        mLogFormatter.popEvent();

        if( !workflow.getAllMetadata().isEmpty() ){
            //generate workflow related metadata events.
            Metadata m = workflow.getAllMetadata();
            for( Iterator it = m.getProfileKeyIterator(); it.hasNext(); ){
                String key = (String) it.next();
                mLogFormatter.addEvent( Stampede.WF_META_EVENT_NAME, Stampede.WORKFLOW_ID_KEY, wfuuid );
                mLogFormatter.add( Stampede.METADATA_KEY, key );
                mLogFormatter.add( Stampede.METADATA_VALUE_KEY, (String) m.get(key));
                writer.println( mLogFormatter.createLogMessage() );
                mLogFormatter.popEvent();
            }
        }

        //iterate through all the nodes and generate
        //task and file related metadata
        for( Iterator<GraphNode> jobIt = workflow.jobIterator(); jobIt.hasNext(); ){
            GraphNode node = jobIt.next();
            Job job = (Job)node.getContent();

            if( !job.getMetadata().isEmpty() ){
                //generate job related metadata events.
                Metadata m = (Metadata) job.getMetadata();
                for( Iterator it = m.getProfileKeyIterator(); it.hasNext(); ){
                    String key = (String) it.next();
                    mLogFormatter.addEvent( Stampede.TASK_META_EVENT_NAME, Stampede.WORKFLOW_ID_KEY, wfuuid );
                    mLogFormatter.add( Stampede.TASK_ID_KEY, job.getLogicalID() );
                    mLogFormatter.add( Stampede.METADATA_KEY, key );
                    mLogFormatter.add( Stampede.METADATA_VALUE_KEY, (String) m.get(key));
                    writer.println( mLogFormatter.createLogMessage() );
                    mLogFormatter.popEvent();
                }

                //generate file metadata events
                generateMetadataEventsForFiles( writer, workflow, job, job.getInputFiles() , false);
                generateMetadataEventsForFiles( writer, workflow, job, job.getOutputFiles(), true );
            }
        }

        //static.meta.end event to indicate end of metadata events
        mLogFormatter.addEvent( Stampede.WF_META_END_EVENT_NAME, Stampede.WORKFLOW_ID_KEY, wfuuid );
        writer.println( mLogFormatter.createLogMessage() );
        mLogFormatter.popEvent();
    }

    /**
     * Generates the required events for the files
     *
     * @param writer    the writer
     * @param workflow  the workflow
     * @param job       the job in the abstract workflow.
     * @param files     the files for which to generate the events.
     * @param areOutput if files are output or not
     */
    protected void generateMetadataEventsForFiles(PrintWriter writer, ADag workflow, Job job, Collection<PegasusFile> files, boolean areOutput ) {
        String wfuuid = workflow.getWorkflowUUID();

        for( Iterator<PegasusFile> pit = files.iterator(); pit.hasNext(); ){
            PegasusFile file = pit.next();
            boolean hasMetadata = false;

            if( !file.getAllMetadata().isEmpty()){
                Metadata m = file.getAllMetadata();
                hasMetadata = true;
                for( Iterator it = m.getProfileKeyIterator(); it.hasNext(); ){
                    String key = (String) it.next();
                    mLogFormatter.addEvent( Stampede.FILE_META_EVENT_NAME, Stampede.WORKFLOW_ID_KEY, wfuuid );
                    mLogFormatter.add( Stampede.LFN_ID_KEY, file.getLFN() );
                    mLogFormatter.add( Stampede.METADATA_KEY, key );
                    mLogFormatter.add( Stampede.METADATA_VALUE_KEY, (String) m.get(key));
                    writer.println( mLogFormatter.createLogMessage() );
                    mLogFormatter.popEvent();
                }
            }

            //generate the file map event if metadata was associated with the job
            //or the register flag is set to true
            if( hasMetadata || ( areOutput && !file.getTransientRegFlag()) ){
                mLogFormatter.addEvent( Stampede.FILE_MAP_EVENT_NAME, Stampede.WORKFLOW_ID_KEY, wfuuid );
                mLogFormatter.add( Stampede.TASK_ID_KEY, job.getLogicalID() );
                mLogFormatter.add( Stampede.LFN_ID_KEY, file.getLFN() );
                writer.println( mLogFormatter.createLogMessage() );
                mLogFormatter.popEvent();
            }
        }
    }

    /**
     * Method not implemented. Throws an exception.
     *
     * @param dag the workflow
     * @param job the job for which the code is to be generated.
     *
     * @throws edu.isi.pegasus.planner.code.CodeGeneratorException
     */
    public void generateCode( ADag dag, Job job ) throws CodeGeneratorException {
        throw new CodeGeneratorException( "Stampede generator only generates code for the whole workflow" );
    }

    /**
     * Returns the task count for a job. The task count is the number of tasks/jobs
     * in the DAX that map to this job. jobs inserted by Pegasus, which do not
     * have a mapped task from the DAX, will have its task_count set to 0.
     *
     * @param job the executable job.
     *
     * @return task count
     */
    private int getTaskCount( Job job ) {
        int type = job.getJobType();

        if ( job instanceof AggregatedJob && type == Job.COMPUTE_JOB ){
            //a clustered job - the number of constituents is the count
            return ((AggregatedJob)job).numberOfConsitutentJobs();
        }

        if ( type == Job.COMPUTE_JOB ){
            //non clustered compute job. make sure there is a DAX task
            //associated with it. null-safe check for consistency with
            //generateTaskMapEvents, which also guards against a null logical id
            String logicalID = job.getLogicalID();
            if( logicalID == null || logicalID.length() == 0 ){
                //takes care of the untar job that is tagged as compute
                mLogger.log( "Not creating event pegasus.task.count for job " + job.getID(),
                             LogManager.DEBUG_MESSAGE_LEVEL );
                return 0;
            }
            return 1;
        }

        return 0;
    }

    /**
     * Returns boolean as an integer valued String.
     *
     * @param value the boolean value
     *
     * @return "0" for false and "1" for true
     */
    public String booleanToInt( boolean value ){
        return value ? "1" : "0";
    }

    /**
     * Method not implemented. Throws an exception.
     *
     * @return nothing - always throws.
     */
    public boolean startMonitoring() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /**
     * Method not implemented. Throws an exception.
     *
     * @throws CodeGeneratorException never - an UnsupportedOperationException is thrown instead.
     */
    public void reset() throws CodeGeneratorException {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /**
     * Creates a PrintWriter that appends to the stampede events file.
     *
     * @param f the file to write to.
     *
     * @return the writer.
     *
     * @throws CodeGeneratorException if the underlying FileWriter cannot be opened.
     */
    private PrintWriter initializeWriter( File f ) throws CodeGeneratorException {
        try {
            return new PrintWriter(new BufferedWriter(new FileWriter(f, true) ));
        } catch ( IOException ioe ) {
            throw new CodeGeneratorException( "Unable to initialize writer to stampede file " + f.getAbsolutePath() , ioe );
        }
    }

    /**
     * Returns the file to which the events are to be written out.
     *
     * @param dag the workflow.
     *
     * @return the stampede events file in the submit directory.
     *
     * @throws CodeGeneratorException in case of any error occuring code generation.
     */
    private File getStampedeFile( ADag dag ) throws CodeGeneratorException{
        return new File( mSubmitFileDir , Abstract.getDAGFilename( this.mPOptions,
                                                                   dag.getLabel(),
                                                                   dag.getIndex(),
                                                                   Stampede.NETLOGGER_BP_FILE_SUFFIX ) );
    }
}