/*
* This file or a portion of this file is licensed under the terms of
* the Globus Toolkit Public License, found in file GTPL, or at
* http://www.globus.org/toolkit/download/license.html. This notice must
* appear in redistributions of this file, with or without modification.
*
* Redistributions of this Software, with or without modification, must
* reproduce the GTPL in: (1) the Software, or (2) the Documentation or
* some other similar material which is provided with the Software (if
* any).
*
* Copyright 1999-2004 University of Chicago and The University of
* Southern California. All rights reserved.
*/
package org.griphyn.vdl.planner;
import edu.isi.pegasus.planner.catalog.transformation.TransformationCatalogEntry;
import java.io.*;
import java.util.*;
import edu.isi.pegasus.common.util.Currently;
import edu.isi.pegasus.common.util.Separator;
import org.griphyn.vdl.dax.*;
import org.griphyn.vdl.classes.LFN;
import org.griphyn.vdl.util.Logging;
/**
* This class generates the shell scripts from a DAX. There is a script
* for each job in the dag, and there is a control script to coordinate
* these jobs.<p>
*
* The scripts are assembled mostly from template files and substitutions.
* The template files reside in <code>$PEGASUS_HOME/share</code>:<p>
*
* <table border="1">
* <tr><th>template</th><th>purpose</th></tr>
* <tr><td>sp-job-1.tmpl</td><td>start of job script</td></tr>
* <tr><td>sp-job-2.tmpl</td><td>unused</td></tr>
* <tr><td>sp-job-3.tmpl</td><td>final portion of job script</td></tr>
* <tr><td>sp-master-1.tmpl</td><td>start of master script</td></tr>
* <tr><td>sp-master-2.tmpl</td><td>intermediary of master script</td></tr>
* <tr><td>sp-master-3.tmpl</td><td>final portion of master script</td></tr>
* <tr><td>sp-master-job.tmpl</td><td>Invocation of job from master</td></tr>
* </table>
*
* The following substitutions are available by default. Some substitutions
* are only available during job generation:<p>
*
* <table border="1">
* <tr><th>variable</th><th>purpose</th></tr>
* <tr><td>DAXLABEL</td><td>user-given label of the workflow</td></tr>
* <tr><td>DV</td><td>Job: fully-qualified DV of job</td></tr>
* <tr><td>FILELIST</td><td>Job: Name of file of output mappings</td></tr>
* <tr><td>HOME</td><td>JRE system property user.home</td></tr>
* <tr><td>JOBID</td><td>Job: the IDxxxxx of the current job</td></tr>
* <tr><td>JOBLOG</td><td>Job: the log file of the job</td></tr>
* <tr><td>JOBSCRIPT</td><td>Job: name of script file for job</td></tr>
* <tr><td>KICKSTART</td><td>if set, path to local kickstart</td></tr>
* <tr><td>LOGFILE</td><td>Name of master log file</td></tr>
* <tr><td>NOW</td><td>Start time stamp of processing (compile time)</td></tr>
* <tr><td>REGISTER</td><td>0 or 1 for replica registration</td></tr>
* <tr><td>TR</td><td>Job: fully-qualified TR of job</td></tr>
* <tr><td>USER</td><td>JRE system property user.name</td></tr>
* </table>
*
* @author Jens-S. Vöckler
* @author Yong Zhao
* @version $Revision$
*
*/
public class Scriptor
{
  /**
   * the directory to put the scripts
   */
  private String m_dirName;

  /**
   * the dag structure
   */
  private ADAG m_adag;

  /**
   * name of the dag
   */
  private String m_dagName;

  /**
   * replica catalog
   */
  private RCWrapper m_rc;

  /**
   * site catalog (optional)
   */
  private SCWrapper m_sc;

  /**
   * transformation catalog
   */
  private TCWrapper m_tc;

  /**
   * the hash that holds all the lfn->pfn mapping
   */
  private HashMap m_filenameMap;

  /**
   * the name of the master log file.
   */
  private String m_logFile;

  /**
   * whether to register output files
   */
  private boolean m_register;

  /**
   * path to kickstart, or <code>null</code> if kickstart is not used
   */
  private String m_kickstart;

  /**
   * buffered writer for control script file
   */
  private BufferedWriter m_master;

  /**
   * Stores the reference to the logger.
   */
  private Logging m_log;

  /**
   * holds the location where templates reside.
   */
  private File m_dataDir;

  /**
   * holds the mapping for permissible substitutions.
   */
  private Map m_substitute = null;

  /**
   * a private copy of this environment's notion of a line separator.
   */
  private final static String newline =
    System.getProperty( "line.separator", "\r\n" );

  /**
   * Constructor
   *
   * @param dirName names the directory into which to produce the scripts.
   * @param adag is the DAX as a parsed data structure in memory.
   * @param rc is the replica catalog wrapper.
   * @param sc is the site catalog wrapper, may be <code>null</code>.
   * @param tc is the transformation catalog wrapper.
   * @param fnMap is a map containing all filenames in the DAG.
   * @param dataDir is the location of $PEGASUS_HOME/share from properties.
   */
  public Scriptor( String dirName, ADAG adag,
                   RCWrapper rc, SCWrapper sc, TCWrapper tc,
                   HashMap fnMap, File dataDir )
  {
    m_dirName = dirName;
    m_adag = adag;
    m_dataDir = dataDir;

    // set dag name, falling back to the directory name for unlabeled DAXes
    m_dagName = adag.getName();
    if ( m_dagName == null ) m_dagName = m_dirName;

    m_rc = rc;
    m_sc = sc;
    m_tc = tc;
    m_filenameMap = fnMap;
    m_logFile = m_dagName + ".log";
    m_register = true;

    // kickstart is only enabled, if the site catalog advertises a
    // gridlaunch path that actually exists on the local filesystem
    m_kickstart = null;
    if ( m_sc != null ) {
      String kl = m_sc.getGridLaunch();
      if ( kl != null ) {
        File k = new File(kl);
        if ( k.exists() ) m_kickstart = kl;
      }
    }

    m_log = Logging.instance();

    // prepare the default template substitutions, see class documentation
    m_substitute = new TreeMap();
    m_substitute.put( "NOW", Currently.iso8601(false,true,false,new Date()) );
    m_substitute.put( "DAXLABEL", m_dagName );
    m_substitute.put( "USER", System.getProperty("user.name") );
    m_substitute.put( "HOME", System.getProperty("user.home") );
    m_substitute.put( "LOGFILE", m_logFile );
    if ( m_kickstart != null ) m_substitute.put( "KICKSTART", m_kickstart );
    m_substitute.put( "REGISTER", m_register ? "1" : "0" );
  }

  /**
   * Sets the flag indicating whether to register output files.
   *
   * @param b is a flag to set the registration state.
   * @see #getRegister()
   */
  public void setRegister( boolean b )
  {
    this.m_register = b;
    addSubstitution( "REGISTER", b ? "1" : "0" );
  }

  /**
   * Gets the flag indicating whether to register output files.
   *
   * @return true, if output files are going to be registered.
   * @see #setRegister(boolean)
   */
  public boolean getRegister()
  {
    return this.m_register;
  }

  /**
   * Sets kickstart path, if the path is null, kickstart will not be used.
   *
   * @param kickstart the path to invoke kickstart
   * @see #getKickstart()
   */
  public void setKickstart( String kickstart )
  {
    m_kickstart = kickstart;
    if ( kickstart != null ) addSubstitution( "KICKSTART", kickstart );
    else removeSubstitution( "KICKSTART" );
  }

  /**
   * Gets the current kickstart path. The location may be null.
   *
   * @return the path to kickstart, or <code>null</code>
   * @see #setKickstart( String )
   */
  public String getKickstart()
  {
    return this.m_kickstart;
  }

  /**
   * Inserts a substitution into the substitutable variables.
   *
   * @param key is the template variable name
   * @param value is the replacement
   * @return the previous setting, or <code>null</code>.
   * @see #getSubstitution( String )
   */
  public String addSubstitution( String key, String value )
  {
    return (String) m_substitute.put(key,value);
  }

  /**
   * Obtains the setting of a substitutable variable.
   *
   * @param key is the template variable name to query for.
   * @return the current setting, or <code>null</code>, if the
   * variable does not exist. A key that is present but mapped to
   * <code>null</code> yields the empty string.
   * @see #addSubstitution( String, String )
   */
  public String getSubstitution( String key )
  {
    String result = null;
    if ( m_substitute.containsKey(key) ) {
      result = (String) m_substitute.get(key);
      if ( result == null ) result = "";
    }
    return result;
  }

  /**
   * Removes a substitution.
   *
   * @param key is the template variable name to query for.
   * @return the previous setting, or <code>null</code>, if the
   * variable did not exist.
   * @see #addSubstitution( String, String )
   */
  public String removeSubstitution( String key )
  {
    return (String) m_substitute.remove(key);
  }

  /**
   * Writes the control script head, including functions for file
   * registration.
   *
   * @return the name of the control (master) script.
   * @throws IOException if writing to the master script somehow fails.
   */
  public String initializeControlScript()
    throws IOException
  {
    // control script output filename
    String controlScript = m_dagName + ".sh";
    File controlFile = new File( m_dirName, controlScript );
    String fullPath = controlFile.getAbsolutePath();

    // existence checks before overwriting
    if ( controlFile.exists() ) {
      m_log.log( "planner", 0, "Warning: Master file " +
                 fullPath + " already exists, overwriting");
      controlFile.delete();
    }

    // open master for writing; stays open across job additions until
    // finalizeControlScript() is invoked
    m_master = new BufferedWriter( new FileWriter(controlFile) );

    // copy template while substituting
    m_log.log( "planner", 1, "writing control script header" );
    copyFromTemplate( m_master, "sp-master-1.tmpl" );

    // done
    return controlScript;
  }

  /**
   * Adds scripts between stages.
   *
   * @exception IOException if adding to the master script fails for
   * some reason.
   */
  public void intermediateControlScript()
    throws IOException
  {
    m_log.log( "planner", 1, "writing control script between stages" );
    copyFromTemplate( m_master, "sp-master-2.tmpl" );
  }

  /**
   * Write the control script tail to the control file, and closes
   * the master script.
   *
   * @exception IOException if adding to the master script fails for
   * some reason.
   */
  public void finalizeControlScript()
    throws IOException
  {
    m_log.log( "planner", 1, "writing control script tail" );
    copyFromTemplate( m_master, "sp-master-3.tmpl" );

    // close master
    m_master.flush();
    m_master.close();
    m_master = null;
  }

  /**
   * Converts a variable into the substituted value. Most of this is just
   * a hash lookup, but some are more dynamic: NOW is always re-evaluated
   * to the current timestamp.
   *
   * @param key is the variable to replace
   * @return the replacement string, which may be empty, or
   * <code>null</code> for an unknown variable.
   */
  private String convertVariable( String key )
  {
    if ( key.equals("NOW") ) {
      return Currently.iso8601(false,true,false,new Date());
    } else {
      return getSubstitution(key);
    }
  }

  /**
   * Copies a template file into the open writer. During copy,
   * certain substitutions may take place. The substitutable variables
   * are dynamically adjusted from the main class.
   *
   * @param w is the writer open for writing.
   * @param tfn is the template base file name.
   * @throws IOException in case some io operation goes wrong, or if
   * the template contains an unclosed <code>@@var@@</code> element.
   */
  public void copyFromTemplate( Writer w, String tfn )
    throws IOException
  {
    // determine location
    File source = new File( m_dataDir, tfn );
    if ( ! source.exists() ) {
      // template does not exist
      throw new IOException( "template " + tfn + " not found" );
    }

    // template exists, use it
    LineNumberReader lnr = new LineNumberReader( new FileReader(source) );
    try {
      String line, key, value;
      while ( (line = lnr.readLine()) != null ) {
        StringBuffer sb = new StringBuffer(line);

        // substitute all substitutables of the form @@var@@
        int circuitBreaker = 0;
        for ( int p1 = sb.indexOf("@@"); p1 != -1; p1 = sb.indexOf("@@") ) {
          // BUGFIX: test the raw indexOf() result before adding the
          // marker width. The previous code added 2 first, so the -1
          // "not found" sentinel was destroyed and an unclosed element
          // caused a StringIndexOutOfBoundsException instead of the
          // intended IOException.
          int p2 = sb.indexOf( "@@", p1+2 );
          if ( p2 == -1 ) throw new IOException( "unclosed @@var@@ element" );
          p2 += 2;
          key = sb.substring( p1+2, p2-2 );
          if ( (value = convertVariable(key)) == null ) {
            // does not exist
            m_log.log( "planner", 0, "Warning: " + source +":" +
                       lnr.getLineNumber() +
                       ": Requesting unknown substitution for " + key );
            value = "";
          } else {
            // protocol substitution
            m_log.log( "planner", 3, "Substituting " + key + " => " + value );
          }
          sb.replace( p1, p2, value );

          // guard against runaway substitution (e.g. a value that
          // itself contains @@) -- give up after 32 replacements
          if ( ++circuitBreaker > 32 ) {
            m_log.log( "planner", 0, "Warning: " + lnr.getLineNumber() +
                       ": circuit breaker triggered" );
            break;
          }
        }
        w.write(sb.toString());
        w.write(newline);
      }
    } finally {
      // free file handle resource, even if writing failed
      lnr.close();
    }
  }

  /**
   * Processes each job in the adag. Also checks for input file
   * existence, if necessary.
   *
   * @param jobID is the DAX-unique job id to generate a scripts for.
   * @param checkInputFiles if set, checks in the filesystem for the
   * existence of all input files into the job.
   * @return the name of the job control script, or <code>null</code>
   * on failure.
   * @throws IOException for failure to write any job related files.
   */
  public String processJob( String jobID, boolean checkInputFiles )
    throws IOException
  {
    Logging.instance().log( "planner", 0, "processing job: " + jobID );

    // get the job reference from ADAG
    Job job = m_adag.getJob(jobID);

    // script file for this job
    String scriptBase = job.getName() + "_" + jobID;
    String scriptFile = scriptBase + ".sh";

    // file to hold the output file list
    String outputList = scriptBase + ".lst";
    File of = new File( m_dirName, outputList );
    String outputFullPath = of.getAbsolutePath();
    if ( of.exists() ) {
      m_log.log( "planner", 0, "Warning: output list file " +
                 outputList + " already exists, overwriting");
      of.delete();
    }

    // add to substitutions - temporarily, removed again in the finally
    // clause below, so early exits cannot leak job-specific variables
    addSubstitution( "JOBSCRIPT", scriptFile );
    addSubstitution( "FILELIST", outputList );
    addSubstitution( "JOBID", jobID );
    addSubstitution( "TR", Separator.combine( job.getNamespace(),
                                              job.getName(), job.getVersion()) );
    addSubstitution( "DV", Separator.combine( job.getDVNamespace(),
                                              job.getDVName(), job.getDVVersion()) );

    boolean result = false;
    try {
      // create file with all mappings for just this job
      Map lfnMap = new HashMap();   // store lfn->pfn mappings for job
      BufferedWriter obw = new BufferedWriter( new FileWriter(of) );
      try {
        for ( Iterator i = job.iterateUses(); i.hasNext(); ) {
          Filename fn = (Filename) i.next();
          String lfn = fn.getFilename();

          // look up LFN in hash
          String pfn = (String) m_filenameMap.get(lfn);
          if ( pfn == null ) {
            // can't find the lfn in the filename list
            m_log.log( "planner", 0, "ERROR: LFN " + lfn + " is not in the " +
                       "<filename> list, please check the DAX!" );
            return null;
          } else {
            lfnMap.put(lfn, pfn);
          }

          // check if input files exist
          if ( checkInputFiles ) {
            if ( fn.getLink() == LFN.INPUT ) {
              if ( ! (new File(pfn)).canRead() ) {
                m_log.log( "planner", 0, "Warning: Unable to read LFN " + lfn );
              }
            }
          }

          // write the output file list entry: LFN PFN [abs]
          if ( fn.getLink() == LFN.OUTPUT ) {
            obw.write( lfn + " " + pfn + newline );
          }
        }
      } finally {
        // finish writing file of output files; close() flushes
        obw.close();
      }

      // generate the script for this job
      result = generateJobScript( job, scriptFile, outputList, lfnMap );
      if ( result ) {
        // OK: now add script invocation to master
        m_log.log( "planner", 1, "adding job " + jobID + " to master script" );
        copyFromTemplate( m_master, "sp-master-job.tmpl" );
      } else {
        m_log.log( "planner", 0, "Warning: ignoring script " + scriptFile );
      }
    } finally {
      // always clean up the temporary substitutions
      removeSubstitution( "JOBSCRIPT" );
      removeSubstitution( "FILELIST" );
      removeSubstitution( "JOBID" );
      removeSubstitution( "TR" );
      removeSubstitution( "DV" );
    }

    return ( result ? scriptFile : null );
  }

  /**
   * Extracts all profiles contained within the job description.
   *
   * @param job is the job description from the DAX
   * @param lfnMap is the mapping to PFNs.
   * @return a map of maps. The outer map is indexed by the lower-cased
   * namespace identifier. The inner map is indexed by the key within
   * the particular namespace. An empty map is possible.
   */
  private Map extractProfilesFromJob( Job job, Map lfnMap )
  {
    Map result = new HashMap();
    Map submap = null;

    for ( Iterator i = job.iterateProfile(); i.hasNext(); ) {
      org.griphyn.vdl.dax.Profile p =
        (org.griphyn.vdl.dax.Profile) i.next();
      String ns = p.getNamespace().trim().toLowerCase();
      String key = p.getKey().trim();

      // recreate the value; filename leaves are resolved to their PFN
      StringBuffer sb = new StringBuffer(8);
      for ( Iterator j = p.iterateLeaf(); j.hasNext(); ) {
        Leaf l = (Leaf)j.next();
        if ( l instanceof PseudoText ) {
          sb.append( ((PseudoText)l).getContent() );
        } else {
          String lfn = ((Filename)l).getFilename();
          sb.append( (String) lfnMap.get(lfn) );
        }
      }
      String value = sb.toString().trim();

      // insert at the right place into the result map
      if ( result.containsKey(ns) ) {
        submap = (Map) result.get(ns);
      } else {
        result.put( ns, (submap = new HashMap()) );
      }
      submap.put( key, value );
    }

    return result;
  }

  /**
   * Combines profiles from two map of maps, with regards to priority.
   * Keys present in both maps have their inner maps merged, with the
   * high-priority entries winning on a per-key basis.
   *
   * @param high is the higher priority profile
   * @param low is the lower priority profile
   * @return a new map with the combination of the two profiles
   */
  private Map combineProfiles( Map high, Map low )
  {
    Set allKeys = new TreeSet( low.keySet() );
    allKeys.addAll( high.keySet() );

    Map result = new HashMap();
    for ( Iterator i=allKeys.iterator(); i.hasNext(); ) {
      String key = (String) i.next();
      boolean h = high.containsKey(key);
      boolean l = low.containsKey(key);
      if ( h && l ) {
        // namespace exists in both: merge, high priority overwrites
        Map temp = new HashMap( (Map) low.get(key) );
        temp.putAll( (Map) high.get(key) );
        result.put( key, temp );
      } else {
        if ( h ) result.put( key, high.get(key) );
        else result.put( key, low.get(key) );
      }
    }
    return result;
  }

  /**
   * Extracts the environment settings from the combined profiles.
   *
   * @param profiles is the combined profile map of maps
   * @return a string of Bourne shell variable assignments and exports,
   * or <code>null</code>, if no "env" namespace is present.
   */
  private String extractEnvironment( Map profiles )
  {
    String result = null;
    if ( profiles.containsKey("env") ) {
      StringBuffer sb = new StringBuffer();
      Map env = (Map) profiles.get("env");
      for ( Iterator i=env.keySet().iterator(); i.hasNext(); ) {
        String key = (String) i.next();
        String value = (String) env.get(key);
        sb.append(key).append("='").append(value);
        sb.append("'; export ").append(key).append(newline);
      }
      result = sb.toString();
    }
    return result;
  }

  /**
   * Generates the script for each job.
   *
   * @param job is an ADAG job for which to generate the script.
   * @param scriptFile is the basename of the script for the job.
   * @param outputList is the name of a file containing output files.
   * @param lfnMap is a map of LFN to PFN.
   * @return true if all is well, false to signal an error
   * @throws IOException if writing the job script fails.
   */
  private boolean generateJobScript( Job job, String scriptFile,
                                     String outputList, Map lfnMap )
    throws IOException
  {
    String jobID = job.getID();
    File f = new File( m_dirName, scriptFile );
    String scriptFullPath = f.getAbsolutePath();
    if ( f.exists() ) {
      m_log.log( "planner", 1, "Warning: Script file " +
                 scriptFile + " already exists, overwriting" );
      f.delete();
    }

    // kickstart output file: replace the ".sh" suffix with ".out"
    String kickLog = scriptFile.substring( 0, scriptFile.length()-3 ) + ".out";

    BufferedWriter bw = new BufferedWriter( new FileWriter(f) );
    try {
      copyFromTemplate( bw, "sp-job-1.tmpl" );

      // full definition name of this job's transformation
      String fqdn =
        Separator.combine( job.getNamespace(), job.getName(), job.getVersion());

      // extract TR profiles
      Map tr_profiles = extractProfilesFromJob(job,lfnMap);

      // lookup job in TC
      List tc = m_tc.lookup( job.getNamespace(), job.getName(),
                             job.getVersion(), "local" );
      if ( tc == null || tc.size() == 0 ) {
        m_log.log( "planner", 0, "ERROR: Transformation " + fqdn +
                   " on site \"local\" not found in TC" );
        return false;
      } else if ( tc.size() > 1 ) {
        m_log.log( "planner", 0, "Warning: Found " + tc.size() +
                   " matches for " + fqdn + " in TC, using first" );
      }
      TransformationCatalogEntry tce = (TransformationCatalogEntry) tc.get(0);

      // extract SC profiles
      Map sc_profiles = ( m_sc == null ? new HashMap() : m_sc.getProfiles() );

      // extract TC profiles
      Map tc_profiles = m_tc.getProfiles(tce);

      // combine profiles by priority: TR > TC > SC
      Map temp = combineProfiles( tc_profiles, sc_profiles );
      Map profiles = combineProfiles( temp, tr_profiles );

      // pfnHint has been deprecated !
      if ( profiles.containsKey("hints") ) {
        m_log.log( "planner", 0, "Warning: The hints profile namespace " +
                   "has been deprecated, ignoring keys " +
                   ((Map) profiles.get("hints")).keySet().toString() );
      }

      // assemble environment variables from profile
      String executable = tce.getPhysicalTransformation();
      String environment = extractEnvironment(profiles);

      // for web service
      boolean service = profiles.containsKey("ws");
      String invokews = null;
      String wsenv = null;
      if ( service ) {
        // lookup special web service invocation executable
        tc = m_tc.lookup( null, "invokews", null, "local" );
        if ( tc == null || tc.size() == 0 ) {
          // not found
          m_log.log( "planner", 0, "ERROR: Transformation invokews not found!" );
          return false;
        } else if ( tc.size() > 1 ) {
          m_log.log( "planner", 0, "Warning: Found " + tc.size() +
                     " matches for invokews in TC, using first" );
        }
        tce = (TransformationCatalogEntry) tc.get(0);
        invokews = tce.getPhysicalTransformation();

        // combine profiles by priority
        temp = combineProfiles( m_tc.getProfiles(tce), sc_profiles );
        wsenv = extractEnvironment( temp );
      }

      // collect commandline arguments for invocation
      StringBuffer argument = new StringBuffer();
      for ( Iterator i = job.iterateArgument(); i.hasNext(); ) {
        Leaf l = (Leaf) i.next();
        if ( l instanceof PseudoText ) {
          argument.append( ((PseudoText)l).getContent() );
        } else {
          String lfn = ((Filename)l).getFilename();
          argument.append( lfnMap.get(lfn) );
        }
      }

      // assemble kickstart arguments, if kickstart is in effect
      StringBuffer ks_arg = null;
      if ( m_kickstart != null ) {
        ks_arg = new StringBuffer(80);
        ks_arg.append("-R local -l " ).append(kickLog);
        ks_arg.append(" -n \"").append( getSubstitution("TR") );
        ks_arg.append("\" -N \"").append( getSubstitution("DV") );
        ks_arg.append('"');
      }

      // process stdin: kickstart handles redirection itself via -i
      Filename fn = job.getStdin();
      if ( fn != null ) {
        if ( m_kickstart != null ) {
          ks_arg.append(" -i ").append( (String) lfnMap.get(fn.getFilename()) );
        } else {
          argument.append(" < ").append( (String) lfnMap.get(fn.getFilename()) );
        }
      }

      // process stdout
      fn = job.getStdout();
      if ( fn != null ) {
        if ( m_kickstart != null ) {
          ks_arg.append(" -o ").append( (String) lfnMap.get(fn.getFilename()) );
        } else {
          argument.append(" > ").append( (String) lfnMap.get(fn.getFilename()) );
        }
      }

      // process stderr
      fn = job.getStderr();
      if ( fn != null ) {
        if ( m_kickstart != null ) {
          ks_arg.append(" -e ").append( (String) lfnMap.get(fn.getFilename()) );
        } else {
          argument.append(" 2> ").append( (String) lfnMap.get(fn.getFilename()) );
        }
      }

      // environment of job
      if ( environment != null ) {
        bw.write( "# regular job environment setup" + newline +
                  environment + newline );
      }

      if ( service ) {
        //
        // web service invocation
        //
        // normalize "ws" profile keys to lower case for lookup
        Map in = (Map) profiles.get("ws");
        Map out = new HashMap( in.size() );
        for ( Iterator i=in.keySet().iterator(); i.hasNext(); ) {
          String key = (String) i.next();
          String value = (String) in.get(key);
          out.put( key.trim().toLowerCase(), value.trim() );
        }

        // check that all required arguments are present
        if ( ! ( out.containsKey("porttype") &&
                 out.containsKey("operation") &&
                 out.containsKey("input") ) ) {
          m_log.log( "planner", 0,
                     "ERROR: You must specify portType, operation, and input " +
                     "for a web service invocation!" );
          return false;
        }

        // extra environment for web service?
        if ( wsenv != null ) {
          bw.write( "# extra WS invocation environment" + newline +
                    wsenv + newline );
        }

        // invocation of web service
        bw.write( invokews + " -I " + out.get("input") );
        if ( out.containsKey("output") )
          bw.write( " -O " + out.get("output") );

        // rest of invocation
        bw.write( " -p " + out.get("porttype") +
                  " -o " + out.get("operation") +
                  " " + executable + newline );
      } else {
        //
        // call the executable with argument in the script
        //
        if ( m_kickstart != null )
          bw.write( m_kickstart + " " + ks_arg.toString() + " " );
        bw.write( executable + " " + argument + newline );
      }

      copyFromTemplate( bw, "sp-job-3.tmpl" );

      // done
      return true;
    } finally {
      // close (and thereby flush) the job script on every exit path
      bw.close();
    }
  }
}