/*
* This file or a portion of this file is licensed under the terms of
* the Globus Toolkit Public License, found in file GTPL, or at
* http://www.globus.org/toolkit/download/license.html. This notice must
* appear in redistributions of this file, with or without modification.
*
* Redistributions of this Software, with or without modification, must
* reproduce the GTPL in: (1) the Software, or (2) the Documentation or
* some other similar material which is provided with the Software (if
* any).
*
* Copyright 1999-2004 University of Chicago and The University of
* Southern California. All rights reserved.
*/
package org.griphyn.vdl.directive;
import java.io.*;
import java.util.*;
import org.griphyn.vdl.dax.*;
import org.griphyn.vdl.classes.LFN;
import org.griphyn.vdl.parser.DAXParser;
import org.griphyn.vdl.util.Logging;
import org.griphyn.vdl.planner.*;
/**
* This class generates a concrete plan for a DAX when planning with
* the shell planner.
*
* @author Jens-S. Vöckler
* @author Yong Zhao
* @version $Revision$
*
* @see org.griphyn.vdl.planner.Scriptor
*/
public class Derive extends Directive
{
  /**
   * Default constructor: delegates to {@link Directive} to set up
   * logging and property handling.
   *
   * @throws IOException if the base directive fails to initialize its
   * I/O resources.
   * @throws MissingResourceException if required properties are absent.
   */
  public Derive()
    throws IOException, MissingResourceException
  {
    super();
  }

  /**
   * Generates shell scripts for the workflow described by the dax.
   * For each derivation, there is a shell script generated, and
   * there is a control script to control the execution sequence of
   * these shell scripts according to their dependencies. This
   * convenience variant never employs kickstart.
   *
   * @param dax is the InputStream for the dax representation
   * @param dir is the directory name in which to generate these scripts
   * @param build specifies whether to force build mode
   * @param register specifies whether to register output files
   * @return true if successful, false otherwise
   * @throws java.sql.SQLException if a catalog back-end fails.
   * @throws IOException if the output directory is unusable.
   * @throws InterruptedException if waiting for the permission change
   * sub-process is interrupted.
   */
  public boolean genShellScripts( InputStream dax,
                                  String dir,
                                  boolean build,
                                  boolean register )
    throws java.sql.SQLException, IOException, InterruptedException
  {
    // delegate: a null kickstart path means "do not use kickstart"
    return genShellScripts( dax, dir, build, register, null );
  }

  /**
   * Generates shell scripts for the workflow described by the dax.
   * For each derivation, there is a shell script generated, and
   * there is a control script to control the execution sequence of
   * these shell scripts according to their dependencies.
   *
   * @param dax is the InputStream for the dax representation
   * @param dir is the directory name in which to generate these scripts
   * @param build specifies whether to force build mode
   * @param register specifies whether to register output files
   * @param kickstart_path specifies the location of kickstart. If null,
   * kickstart will not be used.
   * @return true if successful, false otherwise
   * @throws java.sql.SQLException if a catalog back-end fails.
   * @throws IOException if the output directory is not a directory or
   * cannot be created.
   * @throws InterruptedException if waiting for the permission change
   * sub-process is interrupted.
   */
  public boolean genShellScripts( InputStream dax,
                                  String dir,
                                  boolean build,
                                  boolean register,
                                  String kickstart_path )
    throws java.sql.SQLException, IOException, InterruptedException
  {
    // sanity check -- is there a destination directory
    if ( dir == null || dir.equals("") ) {
      m_logger.log( "planner", 0,
                    "Output directory not specified, using default: test" );
      dir = "test";
    } else {
      m_logger.log( "planner", 0, "Using output directory " + dir );
    }

    // parse the dax file
    m_logger.log( "planner", 1, "Initializing dax parser");
    DAXParser daxparser = new DAXParser(m_props.getDAXSchemaLocation());
    m_logger.log( "planner", 1, "parsing the dax...");
    ADAG adag = daxparser.parse(dax);

    // sanity check -- do we have a DAX
    if ( adag == null ) {
      m_logger.log( "planner", 0, "failed parsing the dax.");
      return false;
    }

    // check output directory -- does it exist?
    File f = new File(dir);
    if ( f.exists() ) {
      if ( ! f.isDirectory() ) {
        m_logger.log( "planner", 0, "ERROR: '" + dir + "' is not a directory!" );
        throw new IOException( dir + " is not a directory!" );
      }
    } else {
      m_logger.log( "planner", 0, "directory '" + dir +
                    "' does not exist. Creating." );
      // FIX: the result of mkdirs() used to be ignored, deferring the
      // failure to an obscure point later on. Fail fast instead.
      if ( ! f.mkdirs() ) {
        throw new IOException( "unable to create directory " + dir );
      }
    }

    // catalog connections -- declared up front so the finally block
    // below can close whichever ones were successfully opened.
    RCWrapper rc = null;
    TCWrapper tc = null;
    SCWrapper sc = null;
    try {
      // connect to replica catalog
      try {
        rc = new RCWrapper();
      } catch ( Exception e ) {
        // FIX: chain the original exception instead of discarding it,
        // so the root cause survives in the stack trace.
        throw new Error( e.getMessage(), e );
      }
      m_logger.log( "planner", 2, "Using RC " + rc.getName() );

      // connect to transformation catalog
      tc = new TCWrapper();
      m_logger.log( "planner", 2, "Using TC " + tc.getName() );

      // connect to site catalog, optional
      sc = new SCWrapper();
      m_logger.log( "planner", 2, "Using SC " + sc.getName() );

      // lookup all filenames in replica catalog, and populate the
      // filename map (lfn -> pfn) that is passed around.
      m_logger.log( "planner", 1, "processing logical filenames" );
      HashMap filenameMap = new HashMap();
      for ( Iterator i=adag.iterateFilename(); i.hasNext(); ) {
        Filename fn = (Filename) i.next();
        String lfn = fn.getFilename();
        String pfn = rc.lookup( "local", lfn );
        if ( pfn == null ) {
          // can't find the lfn->pfn mapping in rc
          m_logger.log( "planner", 1, "Info: Failed to find LFN " + lfn +
                        " in RC, assuming PFN==LFN" );
          pfn = lfn;
        }
        filenameMap.put(lfn, pfn);
      }

      // convert adag to graph
      Graph graph = DAX2Graph.DAG2Graph(adag);

      // to build or to make?
      if ( build ) {
        // build mode: run everything, skip output-existence pruning
        m_logger.log( "planner", 0, "Running in build mode, DAG pruning skipped");
      } else {
        // make mode: a node whose output files all exist already is cut
        // from the DAG, so only stale portions of the workflow run.
        m_logger.log( "planner", 0, "Checking nodes whose outputs already exist");

        // check output file existence, if all output files exist, then
        // cut this node
        boolean cut;

        // make reverse topological sort to the graph, i.e. find last
        // finished jobs first.
        Topology rtp = new Topology( graph.reverseGraph() );

        // Hash to keep all existing files
        HashMap existMap = new HashMap();
        // Hash to keep files to add to exist list for this stage
        HashMap addMap = new HashMap();
        // Hash to keep files to remove from exist list for this stage
        HashMap removeMap = new HashMap();
        String jobs[];
        // whether we are dealing with last finished jobs
        boolean last = true;

        while ( (jobs=rtp.stageSort()) != null ) {
          int number = jobs.length;
          int count = 0;         // jobs cut in this stage

          for ( int i=0; i<number; i++ ) {
            String jobID = jobs[i];
            cut = true;
            Job job = adag.getJob(jobID);

            // Hash to keep input files of this job
            HashMap inputMap = new HashMap();
            for ( Iterator e=job.iterateUses(); e.hasNext(); ) {
              Filename fn = (Filename) e.next();
              String lfn = fn.getFilename();

              // files already noted as existing need no re-check
              if ( ! existMap.containsKey(lfn) ) {
                // look up lfn in filename hash
                String pfn = (String)filenameMap.get(lfn);
                if (pfn == null) {
                  // lfn is not in the filename list
                  m_logger.log( "planner", 0,
                                "ERROR: File '" + lfn +
                                "' is not in the <filename> list, " +
                                "please check the DAX!" );
                  return false;
                }

                // check if output file exists
                if ( fn.getLink() == LFN.OUTPUT ) {
                  File fp = new File(pfn);
                  if ( ! fp.exists() ) {
                    // some output file does not exist.
                    cut = false;
                  }
                }
                if ( fn.getLink() == LFN.INPUT ) {
                  inputMap.put( lfn, pfn );
                }
              }
            }

            if ( cut ) {
              // cut node
              m_logger.log( "planner", 1, "Removed job " + jobID + " from DAG" );
              graph.removeVertex(jobID);
              // assume all input files (outputs from upper stages exist)
              addMap.putAll(inputMap);
              count++;
            } else {
              // assume all input files not exist.
              removeMap.putAll(inputMap);
            }
          } // for each job in stage

          if ( count == number ) {
            // output files for all the jobs in this stage exist
            if ( last ) {
              // this is the last stage, no need to run the dag
              m_logger.log( "planner", 0, "All output files already exist, " +
                            "no computation is needed!" );
              return true;
            }

            // cut all the upper stage jobs
            while ( (jobs=rtp.stageSort()) != null ) {
              for ( int i=0; i<jobs.length; i++ ) {
                m_logger.log( "planner", 1,
                              "Removed job " + jobs[i] + " from DAG" );
                graph.removeVertex(jobs[i]);
              }
            }
          } else {
            if ( count == 0 ) {
              // none gets cut in this stage
              last = false;
              continue;
            }
            // put assumed existing files into exist map
            existMap.putAll(addMap);
            TreeSet temp = new TreeSet( removeMap.keySet() );
            for ( Iterator it=temp.iterator(); it.hasNext(); ) {
              String lfn = (String) it.next();
              existMap.remove(lfn);
            }
          }

          // now the last stage has been processed
          last = false;
        } // end while
      } // end make mode

      // make topological sort to the graph
      Topology tp = new Topology(graph);

      // get the topmost jobs
      String[] jobs = tp.stageSort();

      // dax maybe invalid (empty or has cycle in it)
      if ( jobs == null ) {
        m_logger.log( "planner", 0, "ERROR: No starting job(s) found, " +
                      "please check the DAX file!" );
        return false;
      }

      // create a Scriptor instance
      Scriptor spt = new Scriptor( dir, adag, rc, sc, tc, filenameMap,
                                   m_props.getDataDir() );
      spt.setRegister(register);

      // Only set kickstart path if CLI argument was specified.
      // However, permit "-k ''" to remove kickstart invocations
      if ( kickstart_path != null ) {
        int x = kickstart_path.trim().length();
        spt.setKickstart( x == 0 ? null : kickstart_path );
      }

      String ctrlScript = spt.initializeControlScript();

      // first stage: check involved LFN's for these jobs
      for ( int i=0; i < jobs.length; i++ ) {
        // process each job and check input file existence
        String scriptFile = spt.processJob(jobs[i], true);
        if ( scriptFile == null ) {
          m_logger.log( "planner", 0,
                        "ERROR: failed processing job " + jobs[i] );
          return false;
        }
      }
      spt.intermediateControlScript();

      // process jobs in the following stages
      while ( (jobs = tp.stageSort()) != null ) {
        for ( int i=0; i < jobs.length; i++ ) {
          // not the first stage, no need to check input file existence
          String scriptFile = spt.processJob( jobs[i], false );
          if ( scriptFile == null ) {
            m_logger.log( "planner", 0,
                          "ERROR: failed processing job " + jobs[i] );
            return false;
          }
        }
        spt.intermediateControlScript();
      }
      spt.finalizeControlScript();

      m_logger.log( "planner", 0, "DAG processed successfully" );
      m_logger.log( "planner", 1, "changing file permission" );
      changePermission(dir);
      m_logger.log( "planner", 0, "To run the DAG, execute '" + ctrlScript +
                    "'" + " in directory " + dir );
      return true;
    } finally {
      // FIX: previously the catalogs were closed only on the success
      // path, leaking connections on every early return and exception.
      if ( rc != null ) rc.close();
      if ( sc != null ) sc.close();
      if ( tc != null ) tc.close();
    }
  }

  /**
   * Helper method to change the permissions of the generated shell
   * scripts to be executable. The chmod is only attempted on systems
   * that look POSIX-like, judged by their separator properties.
   *
   * @param dir is the directory containing the generated *.sh scripts.
   * @return the exit code of the chmod process, or -1 if the platform
   * was deemed non-POSIX and the chmod was skipped.
   * @throws IOException if launching the sub-process fails.
   * @throws InterruptedException if waiting for the sub-process is
   * interrupted.
   */
  protected static int changePermission( String dir )
    throws IOException, InterruptedException
  {
    if ( System.getProperty("line.separator").equals("\n") &&
         System.getProperty("file.separator").equals("/") &&
         System.getProperty("path.separator").equals(":") ) {
      // shell expands the *.sh glob for us
      String[] me = new String[ 3 ];
      me[0] = "/bin/sh";
      me[1] = "-c";
      me[2] = "chmod 0755 *.sh";
      Process p = Runtime.getRuntime().exec( me, null, new File(dir) );
      return p.waitFor();
    } else {
      return -1;
    }
  }
}