/* * This file or a portion of this file is licensed under the terms of * the Globus Toolkit Public License, found in file GTPL, or at * http://www.globus.org/toolkit/download/license.html. This notice must * appear in redistributions of this file, with or without modification. * * Redistributions of this Software, with or without modification, must * reproduce the GTPL in: (1) the Software, or (2) the Documentation or * some other similar material which is provided with the Software (if * any). * * Copyright 1999-2004 University of Chicago and The University of * Southern California. All rights reserved. */ package org.griphyn.vdl.directive; import edu.isi.pegasus.planner.invocation.InvocationRecord; import edu.isi.pegasus.planner.invocation.JobStatus; import edu.isi.pegasus.planner.invocation.JobStatusSuspend; import edu.isi.pegasus.planner.invocation.JobStatusRegular; import edu.isi.pegasus.planner.invocation.Architecture; import edu.isi.pegasus.planner.invocation.JobStatusSignal; import edu.isi.pegasus.planner.invocation.Job; import edu.isi.pegasus.planner.invocation.Status; import edu.isi.pegasus.planner.invocation.JobStatusFailure; import java.io.*; import java.sql.SQLException; import java.util.Iterator; import java.util.List; import java.util.Date; import java.util.ArrayList; import java.util.MissingResourceException; import edu.isi.pegasus.common.util.Version; import edu.isi.pegasus.common.util.Currently; import edu.isi.pegasus.planner.parser.InvocationParser; import org.griphyn.vdl.dbschema.*; import org.griphyn.vdl.util.Logging; import org.griphyn.vdl.toolkit.FriendlyNudge; import org.griphyn.vdl.util.ChimeraProperties; /** * Main objective of this class is to extract the exit status from * the invocation record returned by kickstart. The expected usage * is another Java class passing a filename, and obtaining the * cooked exit status for the parse. All other details, like removing * non-XML header and tailers, de-concatenation, are handled internally.<p> * * Usage of the class is divided into typically three steps. The first * step is to obtain an instance of the the parser, and configure it * to fit your needs.<p> * * <pre> * ParseKickstart pks = new ParseKickstart(); * ... // set flags * pks.setDatabaseSchema( ptcschema ); * </pre> * * The next step can be executed multiple times, and parse one or more * kickstart output files.<p> * * <pre> * List result = null; * try { * result = pks.parseFile( file ); * } catch ( FriendlyNudge fn ) { * // handle failures * } * </pre> * * Once you are definitely done, it is recommend to dis-associate yourself * from the active database connection.<p> * * <pre> * pks.close(); * pks = null; * </pre> * * @author Jens-S. Vöckler * @author Yong Zhao * @version $Revision$ * * @see org.griphyn.vdl.toolkit.ExitCode * @see org.griphyn.vdl.parser.InvocationParser */ public class ParseKickstart extends Directive { /** * Determines, if an empty output record constitutes a failure or success. * In old Globus 2.0, empty output frequently occurred. With the NFS * bug alleviation, while not fixed, it occurs a lot less frequently. */ private boolean m_emptyFail = true; /** * Determines, if the invocation records go back into the VDC or not. */ private boolean m_noDBase = false; /** * Determines, if the invocation records, when incurring a database failure, * will fail the application or not. */ private boolean m_ignoreDBFail = false; /** * The database schema driver used to connect to the PTC. */ private DatabaseSchema m_dbschema = null; /** * Semi-singleton, dynamically instantiated once for the lifetime. * The properties determine which Xerces parser is being used. */ private InvocationParser m_ip = null; /** * Attaches a workflow label (tag) to all workflows passing thru. */ private String m_wf_label = null; /** * Attaches a workflow mtime to all workflows passing thru. */ private Date m_wf_mtime = null; /** * Default c'tor. */ public ParseKickstart() throws IOException, MissingResourceException { super(); } /** * C'tor which permits the setting of a PTC connection. * * @param dbschema is the database schema to use for the PTC. */ public ParseKickstart( DatabaseSchema dbschema ) throws IOException, MissingResourceException { super(); if ( (m_dbschema = dbschema) == null ) m_noDBase = true; } /** * C'tor which permits the setting of a PTC connection. * * @param dbschema is the database schema to use for the PTC. * @param emptyFail determines, if empty input files are error or OK. */ public ParseKickstart( DatabaseSchema dbschema, boolean emptyFail ) throws IOException, MissingResourceException { super(); if ( (m_dbschema = dbschema) == null ) m_noDBase = true; m_emptyFail = emptyFail; } /** * Sets the database schema. * * @param dbschema is a database schema instance for the PTC. */ public void setDatabaseSchema( DatabaseSchema dbschema ) { m_dbschema = dbschema; } /** * Closes the associated database backend and invalidates the schema. */ public void close() throws SQLException { if ( m_dbschema != null ) m_dbschema.close(); m_dbschema = null; m_ip = null; } /** * Obtains the fail-on-empty-file value. * * @return true, if to fail on empty files. * @see #setEmptyFail( boolean ) */ public boolean getEmptyFail() { return m_emptyFail; } /** * Sets the fail-on-empty-file value. * * @param emptyFail contains the new value, if to fail on empty files. * @see #getEmptyFail() */ public void setEmptyFail( boolean emptyFail ) { m_emptyFail = emptyFail; } /** * Gets the variable to permit connections to the PTC, or * use parse-only mode. * * @return true, if the PTC is intended to be used, false for * parse-only mode. * @see #setNoDBase(boolean) */ public boolean getNoDBase() { return this.m_noDBase; } /** * Sets the parse-only versus PTC mode. * * @param noDBase is true to use the parse-only mode. * @see #getNoDBase() */ public void setNoDBase( boolean noDBase ) { this.m_noDBase = noDBase; } /** * Obtains a dont-fail-on-database-errors mode. * * @return true, if database failures are not fatal. * @see #setIgnoreDBFail(boolean) */ public boolean getIgnoreDBFail() { return this.m_ignoreDBFail; } /** * Sets the dont-fail-on-dbase-errors mode. * * @param ignore is true to render database error non-fatal. * @see #getIgnoreDBFail() */ public void setIgnoreDBFail( boolean ignore ) { this.m_ignoreDBFail = ignore; } /** * Obtains the current value of the workflow label to use. * * @return current workflow label to use, may be <code>null</code>. * @see #setWorkflowLabel(String) */ public String getWorkflowLabel() { return this.m_wf_label; } /** * Sets the workflow label. * * @param label is the (new) workflow label. * @see #getWorkflowLabel() */ public void setWorkflowLabel( String label ) { this.m_wf_label = label; } /** * Obtains the current value of the workflow modification time to use. * * @return current workflow mtime, may be <code>null</code>. * @see #setWorkflowTimestamp(Date) */ public Date getWorkflowTimestamp() { return this.m_wf_mtime; } /** * Sets the workflow modification time to record. * * @param mtime is the (new) workflow mtime. * @see #getWorkflowTimestamp() */ public void setWorkflowTimestamp( Date mtime ) { this.m_wf_mtime = mtime; } /** * Determines the exit code of an invocation record. Currently, * we will determine the exit code from all jobs until failure * or no more jobs. However, set-up and clean-up jobs are ignored. * * @param ivr is the invocation record to put into the database * @return the status code as exit code to signal failure etc. * <pre> * 0 regular exit with exit code 0 * 1 regular exit with exit code > 0 * 2 failure to run program from kickstart * 3 application had died on signal * 4 application was suspended (should not happen) * 5 failure in exit code parsing * 6 impossible case * </pre> */ public int determineExitStatus( InvocationRecord ivr ) { boolean seen = false; for ( Iterator i=ivr.iterateJob(); i.hasNext(); ) { Job job = (Job) i.next(); // set-up/clean-up jobs don't count in failure modes if ( job.getTag().equals("cleanup") ) continue; if ( job.getTag().equals("setup") ) continue; // obtains status from job Status status = job.getStatus(); if ( status == null ) return 6; JobStatus js = status.getJobStatus(); if ( js == null ) { // should not happen return 6; } else if ( js instanceof JobStatusRegular ) { // regular exit code - success or failure? int exitcode = ((JobStatusRegular) js).getExitCode(); if ( exitcode != 0 ) return 1; else seen = true; // continue, if exitcode of 0 to implement chaining !!!! } else if ( js instanceof JobStatusFailure ) { // kickstart failure return 2; } else if ( js instanceof JobStatusSignal ) { // died on signal return 3; } else if ( js instanceof JobStatusSuspend ) { // suspended??? return 4; } else { // impossible/unknown case return 6; } } // success, or no [matching] jobs return seen ? 0 : 5; } /** * Extracts records from the given input file. Since there may be * more than one record per file, especially in the case of MPI, * multiple results are possible, though traditionally only one * will be used. * * @param input is the name of the file that contains the records * @return a list of strings, each representing one invocation record. * The result should not be empty (exception will be thrown). * @throws FriendlyNudge, if the input format was invalid. * The caller has to assume failure to parse the record provided. */ public List extractToMemory( java.io.File input ) throws FriendlyNudge { List result = new ArrayList(); StringWriter out = null; Logging log = getLogger(); // open the files int p1, p2, state = 0; try { BufferedReader in = new BufferedReader( new FileReader(input) ); out = new StringWriter(4096); String line = null; while ( (line = in.readLine()) != null ) { if ( (state & 1) == 0 ) { // try to copy the XML line in any case if ( (p1 = line.indexOf( "<?xml" )) > -1 ) if ( (p2 = line.indexOf( "?>", p1 )) > -1 ) { out.write( line, p1, p2+2 ); log.log( "parser", 2, "state=" + state + ", seen <?xml ...?>" ); } // start state with the correct root element if ( (p1 = line.indexOf( "<invocation")) > -1 ) { if ( p1 > 0 ) line = line.substring( p1 ); log.log( "parser", 2, "state=" + state + ", seen <invocation>" ); ++state; } } if ( (state & 1) == 1 ) { out.write( line ); if ( (p1 = line.indexOf("</invocation>")) > -1 ) { log.log( "parser", 2, "state=" + state + ", seen </invocation>" ); ++state; out.flush(); out.close(); result.add( out.toString() ); out = new StringWriter(4096); } } } in.close(); out.close(); } catch ( IOException ioe ) { throw new FriendlyNudge( "While copying " + input.getPath() + " into temp. file: " + ioe.getMessage(), 5 ); } // some sanity checks if ( state == 0 ) throw new FriendlyNudge( "File " + input.getPath() + " does not contain invocation records," + " assuming failure", 5 ); if ( (state & 1) == 1 ) throw new FriendlyNudge( "File " + input.getPath() + " contains an incomplete invocation record," + " assuming failure", 5 ); // done return result; } /** * Parses the contents of a kickstart output file, and returns a * list of exit codes obtains from the records. * * @param arg0 is the name of the file to read * @return a list with one or more exit code, one for each record. * @throws FriendlyNudge, if parsing of the file goes hay-wire. * @throws IOException if something happens while reading properties * to instantiate the XML parser. * @throws SQLException if accessing the database fails. */ public List parseFile( String arg0 ) throws FriendlyNudge, IOException, SQLException { List result = new ArrayList(); Logging me = getLogger(); me.log( "kickstart", 2, "working with file " + arg0 ); // get access to the invocation parser if ( m_ip == null ) { ChimeraProperties props = ChimeraProperties.instance(); String psl = props.getPTCSchemaLocation(); me.log( "kickstart", 2, "using XML schema location " + psl ); m_ip = new InvocationParser( psl ); } // check input file java.io.File check = new java.io.File(arg0); // test 1: file exists if ( ! check.exists() ) { me.log( "kickstart", 2, "file does not exist, fail with 5" ); throw new FriendlyNudge( "file does not exist " + arg0 + ", assuming failure", 5 ); } // test 2: file is readable if ( ! check.canRead() ) { me.log( "kickstart", 2, "file not readable, fail with 5" ); throw new FriendlyNudge( "unable to read file " + arg0 + ", assuming failure", 5 ); } // test 3: file has nonzero size // FIXME: Actually need to check the record size me.log( "kickstart", 2, "file has size " + check.length() ); if ( check.length() == 0 ) { // deal with 0-byte file if ( getEmptyFail() ) { me.log( "kickstart", 2, "zero size file, fail with 5" ); throw new FriendlyNudge( "file has zero length " + arg0 + ", assuming failure", 5 ); } else { me.log( "kickstart", 2, "zero size file, succeed with 0" ); me.log( "app", 1, "file has zero length " + arg0 + ", assuming success" ); result.add( new Integer(0) ); return result; } } // test 4: extract XML into tmp file me.log( "kickstart", 2, "about to extract content into memory" ); List extract = extractToMemory(check); me.log( "kickstart", 2, extract.size() + " records extracted" ); // testme: for each record obtained, work on it Architecture cachedUname = null; for ( int j=1; j-1 < extract.size(); ++j ) { String temp = (String) extract.get(j-1); me.log( "kickstart", 2, "content[" + j + "] extracted, length " + temp.length() ); // test 5: try to parse XML me.log( "app", 2, "starting to parse invocation" ); me.log( "kickstart", 2, "about to parse invocation record" ); InvocationRecord invocation = m_ip.parse( new StringReader(temp) ); me.log( "kickstart", 2, "done parsing invocation" ); if ( invocation == null ) { me.log( "kickstart", 2, "result record " + j + " is invalid (null), fail with 5" ); throw new FriendlyNudge( "invalid XML invocation record " + j + " in " + arg0 + ", assuming failure", 5 ); } else { me.log( "kickstart", 2, "result record " + j + " appears valid" ); me.log( "app", 1, "invocation " + j + " was parsed successfully" ); } // NEW: attached workflow tag and mtime if ( m_wf_label != null ) invocation.setWorkflowLabel( m_wf_label ); if ( m_wf_mtime != null ) invocation.setWorkflowTimestamp( m_wf_mtime ); // Fix for Pegasus Bug 39 // the machine information tag is created only once for a cluster // the -H flag disables the generation of machine information Architecture uname = invocation.getArchitecture(); if( uname == null ){ //attempt to update with cachedUname invocation.setArchitecture( cachedUname ); } else{ cachedUname = uname; } // insert into database -- iff it is available if ( ! m_noDBase && m_dbschema != null && m_dbschema instanceof PTC ) { PTC ptc = (PTC) m_dbschema; try { // FIXME: (start,host,pid) may not be a sufficient secondary key me.log( "kickstart", 2,"about to obtain secondary key triple" ); if ( ptc.getInvocationID( invocation.getStart(), invocation.getHostAddress(), invocation.getPID() ) == -1 ) { me.log( "kickstart", 2, "new invocation, adding" ); me.log( "app", 1, "adding invocation to database" ); // may throw SQLException ptc.saveInvocation( invocation ); } else { me.log( "kickstart", 2, "existing invocation, skipping" ); me.log( "app", 1, "invocation already exists, skipping!" ); } } catch ( SQLException sql ) { if ( m_ignoreDBFail ) { // if dbase errors are non-fatal, just protocol what is going on. for ( int n=0; sql != null; ++n ) { me.log( "default", 0, "While inserting PTR [" + j + "]:" + n + ": " + sql.getMessage() + ", ignoring" ); sql = sql.getNextException(); } } else { // rethrow, if dbase errors are fatal (default) throw sql; } } // catch } // if use dbase // determine result code int status = 0; me.log( "kickstart", 2, "about to determine exit status" ); status = determineExitStatus( invocation ); me.log( "kickstart", 2, "exit status is " + status ); result.add( new Integer(status) ); } // for // done return result; } }