package net.sourceforge.seqware.pipeline.modules;
import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import joptsimple.OptionException;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import net.sourceforge.seqware.common.model.ProcessingAttribute;
import net.sourceforge.seqware.common.module.FileMetadata;
import net.sourceforge.seqware.common.module.ReturnValue;
import net.sourceforge.seqware.common.util.Log;
import net.sourceforge.seqware.common.util.filetools.FileTools;
import net.sourceforge.seqware.pipeline.module.Module;
import net.sourceforge.seqware.pipeline.module.ModuleInterface;
import net.sourceforge.seqware.pipeline.plugins.Metadata;
import org.openide.util.lookup.ServiceProvider;
/**
* This is a very simple module that is used to save both a processing event and 0 or more files to the metadb. It does absolutely no
* computation at all; it just saves metadata. You might want to use this if your module or call to genericCommandRunner results in a bunch
* of files being created, each of which needs to be associated with different parent objects in the database. You might also use this to
* combine the outputs of several different steps into a single processing event so the UI in the Portal is simplified.
*
* Here's an example of how you might call the program:
*
* ./bin/seqware-runner.sh --no-metadata --module net.sourceforge.seqware.pipeline.modules.GenericMetadataSaver -- --gms-output-file
* test::test::/tmp/foo.txt --gms-algorithm foobarAlgorithm
*
* You could, of course, supply metadata db information so the status is written to the database and also a parent accession along with an
* accession output file to be read in by a subsequent step.
*
* Please use JavaDoc to document each method (the user interface documents will be autogenerated using these comments). See
* http://en.wikipedia.org/wiki/Javadoc for more information.
*
* @author briandoconnor@gmail.com
* @version $Id: $Id
* @deprecated Deprecating this in favour of Metadata(plugin) and the new CLI
*/
@ServiceProvider(service = ModuleInterface.class)
public class GenericMetadataSaver extends Module {
private OptionSet options = null;
private ArrayList<String> cmdParameters = null;
/**
* getOptionParser is an internal method to parse command line args.
*
* @return OptionParser this is used to get command line options
*/
@Override
protected OptionParser getOptionParser() {
OptionParser parser = new OptionParser();
parser.accepts(
"gms-output-file",
"Specify this option one or more times for each output file created by the command called by this module. The argument is a '::' delimited list of type, meta_type, and file_path.")
.withRequiredArg().ofType(String.class).describedAs("Optional: <type::meta_type::file_path>");
parser.accepts("gms-algorithm",
"You can pass in an algorithm name that will be recorded in the metadb if you are writing back to the metadb.")
.withRequiredArg().ofType(String.class).describedAs("Required");
parser.accepts(
"gms-suppress-output-file-check",
"If provided, this will suppress checking that the gms-output-file options contain valid file paths. Useful if these are remote resources like HTTP or S3 file URLs.");
return (parser);
}
/**
* {@inheritDoc}
*
* A method used to return the syntax for this module
*
* @return
*/
@Override
public String get_syntax() {
OptionParser parser = getOptionParser();
StringWriter output = new StringWriter();
try {
parser.printHelpOn(output);
return (output.toString());
} catch (IOException e) {
e.printStackTrace();
return (e.getMessage());
}
}
/**
* {@inheritDoc}
*
* The init method is where you put any code needed to setup your module. Here I set some basic information in the ReturnValue object
* which will eventually populate the "processing" table in seqware_meta_db. I also create a temporary directory using the FileTools
* object.
*
* init is optional
*
* @return
*/
@Override
public ReturnValue init() {
// setup the return value object, notice that we use
// ExitStatus, this is what SeqWare uses to track the status
ReturnValue ret = new ReturnValue();
ret.setExitStatus(ReturnValue.SUCCESS);
// fill in the algorithm field in the processing table
ret.setAlgorithm("GenericMetadataSaver");
// fill in the description field in the processing table
ret.setDescription("This is a simple metadata saver.");
// fill in the version field in the processing table
ret.setVersion("0.7.0");
try {
OptionParser parser = getOptionParser();
// The parameters object is actually an ArrayList of Strings created
// by splitting the command line options by space. JOpt expects a String[]
// an array for this module
ArrayList<String> myParameters = new ArrayList<>();
// an array for everything else that will get passed to the command
cmdParameters = new ArrayList<>();
// should be able to do this since all the --gms-* params except one take
// an argument
for (int i = 0; i < this.getParameters().size(); i++) {
if (this.getParameters().get(i).startsWith("--gms-")) {
myParameters.add(this.getParameters().get(i));
if (!this.getParameters().get(i).equals("--gms-suppress-output-file-check") && i + 1 < this.getParameters().size()) {
myParameters.add(this.getParameters().get(i + 1));
i++;
}
} else {
cmdParameters.add(this.getParameters().get(i));
}
}
options = parser.parse(myParameters.toArray(new String[myParameters.size()]));
// if algo is defined save the new value
if (options.has("gms-algorithm")) {
ret.setAlgorithm((String) options.valueOf("gms-algorithm"));
}
// you can write to "stdout" or "stderr" which will be persisted back to
// the DB
// ret.setStdout(ret.getStdout()+"Output: "+(String)options.valueOf("output-file")+"\n");
} catch (OptionException e) {
e.printStackTrace();
ret.setStderr(e.getMessage());
ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
}
// now return the ReturnValue
return ret;
}
/**
* {@inheritDoc}
*
* Verifies that the parameters make sense
*
* @return
*/
@Override
public ReturnValue do_verify_parameters() {
// most methods return a ReturnValue object
ReturnValue ret = new ReturnValue();
ret.setExitStatus(ReturnValue.SUCCESS);
// now look at the options and make sure they make sense
for (String option : new String[] { "gms-algorithm" }) {
if (!options.has(option)) {
ret.setExitStatus(ReturnValue.INVALIDPARAMETERS);
String stdErr = ret.getStderr();
ret.setStderr(stdErr + "Must include parameter: --" + option + "\n");
}
}
return ret;
}
/**
* {@inheritDoc}
*
* The do_verify_input method ensures that the input is reasonable and valid for this tool. For this generic command runner we really
* can't tell if the
*
* @return
*/
@Override
public ReturnValue do_verify_input() {
// not much to do, let's make sure the
// temp directory is writable
ReturnValue ret = new ReturnValue();
ret.setExitStatus(ReturnValue.SUCCESS);
// should check the file paths
if (options.has("gms-output-file") && !options.has("gms-suppress-output-file-check")) {
List<String> files = (List<String>) options.valuesOf("gms-output-file");
for (String file : files) {
String[] tokens = file.split("::");
if (FileTools.fileExistsAndReadable(new File(tokens[2])).getExitStatus() != ReturnValue.SUCCESS) {
Log.error("File does not exist or is not readable: " + tokens[2]);
ret.setExitStatus(ReturnValue.FILENOTREADABLE);
return ret;
}
}
}
return ret;
}
/**
* {@inheritDoc}
*
* This is really an optional method but a very good idea. You would test the programs your calling here by running them on a
* "known good" test dataset and then compare the new answer with the previous known good answer. Other forms of testing could be
* encapsulated here as well.
*
* @return
*/
@Override
public ReturnValue do_test() {
// notice the use of "NOTIMPLEMENTED", this signifies that we simply
// aren't doing this step. It's better than just saying SUCCESS
ReturnValue ret = new ReturnValue();
ret.setExitStatus(ReturnValue.NOTIMPLEMENTED);
// not much to do, just return
return ret;
}
/**
* {@inheritDoc}
*
* This is the core of a module. It just saves metadata back to the DB using the standardized ReturnValue object as a wrapper.
*
* @return
*/
@Override
public ReturnValue do_run() {
// prepare the return value
ReturnValue ret = new ReturnValue();
ret.setExitStatus(ReturnValue.SUCCESS);
// track the start time of do_run for timing purposes
ret.setRunStartTstmp(new Date());
// record the file output
if (options.has("gms-output-file")) {
List<String> files = (List<String>) options.valuesOf("gms-output-file");
for (String file : files) {
FileMetadata fm = Metadata.fileString2FileMetadata(file);
ret.getFiles().add(fm);
if (fm.getMetaType().equals("text/key-value") && this.getProcessingAccession() != 0) {
Map<String, String> map = FileTools.getKeyValueFromFile(fm.getFilePath());
Set<ProcessingAttribute> atts = new TreeSet<>();
for (Map.Entry<String, String> entry : map.entrySet()) {
ProcessingAttribute a = new ProcessingAttribute();
a.setTag(entry.getKey());
a.setValue(entry.getValue());
atts.add(a);
}
this.getMetadata().annotateProcessing(this.getProcessingAccession(), atts);
}
}
} else {
Log.info(get_syntax());
}
// note the time do_run finishes
ret.setRunStopTstmp(new Date());
return ret;
}
/**
* {@inheritDoc}
*
* A method to check to make sure the output was created correctly
*
* @return
*/
@Override
public ReturnValue do_verify_output() {
ReturnValue ret = new ReturnValue();
ret.setExitStatus(ReturnValue.NOTIMPLEMENTED);
return ret;
}
/**
* {@inheritDoc}
*
* A cleanup method, make sure you cleanup files that are outside the current working directory since Pegasus won't clean those for you.
*
* clean_up is optional
*
* @return
*/
@Override
public ReturnValue clean_up() {
ReturnValue ret = new ReturnValue();
ret.setExitStatus(ReturnValue.NOTIMPLEMENTED);
return ret;
}
}