package au.com.acpfg.pfa.interproscan;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.rmi.RemoteException;
import java.util.HashMap;
import java.util.Set;
import javax.xml.rpc.ServiceException;
import javax.xml.ws.soap.SOAPFaultException;
import org.knime.core.data.DataCell;
import org.knime.core.data.DataColumnSpec;
import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.DataRow;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.DataType;
import org.knime.core.data.RowIterator;
import org.knime.core.data.def.DefaultRow;
import org.knime.core.data.def.StringCell;
import org.knime.core.data.image.png.PNGImageCell;
import org.knime.core.data.image.png.PNGImageContent;
import org.knime.core.node.BufferedDataContainer;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionContext;
import org.knime.core.node.ExecutionMonitor;
import org.knime.core.node.InvalidSettingsException;
import org.knime.core.node.NodeLogger;
import org.knime.core.node.NodeModel;
import org.knime.core.node.NodeSettingsRO;
import org.knime.core.node.NodeSettingsWO;
import org.knime.core.node.defaultnodesettings.SettingsModelBoolean;
import org.knime.core.node.defaultnodesettings.SettingsModelString;
import org.knime.core.node.defaultnodesettings.SettingsModelStringArray;
import uk.ac.ebi.webservices.jaxws.IPRScanClient;
import uk.ac.ebi.webservices.jaxws.stubs.iprscan.ArrayOfString;
import uk.ac.ebi.webservices.jaxws.stubs.iprscan.InputParameters;
import uk.ac.ebi.webservices.jaxws.stubs.iprscan.ObjectFactory;
import uk.ac.ebi.webservices.jaxws.stubs.iprscan.WsResultType;
import au.com.acpfg.xml.reader.XMLCell;
/**
 * This is the model implementation of InterProScan.
 * Accesses the EBI webservice: interproscan with the user-specified settings.
 *
 * <p>Rows of the input table are submitted to the service in batches; once a
 * batch is full (or the input is exhausted) the node waits for every job of the
 * batch to finish, downloads the results and appends one output row per job.
 * Rows whose sequence is missing, empty or shorter than
 * {@link #MIN_LIKELY_INTERPROSCAN_DB} are skipped and produce no output row.
 *
 * @author Andrew Cassin
 */
public class InterProScanNodeModel extends NodeModel {
    /**
     * Sequences shorter than 80aa are unlikely to match anything in InterProScan, so
     * we skip them to avoid upsetting EBI with useless jobs
     */
    public static final int MIN_LIKELY_INTERPROSCAN_DB = 80;

    // the logger instance
    private static final NodeLogger logger = NodeLogger
            .getLogger(InterProScanNodeModel.class);

    // keys under which the dialog settings are persisted
    static final String CFGKEY_EMAIL    = "email-address";
    static final String CFGKEY_SEQ      = "sequences-from";
    static final String CFGKEY_USE_CRC  = "use-crc?";
    static final String CFGKEY_USE_APPL = "algorithms-to-use";
    static final String CFGKEY_IMGDIR   = "image-directory";
    static final String CFGKEY_SAVEIMGS = "save-images?";
    static final String CFGKEY_IMGSUBSET= "image-subset";

    private static final String  DEFAULT_EMAIL    = "who@what.ever.some.where";
    private static final String  DEFAULT_SEQ      = "Sequence";
    private static final boolean DEFAULT_USE_CRC  = true;
    private static final String  DEFAULT_USE_APPL = "blastprodom";

    /**
     * The number of seconds to wait if a running job has not yet completed.
     * Scales linearly with the number of retries. Must be greater than zero
     */
    private static final int MANDATORY_DELAY = 20;

    /** Maximum number of jobs which are outstanding at the service at any one time. */
    private static final int BATCH_SIZE = 25;

    /** Number of times a job submission is attempted before giving up. */
    private static final int MAX_SUBMIT_ATTEMPTS = 4;

    /** Maximum number of status polls per job before the job is considered lost. */
    private static final int MAX_STATUS_POLLS = 1000;

    // configure-dialog state which must be persistent
    private final SettingsModelString      m_email     = new SettingsModelString(CFGKEY_EMAIL, DEFAULT_EMAIL);
    private final SettingsModelString      m_seq       = new SettingsModelString(CFGKEY_SEQ, DEFAULT_SEQ);
    private final SettingsModelBoolean     m_crc       = new SettingsModelBoolean(CFGKEY_USE_CRC, DEFAULT_USE_CRC);
    private final SettingsModelStringArray m_vec       = new SettingsModelStringArray(CFGKEY_USE_APPL, new String[] {DEFAULT_USE_APPL});
    private final SettingsModelBoolean     m_save_imgs = new SettingsModelBoolean(CFGKEY_SAVEIMGS, false);
    private final SettingsModelString      m_img_dir   = new SettingsModelString(CFGKEY_IMGDIR, "c:/temp");

    /**
     * Constructor for the node model: one input port, one output port.
     */
    protected InterProScanNodeModel() {
        super(1, 1);
        m_img_dir.setEnabled(false); // since m_save_imgs is false by default
    }

    /**
     * Submits every usable sequence of the first input table to InterProScan and
     * collects the results into a single output table.
     *
     * @param inData the input tables; only {@code inData[0]} is read
     * @param exec   used for progress reporting, cancellation and table creation
     * @return a one-element array holding the result table
     * @throws Exception if the node is not configured properly, a job cannot be
     *                   submitted or fails at EBI, or the execution is cancelled
     */
    @Override
    protected BufferedDataTable[] execute(final BufferedDataTable[] inData,
            final ExecutionContext exec) throws Exception {
        final int seq_idx = inData[0].getDataTableSpec().findColumnIndex(m_seq.getStringValue());
        if (seq_idx < 0) {
            throw new InvalidSettingsException("Invalid sequence column... re-configure the node?");
        }
        final String email = m_email.getStringValue();
        if (email == null || email.equals(DEFAULT_EMAIL) || email.trim().length() < 1) {
            throw new InvalidSettingsException("You must provide a valid email address. Re-configure the node.");
        }

        IPRScanClient cli = new IPRScanClient();
        // creator of objects for webservice
        ObjectFactory of = new ObjectFactory();

        // create output container
        final boolean with_images = m_save_imgs.getBooleanValue();
        BufferedDataContainer container = exec.createDataContainer(make_output_spec(with_images));

        // run the jobs...
        final double n_rows = inData[0].getRowCount();
        exec.setProgress(0.0);
        RowIterator it = inData[0].iterator();
        // job id -> key of the input row the job was submitted for
        HashMap<String,String> outstanding_jobs = new HashMap<String,String>();
        int done = 0;

        // The loop is driven by the iterator and the outstanding jobs rather than by
        // the number of completed jobs: skipped rows never become jobs, so a purely
        // job-based count would never reach the row count and the loop would not end.
        while (it.hasNext() || !outstanding_jobs.isEmpty()) {
            if (outstanding_jobs.size() < BATCH_SIZE && it.hasNext()) {
                DataRow r = it.next();
                String seq = usable_sequence(r, seq_idx);
                if (seq == null) {
                    // skipped rows still count towards the progress
                    done++;
                    exec.setProgress(((double) done) / n_rows);
                } else {
                    String rkey = r.getKey().getString();
                    String job_id = submit_job_async(cli, email, seq, rkey, of);
                    logger.info("Submitted job for row "+rkey+": "+job_id);
                    // if no exception thrown, we assume job was successfully submitted, so...
                    outstanding_jobs.put(job_id, rkey);
                }
            } else {
                // must wait for entire batch to complete -- EBI terms of service
                wait_for_completion(cli, outstanding_jobs.keySet());
                // process results
                for (String job_id : outstanding_jobs.keySet()) {
                    container.addRowToTable(
                            fetch_result_row(cli, job_id, outstanding_jobs.get(job_id), with_images));
                }
                done += outstanding_jobs.size();
                outstanding_jobs.clear();
                exec.setProgress(((double) done) / n_rows);
            }
            exec.checkCanceled();
        }

        container.close();
        BufferedDataTable out = container.getTable();
        return new BufferedDataTable[] {out};
    }

    /**
     * Builds the spec of the output table.
     *
     * @param with_images whether the PNG summary column is appended
     * @return spec with the job id, XML result and tool output columns, plus the
     *         PNG column if requested
     */
    private DataTableSpec make_output_spec(boolean with_images) {
        DataColumnSpec[] cols = new DataColumnSpec[with_images ? 4 : 3];
        cols[0] = new DataColumnSpecCreator("Job ID", StringCell.TYPE).createSpec();
        cols[1] = new DataColumnSpecCreator("InterProScan Results (XML)", XMLCell.TYPE).createSpec();
        cols[2] = new DataColumnSpecCreator("InterProScan Tool Output", StringCell.TYPE).createSpec();
        if (with_images) {
            cols[3] = new DataColumnSpecCreator("InterProScan Results Summary (PNG)", DataType.getType(PNGImageCell.class)).createSpec();
        }
        return new DataTableSpec(cols);
    }

    /**
     * Extracts the sequence to submit for a row.
     *
     * @param r       the input row
     * @param seq_idx index of the sequence column
     * @return the trimmed sequence, or <code>null</code> if the row must be skipped
     *         (missing cell, empty sequence or sequence shorter than
     *         {@link #MIN_LIKELY_INTERPROSCAN_DB})
     */
    private String usable_sequence(DataRow r, int seq_idx) {
        DataCell seq_cell = r.getCell(seq_idx);
        if (seq_cell.isMissing()) {
            return null;
        }
        String seq  = ((StringCell) seq_cell).getStringValue().trim();
        String rkey = r.getKey().getString();
        if (seq.length() < 1) {
            logger.warn("Skipping empty sequence for row "+rkey);
            return null;
        }
        if (seq.length() < MIN_LIKELY_INTERPROSCAN_DB) {
            logger.warn("Sequence for row "+rkey+" is too short to match InterProScan, skipping.");
            return null;
        }
        return seq;
    }

    /**
     * Downloads the results of a finished job and packs them into an output row.
     *
     * @param cli         the webservice client
     * @param job_id      the job to fetch the results of
     * @param rkey        key of the input row the job was submitted for; used as
     *                    the key of the output row
     * @param with_images whether the PNG summary is fetched (and saved to the
     *                    configured image directory) as well
     * @return the output row for the job
     * @throws Exception if the XML or PNG result cannot be retrieved
     */
    private DataRow fetch_result_row(IPRScanClient cli, String job_id, String rkey, boolean with_images)
            throws Exception {
        byte[] results = cli.getSrvProxy().getResult(job_id, "xml", null);
        DataCell xml = DataType.getMissingCell();
        if (results != null) {
            xml = new XMLCell(new String(results));
        }

        // retrieve the tool output so that user can compare with input sequence;
        // this is optional, so a failure only results in a warning
        String tool = "<html><pre>";
        try {
            byte[] tool_bytes = cli.getSrvProxy().getResult(job_id, "out", null);
            if (tool_bytes != null && tool_bytes.length > 0) {
                tool = "<html><pre>"+new String(tool_bytes);
            }
        } catch (Exception e) {
            logger.warn("No tool output available from EBI, for job:"+job_id);
        }

        if (!with_images) {
            return new DefaultRow(rkey, new StringCell(job_id), xml, new StringCell(tool));
        }

        // fetch PNG from EBI and install into table (if data available from EBI)
        byte[] png = cli.getSrvProxy().getResult(job_id, "visual-png", null);
        DataCell png_cell = DataType.getMissingCell();
        if (png != null && png.length > 0) {
            png_cell = new PNGImageContent(png).toImageCell();
            // only write a file when there really is an image to write
            save_image(job_id, png);
        }
        return new DefaultRow(rkey, new StringCell(job_id), xml, new StringCell(tool), png_cell);
    }

    /**
     * Writes the PNG of a job to <code>&lt;job id&gt;.png</code> in the configured
     * image directory. Saving is best-effort: any problem is logged as a warning
     * and never aborts the run.
     *
     * @param job_id the job the image belongs to
     * @param png    the image data; must not be <code>null</code>
     */
    private void save_image(String job_id, byte[] png) {
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(new File(m_img_dir.getStringValue(), job_id+".png"));
            fos.write(png);
        } catch (Exception e) {
            logger.warn("Unable to save: "+job_id+", reason: "+e.getMessage());
        } finally {
            if (fos != null) {
                try {
                    fos.close();
                } catch (IOException e) {
                    // a failing close must not abort the run either
                    logger.warn("Unable to save: "+job_id+", reason: "+e.getMessage());
                }
            }
        }
    }

    /**
     * Submits a single sequence to InterProScan without waiting for the result.
     * Remote, service and SOAP fault exceptions are passed on immediately; any
     * other problem leads to a retry after a pause which grows with each attempt.
     *
     * @param cli           the webservice client
     * @param email_address e-mail address passed to the service with the job
     * @param seq           the sequence to analyse
     * @param rkey          key of the input row, used as title of the job
     * @param of            factory for the parameter objects of the webservice
     * @return the id of the submitted job
     * @throws Exception if the submission fails; a {@link FailedJobException} is
     *                   thrown once all attempts are used up
     */
    private String submit_job_async(IPRScanClient cli, String email_address, String seq, String rkey, ObjectFactory of) throws Exception {
        for (int attempt = 1; attempt <= MAX_SUBMIT_ATTEMPTS; attempt++) {
            try {
                InputParameters job_params = new InputParameters();
                ArrayOfString aos = new ArrayOfString();
                for (String appl : m_vec.getStringArrayValue()) {
                    // explicit locale: the default locale may map letters differently (eg. Turkish 'I')
                    aos.getString().add(appl.toLowerCase(java.util.Locale.ENGLISH));
                }
                job_params.setAppl(of.createInputParametersAppl(aos));
                job_params.setSequence(of.createInputParametersSequence(seq));
                job_params.setGoterms(of.createInputParametersGoterms(Boolean.TRUE));
                job_params.setNocrc(of.createInputParametersNocrc(Boolean.valueOf(!m_crc.getBooleanValue())));
                return cli.runApp(email_address, rkey, job_params);
            } catch (RemoteException re) {
                throw re;
            } catch (ServiceException se) {
                throw se;
            } catch (SOAPFaultException soape) {
                throw soape;
            } catch (Exception e) {
                if (attempt == MAX_SUBMIT_ATTEMPTS) {
                    // no point in pausing when there is no further attempt
                    logger.warn("Problem when submitting job: "+e.getMessage());
                    break;
                }
                int delay = attempt*500; // seconds
                logger.warn("Problem when submitting job: "+e.getMessage()+ "... retrying in "+delay+" seconds");
                Thread.sleep(delay*1000L);
            }
        }
        throw new FailedJobException("Cannot submit job after four attempts... giving up on "+rkey+"!");
    }

    /**
     * Blocks until every job in <code>keySet</code> has finished. A pause precedes
     * the first status check of the batch and every check which follows an
     * unfinished status; the pause grows linearly with the number of checks made
     * for the current job.
     *
     * @param cli    the webservice client
     * @param keySet ids of the jobs to wait for
     * @throws FailedJobException   if a job reports a failure or error status, or
     *                              is still unfinished after the maximum number of
     *                              status checks
     * @throws InterruptedException if the thread is interrupted while pausing
     * @throws ServiceException     declared for the status check of the webservice client
     * @throws IOException          declared for the status check of the webservice client
     */
    private void wait_for_completion(IPRScanClient cli, Set<String> keySet)
            throws ServiceException, InterruptedException, FailedJobException, IOException {
        boolean wait = true; // mandatory wait for first job in batch
        for (String s : keySet) {
            boolean finished = false;
            for (int idx = 0; idx < MAX_STATUS_POLLS && !finished; idx++) {
                if (wait) {
                    int delay = (MANDATORY_DELAY+(idx*MANDATORY_DELAY)); // seconds
                    logger.info("Pausing to meet EBI requirements: "+delay+" seconds.");
                    Thread.sleep(delay*1000L);
                }
                // explicit locale so that the comparison below works for every default locale
                String status = cli.checkStatus(s).toLowerCase(java.util.Locale.ENGLISH);
                // completed or finished?
                if (status.startsWith("complete") || status.startsWith("finish")) {
                    finished = true;
                    wait = false; // check status without waiting for rest of batch
                } else if (status.startsWith("fail") || status.startsWith("error")) { // something go wrong?
                    throw new FailedJobException("Job "+s+" has failed at EBI. Aborting run.");
                } else {
                    // incomplete so just go around again...
                    wait = true;
                }
            }
            if (!finished) {
                // never treat an unfinished job as done: its results would be incomplete
                throw new FailedJobException("Job "+s+" did not complete after "+MAX_STATUS_POLLS+" status checks. Aborting run.");
            }
        }
        // once we get here the entire batch is done
    }

    /**
     * {@inheritDoc}
     */
    @Override
    protected void reset() {
        // no internal state to reset
    }

    /**
     * {@inheritDoc}
     *
     * No output spec is announced at configure time: a <code>null</code> spec is returned.
     */
    @Override
    protected DataTableSpec[] configure(final DataTableSpec[] inSpecs)
            throws InvalidSettingsException {
        return new DataTableSpec[]{null};
    }

    /**
     * {@inheritDoc}
     */
    @Override
    protected void saveSettingsTo(final NodeSettingsWO settings) {
        m_email.saveSettingsTo(settings);
        m_seq.saveSettingsTo(settings);
        m_crc.saveSettingsTo(settings);
        m_vec.saveSettingsTo(settings);
        m_save_imgs.saveSettingsTo(settings);
        m_img_dir.saveSettingsTo(settings);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    protected void loadValidatedSettingsFrom(final NodeSettingsRO settings)
            throws InvalidSettingsException {
        m_email.loadSettingsFrom(settings);
        m_seq.loadSettingsFrom(settings);
        m_crc.loadSettingsFrom(settings);
        m_vec.loadSettingsFrom(settings);
        m_save_imgs.loadSettingsFrom(settings);
        m_img_dir.loadSettingsFrom(settings);
    }

    /**
     * {@inheritDoc}
     *
     * Only validates: the settings models of this node are not modified.
     */
    @Override
    protected void validateSettings(final NodeSettingsRO settings)
            throws InvalidSettingsException {
        m_email.validateSettings(settings);
        m_seq.validateSettings(settings);
        m_crc.validateSettings(settings);
        m_vec.validateSettings(settings);
        m_save_imgs.validateSettings(settings);
        m_img_dir.validateSettings(settings);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    protected void loadInternals(final File internDir,
            final ExecutionMonitor exec) throws IOException,
            CanceledExecutionException {
        // no internals to load
    }

    /**
     * {@inheritDoc}
     */
    @Override
    protected void saveInternals(final File internDir,
            final ExecutionMonitor exec) throws IOException,
            CanceledExecutionException {
        // no internals to save
    }
}