package au.com.acpfg.misc.fastawriter;
import java.io.*;
import org.knime.core.data.*;
import org.knime.core.data.RowKey;
import org.knime.core.data.def.DefaultRow;
import org.knime.core.data.def.*;
import org.knime.core.node.BufferedDataContainer;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.defaultnodesettings.*;
import org.knime.core.node.ExecutionContext;
import org.knime.core.node.ExecutionMonitor;
import org.knime.core.node.InvalidSettingsException;
import org.knime.core.node.NodeLogger;
import org.knime.core.node.NodeModel;
import org.knime.core.node.NodeSettingsRO;
import org.knime.core.node.NodeSettingsWO;
/**
* This is the model implementation of FastaWriter.
* Creates a .fasta file with the specified accession, description, and sequence columns
*
* @author Andrew Cassin
*/
public class FastaWriterNodeModel extends NodeModel {

    /** Logger shared by all instances of this node. */
    private static final NodeLogger logger = NodeLogger
            .getLogger(FastaWriterNodeModel.class);

    /*
     * Settings keys used to store and retrieve the node configuration (from
     * the dialog or from a settings file). Package visibility so they can be
     * used from the dialog.
     */
    static final String CFGKEY_ACCSN = "Accession";
    static final String CFGKEY_DESCR = "Description";
    static final String CFGKEY_SEQ = "Sequence";
    static final String CFGKEY_FILE = "Output Filename";
    static final String CFGKEY_OVERWRITE = "overwrite";
    static final String CFGKEY_MAXLINELEN = "max-sequence-line-length";

    /** Cancellation and progress are checked once every this many rows. */
    private static final int CANCEL_CHECK_INTERVAL = 100;

    // Settings models holding the current configuration; all of them are
    // created through make() so that every key has a single definition of
    // its type and default value.
    private final SettingsModelColumnName m_accsn_col = (SettingsModelColumnName) make(CFGKEY_ACCSN);
    private final SettingsModelColumnName m_descr_col = (SettingsModelColumnName) make(CFGKEY_DESCR);
    private final SettingsModelColumnName m_seq_col = (SettingsModelColumnName) make(CFGKEY_SEQ);
    private final SettingsModelString m_filename = (SettingsModelString) make(CFGKEY_FILE);
    private final SettingsModelBoolean m_overwrite = (SettingsModelBoolean) make(CFGKEY_OVERWRITE);
    private final SettingsModelIntegerBounded m_max_line_len = (SettingsModelIntegerBounded) make(CFGKEY_MAXLINELEN);

    /**
     * Constructor for the node model: one input port and one output port.
     */
    protected FastaWriterNodeModel() {
        super(1, 1);
    }

    /**
     * Creates a fresh settings model for the given settings key.
     *
     * @param field_name one of the <code>CFGKEY_*</code> constants
     * @return a new settings model with the default value for that key, or
     *         <code>null</code> if the key is not one of the known constants
     */
    public static SettingsModel make(String field_name) {
        // constant-first equals() so that a null key yields null rather than an NPE
        if (CFGKEY_ACCSN.equals(field_name)) {
            return new SettingsModelColumnName(CFGKEY_ACCSN, CFGKEY_ACCSN);
        } else if (CFGKEY_DESCR.equals(field_name)) {
            return new SettingsModelColumnName(CFGKEY_DESCR, CFGKEY_DESCR);
        } else if (CFGKEY_SEQ.equals(field_name)) {
            return new SettingsModelColumnName(CFGKEY_SEQ, CFGKEY_SEQ);
        } else if (CFGKEY_FILE.equals(field_name)) {
            return new SettingsModelString(CFGKEY_FILE, "");
        } else if (CFGKEY_OVERWRITE.equals(field_name)) {
            return new SettingsModelBoolean(CFGKEY_OVERWRITE, false);
        } else if (CFGKEY_MAXLINELEN.equals(field_name)) {
            // default 80 characters per sequence line, allowed range 10..100000
            return new SettingsModelIntegerBounded(CFGKEY_MAXLINELEN, 80, 10, 100000);
        }
        return null;
    }

    /**
     * Writes one FASTA record per input row to the configured file and passes
     * every input row through to the output table.
     * <p>
     * A row produces no FASTA record when its accession, description or
     * sequence cell is missing, or when its sequence is empty; such rows are
     * still copied to the output table. Sequences are wrapped at the
     * configured maximum line length.
     *
     * @param inData the input tables; only the first one is read
     * @param exec execution context used for the output container, progress
     *            reporting and cancellation
     * @return a single table containing the rows of the input table
     * @throws InvalidSettingsException if no filename is configured or a
     *             configured column is not present in the input table
     * @throws IOException if the file exists and overwriting is disabled, or
     *             if writing to the file fails
     * @throws CanceledExecutionException if the user cancels the execution
     * @throws Exception any other failure raised while processing the table
     */
    @Override
    protected BufferedDataTable[] execute(final BufferedDataTable[] inData,
            final ExecutionContext exec) throws Exception {
        String fname = m_filename.getStringValue();
        if (fname == null || fname.length() < 1) {
            throw new InvalidSettingsException("No filename specified... nothing to save!");
        }
        File f = new File(fname);
        if (!m_overwrite.getBooleanValue() && f.exists()) {
            throw new IOException("Will not overwrite existing: "+fname+" - configure the node to override if this is what you want.");
        }
        logger.info("Writing fasta file... "+fname);

        // replicate input data on output port
        DataTableSpec inSpec = inData[0].getDataTableSpec();
        DataTableSpec outSpec = new DataTableSpec("Input Data", inSpec, new DataTableSpec());

        // Resolve all columns BEFORE the output file is opened, so that a
        // misconfigured node does not create or truncate the file.
        boolean accsn_use_rid = m_accsn_col.useRowID();
        int accsn_idx = accsn_use_rid ? -1 : requireColumn(inSpec, m_accsn_col, "accession");
        boolean descr_use_rid = m_descr_col.useRowID();
        int descr_idx = descr_use_rid ? -1 : requireColumn(inSpec, m_descr_col, "description");
        boolean seq_use_rid = m_seq_col.useRowID();
        int seq_idx = seq_use_rid ? -1 : requireColumn(inSpec, m_seq_col, "sequence");

        int maxll = m_max_line_len.getIntValue();
        int n_rows = inData[0].getRowCount();

        // the container buffers the pass-through rows for the output port
        BufferedDataContainer container = exec.createDataContainer(outSpec);
        PrintWriter out_stream = new PrintWriter(new BufferedWriter(new FileWriter(fname)));
        try {
            RowIterator it = inData[0].iterator();
            int i = 0;
            while (it.hasNext()) {
                DataRow r = it.next();
                String accsn = fieldValue(r, accsn_use_rid, accsn_idx);
                String descr = fieldValue(r, descr_use_rid, descr_idx);
                String seq = fieldValue(r, seq_use_rid, seq_idx);

                // for correct FASTA files, ignore row if any field is missing
                // or there is no sequence to write
                if (accsn != null && descr != null && seq != null && seq.length() > 0) {
                    out_stream.println(">"+accsn+" "+descr);
                    writeWrapped(out_stream, seq, maxll);
                }

                // the row is passed through whether or not a record was written
                container.addRowToTable(r);

                // check if the execution monitor was canceled
                if (i % CANCEL_CHECK_INTERVAL == 0) {
                    exec.checkCanceled();
                    exec.setProgress(i / (double) n_rows, "Writing row " + i);
                }
                i++;
            }
            // PrintWriter never throws on write errors; checkError() flushes
            // and reports whether any write failed.
            if (out_stream.checkError()) {
                throw new IOException("Error while writing to file: " + fname);
            }
        } finally {
            // always release the file handle, also on cancel or failure
            out_stream.close();
        }

        // once we are done, we close the container and return its table
        container.close();
        BufferedDataTable out = container.getTable();
        return new BufferedDataTable[]{out};
    }

    /**
     * Looks up the column selected in the given settings model.
     *
     * @param spec spec of the input table
     * @param col settings model naming the column
     * @param role short description of the column's purpose, used in the
     *            error message
     * @return the index of the column in <code>spec</code>
     * @throws InvalidSettingsException if the column is not in the spec
     */
    private static int requireColumn(final DataTableSpec spec,
            final SettingsModelColumnName col, final String role)
            throws InvalidSettingsException {
        String name = col.getStringValue();
        int idx = (name == null) ? -1 : spec.findColumnIndex(name);
        if (idx < 0) {
            throw new InvalidSettingsException("Cannot find the " + role
                    + " column '" + name + "' in the input table - please reconfigure the node.");
        }
        return idx;
    }

    /**
     * Extracts one FASTA field from a row.
     *
     * @param r the row to read
     * @param use_rid if true the row ID is used instead of a cell
     * @param idx index of the cell to read when <code>use_rid</code> is false
     * @return the field text, or <code>null</code> if the cell is missing
     */
    private static String fieldValue(final DataRow r, final boolean use_rid, final int idx) {
        if (use_rid) {
            return r.getKey().getString();
        }
        DataCell cell = r.getCell(idx);
        return cell.isMissing() ? null : cell.toString();
    }

    /**
     * Writes a sequence to the stream, one line per chunk of at most
     * <code>maxll</code> characters.
     *
     * @param out destination stream
     * @param seq the non-empty sequence to write
     * @param maxll maximum number of characters per line; values below 1 are
     *            treated as 1 so the loop always advances
     */
    private static void writeWrapped(final PrintWriter out, final String seq, final int maxll) {
        int step = Math.max(1, maxll);
        int len = seq.length();
        for (int offset = 0; offset < len; offset += step) {
            out.println(seq.substring(offset, Math.min(len, offset + step)));
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    protected void reset() {
        // nothing to reset: this node keeps no internal state between executions
    }

    /**
     * {@inheritDoc}
     * <p>
     * No checks are performed here and no output spec is announced: the
     * returned array holds a single <code>null</code> element.
     */
    @Override
    protected DataTableSpec[] configure(final DataTableSpec[] inSpecs)
            throws InvalidSettingsException {
        return new DataTableSpec[]{null};
    }

    /**
     * {@inheritDoc}
     */
    @Override
    protected void saveSettingsTo(final NodeSettingsWO settings) {
        m_accsn_col.saveSettingsTo(settings);
        m_descr_col.saveSettingsTo(settings);
        m_seq_col.saveSettingsTo(settings);
        m_filename.saveSettingsTo(settings);
        m_overwrite.saveSettingsTo(settings);
        m_max_line_len.saveSettingsTo(settings);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    protected void loadValidatedSettingsFrom(final NodeSettingsRO settings)
            throws InvalidSettingsException {
        m_accsn_col.loadSettingsFrom(settings);
        m_descr_col.loadSettingsFrom(settings);
        m_seq_col.loadSettingsFrom(settings);
        m_filename.loadSettingsFrom(settings);
        m_overwrite.loadSettingsFrom(settings);
        // settings stored without this key keep the current (default) line length
        if (settings.containsKey(CFGKEY_MAXLINELEN)) {
            m_max_line_len.loadSettingsFrom(settings);
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    protected void validateSettings(final NodeSettingsRO settings)
            throws InvalidSettingsException {
        m_accsn_col.validateSettings(settings);
        m_descr_col.validateSettings(settings);
        m_seq_col.validateSettings(settings);
        m_filename.validateSettings(settings);
        m_overwrite.validateSettings(settings);
        // only validate the line length when present, so that settings
        // stored without this key are still accepted
        if (settings.containsKey(CFGKEY_MAXLINELEN)) {
            m_max_line_len.validateSettings(settings);
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    protected void loadInternals(final File internDir,
            final ExecutionMonitor exec) throws IOException,
            CanceledExecutionException {
        // no internals to load
    }

    /**
     * {@inheritDoc}
     */
    @Override
    protected void saveInternals(final File internDir,
            final ExecutionMonitor exec) throws IOException,
            CanceledExecutionException {
        // no internals to save
    }
}