package au.com.acpfg.misc.biojava; import org.knime.core.data.*; import org.knime.core.data.def.DefaultRow; import org.knime.core.data.def.JoinedRow; import org.knime.core.data.def.StringCell; import org.knime.core.node.*; import org.biojava.bio.seq.*; import org.biojava.bio.proteomics.*; import org.biojava.bio.symbol.*; public class FrameTranslationProcessor implements BioJavaProcessorInterface { private boolean m_incl_na_seqs; // include NA frames for use by later processing steps public FrameTranslationProcessor(String task) { m_incl_na_seqs = false; if (task.toLowerCase().endsWith("(incl. na frames)")) { m_incl_na_seqs = true; } } public void execute(BioJavaProcessorNodeModel m, ExecutionContext exec, NodeLogger logger, BufferedDataTable[] inData, BufferedDataContainer c) throws Exception { if (m.areSequencesProtein()) { throw new Exception("Cannot perform this task with protein sequences!"); } int n_rows = inData[0].getRowCount(); int done = 0; RowIterator it = inData[0].iterator(); boolean is_dna = m.areSequencesDNA(); int ncols = 6; if (m_incl_na_seqs) { ncols += 6; } while (it.hasNext()) { DataRow r = it.next(); String str = m.getSequence(r); if (str == null || str.length() < 1) continue; SymbolList syms = m.getSequenceAsSymbol(str); DataCell[] cells = new DataCell[ncols]; for (int i=0; i<3; i++) { // take the reading frame SymbolList rf = syms.subList(i+1, syms.length()-(syms.length() - i) % 3); // if it is DNA transcribe it to RNA first if (is_dna) { rf = DNATools.toRNA(rf); } SymbolList prot = RNATools.translate(rf); cells[i+3] = new StringCell(prot.seqString()); if (m_incl_na_seqs) { cells[i+9] = new StringCell(rf.seqString().toUpperCase()); } // reverse frame translation rf = RNATools.reverseComplement(rf); prot = RNATools.translate(rf); cells[i] = new StringCell(prot.seqString()); if (m_incl_na_seqs) { cells[i+6] = new StringCell(rf.seqString().toUpperCase()); } } // add all the cells into the row DataRow row = new DefaultRow(r.getKey(), cells); c.addRowToTable(new JoinedRow(r, row)); done++; if (done % 100 == 0) { exec.checkCanceled(); exec.setProgress(((double)done) / n_rows, "Processed "+done+" sequences."); } } } public DataTableSpec get_table_spec() { int add_na = 0; if (m_incl_na_seqs) add_na = 6; DataColumnSpec[] allColSpecs = new DataColumnSpec[6+add_na]; allColSpecs[0] = new DataColumnSpecCreator("Translation Frame -3", StringCell.TYPE).createSpec(); allColSpecs[1] = new DataColumnSpecCreator("Translation Frame -2", StringCell.TYPE).createSpec(); allColSpecs[2] = new DataColumnSpecCreator("Translation Frame -1", StringCell.TYPE).createSpec(); allColSpecs[3] = new DataColumnSpecCreator("Translation Frame +1", StringCell.TYPE).createSpec(); allColSpecs[4] = new DataColumnSpecCreator("Translation Frame +2", StringCell.TYPE).createSpec(); allColSpecs[5] = new DataColumnSpecCreator("Translation Frame +3", StringCell.TYPE).createSpec(); if (m_incl_na_seqs) { allColSpecs[6] = new DataColumnSpecCreator("NA Frame -3", StringCell.TYPE).createSpec(); allColSpecs[7] = new DataColumnSpecCreator("NA Frame -2", StringCell.TYPE).createSpec(); allColSpecs[8] = new DataColumnSpecCreator("NA Frame -1", StringCell.TYPE).createSpec(); allColSpecs[9] = new DataColumnSpecCreator("NA Frame +1", StringCell.TYPE).createSpec(); allColSpecs[10] = new DataColumnSpecCreator("NA Frame +2", StringCell.TYPE).createSpec(); allColSpecs[11] = new DataColumnSpecCreator("NA Frame +3", StringCell.TYPE).createSpec(); } DataTableSpec outputSpec = new DataTableSpec(allColSpecs); return outputSpec; } @Override public boolean isMerged() { return true; } }