package au.com.acpfg.misc.spectra.peakextractor; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.BitSet; import java.util.HashMap; import java.util.Map; import java.util.Set; import org.knime.core.data.DataCell; import org.knime.core.data.DataColumnSpec; import org.knime.core.data.DataColumnSpecCreator; import org.knime.core.data.DataRow; import org.knime.core.data.DataTableSpec; import org.knime.core.data.DataType; import org.knime.core.data.RowIterator; import org.knime.core.data.RowKey; import org.knime.core.data.collection.CollectionCellFactory; import org.knime.core.data.collection.ListCell; import org.knime.core.data.container.CellFactory; import org.knime.core.data.container.CloseableRowIterator; import org.knime.core.data.def.DefaultRow; import org.knime.core.data.def.DoubleCell; import org.knime.core.data.def.IntCell; import org.knime.core.data.def.StringCell; import org.knime.core.node.BufferedDataContainer; import org.knime.core.node.BufferedDataTable; import org.knime.core.node.CanceledExecutionException; import org.knime.core.node.defaultnodesettings.SettingsModelBoolean; import org.knime.core.node.defaultnodesettings.SettingsModelIntegerBounded; import org.knime.core.node.defaultnodesettings.SettingsModelStringArray; import org.knime.core.node.ExecutionContext; import org.knime.core.node.ExecutionMonitor; import org.knime.core.node.InvalidSettingsException; import org.knime.core.node.NodeLogger; import org.knime.core.node.NodeModel; import org.knime.core.node.NodeSettingsRO; import org.knime.core.node.NodeSettingsWO; import au.com.acpfg.misc.spectra.SpectralDataInterface; /** * This is the model implementation of SpectralPeakExtractor. * Extracts data defining a peak from any cell supporting SpectralDataInterface (defined in the SpectraReader node) * * @author Andrew Cassin */ public class SpectralPeakExtractorNodeModel extends NodeModel { // the logger instance private static final NodeLogger logger = NodeLogger .getLogger(SpectralPeakExtractorNodeModel.class); /* what tolerance to the left OR right of the peak should be considered part of the peak? * This value may be customized by the user in the configure dialog for the node */ public final double DEFAULT_PEAK_HALF_WIDTH = 0.05; // UNITLESS: IE. Da or Th depending on user public final double BIN_SIZE = 0.1; /** the settings key which is used to retrieve and store the settings (from the dialog or from a settings file) (package visibility to be usable from the dialog). */ static final String SPECTRA_COLUMNS = "spectra-cols"; private final SettingsModelStringArray m_spectra_cols = new SettingsModelStringArray(SPECTRA_COLUMNS, new String[] {}); private final Map<String, PeakWindow[]> m_column2Peaks = new HashMap<String, PeakWindow[]>(); /* internal state for hasPeaksOfInterest() */ private BitSet m_peakset; /* used for row keys -- internal use only */ private int m_row; /** * Constructor for the node model. */ protected SpectralPeakExtractorNodeModel() { // one incoming port and one outgoing port super(1, 1); m_peakset = null; } /** * {@inheritDoc} */ @Override protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception { String[] cols = m_spectra_cols.getStringArrayValue(); logger.info("Extracting peak data from "+cols.length+" spectra columns."); int[] col_vec_idx = new int[cols.length]; BitSet[] extracted_peaks = new BitSet[cols.length]; for (int i=0; i<cols.length; i++) { col_vec_idx[i] = inData[0].getDataTableSpec().findColumnIndex(cols[i]); if (col_vec_idx[i] < 0) { throw new InvalidSettingsException("Cannot find column: "+cols[i]); } PeakWindow[] pw = m_column2Peaks.get(cols[i]); assert(pw != null && pw.length > 0); ArrayList<Double> mz = new ArrayList<Double>(); double max_mz = Double.NEGATIVE_INFINITY; for (int j=0; j<pw.length; j++) { double lBound = pw[j].getMZ() - pw[j].getLeft(); double uBound = pw[j].getMZ() + pw[j].getRight(); if (uBound > max_mz) max_mz = uBound; for (double k=lBound; k<uBound; k += BIN_SIZE) { mz.add(k); } mz.add(uBound); // make sure upper bound is represented in window of interest } double[] d_mz = new double[mz.size()]; for (int j=0; j<d_mz.length; j++) { d_mz[j] = mz.get(j).doubleValue(); } extracted_peaks[i] = calcPeakSet(d_mz, max_mz); } // the data table spec of the single output table, // the table will have three columns: DataColumnSpec[] allColSpecs = new DataColumnSpec[10]; allColSpecs[0] = new DataColumnSpecCreator("Spectra ID", StringCell.TYPE).createSpec(); allColSpecs[1] = new DataColumnSpecCreator("User-Specified Peak Name", StringCell.TYPE).createSpec(); allColSpecs[2] = new DataColumnSpecCreator("User-Specified Peak M/Z", DoubleCell.TYPE).createSpec(); allColSpecs[3] = new DataColumnSpecCreator("Input RowID", StringCell.TYPE).createSpec(); allColSpecs[4] = new DataColumnSpecCreator("Number of Intensities", IntCell.TYPE).createSpec(); allColSpecs[5] = new DataColumnSpecCreator("Sum of Intensities", DoubleCell.TYPE).createSpec(); allColSpecs[6] = new DataColumnSpecCreator("Intensities within tolerance", DataType.getType(ListCell.class, DoubleCell.TYPE)).createSpec(); allColSpecs[7] = new DataColumnSpecCreator("M/Z within tolerance", DataType.getType(ListCell.class, DoubleCell.TYPE)).createSpec(); allColSpecs[8] = new DataColumnSpecCreator("Maximum Peak Intensity in Spectra", DoubleCell.TYPE).createSpec(); allColSpecs[9] = new DataColumnSpecCreator("Minimum Peak Intensity in Spectra", DoubleCell.TYPE).createSpec(); DataTableSpec outputSpec = new DataTableSpec(allColSpecs); // the execution context will provide us with storage capacity, in this // case a data container to which we will add rows sequentially // Note, this container can also handle arbitrary big data tables, it // will buffer to disc if necessary. BufferedDataContainer container = exec.createDataContainer(outputSpec); RowIterator it = inData[0].iterator(); int todo = inData[0].getRowCount(); int done = 0; // first compute the target bits for each column specified in node configure // process each row logger.info("Processing "+todo+" rows."); int interesting = 0; m_row = 1; while (it.hasNext()) { DataRow r = it.next(); int k = 0; for (int col_idx : col_vec_idx) { SpectralDataInterface sdi = (SpectralDataInterface) r.getCell(col_idx); if (sdi != null && hasPeaksOfInterest(sdi, extracted_peaks[k])) { // still not sure if the exact window has been met, so we check for this before saving rows... if (savePeaksOfInterest(sdi, container, m_column2Peaks.get(cols[k]), r.getKey().toString())) interesting++; } k++; } if (done % 100 == 0) { exec.checkCanceled(); exec.setProgress(((double)done/todo), "Processing row "+done); } done++; } logger.info("Found "+interesting+" rows (total "+todo+") with some peaks matching your peak windows."); // once we are done, we close the container and return its table container.close(); BufferedDataTable out = container.getTable(); logger.info("done!"); return new BufferedDataTable[]{out}; } protected BitSet calcPeakSet(double[] mz, double max) { int n_bits = ((int)(max / BIN_SIZE))+1; BitSet ret = new BitSet(n_bits); // NB: all bits cleared by default for (int i=0; i<mz.length; i++) { int sel = ((int)(mz[i] / BIN_SIZE)) + 1; ret.set(sel); } return ret; } protected boolean hasPeaksOfInterest(SpectralDataInterface sdi, BitSet target) { assert(sdi != null && target != null); assert(m_peakset != null); double[] mz = sdi.getMZ(); // parallel ordered arrays: mz & intensity ie. equal length double[] intensity = sdi.getIntensity(); assert(mz.length == intensity.length); BitSet mz_bitset = calcPeakSet(mz, sdi.getMaxMZ()); return (mz_bitset.intersects(target)); } protected DataCell safe_cell(String s) { return (s != null) ? new StringCell(s) : DataType.getMissingCell(); } protected boolean savePeaksOfInterest(SpectralDataInterface sdi, BufferedDataContainer c, PeakWindow[] peaks, String rowkey) { int added = 0; // number of rows added to table for the current spectra double[] mz = sdi.getMZ(); double[] intensity = sdi.getIntensity(); ArrayList<DoubleCell> matching_mz = new ArrayList<DoubleCell>(); ArrayList<DoubleCell> matching_i = new ArrayList<DoubleCell>(); for (PeakWindow cur : peaks) { double lBound = cur.getMZ() - cur.getLeft(); double uBound = cur.getMZ() + cur.getRight(); double sum = 0.0; int n_peaks = 0; for (int i=0; i<mz.length; i++) { if (mz[i] >= lBound && mz[i] < uBound) { assert(intensity[i] > 0.0); // does it make sense to have a peak with zero intensity? n_peaks++; sum += intensity[i]; matching_i.add(new DoubleCell(intensity[i])); matching_mz.add(new DoubleCell(mz[i])); } } if (n_peaks > 0) { DataCell[] cells = new DataCell[10]; cells[0] = safe_cell(sdi.getID()); cells[1] = safe_cell(cur.getName()); cells[2] = new DoubleCell(cur.getMZ()); cells[3] = new StringCell(rowkey); cells[4] = new IntCell(n_peaks); cells[5] = new DoubleCell(sum); cells[6] = CollectionCellFactory.createListCell(matching_i); cells[7] = CollectionCellFactory.createListCell(matching_mz); cells[8] = new DoubleCell(sdi.getIntensityMostIntense()); cells[9] = new DoubleCell(sdi.getIntensityLeastIntense()); DefaultRow row = new DefaultRow(new RowKey("Match"+m_row), cells); m_row++; c.addRowToTable(row); added++; matching_i.clear(); matching_mz.clear(); } } return (added > 0) ? true : false; } /** * {@inheritDoc} */ @Override protected void reset() { // NO-OP } /** * {@inheritDoc} */ @Override protected DataTableSpec[] configure(final DataTableSpec[] inSpecs) throws InvalidSettingsException { // TODO: check if user settings are available, fit to the incoming // table structure, and the incoming types are feasible for the node // to execute. If the node can execute in its current state return // the spec of its output data table(s) (if you can, otherwise an array // with null elements), or throw an exception with a useful user message return new DataTableSpec[]{null}; } /** * {@inheritDoc} */ @Override protected void saveSettingsTo(final NodeSettingsWO settings) { for (String peakKey : m_column2Peaks.keySet()) { NodeSettingsWO column = settings.addNodeSettings(peakKey); PeakWindow[] peaks = m_column2Peaks.get(peakKey); for (int b = 0; b < peaks.length; b++) { NodeSettingsWO bin = column.addNodeSettings(peaks[b].getName() + "_" + b); try { //logger.info("model save "+peaks[b].getName()+"_"+b); peaks[b].saveToSettings(bin); } catch (InvalidSettingsException e) { // TODO Auto-generated catch block logger.warn(e); e.printStackTrace(); } } } //logger.info("model save spectra columns "+m_column2Peaks.keySet().size()); settings.addStringArray(SPECTRA_COLUMNS, m_column2Peaks.keySet().toArray(new String[0])); } /** * {@inheritDoc} */ @Override protected void loadValidatedSettingsFrom(final NodeSettingsRO settings) throws InvalidSettingsException { String[] vec = settings.getStringArray(SPECTRA_COLUMNS); m_spectra_cols.setStringArrayValue(vec); m_column2Peaks.clear(); // logger.info("model load spectra columns "+vec.length); for (int i = 0; i < vec.length; i++) { NodeSettingsRO column = settings.getNodeSettings(vec[i].toString()); Set<String> peak_names = column.keySet(); PeakWindow[] peaks = new PeakWindow[peak_names.size()]; int s = 0; for (String peakKey : peak_names) { NodeSettingsRO bin = column.getNodeSettings(peakKey); peaks[s] = new PeakWindow(bin); s++; } m_column2Peaks.put(vec[i], peaks); } } /** * {@inheritDoc} */ @Override protected void validateSettings(final NodeSettingsRO settings) throws InvalidSettingsException { StringBuffer sb = new StringBuffer(); String[] columns2extract = settings.getStringArray(SPECTRA_COLUMNS); if (columns2extract.length < 1) { sb.append("No spectra column selected!\n"); } for (String col : columns2extract) { NodeSettingsRO node_set =null; try { node_set = settings.getNodeSettings(col); } catch (InvalidSettingsException e) { sb.append("Could not find peaks for column: "+col+"!\n"); } if (node_set != null) { Set<String> peakIds = node_set.keySet(); if (peakIds.size() < 1) { sb.append("You must add a peak for "+col+"!\n"); } } } if (sb.length() > 0) { throw new InvalidSettingsException(sb.toString()); } } /** * {@inheritDoc} */ @Override protected void loadInternals(final File internDir, final ExecutionMonitor exec) throws IOException, CanceledExecutionException { // NO-OP } /** * {@inheritDoc} */ @Override protected void saveInternals(final File internDir, final ExecutionMonitor exec) throws IOException, CanceledExecutionException { // NO-OP } }