/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package com.act.lcms; import java.util.List; import java.util.Arrays; import java.util.ArrayList; import java.util.Iterator; import java.util.Collections; import java.io.PrintStream; import java.io.OutputStream; import java.io.FileOutputStream; import org.apache.commons.lang3.tuple.Pair; public class CompareTwoNetCDFAroundMass { static final double mzTolerance = 0.01; static final int maxNumMzTolerated = 3; private double extractMZ(double mzWanted, List<Pair<Double, Double>> intensities, double time) { double intensityFound = 0; int numWithinPrecision = 0; double mzLowRange = mzWanted - mzTolerance; double mzHighRange = mzWanted + mzTolerance; // we expect there to be pretty much only one intensity value in the precision // range we are looking at. But if a lot of masses show up then complain for (Pair<Double, Double> mz_int : intensities) { double mz = mz_int.getLeft(); double intensity = mz_int.getRight(); if (mz > mzLowRange && mz < mzHighRange) { intensityFound += intensity; numWithinPrecision++; if (intensity > 100000) System.out.format("time: %f\tmz: %f\t intensity: %f\n", time, mz, intensity); } } if (numWithinPrecision > maxNumMzTolerated) { System.out.format("Only expected %d, but found %d in the mz range [%f, %f]\n", maxNumMzTolerated, numWithinPrecision, mzLowRange, mzHighRange); } return intensityFound; } private List<Pair<Double, Double>> extractMZSlice(double mz, Iterator<LCMSSpectrum> spectraIt, int numOut, String tagfname) { List<Pair<Double, Double>> mzSlice = new ArrayList<>(); int pulled = 0; // iterate through first few, or all if -1 given while (spectraIt.hasNext() && (numOut == -1 || pulled < numOut)) { LCMSSpectrum timepoint = spectraIt.next(); // get all (mz, intensity) at this timepoint List<Pair<Double, Double>> intensities = timepoint.getIntensities(); // this time point is valid to look at if its max intensity is around // the mass we care about. So lets first get the max peak location double intensityForMz = extractMZ(mz, intensities, timepoint.getTimeVal()); // the above is Pair(mz_extracted, intensity), where mz_extracted = mz // we now add the timepoint val and the intensity to the output mzSlice.add(Pair.of(timepoint.getTimeVal(), intensityForMz)); pulled++; } System.out.format("Done: %s\n\n", tagfname); return mzSlice; } /** * Extracts a window around a particular m/z value within the spectra * @param file The netCDF file with the full LCMS (lockmass corrected) spectra * @param mz The m/z value +- 1 around which to extract data for * @param numTimepointsToExamine Since we stream the netCDF in, we can choose * to look at the first few timepoints or -1 for all */ public List<List<Pair<Double, Double>>> getSpectraForMass(double mz, String[] fnames, int numTimepointsToExamine) throws Exception { List<List<Pair<Double, Double>>> extracted = new ArrayList<>(); LCMSParser parser = new LCMSNetCDFParser(); for (String fname : fnames) { Iterator<LCMSSpectrum> iter = parser.getIterator(fname); List<Pair<Double, Double>> mzSlice = extractMZSlice(mz, iter, numTimepointsToExamine, fname); extracted.add(mzSlice); } return extracted; } private static boolean areNCFiles(String[] fnames) { for (String n : fnames) { System.out.println(".nc file = " + n); if (!n.endsWith(".nc")) return false; } return true; } public static void main(String[] args) throws Exception { if (args.length < 5 || !areNCFiles(Arrays.copyOfRange(args, 3, args.length))) { throw new RuntimeException("Needs: \n" + "(1) mass value, e.g., 132.0772 for debugging, \n" + "(2) how many timepoints to process (-1 for all), \n" + "(3) prefix for .data and rendered .pdf \n" + "(4,5..) 2 or more NetCDF .nc files" ); } String fmt = "pdf"; Double mz = Double.parseDouble(args[0]); Integer numSpectraToProcess = Integer.parseInt(args[1]); String outPrefix = args[2]; String outImg = outPrefix.equals("-") ? null : outPrefix + "." + fmt; String outData = outPrefix.equals("-") ? null : outPrefix + ".data"; CompareTwoNetCDFAroundMass c = new CompareTwoNetCDFAroundMass(); String[] netCDF_fnames = Arrays.copyOfRange(args, 3, args.length); List<List<Pair<Double, Double>>> spectra = c.getSpectraForMass(mz, netCDF_fnames, numSpectraToProcess); // Write data output to outfile PrintStream out = outData == null ? System.out : new PrintStream(new FileOutputStream(outData)); // print out the spectra to outData for (List<Pair<Double, Double>> spectraInFile : spectra) { for (Pair<Double, Double> xy : spectraInFile) { out.format("%.4f\t%.4f\n", xy.getLeft(), xy.getRight()); out.flush(); } // delimit this dataset from the rest out.print("\n\n"); } // find the ymax across all spectra, so that we can have a uniform y scale Double yrange = 0.0; for (List<Pair<Double, Double>> spectraInFile : spectra) { Double ymax = 0.0; for (Pair<Double, Double> xy : spectraInFile) { Double intensity = xy.getRight(); if (ymax < intensity) ymax = intensity; } if (yrange < ymax) yrange = ymax; } if (outData != null) { // if outData is != null, then we have written to .data file // now render the .data to the corresponding .pdf file // first close the .data out.close(); // render outData to outFILE using gnuplo Gnuplotter plotter = new Gnuplotter(); plotter.plot2D(outData, outImg, netCDF_fnames, "time in seconds", yrange, "intensity", fmt); } } }