/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.feature; import modwt.Filter; import modwt.Transform; import org.apache.log4j.Logger; import org.fhcrc.cpl.toolbox.datastructure.Pair; import org.fhcrc.cpl.toolbox.proteomics.Scan; import org.fhcrc.cpl.toolbox.proteomics.MSRun; import org.fhcrc.cpl.toolbox.proteomics.feature.Spectrum; import org.fhcrc.cpl.toolbox.proteomics.feature.Feature; import org.fhcrc.cpl.toolbox.CPUTimer; import org.fhcrc.cpl.toolbox.datastructure.FloatRange; import org.fhcrc.cpl.toolbox.datastructure.Tree2D; import org.fhcrc.cpl.viewer.feature.extraction.SpectrumResampler; import java.io.InputStream; import java.util.*; /** * User: mbellew * Date: Nov 2, 2004 */ public class FeatureStrategyPeakClustersOld extends FeatureStrategyUsingWindow // extends FeatureExtractor { static Logger _log = Logger.getLogger(FeatureStrategyPeakClustersOld.class); static int _WindowMargin = 64; static int _FeatureScanWindowStart; static int _FeatureScanWindowWidth; static float _FeatureMzWindowStart; static float _FeatureMzWindowHeight; static float _AverageWindowWidth; static CPUTimer timerAnalyze = new CPUTimer("FeatureStrategyPeakClustersOld.analyze"); static CPUTimer timerResample = new CPUTimer("FeatureStrategyPeakClustersOld.get and resample"); static CPUTimer timerBackground = new CPUTimer("FeatureStrategyPeakClustersOld.background"); static CPUTimer timerExtractPeaks = new 
CPUTimer("FeatureStrategyPeakClustersOld.peaks"); static CPUTimer timerExtractPeptides = new CPUTimer("FeatureStrategyPeakClustersOld.peptides"); static { initProps(); } private static boolean bPropsInitialized = false; //start and end scan numbers int _startNum = 0; int _endNum = 0; Scan[] _scans = null; public boolean debugReturnPeaks = false; /** * Load properties from feature.properties */ private static void initProps() { assert false == (bPropsInitialized = false); if (bPropsInitialized) return; try { InputStream is = ExtractEdgeFeatures.class.getResourceAsStream("features.properties"); Properties props = new Properties(); props.load(is); _FeatureScanWindowStart = Integer.parseInt(((String)props.get("feature.ScanWindowStart")).trim()); _FeatureScanWindowWidth = Integer.parseInt(((String)props.get("feature.ScanWindowWidth")).trim()); _FeatureMzWindowStart = Float.parseFloat(((String)props.get("feature.MzWindowStart")).trim()); _FeatureMzWindowHeight = Float.parseFloat(((String)props.get("feature.MzWindowHeight")).trim()); _AverageWindowWidth = Integer.parseInt(((String)props.get("feature.AverageWindowWidth")).trim()); } catch (java.io.IOException x) { x.printStackTrace(); } } public FeatureStrategyPeakClustersOld(MSRun run, int scanIndex, int count, int maxCharge, FloatRange range, double sn) { super(run, scanIndex, count, maxCharge, range, sn); int c2 = Math.max(256, count + 2 * _WindowMargin); scanIndex = Math.max(0, scanIndex - (c2 - count) / 2); int scanMax = Math.min(scanIndex + c2, run.getScanCount()); count = scanMax - scanIndex; _scans = getScans(run, scanIndex, count); _startNum = run.getScan(scanIndex).getNum(); _endNum = run.getScan(scanMax - 1).getNum(); } public int getType() { return TYPE_2D; } public static float[] _thresholdElution(float[] elution) { // UNDONE: cache intermediate arrays int K = 3; int N = elution.length; Pair<float[][], float[][]> tmp = new Pair<float[][], float[][]>(null, null); tmp.first = Transform.decompose(elution, N, K, 
new Filter("haar"), "modwt", "periodic", tmp.first); tmp.second = Transform.multiresolution(tmp.first, N, K, new Filter("haar"), "modwt", "periodic", tmp.second); float[][] mra = tmp.second; float[] threshold; // { // threshold = new float[N]; // for (int i = 0; i < N; i++) // { // threshold[i] = mra[2][i] + mra[3][i]; // + mra[4][i] + mra[5][i] + mra[6][i]; // } // } // even smoother threshold = mra[3]; return threshold; } public Feature[] _analyze() throws InterruptedException { Feature[] features = analyzeWindow(_scans, 256, _WindowMargin); List<Feature> filtered = new ArrayList<Feature>(); for (Feature feature : features) { if (feature.scan >= _startNum && feature.scan <= _endNum) filtered.add(feature); } return filtered.toArray(new Feature[filtered.size()]); } // // So what's up with all these smoothers??? // // Really this is a problem, the sample rate varies drastically // across experimental setups. Most notably due to the number // of MS1 vs. MS2 scans, but also scan rate of the machine, and // the elution gradient. I've settled on smoothTHRESHOLD for now. // // If the MS1 scan rate is too low, using this type of 2D // algorithm will not work well. 
    //
    /**
     * Elution smoother used by analyze2D(): leaves each spectrum untouched and
     * replaces each elution profile with its wavelet threshold curve
     * (_thresholdElution).  See the discussion above for why this one is the
     * one currently used.
     */
    public static Smooth2D smoothTHRESHOLD = new Smooth2D()
    {
        protected float[] SmoothSpectra(float[] spectrum)
        {
            return spectrum;
        }
        // NOTE(review): declared public while the other smoothers declare this
        // protected -- presumably unintentional, but harmless
        public float[] SmoothElution(float[] elution)
        {
            return _thresholdElution(elution);
        }
    };

    /** Heavy FFT smoothing of the elution profile (window 12); spectra untouched. */
    Smooth2D smoothALOT = new Smooth2D()
    {
        protected float[] SmoothSpectra(float[] spectrum)
        {
            //Spectrum.SmoothALittle(spectrum);
            return spectrum;
        }
        protected float[] SmoothElution(float[] elution)
        {
            return Spectrum.FFTsmooth(elution, 12, false);
        }
    };

    /** Moderate FFT smoothing of the elution profile (window 6); spectra untouched. */
    Smooth2D smoothMEDIUM = new Smooth2D()
    {
        protected float[] SmoothSpectra(float[] spectrum)
        {
            //Spectrum.SmoothALittle(spectrum);
            return spectrum;
        }
        protected float[] SmoothElution(float[] elution)
        {
            return Spectrum.FFTsmooth(elution, 6, false);
        }
    };

    /** WaveletD4-based smoothing of the elution profile; spectra untouched. */
    Smooth2D smoothD4 = new Smooth2D()
    {
        protected float[] SmoothSpectra(float[] spectrum)
        {
            //Spectrum.SmoothALittle(spectrum);
            return spectrum;
        }
        protected float[] SmoothElution(float[] elution)
        {
            return Spectrum.WaveletD4(elution);
        }
    };

    /** Light in-place smoothing of the elution profile; spectra untouched. */
    Smooth2D smoothALITTLE = new Smooth2D()
    {
        protected float[] SmoothSpectra(float[] spectrum)
        {
            //Spectrum.SmoothALittle(spectrum);
            return spectrum;
        }
        protected float[] SmoothElution(float[] elution)
        {
            Spectrum.SmoothALittle(elution);
            return elution;
        }
    };

    /** Identity smoother -- no smoothing in either dimension. */
    Smooth2D smoothNONE = new Smooth2D()
    {
        protected float[] SmoothSpectra(float[] spectrum)
        {
            return spectrum;
        }
        protected float[] SmoothElution(float[] elution)
        {
            return elution;
        }
    };

    /**
     * Resample the spectra, within the M/Z range _mzRange,
     * onto a regular grid, with frequency RESAMPLE_FREQ
     *
     * Each row (fixed m/z index) of the result is then lightly smoothed across
     * scans; with useMedianSmooth a median filter is applied first.
     *
     * @param scans scans to resample (one output row of the grid per scan)
     * @param currentThread checked for interruption between scans
     * @param useMedianSmooth apply a median filter across scans first
     *                        (this will remove lockspray)
     * @return resampled intensity grid, indexed [scan][m/z index]
     * @throws InterruptedException if the current thread was interrupted
     */
    protected float[][] resampleSpectra(Scan[] scans, Thread currentThread, boolean useMedianSmooth)
            throws InterruptedException
    {
        float[][] resampledSpectra = new float[scans.length][];
        for (int i = 0; i < scans.length; i++)
        {
            float[][] raw = scans[i].getSpectrum();
            if (currentThread.isInterrupted())
                throw new InterruptedException();
            resampledSpectra[i] = Spectrum.Resample(raw, _mzRange, SpectrumResampler.getResampleFrequency());
        }
        int height = resampledSpectra[0].length;
        {
            float[] row = null, s = null;
            for (int i = 0; i < height; i++)
            {
                // a "row" here is the elution profile at one m/z grid index
                row = Spectrum.getRow(resampledSpectra, i, row);
                if (useMedianSmooth) // this will remove lockspray
                {
                    // use median
                    s = Spectrum.MedianSmooth(row, row.length, s);
                    Spectrum.SmoothALittle(s);
                    Spectrum.setRow(resampledSpectra, i, s);
                }
                else
                {
                    Spectrum.SmoothALittle(row);
                    Spectrum.setRow(resampledSpectra, i, row);
                }
            }
        }
        return resampledSpectra;
    }

    /**
     * Calculate median intensity at each point on the grid
     *
     * Takes, at every grid point, the max of a within-scan median (window of
     * 2x the resample frequency) and an across-scan median (window of 1x the
     * resample frequency).
     *
     * NOTE(review): assumes scans.length == spectra.length -- confirm callers
     * always pass matching arrays.
     *
     * @param scans the scans backing the grid (only its length is used here)
     * @param spectra resampled intensity grid [scan][m/z index]
     * @param width number of scans (spectra.length)
     * @param height number of m/z grid points per scan
     * @return per-grid-point median estimate, indexed [scan][m/z index]
     */
    protected float[][] calculateMedian(Scan[] scans, float[][] spectra, int width, int height)
    {
        float[][] median = new float[spectra.length][];
        for (int i = 0; i < scans.length; i++)
            median[i] = Spectrum.MedianWindow(spectra[i], height, 2 * SpectrumResampler.getResampleFrequency(), false);
        float[] row = null;
        for (int r = 0; r < height; r++)
        {
            row = Spectrum.getRow(spectra, r, row);
            float[] m = Spectrum.MedianWindow(row, width, SpectrumResampler.getResampleFrequency(), false);
            for (int s = 0; s < m.length; s++)
                median[s][r] = Math.max(median[s][r], m[s]);
        }
        return median;
    }

    /**
     * THIS IS THE MAIN FEATURE FINDING ROUTINE
     *
     * Structure:
     * Spectrum.Resample()
     * Smoothing
     * Separate background from signal (Spectrum.RemoveBackground)
     * Extract peaks -- wavelet decomposition, smooth, ExtractMaxima2D.analyze()
     * Sort by decreasing intensity
     * Throw out peaks with < 5 scans
     * Break up what look like double-humped elution profiles by breaking peaks
     * at valleys in the profile
     * For each peak:
     *  -Compute the extents
     *  -Walking down scans, make sure that the peak's intensity
     *   is greater than the intensities of the two m/z's around it
     *  -Make sure that intensity is above threshold
     *  -If you start going back up in intensity, stop
     *  -Same for end scan
     *  -If too short (<5 scans), toss it out
     *  -"Integrate" to determine total intensity: for each scan, add a
     *
     * block of intensity corresponding to the intensity on that scan
     * multiplied by half the amount of time between the adjoining scans
     *  -Create a feature to represent the individual peak
     * Exclude all features initially
     * Filter features: for each feature:
     *  find all neighbors within 5 scans and 1.1 m/z units
     *  if the m/z's are too close (within 1.5 / ExtractMaxima2D.getResampleFrequency(), for some reason), no dice
     *  make sure they're lined up, scan-wise
     *  If not enough scans combined in the two features, no dice
     *  If intensities not high enough, no dice
     *  If we got here, unexclude both
     * FeatureStrategyUsingWindow.ExtractPeptideFeatures(), to tie features together
     * Dump windows around feature, if asked for
     * Change scan numbers, which are currently indexes, to the actual scan numbers
     * If centroided, call AccurateMassCentroid() to fix mass
     *
     */
    protected Collection<Feature> analyze2D(Scan[] scans) throws InterruptedException
    {
        boolean useMedianSmooth = false;

        Thread currentThread = Thread.currentThread();

        // debug counters
        int d_rawPeaks = 0;
        int d_tooShort = 0;
        int d_filtered = 0;

        _logDebug("analyze2D " + scans[0].getNum() + "-" + scans[scans.length - 1].getNum());
        assert timerAnalyze.start();

        //
        // Convert data into 2D matrix
        // we will do all processing on this data until the end and
        // then process back to "scan" space
        //
        assert timerResample.start();
        float[][] spectra = resampleSpectra(scans, currentThread, useMedianSmooth);
        assert timerResample.stop();

        int width = spectra.length;
        int height = spectra[0].length;
        _logDebug("analyze2D datasize = " + (width * height * 4));

        //
        // separate background and signal components
        // we're pretty conservative about what we call background,
        // we can use background and/or median later to filter peaks
        // further.
        //
        // NOTE(review): unlike timerResample above, these start/stop calls are
        // not wrapped in asserts, so this timer runs even with assertions off
        timerBackground.start();
        float[][] background = Spectrum.RemoveBackground(spectra);
        float[][] median = calculateMedian(scans, spectra, width, height);
        timerBackground.stop();
        if (currentThread.isInterrupted())
            throw new InterruptedException();

        //
        // the D3 matrix is used to locate peaks,
        // the D3 processing effectively sharpens the raw data, this does a great job of cleaning
        // up messy data, e.g. wide/overlapping peaks, and stray peaks in centroided data
        //
        // why D3? This is based on the *default* resampling frequency of 36/Da and our max expected charge
        // which I'm presuming to be 6. Level three represents a feature width of 2^3=8, which
        // is near to 36/6. A big change in resampling or desired max charge, might require a
        // change.
        //
        // todo: now that resampling frequency is parameterized, see if this needs to be optimized
        //
        // NOTE(review): started without assert here but stopped with
        // "assert timerExtractPeaks.stop()" below -- inconsistent under -da
        timerExtractPeaks.start();
        float[][] waveletsD3 = new float[width][];
        Pair<float[][], float[][]> tmp = new Pair<float[][], float[][]>(null, null);
        for (int s = 0; s < spectra.length; s++)
            waveletsD3[s] = Spectrum.WaveletD3(spectra[s], tmp);

        //
        // extract all maxima from a time smoothed version of the wavelet transformed data
        //
        // we have a lot of versions of the data now, but it's helpful to keep it all around
        //
        float[][] smooth = new float[waveletsD3.length][];
        for (int s = 0; s < spectra.length; s++)
            smooth[s] = waveletsD3[s].clone();
        smoothTHRESHOLD.smooth(smooth);

        Spectrum.Peak[] maxD3 = ExtractMaxima2D.analyze(smooth, _mzRange.min, SpectrumResampler.getResampleInterval(), null, 0.0F);
        d_rawPeaks = maxD3.length;

        // ???? if no raw peaks to process, bail with empty collection to avoid out
        // of bounds errors
        if ( 0 == d_rawPeaks )
            return new ArrayList<Feature>();

        Arrays.sort(maxD3, Spectrum.comparePeakIntensityDesc);
        // dump intensity percentiles of the raw peak list
        if (true)
        {
            _logDebug("raw peaks: " + maxD3.length);
            if (maxD3.length > 0)
                for (int i = 0; i < 10; i++)
                    _logDebug(" " + (i) + ": " + maxD3[maxD3.length * i / 100].getIntensity());
            if (maxD3.length > 0)
                for (int i = 1; i < 10; i++)
                    _logDebug(" " + (i * 10) + ": " + maxD3[maxD3.length * i / 10].getIntensity());
            _logDebug(" 100: " + maxD3[maxD3.length - 1].getIntensity());
        }

        //
        // We probably have a LOT of peaks at this point. 95% are extremely small.
        // The size<shortPeak check in this loop will throw out most, and I
        // don't need to be too aggressive here
        //
        // NOTE: this loop also tries to handle "double humped elution profiles"
        // by breaking peaks at "valleys" in the elution profile. This could perhaps
        // be improved upon with a second pass that specifically handles this case and
        // makes sure that in a region the splitting is consistent.
        //
        //TODO: parameterize. Careful, because this is messed with below
        int shortPeak = 5;
        ArrayList<Feature> waveletPeaks = new ArrayList<Feature>();
        for (int i = 0, max = maxD3.length; i < max; i++)
        {
            Spectrum.Peak peak = maxD3[i];
            // grid column (m/z index) of this peak; skip peaks too close to the
            // m/z edges -- the ridge test below looks 2 columns to either side
            int imz = Math.round((peak.mz - _mzRange.min) * SpectrumResampler.getResampleFrequency());
            if (imz < 2 || imz >= height - 2)
                continue;
            int scan = peak.scan;
            float peakIn = spectra[peak.scan][imz];
            float peakBg = background[peak.scan][imz];
            float peakMd = median[peak.scan][imz];

            // compute extent of this feature
            // this is complicated by the fact that we may have
            // double-humped peaks we need to separate
            int scanStart = scan;
            int scanEnd = scan;
            float thresholdIn = 1 + 0.5F * background[scan][imz] + 2 * median[scan][imz];
            thresholdIn = Math.max(peakIn / 20, thresholdIn);
            float minIn = Float.MAX_VALUE;
            int scanMinIn = scan;
            // walk backwards from the apex to find the profile start
            for (; scanStart > 0; scanStart--)
            {
                float spectraIn = spectra[scanStart][imz];
                // are we still on the ridge and > 5% of max
                if (spectraIn < thresholdIn ||
                        waveletsD3[scanStart][imz] < waveletsD3[scanStart][imz - 2] ||
                        waveletsD3[scanStart][imz] < waveletsD3[scanStart][imz + 2])
                {
                    scanStart++;
                    break;
                }
                // have we crossed a 'valley'
                float smoothIn = smooth[scanStart][imz];
                if (smoothIn < minIn)
                {
                    // NOTE(review): comparison uses smoothIn but the tracked
                    // minimum is assigned spectraIn -- looks like it was meant
                    // to be "minIn = smoothIn"; confirm before changing
                    minIn = spectraIn;
                    scanMinIn = scanStart;
                }
                else if (smoothIn > minIn * 1.1 + thresholdIn) // made up cutoff
                {
                    scanStart = scanMinIn;
                    break;
                }
            }
            minIn = Float.MAX_VALUE;
            scanMinIn = scan;
            // walk forwards from the apex to find the profile end
            for (; scanEnd < width - 1; scanEnd++)
            {
                float spectraIn = spectra[scanEnd][imz];
                // are we still on the ridge and > 5% of max
                if (spectraIn < thresholdIn ||
                        waveletsD3[scanEnd][imz] < waveletsD3[scanEnd][imz - 2] ||
                        waveletsD3[scanEnd][imz] < waveletsD3[scanEnd][imz + 2])
                {
                    scanEnd--;
                    break;
                }
                // have we crossed a 'valley'
                float smoothIn = smooth[scanEnd][imz];
                if (smoothIn < minIn)
                {
                    // NOTE(review): same smoothIn/spectraIn mismatch as above
                    minIn = spectraIn;
                    scanMinIn = scanEnd;
                }
                else if (smoothIn > minIn * 1.1 + thresholdIn) // made up cutoff
                {
                    scanEnd = scanMinIn;
                    break;
                }
            }
            if (scanEnd - scanStart + 1 < shortPeak)
            {
                d_tooShort++;
                continue;
            }

            // total intensity calculation
            // "integrate" intensity over time
            float totalIn = 0;
            for (int s = scanStart; s <= scanEnd; s++)
            {
                double in = spectra[s][imz];
                double time = 1.0;
                if (s > 0 && s + 1 < scans.length)
                    time = (scans[s + 1].getDoubleRetentionTime() - scans[s - 1].getDoubleRetentionTime()) / 2;
                totalIn += time * in;
            }

            Feature f = new Feature(peak.scan, scanStart, scanEnd, peak.mz, peakIn, 0, -1, 0);
            f.background = peakBg;
            f.median = peakMd;
            f.totalIntensity = totalIn;
            waveletPeaks.add(f);
        }
        if (currentThread.isInterrupted())
            throw new InterruptedException();

        Collections.sort(waveletPeaks, Spectrum.compareFeatureLengthDesc);
        // some stats
        if (_log.isDebugEnabled())
        {
            _logDebug("FEATURE LENGTHS " + waveletPeaks.size() + " peaks");
            // NOTE(review): these percentile dumps index maxD3 (raw peaks,
            // sorted by intensity) with positions derived from
            // waveletPeaks.size() -- probably a copy/paste slip; in-bounds
            // since waveletPeaks.size() <= maxD3.length, but likely meant to
            // report waveletPeaks
            if (waveletPeaks.size() > 0)
            {
                for (int i = 0; i < 10; i++)
                    _logDebug(" " + (i) + ": " + maxD3[waveletPeaks.size() * i / 100].getIntensity());
            }
            if (waveletPeaks.size() > 0)
            {
                for (int i = 1; i < 10; i++)
                    _logDebug(" " + (i * 10) + ": " + maxD3[waveletPeaks.size() * i / 10].getIntensity());
            }
        }

        // OK, up the bar for a feature (min length of at largest peak in the feature)
        shortPeak++;
        _logDebug("SHORT PEAK " + shortPeak);

        //
        // Filtering
        //
        // Sticking with the idea of persistance, eliminate peaks
        // that don't seem to have correllated peaks
        //
        Tree2D tree = new Tree2D();
        for (Feature f : waveletPeaks)
        {
            f.excluded = true;
            tree.add(f.scan, f.mz, f);
        }
        // NOTE(review): waveletPeaks was already sorted by this comparator above
        Collections.sort(waveletPeaks, Spectrum.compareFeatureLengthDesc);

        //QUESTION: so only features with other related features get included... so how do we
        //end up with any single-peak features?
        for (Feature f : waveletPeaks)
        {
            int lenF = f.getScanLast() - f.getScanFirst() + 1;
            ArrayList<Feature> l = (ArrayList<Feature>) tree.getPoints(f.scan - 5, f.mz - 1.1F, f.scan + 5, f.mz + 1.1F);
            for (Feature neighbor : l)
            {
                int lenN = neighbor.getScanLast() - neighbor.getScanFirst() + 1;
                // we want features of different masses
                //QUESTION: what is the significance of 1.5/ExtractMaxima2D.getResampleFrequency()?
                if (Math.abs(neighbor.mz - f.mz) < 1.5 / SpectrumResampler.getResampleFrequency())
                    continue;
                // are centers aligned
                int s = Math.max(f.getScanFirst(), neighbor.getScanFirst());
                int e = Math.min(f.getScanLast(), neighbor.getScanLast());
                if (f.scan < s || f.scan > e || neighbor.scan < s || neighbor.scan > e)
                    continue;
                // if neither is chosen yet, add a small hurdle
                if (f.excluded && neighbor.excluded)
                {
                    //QUESTION: why checking the total number of scans in both?
                    if (lenF + lenN < shortPeak)
                        continue;
                    // NOTE(review): the neighbor clause uses f.background where
                    // neighbor.background might be expected -- confirm
                    if (f.intensity < 1 + 0.5 * f.background + 3 * Math.max(1, f.median) &&
                            neighbor.intensity < 1 + 0.5 * f.background + 3 * Math.max(1, neighbor.median))
                        continue;
                }
                //QUESTION: why aren't we tying these together somehow at this point?
                f.excluded = false;
                neighbor.excluded = false;
            }
        }
        // keep only the peaks that survived the persistence filter
        ArrayList<Feature> copy = new ArrayList<Feature>();
        for (Feature f : waveletPeaks)
        {
            if (!f.excluded)
                copy.add(f);
            else
                d_filtered++;
        }
        waveletPeaks = copy;
        assert timerExtractPeaks.stop();
        if (currentThread.isInterrupted())
            throw new InterruptedException();

        if (_log.isDebugEnabled())
        {
            _logDebug("kept " + waveletPeaks.size() + " peaks after filtering");
            Collections.sort(waveletPeaks, Spectrum.comparePeakIntensityDesc);
            for (int i = 0; i < waveletPeaks.size() && i < 100; i++)
                _logDebug(waveletPeaks.get(i).toString());
        }

        if (debugReturnPeaks) // see intermediate results
        {
            for (Feature peak : waveletPeaks)
            {
                // fix up scan
                Scan scan = scans[peak.scan];
                peak.setTime((float)scan.getDoubleRetentionTime());
                peak.scan = scan.getNum();
                peak.scanFirst = scans[peak.scanFirst].getNum();
                peak.scanLast = scans[peak.scanLast].getNum();
                peak.setPeaks(1); // otherwise these will get filtered by default
            }
            return waveletPeaks;
        }

        Feature[] peaks = waveletPeaks.toArray(new Feature[0]);
        Arrays.sort(peaks, Spectrum.comparePeakScanAsc);// debug only
        Arrays.sort(peaks, Spectrum.comparePeakMzAsc);// debug only
        _logDebug(Feature.getFeatureHeader(null));
        //too verbose
        // for (Feature peak : peaks)
        // {
        // _logDebug(peak.toString(null));
        // }

        //
        // ExtractPeptideFeatures
        //
        // combine peaks into features representing peptides
        //
        assert timerExtractPeptides.start();
        ArrayList<Feature> peptidesAll = ExtractPeptideFeatures(_run, peaks);

        //
        // Extract a window of intensities around each Feature
        //
        // this may be used by downstream analysis programs
        //
        int dumpWindowSize = getDumpWindowSize();
        if ( dumpWindowSize > 0 )
        {
            int nSamples = dumpWindowSize * SpectrumResampler.getResampleFrequency();
            for (Feature f : peptidesAll)
            {
                int mzIndex = (int)( ( f.mz - _mzRange.min) * SpectrumResampler.getResampleFrequency() );
                int scanIndex = f.scan;
                f.intensityLeadingPeaks = dumpWindowSize;
                f.intensityTrailingPeaks = dumpWindowSize;
                f.intensityWindow = new
                float[2 * nSamples];
                int k = 0;
                // copy a 2*nSamples-wide slice of the resampled intensities
                // around the feature's m/z, padding with zeros at the edges
                for (int j = mzIndex - nSamples; j < mzIndex + nSamples; j++, k++)
                {
                    if ( j < 0 || j >= spectra[scanIndex].length )
                        f.intensityWindow[k] = 0.f; // pad with zeros if we hit an edge
                    else
                        f.intensityWindow[k] = spectra[scanIndex][j];
                }
            }
        }

        //
        // fix up scanNum
        //
        for (Feature f : peptidesAll)
        {
            Scan scan = scans[f.scan];
            f.setTime((float)scan.getDoubleRetentionTime());
            f.scan = scan.getNum();
            f.setScanFirst(scans[f.getScanFirst()].getNum());
            f.setScanLast(scans[f.getScanLast()].getNum());
        }

        // if data are centroided, or if requested explicitly for profile
        // mode data, attempt to get accurate masses
        if (getAccurateMassAdjustmentScans() > 0 ||
                _run.getHeaderInfo().getDataProcessing().getCentroided() == 1)
        {
            Feature[] a = peptidesAll.toArray(new Feature[0]);
            Arrays.sort(a, Spectrum.comparePeakScanAsc);
            for (int i = 0; i < a.length; i++)
            {
                // float mz = AccurateMassCentroid(_run, a[i]);
                float mz = AccurateMass(_run, a[i], getAccurateMassAdjustmentScans());
                if (mz > 0)
                {
                    a[i].setMz(mz);
                    a[i].setAccurateMZ(true);
                    a[i].updateMass();
                }
                if (currentThread.isInterrupted())
                    throw new InterruptedException();
            }
        }
        assert timerExtractPeptides.stop();
        assert timerAnalyze.stop();
        CPUTimer.dumpAllTimers();
        return peptidesAll;
    }

    /**
     * Wrap an array of raw peaks as Feature objects.
     * NOTE(review): uses raw collection types; result is effectively a
     * Collection of Feature.
     */
    Collection PeaksAsFeatures(Spectrum.Peak[] peaks)
    {
        ArrayList l = new ArrayList();
        for (int p = 0; p < peaks.length; p++)
            l.add(new Feature(peaks[p]));
        return l;
    }

    /**
     * Dispatch to the centroid- or profile-mode accurate-mass routine based on
     * the run's data-processing (centroided) flag.
     */
    public static float AccurateMass(MSRun run, Feature f, int nScans)
    {
        if (run.getHeaderInfo().getDataProcessing().getCentroided() == 1)
            return AccurateMassCentroid(run, f);
        else
            return AccurateMassProfile(run, f, nScans);
    }

    /**
     * Accurate mass for centroided data: in the feature's apex scan, find the
     * largest source peak near each of the first two expected isotope m/z
     * positions, sanity-check the intensity-weighted average against the
     * monoisotopic peak (5 ppm), and return the monoisotopic peak's m/z.
     * Returns 0 if no suitable peak is found or the check fails.
     */
    public static float AccurateMassCentroid(MSRun run, Feature f)
    {
        // NOTE about isotopes
        // some heavy isotopes are greater than +1Da (C=1.0033), and some are less (N=0.9971)
        // I use 1.0013 as working number (C13 is more common than N15)
        final double ISOTOPE_FACTOR = 1.0013;

        // CONSIDER: does it make sense to do any sort of averaging across a few scans?
        Scan scan = run.getScan(run.getIndexForScanNum(f.scan));
        float[][] s = scan.getSpectrum();
        double delta = .66 / SpectrumResampler.getResampleFrequency();

        double mzP0 = 0;
        double sumMZ = 0;
        double sumIN = 0;
        for (int i = 0; i < 2 && i < f.comprised.length; i++)
        {
            // expected m/z of the i-th isotope peak
            double mzPi;
            if (f.comprised[i] == null)
                mzPi = f.mz + i * ISOTOPE_FACTOR / f.charge;
            else
                mzPi = f.comprised[i].mz;

            // find biggest close peak
            // could use more complicated "distance" measure
            // but in centroided data there is usually only one 'real' peak
            // in a space this small with maybe some tiny side peaks
            int p = Arrays.binarySearch(s[0], (float)(mzPi - delta));
            if (p < 0)
                p = -1 * (p + 1); // binarySearch insertion point
            double mzBiggest = 0;
            double inBiggest = 0;
            for (; p < s[0].length; p++)
            {
                float mz = s[0][p];
                if (mz > mzPi + delta)
                    break;
                float in = s[1][p];
                if (in > inBiggest)
                {
                    mzBiggest = mz;
                    inBiggest = in;
                }
            }
            if (mzBiggest == 0) // UNDONE: use wider window???
            {
                //System.out.println("missed " + i + "\t" + f.toString());
                return 0;
            }
            if (f.charge == 0)
                return (float)mzBiggest;
            if (i == 0)
                mzP0 = mzBiggest;
            // weighted average of peaks
            sumMZ += inBiggest * (mzBiggest - i * ISOTOPE_FACTOR / f.charge);
            sumIN += inBiggest;
        }
        double avgMZ = sumMZ / sumIN;
        // if we got this all right we'd expect ABS(newMZ-mzP0) to be less
        // than the resolution of the machine
        // I'm going to assume that the centroided means FT (very accurate)
        if (Math.abs(avgMZ - mzP0) > mzP0 * 5.0 / 1000000.0)
            return 0;
        // NOTE: could return avgMZ here, however
        // a) it's very close to mzP0 (see test)
        // b) I suspect people would rather see the value of peak from the source data
        return (float)mzP0;
    }

    /**
     * adjustment of profile-mode mass; average results from some number of adjacent scans
     */
    public static float AccurateMassProfile(MSRun run, Feature f, int nScans)
    {
        if (nScans <= 0)
            return 0.f;
        int scanIndex = run.getIndexForScanNum(f.scan);
        // center an nScans-wide window on the feature's scan, clamped to the run
        int lowScanIndex = (int) Math.max(scanIndex - (nScans - 1)/2.0 + .5, 0);
        int highScanIndex = (int)
Math.min(scanIndex + (nScans - 1)/2.0 + .5, run.getScanCount() - 1); float sumMz = 0.f; int n = 0; for (int s = lowScanIndex; s <= highScanIndex; s++) { Scan scan = run.getScan(s); float maxMz = AccurateMassProfileCenter(scan, f); if (maxMz > 0.f) { sumMz += maxMz; n++; } } return n > 0 ? sumMz / n : 0.f; } /** * adjustment of profile-mode mass using max peak */ private static float AccurateMassProfileMax(Scan scan, Feature f) { float[][] s = scan.getSpectrum(); double delta = .5 / SpectrumResampler.getResampleFrequency(); double lowMz = f.mz - delta; double highMz = f.mz + delta; int p = Arrays.binarySearch(s[0], (float) lowMz); if (p < 0) p = -1 * (p + 1); double maxMz = 0; double maxInt = 0; for (; p < s[0].length; p++) { if (s[0][p] > highMz) break; if (s[1][p] > maxInt) { maxMz = s[0][p]; maxInt = s[1][p]; } } return (float) maxMz; } /** * adjustment of profile-mode mass using center of mass */ private static float AccurateMassProfileCenter(Scan scan, Feature f) { float[][] s = scan.getSpectrum(); double delta = .66666667 / SpectrumResampler.getResampleFrequency(); double lowMz = f.mz - delta; double highMz = f.mz + delta; int p = Arrays.binarySearch(s[0], (float) lowMz); if (p < 0) p = -1 * (p + 1); double sumMz = 0; double sumInt = 0; for (; p < s[0].length; p++) { if (s[0][p] > highMz) break; sumMz += s[0][p] * s[1][p]; // mz weighted by intensity sumInt += s[1][p]; } // Couldn't figure out a decent match if (sumInt <= 0.0) return 0.f; return (float) (sumMz/sumInt); } protected static void _logDebug(String s) { _log.debug(s); } }