/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.viewer.feature.extraction;
import org.fhcrc.cpl.toolbox.proteomics.MSRun;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.toolbox.proteomics.feature.Spectrum;
import org.fhcrc.cpl.toolbox.proteomics.Scan;
import org.fhcrc.cpl.toolbox.proteomics.MassUtilities;
import org.fhcrc.cpl.toolbox.statistics.BasicStatistics;
import org.fhcrc.cpl.toolbox.datastructure.Pair;
import org.apache.log4j.Logger;
import java.util.Arrays;
/**
* For adjusting the masses of features found in a resampled space, to take
* advantage of the higher mass accuracy to be found in the unresampled space
*
* Optionally recalculate both total and maximum intensity for each feature. This is done in a completely
* different way than the original calculation (which occurs in resampled space), and so the two types of
* values should not be compared. Calculation is done here for massive performance benefit over recalculating
* afterward, and because recalculating intensity based on accurate-mass peaks only makes sense if we
* can determine mass accurately. Currently this is only done for profile-mode data.
*
* If intensity based on accurate mass is specified, and no intensity values are found in the narrow
* mass range specified, then accurate mass is invalidated, as well.
*/
public class AccurateMassAdjuster
{
private static Logger _log = Logger.getLogger(AccurateMassAdjuster.class);
public static final int DEFAULT_SCAN_WINDOW_SIZE = 1;
protected int _scanWindowSize = DEFAULT_SCAN_WINDOW_SIZE;
//modes of calculating accurate mass in profile mode. CENTER uses the mean m/z value from all scans, and
//each scan's value is the weighted mean of all the values in range. MAX uses the maximum m/z value from
//all scans, and each scan's value is the m/z value at maximum intensity in range.
//CENTER is more appropriate for peptides, where multiple peptides in range are unlikely.
//MAX is more appropriate for metabolites, where multiple metabolites in range are very likely.
public static final int PROFILE_MASS_MODE_CENTER = 0;
public static final int PROFILE_MASS_MODE_MAX = 1;
public static final int DEFAULT_PROFILE_MASS_MODE = PROFILE_MASS_MODE_CENTER;
//When calculating the mass of profile-mode data, use a weighted average, or maximum?
protected int profileMassMode = DEFAULT_PROFILE_MASS_MODE;
//proportion of the resampling bucket size (e.g., 1/36Da for a resmpling frequency of 36) to search
//for the highest peak
public static final double DEFAULT_RESAMPLING_SIZE_PROPORTION = .66666667;
protected double resamplingSizeProportion = DEFAULT_RESAMPLING_SIZE_PROPORTION;
protected boolean shouldAdjustComprisedMasses = false;
//dhmay adding 20100511
protected boolean shouldRecalculateIntensities = false;
//todo: This should be adjustable.
//Right now it's only used by FeatureStrategySmallMolecule, so tight tolerance
//is appropriate. What tolerance, exactly? Need to find a good way to figure it out
protected float intensityRecalcPPMError = 3f;
protected int maxUsedPeaks = 2;
//dhmay adding for debugging
protected float debugMass = 303.29f;
protected float debugDeltaMass = 0.05f;
protected boolean massDebugMode = false;
public void adjustAllMasses(MSRun run, Feature[] features)
throws InterruptedException
{
_log.debug("adjustAllMasses, " + toString());
Thread currentThread = Thread.currentThread();
Arrays.sort(features, Spectrum.comparePeakScanAsc);
for (Feature feature : features)
{
float mz = calculateAccurateMz(run, feature);
if (mz > 0)
{
feature.setMz(mz);
feature.setAccurateMZ(true);
feature.updateMass();
}
if (currentThread.isInterrupted())
throw new InterruptedException();
}
}
/**
* Switch on whether the run is in centroid mode and do the appropriate thing
* @param run
* @param f
* @return
*/
public float calculateAccurateMz(MSRun run, Feature f)
{
if (run.getHeaderInfo().getDataProcessing().getCentroided() == 1)
return calculateAccurateMzCentroid(run, f);
else
return calculateAccurateMzProfile(run, f);
}
public float calculateAccurateMzCentroid(MSRun run, Feature f)
{
// NOTE about isotopes
// some heavy isotopes are greater than +1Da (C=1.0033), and some are less (N=0.9971)
// I use 1.0013 as working number (C13 is more common than N15)
final double ISOTOPE_FACTOR = 1.0013;
// CONSIDER: does it make sense to do any sort of averaging across a few scans?
Scan scan = run.getScan(run.getIndexForScanNum(f.scan));
float[][] s = scan.getSpectrum();
double delta = .66 / SpectrumResampler.getResampleFrequency();
int numPeaksToUse = Math.min(maxUsedPeaks, f.comprised.length);
double mzP0 = 0;
double[] peakSumMZs = new double[numPeaksToUse];
double[] peakSumINs = new double[numPeaksToUse];
for (int i = 0; i < 2 && i < f.comprised.length; i++)
{
double mzPi;
if (f.comprised[i] == null)
mzPi = f.mz + i * ISOTOPE_FACTOR / f.charge;
else
mzPi = f.comprised[i].mz;
// find biggest close peak
// could use more complicated "distance" measure
// but in centroided data there is usually only one 'real' peak
// in a space this small with maybe some tiny side peaks
int p = Arrays.binarySearch(s[0], (float)(mzPi - delta));
if (p < 0) p = -1 * (p + 1);
double mzBiggest = 0;
double inBiggest = 0;
for (; p < s[0].length; p++)
{
float mz = s[0][p];
if (mz > mzPi + delta)
break;
float in = s[1][p];
if (in > inBiggest)
{
mzBiggest = mz;
inBiggest = in;
}
}
if (mzBiggest == 0) // UNDONE: use wider window???
{
//System.out.println("missed " + i + "\t" + f.toString());
return 0;
}
if (f.charge == 0)
{
return (float)mzBiggest;
}
if (i == 0)
mzP0 = mzBiggest;
double peakMz = inBiggest * (mzBiggest - i * ISOTOPE_FACTOR / f.charge);
// weighted average of peaks
peakSumMZs[i] = peakMz;
peakSumINs[i] += inBiggest;
}
double avgMZ = BasicStatistics.sum(peakSumMZs) / BasicStatistics.sum(peakSumINs);
for (int i=0; i<maxUsedPeaks; i++)
{
if (f.comprised[i] != null)
f.comprised[i].setMz((float)(peakSumMZs[i] / peakSumINs[i]));
}
// if we got this all right we'd expect ABS(newMZ-mzP0) to be less
// than the resolution of the machine
// I'm going to assume that the centroided means FT (very accurate)
if (Math.abs(avgMZ - mzP0) > mzP0 * 5.0 / 1000000.0)
{
return 0;
}
// NOTE: could return avgMZ here, however
// a) it's very close to mzP0 (see test)
// b) I suspect people would rather see the value of peak from the source data
return (float)mzP0;
}
/**
*
* adjustment of profile-mode mass; average results from some number of adjacent scans.
* Optionally adjust masses of comprised peaks, too. Note: if comprised peak mass adjustment fails,
* there's no report made of the failure.
*
* @param run
* @param f
* @return
*/
public float calculateAccurateMzProfile(MSRun run, Feature f)
{
if (Math.abs(debugMass - f.mass) < debugDeltaMass)
{
massDebugMode = true;
_log.debug("MASS: " + f.mass);
}
if (_scanWindowSize <= 0)
return 0.f;
int scanIndex = run.getIndexForScanNum(f.scan);
int lowScanIndex = (int) Math.max(scanIndex - (_scanWindowSize - 1)/2.0 + .5, 0);
int highScanIndex = (int) Math.min(scanIndex + (_scanWindowSize - 1)/2.0 + .5, run.getScanCount() - 1);
Pair<Float, Pair<Float, Float>> accMzAndIntensities = calculateAccurateMzProfileForMultiScanPeak(
run, f.mz, f.charge, lowScanIndex, highScanIndex);
float monoisotopicAdjustedMz = accMzAndIntensities.first;
if (shouldRecalculateIntensities && monoisotopicAdjustedMz > 0)
{
f.intensity = accMzAndIntensities.second.first;
f.totalIntensity = accMzAndIntensities.second.second;
}
if (shouldAdjustComprisedMasses)
{
for (int i=0; i<f.comprised.length; i++)
{
if (f.comprised[i] == null)
continue;
if (i==0)
{
if (monoisotopicAdjustedMz > 0)
f.comprised[i].setMz(monoisotopicAdjustedMz);
}
else
{
Pair<Float, Pair<Float, Float>> accMzAndIntensitiesComprisedPeak =
calculateAccurateMzProfileForMultiScanPeak(run, f.comprised[i].mz,
f.charge, lowScanIndex, highScanIndex);
float accPeakMz = accMzAndIntensitiesComprisedPeak.first;
if (accPeakMz > 0)
{
f.comprised[i].setMz(accPeakMz);
if (shouldRecalculateIntensities)
{
f.comprised[i].intensity = accMzAndIntensitiesComprisedPeak.second.first;
//nothing to do with total intensity for comprised peak
}
}
}
}
}
massDebugMode = false;
return monoisotopicAdjustedMz;
}
/**
* Calculate accurate MZ for a multi-scan peak.
*
* Optionally adjust intensity of each peak based on accurate mass; if we do that, and can't find
* intensity values near accurate mass, invalidate accurate mass.
* @param run
* @param mz
* @param charge
* @param lowScanIndex
* @param highScanIndex
* @return
*/
protected Pair<Float, Pair<Float, Float>> calculateAccurateMzProfileForMultiScanPeak(
MSRun run, float mz, int charge, int lowScanIndex, int highScanIndex)
{
//for PROFILE_MASS_MODE_CENTER
float sumMz = 0.f;
int n = 0;
// float[] scanMzValues = new float[highScanIndex - lowScanIndex + 1];
//List<Float> nonzeroScanMzValues = new ArrayList<Float>();
//for PROFILE_MASS_MODE_MAX
float mzAtMaxInt = 0;
float maxInt = 0;
for (int s = lowScanIndex; s <= highScanIndex; s++)
{
Scan scan = run.getScan(s);
if (massDebugMode)
_log.debug("Scan " + s);
Pair<Float, Float> scanMzAndInt = calculateAccurateMzProfileForScan(scan, mz);
float scanMz = scanMzAndInt.first;
float scanInt = scanMzAndInt.second;
if (scanMz > 0.f)
{
sumMz += scanMz;
n++;
// scanMzValues[s-lowScanIndex] = scanMz;
//nonzeroScanMzValues.add(scanMz);
if (scanInt > maxInt)
{
maxInt = scanInt;
mzAtMaxInt = scanMz;
}
}
}
//float massSpreadPPM = MassUtilities.convertDaToPPM((float) (BasicStatistics.max(nonzeroScanMzValues) - BasicStatistics.min(nonzeroScanMzValues)), mz);
//if (massSpreadPPM > 5) System.err.println("BIG MASS SPREAD: " + massSpreadPPM);
float accMz = 0.f;
if (n > 0) //n always > 0 if profileMassMode == PROFILE_MASS_MODE_MAX
{
switch (profileMassMode)
{
case PROFILE_MASS_MODE_CENTER:
accMz = sumMz / n;
break;
case PROFILE_MASS_MODE_MAX:
accMz = mzAtMaxInt;
break;
}
}
Pair<Float, Pair<Float, Float>> result = new Pair<Float, Pair<Float, Float>>(accMz, null);
//if we found an accurate mz, then recalculate intensity if we're supposed to.
//This could result in accMz being invalidated
if (shouldRecalculateIntensities && (accMz > 0))
{
Scan[] scans = run.getScans();
//if charge 0, assume charge 1
int absChargeMin1 = Math.max(Math.abs(charge), 1);
float deltaMz = MassUtilities.convertPPMToDa(intensityRecalcPPMError,
Feature.convertMzToMass(mz, absChargeMin1));
float lowAccMz = accMz - deltaMz;
float highAccMz = accMz + deltaMz;
float totalIntensitySum = 0;
float maxScanIntensity = 0;
for (int i=lowScanIndex; i<=highScanIndex; i++)
{
Scan scan = run.getScan(i);
float[][] spectrum = scan.getSpectrum();
int p = Arrays.binarySearch(spectrum[0], lowAccMz);
if (p < 0)
p = -1 * (p + 1);
float intensityThisScan = 0;
for (; p<spectrum[0].length; p++)
{
if (spectrum[0][p] > highAccMz)
break;
intensityThisScan += spectrum[1][p];
}
maxScanIntensity = Math.max(intensityThisScan, maxScanIntensity);
//"integrate" scan intensity over time; multiply this scan's intensity by half the difference
//in time between the scan before and after
if (intensityThisScan > 0)
{
double time = 1.0;
if (i > 0 && i + 1 < run.getScanCount())
time = (scans[i + 1].getDoubleRetentionTime() -
scans[i - 1].getDoubleRetentionTime()) /
2;
totalIntensitySum += time * intensityThisScan;
}
}
//if no intensities in range, something went terribly wrong, i.e., the "accurate" mass value
// wasn't so accurate after all. Don't correct accurate mass or assign intensity
if (maxScanIntensity == 0)
{
result.first = 0f;
//System.err.println("No acc intensity! accMz = " + accMz + ", low=" + lowAccMz + ", high=" + highAccMz + ", orig=" + mz);
//for (float val : scanMzValues)
// System.err.println("\t" + val);
//
// for (int i=lowScanIndex; i<=highScanIndex; i++)
// {
// Scan scan = run.getScan(i);
////System.err.println("\t\tscan " + i);
// float[][] spectrum = scan.getSpectrum();
//
// int p = Arrays.binarySearch(spectrum[0], lowAccMz);
// if (p < 0)
// p = -1 * (p + 1);
// float intensityThisScan = 0;
// for (; p<spectrum[0].length; p++)
// {
// if (spectrum[0][p] > highAccMz)
// break;
////System.err.println("\t\t\tmz=" + spectrum[0][p] + ", int=" + spectrum[1][p]);
//
// intensityThisScan += spectrum[1][p];
// }
// }
//
}
else
result.second = new Pair<Float, Float>(maxScanIntensity, totalIntensitySum);
}
return result;
}
/**
* adjustment of profile-mode mass using either maximum or center-of-mass, depending on profileMassMode
* @param scan
* @param mz
* @return pair containing acc mass and intensity. intensity is either sum or max, depending on profileMassMode
*/
protected Pair<Float, Float> calculateAccurateMzProfileForScan(Scan scan, float mz)
{
float[][] s = scan.getSpectrum();
double delta = resamplingSizeProportion / SpectrumResampler.getResampleFrequency();
double lowMz = mz - delta;
double highMz = mz + delta;
int p = Arrays.binarySearch(s[0], (float) lowMz);
if (p < 0)
p = -1 * (p + 1);
double sumMz = 0;
double sumInt = 0;
double mzAtMaxInt = 0;
double maxInt = 0;
for (; p < s[0].length; p++)
{
if (s[0][p] > highMz)
{
if (massDebugMode) _log.debug("outofrange: " + s[0][p]);
break;
}
switch (profileMassMode)
{
case PROFILE_MASS_MODE_CENTER:
sumMz += s[0][p] * s[1][p]; // mz weighted by intensity
sumInt += s[1][p];
break;
case PROFILE_MASS_MODE_MAX:
if (s[1][p] > maxInt)
{
mzAtMaxInt = s[0][p];
maxInt = s[1][p];
}
break;
}
}
float result = 0;
switch (profileMassMode)
{
case PROFILE_MASS_MODE_CENTER:
if (massDebugMode)
_log.debug("\tmzWeightedMean=" + sumMz/sumInt + ", int=" + sumInt);
// Couldn't figure out a decent match
if (sumInt <= 0.0)
result = 0.f;
else
result = (float) (sumMz/sumInt);
return new Pair<Float, Float>(result, (float) sumInt);
case PROFILE_MASS_MODE_MAX:
if (massDebugMode)
_log.debug("\tmzMax=" + mzAtMaxInt + ", int=" + maxInt);
result = (float) mzAtMaxInt;
return new Pair<Float, Float>(result, (float) maxInt);
}
//never gets here
return null;
}
public String toString()
{
return "AccurateMassAdjuster: adjustAllMasses, profileMassMode=" + profileMassMode + ", scanWindow: " + _scanWindowSize +
", resamplingProporation: " + resamplingSizeProportion;
}
public int getScanWindowSize()
{
return _scanWindowSize;
}
public void setScanWindowSize(int scanWindowSize)
{
_scanWindowSize = scanWindowSize;
}
public double getResamplingSizeProportion()
{
return resamplingSizeProportion;
}
public void setResamplingSizeProportion(double resamplingSizeProportion)
{
this.resamplingSizeProportion = resamplingSizeProportion;
}
public int getProfileMassMode()
{
return profileMassMode;
}
public void setProfileMassMode(int profileMassMode)
{
this.profileMassMode = profileMassMode;
}
public static Logger get_log()
{
return _log;
}
public static void set_log(Logger _log)
{
AccurateMassAdjuster._log = _log;
}
public boolean isShouldAdjustComprisedMasses() {
return shouldAdjustComprisedMasses;
}
public void setShouldAdjustComprisedMasses(boolean shouldAdjustComprisedMasses) {
this.shouldAdjustComprisedMasses = shouldAdjustComprisedMasses;
}
public boolean isShouldRecalculateIntensities() {
return shouldRecalculateIntensities;
}
public void setShouldRecalculateIntensities(boolean shouldRecalculateIntensities) {
this.shouldRecalculateIntensities = shouldRecalculateIntensities;
}
public float getIntensityRecalcPPMError() {
return intensityRecalcPPMError;
}
public void setIntensityRecalcPPMError(float intensityRecalcPPMError) {
this.intensityRecalcPPMError = intensityRecalcPPMError;
}
}