/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.quant; import org.apache.log4j.Logger; import org.fhcrc.cpl.toolbox.proteomics.feature.Spectrum; import java.util.List; /** * Methods for correcting isotopically labeled light and heavy areas to account for peak overlap. * * NOTE: This class is not currently used by Q3, which has its own implementation. This implementation should be * numerically equivalent to the correction in Q3, which is done using matrices. * * TODO: make Q3 use this class for peak overlap correction */ public class PeakOverlapCorrection { private static Logger _log = Logger.getLogger(PeakOverlapCorrection.class); //dhmay adding 20100312 public static final float INFINITE_RATIO_VALUE = 999f; //store the first 25 elements of the factorial sequence, so we don't have to calculate it every time protected static final double[] FACTORIAL_SEQUENCE_25 = new double[] { 1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0, 1.307674e+12, 2.092279e+13, 3.556874e+14, 6.402374e+15, 1.216451e+17, 2.432902e+18, 5.109094e+19, 1.124001e+21, 2.585202e+22, 6.204484e+23}; /** * Get isotope intensities based on a simple poisson model. Sum of all intensities approaches 1 as len -> infinity * p(x) = lambda^x exp(-lambda)/x! * * Equivalent R code: * * iso <- function(mass, len) * { * lambda=mass/1800; * result=c(); * for (i in 0:len-1) * { * result = c(result, lambda^i * exp(-lambda)/factorial(i)); * } * result; * } * @param mass * @param len * @return */ public static double[] getIsotopicDistribution(double mass, int len) { double lambda = mass / 1800; double lambdaExp = Math.exp(-lambda); double[] result = new double[len]; for (int i = 0; i < len; i++) if (i < FACTORIAL_SEQUENCE_25.length) result[i] = Math.pow(lambda, i) / FACTORIAL_SEQUENCE_25[i] * lambdaExp; else result[i] = 0; // Not really zero, but too small to matter return result; } /** * This is trivial, but it's nice to have it to use elsewhere. Technically it needs a slight correction * for mass defect, but in practice, not really unless the peak difference is gargantuan * @param lightMass * @param heavyMass * @return */ public static int calcNumPeaksSeparation(double lightMass, double heavyMass) { return (int) Math.round((heavyMass - lightMass)); } /** * Account for any incursion of light onto heavy area, and return adjusted areas. Based on raw peak area * calculations that begin with the firstpeak (for monoisotope, use 0) and continue through the last * free-and-clear light peak that's <= highestPeakMax * @param lightMass * @param heavyMass * @param rawLightArea * @param rawHeavyArea * @param firstPeak the peak to start with (0-based). Normally (e.g., for Q3) this will be the monoisotope * @param highestPeakMax MAXIMUM highest isotopic peak used in calculation of areas (0-based). If greater than * the number of Daltons of separation between light and heavy - 1, we assume only safe peaks (no overlap) used. * @return a pair of (light, heavy) adjusted areas. Adjusted areas represent the inferred sum of * intensity across all isotopic peaks (even those that overlap), so may be very different from raw * areas, especially for high masses */ public static double[] correctLightHeavyAreas(double lightMass, double heavyMass, double rawLightArea, double rawHeavyArea, int firstPeak, int highestPeakMax) { //number of Daltons separating light and heavy. Same as number of safe peaks for light int numDaltonsSeparation = calcNumPeaksSeparation(lightMass, heavyMass); //assumption about the number of highest light and heavy peak actually used -- maxPeaks or the number of //light peaks that don't intrude on heavy, whichever's less int maxPeakUsed = Math.min(highestPeakMax, numDaltonsSeparation-1); //Get the full isotopic distribution of the light peaks //For some reason, Q3 uses the heavy mass, rather than the light, for this distribution calculation. //It really doesn't matter that much, so preserving that behavior double[] fullLightIsotopicDistribution = getIsotopicDistribution(heavyMass, numDaltonsSeparation + maxPeakUsed + 1); //head gets sum of first numPeaksUsed peak distribution values. //tail gets the sum of the first numPeaksUsed values starting with the heavy monoisotope double head = 0; for (int i = firstPeak; i <= maxPeakUsed; i++) head += fullLightIsotopicDistribution[i]; double tail = 0; int lastUsedHeavyPeakIndex = numDaltonsSeparation + maxPeakUsed; //System.err.println(numDaltonsSeparation + ", " + maxPeakUsed + ", " + lastUsedHeavyPeakIndex + ", " + firstPeak); for (int i = numDaltonsSeparation + firstPeak; i <= lastUsedHeavyPeakIndex; i++) { tail += fullLightIsotopicDistribution[i]; //System.err.println(" " + fullLightIsotopicDistribution[i]); } //if (tail > 0) System.err.println(tail); //Corrected light area represents inferred sum across all peaks double correctedLightArea = rawLightArea / head; //Corrected heavy area first subtracts the inferred contribution of light peaks, then expands to represent //inferred sum over all peaks //dhmay changing 20100312. The corrected heavy area can be negative. Fix it to zero if so. Note: be sure //to check for this when calculating ratio double correctedHeavyArea = Math.max(0, (rawHeavyArea - (tail * correctedLightArea)) / head); _log.debug("Raw: " + rawLightArea + ", " + rawHeavyArea + ". Sep: " + numDaltonsSeparation + ". head=" + head + ", tail=" + tail + ". Corrected: " + correctedLightArea + ", " + correctedHeavyArea); return new double[] { correctedLightArea, correctedHeavyArea }; } /** * Cover method for starting with monoisotope * @param lightMass * @param heavyMass * @param ratio * @param maxPeaks * @return */ public static double correctRatioForOverlap(double lightMass, double heavyMass, float ratio, int maxPeaks) { return correctRatioForOverlap(lightMass, heavyMass, ratio, 0, maxPeaks); } /** * Correct the ratio by calling correctLightHeavyAreas, dividing light by heavy. Absolute intensities don't * matter in this calculation, just the ratio * @param lightMass * @param heavyMass * @param ratio * @param firstPeak * @param maxPeaks * @return */ public static double correctRatioForOverlap(double lightMass, double heavyMass, float ratio, int firstPeak, int maxPeaks) { double[] lightHeavy = correctLightHeavyAreas(lightMass, heavyMass, 1, 1f / ratio, firstPeak, maxPeaks); //dhmay adding 20090312 if (lightHeavy[1] == 0) return INFINITE_RATIO_VALUE; return lightHeavy[0] / lightHeavy[1]; } /** * Calculate a KL score, using the passed-in 6-peak intensity template as the ideal. * * Cribbed from Spectrum.KLPoissonDistance and made more flexible * @param idealPeaks must sum to 1 * @param peakIntensities must sum to 1 * @return */ public static float calcKLUsingTemplate(float[] idealPeaks, float[] peakIntensities) { assert idealPeaks.length == peakIntensities.length; double diff = 0.0; double sumP = 0.0; double sumQ = 0.0; int pqMinLength = Math.min(idealPeaks.length, peakIntensities.length); //System.err.println("KL, peaks: " + pqMinLength); for (int k = 0; k < pqMinLength ; k++) { diff += idealPeaks[k] * Math.log((double)idealPeaks[k] / peakIntensities[k]); sumP += idealPeaks[k]; sumQ += peakIntensities[k]; //System.err.println(" Peak " + k + ", p=" + idealPeaks[k] + ", q=" + peakIntensities[k] + ", diff: " + // (idealPeaks[k] * Math.log((double)idealPeaks[k] / peakIntensities[k]))); } double kl = diff / Spectrum.LN2; kl = Math.max(kl, 0.0); //System.err.println("KL: " + kl); assert Math.abs(sumP-1.0) < 0.001 && Math.abs(sumQ-1.0) < 0.001; return (float) kl; } }