/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.quant; import org.fhcrc.cpl.viewer.feature.extraction.FeatureFinder; import org.apache.log4j.Logger; import org.apache.log4j.Level; import org.fhcrc.cpl.toolbox.Rounder; import org.fhcrc.cpl.toolbox.datastructure.Pair; import org.fhcrc.cpl.toolbox.proteomics.*; import org.fhcrc.cpl.toolbox.proteomics.feature.Feature; import org.fhcrc.cpl.toolbox.proteomics.feature.Spectrum; import org.fhcrc.cpl.toolbox.proteomics.feature.matching.FeatureSetMatcher; import java.util.*; import java.util.List; /** * This class contains the code for deciding whether an isotopic label quantitation event is good or not. * It's aimed at running-time efficiency: there are several tests for different kinds of badness, and they're run * roughly in order of increasing time investment. Therefore, all that's retained is a good-bad call, and if bad, an * indication of /one/ of the (potentially several) things that is bad. * * This uses a big HACK, adding a dummy search score to peptide features to store the flag reason description. */ public class QuantEventAssessor { public static Logger _log = Logger.getLogger(QuantEventAssessor.class); public static final int FLAG_REASON_OK = 0; public static final int FLAG_REASON_COELUTING = 1; public static final int FLAG_REASON_DISSIMILAR_KL = 2; public static final int FLAG_REASON_DISSIMILAR_MS1_RATIO = 3; public static final int FLAG_REASON_BIG_2PEAK_KL = 4; public static final int FLAG_REASON_MISSING_PEAKS = 5; public static final int FLAG_REASON_UNEVALUATED = 6; public static final int FLAG_REASON_OTHER = 7; public static final String[] flagReasonDescriptions = new String[] { "OK", "Coeluting peptide", "Dissimilar light/heavy KL", "Singlepeak Ratio Different", "Big 2-peak KL", "Missing peaks", "Unevaluated", "Other", }; public static final String[] flagReasonCodes = new String[] { "OK", "CoelutingPeptide", "DissimilarKL", "MS1MS2RatioDiff", "Big2PeakKL", "MissingPeaks", "Unevaluated", "Other", }; //boundaries for the one-peak ratio. If it's outside these boundaries, snap it to them. This is artificial, //of course, but it's necessary to prevent ratios in the thousands (or more!) from drowning out all other //ratios for a protein public static final float MIN_RATIO_ONE_PEAK = 0.04f; public static final float MAX_RATIO_ONE_PEAK = 25f; //Default high and low "extreme" ratios. public static final float DEFAULT_EXTREME_RATIO_HIGH = 2f; public static final float DEFAULT_EXTREME_RATIO_LOW = 1f / DEFAULT_EXTREME_RATIO_HIGH; //If an event has an Extreme ratio, and a problem with it could only //mean that the correct value is more Extreme, then we call the event OK. protected float extremeRatioHigh = DEFAULT_EXTREME_RATIO_HIGH; protected float extremeRatioLow = DEFAULT_EXTREME_RATIO_LOW; //Should we perform all the checks, for bad events (for capturing all info), or stop at the first one (for performance) protected boolean shouldPerformAllChecks = true; public static String getAssessmentCodeDesc(int assessmentCode) { return flagReasonDescriptions[assessmentCode]; } public static int parseAssessmentCodeString(String curationStatusString) { //System.err.println(" " + curationStatusString); if (flagReasonCodes[FLAG_REASON_OK].equals(curationStatusString)) return FLAG_REASON_OK; if (flagReasonCodes[FLAG_REASON_COELUTING].equals(curationStatusString)) return FLAG_REASON_COELUTING; if (flagReasonCodes[FLAG_REASON_DISSIMILAR_KL].equals(curationStatusString)) return FLAG_REASON_DISSIMILAR_KL; if (flagReasonCodes[FLAG_REASON_DISSIMILAR_MS1_RATIO].equals(curationStatusString)) return FLAG_REASON_DISSIMILAR_MS1_RATIO; if (flagReasonCodes[FLAG_REASON_BIG_2PEAK_KL].equals(curationStatusString)) return FLAG_REASON_BIG_2PEAK_KL; if (flagReasonCodes[FLAG_REASON_OTHER].equals(curationStatusString)) return FLAG_REASON_OTHER; if (flagReasonCodes[FLAG_REASON_MISSING_PEAKS].equals(curationStatusString)) return FLAG_REASON_MISSING_PEAKS; return FLAG_REASON_UNEVALUATED; } public static final String REASON_DUMMY_SEARCH_SCORE_NAME = "dummy_flag_desc"; //Maximum allowed proportion of highest peak intensity that the peak 1Da below monoisotope can have //dhmay changing from 0.3 to 0.35, 20091029. Was too sensitive public static final float DEFAULT_PEAKBELOW_INTENSITY_RATIO_CUTOFF = 0.35f; protected float peakBelowIntensityRatioCutoff = DEFAULT_PEAKBELOW_INTENSITY_RATIO_CUTOFF; public static final int DEFAULT_NUM_SCANS_AROUND_EVENT = 0; protected int numScansAroundEventToConsider = DEFAULT_NUM_SCANS_AROUND_EVENT; public static final float DEFAULT_PEAK_PPM_TOLERANCE = 50; protected float peakPPMTolerance = DEFAULT_PEAK_PPM_TOLERANCE; //maximum KL score for either light or heavy peaks, only first two peaks used in calculation public static final float DEFAULT_MAX_2PEAK_KL = 10.0f; protected float max2PeakKL = DEFAULT_MAX_2PEAK_KL; //maximum KL score heavy peaks, using light peaks as template // public static final float DEFAULT_MAX_2PEAK_KL_HEAVY_VS_LIGHT = 5f; // protected float max2PeakKLHeavyVsLight = DEFAULT_MAX_2PEAK_KL_HEAVY_VS_LIGHT; // private int labelType = QuantitationUtilities.LABEL_LYCINE; protected boolean showCharts = false; // public static final float SILAC_LABEL_MASS = 134.115092f; // public static final float SILAC_LABEL_MASSDIFF_PERRESIDUE = 6.020129f; // // // public static final float ACRYLAMIDE_LABEL_LIGHTMASS = 174.0458f; // public static final float ACRYLAMIDE_LABEL_HEAVYMASS = 177.05591f; // public static final float ACRYLAMIDE_LABEL_MASSDIFF_PERRESIDUE = // ACRYLAMIDE_LABEL_HEAVYMASS - ACRYLAMIDE_LABEL_LIGHTMASS; protected int numPeaksToUse = 5; protected int numRawPeaksToKeep = 2; //for coeluting peptide check, the maximum proportion (in the theoretical distribution) of the below-monoisotope //peak within the whole isotopic distribution. Want to make sure that this is distinguishable from a 1-peak wonder float maxCoelutingPeptideContributionOfBelowMonoPeak = 0.5f; //Maximum KL score of a coeluting peptide in another charge, to consider it an adequate explanation for a //below-monoisotope peak. //This value has a big effect on false positives float maxKLCoelutingOtherChargePeptide = 1f; //minimum proportion of total light feature intensity that the peak /below/ the heavy monoisotope can have, //in order for us /not/ to check for a peak below the heavy monoisotope protected float minSignificantPeakContributionBelowMonoisotope = 0.02f; protected float maxLightHeavyOverlapToIgnore = 0.02f; protected float maxKlDiff = 0.15f; protected float minKlRatio = 0.7f; //Maximum allowed difference between log ratio we calculate (simply, by most-intense peaks) and algorithm protected float maxLogRatioDiff = (float) (Math.log(1.5) - Math.log(1)); //Ratios must be higher than minFlagRatio, OR lower than maxFlagRatio, to be flagged protected float minFlagRatio = 0f; protected float maxFlagRatio = 999f; //Should events be flagged if there are peaks missing and they wouldn't otherwise be flagged protected boolean shouldFlagIfMissingPeaks = false; //Should events be flagged if KL scores for light and heavy are different? protected boolean shouldFlagDifferentKL = true; //scaffolding for calculating ratios using regression based on one datapoint per-scan, like RelEx. //I think that method pretty much doesn't work very well. // List<Float> singlePeakSlopeRatios = new ArrayList<Float>(); // List<Float> multiPeakSlopeRatios = new ArrayList<Float>(); public QuantEventAssessor() { } /** * Assess this feature as a quantitative event * @param feature * @param run * @return */ public QuantEventAssessment assessFeature(Feature feature, MSRun run) { return assessQuantEvent(new QuantEvent(feature, ""), run); } /** * Assess this quantitative event. Return assessment. Side effect: set event.algorithmicAssessment * @param event * @param run * @return */ public QuantEventAssessment assessQuantEvent(QuantEvent event, MSRun run) { float ratio = event.getRatio(); if (ratio < getMinFlagRatio() && ratio > getMaxFlagRatio()) { _log.debug("Skipping ratio " + ratio); return new QuantEventAssessment(FLAG_REASON_UNEVALUATED, "Not evaluated: ratio near parity"); } float lightMass = Math.max((event.getLightMz() - Spectrum.HYDROGEN_ION_MASS) * event.getCharge(), 0.0f); float heavyMass = Math.max((event.getHeavyMz() - Spectrum.HYDROGEN_ION_MASS) * event.getCharge(), 0.0f); QuantPeakSetSummary lightPeaksSummary = calcPeakIntensities(event.getFirstHeavyQuantScan(), event.getLastHeavyQuantScan(), lightMass, event.getLightMz(), event.getCharge(), run, numPeaksToUse); QuantPeakSetSummary heavyPeaksSummary = calcPeakIntensities(event.getFirstHeavyQuantScan(), event.getLastHeavyQuantScan(), heavyMass, event.getHeavyMz(), event.getCharge(), run, numPeaksToUse); float intensityBelowLight = lightPeaksSummary.sumIntensityPeakBelow; float intensityBelowHeavy = heavyPeaksSummary.sumIntensityPeakBelow; List<Float> lightPeakIntensities = lightPeaksSummary.peakSumIntensities; List<Float> heavyPeakIntensities = heavyPeaksSummary.peakSumIntensities; _log.debug("**light, " + lightPeakIntensities.get(0) + ", " + lightPeakIntensities.get(1) + ", " + lightPeakIntensities.get(2) + ", " + lightPeakIntensities.get(3)); _log.debug("**heavy, " + heavyPeakIntensities.get(0) + ", " + heavyPeakIntensities.get(1) + ", " + heavyPeakIntensities.get(2) + ", " + heavyPeakIntensities.get(3)); _log.debug("**light KL2: " + scaleAndCalcKL(lightMass, lightPeakIntensities, 2) + ", heavy KL2: " + scaleAndCalcKL(heavyMass, heavyPeakIntensities, 2)); int numPeaksSeparation = PeakOverlapCorrection.calcNumPeaksSeparation(lightPeaksSummary.monoisotopicMass, heavyPeaksSummary.monoisotopicMass); _log.debug("Light mass: " + lightMass + ", Heavy mass " + heavyMass + ", num peaks separation: " + numPeaksSeparation); int highestPeakIndex = Spectrum.calcMaxIdealPeakIndex(lightPeaksSummary.monoisotopicMass); float lightAreaHighestPeak = lightPeakIntensities.get(highestPeakIndex); float heavyAreaHighestPeak = heavyPeakIntensities.get(highestPeakIndex); float singlePeakRatio = lightAreaHighestPeak / heavyAreaHighestPeak; //dhmay adding 20091027. Set boundaries on the single-peak ratio, so we don't create such an extreme ratio //that it dominates the geometric mean calculation singlePeakRatio = Math.max(MIN_RATIO_ONE_PEAK, Math.min(singlePeakRatio, MAX_RATIO_ONE_PEAK)); float algRatio = event.getRatio(); double[] lightIsotopicDistribution = PeakOverlapCorrection.getIsotopicDistribution( lightPeaksSummary.monoisotopicMass, numPeaksSeparation+1); double[] heavyIsotopicDistribution = PeakOverlapCorrection.getIsotopicDistribution( heavyPeaksSummary.monoisotopicMass, numPeaksSeparation+1); //Determine whether there's a significant light peak below the heavy mono carefully: use whichever light-heavy //ratio is higher, algorithm or single-peak float lastLightPeakContributionToHeavyMono = (float) lightIsotopicDistribution[numPeaksSeparation-1] * Math.max(singlePeakRatio, algRatio) / (float) heavyIsotopicDistribution[0]; boolean lightHasSignificantPeakBelowHeavy = lastLightPeakContributionToHeavyMono > minSignificantPeakContributionBelowMonoisotope; //for (int i=0; i<numPeaksSeparation; i++) System.err.println("Light peak " + i + ", " + lightIsotopicDistribution[i]); _log.debug("Light intrusion on heavy? " + lightHasSignificantPeakBelowHeavy + ", last light contribution: " + lastLightPeakContributionToHeavyMono + ", cap: " + minSignificantPeakContributionBelowMonoisotope); //Determine whether there's significant light-heavy distribution overlap using algorithm ratio boolean lightHasSignificantHeavyOverlap = lightIsotopicDistribution[numPeaksSeparation] * algRatio / heavyIsotopicDistribution[0] > maxLightHeavyOverlapToIgnore; QuantEventAssessment result = new QuantEventAssessment(FLAG_REASON_OK, ""); //dhmay adding check for 0 light or heavy area in BOTH of the first two peaks of either light or heavy if ((result.isGood() || shouldPerformAllChecks) && shouldFlagIfMissingPeaks) { if (lightPeakIntensities.get(0) == 0 && lightPeakIntensities.get(1) == 0) { result.setFlag(FLAG_REASON_MISSING_PEAKS, true); result.setCheckExplanation(FLAG_REASON_MISSING_PEAKS, "Missing first two light peaks"); } else if (heavyPeakIntensities.get(0) == 0 && heavyPeakIntensities.get(1) == 0) { result.setFlag(FLAG_REASON_MISSING_PEAKS, true); result.setCheckExplanation(FLAG_REASON_MISSING_PEAKS, "Missing first two heavy peaks"); } } //Calculate a ratio from the theoretically most-intense peak, compare with algorithm ratio if (result.isGood() ||shouldPerformAllChecks) { int lightIndexOfHeavyHighestPeak = numPeaksSeparation + highestPeakIndex; float lightPeakIntrusionHeavyHighest = (float) PeakOverlapCorrection.getIsotopicDistribution( lightPeaksSummary.monoisotopicMass, lightIndexOfHeavyHighestPeak+1)[lightIndexOfHeavyHighestPeak]; if (lightPeakIntrusionHeavyHighest >= maxLightHeavyOverlapToIgnore) { singlePeakRatio = (float) PeakOverlapCorrection.correctRatioForOverlap( lightPeaksSummary.monoisotopicMass, heavyPeaksSummary.monoisotopicMass, singlePeakRatio, highestPeakIndex, highestPeakIndex); } float logRatioDiff = (float) Math.abs(Math.log(singlePeakRatio) - Math.log(algRatio)); _log.debug("MS1 Ratio: " + singlePeakRatio + ", alg ratio: " + algRatio + ", log diff: " + Rounder.round(logRatioDiff,3) + ", to beat: " + maxLogRatioDiff); if (logRatioDiff > maxLogRatioDiff) { if ((algRatio >= extremeRatioHigh && singlePeakRatio >= extremeRatioHigh) || (algRatio <= extremeRatioLow && singlePeakRatio <= extremeRatioLow)) { _log.debug("Giving MS1 ratio diff a pass because of extreme ratio: algorithm: " + algRatio + ", MS1: " + singlePeakRatio); } else { //dhmay added check for extreme ratios about which we don't care, 20091029 result.setFlag(FLAG_REASON_DISSIMILAR_MS1_RATIO, true); result.setCheckExplanation(FLAG_REASON_DISSIMILAR_MS1_RATIO, "Singlepeak=" + Rounder.round(singlePeakRatio, 3) + " algorithm=" + Rounder.round(algRatio, 3)); } } } //if (reason == FLAG_REASON_DISSIMILAR_MS1_RATIO) System.err.println("****Scan " + event.getScan() + ", reasonDesc=" + reasonDesc); float lightKl2Peaks = 0f; float heavyKl2Peaks = 0f; //check the KL of both "features". If peaks overlapping, calculate a special ideal distribution for heavy if (result.isGood() ||shouldPerformAllChecks) { lightKl2Peaks = scaleAndCalcKL(lightMass, lightPeakIntensities, 2); heavyKl2Peaks = 0f; if (lightHasSignificantHeavyOverlap) { //if there's significant overlap, calculate a new template peak distribution for heavy, //based on the ratio that the algorithm gives us float[] heavyIdealDistNoOverwrite = Spectrum.Poisson(heavyPeaksSummary.monoisotopicMass); float[] heavyIdealDist = new float[heavyIdealDistNoOverwrite.length]; System.arraycopy(heavyIdealDistNoOverwrite, 0, heavyIdealDist, 0, heavyIdealDist.length); float[] lightIdealDist = Spectrum.Poisson(lightPeaksSummary.monoisotopicMass); int numPeaksOverlap = lightIdealDist.length - numPeaksSeparation; //calculate a new sum for the ideal heavy peaks float newHeavySum = 1; for (int i=0; i<numPeaksOverlap; i++) { float thisPeakExtra = (lightIdealDist[i+numPeaksSeparation] * algRatio); heavyIdealDist[i] += thisPeakExtra; newHeavySum += thisPeakExtra; } //make the new heavy distribution sum to 1 for (int i=0; i<heavyIdealDist.length; i++) { heavyIdealDist[i] /= (newHeavySum); } heavyKl2Peaks = scaleAndCalcKL(heavyIdealDist, heavyPeakIntensities, 2); } else heavyKl2Peaks = scaleAndCalcKL(heavyPeaksSummary.monoisotopicMass, heavyPeakIntensities, 2); float klDiff = Math.abs(lightKl2Peaks - heavyKl2Peaks); float klRatio = lightKl2Peaks / heavyKl2Peaks; _log.debug("Light KL: " + lightKl2Peaks + ", Heavy KL: " + heavyKl2Peaks + ": diff=" + klDiff + ", ratio=" + klRatio); if (klDiff > maxKlDiff && klRatio < minKlRatio && shouldFlagDifferentKL) { //The KL scores are significantly different. But if it's a very extreme ratio and the worse score is on //the /less/ intense one, we don't care (it doesn't matter if there's something coeluting) if ((algRatio < (extremeRatioLow*2) && lightKl2Peaks > heavyKl2Peaks) || (algRatio > (extremeRatioHigh*2) && heavyKl2Peaks > lightKl2Peaks)) { result.setCheckExplanation(FLAG_REASON_DISSIMILAR_KL, "KL scores different, but worse one is low-abundance"); } else { //OK, no excuses, KL scores are just different. Flag. result.setFlag(FLAG_REASON_DISSIMILAR_KL, true); result.setCheckExplanation(FLAG_REASON_DISSIMILAR_KL, "light KL=" + Rounder.round(lightKl2Peaks,3) + " heavy=" + Rounder.round(heavyKl2Peaks,3) + " diff=" + Rounder.round(klDiff,3) + " ratio=" + Rounder.round(klRatio,3)); } } if ((result.isGood() || shouldPerformAllChecks) && (lightKl2Peaks > max2PeakKL || heavyKl2Peaks > max2PeakKL)) { String explanation = ""; result.setFlag(FLAG_REASON_BIG_2PEAK_KL, true); if (lightKl2Peaks > max2PeakKL) explanation = explanation + "light KL=" + lightKl2Peaks + " "; if (heavyKl2Peaks > max2PeakKL) explanation = explanation + "heavy KL=" + heavyKl2Peaks; result.setCheckExplanation(FLAG_REASON_BIG_2PEAK_KL, explanation); } } //dhmay moving this check down to be last, since it potentially involves feature-finding //See if the intensity of the peak 1Da below either monoisotope is high enough to worry about. if (result.isGood() || shouldPerformAllChecks) { Pair<Boolean, String> statusAndDesc = checkCoelutingPeptides(intensityBelowLight, lightPeakIntensities, intensityBelowHeavy, heavyPeakIntensities, lightMass, event.getLightMz(), heavyMass, event.getHeavyMz(), event.getCharge(), run, event.getFirstHeavyQuantScan(), event.getLastHeavyQuantScan(), lightHasSignificantPeakBelowHeavy, algRatio, event.getScan(), lightKl2Peaks, heavyKl2Peaks); result.setCheckExplanation(FLAG_REASON_COELUTING, statusAndDesc.second); result.setFlag(FLAG_REASON_COELUTING, statusAndDesc.first); } _log.debug(result.toString()); result.setSinglePeakRatio(singlePeakRatio); for (int i=0; i<flagReasonCodes.length; i++) { if (i != FLAG_REASON_OK && result.getFlagBitmap()[i]) { result.setBad(); break; } } event.setAlgorithmicAssessment(result); return result; } /** * Check for coeluting peptides that might explain a high below-monoisotope peak. * Pulling this out as a separate method because it got kind of complicated. * @param intensityBelowLight * @param lightPeakIntensities * @param intensityBelowHeavy * @param heavyPeakIntensities * @param lightMass * @param lightMz * @param heavyMass * @param heavyMz * @param charge * @param run * @param firstScan * @param lastScan * @param lightHasSignificantPeakBelowHeavy * @param algRatio * @param scan * @param lightKl2Peaks * @param heavyKl2Peaks * @return */ protected Pair<Boolean, String> checkCoelutingPeptides(float intensityBelowLight, List<Float> lightPeakIntensities, float intensityBelowHeavy, List<Float> heavyPeakIntensities, float lightMass, float lightMz, float heavyMass, float heavyMz, int charge, MSRun run, int firstScan, int lastScan, boolean lightHasSignificantPeakBelowHeavy, float algRatio, int scan, float lightKl2Peaks, float heavyKl2Peaks) { boolean resultIsBad = false; String resultDesc = ""; float belowIntensityRatioLight = intensityBelowLight / lightPeakIntensities.get(0); //Only evaluate heavy if if light/heavy peaks not overlapping float belowIntensityRatioHeavy = lightHasSignificantPeakBelowHeavy ? 0 : intensityBelowHeavy / heavyPeakIntensities.get(0); _log.debug("BELOW: light=" + intensityBelowLight + ", ratio="+belowIntensityRatioLight + ", heavy=" +intensityBelowHeavy + ", ratio=" + belowIntensityRatioHeavy); if (Math.max(belowIntensityRatioLight, belowIntensityRatioHeavy) > peakBelowIntensityRatioCutoff) { //extra check to ignore coeluting peptides that could only make an extreme //ratio more extreme boolean correctedRatioCouldOnlyGetSmaller = belowIntensityRatioHeavy < peakBelowIntensityRatioCutoff; boolean correctedRatioCouldOnlyGetBigger = belowIntensityRatioLight < peakBelowIntensityRatioCutoff; if (!(correctedRatioCouldOnlyGetSmaller && algRatio <= extremeRatioLow) && !(correctedRatioCouldOnlyGetBigger && algRatio >= extremeRatioHigh)) { //_log.setLevel(Level.DEBUG); _log.debug("\n**Coelute, SCAN " + scan + ", charge=" + charge + ", lightmz: " + lightMz + ", heavymz: " + heavyMz + ", light ratio: " + belowIntensityRatioLight + ", heavy ratio: " + belowIntensityRatioHeavy); //OK, we've got a potentially problematic event due to coeluting peptide. //Now we check to see if that peak below the monoisotope is actually a problem. boolean lightCheckedAndOK = false; if (belowIntensityRatioLight >= peakBelowIntensityRatioCutoff) { _log.debug("\tCheck light"); if ((lightKl2Peaks < 1f) && (scalePeaksSum1(lightPeakIntensities).get(0) >= 0.1) && checkPeakBelowExplainedByOtherChargeFeature(lightMz, charge, run, firstScan, lastScan)) { resultDesc = "Coeluting peak below light, but explained by other feature"; lightCheckedAndOK = true; } else { resultIsBad = true; resultDesc = "Coeluting intensity ratio LIGHT=" + Rounder.round(belowIntensityRatioLight,3) + " heavy=" + Rounder.round(belowIntensityRatioHeavy,3); } } if ((resultIsBad || shouldPerformAllChecks) && belowIntensityRatioHeavy >= peakBelowIntensityRatioCutoff) { _log.debug("\tCheck heavy"); if ((heavyKl2Peaks < 1f) && (scalePeaksSum1(heavyPeakIntensities).get(0) >= 0.1) && checkPeakBelowExplainedByOtherChargeFeature(heavyMz, charge, run, firstScan, lastScan)) { if (lightCheckedAndOK) resultDesc = "Coeluting peak below light and heavy, but explained by other features"; else resultDesc = "Coeluting peak below heavy, but explained by other feature"; } else { resultIsBad = true; resultDesc = "Coeluting intensity ratio light=" + Rounder.round(belowIntensityRatioLight,3) + " HEAVY=" + Rounder.round(belowIntensityRatioHeavy,3); } } //_log.setLevel(Level.INFO); } else _log.debug("Giving coeluting peptide a pass because of extreme ratio: " + algRatio + ", could only get smaller? " + correctedRatioCouldOnlyGetSmaller + ", bigger? " + correctedRatioCouldOnlyGetBigger); } return new Pair<Boolean, String>(resultIsBad, resultDesc); } /** * Scale a set of peak intensities so they sum to 1 * @param peakIntensities * @return */ protected List<Float> scalePeaksSum1(List<Float> peakIntensities) { List<Float> result = new ArrayList<Float>(); float peakSum = 0f; for (float peakIntensity : peakIntensities) peakSum += peakIntensity; for (float peakIntensity : peakIntensities) result.add(peakIntensity / peakSum); return result; } /** * Check for a peptide from another charge that might explain a big below-monoisotope peak. If the peptide's * from another charge, then the peaks might not overlap our peptide's peaks. Right now, only handling charge * 2 vs. 3. * * Require at least 2 peaks in the clear and that the peak below the monoisotope be at most proportion * maxCoelutingPeptideContributionOfBelowMonoPeak of the total distribution (to ignore one-peak wonders, which are * indistinguishable from an actual coeluting peptide in the same charge), and that the KL of the other-charge * feature be low * @param mz * @param charge * @param run * @param firstScan * @param lastScan * @return */ protected boolean checkPeakBelowExplainedByOtherChargeFeature(float mz, int charge, MSRun run, int firstScan, int lastScan) { float peakSeparationMass = (float) MassCalibrationUtilities.DEFAULT_THEORETICAL_MASS_WAVELENGTH; List<Float> monoMassesToCheckPeaks = new ArrayList<Float>(); List<Integer> numbersOfPeaksToCheck = new ArrayList<Integer>(); float mzPeakBelow = mz - (peakSeparationMass / (float) charge); float[] idealPeaksThisMass = null; float massPeakBelow = 0f; int otherPeptideCharge = 0; switch (charge) { case 2: otherPeptideCharge = 3; massPeakBelow = MassUtilities.calcMassForMzAndCharge(mzPeakBelow, otherPeptideCharge); idealPeaksThisMass = Spectrum.Poisson(massPeakBelow); if (idealPeaksThisMass[1] < maxCoelutingPeptideContributionOfBelowMonoPeak) { _log.debug("\tcharge 2, type 1"); monoMassesToCheckPeaks.add(massPeakBelow-peakSeparationMass); numbersOfPeaksToCheck.add(3); } if (idealPeaksThisMass[0] < maxCoelutingPeptideContributionOfBelowMonoPeak) { _log.debug("\tcharge 2, type 2"); monoMassesToCheckPeaks.add(massPeakBelow); numbersOfPeaksToCheck.add(3); } break; case 3: otherPeptideCharge = 2; massPeakBelow = MassUtilities.calcMassForMzAndCharge(mzPeakBelow, otherPeptideCharge); idealPeaksThisMass = Spectrum.Poisson(massPeakBelow); if (idealPeaksThisMass[1] < maxCoelutingPeptideContributionOfBelowMonoPeak) { _log.debug("\tcharge 3, type 1"); monoMassesToCheckPeaks.add(massPeakBelow-peakSeparationMass); numbersOfPeaksToCheck.add(3); } if (idealPeaksThisMass[0] < maxCoelutingPeptideContributionOfBelowMonoPeak) { _log.debug("\tcharge 3, type 2"); monoMassesToCheckPeaks.add(massPeakBelow); numbersOfPeaksToCheck.add(2); } break; default: return false; //todo: handle charge 1, 4 } //todo: make this twice as efficient by pulling out intensities for multiple features at same time for (int monoIndex=0; monoIndex<monoMassesToCheckPeaks.size(); monoIndex++) { float monoMassToCheck = monoMassesToCheckPeaks.get(monoIndex); int numPeaksToCheck = numbersOfPeaksToCheck.get(monoIndex); _log.debug("\tchecking " + monoMassToCheck); float mzTolerance = (MassUtilities.calculateAbsoluteDeltaMass( monoMassToCheck, peakPPMTolerance, FeatureSetMatcher.DELTA_MASS_TYPE_PPM)) / otherPeptideCharge; float[] mzValues = new float[numPeaksToCheck]; for (int i=0; i<mzValues.length; i++) { mzValues[i] = MassUtilities.calcMzForMassAndCharge(monoMassToCheck + (i * peakSeparationMass), otherPeptideCharge); } float[] otherPeptideIntensities = extractPeakSumIntensities(firstScan, lastScan, mzValues, mzTolerance, run); if (_log.isDebugEnabled()) for (int i=0; i<mzValues.length; i++) System.err.println("\t\tmz: " + mzValues[i] + ", int: " + otherPeptideIntensities[i]); float otherPeptideKL = scaleAndCalcKL(monoMassToCheck, otherPeptideIntensities); _log.debug("\tKL: " + otherPeptideKL); if (otherPeptideKL < maxKLCoelutingOtherChargePeptide) { _log.debug("GOOD!"); return true; } else _log.debug("BAD!"); } return false; } /** * NEVER FULLY IMPLEMENTED. This approach doesn't look promising, because feature-finding works really badly * when there are multiple features jumbled together * @param intensityBelowLight * @param lightPeakIntensities * @param intensityBelowHeavy * @param heavyPeakIntensities * @param lightMass * @param lightMz * @param heavyMass * @param heavyMz * @param charge * @param run * @param numPeaksToUse * @param firstScan * @param lastScan * @return */ // protected int checkCoelutingFeatures(float intensityBelowLight, List<Float> lightPeakIntensities, // float intensityBelowHeavy, List<Float> heavyPeakIntensities, // float lightMass, float lightMz, float heavyMass, float heavyMz, // float charge, MSRun run, int numPeaksToUse, // int firstScan, int lastScan) // { // //m/z tolerance around each peak, for this charge and mass // float peakMassTolerance = MassUtilities.calculateAbsoluteDeltaMass(heavyMass, peakPPMTolerance, // FeatureSetMatcher.DELTA_MASS_TYPE_PPM); // // Class featureStrategyClass = FeatureExtractor.getDefaultClass(); // float featureFindingMzRangeLow = lightMz - (2.0f / charge) - 0.1f; // float featureFindingMzRangeHigh = heavyMz + ((numPeaksToUse + 3) / charge) + 0.1f; // FloatRange mzRangeExtract = new FloatRange(featureFindingMzRangeLow, featureFindingMzRangeHigh); // int firstScanIndexWithSlop = Math.max(run.getIndexForScanNum(firstScan)-2, 0); // int lastScanIndexWithSlop = Math.min(run.getScanCount()-1, run.getIndexForScanNum(lastScan) + 2); // //System.err.println(" Range: scan: " + run.getScanNumForIndex(firstScanIndexWithSlop) + ", " + run.getScanNumForIndex(lastScanIndexWithSlop) + ". mz: " + // featureFindingMzRangeLow + "," + featureFindingMzRangeHigh); // FeatureFinder featureFinder = // new FeatureFinder(run, firstScanIndexWithSlop, lastScanIndexWithSlop - firstScanIndexWithSlop + 1, // PeakCombiner.DEFAULT_MAX_CHARGE, // mzRangeExtract, // featureStrategyClass, false); // Feature[] allRegionFeatures = null; // try // { // allRegionFeatures = featureFinder.findPeptides().getFeatures(); // } // catch (InterruptedException e) // {} // // //todo: this is inefficient, hitting all the features in turn. Should store these in a tree // Arrays.sort(allRegionFeatures, new Feature.MassAscComparator()); // Feature lightFeature = null; // Feature heavyFeature = null; // boolean pastLightMass = false; // boolean pastHeavyMass = false; // //assign the most-intense features with m/z of the light and heavy monoisotopes // for (Feature feature : allRegionFeatures) // { // //first, check to see if we're beyond the m/z range we care about for feature monoisotopic masses // float featureMass = feature.getMass(); // if (!pastLightMass) // pastLightMass = featureMass > lightMass + peakMassTolerance; // pastHeavyMass = featureMass > heavyMass + peakMassTolerance; // if (pastHeavyMass) // break; //System.err.println("Mass: " + feature.getMass() + ", mz: " + feature.getMz() + ", charge: " + feature.getCharge() + ", peaks: " + feature.getPeaks() + ", scan: " + feature.getScan()); // if (!pastLightMass && Math.abs(featureMass - lightMass) < peakMassTolerance) // { // if (lightFeature == null || feature.getIntensity() > lightFeature.getIntensity()) // lightFeature = feature; // } // if (pastLightMass && Math.abs(featureMass - heavyMass) < peakMassTolerance) // { // if (heavyFeature == null || feature.getIntensity() > heavyFeature.getIntensity()) // heavyFeature = feature; // } // } //System.err.println("FEATURES: " + allRegionFeatures.length + ", LIGHTMASS: " + lightMass + ", HEAVYMASS: " + heavyMass + ", LIGHT: " + (lightFeature != null) + ", HEAVY: " + (heavyFeature != null)); // // return FLAG_REASON_COELUTING; // } //Utility methods for calculating KL on various things. Move elsewhere? /** * Calculate a KL score for a list of peak intensities. Have to normalize intensity first by dividing by peak sum * @param mass * @param peakIntensities * @return */ protected float scaleAndCalcKL(float mass, List<Float> peakIntensities) { return scaleAndCalcKL(Spectrum.Poisson(mass), peakIntensities, peakIntensities.size()); } protected float scaleAndCalcKL(float mass, float[] peakIntensities) { return scaleAndCalcKL(Spectrum.Poisson(mass), peakIntensities, peakIntensities.length); } /** * Calculate a KL score for a list of peak intensities. Have to normalize intensity first by dividing by peak sum * @param mass * @param peakIntensities * @param numPeaksToUse * @return */ protected float scaleAndCalcKL(float mass, List<Float> peakIntensities, int numPeaksToUse) { return scaleAndCalcKL(Spectrum.Poisson(mass), peakIntensities, numPeaksToUse); } protected float scaleAndCalcKL(float[] idealPeaks, List<Float> peakIntensities, int numPeaksToUse) { float[] peakIntensitiesFloat = new float[peakIntensities.size()]; for (int i=0; i<peakIntensities.size(); i++) peakIntensitiesFloat[i] = peakIntensities.get(i); return scaleAndCalcKL(idealPeaks, peakIntensitiesFloat, numPeaksToUse); } protected float scaleAndCalcKL(float[] idealPeaks, float[] peakIntensities) { return scaleAndCalcKL(idealPeaks, peakIntensities, numPeaksToUse); } protected float scaleAndCalcKL(float[] idealPeaks, float[] peakIntensities, int numPeaksToUse) { float[] peakIntensitiesXPeaks = peakIntensities; float[] idealPeaksXPeaks = idealPeaks; if (numPeaksToUse < idealPeaks.length || (idealPeaks.length != peakIntensities.length)) { peakIntensitiesXPeaks = new float[numPeaksToUse]; idealPeaksXPeaks = new float[numPeaksToUse]; float sumRealPeaks = 0; float sumIdealPeaks = 0; for (int i=0; i<numPeaksToUse; i++) { if (i < peakIntensities.length) peakIntensitiesXPeaks[i] = peakIntensities[i]; peakIntensitiesXPeaks[i] = Math.max(0.1f, peakIntensitiesXPeaks[i]); sumRealPeaks += peakIntensitiesXPeaks[i]; sumIdealPeaks += idealPeaks[i]; idealPeaksXPeaks[i] = idealPeaks[i]; } for (int i = 0; i < numPeaksToUse; i++) { peakIntensitiesXPeaks[i] /= sumRealPeaks; idealPeaksXPeaks[i] /= sumIdealPeaks; //System.err.println(peakIntensities6Peaks[i]); } } return PeakOverlapCorrection.calcKLUsingTemplate(idealPeaksXPeaks, peakIntensitiesXPeaks); } /** * Return the peak quant summary, with intensities of all peaks. * Use raw intensities, NOT resampled * @param firstScan * @param lastScan * @param mass * @param mz * @param charge * @param run * @param numPeaks * @return */ public QuantPeakSetSummary calcPeakIntensities(int firstScan, int lastScan, float mass, float mz, float charge, MSRun run, int numPeaks) { float mzTol = (MassUtilities.calculateAbsoluteDeltaMass( mass, peakPPMTolerance, FeatureSetMatcher.DELTA_MASS_TYPE_PPM)) / charge; QuantPeakSetSummary result = new QuantPeakSetSummary(); result.monoisotopicMass = mass; result.peakSumIntensities = new ArrayList<Float>(); float[] mzValues = new float[numPeaks+1]; for (int peakIndex=-1; peakIndex<numPeaks; peakIndex++) { mzValues[peakIndex+1] = mz + ((peakIndex * (float) MassCalibrationUtilities.DEFAULT_THEORETICAL_MASS_WAVELENGTH) / charge); } float[] peakIntensities = extractPeakSumIntensities(firstScan, lastScan, mzValues, mzTol, run); result.sumIntensityPeakBelow = peakIntensities[0]; for (int i=1; i<numPeaks+1; i++) result.peakSumIntensities.add(peakIntensities[i]); return result; } /** * Extract the maximum intensities of the peaks at the defined mz values, within mzTolerance, summed over * all scans in the range firstScan..lastScan * @param firstScan * @param lastScan * @param mzValues * @param mzTolerance * @param run * @return */ protected float[] extractPeakSumIntensities(int firstScan, int lastScan, float[] mzValues, float mzTolerance, MSRun run) { //Define the scan index range, making sure not to go out of bounds //assumes heavy and light scan extents same int firstScanIndex = Math.max(Math.abs( run.getIndexForScanNum(firstScan)) - numScansAroundEventToConsider, 0); int lastScanIndex = Math.min(Math.abs( run.getIndexForScanNum(lastScan)) + numScansAroundEventToConsider, run.getScanCount()-1); lastScanIndex = Math.max(firstScanIndex, lastScanIndex); Scan[] scans = FeatureFinder.getScans(run, firstScanIndex, lastScanIndex - firstScanIndex + 1); int numPeaks = mzValues.length; float[] result = new float[numPeaks]; for (int scanIndex = 0; scanIndex < scans.length; scanIndex++) { float[][] spectrum = scans[scanIndex].getSpectrum(); for (int i=0; i<numPeaks; i++) { float peakMzCenter = mzValues[i]; int startIndex = Arrays.binarySearch(spectrum[0], peakMzCenter - mzTolerance); startIndex = startIndex < 0 ? -(startIndex+1) : startIndex; int endIndex = Arrays.binarySearch(spectrum[0], peakMzCenter + mzTolerance); endIndex = endIndex < 0 ? -(endIndex+1) : endIndex; //dhmay 20091222, in a hurry -- range check if (endIndex >= spectrum[1].length) endIndex = spectrum[1].length-1; float maxIntensityThisPeak = 0; for (int j=startIndex; j<=endIndex; j++) maxIntensityThisPeak = Math.max(maxIntensityThisPeak, spectrum[1][j]); result[i] += maxIntensityThisPeak; } } return result; } public float getPeakPPMTolerance() { return peakPPMTolerance; } public void setPeakPPMTolerance(float peakPPMTolerance) { this.peakPPMTolerance = peakPPMTolerance; } public static Logger getLogger() { return _log; } public float getMinFlagRatio() { return minFlagRatio; } public void setMinFlagRatio(float minFlagRatio) { this.minFlagRatio = minFlagRatio; } public float getMaxFlagRatio() { return maxFlagRatio; } public void setMaxFlagRatio(float maxFlagRatio) { this.maxFlagRatio = maxFlagRatio; } public boolean isShowCharts() { return showCharts; } public void setShowCharts(boolean showCharts) { this.showCharts = showCharts; } // public int getLabelType() // { // return labelType; // } // public void setLabelType(int labelType) // { // this.labelType = labelType; // } /** * Data structure to pass around summary information about one set of peaks (light or heavy) */ public static class QuantPeakSetSummary { protected float monoisotopicMass; protected float sumIntensityPeakBelow; protected List<Float> peakSumIntensities; public QuantPeakSetSummary() { peakSumIntensities = new ArrayList<Float>(); } public String toString() { StringBuffer resultBuf = new StringBuffer("QuantPeakSetSummary, monoMass = " + monoisotopicMass + ", peakBelow=" + sumIntensityPeakBelow + ", peak intensities: "); for (int i=0; i<peakSumIntensities.size(); i++) resultBuf.append(" " + (i+1) + "=" + peakSumIntensities.get(i)); return resultBuf.toString(); } public List<Float> getPeakSumIntensities() { return peakSumIntensities; } } /** * Class for communicating assessment results */ public static class QuantEventAssessment { protected float singlePeakRatio; protected boolean[] flagBitmap = new boolean[flagReasonCodes.length]; protected String[] checkExplanations = new String[flagReasonCodes.length]; public String toString() { //todo: explanations? StringBuffer result = new StringBuffer("Assessment. Flags:"); for (int i=0; i<flagBitmap.length; i++) result.append(" " + flagReasonCodes[i] + "=" + flagBitmap[i]); result.append(", single-peak: " + singlePeakRatio); return result.toString(); } public QuantEventAssessment(int status, String explanation) { setStatus(status); setExplanation(explanation); } public QuantEventAssessment(boolean[] flagBitmap, String explanation) { this.flagBitmap = flagBitmap; setCheckExplanation(getStatus(), explanation); } public int getStatus() { for (int i=0; i<flagBitmap.length; i++) if (flagBitmap[i]) return i; return -1; } public void setBad() { setFlag(FLAG_REASON_OK, false); } public void setFlag(int status, boolean value) { flagBitmap[status] = value; } public void setStatus(int status) { flagBitmap = new boolean[flagReasonCodes.length]; flagBitmap[status] = true; } public void setCheckExplanation(int status, String description) { checkExplanations[status] = description; } public String getCheckExplanation(int status) { return checkExplanations[status]; } public String getExplanation() { return getCheckExplanation(getStatus()); } public void setExplanation(String explanation) { setCheckExplanation(getStatus(), explanation); } public float getSinglePeakRatio() { return singlePeakRatio; } public void setSinglePeakRatio(float singlePeakRatio) { this.singlePeakRatio = singlePeakRatio; } public boolean isGood() { return flagBitmap[FLAG_REASON_OK]; } public boolean[] getFlagBitmap() { return flagBitmap; } public void setFlagBitmap(boolean[] flagBitmap) { this.flagBitmap = flagBitmap; } } public boolean isShouldFlagIfMissingPeaks() { return shouldFlagIfMissingPeaks; } public void setShouldFlagIfMissingPeaks(boolean shouldFlagIfMissingPeaks) { this.shouldFlagIfMissingPeaks = shouldFlagIfMissingPeaks; } public boolean isShouldFlagDifferentKL() { return shouldFlagDifferentKL; } public void setShouldFlagDifferentKL(boolean shouldFlagDifferentKL) { this.shouldFlagDifferentKL = shouldFlagDifferentKL; } public boolean isShouldPerformAllChecks() { return shouldPerformAllChecks; } public void setShouldPerformAllChecks(boolean shouldPerformAllChecks) { this.shouldPerformAllChecks = shouldPerformAllChecks; } public int getNumPeaksToUse() { return numPeaksToUse; } public void setNumPeaksToUse(int numPeaksToUse) { this.numPeaksToUse = numPeaksToUse; } }