/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.amt; import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet; import org.fhcrc.cpl.toolbox.proteomics.feature.Feature; import org.fhcrc.cpl.toolbox.proteomics.feature.AnalyzeICAT; import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.IsotopicLabelExtraInfoDef; import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.MS2ExtraInfoDef; import org.fhcrc.cpl.toolbox.proteomics.commandline.arguments.ModificationListArgumentDefinition; import org.fhcrc.cpl.toolbox.statistics.BasicStatistics; import org.fhcrc.cpl.toolbox.ApplicationContext; import org.fhcrc.cpl.toolbox.proteomics.PeptideGenerator; import org.fhcrc.cpl.toolbox.proteomics.MS2Modification; import org.fhcrc.cpl.toolbox.proteomics.ModifiedAminoAcid; import org.apache.log4j.Logger; import java.util.*; /** * Methods for performing labeled quantitation on AMT data. * * This is very simple. After AMT matching, we simply locate all peptides for which we've found * both the light and heavy pair (given an isotopic label definition) with the same modification * state and charge. We consider that one feature and record the light area, heavy area, and ratio */ public class AmtLabeledQuant { protected static Logger _log = Logger.getLogger(AmtLabeledQuant.class); //default acrylamide label public static final double ACRYLAMIDE_MASS_DIFF = 3.0101; public static final Character ACRYLAMIDE_RESIDUE = 'C'; protected AnalyzeICAT.IsotopicLabel isotopicLabel = DEFAULT_ISOTOPIC_LABEL; public static final AnalyzeICAT.IsotopicLabel DEFAULT_ISOTOPIC_LABEL = new AnalyzeICAT.IsotopicLabel((float) PeptideGenerator.AMINO_ACID_MONOISOTOPIC_MASSES[ACRYLAMIDE_RESIDUE], (float) (PeptideGenerator.AMINO_ACID_MONOISOTOPIC_MASSES[ACRYLAMIDE_RESIDUE] + ACRYLAMIDE_MASS_DIFF), ACRYLAMIDE_RESIDUE, 3); public static final MS2Modification[] DEFAULT_OTHER_MODS = new MS2Modification[] { ModificationListArgumentDefinition.safeCreateModificationFromString("C57.021"), ModificationListArgumentDefinition.safeCreateModificationFromString("M16V"), //Acrylamide light label weight ModificationListArgumentDefinition.safeCreateModificationFromString("C14.01557"), }; protected MS2Modification[] otherModifications = DEFAULT_OTHER_MODS; //TODO: this should probably be expressed in ppm. //TODO: come to think of it, mass tolerance really shouldn't come into play. //TODO: I should be identifying the modification state in the AMT match. protected static final double DEFAULT_MASS_SLOP = 0.1; public static final float DEFAULT_MIN_RATIO_HIGH_PROB = 0.7f; public static final float DEFAULT_MIN_RATIO_LOW_PROB = 0.05f; protected float minRatioHigherProbability = DEFAULT_MIN_RATIO_HIGH_PROB; protected float minRatioLowerProbability = DEFAULT_MIN_RATIO_LOW_PROB; protected double massSlop = DEFAULT_MASS_SLOP; protected boolean showCharts = false; protected List<Float> logRatiosAllFiles = new ArrayList<Float>(); protected boolean perCharge = true; float maxRatio = 15f; float minRatio = 1f/15f; public static final float DEFAULT_MIN_PROB_DIFF_WITHIN_STATE = 0.1f; protected float minProbDifferenceWithinState = DEFAULT_MIN_PROB_DIFF_WITHIN_STATE; public static final float DEFAULT_MAX_SECOND_BEST_PROB_WITHIN_STATE = 0.4f; protected float maxSecondBestProbWithinState = DEFAULT_MAX_SECOND_BEST_PROB_WITHIN_STATE; public AmtLabeledQuant() { } /** * do the actual work */ public void quantitate(FeatureSet featureSet) { Map<String, Map<Integer, List<Feature>>> peptideChargeFeatureListMap = new HashMap<String, Map<Integer, List<Feature>>>(); List<Float> possibleHeavyMasses = calculateHeavyLabelResidueMasses(); for (Feature feature : featureSet.getFeatures()) { String peptide = MS2ExtraInfoDef.getFirstPeptide(feature); if (peptide == null) continue; Map<Integer, List<Feature>> thisPeptideChargeFeatureListMap = peptideChargeFeatureListMap.get(peptide); if (thisPeptideChargeFeatureListMap == null) { thisPeptideChargeFeatureListMap = new HashMap<Integer, List<Feature>>(); peptideChargeFeatureListMap.put(peptide, thisPeptideChargeFeatureListMap); } int charge = feature.getCharge(); List<Feature> featureList = thisPeptideChargeFeatureListMap.get(charge); if (featureList == null) { featureList = new ArrayList<Feature>(); thisPeptideChargeFeatureListMap.put(charge, featureList); } featureList.add(feature); } List<Feature> featuresWithRatios = new ArrayList<Feature>(); List<Feature> featuresWithoutRatios = new ArrayList<Feature>(); List<Float> allLogRatios = new ArrayList<Float>(); for (String peptide : peptideChargeFeatureListMap.keySet()) { int numExpectedLabels = 1; numExpectedLabels = 0; for (int i=0; i<peptide.length(); i++) { if (peptide.charAt(i) == isotopicLabel.getResidue()) numExpectedLabels++; } //if no labeled residues this peptide, skip it if (numExpectedLabels == 0) { Map<Integer, List<Feature>> chargeFeatureMap = peptideChargeFeatureListMap.get(peptide); for (List<Feature> featureList : chargeFeatureMap.values()) featuresWithoutRatios.addAll(featureList); continue; } List<Feature> featuresWithRatiosThisPeptide = new ArrayList<Feature>(); for (int charge=1; charge<10; charge++) { Map<Integer, List<Feature>> thisPeptideChargeFeatureListMap = peptideChargeFeatureListMap.get(peptide); List<Feature> featureList = thisPeptideChargeFeatureListMap.get(charge); //no features this peptide this charge if (featureList == null) continue; //Take out all features that don't pass the lower probability threshold List<Feature> tooLowProbFeatureList = new ArrayList<Feature>(); for (Feature feature : featureList) if (MS2ExtraInfoDef.getPeptideProphet(feature) < minRatioLowerProbability) tooLowProbFeatureList.add(feature); featuresWithoutRatios.addAll(tooLowProbFeatureList); featureList.removeAll(tooLowProbFeatureList); if (featureList.size() == 1) { featuresWithoutRatios.add(featureList.get(0)); continue; } List<Feature> lowFeatureList = new ArrayList<Feature>(); List<Feature> highFeatureList = new ArrayList<Feature>(); for (Feature feature : featureList) { int numLabels = 0; List<ModifiedAminoAcid>[] modifiedAcids = MS2ExtraInfoDef.getModifiedAminoAcids(feature); if (modifiedAcids != null) { for (int i=0; i<modifiedAcids.length; i++) { if (peptide.charAt(i) == isotopicLabel.getResidue()) { if (modifiedAcids[i] != null) { for (ModifiedAminoAcid mod : modifiedAcids[i]) { //System.err.println("\tGot mod: " + mod + ", mass: " + mod.getMass() + ", expected: " + possibleHeavyMasses.get(0)); //System.err.println("\tGot mod: " + mod + ", mass: " + mod.getMass() + ", expected: " + labelHeavyMass); for (float possibleHeavyMass : possibleHeavyMasses) if (Math.abs(mod.getMass() - possibleHeavyMass) < massSlop) { numLabels++; break; } } } } } } if (numLabels == 0) lowFeatureList.add(feature); else if (numLabels == numExpectedLabels) highFeatureList.add(feature); else featuresWithoutRatios.add(feature); } //System.err.println("\t" + lowFeatureList.size() + ", " + highFeatureList.size()); if (lowFeatureList.isEmpty() || highFeatureList.isEmpty()) { featuresWithoutRatios.addAll(lowFeatureList); featuresWithoutRatios.addAll(highFeatureList); continue; } Feature lowWinner = pickWinnerForModState(lowFeatureList); Feature highWinner = pickWinnerForModState(highFeatureList); //System.err.println("Evaluating " + featureList.size() + " features..."); if (lowWinner != null && highWinner != null) { //System.err.println("\tRatio!"); Feature processedPair = processFeaturePair(lowWinner, highWinner); if (processedPair != null) { double maxProb = Math.max(MS2ExtraInfoDef.getPeptideProphet(lowWinner), MS2ExtraInfoDef.getPeptideProphet(highWinner)); MS2ExtraInfoDef.setPeptideProphet(processedPair, maxProb); featuresWithRatiosThisPeptide.add(processedPair); lowFeatureList.remove(lowWinner); highFeatureList.remove(highWinner); } // else ApplicationContext.setMessage("Tossing out extreme ratio"); featuresWithoutRatios.addAll(lowFeatureList); featuresWithoutRatios.addAll(highFeatureList); } else { ApplicationContext.setMessage("Can't calculate ratio for peptide " + peptide + " in charge " + charge + ": " + featureList.size() + " features, " + lowFeatureList.size() + " low and " + highFeatureList.size() + " high."); featuresWithoutRatios.addAll(lowFeatureList); featuresWithoutRatios.addAll(highFeatureList); } /* //todo: handle more than two better if (featureList.size() == 2) { Feature lightFeature = featureList.get(0); Feature heavyFeature = featureList.get(1); boolean addedRatio = false; double lightProb = MS2ExtraInfoDef.getPeptideProphet(lightFeature); double heavyProb = MS2ExtraInfoDef.getPeptideProphet(heavyFeature); double minProb = Math.min(lightProb,heavyProb); double maxProb = Math.max(lightProb,heavyProb); int lightLabelCount = IsotopicLabelExtraInfoDef.getLabelCount(lightFeature); int heavyLabelCount = IsotopicLabelExtraInfoDef.getLabelCount(heavyFeature); System.err.println("Label counts: " + lightLabelCount + ", " + heavyLabelCount); if ( minProb >= minRatioLowerProbability && maxProb >= minRatioHigherProbability) { double massDiff = heavyFeature.getMass() - lightFeature.getMass(); float massDiffError = (float) (massDiff - (numExpectedLabels * separation)); if (massDiffError < (massSlop)) { Feature processedPair = processFeaturePair(lightFeature, heavyFeature); if (processedPair != null) { MS2ExtraInfoDef.setPeptideProphet(processedPair, maxProb); featuresWithRatiosThisPeptide.add(processedPair); addedRatio = true; } } } if (!addedRatio) { featuresWithoutRatios.addAll(featureList); } } else if (featureList.size() > 2) { //OK, we've got too many features. Still, if the two //highest-probability features are separated, one light, one heavy, //that's a good sign, keep the ratio. //Otherwise, punt float highestProb = -1f; Feature featureWithHighestProb = null; for (Feature feature : featureList) { float prob = (float) MS2ExtraInfoDef.getPeptideProphet(feature); if (prob > highestProb) { featureWithHighestProb = feature; highestProb = prob; } } if (highestProb < minRatioHigherProbability) { featuresWithoutRatios.addAll(featureList); continue; } Feature featureWithSecondHighestProb = null; float secondHighestProb = -1f; for (Feature feature : featureList) { if (feature.equals(featureWithHighestProb)) continue; float prob = (float) MS2ExtraInfoDef.getPeptideProphet(feature); if (prob > secondHighestProb) { featureWithSecondHighestProb = feature; secondHighestProb = prob; } } Feature lightFeature = featureWithHighestProb; Feature heavyFeature = featureWithSecondHighestProb; if (featureWithSecondHighestProb.getMass() < featureWithHighestProb.getMass()) { heavyFeature = featureWithHighestProb; lightFeature = featureWithSecondHighestProb; } double massDiff = heavyFeature.getMass() - lightFeature.getMass(); float massDiffError = (float) (massDiff - (numExpectedLabels * separation)); if (massDiffError < (massSlop)) { Feature processedPair = processFeaturePair(lightFeature, heavyFeature); if (processedPair != null) featuresWithRatiosThisPeptide.add(processedPair); } else { ApplicationContext.infoMessage(featureList.size() + " features for peptide " + peptide + " in charge " + charge + ", punting"); featuresWithoutRatios.addAll(featureList); } } */ } if (featuresWithRatiosThisPeptide.size() > 0) { for (Feature feature : featuresWithRatiosThisPeptide) allLogRatios.add((float) Math.log(IsotopicLabelExtraInfoDef.getRatio(feature))); if (perCharge) { featuresWithRatios.addAll(featuresWithRatiosThisPeptide); for (Feature feature : featuresWithRatiosThisPeptide) allLogRatios.add((float) Math.log(IsotopicLabelExtraInfoDef.getRatio(feature))); } else { float[] ratiosThisPeptide = new float[featuresWithRatiosThisPeptide.size()]; for (int i=0; i<ratiosThisPeptide.length; i++) ratiosThisPeptide[i] = (float) IsotopicLabelExtraInfoDef.getRatio(featuresWithRatiosThisPeptide.get(i)); float ratioThisPeptide = BasicStatistics.geometricMean(ratiosThisPeptide); Feature representativeFeature = featuresWithRatiosThisPeptide.get(0); IsotopicLabelExtraInfoDef.setRatio(representativeFeature,ratioThisPeptide); double newLightIntensity = ratioThisPeptide * IsotopicLabelExtraInfoDef.getHeavyIntensity(representativeFeature); IsotopicLabelExtraInfoDef.setLightIntensity(representativeFeature, newLightIntensity); featuresWithRatios.add(representativeFeature); allLogRatios.add((float) Math.log(ratioThisPeptide)); } } } ApplicationContext.infoMessage("Features with ratios: " + featuresWithRatios.size()); List<Feature> allFeatures = new ArrayList<Feature>(); allFeatures.addAll(featuresWithRatios); allFeatures.addAll(featuresWithoutRatios); featureSet.setFeatures(allFeatures.toArray(new Feature[allFeatures.size()])); } /** * Returns the highest-probability feature in the list, or null if no feature stands * out above the rest enough * @param featuresInModState * @return */ protected Feature pickWinnerForModState(List<Feature> featuresInModState) { if (featuresInModState.size() == 1) return featuresInModState.get(0); float highestProb = -1f; float secondHighestProb = -1f; Feature highestProbFeature = null; for (Feature feature : featuresInModState) { float featureProb = (float) MS2ExtraInfoDef.getPeptideProphet(feature); if (featureProb > highestProb) { secondHighestProb = highestProb; highestProb = featureProb; highestProbFeature = feature; } else if (featureProb > secondHighestProb) { secondHighestProb = featureProb; } } if (secondHighestProb < maxSecondBestProbWithinState && (highestProb - secondHighestProb) > minProbDifferenceWithinState) return highestProbFeature; else return null; } protected Feature processFeaturePair(Feature lightFeature, Feature heavyFeature) { float lightIntensity = lightFeature.getIntensity(); float heavyIntensity = heavyFeature.getIntensity(); float ratio = (lightIntensity / heavyIntensity); if (ratio < minRatio || ratio > maxRatio) return null; IsotopicLabelExtraInfoDef.setRatio(lightFeature,ratio); IsotopicLabelExtraInfoDef.setLightIntensity(lightFeature, lightIntensity); IsotopicLabelExtraInfoDef.setHeavyIntensity(lightFeature, heavyIntensity); if (isotopicLabel != null) { IsotopicLabelExtraInfoDef.setLabel(lightFeature, isotopicLabel); } return lightFeature; } /** * calculate the masses that represent a heavy label on the relevant residue * * Warning -- we're not handling the case in which there are THREE variable modifications -- * the label and two others. But, come on, that's, you know, crazy. * @return */ protected List<Float> calculateHeavyLabelResidueMasses() { List<Float> lightLabelMasses = new ArrayList<Float>(); float lightMass = (float) PeptideGenerator.AMINO_ACID_MONOISOTOPIC_MASSES[isotopicLabel.getResidue()]; _log.debug("calculateHeavyLabelResidueMasses. Monoisotopic mass of " + isotopicLabel.getResidue() + ": " + lightMass); for (MS2Modification mod : otherModifications) { if (!mod.getVariable() && mod.getAminoAcid().charAt(0) == isotopicLabel.getResidue()) lightMass += mod.getMassDiff(); } _log.debug("calculateHeavyLabelResidueMasses. Mass of " + isotopicLabel.getResidue() + " with static mods: " + lightMass); lightLabelMasses.add(lightMass); for (MS2Modification mod : otherModifications) { //here's where we'd need to add code to handle combinations of multiple var mods if (mod.getVariable() && mod.getAminoAcid().charAt(0) == isotopicLabel.getResidue()) lightLabelMasses.add(lightMass + mod.getMassDiff()); } //I'm lazy. the var "lightLabelMasses" now will actually represent heavy label masses for (int i=0; i<lightLabelMasses.size(); i++) lightLabelMasses.set(i, lightLabelMasses.get(i) + (isotopicLabel.getHeavy() - isotopicLabel.getLight())); _log.debug("calculateHeavyLabelResidueMasses. Heavy masses: "); for (float mass : lightLabelMasses) _log.debug("\t" + mass); return lightLabelMasses; } public AnalyzeICAT.IsotopicLabel getIsotopicLabel() { return isotopicLabel; } public void setIsotopicLabel(AnalyzeICAT.IsotopicLabel isotopicLabel) { this.isotopicLabel = isotopicLabel; } public double getMassSlop() { return massSlop; } public void setMassSlop(double massSlop) { this.massSlop = massSlop; } public float getMinRatioHigherProbability() { return minRatioHigherProbability; } public void setMinRatioHigherProbability(float minRatioHigherProbability) { this.minRatioHigherProbability = minRatioHigherProbability; } public float getMinRatioLowerProbability() { return minRatioLowerProbability; } public void setMinRatioLowerProbability(float minRatioLowerProbability) { this.minRatioLowerProbability = minRatioLowerProbability; } public MS2Modification[] getOtherModifications() { return otherModifications; } public void setOtherModifications(MS2Modification[] otherModifications) { this.otherModifications = otherModifications; } }