package com.compomics.util.experiment.identification.ptm.ptmscores;
import com.compomics.util.Util;
import com.compomics.util.experiment.biology.Ion;
import com.compomics.util.experiment.biology.IonFactory;
import com.compomics.util.experiment.biology.NeutralLoss;
import com.compomics.util.experiment.biology.PTM;
import com.compomics.util.experiment.biology.Peptide;
import com.compomics.util.experiment.identification.spectrum_annotation.NeutralLossesMap;
import com.compomics.util.experiment.identification.matches.IonMatch;
import com.compomics.util.experiment.identification.matches.ModificationMatch;
import com.compomics.util.experiment.identification.spectrum_annotation.spectrum_annotators.PeptideSpectrumAnnotator;
import com.compomics.util.experiment.massspectrometry.MSnSpectrum;
import com.compomics.util.experiment.massspectrometry.Peak;
import com.compomics.util.experiment.massspectrometry.Spectrum;
import com.compomics.util.math.statistics.distributions.BinomialDistribution;
import com.compomics.util.experiment.identification.spectrum_annotation.AnnotationSettings;
import com.compomics.util.preferences.SequenceMatchingPreferences;
import com.compomics.util.experiment.identification.spectrum_annotation.SpecificAnnotationSettings;
import com.compomics.util.maps.KeyUtils;
import com.compomics.util.math.BasicMathFunctions;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import org.apache.commons.math.MathException;
import org.apache.commons.math.util.FastMath;
/**
* This class estimates the PhosphoRS score as described in
* http://www.ncbi.nlm.nih.gov/pubmed/22073976. Warning: the calculation in its
* present form is very slow for multiply modified peptides, peptides with many
* modification sites, and noisy spectra. Typically, avoid scoring deamidation
* sites.
*
* @author Marc Vaudel
*/
public class PhosphoRS {
/**
* The window size in m/z.
*/
public static final double WINDOW_SIZE = 100.0;
/**
* The maximal depth to use per window. 8 in the original paper, Must be
* greater than 1.
*/
public static final int MAX_DEPTH = 8;
/**
* The minimal depth to use per window.
*/
public static final int MIN_DEPTH = 2;
/**
* The number of binomial distributions kept in cache.
*/
private static int distributionCacheSize = 1000;
/**
* The binomial distributions cache.
*/
private static HashMap<Double, HashMap<Integer, BinomialDistribution>> distributionCache = new HashMap<Double, HashMap<Integer, BinomialDistribution>>();
/**
* Returns the PhosphoRS sequence probabilities for the PTM possible
* locations. 1 is the first amino acid. The N-terminus is indexed 0 and the
* C-terminus with the peptide length+1. Note that PTMs found on peptides
* must be loaded in the PTM factory
* (com.compomics.util.experiment.biology.PTMFactory), and if the scoring
* involves protein terminal PTMs, the protein sequences must be loaded in
* the sequence factory
* (com.compomics.util.experiment.identification.SequenceFactory) and
* indexed using the protein tree (see getDefaultProteinTree in
* SequenceFactory). PTMs of same mass should be scored together and given
* in the PTMs list. Neutral losses of mass equal to the mass of the PTM
* will be ignored. Neutral losses to be accounted for should be given in
* the SpecificAnnotationSettings and will be ignored if
* accountNeutralLosses is false.
*
* @param peptide the peptide of interest
* @param ptms the PTMs to score, for instance different phosphorylations
* (the PTMs are considered as indistinguishable, i.e. of same mass)
* @param spectrum the corresponding spectrum
* @param annotationSettings the global annotation settings
* @param specificAnnotationSettings the annotation settings specific to
* this peptide and spectrum
* @param accountNeutralLosses a boolean indicating whether or not the
* calculation shall account for neutral losses.
* @param sequenceMatchingPreferences the sequence matching preferences for
* peptide to protein mapping
* @param ptmSequenceMatchingPreferences the sequence matching preferences
* for PTM to peptide mapping
* @param spectrumAnnotator the peptide spectrum annotator to use for
* spectrum annotation, can be null
*
* @return a map site > phosphoRS site probability
*
* @throws java.io.IOException exception thrown whenever an error occurred
* while reading or writing a file
* @throws java.lang.InterruptedException exception thrown whenever a
* threading issue occurred while scoring the PTM
* @throws java.lang.ClassNotFoundException exception thrown whenever an
* error occurred while deserializing an object from the protein tree (the
* protein sequence index)
* @throws java.sql.SQLException exception thrown whenever an error occurred
* while interacting with the protein tree
* @throws org.apache.commons.math.MathException exception thrown whenever a
* math error occurred while computing the score.
*/
public static HashMap<Integer, Double> getSequenceProbabilities(Peptide peptide, ArrayList<PTM> ptms, MSnSpectrum spectrum, AnnotationSettings annotationSettings,
SpecificAnnotationSettings specificAnnotationSettings, boolean accountNeutralLosses, SequenceMatchingPreferences sequenceMatchingPreferences,
SequenceMatchingPreferences ptmSequenceMatchingPreferences, PeptideSpectrumAnnotator spectrumAnnotator)
throws IOException, InterruptedException, ClassNotFoundException, SQLException, MathException {
if (ptms.isEmpty()) {
throw new IllegalArgumentException("No PTM given for PhosphoRS calculation.");
}
if (spectrumAnnotator == null) {
spectrumAnnotator = new PeptideSpectrumAnnotator();
}
int nPTM = 0;
if (peptide.isModified()) {
for (ModificationMatch modMatch : peptide.getModificationMatches()) {
if (modMatch.isVariable()) {
for (PTM ptm : ptms) {
if (ptm.getName().equals(modMatch.getTheoreticPtm())) {
nPTM++;
}
}
}
}
}
if (nPTM == 0) {
throw new IllegalArgumentException("Given PTMs not found in the peptide for PhosphoRS calculation.");
}
double ptmMass = ptms.get(0).getMass();
NeutralLossesMap annotationNeutralLosses = specificAnnotationSettings.getNeutralLossesMap(),
scoringLossesMap = new NeutralLossesMap();
if (accountNeutralLosses) {
// here annotation are sequence and modification independant
for (String neutralLossName : annotationNeutralLosses.getAccountedNeutralLosses()) {
NeutralLoss neutralLoss = NeutralLoss.getNeutralLoss(neutralLossName);
if (Math.abs(neutralLoss.getMass() - ptmMass) > specificAnnotationSettings.getFragmentIonAccuracyInDa(spectrum.getMaxMz())) {
scoringLossesMap.addNeutralLoss(neutralLoss, 1, 1);
}
}
}
SpecificAnnotationSettings scoringAnnotationSetttings = specificAnnotationSettings.clone();
scoringAnnotationSetttings.setNeutralLossesMap(scoringLossesMap);
HashMap<Ion.IonType, HashSet<Integer>> ions = specificAnnotationSettings.getIonTypes(),
newIons = new HashMap<Ion.IonType, HashSet<Integer>>(1);
for (Ion.IonType ionType : ions.keySet()) {
if (ionType == Ion.IonType.PEPTIDE_FRAGMENT_ION) {
newIons.put(ionType, ions.get(ionType));
}
}
scoringAnnotationSetttings.setSelectedIonsMap(newIons);
ArrayList<Integer> possibleSites = new ArrayList<Integer>();
int peptideLength = peptide.getSequence().length();
for (PTM ptm : ptms) {
if (ptm.isNTerm()) {
if (peptide.getPotentialModificationSites(ptm, sequenceMatchingPreferences, ptmSequenceMatchingPreferences).contains(1)) {
possibleSites.add(0);
}
} else if (ptm.isCTerm()) {
if (peptide.getPotentialModificationSites(ptm, sequenceMatchingPreferences, ptmSequenceMatchingPreferences).contains(peptideLength)) {
possibleSites.add(peptideLength + 1);
}
} else {
for (int potentialSite : peptide.getPotentialModificationSites(ptm, sequenceMatchingPreferences, ptmSequenceMatchingPreferences)) {
if (!possibleSites.contains(potentialSite)) {
possibleSites.add(potentialSite);
}
}
}
}
Collections.sort(possibleSites);
HashMap<String, Double> profileToScoreMap = new HashMap<String, Double>(possibleSites.size());
HashMap<String, ArrayList<Integer>> profileToSitesMap = new HashMap<String, ArrayList<Integer>>(possibleSites.size());
if (possibleSites.size() > nPTM) {
spectrum = filterSpectrum(spectrum, scoringAnnotationSetttings);
Peptide noModPeptide = Peptide.getNoModPeptide(peptide, ptms);
ArrayList<ArrayList<Integer>> possibleProfiles = getPossibleModificationProfiles(possibleSites, nPTM);
ArrayList<String> possibleProfileKeys = new ArrayList<String>(possibleProfiles.size());
for (ArrayList<Integer> profile : possibleProfiles) {
String profileKey = KeyUtils.getKey(profile);
possibleProfileKeys.add(profileKey);
profileToSitesMap.put(profileKey, profile);
}
HashMap<String, Peptide> profileToPeptide = getPossiblePeptidesMap(peptide, ptms, possibleProfiles);
HashMap<String, HashMap<Integer, HashMap<Integer, ArrayList<Ion>>>> profileToPossibleFragments = getPossiblePeptideFragments(profileToPeptide, scoringAnnotationSetttings);
HashMap<String, Integer> profileToN = getPossiblePeptideToN(profileToPeptide, profileToPossibleFragments, spectrumAnnotator, scoringAnnotationSetttings);
HashMap<Double, ArrayList<String>> siteDeterminingIonsMap = getSiteDeterminingIons(noModPeptide, possibleProfiles, ptms, spectrumAnnotator, scoringAnnotationSetttings);
ArrayList<Double> siteDeterminingIons = new ArrayList<Double>(siteDeterminingIonsMap.keySet());
double minMz = spectrum.getMinMz(), maxMz = spectrum.getMaxMz(), tempMax;
HashMap<Double, Peak> reducedSpectrum = new HashMap<Double, Peak>();
double d = specificAnnotationSettings.getFragmentIonAccuracy();
double dOverW = d / WINDOW_SIZE;
dOverW = -FastMath.log10(dOverW);
int nDecimals = ((int) dOverW) + 1;
double halfWindow = WINDOW_SIZE / 2;
while (minMz < maxMz) {
tempMax = minMz + WINDOW_SIZE;
if (specificAnnotationSettings.isFragmentIonPpm()) {
Double refMz = minMz + halfWindow;
d = specificAnnotationSettings.getFragmentIonAccuracyInDa(refMz);
dOverW = d / WINDOW_SIZE;
dOverW = -FastMath.log10(dOverW);
nDecimals = ((int) dOverW) + 1;
}
HashMap<Double, Peak> extractedPeakList = spectrum.getSubSpectrum(minMz, tempMax);
if (!extractedPeakList.isEmpty()) {
MSnSpectrum tempSpectrum = new MSnSpectrum(spectrum.getLevel(), spectrum.getPrecursor(), spectrum.getSpectrumTitle()
+ "_PhosphoRS_minMZ_" + minMz, extractedPeakList, spectrum.getFileName());
ArrayList<MSnSpectrum> spectra = getReducedSpectra(tempSpectrum);
HashMap<String, HashSet<Double>> profileToSiteDeterminingIonsMz = new HashMap<String, HashSet<Double>>(siteDeterminingIons.size());
for (double ionMz : siteDeterminingIons) {
if (ionMz > minMz && ionMz <= tempMax) {
ArrayList<String> profiles = siteDeterminingIonsMap.get(ionMz);
for (String profileKey : profiles) {
HashSet<Double> mzs = profileToSiteDeterminingIonsMz.get(profileKey);
if (mzs == null) {
mzs = new HashSet<Double>(1);
profileToSiteDeterminingIonsMz.put(profileKey, mzs);
}
mzs.add(ionMz);
}
}
}
if (!profileToSiteDeterminingIonsMz.isEmpty()) {
ArrayList<ArrayList<Double>> deltas = new ArrayList<ArrayList<Double>>(spectra.size());
int nDeltas = 0;
for (MSnSpectrum currentSpectrum : spectra) {
ArrayList<Double> bigPs = new ArrayList<Double>(possibleProfileKeys.size());
ArrayList<Double> currentDeltas = new ArrayList<Double>(possibleProfileKeys.size());
ArrayList<HashSet<Double>> scored = new ArrayList<HashSet<Double>>(possibleProfileKeys.size());
boolean profileWithNoSiteDeterminingIonsScored = false;
double currentP = getp(currentSpectrum, WINDOW_SIZE, d, nDecimals);
for (String profileKey : possibleProfileKeys) {
HashSet<Double> tempSiteDeterminingIons = profileToSiteDeterminingIonsMz.get(profileKey);
if (tempSiteDeterminingIons == null) {
if (!profileWithNoSiteDeterminingIonsScored) {
profileWithNoSiteDeterminingIonsScored = true;
Peptide tempPeptide = profileToPeptide.get(profileKey);
HashMap<Integer, HashMap<Integer, ArrayList<Ion>>> possibleFragmentIons = profileToPossibleFragments.get(profileKey);
Integer n = profileToN.get(profileKey);
Double bigP = getPhosphoRsScoreP(tempPeptide, possibleFragmentIons, currentSpectrum, currentP, n, spectrumAnnotator, annotationSettings, scoringAnnotationSetttings);
BasicMathFunctions.checkProbabilityRange(bigP);
bigPs.add(bigP);
}
} else {
boolean alreadyScored = false;
for (HashSet<Double> scoredIons : scored) {
if (Util.sameSets(tempSiteDeterminingIons, scoredIons)) {
alreadyScored = true;
break;
}
}
if (!alreadyScored) {
Peptide tempPeptide = profileToPeptide.get(profileKey);
Integer n = profileToN.get(profileKey);
HashMap<Integer, HashMap<Integer, ArrayList<Ion>>> possibleFragmentIons = profileToPossibleFragments.get(profileKey);
Double bigP = getPhosphoRsScoreP(tempPeptide, possibleFragmentIons, currentSpectrum, currentP, n, spectrumAnnotator, annotationSettings, scoringAnnotationSetttings);
BasicMathFunctions.checkProbabilityRange(bigP);
bigPs.add(bigP);
scored.add(tempSiteDeterminingIons);
}
}
}
Collections.sort(bigPs);
for (int j = 0; j < bigPs.size() - 1; j++) {
Double pJ = bigPs.get(j);
Double pJPlusOne = bigPs.get(j + 1);
Double delta = pJ / pJPlusOne;
currentDeltas.add(delta);
}
if (currentDeltas.size() > nDeltas) {
nDeltas = currentDeltas.size();
}
deltas.add(currentDeltas);
}
int bestI = 0;
Double largestDelta = 0.0;
for (int j = 0; j < nDeltas && largestDelta == 0.0; j++) {
for (int i = 0; i < deltas.size(); i++) {
ArrayList<Double> tempDeltas = deltas.get(i);
if (j < tempDeltas.size() && tempDeltas.get(j) > largestDelta) {
largestDelta = tempDeltas.get(j);
bestI = i;
}
}
}
if (bestI < MIN_DEPTH - 1 && MIN_DEPTH -1 < spectra.size()) {
bestI = MIN_DEPTH - 1;
}
if (bestI > MAX_DEPTH - 1) {
bestI = MAX_DEPTH - 1;
}
reducedSpectrum.putAll(spectra.get(bestI).getPeakMap());
} else {
Double bestP = 0.0;
int bestI = 0;
HashMap<Integer, ArrayList<Ion>> expectedFragmentIons = spectrumAnnotator.getExpectedIons(scoringAnnotationSetttings, peptide);
int nExpectedFragmentIons = 0;
for (ArrayList<Ion> expectedIons : expectedFragmentIons.values()) {
nExpectedFragmentIons += expectedIons.size();
}
IonFactory fragmentFactory = IonFactory.getInstance();
HashMap<Integer, HashMap<Integer, ArrayList<Ion>>> possibleFragmentIons = fragmentFactory.getFragmentIons(peptide, scoringAnnotationSetttings);
for (int i = 0; i < spectra.size(); i++) {
MSnSpectrum currentSpectrum = spectra.get(i);
double currentP = getp(currentSpectrum, WINDOW_SIZE, d, nDecimals);
Double bigP = getPhosphoRsScoreP(peptide, possibleFragmentIons, currentSpectrum, currentP, nExpectedFragmentIons, spectrumAnnotator, annotationSettings, scoringAnnotationSetttings);
BasicMathFunctions.checkProbabilityRange(bigP);
if (bigP < bestP) {
bestP = bigP;
bestI = i;
}
}
reducedSpectrum.putAll(spectra.get(bestI).getPeakMap());
}
}
minMz = tempMax;
}
MSnSpectrum phosphoRsSpectrum = new MSnSpectrum(spectrum.getLevel(), spectrum.getPrecursor(), spectrum.getSpectrumTitle() + "_phosphoRS", reducedSpectrum, spectrum.getFileName());
double w = spectrum.getMaxMz() - spectrum.getMinMz();
if (specificAnnotationSettings.isFragmentIonPpm()) {
Double refMz = spectrum.getMinMz() + (w / 2);
d = specificAnnotationSettings.getFragmentIonAccuracyInDa(refMz);
}
dOverW = d / w;
dOverW = -FastMath.log10(dOverW);
nDecimals = ((int) dOverW) + 1;
double currentP = getp(phosphoRsSpectrum, w, d, nDecimals);
HashMap<String, Double> pInvMap = new HashMap<String, Double>(possibleProfileKeys.size());
Double pInvTotal = 0.0;
for (String profileKey : possibleProfileKeys) {
Peptide tempPeptide = profileToPeptide.get(profileKey);
Integer n = profileToN.get(profileKey);
HashMap<Integer, HashMap<Integer, ArrayList<Ion>>> possibleFragmentIons = profileToPossibleFragments.get(profileKey);
Double bigP = getPhosphoRsScoreP(tempPeptide, possibleFragmentIons, phosphoRsSpectrum, currentP, n, spectrumAnnotator, annotationSettings, scoringAnnotationSetttings);
BasicMathFunctions.checkProbabilityRange(bigP);
Double pInv = 1.0 / bigP;
pInvMap.put(profileKey, pInv);
pInvTotal += pInv;
}
if (pInvTotal <= 0) {
throw new IllegalArgumentException("PhosphoRS probability <= 0.");
}
for (String profileKey : possibleProfileKeys) {
Double phosphoRsProbability = pInvMap.get(profileKey) / pInvTotal; //in percent
BasicMathFunctions.checkProbabilityRange(phosphoRsProbability);
phosphoRsProbability *= 100;
profileToScoreMap.put(profileKey, phosphoRsProbability);
}
} else if (possibleSites.size() == nPTM) {
String profileKey = KeyUtils.getKey(possibleSites);
profileToScoreMap.put(profileKey, 100.0);
profileToSitesMap.put(profileKey, possibleSites);
} else {
throw new IllegalArgumentException("Found less potential modification sites than PTMs during PhosphoRS calculation. Peptide key: " + peptide.getKey());
}
HashMap<Integer, Double> scores = new HashMap<Integer, Double>();
for (String profile : profileToScoreMap.keySet()) {
Double score = profileToScoreMap.get(profile);
ArrayList<Integer> sites = profileToSitesMap.get(profile);
for (Integer site : sites) {
Double previousScore = scores.get(site);
if (previousScore == null) {
scores.put(site, score);
} else {
Double newScore = score + previousScore;
scores.put(site, newScore);
}
}
}
for (int site : possibleSites) {
if (!scores.keySet().contains(site)) {
throw new IllegalArgumentException("Site " + site + " not scored for modification " + ptmMass + " in spectrum " + spectrum.getSpectrumTitle() + " of file " + spectrum.getFileName() + ".");
}
}
return scores;
}
/**
* Returns the PhosphoRS score of the given peptide on the given spectrum.
* This method returns P and not -10.log(P).
*
* @param peptide the peptide of interest
* @param spectrum the spectrum of interest
* @param p the probability for a calculated fragment matching one of the
* experimental masses by chance as estimated by PhosphoRS
* @param n the number of expected ions
* @param spectrumAnnotator spectrum annotator
* @param annotationSettings the global annotation settings
* @param scoringAnnotationSettings the annotation settings specific to this
* peptide and spectrum
*
* @return the phosphoRS score
*/
private static Double getPhosphoRsScoreP(Peptide peptide, HashMap<Integer, HashMap<Integer, ArrayList<Ion>>> possiblePeptideFragments, MSnSpectrum spectrum, double p, int n, PeptideSpectrumAnnotator spectrumAnnotator,
AnnotationSettings annotationSettings, SpecificAnnotationSettings scoringAnnotationSettings) throws MathException {
BinomialDistribution distribution = null;
HashMap<Integer, BinomialDistribution> distributionsAtP = distributionCache.get(p);
boolean inCache = true;
if (distributionsAtP != null) {
distribution = distributionsAtP.get(n);
}
if (distribution == null) {
distribution = new BinomialDistribution(n, p);
inCache = false;
}
ArrayList<IonMatch> matches = spectrumAnnotator.getSpectrumAnnotation(annotationSettings, scoringAnnotationSettings, spectrum, peptide, possiblePeptideFragments);
int k = 0;
for (IonMatch ionMatch : matches) {
if (ionMatch.ion.getType() == Ion.IonType.PEPTIDE_FRAGMENT_ION) {
k++;
}
}
if (k == 0) {
return 1.0;
}
Double result = distribution.getDescendingCumulativeProbabilityAt((double) k);
if (!inCache && !distribution.isCacheEmpty()) {
addDistributionToCache(p, n, distribution);
}
return result;
}
/**
* Adds a distribution to the cache and manages the cache size.
*
* @param p the distribution p
* @param n the distribution n
*/
private static synchronized void addDistributionToCache(double p, int n, BinomialDistribution binomialDistribution) {
if (distributionCache.size() >= distributionCacheSize) {
HashSet<Double> keys = new HashSet<Double>(distributionCache.keySet());
for (Double key : keys) {
distributionCache.remove(key);
if (distributionCache.size() < distributionCacheSize) {
break;
}
}
}
HashMap<Integer, BinomialDistribution> distributionsAtP = distributionCache.get(p);
if (distributionsAtP == null) {
distributionsAtP = new HashMap<Integer, BinomialDistribution>(2);
distributionCache.put(p, distributionsAtP);
}
distributionsAtP.put(n, binomialDistribution);
}
/**
* The probability p for a calculated fragment matching one of the
* experimental masses by chance as estimated in the PhosphoRS algorithm.
*
* @param spectrum the spectrum studied
* @param w the m/z range considered
* @param d the m/z tolerance in daltons
* @param nDecimals the number of decimals to use
*
* @return the probability p for a calculated fragment matching one of the
* experimental masses by chance as estimated in the PhosphoRS algorithm.
*/
private static double getp(Spectrum spectrum, double w, double d, int nDecimals) {
if (w == 0.0) {
return 1.0;
}
int N = spectrum.getPeakMap().size();
if (N <= 1) {
return 1.0;
}
double p = d * N / w;
if (p > 1) {
p = 1;
}
double roundedP = Util.floorDouble(p, nDecimals);
return roundedP;
}
/**
* Returns a map of the different possible peptides for the different
* profiles.
*
* @param peptide the peptide of interest
* @param ptms the PTMs to score
* @param possibleProfiles the different profiles
*
* @return a map of the different peptides for the different profiles
*
* @throws IOException exception thrown whenever an error occurred while
* reading a protein sequence
* @throws InterruptedException exception thrown whenever an error occurred
* while reading a protein sequence
* @throws ClassNotFoundException if a ClassNotFoundException occurs
* @throws SQLException if an SQLException occurs
*/
private static HashMap<String, Peptide> getPossiblePeptidesMap(Peptide peptide, ArrayList<PTM> ptms, ArrayList<ArrayList<Integer>> possibleProfiles) throws IOException, SQLException, ClassNotFoundException, InterruptedException {
String representativePTM = ptms.get(0).getName();
HashMap<String, Peptide> result = new HashMap<String, Peptide>(possibleProfiles.size());
int peptideLength = peptide.getSequence().length();
for (ArrayList<Integer> profile : possibleProfiles) {
Peptide tempPeptide = Peptide.getNoModPeptide(peptide, ptms);
for (int pos : profile) {
int index = pos;
if (index == 0) {
index = 1;
} else if (index == peptideLength + 1) {
index = peptideLength;
}
tempPeptide.addModificationMatch(new ModificationMatch(representativePTM, true, index));
}
String profileKey = KeyUtils.getKey(profile);
result.put(profileKey, tempPeptide);
}
return result;
}
/**
* Returns a map of the number of possible fragment ions for every peptide
* indexed by the corresponding profile.
*
* @param possiblePeptides map of the possible peptides for every profile
* @param possiblePeptideFragments the possible peptide fragments for every
* peptide
* @param spectrumAnnotator the spectrum annotator
* @param scoringAnnotationSetttings the spectrum scoring annotation
* settings
*
* @return a map of the number of possible fragment ions for every peptide
*/
private static HashMap<String, Integer> getPossiblePeptideToN(HashMap<String, Peptide> possiblePeptides, HashMap<String, HashMap<Integer, HashMap<Integer, ArrayList<Ion>>>> possiblePeptideFragments, PeptideSpectrumAnnotator spectrumAnnotator, SpecificAnnotationSettings scoringAnnotationSetttings) {
HashMap<String, Integer> result = new HashMap<String, Integer>(possiblePeptides.size());
for (String profileKey : possiblePeptides.keySet()) {
Peptide peptide = possiblePeptides.get(profileKey);
HashMap<Integer, HashMap<Integer, ArrayList<Ion>>> fragmentsForProfile = possiblePeptideFragments.get(profileKey);
HashMap<Integer, ArrayList<Ion>> expectedFragmentIons = spectrumAnnotator.getExpectedIons(scoringAnnotationSetttings, peptide, fragmentsForProfile);
int n = 0;
for (ArrayList<Ion> ions : expectedFragmentIons.values()) {
n += ions.size();
}
result.put(profileKey, n);
}
return result;
}
/**
* Returns a map of the possible ions for every peptide of every profile.
*
* @param possiblePeptides map of the possible peptides for every profile
* @param scoringAnnotationSetttings the spectrum scoring annotation
* settings
*
* @return a map of the possible ions for every peptide of every profile
*/
private static HashMap<String, HashMap<Integer, HashMap<Integer, ArrayList<Ion>>>> getPossiblePeptideFragments(HashMap<String, Peptide> possiblePeptides, SpecificAnnotationSettings scoringAnnotationSetttings) {
HashMap<String, HashMap<Integer, HashMap<Integer, ArrayList<Ion>>>> result = new HashMap<String, HashMap<Integer, HashMap<Integer, ArrayList<Ion>>>>(possiblePeptides.size());
IonFactory fragmentFactory = IonFactory.getInstance();
for (String profileKey : possiblePeptides.keySet()) {
Peptide peptide = possiblePeptides.get(profileKey);
HashMap<Integer, HashMap<Integer, ArrayList<Ion>>> possibleFragmentIons = fragmentFactory.getFragmentIons(peptide, scoringAnnotationSetttings);
result.put(profileKey, possibleFragmentIons);
}
return result;
}
/**
* Returns the possible modification profiles given the possible sites and
* number of modifications. Sites are sorted in increasing order.
*
* @param possibleSites the possible modification sites in increasing order
* @param nPtms the number of modifications
*
* @return a list of possible modification profiles
*/
private static ArrayList<ArrayList<Integer>> getPossibleModificationProfiles(ArrayList<Integer> possibleSites, int nPtms) {
ArrayList<ArrayList<Integer>> result = new ArrayList<ArrayList<Integer>>();
for (int pos : possibleSites) {
ArrayList<Integer> profile = new ArrayList<Integer>(nPtms);
profile.add(pos);
result.add(profile);
}
for (int i = 2; i <= nPtms; i++) {
ArrayList<ArrayList<Integer>> resultAtI = new ArrayList<ArrayList<Integer>>((int) (1.5 * result.size()));
for (ArrayList<Integer> previousProfile : result) {
int lastPos = previousProfile.get(previousProfile.size() - 1);
for (int pos : possibleSites) {
if (pos > lastPos) {
ArrayList<Integer> profile = new ArrayList<Integer>(previousProfile);
profile.add(pos);
resultAtI.add(profile);
}
}
}
result = resultAtI;
}
return result;
}
/**
* Returns a map of all potential site determining ions indexed by their
* m/z.
*
* @param noModPeptide the version of the peptide which does not contain the
* modification of interest
* @param possibleProfiles the possible modification profiles to inspect
* @param ptms the PTMs scored
* @param spectrumAnnotator the spectrum annotator used throughout the
* scoring
* @param scoringAnnotationSetttings the annotation settings specific to
* this peptide and spectrum
*
* @return a map of all potential site determining ions indexed by their m/z
*/
private static HashMap<Double, ArrayList<String>> getSiteDeterminingIons(Peptide noModPeptide, ArrayList<ArrayList<Integer>> possibleProfiles, ArrayList<PTM> ptms, PeptideSpectrumAnnotator spectrumAnnotator, SpecificAnnotationSettings scoringAnnotationSetttings) {
String sequence = noModPeptide.getSequence();
Peptide peptide = new Peptide(sequence, noModPeptide.getModificationMatches());
int sequenceLength = sequence.length();
String representativePTM = ptms.get(0).getName();
HashMap<Double, ArrayList<String>> siteDeterminingIons = new HashMap<Double, ArrayList<String>>();
HashMap<Double, ArrayList<String>> commonIons = new HashMap<Double, ArrayList<String>>();
for (ArrayList<Integer> modificationProfile : possibleProfiles) {
for (int pos : modificationProfile) {
int position;
if (pos == 0) {
position = 1;
} else if (pos == sequenceLength + 1) {
position = sequenceLength;
} else {
position = pos;
}
peptide.addModificationMatch(new ModificationMatch(representativePTM, true, position));
}
HashSet<Double> mzs = new HashSet<Double>(2);
for (ArrayList<Ion> ions : spectrumAnnotator.getExpectedIons(scoringAnnotationSetttings, peptide).values()) {
for (Ion ion : ions) {
if (ion.getType() == Ion.IonType.PEPTIDE_FRAGMENT_ION) {
for (int charge : scoringAnnotationSetttings.getSelectedCharges()) {
double mz = ion.getTheoreticMz(charge);
mzs.add(mz);
}
}
}
}
String profileKey = KeyUtils.getKey(modificationProfile);
for (double mz : mzs) {
if (commonIons.isEmpty()) {
ArrayList<String> profiles = new ArrayList<String>(2);
commonIons.put(mz, profiles);
profiles.add(profileKey);
} else if (!commonIons.containsKey(mz)) {
ArrayList<String> profiles = siteDeterminingIons.get(mz);
if (profiles == null) {
profiles = new ArrayList<String>(2);
siteDeterminingIons.put(mz, profiles);
}
profiles.add(profileKey);
}
}
for (double mz : new HashSet<Double>(commonIons.keySet())) {
if (!mzs.contains(mz)) {
siteDeterminingIons.put(mz, commonIons.get(mz));
commonIons.remove(mz);
} else {
commonIons.get(mz).add(profileKey);
}
}
}
return siteDeterminingIons;
}
/**
* Returns a list of spectra containing only the most intense ions. The
* index of the spectrum in the list corresponds to the increasing number of
* peaks, ie the depth, starting with depth 1.
*
* @param spectrum the spectrum of interest
*
* @return a list of spectra containing only the most intense ions.
*/
private static ArrayList<MSnSpectrum> getReducedSpectra(MSnSpectrum spectrum) {
if (spectrum.isEmpty()) {
throw new IllegalArgumentException("Attempting to extract peaks from an empty spectrum.");
}
ArrayList<MSnSpectrum> reducedSpectra = new ArrayList<MSnSpectrum>(MAX_DEPTH);
HashMap<Double, ArrayList<Peak>> intensityToPeakMap = new HashMap<Double, ArrayList<Peak>>(spectrum.getPeakMap().size());
for (Peak peak : spectrum.getPeakList()) {
double intensity = peak.intensity;
ArrayList<Peak> peaks = intensityToPeakMap.get(intensity);
if (peaks == null) {
peaks = new ArrayList<Peak>();
intensityToPeakMap.put(intensity, peaks);
}
peaks.add(peak);
}
ArrayList<Double> intensities = new ArrayList<Double>(intensityToPeakMap.keySet());
Collections.sort(intensities, Collections.reverseOrder());
int depth = 0;
HashMap<Double, Peak> mzToPeak = new HashMap<Double, Peak>(1);
for (double intensity : intensities) {
for (Peak peak : intensityToPeakMap.get(intensity)) {
mzToPeak.put(peak.mz, peak);
depth++;
reducedSpectra.add(new MSnSpectrum(spectrum.getLevel(), spectrum.getPrecursor(), spectrum.getSpectrumTitle() + "_" + depth, mzToPeak, spectrum.getFileName()));
if (depth > MAX_DEPTH) {
break;
}
HashMap<Double, Peak> newMzToPeak = new HashMap<Double, Peak>(depth + 1);
newMzToPeak.putAll(mzToPeak);
mzToPeak = newMzToPeak;
}
if (depth > MAX_DEPTH) {
break;
}
}
return reducedSpectra;
}
/**
* Filters the spectrum so that p is lower or equal to 1 by retaining the
* most intense peaks in a window of 10 times the ms2 tolerance.
*
* @param spectrum the original spectrum
* @param scoringAnnotationSetttings the annotation settings
*
* @return the filtered spectrum
*
* @throws java.lang.InterruptedException exception thrown if the thread is
* interrupted
*/
private static MSnSpectrum filterSpectrum(MSnSpectrum spectrum, SpecificAnnotationSettings scoringAnnotationSetttings) throws InterruptedException {
Double window;
Integer maxPeaks;
double ms2Tolerance = scoringAnnotationSetttings.getFragmentIonAccuracyInDa(spectrum.getMaxMz());
if (ms2Tolerance <= 10) {
window = 10 * ms2Tolerance;
maxPeaks = 10;
} else {
window = WINDOW_SIZE;
maxPeaks = (int) (window / ms2Tolerance);
}
if (maxPeaks < 1) {
throw new IllegalArgumentException("All peaks removed by filtering.");
}
HashMap<Double, Peak> peakMap = spectrum.getPeakMap(),
newMap = new HashMap<Double, Peak>(peakMap.size()),
tempMap = new HashMap<Double, Peak>();
Double refMz = null;
for (Double mz : spectrum.getOrderedMzValues()) {
if (refMz == null) {
refMz = mz;
} else if (mz > refMz + window) {
if (tempMap.size() <= maxPeaks) {
newMap.putAll(tempMap);
tempMap.clear();
} else {
ArrayList<Double> intensities = new ArrayList<Double>(tempMap.keySet());
Collections.sort(intensities, Collections.reverseOrder());
for (int i = 0; i < Math.min(intensities.size(), maxPeaks); i++) {
Double intensity = intensities.get(i);
Peak peak = tempMap.get(intensity);
newMap.put(peak.mz, peak);
}
tempMap.clear();
}
refMz += window;
}
Peak peak = peakMap.get(mz);
tempMap.put(peak.intensity, peak);
}
ArrayList<Double> intensities = new ArrayList<Double>(tempMap.keySet());
Collections.sort(intensities, Collections.reverseOrder());
for (int i = 0; i < Math.min(intensities.size(), maxPeaks); i++) {
Double intensity = intensities.get(i);
Peak peak = tempMap.get(intensity);
newMap.put(peak.mz, peak);
}
return new MSnSpectrum(spectrum.getLevel(), spectrum.getPrecursor(), spectrum.getSpectrumTitle() + "_filtered", newMap, spectrum.getFileName());
}
}