/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.toolbox.proteomics;
import org.apache.log4j.Logger;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.toolbox.datastructure.Pair;
import java.util.*;
/**
* Utilities for working with peptide sequences and modifications
*/
public class PeptideUtilities
{
    /** Logger for this class. */
    protected static Logger _log = Logger.getLogger(PeptideUtilities.class);

    /**
     * Tolerance for mass matching: a modified residue matches a target mass when the absolute
     * difference between the two is strictly less than this value.
     */
    public static final float MOD_MASS_SLOP = 0.1f;

    /**
     * Checks that NO occurrence of the given residue in the peptide carries a modification whose
     * mass matches <code>modMass</code> (within {@link #MOD_MASS_SLOP}).
     *
     * @param peptide peptide sequence
     * @param mods per-position modification lists, indexed like <code>peptide</code>. May be null
     *        (no modifications at all), and individual entries may be null. The array is only
     *        indexed at positions where <code>residue</code> occurs.
     * @param residue the residue to look for
     * @param modMass the mass to compare each ModifiedAminoAcid's mass against
     * @return true if the peptide contains the residue and none of its occurrences has a matching
     *         modification; false if any occurrence has one. Special case: false if the peptide
     *         does not contain the residue at all.
     */
    public static boolean checkForModNoResidues(String peptide, List<ModifiedAminoAcid>[] mods,
                                                char residue, float modMass)
    {
        if (peptide.indexOf(residue) < 0)
        {
            return false;
        }
        if (mods == null)
        {
            return true;
        }
        for (int i = 0; i < peptide.length(); i++)
        {
            if (peptide.charAt(i) == residue && hasModWithMass(mods[i], modMass))
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks that EVERY occurrence of the given residue in the peptide carries a modification
     * whose mass matches <code>modMass</code> (within {@link #MOD_MASS_SLOP}).
     *
     * @param peptide peptide sequence
     * @param mods per-position modification lists, indexed like <code>peptide</code>. May be null
     *        (no modifications at all), and individual entries may be null. The array is only
     *        indexed at positions where <code>residue</code> occurs.
     * @param residue the residue to look for
     * @param modMass the mass to compare each ModifiedAminoAcid's mass against
     * @return true if the peptide contains the residue and every occurrence has a matching
     *         modification; false otherwise. Special case: false if the peptide does not contain
     *         the residue at all.
     */
    public static boolean checkForModAllResidues(String peptide, List<ModifiedAminoAcid>[] mods,
                                                 char residue, float modMass)
    {
        if (peptide.indexOf(residue) < 0 || mods == null)
        {
            return false;
        }
        for (int i = 0; i < peptide.length(); i++)
        {
            if (peptide.charAt(i) == residue && !hasModWithMass(mods[i], modMass))
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether any modification in the list has a mass within {@link #MOD_MASS_SLOP} of the
     * target mass.
     *
     * @param modsThisResidue modifications on a single residue; null is treated as "none"
     * @param modMass target mass
     * @return true if at least one modification matches
     */
    private static boolean hasModWithMass(List<ModifiedAminoAcid> modsThisResidue, float modMass)
    {
        if (modsThisResidue == null)
        {
            return false;
        }
        for (ModifiedAminoAcid mod : modsThisResidue)
        {
            if (Math.abs(mod.getMass() - modMass) < MOD_MASS_SLOP)
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Calculate peptide masses for all possible modification states for this peptide, given the list of
     * possible static and variable modifications. All variable modifications are either applied to
     * all of the appropriate residue, or none. So, e.g., ELVISMMM either has all oxidized M's or none.
     * Modifications whose amino acid does not occur in the sequence are ignored.
     *
     * This is done recursively. This would be somewhat more
     * efficient if it reordered the list to put the static modifications first.
     *
     * @param peptideSequence peptide sequence
     * @param mods candidate modifications; null is treated as an empty list
     * @return one entry per modification state. Each entry pairs the per-position modification
     *         lists (null at unmodified positions) with the peptide mass for that state. If no
     *         modification applies to this peptide, a single entry with an all-null array.
     */
    public static List<Pair<List<ModifiedAminoAcid>[], Float>> calculatePeptideMassesForMods(
            String peptideSequence, List<MS2Modification> mods)
    {
        List<MS2Modification> modsThisPeptideHas = new ArrayList<MS2Modification>();
        if (mods != null)
        {
            for (MS2Modification mod : mods)
            {
                if (peptideSequence.contains(mod.getAminoAcid()))
                {
                    modsThisPeptideHas.add(mod);
                }
            }
        }

        if (!modsThisPeptideHas.isEmpty())
        {
            return recursivelyAddMassesForMods(peptideSequence, new ArrayList<MS2Modification>(),
                    modsThisPeptideHas);
        }

        //Nothing applies: report the bare peptide mass with an empty (all-null) modification array
        float mass = (float) new PeptideGenerator().createPeptideForFullyTrypticPeptideSequence(
                peptideSequence).getMonoisotopicMass();
        @SuppressWarnings("unchecked")
        List<ModifiedAminoAcid>[] noMods = new List[peptideSequence.length()];
        List<Pair<List<ModifiedAminoAcid>[], Float>> result =
                new ArrayList<Pair<List<ModifiedAminoAcid>[], Float>>(1);
        result.add(new Pair<List<ModifiedAminoAcid>[], Float>(noMods, mass));
        return result;
    }

    /**
     * Recursively generate modification arrays and masses for all possible modification states,
     * given the list of modifications known to apply to this peptide and the list of modifications
     * already applied. The first remaining modification is consumed at each level: a variable
     * modification produces both the "not applied" and the "applied" branch (in that order), any
     * other modification only the "applied" branch.
     *
     * Neither input list is modified.
     *
     * @param peptideSequence peptide sequence
     * @param appliedMods modifications already chosen for this branch
     * @param modsRemaining modifications still to be decided
     * @return one entry per modification state reachable from this branch
     */
    protected static List<Pair<List<ModifiedAminoAcid>[], Float>> recursivelyAddMassesForMods(
            String peptideSequence,
            List<MS2Modification> appliedMods,
            List<MS2Modification> modsRemaining)
    {
        List<Pair<List<ModifiedAminoAcid>[], Float>> result =
                new ArrayList<Pair<List<ModifiedAminoAcid>[], Float>>();
        if (modsRemaining.isEmpty())
        {
            result.add(calcMassForPeptideWithMods(peptideSequence, appliedMods));
            return result;
        }

        MS2Modification mod = modsRemaining.get(0);
        //Copy the tail so that deeper levels never see (or depend on) the caller's list
        List<MS2Modification> restOfMods =
                new ArrayList<MS2Modification>(modsRemaining.subList(1, modsRemaining.size()));

        if (mod.getVariable())
        {
            //Branch in which the variable modification is absent. appliedMods can be passed
            //as-is because this method never mutates its arguments.
            result.addAll(recursivelyAddMassesForMods(peptideSequence, appliedMods, restOfMods));
        }

        //Branch in which the modification is present
        List<MS2Modification> appliedPlusThisMod =
                new ArrayList<MS2Modification>(appliedMods.size() + 1);
        appliedPlusThisMod.addAll(appliedMods);
        appliedPlusThisMod.add(mod);
        result.addAll(recursivelyAddMassesForMods(peptideSequence, appliedPlusThisMod, restOfMods));
        return result;
    }

    /**
     * Computes the mass of the peptide with the given modifications applied, along with the
     * per-position lists of modified amino acids.
     *
     * Modifications are applied all-or-nothing. If we modify one residue, we modify all
     * of that residue. Static and variable modifications are treated identically here.
     *
     * When several modifications target the same residue, the mass stored in each successive
     * ModifiedAminoAcid at a position is cumulative: the residue mass plus every mass difference
     * applied so far at that position.
     * NOTE(review): confirm that callers expect cumulative rather than per-modification masses.
     *
     * @param peptideSequence peptide sequence
     * @param modifications modifications to apply; must not be null
     * @return pair of (per-position modification lists, null at unmodified positions) and the
     *         total peptide mass including all mass differences
     */
    protected static Pair<List<ModifiedAminoAcid>[], Float> calcMassForPeptideWithMods(String peptideSequence,
                                                                                     List<MS2Modification> modifications)
    {
        double mass = PeptideGenerator.computeMass(peptideSequence.getBytes(),
                0, peptideSequence.length(),
                PeptideGenerator.AMINO_ACID_MONOISOTOPIC_MASSES);
        //Allocate as List[] (not ArrayList[]) so that any List implementation can be stored in
        //the returned array without an ArrayStoreException
        @SuppressWarnings("unchecked")
        List<ModifiedAminoAcid>[] modifiedAminoAcids = new List[peptideSequence.length()];

        if (!modifications.isEmpty())
        {
            //Group the modifications by the amino acid they apply to
            Map<String, List<MS2Modification>> acidModListMap =
                    new HashMap<String, List<MS2Modification>>();
            for (MS2Modification mod : modifications)
            {
                String acidString = mod.getAminoAcid();
                List<MS2Modification> modListThisAcid = acidModListMap.get(acidString);
                if (modListThisAcid == null)
                {
                    modListThisAcid = new ArrayList<MS2Modification>();
                    acidModListMap.put(acidString, modListThisAcid);
                }
                modListThisAcid.add(mod);
            }

            for (int i = 0; i < peptideSequence.length(); i++)
            {
                char thisResidue = peptideSequence.charAt(i);
                double massThisResidue =
                        PeptideGenerator.AMINO_ACID_MONOISOTOPIC_MASSES[thisResidue];
                List<MS2Modification> modListThisAcid =
                        acidModListMap.get(String.valueOf(thisResidue));
                if (modListThisAcid == null)
                {
                    continue;
                }

                List<ModifiedAminoAcid> modsThisResidue = new ArrayList<ModifiedAminoAcid>();
                modifiedAminoAcids[i] = modsThisResidue;
                for (MS2Modification mod : modListThisAcid)
                {
                    mass += mod.getMassDiff();
                    //Running total for this position; see the method comment
                    massThisResidue += mod.getMassDiff();
                    modsThisResidue.add(new ModifiedAminoAcid(thisResidue, massThisResidue));
                }
            }
        }
        return new Pair<List<ModifiedAminoAcid>[], Float>(modifiedAminoAcids, (float) mass);
    }
}