/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.viewer.amt;
import java.util.*;
import java.util.List;
import org.apache.log4j.Logger;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.AmtExtraInfoDef;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.MS2ExtraInfoDef;
import org.fhcrc.cpl.toolbox.proteomics.PeptideGenerator;
import org.fhcrc.cpl.toolbox.proteomics.MS2Modification;
import org.fhcrc.cpl.toolbox.proteomics.ModifiedAminoAcid;
/**
* This class Generates FeatureSets based on AMT Database entries. These featuresets
* are what's actually used in matching.
*/
public class AmtDatabaseFeatureSetGenerator
{
static Logger _log = Logger.getLogger(AmtDatabaseFeatureSetGenerator.class);
protected AmtDatabase amtDatabase;
public AmtDatabaseFeatureSetGenerator(AmtDatabase amtDatabase)
{
this.amtDatabase = amtDatabase;
}
/**
* Create a feature set based on this AMT database, accounting for expected
* modifications.
* That is, bump up the mass by the mass of any static modifications, and
* create duplicate features for any variable modifictions
* TODO: maybe interrogate modifications[] to see if any of these match up with
* TODO: the mods we've already got in the db, and get the hydrophobicity accordingly
*
* @param modifications
* @return
*/
public Feature[] createFeaturesForModifications(MS2Modification[] modifications)
{
List<MS2Modification> varModList = new ArrayList<MS2Modification>();
List<MS2Modification> staticModList = new ArrayList<MS2Modification>();
if (modifications != null)
for (MS2Modification modification : modifications)
{
if (modification.getVariable())
varModList.add(modification);
else
staticModList.add(modification);
}
//for (MS2Modification mod : varModList) System.err.println(mod);
List<Feature> resultList = new ArrayList<Feature>();
for (AmtPeptideEntry peptideEntry : amtDatabase.getEntries())
{
//This call is a bit confusing. We start off the recursive call declaring that all the static
//mods should be applied, and supplying the variable mod list as the list of mods to potentially
//apply. Later recursive calls will or will not add variable mods to this list, one by one.
List<Feature> featuresForThisPeptide =
generateModFeaturesForPeptide(peptideEntry,
peptideEntry.getMedianObservedHydrophobicity(),
staticModList, varModList);
resultList.addAll(featuresForThisPeptide);
}
return resultList.toArray(new Feature[resultList.size()]);
}
/**
* Note: Assumes that a variable modification is either fully applied, to all
* residues in the peptide, or not applied at all. So, e.g., ELVISMMM either has all
* oxidized M's or none.
* @param peptideEntry
* @param observedHydrophobicity
* @param staticMods
* @param varMods
* @return
*/
public static List<Feature>generateModFeaturesForPeptide(AmtPeptideEntry peptideEntry,
double observedHydrophobicity,
List<MS2Modification> staticMods,
List<MS2Modification> varMods)
{
String peptideSequence = peptideEntry.getPeptideSequence();
List<MS2Modification> staticModsThisFeature =
new ArrayList<MS2Modification>();
for (MS2Modification mod : staticMods)
{
if (peptideSequence.contains(mod.getAminoAcid()))
staticModsThisFeature.add(mod);
}
List<MS2Modification> varModsThisFeature =
new ArrayList<MS2Modification>();
for (MS2Modification mod : varMods)
{
if (peptideSequence.contains(mod.getAminoAcid()))
varModsThisFeature.add(mod);
}
//System.err.println(peptideSequence + ", static: " + staticModsThisFeature + ", var: " + varModsThisFeature);
return recursivelyAddFeaturesForMods(peptideEntry,
observedHydrophobicity, staticModsThisFeature, varModsThisFeature);
}
/**
* Recursively generate features for all possible masses, given the list of
* /variable/ modifications known to exist in this peptide and the list of modifications
* already applied
* @param peptideEntry
* @param observedHydrophobicity
* @param appliedMods already-applied modifications. This will include all static mods and a growing
* list of variable mods
* @param varModsRemaining variable mods remaining. None of these mods should ever be static
* @return
*/
protected static List<Feature> recursivelyAddFeaturesForMods(AmtPeptideEntry peptideEntry,
double observedHydrophobicity,
List<MS2Modification> appliedMods,
List<MS2Modification> varModsRemaining)
{
List<Feature> result = new ArrayList<Feature>();
if (varModsRemaining.size() == 0)
{
Feature feature = createFeatureForPeptideWithMods(peptideEntry,
observedHydrophobicity, appliedMods);
result.add(feature);
return result;
}
//This is hacky and wasteful.
//TODO: find a better way to deal with keeping the integrity of these lists through recursion
List<MS2Modification> modsRemainingCopy =
new ArrayList<MS2Modification>(varModsRemaining.size());
modsRemainingCopy.addAll(varModsRemaining);
varModsRemaining = modsRemainingCopy;
List<MS2Modification> appliedModsCopy =
new ArrayList<MS2Modification>(appliedMods.size());
appliedModsCopy.addAll(appliedMods);
appliedMods = appliedModsCopy;
MS2Modification mod = varModsRemaining.get(0);
modsRemainingCopy.remove(mod);
result.addAll(recursivelyAddFeaturesForMods(peptideEntry, observedHydrophobicity,
appliedMods, varModsRemaining));
appliedMods.add(mod);
result.addAll(recursivelyAddFeaturesForMods(peptideEntry, observedHydrophobicity,
appliedMods,
varModsRemaining));
return result;
}
/**
* Modifications are applied all-or-nothing. If we modify one residue, we modify all
* of that residue
* @param peptideEntry
* @param observedHydrophobicity
* @param modifications
* @return
*/
protected static Feature createFeatureForPeptideWithMods(AmtPeptideEntry peptideEntry,
double observedHydrophobicity,
List<MS2Modification> modifications)
{
String peptideSequence = peptideEntry.getPeptideSequence();
Feature feature = new Feature();
MS2ExtraInfoDef.addPeptide(feature, peptideSequence);
MS2ExtraInfoDef.setPeptideProphet(feature, peptideEntry.calculateIDProbability());
feature.setPeaks(1);
feature.setScanCount(1);
//Hack! Hack! Hack! I've been using this to be able to match two of these sets
// to each other. Really there's no good value for scan, though, so, hey, whatever.
feature.setScan((int) ((1000 * (observedHydrophobicity)) + 2000));
feature.setIntensity(1000);
AmtExtraInfoDef.setObservedHydrophobicity(feature, observedHydrophobicity);
double mass = PeptideGenerator.computeMass(peptideSequence.getBytes(),
0, peptideSequence.length(),
PeptideGenerator.AMINO_ACID_MONOISOTOPIC_MASSES);
if (modifications.size() > 0)
{
Map<String, List<MS2Modification>> acidStaticModListMap =
new HashMap<String, List<MS2Modification>>();
Map<String, List<MS2Modification>> acidVarModListMap =
new HashMap<String, List<MS2Modification>>();
for (MS2Modification mod : modifications)
{
Map<String, List<MS2Modification>> appropriateMap = acidStaticModListMap;
if (mod.getVariable())
appropriateMap = acidVarModListMap;
String acidString = mod.getAminoAcid();
List<MS2Modification> modListThisAcid = appropriateMap.get(acidString);
if (modListThisAcid == null)
{
modListThisAcid = new ArrayList<MS2Modification>();
appropriateMap.put(acidString, modListThisAcid);
}
modListThisAcid.add(mod);
}
List<ModifiedAminoAcid>[] modifiedAminoAcids = new ArrayList[peptideSequence.length()];
for (int i=0; i<peptideSequence.length(); i++)
{
char thisResidue = peptideSequence.charAt(i);
double staticMassThisResidue =
PeptideGenerator.AMINO_ACID_MONOISOTOPIC_MASSES[thisResidue];
List<MS2Modification> staticModListThisAcid = acidStaticModListMap.get("" + thisResidue);
if (staticModListThisAcid != null)
{
for (MS2Modification staticMod : staticModListThisAcid)
{
mass += staticMod.getMassDiff();
List<ModifiedAminoAcid> modsThisResidue = modifiedAminoAcids[i];
if (modsThisResidue == null)
{
modsThisResidue = new ArrayList<ModifiedAminoAcid>();
modifiedAminoAcids[i] = modsThisResidue;
}
staticMassThisResidue += staticMod.getMassDiff();
ModifiedAminoAcid moddedAcid = new ModifiedAminoAcid(thisResidue,
staticMassThisResidue);
modsThisResidue.add(moddedAcid);
}
}
List<MS2Modification> varModListThisAcid = acidVarModListMap.get("" + thisResidue);
if (varModListThisAcid != null)
{
for (MS2Modification varMod : varModListThisAcid)
{
mass += varMod.getMassDiff();
List<ModifiedAminoAcid> modsThisResidue = modifiedAminoAcids[i];
if (modsThisResidue == null)
{
modsThisResidue = new ArrayList<ModifiedAminoAcid>();
modifiedAminoAcids[i] = modsThisResidue;
}
ModifiedAminoAcid moddedAcid = new ModifiedAminoAcid(thisResidue,
staticMassThisResidue + varMod.getMassDiff());
modsThisResidue.add(moddedAcid);
}
}
}
MS2ExtraInfoDef.setModifiedAminoAcids(feature, modifiedAminoAcids);
}
feature.setMass((float) mass);
feature.updateMz();
return feature;
}
/**
* create a featureset that represents all the features from a run within an
* amt database, with their original times.
*
* Note: scan will be incorrectB.
* @param runEntry
* @param modifications
* @return
*/
public FeatureSet createFeatureSetForRun(AmtRunEntry runEntry,
MS2Modification[] modifications)
{
List<MS2Modification> varModList = new ArrayList<MS2Modification>();
List<MS2Modification> staticModList = new ArrayList<MS2Modification>();
if (modifications != null)
for (MS2Modification modification : modifications)
{
if (modification.getVariable())
varModList.add(modification);
else
staticModList.add(modification);
}
List<Feature> resultList = new ArrayList<Feature>();
for (AmtPeptideEntry peptideEntry : amtDatabase.getPeptideEntriesForRun(runEntry))
{
AmtPeptideEntry.AmtPeptideObservation obs =
peptideEntry.getObservationForRun(runEntry);
List<Feature> featuresForThisObservation =
generateModFeaturesForPeptide(peptideEntry,
obs.getObservedHydrophobicity(),
staticModList, varModList);
for (Feature feature : featuresForThisObservation)
{
feature.setTime((float) obs.getTimeInRun());
resultList.add(feature);
}
}
return new FeatureSet(resultList.toArray(new Feature[resultList.size()]));
}
}