/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.viewer.amt;
import java.util.Map;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.io.File;
import java.io.PrintWriter;
import java.io.FileNotFoundException;
import org.apache.log4j.Logger;
import org.fhcrc.cpl.toolbox.statistics.BasicStatistics;
import org.fhcrc.cpl.toolbox.proteomics.PeptideGenerator;
import org.fhcrc.cpl.toolbox.proteomics.MS2Modification;
import org.fhcrc.cpl.toolbox.proteomics.ModifiedAminoAcid;
/**
* A representation of a full AMT database. Stores everything we'd want
* to read from or write to amtXml files, and allows us to manipulate the
* database.
*
* An AMT database contains a hierarchical structure, exactly equivalent
* to the structure defined in the amtXml schema. Briefly:
* -An AMT database contains a number of runs.
* -An AMT database also contains a number of known Aminoacid Modifications,
* which are referenced in the runs and in the modification states (see below)
* -An AMT database also contains a number of peptide entries.
* -Each peptide entry contains multiple Modification states.
* -Each Modification State contains multiple observations.
* -Each observation knows its hydrophobicity, quality score, and the run
* it came from.
*
* Retention times for observations can come from MS/MS observations, or from matched
* MS1 feature times (preferred). This is controlled by AmtDatabaseBuilder and isn't
* tracked in the database, so you'd best keep track yourself.
*
* There are obvious connections between AMT databases and peptides, and
* Features and FeatureSets, but all references to Features are kept
* out of this class.
*/
public class AmtDatabase
        implements Cloneable
{
    static Logger _log = Logger.getLogger(AmtDatabase.class);

    public static int DEFAULT_PRECISION=10;

    //Map of peptide entries.  Key = peptide sequence
    protected HashMap<String, AmtPeptideEntry> mAmtPeptideEntryMap = null;

    //List of runs.  Note: The sequence of each run is its index + 1
    protected List<AmtRunEntry> mAmtRunEntries = null;

    //tolerance used when determining whether two modifications are the same.
    //Units are Da
    //TODO: should be able to make this MUCH smaller.  Can't.  What the heck?
    public static final float MODIFICATION_EQUALITY_MASS_TOLERANCE = .1f;

    //list of modifications.  Note: The sequence of each mod is its index + 1
    protected List<MS2Modification> mAminoacidModifications = null;

    //Map from AMT run entries to their (one-based) sequence number within this
    //database.  For conversion to XML, which has no other way of linking
    //observations to runs
    protected HashMap<AmtRunEntry,Integer> mAmtRunSequenceMap = null;

    //Map from modifications to their (one-based) sequence number within this
    //database.  For conversion to XML, which has no other way of referencing
    //modifications
    protected HashMap<MS2Modification,Integer> mAminoacidModificationSequenceMap = null;

    //File that the database was loaded from
    protected File mAmtDBSourceFile = null;

    //modification states' modified residue masses will be rounded to the nearest
    //multiple of this value when stored.  For bucketing slightly different masses
    public static final double DEFAULT_MODIFICATION_MASS_ROUNDING_FACTOR = 1;

    //defaults for hydrophobicity algorithm name and version
    public static final String DEFAULT_HYDROPHOBICITY_ALGORITHM_NAME = "krokhin";
    public static final double DEFAULT_HYDROPHOBICITY_ALGORITHM_VERSION = 3;

    protected String mHydrophobicityAlgorithmName =
            DEFAULT_HYDROPHOBICITY_ALGORITHM_NAME;
    protected double mHydrophobicityAlgorithmVersion =
            DEFAULT_HYDROPHOBICITY_ALGORITHM_VERSION;

    /**
     * Create an empty database with all internal collections initialized.
     */
    public AmtDatabase()
    {
        init();
    }

    /**
     * Initialize hashtables, etc
     */
    protected void init()
    {
        mAmtPeptideEntryMap = new HashMap<String, AmtPeptideEntry>();
        mAmtRunEntries = new ArrayList<AmtRunEntry>();
        mAmtRunSequenceMap = new HashMap<AmtRunEntry,Integer>();
        mAminoacidModificationSequenceMap = new HashMap<MS2Modification,Integer>();
        mAminoacidModifications = new ArrayList<MS2Modification>();
    }

    /**
     * VERY basic summary information: counts of runs, modifications and entries
     * @return a one-line human-readable summary
     */
    @Override
    public String toString()
    {
        return "(AMT Database with " + numRuns() + " runs, " + numAminoacidModifications() +
                " distinct modifications, and " + numEntries() + " entries)";
    }

    /**
     * Structure of database will be copied.  Individual entries, however, will be the same,
     * so don't mess with them.  Same with run entries.
     *
     * Hence, not a deep copy, not completely shallow.  Waist-deep.
     *
     * Note: only runs, modifications and peptide entries are carried over.  The
     * hydrophobicity algorithm name/version and the source file are NOT copied; the
     * returned database has the defaults for those.
     *
     * @return a new AmtDatabase (typed as Object) sharing run and peptide entries with this one
     */
    public Object waistDeepCopy()
    {
        AmtDatabase cloneDb = new AmtDatabase();
        cloneDb.addOrOverrideEntriesWithAnotherDatabase(this);
        return cloneDb;
    }

    /**
     * Look for a modification already known to this database that is equivalent to the
     * one passed in: same residue (case-insensitive), same variable/static flag, and a
     * mass difference within MODIFICATION_EQUALITY_MASS_TOLERANCE.
     * @param newMS2Mod the modification to look for
     * @return the first equivalent modification in this database, or null if there is none
     */
    public MS2Modification findExistingEquivalentModification(MS2Modification newMS2Mod)
    {
        if (mAminoacidModifications == null)
            return null;

        for (MS2Modification knownMod : mAminoacidModifications)
        {
            boolean sameResidue =
                    knownMod.getAminoAcid().equalsIgnoreCase(newMS2Mod.getAminoAcid());
            boolean sameMass =
                    Math.abs(knownMod.getMassDiff() - newMS2Mod.getMassDiff()) <
                            MODIFICATION_EQUALITY_MASS_TOLERANCE;
            boolean sameVariability = knownMod.getVariable() == newMS2Mod.getVariable();

            if (sameResidue && sameMass && sameVariability)
                return knownMod;
        }
        return null;
    }

    /**
     * Given an instance of a modification, figure out which variable MS2Modification
     * within this run it represents.  If none, return null
     * @param residue the modified residue (compared case-insensitively)
     * @param massDiff the observed mass difference, in Da
     * @param runEntry the run whose variable modifications are searched
     * @return the first matching variable modification of the run, or null
     */
    public MS2Modification resolveMS2VariableModification(String residue, float massDiff,
                                                          AmtRunEntry runEntry)
    {
        //avoid building per-candidate debug strings unless they will be emitted
        boolean debugEnabled = _log.isDebugEnabled();
        if (debugEnabled)
            _log.debug("resolveMS2VarMod 1, " + residue + ", " + massDiff);

        for (MS2Modification candidateMod : runEntry.getVariableModifications())
        {
            if (debugEnabled)
                _log.debug(" checking against " + candidateMod);
            if (candidateMod.getAminoAcid().equalsIgnoreCase(residue) &&
                    (Math.abs(candidateMod.getMassDiff() - massDiff) <
                            MODIFICATION_EQUALITY_MASS_TOLERANCE))
            {
                if (debugEnabled)
                    _log.debug("resolveMS2VarMods, found match: " + candidateMod);
                return candidateMod;
            }
        }
        return null;
    }

    /**
     * Resolve the modifications on a peptide against the run's known modifications and
     * then add an observation.  Requires all the things that an observation needs
     * to know about, including the AmtRunEntry that it's associated with --
     * and that run entry must be a valid entry in THIS database.
     *
     * Peptide sequences containing 'X' are skipped silently (debug-logged only).
     * @param peptideSequence
     * @param modifiedAminoAcids per-position lists of modified residues; may be null
     * @param qualityScore
     * @param hydrophobicity
     * @param runEntry
     * @param spectralCountsMap peptide sequence -> spectral count; may be null
     * @param timeInRun
     */
    public void resolveModsAndAddObservation(String peptideSequence,
                               List<ModifiedAminoAcid>[] modifiedAminoAcids,
                               double qualityScore,
                               double hydrophobicity,
                               AmtRunEntry runEntry,
                               Map<String, Integer> spectralCountsMap,
                               double timeInRun)
    {
        if (peptideSequence.contains("X"))
        {
            _log.debug("Skipping peptide sequence with aminoacid 'X'. Peptide: " +
                    peptideSequence);
            return;
        }

        List<MS2Modification>[] ms2Modifications =
                resolveMods(peptideSequence, modifiedAminoAcids, runEntry);
        addObservation(peptideSequence, ms2Modifications, qualityScore,
                hydrophobicity, runEntry, spectralCountsMap, timeInRun);
    }

    /**
     * add an observation, having already resolved the modifications.  The spectral
     * count is looked up by peptide sequence; if the map is null, or has no count for
     * this peptide, the observation gets SPECTRAL_COUNT_UNKNOWN.
     * @param peptideSequence
     * @param ms2Modifications
     * @param qualityScore
     * @param hydrophobicity
     * @param runEntry
     * @param spectralCountsMap peptide sequence -> spectral count; may be null
     * @param timeInRun
     */
    public void addObservation(String peptideSequence,
                               List<MS2Modification>[] ms2Modifications,
                               double qualityScore,
                               double hydrophobicity,
                               AmtRunEntry runEntry,
                               Map<String, Integer> spectralCountsMap,
                               double timeInRun)
    {
        int spectralCount = AmtPeptideEntry.AmtPeptideObservation.SPECTRAL_COUNT_UNKNOWN;
        if (spectralCountsMap != null)
        {
            //Map.get() returns null for a missing key; unboxing that directly
            //would throw a NullPointerException
            Integer countForPeptide = spectralCountsMap.get(peptideSequence);
            if (countForPeptide != null)
                spectralCount = countForPeptide;
        }
        addObservation(peptideSequence, ms2Modifications, qualityScore,
                hydrophobicity, runEntry, spectralCount, timeInRun);
    }

    /**
     * Work out, for every position in the peptide, which of the run's modifications
     * apply there.  Static modifications of the run are applied to every matching
     * residue.  Each entry of modifiedAminoAcids is compared against the residue's base
     * mass (monoisotopic mass plus static modifications); if the difference exceeds
     * MODIFICATION_EQUALITY_MASS_TOLERANCE it must match one of the run's variable
     * modifications.
     * @param peptideSequence
     * @param modifiedAminoAcids per-position lists of modified residues; may be null or empty
     * @param runEntry the run supplying the static and variable modifications
     * @return an array with one list of modifications per residue, or null if no
     * residue has any modification
     * @throws IllegalArgumentException if a variable modification can't be matched to
     * one of the run's variable modifications, or names a residue that does not occur
     * in the peptide sequence
     */
    public List<MS2Modification>[] resolveMods(String peptideSequence,
                               List<ModifiedAminoAcid>[] modifiedAminoAcids,
                               AmtRunEntry runEntry)
    {
        //avoid building debug strings in the loops unless they will be emitted
        boolean debugEnabled = _log.isDebugEnabled();
        _log.debug("resolveMods 1");

        @SuppressWarnings("unchecked")
        List<MS2Modification>[] ms2Modifications =
                (List<MS2Modification>[]) new List[peptideSequence.length()];
        for (int i=0; i<ms2Modifications.length; i++)
            ms2Modifications[i] = new ArrayList<MS2Modification>();

        //per-residue caches, so each distinct residue is only worked out once
        Map<String, Float> residueBaseMassMap = new HashMap<String, Float>();
        Map<String, List<MS2Modification>> residueStaticModMap =
                new HashMap<String, List<MS2Modification>>();

        //TODO: this should be moved up higher, not done for each peptide
        if (debugEnabled)
            _log.debug("resolveMods, peptide " + peptideSequence + ", calculating static mods for each residue");
        for (int i=0; i<peptideSequence.length(); i++)
        {
            char residue = peptideSequence.charAt(i);
            String residueString = "" + residue;
            if (!residueBaseMassMap.containsKey(residueString))
            {
                float residueBaseMass =
                        (float) PeptideGenerator.AMINO_ACID_MONOISOTOPIC_MASSES[residue];
                if (debugEnabled)
                    _log.debug(" Residue " + residueString + ", monoisotopic mass " + residueBaseMass);
                List<MS2Modification> staticModsThisResidue =
                        new ArrayList<MS2Modification>();
                residueStaticModMap.put(residueString, staticModsThisResidue);

                //resolve static modifications
                MS2Modification[] staticMods = runEntry.getStaticModifications();
                if (staticMods != null)
                {
                    for (MS2Modification staticMod : staticMods)
                    {
                        if (residueString.equalsIgnoreCase(staticMod.getAminoAcid()))
                        {
                            staticModsThisResidue.add(staticMod);
                            residueBaseMass += staticMod.getMassDiff();
                            if (debugEnabled)
                                _log.debug(" Adding static mod of " + staticMod.getMassDiff());
                        }
                    }
                }
                if (debugEnabled)
                    _log.debug(" Added total " + staticModsThisResidue.size() + " modifications, residue base mass=" + residueBaseMass);
                residueBaseMassMap.put(residueString, residueBaseMass);
            }
            //add static mods calculated above
            ms2Modifications[i].addAll(residueStaticModMap.get(residueString));
        }

        //variable mods
        if (modifiedAminoAcids != null && modifiedAminoAcids.length > 0)
        {
            for (int i = 0; i < modifiedAminoAcids.length; i++)
            {
                if (modifiedAminoAcids[i] == null)
                    continue;
                for (ModifiedAminoAcid modifiedAminoAcid : modifiedAminoAcids[i])
                {
                    if (debugEnabled)
                        _log.debug(" Trying to add modification " + modifiedAminoAcids[i]);
                    String aminoAcidAsString = modifiedAminoAcid.getAminoAcidAsString();
                    Float residueBaseMass = residueBaseMassMap.get(aminoAcidAsString);
                    if (residueBaseMass == null)
                    {
                        //base masses are only known for residues present in the
                        //sequence; fail with context instead of a bare NPE on unboxing
                        throw new IllegalArgumentException("Modified residue " + aminoAcidAsString +
                                " does not occur in peptide " + peptideSequence + " : " + modifiedAminoAcid);
                    }
                    float effectiveMassDiff =
                            (float) (modifiedAminoAcid.getMass() - residueBaseMass);
                    //if the effective mass diff is small, no need to account for it
                    if (Math.abs(effectiveMassDiff) > MODIFICATION_EQUALITY_MASS_TOLERANCE)
                    {
                        MS2Modification runMod =
                                resolveMS2VariableModification(aminoAcidAsString,
                                        effectiveMassDiff,
                                        runEntry);
                        if (runMod == null)
                            throw new IllegalArgumentException("Unable to resolve variable modification on peptide " +
                                    peptideSequence + " : " + modifiedAminoAcid + ", base residue mass: " +
                                    residueBaseMass + ", effective mass diff: " + effectiveMassDiff);
                        ms2Modifications[i].add(runMod);
                    }
                }
            }
        }

        if (debugEnabled)
        {
            StringBuilder annotatedPeptideSequence = new StringBuilder();
            for (int i=0; i<peptideSequence.length(); i++)
            {
                annotatedPeptideSequence.append(peptideSequence.charAt(i));
                for (MS2Modification mod : ms2Modifications[i])
                {
                    annotatedPeptideSequence.append("[+" + mod.getMassDiff());
                    if (mod.getVariable())
                        annotatedPeptideSequence.append("(V)");
                    annotatedPeptideSequence.append("]");
                }
            }
            _log.debug("*** " + annotatedPeptideSequence);
        }

        //if no mods at all, null out result
        //This is hokey.
        for (List<MS2Modification> modList : ms2Modifications)
        {
            if (!modList.isEmpty())
                return ms2Modifications;
        }
        return null;
    }

    /**
     * Add an observation.  Requires all the things that an observation needs
     * to know about, including the AmtRunEntry that it's associated with --
     * and that run entry must be a valid entry in THIS database.
     *
     * If there is no entry for this peptide yet, one is created from the observation.
     * @param peptideSequence
     * @param ms2Modifications per-position modifications, as returned by resolveMods
     * @param qualityScore
     * @param hydrophobicity
     * @param runEntry
     * @param spectralCount
     * @param timeInRun
     */
    public void addObservation(String peptideSequence,
                               List<MS2Modification>[] ms2Modifications,
                               double qualityScore,
                               double hydrophobicity,
                               AmtRunEntry runEntry,
                               int spectralCount,
                               double timeInRun)
    {
        AmtPeptideEntry.AmtPeptideObservation observation =
                AmtPeptideEntry.AmtPeptideObservation.createObservation(
                        hydrophobicity,
                        qualityScore,
                        runEntry,
                        timeInRun);
        observation.setSpectralCount(spectralCount);

        AmtPeptideEntry entryForPeptide = mAmtPeptideEntryMap.get(peptideSequence);
        if (entryForPeptide != null)
        {
            entryForPeptide.addObservation(peptideSequence,
                    ms2Modifications,
                    observation);
            return;
        }

        entryForPeptide =
                AmtPeptideEntry.createEntryFromObservation(
                        peptideSequence, ms2Modifications,
                        observation);
        mAmtPeptideEntryMap.put(peptideSequence, entryForPeptide);
    }

    /**
     * Add all observations from an entry, without remapping any modifications.
     * @param newEntry
     */
    public void addObservationsFromEntry(AmtPeptideEntry newEntry)
    {
        addObservationsFromEntry(newEntry, null);
    }

    /**
     * Given an entry, add all the observations from all modification states in that entry
     * to the database.
     * If there's no existing entry for this peptide, the passed-in entry object itself is
     * stored (not a copy).
     *
     * Note: when oldNewModMap is supplied, the modification lists of newEntry are
     * rewritten in place.
     * @param newEntry
     * @param oldNewModMap map from modifications referenced by newEntry to the
     * modifications that should replace them; may be null for no remapping
     */
    public void addObservationsFromEntry(AmtPeptideEntry newEntry,
                                         Map<MS2Modification, MS2Modification> oldNewModMap)
    {
        String peptideSequence = newEntry.getPeptideSequence();
        AmtPeptideEntry entryForPeptide = mAmtPeptideEntryMap.get(peptideSequence);

        if (oldNewModMap != null)
        {
            for (AmtPeptideEntry.AmtPeptideModificationStateEntry modEntry :
                    newEntry.getModificationStateEntries())
            {
                if (modEntry.getModifications() == null)
                    continue;
                for (List<MS2Modification> mods : modEntry.getModifications())
                {
                    if (mods == null)
                        continue;
                    for (int i=0; i<mods.size(); i++)
                    {
                        MS2Modification oldMod = mods.get(i);
                        if (oldNewModMap.containsKey(oldMod))
                            mods.set(i, oldNewModMap.get(oldMod));
                    }
                }
            }
        }

        if (entryForPeptide == null)
        {
            mAmtPeptideEntryMap.put(peptideSequence, newEntry);
            return;
        }

        for (AmtPeptideEntry.AmtPeptideModificationStateEntry modEntry :
                newEntry.getModificationStateEntries())
        {
            entryForPeptide.addModificationStateEntry(modEntry);
        }
    }

    /**
     * For each entry in another AmtDatabase, add all that entry's observations
     * to this database.  If this involves creating new entries here, so be it.  If entries
     * already exist, augment them with the new data.
     *
     * All runs of the other database are added to this one first, and the
     * modification mappings returned by addRunEntry are applied to the incoming entries.
     * @param otherDatabase
     */
    public void addObservationsFromAnotherDatabase(AmtDatabase otherDatabase)
    {
        Map<MS2Modification, MS2Modification> oldNewModMap =
                new HashMap<MS2Modification, MS2Modification>();
        for (AmtRunEntry runEntry : otherDatabase.getRuns())
            oldNewModMap.putAll(addRunEntry(runEntry));

        if (_log.isDebugEnabled())
        {
            for (Map.Entry<MS2Modification, MS2Modification> mapping : oldNewModMap.entrySet())
            {
                _log.debug("Adding new run, modification map: ");
                _log.debug(" " + mapping.getKey() + " -> " + mapping.getValue() +
                        " (ID " + getSequenceForAminoacidModification(mapping.getValue()) + ")");
            }
        }

        for (AmtPeptideEntry otherDatabasePeptideEntry : otherDatabase.getEntries())
            addObservationsFromEntry(otherDatabasePeptideEntry, oldNewModMap);
    }

    /**
     * For each entry in another AmtDatabase, add it to this database,
     * overriding an existing entry if one exists.  All data from existing
     * overridden entries will be lost.
     *
     * No checking is done to determine if we have any orphan runs.
     * @param otherDatabase
     */
    public void addOrOverrideEntriesWithAnotherDatabase(AmtDatabase otherDatabase)
    {
        for (AmtRunEntry runEntry : otherDatabase.getRuns())
            addRunEntry(runEntry);

        for (AmtPeptideEntry otherDatabasePeptideEntry : otherDatabase.getEntries())
            addOrOverrideEntry(otherDatabasePeptideEntry);
    }

    /**
     * Save to a tsv file in some hokey format: one row per peptide entry, with
     * sequence, mass, predicted hydrophobicity and median observed hydrophobicity,
     * followed by an (observed hydrophobicity, time) column pair per run.  Runs in
     * which the peptide was not observed get "NA" in both columns.
     * @param tsvFile
     * @throws FileNotFoundException
     */
    public void saveToTsvSpreadsheet(File tsvFile) throws FileNotFoundException
    {
        AmtRunEntry[] runs = getRuns();
        int numRuns = runs.length;

        Map<AmtRunEntry,Integer> runToIndexMap = new HashMap<AmtRunEntry,Integer>(numRuns);
        for (int i=0; i<numRuns; i++)
            runToIndexMap.put(runs[i],i);

        //marks runs with no observation for the peptide currently being written
        double sentinelHValue = -999999;
        double[] hydrophobicitiesInRuns = new double[numRuns];
        double[] timesInRuns = new double[numRuns];

        PrintWriter pw = new PrintWriter(tsvFile);
        try
        {
            //write header line
            pw.write("sequence\tmass\tcalch\thaverage");
            for (int i=0; i<numRuns; i++)
                pw.write("\th" + i + "\tt" + i);
            pw.write("\n");

            for (AmtPeptideEntry peptideEntry : getEntries())
            {
                pw.write(peptideEntry.getPeptideSequence() + "\t" +
                        peptideEntry.getMass(PeptideGenerator.AMINO_ACID_MONOISOTOPIC_MASSES) + "\t" +
                        peptideEntry.getPredictedHydrophobicity() +
                        "\t" + peptideEntry.getMedianObservedHydrophobicity());

                //only the hydrophobicities need resetting: a time is only written
                //when the matching hydrophobicity is not the sentinel
                Arrays.fill(hydrophobicitiesInRuns,sentinelHValue);
                for (AmtPeptideEntry.AmtPeptideObservation peptideObservation : peptideEntry.getObservations())
                {
                    int runIndex = runToIndexMap.get(peptideObservation.getRunEntry());
                    hydrophobicitiesInRuns[runIndex] =
                            peptideObservation.getObservedHydrophobicity();
                    timesInRuns[runIndex] = peptideObservation.getTimeInRun();
                }

                for (int i=0; i<hydrophobicitiesInRuns.length; i++)
                {
                    if (hydrophobicitiesInRuns[i] != sentinelHValue)
                        pw.write("\t" + hydrophobicitiesInRuns[i] + "\t" + timesInRuns[i]);
                    else
                        pw.write("\tNA\tNA");
                }
                pw.write("\n");
            }
        }
        finally
        {
            //release the file handle even if writing an entry fails
            pw.close();
        }
    }


    //stuff related to peptide entries

    /**
     * This isn't free, because internally we store these as a HashMap
     * @return a newly-allocated array of all peptide entries
     */
    public AmtPeptideEntry[] getEntries()
    {
        return mAmtPeptideEntryMap.values().toArray(new AmtPeptideEntry[mAmtPeptideEntryMap.size()]);
    }

    /**
     * Find all peptide entries that have an observation in the given run.
     * @param runEntry
     * @return the matching entries; empty if there are none
     */
    public AmtPeptideEntry[] getPeptideEntriesForRun(AmtRunEntry runEntry)
    {
        List<AmtPeptideEntry> resultList = new ArrayList<AmtPeptideEntry>();
        for (AmtPeptideEntry peptideEntry : getEntries())
        {
            if (peptideEntry.getObservationForRun(runEntry) != null)
                resultList.add(peptideEntry);
        }
        return resultList.toArray(new AmtPeptideEntry[resultList.size()]);
    }

    /**
     * this REALLY isn't free: every peptide entry is asked for its observation in the run
     * @param runEntry
     * @return the observations found for the run; empty if there are none
     */
    public AmtPeptideEntry.AmtPeptideObservation[] getObservationsForRun(AmtRunEntry runEntry)
    {
        List<AmtPeptideEntry.AmtPeptideObservation> observations =
                new ArrayList<AmtPeptideEntry.AmtPeptideObservation>();
        for (AmtPeptideEntry peptideEntry : getEntries())
        {
            AmtPeptideEntry.AmtPeptideObservation obs =
                    peptideEntry.getObservationForRun(runEntry);
            if (obs != null)
                observations.add(obs);
        }
        return observations.toArray(new AmtPeptideEntry.AmtPeptideObservation[observations.size()]);
    }

    /**
     * Return the minimum time-in-run value for any observation in this run
     * @param runEntry
     * @return the minimum time, or Double.MAX_VALUE if the run has no observations
     */
    public double getMinTimeInRun(AmtRunEntry runEntry)
    {
        double result = Double.MAX_VALUE;
        for (AmtPeptideEntry.AmtPeptideObservation obs : getObservationsForRun(runEntry))
            result = Math.min(result, obs.getTimeInRun());
        return result;
    }

    /**
     * Return the maximum time-in-run value for any observation in this run
     * @param runEntry
     * @return the maximum time, or -Double.MAX_VALUE if the run has no observations
     */
    public double getMaxTimeInRun(AmtRunEntry runEntry)
    {
        //Double.MIN_VALUE is the smallest POSITIVE double, so it can't be used
        //as the starting point for a maximum: zero or negative times would lose
        double result = -Double.MAX_VALUE;
        for (AmtPeptideEntry.AmtPeptideObservation obs : getObservationsForRun(runEntry))
            result = Math.max(result, obs.getTimeInRun());
        return result;
    }

    /**
     * @return a newly-allocated array of all peptide sequences that have entries
     */
    public String[] getPeptides()
    {
        return mAmtPeptideEntryMap.keySet().toArray(new String[0]);
    }

    /**
     * @param peptideSequence
     * @return the entry for this sequence, or null if there is none
     */
    public AmtPeptideEntry getEntry(String peptideSequence)
    {
        return mAmtPeptideEntryMap.get(peptideSequence);
    }

    /**
     * @param peptideSequence
     * @return true if there is an entry for this sequence
     */
    public boolean contains(String peptideSequence)
    {
        return getEntry(peptideSequence) != null;
    }

    /**
     * Add a peptide entry, blowing away the existing entry if
     * it was there.  Make no attempt to reconcile runs.
     * @param overridingEntry
     */
    protected void addOrOverrideEntry(AmtPeptideEntry overridingEntry)
    {
        mAmtPeptideEntryMap.put(overridingEntry.getPeptideSequence(),
                overridingEntry);
    }

    /**
     * Remove an entry for a given sequence
     * @param peptideSequence
     */
    public void removeEntry(String peptideSequence)
    {
        mAmtPeptideEntryMap.remove(peptideSequence);
    }

    /**
     * count 'em
     * @return the number of peptide entries
     */
    public int numEntries()
    {
        return mAmtPeptideEntryMap.size();
    }


    //stuff related to aminoacid modifications

    /**
     * count 'em
     * @return the number of modifications known to this database
     */
    public int numAminoacidModifications()
    {
        if (mAminoacidModifications == null)
            return 0;
        return mAminoacidModifications.size();
    }

    /**
     * Add a modification.  The sequence of this mod will be the new size of the
     * ArrayList after addition
     * @param newMod
     */
    public void addAminoacidModification(MS2Modification newMod)
    {
        mAminoacidModifications.add(newMod);
        mAminoacidModificationSequenceMap.put(newMod,numAminoacidModifications());
    }

    /**
     * Get the mod with the specified sequence.
     * Note: sequence is one-based and ArrayLists are zero-based, so we subtract one
     * when referencing the ArrayList;
     * @param sequence (one-based)
     * @return the modification, or null if the sequence is out of range
     */
    public MS2Modification getAminoacidModificationBySequence(int sequence)
    {
        if (sequence < 1 || sequence > numAminoacidModifications())
            return null;
        return mAminoacidModifications.get(sequence-1);
    }

    /**
     * Return an array containing all modifications
     * @return a newly-allocated array, or null if the modification list is null
     */
    public MS2Modification[] getAminoacidModifications()
    {
        if (mAminoacidModifications == null)
            return null;
        return mAminoacidModifications.toArray(new MS2Modification[0]);
    }

    /**
     * @param mod
     * @return the sequence of this modification, or -1 if it's not in the database
     */
    public int getSequenceForAminoacidModification(MS2Modification mod)
    {
        Integer modSequence = mAminoacidModificationSequenceMap.get(mod);
        return modSequence == null ? -1 : modSequence;
    }


    //stuff related to runs

    /**
     * count 'em
     * @return the number of runs
     */
    public int numRuns()
    {
        return mAmtRunEntries.size();
    }

    /**
     * Add a run.  The sequence of this run will be the new size of the ArrayList after addition.
     *
     * Modifications on the run that duplicate ones already in this database are first
     * overridden by the run entry itself (overrideDuplicateModifications); any of the
     * run's modifications that are still not in the database are then added to it.
     * @param newRunEntry
     * @return the map returned by newRunEntry.overrideDuplicateModifications(this)
     */
    public Map<MS2Modification, MS2Modification> addRunEntry(AmtRunEntry newRunEntry)
    {
        boolean debugEnabled = _log.isDebugEnabled();
        _log.debug("Adding run entry");
        _log.debug("Before add, num mods: " + this.numAminoacidModifications());

        //override all modifications on the run entry that are already in the
        // database with their equivalents in the database.
        //So that everything agrees BEFORE
        //we start adding observations
        Map<MS2Modification, MS2Modification> oldNewModMap =
                newRunEntry.overrideDuplicateModifications(this);

        //check for any modifications that aren't already in the database,
        //add them all
        MS2Modification[] newRunModArray = newRunEntry.getModifications();
        if (newRunModArray != null)
        {
            _log.debug("Run entry has " + newRunModArray.length + " modifications");
            _log.debug("Existing database has " + this.numAminoacidModifications() + " modifications");
            for (MS2Modification newMs2Mod : newRunModArray)
            {
                if (debugEnabled)
                    _log.debug("Evaluating modification for inclusion: " + newMs2Mod);

                //membership is by object identity, not equivalence: equivalent
                //mods were already swapped for the database's instances above
                boolean alreadyHasMod = false;
                for (MS2Modification existingMod : mAminoacidModifications)
                {
                    if (debugEnabled)
                        _log.debug(" Comparing against: " + existingMod);
                    if (existingMod == newMs2Mod)
                    {
                        alreadyHasMod = true;
                        _log.debug(" Found it already there");
                        break;
                    }
                }
                if (!alreadyHasMod)
                {
                    addAminoacidModification(newMs2Mod);
                    _log.debug("Inserting it, now we've got " + this.numAminoacidModifications());
                }
            }
            newRunEntry.setModifications(newRunModArray);
        }

        mAmtRunEntries.add(newRunEntry);
        mAmtRunSequenceMap.put(newRunEntry,numRuns());
        _log.debug("After run add, num mods: " + this.numAminoacidModifications());

        return oldNewModMap;
    }

    /**
     * Get the run with the specified sequence.
     * Note: sequence is one-based and ArrayLists are zero-based, so we subtract one
     * when referencing the ArrayList;
     * @param sequence (one-based)
     * @return the run, or null if the sequence is out of range
     */
    public AmtRunEntry getRunBySequence(int sequence)
    {
        if (sequence < 1 || sequence > numRuns())
            return null;
        return mAmtRunEntries.get(sequence-1);
    }

    /**
     * Return an array containing all runs
     * @return a newly-allocated array of all runs, in sequence order
     */
    public AmtRunEntry[] getRuns()
    {
        return mAmtRunEntries.toArray(new AmtRunEntry[0]);
    }

    /**
     *
     * @param runEntry
     * @return the sequence of this run entry, or -1 if it's not in the database
     */
    public int getSequenceForRun(AmtRunEntry runEntry)
    {
        Integer runSequence = mAmtRunSequenceMap.get(runEntry);
        return runSequence == null ? -1 : runSequence;
    }


    //Consider moving this stuff to a new diagnostic class

    /**
     * For every peptide entry, the absolute difference between its median observed
     * hydrophobicity and its predicted hydrophobicity.
     * @return one value per entry
     */
    private double[] calculateAbsoluteMedianHydroDeviations()
    {
        double[] deviations = new double[numEntries()];
        int i = 0;
        for (AmtPeptideEntry entry : getEntries())
        {
            deviations[i++] =
                    Math.abs(entry.getMedianObservedHydrophobicity() -
                            entry.getPredictedHydrophobicity());
        }
        return deviations;
    }

    /**
     * Calculate the mean absolute difference of all median peptide H observations from prediction
     * @return
     */
    public double calculateMeanDifferenceFromPredictedHydro()
    {
        return BasicStatistics.mean(calculateAbsoluteMedianHydroDeviations());
    }

    /**
     * Calculate the standard deviation of all absolute deviations of median peptide H
     * observations from prediction
     * @return
     */
    public double calculateStandardDeviationDifferenceFromPredictedHydro()
    {
        return BasicStatistics.standardDeviation(calculateAbsoluteMedianHydroDeviations());
    }


    public String getHydrophobicityAlgorithmName()
    {
        return mHydrophobicityAlgorithmName;
    }

    public void setHydrophobicityAlgorithmName(String hydrophobicityAlgorithmName)
    {
        this.mHydrophobicityAlgorithmName = hydrophobicityAlgorithmName;
    }

    public double getHydrophobicityAlgorithmVersion()
    {
        return mHydrophobicityAlgorithmVersion;
    }

    public void setHydrophobicityAlgorithmVersion(double hydrophobicityAlgorithmVersion)
    {
        this.mHydrophobicityAlgorithmVersion = hydrophobicityAlgorithmVersion;
    }

    /**
     * @return the file this database was loaded from, or null if none was set
     */
    public File getAmtDBSourceFile()
    {
        return mAmtDBSourceFile;
    }

    public void setAmtDBSourceFile(File mAmtDBSourceFile)
    {
        this.mAmtDBSourceFile = mAmtDBSourceFile;
    }
}