package com.compomics.util.experiment.identification.matches;
import com.compomics.util.experiment.biology.Peptide;
import com.compomics.util.experiment.identification.IdentificationMatch;
import com.compomics.util.experiment.identification.spectrum_assumptions.PeptideAssumption;
import com.compomics.util.experiment.identification.SpectrumIdentificationAssumption;
import com.compomics.util.experiment.identification.spectrum_assumptions.TagAssumption;
import com.compomics.util.experiment.identification.protein_inference.proteintree.ProteinTree;
import com.compomics.util.experiment.identification.amino_acid_tags.matchers.TagMatcher;
import com.compomics.util.experiment.identification.protein_inference.PeptideProteinMapping;
import com.compomics.util.preferences.SequenceMatchingPreferences;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
/**
 * This class models a spectrum match: the identification assumptions made for
 * one spectrum, grouped by advocate (identification algorithm) and score,
 * together with the retained best peptide and tag assumptions.
 *
 * @author Marc Vaudel
 */
public class SpectrumMatch extends IdentificationMatch {

    /**
     * The version UID for Serialization/Deserialization compatibility.
     */
    static final long serialVersionUID = 3227760855215444318L;
    /**
     * The key of the matched spectrum.
     */
    private String spectrumKey;
    /**
     * Map of the identification algorithm assumptions: advocate number &gt;
     * score &gt; assumptions. Null until a first assumption is added and after
     * {@link #removeAssumptions()}.
     */
    private HashMap<Integer, HashMap<Double, ArrayList<SpectrumIdentificationAssumption>>> assumptionsMap = null;
    /**
     * Cached index of the tag assumptions: advocate number &gt; beginning of
     * the longest amino acid sequence &gt; assumptions. Null when not built.
     */
    private HashMap<Integer, HashMap<String, ArrayList<TagAssumption>>> tagAssumptionsMap = null;
    /**
     * The size of the keys used for the tag assumptions map, -1 when no map
     * was built.
     */
    private int tagAssumptionsMapKeySize = -1;
    /**
     * The best peptide assumption.
     */
    private PeptideAssumption bestPeptideAssumption;
    /**
     * The best tag assumption.
     */
    // NOTE(review): the misspelt field name is kept on purpose. The class
    // declares a serialVersionUID, so instances are presumably serialized by
    // field name; renaming would break compatibility with stored objects.
    private TagAssumption bestTagAsssumption;
    /**
     * The spectrum number in the mgf file. Will be used in case the spectrum
     * title does not match.
     */
    private Integer spectrumNumber = null;

    /**
     * Constructor for an empty spectrum match without key.
     */
    public SpectrumMatch() {
    }

    /**
     * Constructor for the spectrum match. The assumption is registered under
     * its own advocate and score.
     *
     * @param spectrumKey the matched spectrum key
     * @param assumption the matching assumption
     */
    public SpectrumMatch(String spectrumKey, SpectrumIdentificationAssumption assumption) {
        this.spectrumKey = spectrumKey;
        // Private helper rather than addHit: no overridable call from a constructor.
        registerAssumption(assumption.getAdvocate(), assumption);
    }

    /**
     * Constructor for the spectrum match.
     *
     * @param spectrumKey the matched spectrum key
     */
    public SpectrumMatch(String spectrumKey) {
        this.spectrumKey = spectrumKey;
    }

    /**
     * Getter for the best peptide assumption.
     *
     * @return the best peptide assumption for the spectrum
     */
    public PeptideAssumption getBestPeptideAssumption() {
        return bestPeptideAssumption;
    }

    /**
     * Setter for the best peptide assumption.
     *
     * @param bestAssumption the best peptide assumption for the spectrum
     */
    public void setBestPeptideAssumption(PeptideAssumption bestAssumption) {
        this.bestPeptideAssumption = bestAssumption;
    }

    /**
     * Getter for the best tag assumption.
     *
     * @return the best tag assumption for the spectrum
     */
    public TagAssumption getBestTagAssumption() {
        return bestTagAsssumption;
    }

    /**
     * Setter for the best tag assumption.
     *
     * @param bestTagAsssumption the best tag assumption for the spectrum
     */
    public void setBestTagAssumption(TagAssumption bestTagAsssumption) {
        this.bestTagAsssumption = bestTagAsssumption;
    }

    @Override
    public String getKey() {
        return spectrumKey;
    }

    /**
     * Returns all assumptions for the specified advocate indexed by their
     * score. Null if no assumption map is present or if the advocate is not
     * in the map.
     *
     * @param advocateId the desired advocate ID
     *
     * @return all assumptions of the given advocate, score &gt; assumptions
     */
    public HashMap<Double, ArrayList<SpectrumIdentificationAssumption>> getAllAssumptions(int advocateId) {
        if (assumptionsMap == null) {
            return null;
        }
        return assumptionsMap.get(advocateId);
    }

    /**
     * Returns all assumptions for all identification algorithms as a new
     * list. Null if no assumption map is present; the list can be empty
     * otherwise.
     *
     * @return all assumptions
     */
    public ArrayList<SpectrumIdentificationAssumption> getAllAssumptions() {
        if (assumptionsMap == null) {
            return null;
        }
        ArrayList<SpectrumIdentificationAssumption> result = new ArrayList<SpectrumIdentificationAssumption>();
        for (HashMap<Double, ArrayList<SpectrumIdentificationAssumption>> advocateMap : assumptionsMap.values()) {
            for (ArrayList<SpectrumIdentificationAssumption> assumptionsAtScore : advocateMap.values()) {
                result.addAll(assumptionsAtScore);
            }
        }
        return result;
    }

    /**
     * Returns the assumptions map: advocate id &gt; score &gt; list of
     * assumptions. This is the internal map, not a copy; it can be null.
     *
     * @return the assumptions map
     */
    public HashMap<Integer, HashMap<Double, ArrayList<SpectrumIdentificationAssumption>>> getAssumptionsMap() {
        return assumptionsMap;
    }

    /**
     * Removes all assumptions from the assumptions map. The best peptide and
     * tag assumptions are stored separately and are not affected. The cached
     * tag assumptions index is discarded as well since it would otherwise
     * describe assumptions which are no longer in this match.
     */
    public void removeAssumptions() {
        assumptionsMap = null;
        removeTagAssumptionsMap();
    }

    /**
     * Adds a hit to the assumptions of the given advocate.
     *
     * @param otherAdvocateId the index of the new advocate
     * @param otherAssumption the new identification assumption
     * @param ascendingScore indicates whether the score is ascending when hits
     * get better. Currently not used by this method.
     */
    public void addHit(int otherAdvocateId, SpectrumIdentificationAssumption otherAssumption, boolean ascendingScore) {
        registerAssumption(otherAdvocateId, otherAssumption);
    }

    /**
     * Stores the given assumption under the given advocate and under the
     * score of the assumption, creating the intermediate maps when needed.
     *
     * @param advocateId the index of the advocate
     * @param assumption the assumption to store
     */
    private void registerAssumption(int advocateId, SpectrumIdentificationAssumption assumption) {
        if (assumptionsMap == null) {
            assumptionsMap = new HashMap<Integer, HashMap<Double, ArrayList<SpectrumIdentificationAssumption>>>(1);
        }
        HashMap<Double, ArrayList<SpectrumIdentificationAssumption>> advocateMap = assumptionsMap.get(advocateId);
        if (advocateMap == null) {
            advocateMap = new HashMap<Double, ArrayList<SpectrumIdentificationAssumption>>(1);
            assumptionsMap.put(advocateId, advocateMap);
        }
        Double score = assumption.getScore();
        ArrayList<SpectrumIdentificationAssumption> assumptionList = advocateMap.get(score);
        if (assumptionList == null) {
            assumptionList = new ArrayList<SpectrumIdentificationAssumption>(1);
            advocateMap.put(score, assumptionList);
        }
        assumptionList.add(assumption);
        if (assumption instanceof TagAssumption) {
            // A cached tag index would miss this tag, drop it so that it gets rebuilt.
            removeTagAssumptionsMap();
        }
    }

    @Override
    public MatchType getType() {
        return MatchType.Spectrum;
    }

    /**
     * Replaces the key. The key of the PSM should always be the same as the
     * spectrum key it links to.
     *
     * @param newKey the new key
     */
    public void setKey(String newKey) {
        this.spectrumKey = newKey;
    }

    /**
     * Returns the spectrum number in the spectrum file. Returns null if not
     * implemented (versions older than 3.4.17). 1 is the first spectrum.
     *
     * @return the spectrum number in the spectrum file
     */
    public Integer getSpectrumNumber() {
        return spectrumNumber;
    }

    /**
     * Sets the spectrum number in the spectrum file. 1 is the first spectrum.
     *
     * @param spectrumNumber the spectrum number in the spectrum file
     */
    public void setSpectrumNumber(Integer spectrumNumber) {
        this.spectrumNumber = spectrumNumber;
    }

    /**
     * Removes an assumption from the mapping. The first occurrence of the
     * assumption is removed from every score list of every advocate; score
     * entries and advocate entries left empty are removed as well.
     *
     * @param assumption the assumption to remove
     */
    public void removeAssumption(SpectrumIdentificationAssumption assumption) {
        if (assumptionsMap == null) {
            return;
        }
        // Empty entries are collected first and removed after the iteration,
        // the maps must not be modified while their key sets are iterated.
        ArrayList<Integer> emptyAdvocates = new ArrayList<Integer>();
        for (Integer advocateId : assumptionsMap.keySet()) {
            HashMap<Double, ArrayList<SpectrumIdentificationAssumption>> advocateMap = assumptionsMap.get(advocateId);
            ArrayList<Double> emptyScores = new ArrayList<Double>();
            for (Double score : advocateMap.keySet()) {
                ArrayList<SpectrumIdentificationAssumption> assumptionsAtScore = advocateMap.get(score);
                assumptionsAtScore.remove(assumption);
                if (assumptionsAtScore.isEmpty()) {
                    emptyScores.add(score);
                }
            }
            for (Double score : emptyScores) {
                advocateMap.remove(score);
            }
            if (advocateMap.isEmpty()) {
                emptyAdvocates.add(advocateId);
            }
        }
        for (Integer advocateId : emptyAdvocates) {
            assumptionsMap.remove(advocateId);
        }
        if (assumption instanceof TagAssumption) {
            // A cached tag index may still reference the removed tag.
            removeTagAssumptionsMap();
        }
    }

    /**
     * Indicates whether the spectrum match contains at least one assumption,
     * whatever the advocate.
     *
     * @return a boolean indicating whether the spectrum match contains an
     * assumption
     */
    public boolean hasAssumption() {
        if (assumptionsMap == null) {
            return false;
        }
        for (HashMap<Double, ArrayList<SpectrumIdentificationAssumption>> advocateMap : assumptionsMap.values()) {
            for (ArrayList<SpectrumIdentificationAssumption> assumptionsAtScore : advocateMap.values()) {
                if (!assumptionsAtScore.isEmpty()) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Indicates whether the spectrum match contains at least one assumption
     * for the given advocate (for example a search engine, see the Advocate
     * class).
     *
     * @param advocateId The index of the advocate
     * @return a boolean indicating whether the spectrum match contains an
     * assumption for the given advocate
     */
    public boolean hasAssumption(int advocateId) {
        if (assumptionsMap == null) {
            return false;
        }
        HashMap<Double, ArrayList<SpectrumIdentificationAssumption>> advocateMap = assumptionsMap.get(advocateId);
        if (advocateMap == null) {
            return false;
        }
        for (ArrayList<SpectrumIdentificationAssumption> assumptionsAtScore : advocateMap.values()) {
            if (!assumptionsAtScore.isEmpty()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Creates a peptide based spectrum match where peptide assumptions are
     * deduced from tag assumptions. The original tag assumption is added to
     * every peptide assumption derived from it as refinement parameter.
     * Assumptions which are not tag assumptions are ignored. This match is
     * not modified.
     *
     * @param proteinTree the protein tree to use to map tags to peptides
     * @param tagMatcher the tag matcher to use
     * @param sequenceMatchingPreferences the sequence matching preferences
     * @param massTolerance the MS2 mass tolerance to use
     * @param scoreInAscendingOrder if true the scores of an advocate are
     * processed from the lowest to the highest, from the highest to the
     * lowest otherwise
     * @param ascendingScore indicates whether the score is ascending when hits
     * get better
     *
     * @return a new spectrum match containing the peptide assumptions made from
     * the tag assumptions.
     *
     * @throws IOException if an IOException occurs
     * @throws SQLException if an SQLException occurs
     * @throws ClassNotFoundException if a ClassNotFoundException occurs
     * @throws InterruptedException if an InterruptedException occurs
     */
    public SpectrumMatch getPeptidesFromTags(ProteinTree proteinTree, TagMatcher tagMatcher, SequenceMatchingPreferences sequenceMatchingPreferences, Double massTolerance,
            boolean scoreInAscendingOrder, boolean ascendingScore)
            throws IOException, InterruptedException, ClassNotFoundException, SQLException {

        SpectrumMatch spectrumMatch = new SpectrumMatch(spectrumKey);

        if (assumptionsMap == null) {
            return spectrumMatch;
        }

        for (int advocateId : assumptionsMap.keySet()) {

            HashMap<Double, ArrayList<SpectrumIdentificationAssumption>> advocateMap = assumptionsMap.get(advocateId);

            // NOTE(review): the rank is never incremented, every peptide
            // assumption created here gets rank 1 - confirm this is intended.
            int rank = 1;

            ArrayList<Double> scores = new ArrayList<Double>(advocateMap.keySet());
            if (scoreInAscendingOrder) {
                Collections.sort(scores);
            } else {
                Collections.sort(scores, Collections.reverseOrder());
            }

            for (double score : scores) {
                for (SpectrumIdentificationAssumption assumption : advocateMap.get(score)) {
                    if (assumption instanceof TagAssumption) {
                        TagAssumption tagAssumption = (TagAssumption) assumption;
                        ArrayList<PeptideProteinMapping> proteinMapping
                                = proteinTree.getProteinMapping(tagAssumption.getTag(), tagMatcher, sequenceMatchingPreferences, massTolerance);
                        for (Peptide peptide : PeptideProteinMapping.getPeptides(proteinMapping, sequenceMatchingPreferences)) {
                            // The tag score is used both as score and raw score of the peptide assumption.
                            PeptideAssumption peptideAssumption = new PeptideAssumption(peptide, rank, advocateId,
                                    assumption.getIdentificationCharge(), score, assumption.getIdentificationFile());
                            peptideAssumption.setRawScore(score);
                            peptideAssumption.addUrParam(tagAssumption);
                            spectrumMatch.addHit(advocateId, peptideAssumption, ascendingScore);
                        }
                    }
                }
            }
        }

        return spectrumMatch;
    }

    /**
     * Returns a map containing the tag assumptions of this spectrum match
     * indexed by advocate and by the first keySize characters of the longest
     * amino acid sequence of the tag. Every advocate of the assumptions map
     * gets an entry, possibly empty. The map is cached and reused by
     * subsequent calls with the same key size until it is removed. If this
     * match has no assumptions an empty map is returned.
     *
     * @param keySize the size to use for the keys
     * @param sequenceMatchingPreferences the sequence matching preferences.
     * Currently not used by this method.
     *
     * @return a map containing the tag assumptions of this spectrum match
     * indexed by the beginning of the longest amino acid sequence
     *
     * @throws IllegalArgumentException if the longest amino acid sequence of
     * a tag is shorter than the key size. The previously cached map, if any,
     * is left untouched in that case.
     */
    public HashMap<Integer, HashMap<String, ArrayList<TagAssumption>>> getTagAssumptionsMap(int keySize, SequenceMatchingPreferences sequenceMatchingPreferences) {
        if (tagAssumptionsMap == null || keySize != tagAssumptionsMapKeySize) {
            // Build in a local variable and publish only once complete, so
            // that a failure cannot leave a partial map paired with the key
            // size of the previous map.
            HashMap<Integer, HashMap<String, ArrayList<TagAssumption>>> newIndex = buildTagAssumptionsIndex(keySize);
            tagAssumptionsMap = newIndex;
            tagAssumptionsMapKeySize = keySize;
        }
        return tagAssumptionsMap;
    }

    /**
     * Builds the tag assumptions index for the given key size from the
     * current content of the assumptions map.
     *
     * @param keySize the size to use for the keys
     *
     * @return the tag assumptions index, empty if there is no assumption map
     */
    private HashMap<Integer, HashMap<String, ArrayList<TagAssumption>>> buildTagAssumptionsIndex(int keySize) {
        if (assumptionsMap == null) {
            return new HashMap<Integer, HashMap<String, ArrayList<TagAssumption>>>(0);
        }
        HashMap<Integer, HashMap<String, ArrayList<TagAssumption>>> index
                = new HashMap<Integer, HashMap<String, ArrayList<TagAssumption>>>(assumptionsMap.size());
        for (Integer advocate : assumptionsMap.keySet()) {
            HashMap<String, ArrayList<TagAssumption>> advocateIndex = new HashMap<String, ArrayList<TagAssumption>>();
            index.put(advocate, advocateIndex);
            for (Collection<SpectrumIdentificationAssumption> assumptionsAtScore : assumptionsMap.get(advocate).values()) {
                for (SpectrumIdentificationAssumption assumption : assumptionsAtScore) {
                    if (assumption instanceof TagAssumption) {
                        TagAssumption tagAssumption = (TagAssumption) assumption;
                        String longestSequence = tagAssumption.getTag().getLongestAminoAcidSequence();
                        if (longestSequence.length() < keySize) {
                            throw new IllegalArgumentException("Tag " + tagAssumption.getTag()
                                    + " cannot be indexed. Longest amino acid sequence " + longestSequence
                                    + " should be of length >= " + keySize + ".");
                        }
                        String subSequence = longestSequence.substring(0, keySize);
                        ArrayList<TagAssumption> tagAssumptions = advocateIndex.get(subSequence);
                        if (tagAssumptions == null) {
                            tagAssumptions = new ArrayList<TagAssumption>();
                            advocateIndex.put(subSequence, tagAssumptions);
                        }
                        tagAssumptions.add(tagAssumption);
                    }
                }
            }
        }
        return index;
    }

    /**
     * Removes the tags assumptions map to free memory. The map is rebuilt by
     * the next call to getTagAssumptionsMap.
     */
    public void removeTagAssumptionsMap() {
        tagAssumptionsMap = null;
        tagAssumptionsMapKeySize = -1;
    }
}