package com.compomics.util.experiment.biology; import com.compomics.util.Util; import com.compomics.util.experiment.identification.matches.ModificationMatch; import com.compomics.util.experiment.identification.amino_acid_tags.TagComponent; import com.compomics.util.experiment.personalization.ExperimentObject; import com.compomics.util.experiment.identification.identification_parameters.PtmSettings; import com.compomics.util.preferences.SequenceMatchingPreferences; import java.awt.Color; import java.util.ArrayList; import java.util.HashMap; /** * This class represents a series of amino acids with associated modifications. * * @author Marc Vaudel */ public class AminoAcidSequence extends ExperimentObject implements TagComponent { /** * The sequence as string. */ private String sequence; /** * The sequence as string builder. */ private StringBuilder sequenceStringBuilder = null; /** * The sequence as amino acid pattern. */ private AminoAcidPattern aminoAcidPattern = null; /** * The modifications carried by the amino acid sequence at target amino * acids. 1 is the first amino acid. */ private HashMap<Integer, ArrayList<ModificationMatch>> modifications = null; /** * Creates a blank sequence. All maps are null. */ public AminoAcidSequence() { } /** * Constructor taking a sequence of amino acids as input. * * @param sequence a sequence of amino acids */ public AminoAcidSequence(String sequence) { this.sequence = sequence; } /** * Constructor taking a sequence of amino acids as input. * * @param sequence a sequence of amino acids * @param modifications the modifications of this sequence in a map */ public AminoAcidSequence(String sequence, HashMap<Integer, ArrayList<ModificationMatch>> modifications) { this.sequence = sequence; this.modifications = modifications; } /** * Creates a sequence from another sequence. 
* * @param sequence the other sequence */ public AminoAcidSequence(AminoAcidSequence sequence) { this.sequence = sequence.getSequence(); HashMap<Integer, ArrayList<ModificationMatch>> modificationMatches = sequence.getModificationMatches(); if (modificationMatches != null) { modifications = new HashMap<Integer, ArrayList<ModificationMatch>>(modificationMatches.size()); for (int site : modificationMatches.keySet()) { ArrayList<ModificationMatch> oldModifications = modificationMatches.get(site); ArrayList<ModificationMatch> newModifications = new ArrayList<ModificationMatch>(oldModifications.size()); for (ModificationMatch modificationMatch : oldModifications) { newModifications.add(modificationMatch.clone()); } modifications.put(site, newModifications); } } } /** * Returns the sequence as String. * * @return the sequence as String */ public String getSequence() { setSequenceStringBuilder(false); if (sequence != null) { return sequence; } else { return ""; } } /** * Returns the amino acid at the given index on the sequence in its single * letter code. 0 is the first amino acid. * * @param aa the index on the sequence * * @return the amino acid at the given index on the sequence in its single * letter code */ public char charAt(int aa) { setSequenceStringBuilder(false); return sequence.charAt(aa); } /** * Returns the amino acid at the given index on the sequence. 0 is the first * amino acid. * * @param aa the index on the sequence * * @return the amino acid at the given index on the sequence */ public AminoAcid getAminoAcidAt(int aa) { return AminoAcid.getAminoAcid(charAt(aa)); } /** * Sets the sequence. * * @param aminoAcidSequence the sequence */ public void setSequence(String aminoAcidSequence) { sequenceStringBuilder = null; this.sequence = aminoAcidSequence; } /** * replaces the amino acid at the given position by the given amino acid * represented by its single letter code. 0 is the first amino acid. * * @param index the index where the amino acid should be set. 
* @param aa the amino acid to be set */ public void setAaAtIndex(int index, char aa) { setSequenceStringBuilder(true); sequenceStringBuilder.setCharAt(index, aa); } /** * Returns this amino acid sequence as amino acid pattern. * * @return this amino acid sequence as amino acid pattern */ public AminoAcidPattern getAsAminoAcidPattern() { setSequenceStringBuilder(false); if (aminoAcidPattern == null) { aminoAcidPattern = AminoAcidPattern.getAminoAcidPatternFromString(sequence); if (modifications != null) { for (Integer location : modifications.keySet()) { for (ModificationMatch modMatch : modifications.get(location)) { aminoAcidPattern.addModificationMatch(location, modMatch); } } } } return aminoAcidPattern; } /** * Loads the sequence in the string builder. */ private void setSequenceStringBuilder(boolean stringbuilder) { if (stringbuilder && sequenceStringBuilder == null) { if (sequence != null) { sequenceStringBuilder = new StringBuilder(sequence); sequence = null; } else { sequenceStringBuilder = new StringBuilder(1); } } else if (sequence == null && sequenceStringBuilder != null) { sequence = sequenceStringBuilder.toString(); } } /** * the sequence is kept in different formats internally. Calling this method * removes them from the cache. */ public void emptyInternalCaches() { sequenceStringBuilder = null; aminoAcidPattern = null; } /** * Indicates whether the sequence is found in the given amino acid sequence. * * @param aminoAcidSequence the amino acid sequence * @param sequenceMatchingPreferences the sequence matching preferences * * @return a boolean indicating whether the sequence is found in the given * amino acid sequence */ public boolean matchesIn(String aminoAcidSequence, SequenceMatchingPreferences sequenceMatchingPreferences) { return firstIndex(aminoAcidSequence, sequenceMatchingPreferences) >= 0; } /** * Indicates whether the sequence is found in the given amino acid sequence. 
* * @param aminoAcidSequence the amino acid sequence * @param sequenceMatchingPreferences the sequence matching preferences * * @return a boolean indicating whether the sequence is found in the given * amino acid sequence */ public boolean matchesIn(AminoAcidSequence aminoAcidSequence, SequenceMatchingPreferences sequenceMatchingPreferences) { return matchesIn(aminoAcidSequence.getSequence(), sequenceMatchingPreferences); } /** * Indicates whether the sequence matches the given amino acid sequence in * size and according to the given matching preferences. * * @param aminoAcidSequence the amino acid sequence * @param sequenceMatchingPreferences the sequence matching preferences * * @return a boolean indicating whether the sequence is found in the given * amino acid sequence */ public boolean matches(String aminoAcidSequence, SequenceMatchingPreferences sequenceMatchingPreferences) { return length() == aminoAcidSequence.length() && firstIndex(aminoAcidSequence, sequenceMatchingPreferences) >= 0; } /** * Indicates whether the sequence matches the given amino acid sequence in * size and according to the given matching preferences. * * @param aminoAcidSequence the amino acid sequence * @param sequenceMatchingPreferences the sequence matching preferences * * @return a boolean indicating whether the sequence is found in the given * amino acid sequence */ public boolean matches(AminoAcidSequence aminoAcidSequence, SequenceMatchingPreferences sequenceMatchingPreferences) { return matches(aminoAcidSequence.getSequence(), sequenceMatchingPreferences); } /** * Returns the first index where the amino acid sequence is found in the * given sequence. -1 if not found. 0 is the first amino acid. 
* * @param aminoAcidSequence the amino acid sequence to look into * @param sequenceMatchingPreferences the sequence matching preferences * * @return the first index where the amino acid sequence is found */ public int firstIndex(String aminoAcidSequence, SequenceMatchingPreferences sequenceMatchingPreferences) { return getAsAminoAcidPattern().firstIndex(aminoAcidSequence, sequenceMatchingPreferences, 0); } /** * Indicates whether another AminoAcidPattern targets the same sequence * without accounting for PTM localization. Modifications are considered * equal when of same mass. Modifications should be loaded in the PTM * factory. * * @param anotherPattern the other AminoAcidPattern * @param sequenceMatchingPreferences the sequence matching preferences * * @return true if the other AminoAcidPattern targets the same sequence */ public boolean isSameSequenceAndModificationStatusAs(AminoAcidPattern anotherPattern, SequenceMatchingPreferences sequenceMatchingPreferences) { if (!anotherPattern.matches(anotherPattern, sequenceMatchingPreferences)) { return false; } PTMFactory ptmFactory = PTMFactory.getInstance(); HashMap<Double, Integer> masses1 = new HashMap<Double, Integer>(); for (int i = 1; i <= length(); i++) { ArrayList<ModificationMatch> tempModifications = getModificationsAt(i); for (ModificationMatch modMatch : tempModifications) { PTM ptm = ptmFactory.getPTM(modMatch.getTheoreticPtm()); double mass = ptm.getMass(); Integer occurrence = masses1.get(mass); if (occurrence == null) { masses1.put(mass, 1); } else { masses1.put(mass, occurrence + 1); } } } HashMap<Double, Integer> masses2 = new HashMap<Double, Integer>(); for (int i = 1; i <= length(); i++) { ArrayList<ModificationMatch> tempModifications = anotherPattern.getModificationsAt(i); for (ModificationMatch modMatch : tempModifications) { PTM ptm = ptmFactory.getPTM(modMatch.getTheoreticPtm()); double mass = ptm.getMass(); Integer occurrence = masses2.get(mass); if (occurrence == null) { masses2.put(mass, 
1); } else { masses2.put(mass, occurrence + 1); } } } if (masses1.size() != masses2.size()) { return false; } for (Double mass : masses1.keySet()) { Integer occurrence1 = masses1.get(mass); Integer occurrence2 = masses2.get(mass); if (occurrence2 == null || occurrence2.intValue() != occurrence1) { return false; } } for (int i = 1; i <= length(); i++) { ArrayList<ModificationMatch> mods1 = getModificationsAt(i); ArrayList<ModificationMatch> mods2 = anotherPattern.getModificationsAt(i); if (mods1.size() != mods2.size()) { return false; } for (int j = 0; j < mods1.size(); j++) { ModificationMatch modificationMatch1 = mods1.get(j); ModificationMatch modificationMatch2 = mods2.get(j); if (!modificationMatch1.equals(modificationMatch2)) { return false; } } } return true; } /** * Returns the length of the sequence in amino acids. * * @return the length of the sequence in amino acids */ public int length() { if (sequence != null) { return sequence.length(); } else if (sequenceStringBuilder != null) { return sequenceStringBuilder.length(); } else { return 0; } } /** * Appends another sequence at the end of this sequence. * * @param otherSequence the other sequence to append. */ public void appendCTerm(AminoAcidSequence otherSequence) { setSequenceStringBuilder(true); int previousLength = length(); sequenceStringBuilder.append(otherSequence.getSequence()); HashMap<Integer, ArrayList<ModificationMatch>> modificationMatches = otherSequence.getModificationMatches(); if (modificationMatches != null) { for (int otherSite : modificationMatches.keySet()) { int newSite = otherSite + previousLength; for (ModificationMatch oldModificationMatch : modificationMatches.get(otherSite)) { ModificationMatch newModificationMatch = oldModificationMatch.clone(); oldModificationMatch.setModificationSite(newSite); addModificationMatch(newSite, newModificationMatch); } } } } /** * Appends a series of unmodified amino acids to the sequence. 
* * @param otherSequence a series of unmodified amino acids represented by * their single letter code */ public void appendCTerm(String otherSequence) { setSequenceStringBuilder(true); sequenceStringBuilder.append(otherSequence); } /** * Inserts another sequence in this sequence. * * @param offset the index where this sequence should be inserted, 0 is the * first amino acid. * @param otherSequence the other sequence to insert. */ public void insert(int offset, AminoAcidSequence otherSequence) { setSequenceStringBuilder(true); sequenceStringBuilder.insert(0, otherSequence.getSequence()); int otherSequenceLength = otherSequence.length(); HashMap<Integer, ArrayList<ModificationMatch>> otherModificationMatches = otherSequence.getModificationMatches(); if (otherModificationMatches != null || modifications != null) { int otherSize = 0; if (otherModificationMatches != null) { otherSize = otherModificationMatches.size(); } int newSize = 0; if (modifications != null) { newSize = modifications.size(); } HashMap<Integer, ArrayList<ModificationMatch>> newModificationMatches = new HashMap<Integer, ArrayList<ModificationMatch>>(otherSize + newSize); if (otherModificationMatches != null) { for (int site : otherModificationMatches.keySet()) { ArrayList<ModificationMatch> modMatches = otherModificationMatches.get(site); ArrayList<ModificationMatch> newModMatches = new ArrayList<ModificationMatch>(modMatches.size()); for (ModificationMatch modificationMatch : modMatches) { newModMatches.add(modificationMatch.clone()); } newModificationMatches.put(site, newModMatches); } } if (modifications != null) { for (int site : modifications.keySet()) { int newSite = site + otherSequenceLength; ArrayList<ModificationMatch> modMatches = modifications.get(site); ArrayList<ModificationMatch> newModMatches = new ArrayList<ModificationMatch>(modMatches.size()); for (ModificationMatch oldModificationMatch : modifications.get(site)) { ModificationMatch newModificationMatch = 
oldModificationMatch.clone(); oldModificationMatch.setModificationSite(newSite); newModMatches.add(newModificationMatch); } newModificationMatches.put(site, newModMatches); } } modifications = newModificationMatches; } } /** * Inserts another sequence in this sequence. * * @param offset the index where this sequence should be inserted, 0 is the * first amino acid. * @param otherSequence the other sequence to insert. */ public void insert(int offset, String otherSequence) { setSequenceStringBuilder(true); sequenceStringBuilder.insert(offset, otherSequence); } /** * Appends another sequence at the beginning of this sequence keeping the * original order. * * @param otherSequence the other sequence to append. */ public void appendNTerm(AminoAcidSequence otherSequence) { insert(0, otherSequence); } /** * Appends a series of unmodified amino acids to the beginning sequence * keeping the original order. * * @param otherSequence a series of unmodified amino acids represented by * their single letter code */ public void appendNTerm(String otherSequence) { insert(0, otherSequence); } /** * Getter for the modifications carried by this sequence in a map: aa number * > modification matches. 1 is the first amino acid. * * @return the modifications matches as found by the search engine */ public HashMap<Integer, ArrayList<ModificationMatch>> getModificationMatches() { return modifications; } /** * Returns a list of the indexes of the amino acids carrying a modification. * 1 is the first amino acid. * * @return a list of the indexes of the amino acids carrying a modification */ public ArrayList<Integer> getModificationIndexes() { if (modifications == null) { return new ArrayList<Integer>(); } return new ArrayList<Integer>(modifications.keySet()); } /** * Returns the modifications found at a given localization. * * @param localization the localization as amino acid number. 1 is the first * amino acid. * * @return the modifications found at a given localization as a list. 
*/ public ArrayList<ModificationMatch> getModificationsAt(int localization) { if (modifications != null) { ArrayList<ModificationMatch> result = modifications.get(localization); if (result != null) { return result; } } return new ArrayList<ModificationMatch>(); } /** * Removes a modification match in the given sequence. * * @param localisation the localization of the modification * @param modificationMatch the modification match to remove */ public void removeModificationMatch(int localisation, ModificationMatch modificationMatch) { ArrayList<ModificationMatch> modificationMatches = modifications.get(localisation); if (modificationMatches != null) { modificationMatches.remove(modificationMatch); if (modificationMatches.isEmpty()) { modifications.remove(localisation); } } } /** * Clears the list of imported modification matches. */ public void clearModificationMatches() { if (modifications != null) { modifications.clear(); } } /** * Adds a modification to one of the amino acid sequence. * * @param localization the index of the amino acid retained as target of the * modification. 1 is the first amino acid. * @param modificationMatch the modification match */ public void addModificationMatch(int localization, ModificationMatch modificationMatch) { int index = localization - 1; if (index < 0) { throw new IllegalArgumentException("Wrong modification target index " + localization + ", 1 is the first amino acid for PTM localization."); } if (modifications == null) { modifications = new HashMap<Integer, ArrayList<ModificationMatch>>(); } ArrayList<ModificationMatch> modificationMatches = modifications.get(localization); if (modificationMatches == null) { modificationMatches = new ArrayList<ModificationMatch>(); modifications.put(localization, modificationMatches); } modificationMatches.add(modificationMatch); } /** * Adds a list of modifications to one of the amino acid sequence. * * @param localization the index of the amino acid retained as target of the * modification. 
1 is the first amino acid. * @param modificationMatches the modification matches */ public void addModificationMatches(int localization, ArrayList<ModificationMatch> modificationMatches) { int index = localization - 1; if (index < 0) { throw new IllegalArgumentException("Wrong modification target index " + localization + ", 1 is the first amino acid for PTM localization."); } if (modifications == null) { modifications = new HashMap<Integer, ArrayList<ModificationMatch>>(); } ArrayList<ModificationMatch> modificationMatchesAtIndex = modifications.get(localization); if (modificationMatchesAtIndex == null) { modificationMatchesAtIndex = new ArrayList<ModificationMatch>(); modifications.put(localization, modificationMatchesAtIndex); } modificationMatches.addAll(modificationMatches); } /** * Changes the localization of a modification match. * * @param modificationMatch the modification match of interest * @param oldLocalization the old localization * @param newLocalization the new localization */ public void changeModificationSite(ModificationMatch modificationMatch, int oldLocalization, int newLocalization) { int oldIndex = oldLocalization - 1; if (oldIndex < 0) { throw new IllegalArgumentException("Wrong modification old target index " + oldLocalization + ", 1 is the first amino acid for PTM localization."); } if (modifications == null || !modifications.containsKey(oldIndex) || !modifications.get(oldIndex).contains(modificationMatch)) { throw new IllegalArgumentException("Modification match " + modificationMatch + " not found at index " + oldLocalization + "."); } modifications.get(oldIndex).remove(modificationMatch); addModificationMatch(newLocalization, modificationMatch); } /** * Returns the modified sequence as an tagged string with potential * modification sites color coded or with PTM tags, e.g, <mox>. /!\ * this method will work only if the PTM found in the peptide are in the * PTMFactory. 
/!\ This method uses the modifications as set in the * modification matches of this peptide and displays all of them. Note: this * does not include HTML start end tags or terminal annotation. * * @param modificationProfile the modification profile of the search * @param useHtmlColorCoding if true, color coded HTML is used, otherwise * PTM tags, e.g, <mox>, are used * @param useShortName if true the short names are used in the tags * @param excludeAllFixedPtms if true, all fixed PTMs are excluded * @return the modified sequence as a tagged string */ public String getTaggedModifiedSequence(PtmSettings modificationProfile, boolean useHtmlColorCoding, boolean useShortName, boolean excludeAllFixedPtms) { HashMap<Integer, ArrayList<String>> confidentModificationSites = new HashMap<Integer, ArrayList<String>>(); HashMap<Integer, ArrayList<String>> representativeModificationSites = new HashMap<Integer, ArrayList<String>>(); HashMap<Integer, ArrayList<String>> secondaryModificationSites = new HashMap<Integer, ArrayList<String>>(); HashMap<Integer, ArrayList<String>> fixedModificationSites = new HashMap<Integer, ArrayList<String>>(); if (modifications != null) { for (int modSite : modifications.keySet()) { for (ModificationMatch modificationMatch : modifications.get(modSite)) { String modName = modificationMatch.getTheoreticPtm(); if (modificationMatch.isVariable()) { if (modificationMatch.isConfident()) { if (!confidentModificationSites.containsKey(modSite)) { confidentModificationSites.put(modSite, new ArrayList<String>()); } confidentModificationSites.get(modSite).add(modName); } else { if (!representativeModificationSites.containsKey(modSite)) { representativeModificationSites.put(modSite, new ArrayList<String>()); } representativeModificationSites.get(modSite).add(modName); } } else if (!excludeAllFixedPtms) { if (!fixedModificationSites.containsKey(modSite)) { fixedModificationSites.put(modSite, new ArrayList<String>()); } 
fixedModificationSites.get(modSite).add(modName); } } } } setSequenceStringBuilder(false); return getTaggedModifiedSequence(modificationProfile, sequence, confidentModificationSites, representativeModificationSites, secondaryModificationSites, fixedModificationSites, useHtmlColorCoding, useShortName); } /** * Returns the modified sequence as an tagged string with potential * modification sites color coded or with PTM tags, e.g, <mox>. /!\ * This method will work only if the PTM found in the peptide are in the * PTMFactory. * * @param modificationProfile the modification profile of the search * @param sequence the amino acid sequence to annotate * @param confidentModificationSites the confidently localized variable * modification sites in a map: aa number > list of modifications (1 is * the first AA) (can be null) * @param representativeAmbiguousModificationSites the representative site * of the ambiguously localized variable modifications in a map: aa number * > list of modifications (1 is the first AA) (can be null) * @param secondaryAmbiguousModificationSites the secondary sites of the * ambiguously localized variable modifications in a map: aa number > * list of modifications (1 is the first AA) (can be null) * @param fixedModificationSites the fixed modification sites in a map: aa * number > list of modifications (1 is the first AA) (can be null) * @param useHtmlColorCoding if true, color coded HTML is used, otherwise * PTM tags, e.g, <mox>, are used * @param useShortName if true the short names are used in the tags * @return the tagged modified sequence as a string */ public static String getTaggedModifiedSequence(PtmSettings modificationProfile, String sequence, HashMap<Integer, ArrayList<String>> confidentModificationSites, HashMap<Integer, ArrayList<String>> representativeAmbiguousModificationSites, HashMap<Integer, ArrayList<String>> secondaryAmbiguousModificationSites, HashMap<Integer, ArrayList<String>> fixedModificationSites, boolean useHtmlColorCoding, 
boolean useShortName) { if (confidentModificationSites == null) { confidentModificationSites = new HashMap<Integer, ArrayList<String>>(); } if (representativeAmbiguousModificationSites == null) { representativeAmbiguousModificationSites = new HashMap<Integer, ArrayList<String>>(); } if (secondaryAmbiguousModificationSites == null) { secondaryAmbiguousModificationSites = new HashMap<Integer, ArrayList<String>>(); } if (fixedModificationSites == null) { fixedModificationSites = new HashMap<Integer, ArrayList<String>>(); } StringBuilder modifiedSequence = new StringBuilder(sequence.length()); for (int aa = 1; aa <= sequence.length(); aa++) { int aaIndex = aa - 1; char aminoAcid = sequence.charAt(aaIndex); if (confidentModificationSites.containsKey(aa) && !confidentModificationSites.get(aa).isEmpty()) { addTaggedResidue(modifiedSequence, aa, aminoAcid, 1, modificationProfile, confidentModificationSites, useHtmlColorCoding, useShortName); } else if (representativeAmbiguousModificationSites.containsKey(aa) && !representativeAmbiguousModificationSites.get(aa).isEmpty()) { addTaggedResidue(modifiedSequence, aa, aminoAcid, 2, modificationProfile, representativeAmbiguousModificationSites, useHtmlColorCoding, useShortName); } else if (secondaryAmbiguousModificationSites.containsKey(aa) && !secondaryAmbiguousModificationSites.get(aa).isEmpty()) { addTaggedResidue(modifiedSequence, aa, aminoAcid, 3, modificationProfile, secondaryAmbiguousModificationSites, useHtmlColorCoding, useShortName); } else if (fixedModificationSites.containsKey(aa) && !fixedModificationSites.get(aa).isEmpty()) { addTaggedResidue(modifiedSequence, aa, aminoAcid, 1, modificationProfile, fixedModificationSites, useHtmlColorCoding, useShortName); } else { modifiedSequence.append(aminoAcid); } } return modifiedSequence.toString(); } /** * Helper method for annotating the modified sequence as an tagged string * with potential modification sites. 
* * @param modifiedSequence the modified sequence to add the new annotations * to * @param aaIndex the current sequence index * @param aminoAcid the current amino acid * @param localizationConfidenceLevel the localization confidence level * @param modificationProfile the modification profile of the search * @param modificationSites the current modification sites * @param useHtmlColorCoding if true, color coded HTML is used, otherwise * PTM tags, e.g, <mox>, are used * @param useShortName if true the short names are used in the tags * @return the tagged modified sequence as a string */ private static void addTaggedResidue(StringBuilder modifiedSequence, int aaIndex, char aminoAcid, int localizationConfidenceLevel, PtmSettings modificationProfile, HashMap<Integer, ArrayList<String>> modificationSites, boolean useHtmlColorCoding, boolean useShortName) { PTMFactory ptmFactory = PTMFactory.getInstance(); if (modificationSites.get(aaIndex).size() == 1) { modifiedSequence.append(getTaggedResidue(aminoAcid, modificationSites.get(aaIndex).get(0), modificationProfile, localizationConfidenceLevel, useHtmlColorCoding, useShortName)); } else { boolean modificationAdded = false; for (String ptmName : modificationSites.get(aaIndex)) { PTM ptm = ptmFactory.getPTM(ptmName); if (ptm.getType() == PTM.MODAA && !modificationAdded) { // there should only be one... modifiedSequence.append(getTaggedResidue(aminoAcid, ptmName, modificationProfile, localizationConfidenceLevel, useHtmlColorCoding, useShortName)); modificationAdded = true; } } if (!modificationAdded) { modifiedSequence.append(aminoAcid); } } } /** * Returns the single residue as a tagged string (HTML color or PTM tag). 
* Modified sites are color coded according to three levels: 1- black * foreground, colored background 2- colored foreground, white background 3- * colored foreground * * @param residue the residue to tag * @param ptmName the name of the PTM * @param modificationProfile the modification profile * @param localizationConfidenceLevel the localization confidence level * @param useHtmlColorCoding if true, color coded HTML is used, otherwise * PTM tags, e.g, <mox>, are used * @param useShortName if true the short names are used in the tags * @return the single residue as a tagged string */ public static String getTaggedResidue(char residue, String ptmName, PtmSettings modificationProfile, int localizationConfidenceLevel, boolean useHtmlColorCoding, boolean useShortName) { StringBuilder taggedResidue = new StringBuilder(); PTMFactory ptmFactory = PTMFactory.getInstance(); PTM ptm = ptmFactory.getPTM(ptmName); if (ptm.getType() == PTM.MODAA) { if (!useHtmlColorCoding) { if (localizationConfidenceLevel == 1 || localizationConfidenceLevel == 2) { if (useShortName) { taggedResidue.append(residue).append("<").append(ptm.getShortName()).append(">"); } else { taggedResidue.append(residue).append("<").append(ptmName).append(">"); } } else if (localizationConfidenceLevel == 3) { taggedResidue.append(residue); } } else { Color ptmColor = modificationProfile.getColor(ptmName); switch (localizationConfidenceLevel) { case 1: taggedResidue.append("<span style=\"color:#").append(Util.color2Hex(Color.WHITE)).append(";background:#").append(Util.color2Hex(ptmColor)).append("\">").append(residue).append("</span>"); break; case 2: taggedResidue.append("<span style=\"color:#").append(Util.color2Hex(ptmColor)).append(";background:#").append(Util.color2Hex(Color.WHITE)).append("\">").append(residue).append("</span>"); break; case 3: // taggedResidue.append("<span style=\"color:#").append(Util.color2Hex(ptmColor)).append("\">").append(residue).append("</span>"); // taggedResidue.append("<span 
style=\"color:#").append(Util.color2Hex(Color.BLACK)).append(";background:#").append(Util.color2Hex(Color.WHITE)).append("\">").append(residue).append("</span>"); taggedResidue.append(residue); break; default: throw new IllegalArgumentException("No formatting implemented for localization confidence level " + localizationConfidenceLevel + "."); } } } else { taggedResidue.append(residue); } return taggedResidue.toString(); } /** * Indicates whether another sequence has a matching sequence. Modifications * are considered equal when of same mass. Modifications should be loaded in * the PTM factory. * * @param anotherSequence the other AminoAcidPattern * @param sequenceMatchingPreferences the sequence matching preferences * * @return true if the other AminoAcidPattern targets the same sequence */ public boolean isSameAs(AminoAcidSequence anotherSequence, SequenceMatchingPreferences sequenceMatchingPreferences) { if (!matches(anotherSequence, sequenceMatchingPreferences)) { return false; } PTMFactory ptmFactory = PTMFactory.getInstance(); for (int i = 1; i <= length(); i++) { ArrayList<ModificationMatch> mods1 = getModificationsAt(i); ArrayList<ModificationMatch> mods2 = anotherSequence.getModificationsAt(i); if (mods1.size() != mods2.size()) { return false; } for (ModificationMatch modificationMatch1 : mods1) { PTM ptm1 = ptmFactory.getPTM(modificationMatch1.getTheoreticPtm()); boolean found = false; for (ModificationMatch modificationMatch2 : mods2) { PTM ptm2 = ptmFactory.getPTM(modificationMatch2.getTheoreticPtm()); if (ptm1.getMass() == ptm2.getMass()) { // @TODO: compare against the accuracy found = true; break; } } if (!found) { return false; } } } return true; } /** * Indicates whether another sequence targets the same sequence without * accounting for PTM localization. Modifications are considered equal when * of same mass. Modifications should be loaded in the PTM factory. 
* * @param anotherSequence the other sequence * @param sequenceMatchingPreferences the sequence matching preferences * * @return true if the other AminoAcidPattern targets the same sequence */ public boolean isSameSequenceAndModificationStatusAs(AminoAcidSequence anotherSequence, SequenceMatchingPreferences sequenceMatchingPreferences) { if (!matches(anotherSequence, sequenceMatchingPreferences)) { return false; } PTMFactory ptmFactory = PTMFactory.getInstance(); HashMap<Double, Integer> masses1 = new HashMap<Double, Integer>(); for (int i = 1; i <= length(); i++) { ArrayList<ModificationMatch> tempModifications = getModificationsAt(i); for (ModificationMatch modMatch : tempModifications) { PTM ptm = ptmFactory.getPTM(modMatch.getTheoreticPtm()); double mass = ptm.getMass(); Integer occurrence = masses1.get(mass); if (occurrence == null) { masses1.put(mass, 1); } else { masses1.put(mass, occurrence + 1); } } } HashMap<Double, Integer> masses2 = new HashMap<Double, Integer>(); for (int i = 1; i <= length(); i++) { ArrayList<ModificationMatch> tempModifications = anotherSequence.getModificationsAt(i); for (ModificationMatch modMatch : tempModifications) { PTM ptm = ptmFactory.getPTM(modMatch.getTheoreticPtm()); double mass = ptm.getMass(); Integer occurrence = masses2.get(mass); if (occurrence == null) { masses2.put(mass, 1); } else { masses2.put(mass, occurrence + 1); } } } if (masses1.size() != masses2.size()) { return false; } for (Double mass : masses1.keySet()) { Integer occurrence1 = masses1.get(mass); Integer occurrence2 = masses2.get(mass); if (occurrence2 == null || occurrence2.intValue() != occurrence1) { return false; } } for (int i = 1; i <= length(); i++) { ArrayList<ModificationMatch> mods1 = getModificationsAt(i); ArrayList<ModificationMatch> mods2 = anotherSequence.getModificationsAt(i); if (mods1.size() != mods2.size()) { return false; } for (int j = 0; j < mods1.size(); j++) { ModificationMatch modificationMatch1 = mods1.get(j); ModificationMatch 
modificationMatch2 = mods2.get(j); if (!modificationMatch1.equals(modificationMatch2)) { return false; } } } return true; } /** * Returns an amino acid sequence which is a reversed version of the current * pattern. * * @return an amino acid sequence which is a reversed version of the current * pattern */ public AminoAcidSequence reverse() { setSequenceStringBuilder(false); AminoAcidSequence newSequence = new AminoAcidSequence((new StringBuilder(sequence)).reverse().toString()); if (modifications != null) { for (int i : modifications.keySet()) { int reversed = length() - i + 1; for (ModificationMatch modificationMatch : modifications.get(i)) { ModificationMatch newMatch = new ModificationMatch(modificationMatch.getTheoreticPtm(), modificationMatch.isVariable(), reversed); if (modificationMatch.isConfident()) { newMatch.setConfident(true); } if (modificationMatch.isInferred()) { newMatch.setInferred(true); } newSequence.addModificationMatch(reversed, newMatch); } } } return newSequence; } /** * Indicates whether the given sequence contains an amino acid which is in * fact a combination of amino acids. * * @param sequence the sequence of interest * * @return a boolean indicating whether the given sequence contains an amino * acid which is in fact a combination of amino acids */ public static boolean hasCombination(String sequence) { for (int i = 0; i < sequence.length(); i++) { char aa = sequence.charAt(i); AminoAcid aminoAcid = AminoAcid.getAminoAcid(aa); if (aminoAcid.iscombination()) { return true; } } return false; } /** * Returns a list of all combinations which can be created from a sequence * when expanding ambiguous amino acids like Xs. 
*
     * @param sequence the sequence of interest
     *
     * @return a list of all combinations which can be created from a sequence
     * when expanding ambiguous amino acids like Xs
     */
    public static ArrayList<StringBuilder> getCombinations(String sequence) {

        final int capacity = sequence.length();
        ArrayList<StringBuilder> prefixes = new ArrayList<StringBuilder>();

        for (int position = 0; position < capacity; position++) {

            ArrayList<StringBuilder> extended = new ArrayList<StringBuilder>();
            AminoAcid aminoAcid = AminoAcid.getAminoAcid(sequence.charAt(position));

            // nothing to extend yet: every sub amino acid starts a new combination
            final boolean startNew = prefixes.isEmpty();

            for (char candidate : aminoAcid.getSubAminoAcids(false)) {
                if (startNew) {
                    extended.add(new StringBuilder(capacity).append(candidate));
                    continue;
                }
                // every combination built so far is extended by the candidate
                for (StringBuilder prefix : prefixes) {
                    extended.add(new StringBuilder(capacity).append(prefix).append(candidate));
                }
            }

            prefixes = extended;
        }

        return prefixes;
    }

    /**
     * Returns a boolean indicating whether the given sequence contains ambiguous amino acids like X.
*
     * @param sequence the amino acid sequence to inspect
     *
     * @return a boolean indicating whether the given sequence contains ambiguous amino acids
     */
    public static boolean containsAmbiguousAminoAcid(String sequence) {
        for (int index = 0; index < sequence.length(); index++) {
            if (AminoAcid.getAminoAcid(sequence.charAt(index)).iscombination()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the sequence as string.
     *
     * @return the sequence as string
     */
    @Override
    public String toString() {
        setSequenceStringBuilder(false);
        return this.sequence;
    }

    /**
     * Returns the sequence as string.
     *
     * @return the sequence as string
     */
    @Override
    public String asSequence() {
        setSequenceStringBuilder(false);
        return this.sequence;
    }

    /**
     * Returns the sum of the monoisotopic masses of the amino acids of the
     * sequence and of the masses of the modifications they carry.
     *
     * @return the summed mass of the amino acids and their modifications
     */
    @Override
    public Double getMass() {

        setSequenceStringBuilder(false);

        double totalMass = 0;

        for (int index = 0; index < length(); index++) {

            totalMass += AminoAcid.getAminoAcid(sequence.charAt(index)).getMonoisotopicMass();

            if (modifications == null) {
                continue;
            }

            // modification sites are 1-based while the sequence index is 0-based
            ArrayList<ModificationMatch> siteModifications = modifications.get(index + 1);
            if (siteModifications == null) {
                continue;
            }

            for (ModificationMatch siteModification : siteModifications) {
                totalMass += PTMFactory.getInstance().getPTM(siteModification.getTheoreticPtm()).getMass();
            }
        }

        return totalMass;
    }

    /**
     * Indicates whether another tag component is an amino acid sequence which
     * is the same as this one. Components of another type are never the same.
     *
     * @param anotherCompontent the other tag component
     * @param sequenceMatchingPreferences the sequence matching preferences
     *
     * @return true if the other component is an amino acid sequence which is
     * the same as this one
     */
    @Override
    public boolean isSameAs(TagComponent anotherCompontent, SequenceMatchingPreferences sequenceMatchingPreferences) {
        if (anotherCompontent instanceof AminoAcidSequence) {
            return isSameAs((AminoAcidSequence) anotherCompontent, sequenceMatchingPreferences);
        }
        return false;
    }

    /**
     * Indicates whether another tag component is an amino acid sequence with
     * the same sequence and modification status as this one. Components of
     * another type are never the same.
     *
     * @param anotherCompontent the other tag component
     * @param sequenceMatchingPreferences the sequence matching preferences
     *
     * @return true if the other component is an amino acid sequence with the
     * same sequence and modification status as this one
     */
    @Override
    public boolean isSameSequenceAndModificationStatusAs(TagComponent anotherCompontent, SequenceMatchingPreferences sequenceMatchingPreferences) {
        if (anotherCompontent instanceof AminoAcidSequence) {
            return isSameSequenceAndModificationStatusAs((AminoAcidSequence) anotherCompontent, sequenceMatchingPreferences);
        }
        return false;
    }
}