package com.compomics.util.experiment.biology; import com.compomics.util.experiment.biology.variants.Variant; import com.compomics.util.experiment.identification.protein_sequences.SequenceFactory; import com.compomics.util.experiment.identification.matches.ModificationMatch; import com.compomics.util.experiment.identification.matches.VariantMatch; import com.compomics.util.experiment.personalization.ExperimentObject; import com.compomics.util.experiment.identification.identification_parameters.PtmSettings; import com.compomics.util.experiment.identification.protein_inference.PeptideMapper; import com.compomics.util.experiment.identification.protein_inference.PeptideProteinMapping; import com.compomics.util.preferences.DigestionPreferences; import com.compomics.util.preferences.SequenceMatchingPreferences; import java.io.FileNotFoundException; import java.io.IOException; import java.sql.SQLException; import java.util.*; import java.util.concurrent.Semaphore; /** * This class models a peptide. * * @author Marc Vaudel * @author Dominik Kopczynski */ public class Peptide extends ExperimentObject { /** * The version UID for serialization/deserialization compatibility. */ static final long serialVersionUID = 5632064601627536034L; /** * The peptide sequence. */ private String sequence; /** * The peptide key. */ private String key; /** * The peptide matching key. */ private String matchingKey; /** * The peptide sequence with the modified residues indicated in lower case. */ private String sequenceWithLowerCasePtms; /** * The peptide mass. */ private Double mass = null; /** * The parent proteins. */ private ArrayList<String> parentProteins = null; /** * Semaphore for the parent proteins. */ private Semaphore proteinsMutex; /** * The modifications carried by the peptide. */ private ArrayList<ModificationMatch> modifications = null; /** * The variants observed when mapping this peptide to the database. 
*/ private ArrayList<VariantMatch> variants = null; /** * The variants in a map indexed by protein. */ private HashMap<String, HashMap<Integer, ArrayList<Variant>>> variantsMap = null; /** * Separator preceding confident localization of the confident localization * of a modification. */ public final static String MODIFICATION_LOCALIZATION_SEPARATOR = "-ATAA-"; /** * Separator used to separate modifications in peptide keys. */ public final static String MODIFICATION_SEPARATOR = "_"; /** * Constructor for the peptide. */ public Peptide() { } /** * Constructor. * * @param aSequence the peptide sequence, assumed to be in upper case only * @param modifications the PTM of this peptide * @param sanityCheck boolean indicating whether the input should be checked */ public Peptide(String aSequence, ArrayList<ModificationMatch> modifications, boolean sanityCheck) { this.sequence = aSequence; if (modifications != null) { this.modifications = new ArrayList<ModificationMatch>(modifications); } if (sanityCheck) { sanityCheck(); } proteinsMutex = new Semaphore(1); } /** * Constructor. No sanity check is performed on the input. * * @param aSequence the peptide sequence, assumed to be in upper case only * @param modifications the PTM of this peptide */ public Peptide(String aSequence, ArrayList<ModificationMatch> modifications) { this(aSequence, modifications, false); } /** * Removes characters from the sequence and checks the modifications names * for forbidden characters. */ private void sanityCheck() { sequence = sequence.replaceAll("[#*$%&]", ""); if (modifications != null) { for (ModificationMatch mod : modifications) { if (mod.getTheoreticPtm().contains(MODIFICATION_SEPARATOR)) { throw new IllegalArgumentException("PTM names containing '" + MODIFICATION_SEPARATOR + "' are not supported. 
Conflicting name: " + mod.getTheoreticPtm()); } if (mod.getTheoreticPtm().contains(MODIFICATION_LOCALIZATION_SEPARATOR)) { throw new IllegalArgumentException("PTM names containing '" + MODIFICATION_LOCALIZATION_SEPARATOR + "' are not supported. Conflicting name: " + mod.getTheoreticPtm()); } } } } /** * Constructor for the peptide. * * @param aSequence the peptide sequence, assumed to be in upper case only * @param modifications the PTM of this peptide * @param variants the variants compared to the database * @param sanityCheck boolean indicating whether the input should be checked */ public Peptide(String aSequence, ArrayList<ModificationMatch> modifications, ArrayList<VariantMatch> variants, boolean sanityCheck) { this.sequence = aSequence; this.modifications = new ArrayList<ModificationMatch>(modifications); this.variants = new ArrayList<VariantMatch>(variants); if (sanityCheck) { sanityCheck(); } proteinsMutex = new Semaphore(1); } /** * Getter for the mass. * * @return the peptide mass */ public Double getMass() { if (mass == null) { estimateTheoreticMass(); } return mass; } /** * Getter for the modifications carried by this peptide. * * @return the modifications matches as found by the search engine */ public ArrayList<ModificationMatch> getModificationMatches() { return modifications; } /** * Sets new modification matches for the peptide. * * @param modificationMatches the new modification matches */ public void setModificationMatches(ArrayList<ModificationMatch> modificationMatches) { this.modifications = modificationMatches; mass = null; key = null; matchingKey = null; } /** * Clears the list of imported modification matches. */ public void clearModificationMatches() { modifications.clear(); mass = null; key = null; matchingKey = null; } /** * Adds a modification match. 
* * @param modificationMatch the modification match to add */ public void addModificationMatch(ModificationMatch modificationMatch) { if (modifications == null) { modifications = new ArrayList<ModificationMatch>(1); } modifications.add(modificationMatch); mass = null; key = null; matchingKey = null; } /** * Getter for the variants carried by this peptide. Null if not set. * * @return the variants matches as found by the search engine */ public ArrayList<VariantMatch> getVariantMatches() { return variants; } /** * Sets new variants for the peptide. * * @param variants the new variant matches */ public void setVariantMatches(ArrayList<VariantMatch> variants) { this.variants = variants; } /** * Clears the list of imported variant matches. */ public void clearVariantMatches() { if (variants != null) { variants.clear(); variantsMap = null; } } /** * Adds a variant match. * * @param variantMatch the variant match to add */ public void addVariantMatch(VariantMatch variantMatch) { if (variants == null) { variants = new ArrayList<VariantMatch>(1); } variants.add(variantMatch); variantsMap = null; } /** * Adds variant matches. * * @param variantMatch the variant match to add */ public void addVariantMatches(Collection<VariantMatch> variantMatch) { if (variants == null) { variants = new ArrayList<VariantMatch>(variantMatch != null ? variantMatch.size() : 0); } if (variantMatch != null) { variants.addAll(variantMatch); } variantsMap = null; } /** * Returns the variants in a map indexed by protein accession and index. The * map is computed from the list of variants and saved in cache. 
* If no variant list is set, an empty map is returned and nothing is cached.
     *
     * @return the variants in a map
     */
    public HashMap<String, HashMap<Integer, ArrayList<Variant>>> getVariantsMap() {
        if (variants == null) {
            // No variants were set: answer with an empty map instead of failing
            // on the null list. The result is not cached so that variants set
            // later are always picked up.
            return new HashMap<String, HashMap<Integer, ArrayList<Variant>>>(0);
        }
        if (variantsMap == null) {
            // Build the map completely before storing it in the cache field.
            HashMap<String, HashMap<Integer, ArrayList<Variant>>> newMap = new HashMap<String, HashMap<Integer, ArrayList<Variant>>>(variants.size());
            for (VariantMatch variantMatch : variants) {
                String proteinAccession = variantMatch.getProteinAccession();
                HashMap<Integer, ArrayList<Variant>> proteinVariants = newMap.get(proteinAccession);
                if (proteinVariants == null) {
                    proteinVariants = new HashMap<Integer, ArrayList<Variant>>(2);
                    newMap.put(proteinAccession, proteinVariants);
                }
                int site = variantMatch.getSite();
                ArrayList<Variant> variantsAtSite = proteinVariants.get(site);
                if (variantsAtSite == null) {
                    variantsAtSite = new ArrayList<Variant>(1);
                    proteinVariants.put(site, variantsAtSite);
                }
                variantsAtSite.add(variantMatch.getVariant());
            }
            variantsMap = newMap;
        }
        return variantsMap;
    }

    /**
     * Clears the map saved in cache.
     */
    public void clearVariantsMap() {
        variantsMap = null;
    }

    /**
     * Getter for the sequence.
     *
     * @return the peptide sequence
     */
    public String getSequence() {
        return sequence;
    }

    /**
     * Returns the peptide sequence as a String where the modified residues are
     * in lower case.
* * @return the peptide sequence with the modified residues in lowercase */ public String getSequenceWithLowerCasePtms() { if (sequenceWithLowerCasePtms != null) { return sequenceWithLowerCasePtms; } else { StringBuilder peptideSequence = new StringBuilder(sequence.length()); for (int i = 0; i < sequence.length(); i++) { boolean modified = false; if (modifications != null) { for (int j = 0; j < modifications.size() && !modified; j++) { if (modifications.get(j).getModificationSite() == (i + 1)) { modified = true; } } } if (modified) { peptideSequence.append(sequence.substring(i, i + 1).toLowerCase()); } else { peptideSequence.append(sequence.charAt(i)); } } sequenceWithLowerCasePtms = peptideSequence.toString(); return sequenceWithLowerCasePtms; } } /** * Returns the number of missed cleavages using the specified enzyme. * * @param enzyme the enzyme used * @return the amount of missed cleavages */ public int getNMissedCleavages(Enzyme enzyme) { return enzyme.getNmissedCleavages(sequence); } /** * Returns the number of missed cleavages using the digestion preferences. * Null if no cleavage set. * * @param digestionPreferences the digestion preferences * @return the amount of missed cleavages */ public Integer getNMissedCleavages(DigestionPreferences digestionPreferences) { Integer peptideMinMissedCleavages = null; if (digestionPreferences.getCleavagePreference() == DigestionPreferences.CleavagePreference.enzyme) { for (Enzyme enzyme : digestionPreferences.getEnzymes()) { int tempMissedCleavages = getNMissedCleavages(enzyme); if (peptideMinMissedCleavages == null || tempMissedCleavages < peptideMinMissedCleavages) { peptideMinMissedCleavages = tempMissedCleavages; } } } return peptideMinMissedCleavages; } /** * Returns the parent proteins and remaps the peptide to the protein in the * sequence factory if no protein mapping was set using the default mapper * of the sequence factory. 
* * @param sequenceMatchingPreferences the sequence matching preferences * * @return the proteins mapping this peptide * * @throws IOException exception thrown whenever an error occurs while * reading or writing a file. * @throws ClassNotFoundException exception thrown whenever an error occurs * while deserializing an object. * @throws InterruptedException exception thrown whenever a threading issue * occurred while interacting with the tree. * @throws SQLException exception thrown whenever a problem occurred while * interacting with an SQL database. */ public ArrayList<String> getParentProteins(SequenceMatchingPreferences sequenceMatchingPreferences) throws IOException, InterruptedException, SQLException, ClassNotFoundException { return getParentProteins(sequenceMatchingPreferences, true); } /** * Returns the parent proteins and eventually remaps the peptide to the * protein using the default protein tree. * * @param sequenceMatchingPreferences the sequence matching preferences * @param remap boolean indicating whether the peptide sequence should be * remapped to the proteins if no protein is found * * @return the proteins mapping this peptide * * @throws IOException exception thrown whenever an error occurs while * reading or writing a file. * @throws ClassNotFoundException exception thrown whenever an error occurs * while deserializing an object. * @throws InterruptedException exception thrown whenever a threading issue * occurred while interacting with the tree. * @throws SQLException exception thrown whenever a problem occurred while * interacting with an SQL database. 
*/ public ArrayList<String> getParentProteins(SequenceMatchingPreferences sequenceMatchingPreferences, boolean remap) throws IOException, ClassNotFoundException, InterruptedException, SQLException { if (!remap || parentProteins != null) { // avoid building the index if not necessary return parentProteins; } PeptideMapper peptideMapper = SequenceFactory.getInstance().getDefaultPeptideMapper(); if (peptideMapper == null) { throw new IllegalArgumentException("Index not created for peptide to protein mapping."); } return getParentProteins(sequenceMatchingPreferences, peptideMapper); } /** * Returns the parent proteins and remaps the peptide to the protein if no * protein mapping was set. * * @param sequenceMatchingPreferences the sequence matching preferences * @param peptideMapper the peptide mapper to use for peptide to protein * mapping * * @return the proteins where this peptide can be mapped * * @throws IOException exception thrown whenever an error occurs while * reading or writing a file. * @throws ClassNotFoundException exception thrown whenever an error occurs * while deserializing an object. * @throws InterruptedException exception thrown whenever a threading issue * occurred while interacting with the tree. * @throws SQLException exception thrown whenever a problem occurred while * interacting with an SQL database. */ public ArrayList<String> getParentProteins(SequenceMatchingPreferences sequenceMatchingPreferences, PeptideMapper peptideMapper) throws IOException, InterruptedException, SQLException, ClassNotFoundException { if (parentProteins == null) { mapParentProteins(sequenceMatchingPreferences, peptideMapper); } return parentProteins; } /** * Maps the peptides to the proteins in the sequence database loaded in the * sequence factory. 
* * @param sequenceMatchingPreferences the sequence matching preferences * @param peptideMapper the peptide mapper to use * * @throws IOException exception thrown whenever an error occurred while * reading the dasta file * @throws InterruptedException exception thrown whenever a threading error * occurred while mapping the peptide * @throws SQLException exception thrown whenever an error occurred while * querying the protein tree database * @throws ClassNotFoundException exception thrown whenever an error * occurred while casting an object from the protein tree database */ public void mapParentProteins(SequenceMatchingPreferences sequenceMatchingPreferences, PeptideMapper peptideMapper) throws IOException, InterruptedException, SQLException, ClassNotFoundException { proteinsMutex.acquire(); if (parentProteins == null) { ArrayList<PeptideProteinMapping> proteinMapping = peptideMapper.getProteinMapping(sequence, sequenceMatchingPreferences); HashSet<String> accessionsFound = new HashSet<String>(2); for (PeptideProteinMapping peptideProteinMapping : proteinMapping) { accessionsFound.add(peptideProteinMapping.getProteinAccession()); } parentProteins = new ArrayList<String>(accessionsFound); Collections.sort(parentProteins); } proteinsMutex.release(); } /** * Returns the parent proteins without remapping them. Null if none mapped. * * @return an ArrayList containing the parent proteins */ public ArrayList<String> getParentProteinsNoRemapping() { return parentProteins; } /** * Sets the parent proteins. To clear the parent proteins, please use * clearParentProteins(). * * @param parentProteins the parent proteins as list, cannot be null or * empty */ public void setParentProteins(ArrayList<String> parentProteins) { this.parentProteins = parentProteins; } /** * Clears the parent proteins list. 
*/
    public void clearParentProteins() {
        // Forget the mapping and start over with a fresh semaphore.
        this.parentProteins = null;
        this.proteinsMutex = new Semaphore(1);
    }

    /**
     * Returns a key for the peptide when considering the given matching
     * preferences: the key of the matching sequence given by
     * AminoAcid.getMatchingSequence() together with the modifications of the
     * peptide. When ambiguity the first amino acid according to
     * AminoAcid.getAminoAcidsList() will be selected. For example the matching
     * key of peptide PEPTLDE_mod1_mod2 is PEPTIDE_mod1_mod2.
     *
     * Note: the key is cached after the first call, later calls return the
     * cached value whatever preferences are given.
     *
     * @param sequenceMatchingPreferences the sequence matching preferences
     *
     * @return a key unique to the given matching type
     */
    public String getMatchingKey(SequenceMatchingPreferences sequenceMatchingPreferences) {
        if (matchingKey != null) {
            return matchingKey;
        }
        matchingKey = getKey(AminoAcid.getMatchingSequence(sequence, sequenceMatchingPreferences), modifications);
        return matchingKey;
    }

    /**
     * Returns the reference key of the peptide, in the form
     * SEQUENCE_modMass1_modMass2 where modMass1 and modMass2 are the
     * modification masses ordered alphabetically. The key is cached.
     *
     * Note: the key is not unique for indistinguishable sequences, see
     * getMatchingKey(SequenceMatchingPreferences sequenceMatchingPreferences).
     * Modifications must be loaded in the PTM factory.
     *
     * @return the key of the peptide
     */
    public String getKey() {
        String cachedKey = key;
        if (cachedKey == null) {
            cachedKey = getKey(sequence, modifications);
            key = cachedKey;
        }
        return cachedKey;
    }

    /**
     * Returns the reference key of a peptide. key = SEQUENCE_mod1_mod2 modMass1
     * and modMass2 modification masses ordered alphabetically.
* * @param sequence the sequence of the peptide * @param modificationMatches list of modification matches * * @return the key of the peptide */ public static String getKey(String sequence, ArrayList<ModificationMatch> modificationMatches) { if (modificationMatches == null) { return sequence; } StringBuilder result = new StringBuilder(sequence); ArrayList<String> tempModifications = new ArrayList<String>(modificationMatches.size()); for (ModificationMatch mod : modificationMatches) { if (mod.isVariable()) { String ptmName = mod.getTheoreticPtm(); if (ptmName != null) { PTM ptm = PTMFactory.getInstance().getPTM(ptmName); if (mod.isConfident() || mod.isInferred()) { StringBuilder tempModKey = new StringBuilder(); tempModKey.append(ptm.getMass()).append(MODIFICATION_LOCALIZATION_SEPARATOR).append(mod.getModificationSite()); tempModifications.add(tempModKey.toString()); } else { tempModifications.add(ptm.getMass() + ""); } } else { tempModifications.add("unknown-modification"); } } } Collections.sort(tempModifications); for (String mod : tempModifications) { result.append(MODIFICATION_SEPARATOR).append(mod); } return result.toString(); } /** * Indicates whether a peptide carries modifications. * * @return a boolean indicating whether a peptide carries modifications */ public boolean isModified() { return modifications != null && !modifications.isEmpty(); } /** * Returns a boolean indicating whether the peptide has variable * modifications based on its key. * * @param peptideKey the peptide key * @return a boolean indicating whether the peptide has variable * modifications */ public static boolean isModified(String peptideKey) { return peptideKey.contains(MODIFICATION_SEPARATOR); } /** * Returns a boolean indicating whether the peptide has the given variable * modification based on its key. 
* * @param peptideKey the peptide key * @param modificationMass the mass of the modification * * @return a boolean indicating whether the peptide has variable * modifications */ public static boolean isModified(String peptideKey, Double modificationMass) { return peptideKey.contains(modificationMass.toString()); } /** * Returns how many of the given modification was found in the given * peptide. * * @param peptideKey the peptide key * @param modificationMass the mass of the modification * @return the number of modifications */ public static int getModificationCount(String peptideKey, Double modificationMass) { String modKey = modificationMass + ""; String test = peptideKey + MODIFICATION_SEPARATOR; return test.split(modKey).length - 1; } /** * Returns the number of variable modifications found with the given mass. * * @param modificationMass the mass of the modification * @return the number of occurrences of this modification */ public int getNVariableModifications(double modificationMass) { int n = 0; if (modifications != null) { for (ModificationMatch modificationMatch : modifications) { if (modificationMatch.isVariable()) { PTM ptm = PTMFactory.getInstance().getPTM(modificationMatch.getTheoreticPtm()); if (ptm.getMass() == modificationMass) { n++; } } } } return n; } /** * Returns the number of modifications carried by this peptide. * * @return the number of modifications carried by this peptide */ public int getNModifications() { if (modifications != null) { return modifications.size(); } else { return 0; } } /** * Returns the list of modifications confidently localized or inferred for * the peptide indexed by the given key. 
* * @param peptideKey the peptide key * @param ptmMass the mass of the modification * @return the number of modifications confidently localized */ public static ArrayList<Integer> getNModificationLocalized(String peptideKey, Double ptmMass) { String test = peptideKey; ArrayList<Integer> result = new ArrayList<Integer>(); boolean first = true; String modKey = ptmMass + ""; for (String modificationSplit : test.split(MODIFICATION_SEPARATOR)) { if (!first) { String[] localizationSplit = modificationSplit.split(MODIFICATION_LOCALIZATION_SEPARATOR); if (localizationSplit.length == 2) { if (localizationSplit[0].equals(modKey)) { try { result.add(Integer.valueOf(localizationSplit[1])); } catch (Exception e) { throw new IllegalArgumentException("Cannot parse modification localization " + localizationSplit[1] + " for modification of mass " + ptmMass + " in peptide key " + peptideKey); } } } } else { first = false; } } return result; } /** * Returns the sequence of the peptide indexed by the given key. * * @param peptideKey the peptide key * @return the corresponding sequence */ public static String getSequence(String peptideKey) { int index = peptideKey.indexOf(MODIFICATION_SEPARATOR); if (index > 0) { return peptideKey.substring(0, peptideKey.indexOf(MODIFICATION_SEPARATOR)); } else { return peptideKey; } } /** * Returns a list of masses of the variable modifications found in the key * of a peptide. * * @param peptideKey the key of a peptide * * @return a list of names of the variable modifications found in the key */ public static ArrayList<String> getModificationFamily(String peptideKey) { ArrayList<String> result = new ArrayList<String>(); String[] parsedKey = peptideKey.split(MODIFICATION_SEPARATOR); for (int i = 1; i < parsedKey.length; i++) { String[] parsedMod = parsedKey[i].split(MODIFICATION_LOCALIZATION_SEPARATOR); result.add(parsedMod[0]); } return result; } /** * Returns a list of proteins where this peptide can be found in the * N-terminus. 
The proteins must be accessible via the sequence factory. If * none found, an empty list is returned. Warning: if the parent proteins * are not set, they will be set using the default protein tree and the * given matching type and mass tolerance * * @param sequenceMatchingPreferences the sequence matching preferences * * @return a list of proteins where this peptide can be found in the * N-terminus * * @throws IOException exception thrown whenever an error occurred while * reading the protein sequence * @throws IllegalArgumentException exception thrown whenever an error * occurred while reading the protein sequence * @throws InterruptedException exception thrown whenever an error occurred * while reading the protein sequence * @throws ClassNotFoundException if a ClassNotFoundException occurs * @throws SQLException if an SQLException occurs * @throws FileNotFoundException if a FileNotFoundException occurs */ public ArrayList<String> isNterm(SequenceMatchingPreferences sequenceMatchingPreferences) throws IOException, IllegalArgumentException, InterruptedException, FileNotFoundException, ClassNotFoundException, SQLException { SequenceFactory sequenceFactory = SequenceFactory.getInstance(); ArrayList<String> result = new ArrayList<String>(); if (parentProteins == null) { getParentProteins(sequenceMatchingPreferences); } for (String accession : parentProteins) { Protein protein = sequenceFactory.getProtein(accession); if (protein.isNTerm(sequence, sequenceMatchingPreferences)) { result.add(accession); } } return result; } /** * Returns a list of proteins where this peptide can be found in the * C-terminus. The proteins must be accessible via the sequence factory. If * none found, an empty list is returned. 
Warning: if the parent proteins * are not set, they will be set using the default protein tree and the * given matching type and mass tolerance * * @param sequenceMatchingPreferences the sequence matching preferences * * @return a list of proteins where this peptide can be found in the * C-terminus * * @throws IOException exception thrown whenever an error occurred while * reading a protein sequence * @throws IllegalArgumentException exception thrown whenever an error * occurred while reading a protein sequence * @throws InterruptedException exception thrown whenever an error occurred * while reading a protein sequence * @throws ClassNotFoundException if a ClassNotFoundException occurs * @throws SQLException if an SQLException occurs * @throws FileNotFoundException if a FileNotFoundException occurs */ public ArrayList<String> isCterm(SequenceMatchingPreferences sequenceMatchingPreferences) throws IOException, IllegalArgumentException, InterruptedException, FileNotFoundException, ClassNotFoundException, SQLException { SequenceFactory sequenceFactory = SequenceFactory.getInstance(); ArrayList<String> result = new ArrayList<String>(); if (parentProteins == null) { getParentProteins(sequenceMatchingPreferences); } for (String accession : parentProteins) { Protein protein = sequenceFactory.getProtein(accession); if (protein.isCTerm(sequence, sequenceMatchingPreferences)) { result.add(accession); } } return result; } /** * Indicates whether the given modification can be found on the peptide. For * instance, 'oxidation of M' cannot be found on sequence "PEPTIDE". For the * inspection of protein termini and peptide terminus the proteins sequences * must be accessible from the sequence factory. 
* * @param ptm the PTM of interest * @param sequenceMatchingPreferences the sequence matching preferences * * @return a boolean indicating whether the given modification can be found * on the peptide * * @throws IOException exception thrown whenever an error occurred while * reading a protein sequence * @throws IllegalArgumentException exception thrown whenever an error * occurred while reading a protein sequence * @throws InterruptedException exception thrown whenever an error occurred * while reading a protein sequence * @throws ClassNotFoundException if a ClassNotFoundException occurs * @throws SQLException if an SQLException occurs * @throws FileNotFoundException if a FileNotFoundException occurs */ public boolean isModifiable(PTM ptm, SequenceMatchingPreferences sequenceMatchingPreferences) throws IOException, IllegalArgumentException, InterruptedException, FileNotFoundException, ClassNotFoundException, SQLException { AminoAcidPattern pattern = ptm.getPattern(); switch (ptm.getType()) { case PTM.MODAA: int patternLength = pattern.length(); int target = pattern.getTarget(); if (target >= 0 && patternLength - target <= 1) { return pattern.matchesIn(sequence, sequenceMatchingPreferences); } else { SequenceFactory sequenceFactory = SequenceFactory.getInstance(); for (String accession : parentProteins) { Protein protein = sequenceFactory.getProtein(accession); for (int index : protein.getPeptideStart(sequence, sequenceMatchingPreferences)) { int beginIndex = index - target - 1; int endIndex = index + sequence.length() - 2 + patternLength - target; if (endIndex < protein.getLength()) { String tempSequence = protein.getSequence().substring(beginIndex, endIndex); if (pattern.matchesIn(tempSequence, sequenceMatchingPreferences)) { return true; } } } } return false; } case PTM.MODCP: return true; case PTM.MODNP: return true; case PTM.MODC: return !isCterm(sequenceMatchingPreferences).isEmpty(); case PTM.MODN: return !isNterm(sequenceMatchingPreferences).isEmpty(); case 
PTM.MODCAA: if (isCterm(sequenceMatchingPreferences).isEmpty()) { return false; } case PTM.MODCPAA: patternLength = pattern.length(); target = pattern.getTarget(); if (target == patternLength - 1 && sequence.length() >= patternLength) { return pattern.isEnding(sequence, sequenceMatchingPreferences); } else { SequenceFactory sequenceFactory = SequenceFactory.getInstance(); for (String accession : parentProteins) { Protein protein = sequenceFactory.getProtein(accession); for (int index : protein.getPeptideStart(sequence, sequenceMatchingPreferences)) { int beginIndex = index - target - 1; int endIndex = index + sequence.length() - 2 + patternLength - target; if (endIndex < protein.getLength()) { String tempSequence = protein.getSequence().substring(beginIndex, endIndex); if (pattern.isEnding(tempSequence, sequenceMatchingPreferences)) { return true; } } } } return false; } case PTM.MODNAA: if (isNterm(sequenceMatchingPreferences).isEmpty()) { return false; } case PTM.MODNPAA: patternLength = pattern.length(); target = pattern.getTarget(); if (target == 0 && sequence.length() >= patternLength) { return pattern.isStarting(sequence, sequenceMatchingPreferences); } else { SequenceFactory sequenceFactory = SequenceFactory.getInstance(); for (String accession : parentProteins) { Protein protein = sequenceFactory.getProtein(accession); for (int index : protein.getPeptideStart(sequence, sequenceMatchingPreferences)) { int beginIndex = index - target - 1; int endIndex = index + sequence.length() - 2 + patternLength - target; if (endIndex < protein.getLength()) { String tempSequence = protein.getSequence().substring(beginIndex, endIndex); if (pattern.isStarting(tempSequence, sequenceMatchingPreferences)) { return true; } } } } return false; } default: return false; } } /** * Returns the potential modification sites as an ordered list of sites. 1 * is the first amino acid. An empty list is returned if no possibility was * found. 
This method does not account for protein terminal modifications. * * @param ptmMass the mass of the potential PTM * @param sequenceMatchingPreferences the sequence matching preferences for * peptide to protein mapping * @param ptmSequenceMatchingPreferences the sequence matching preferences * for PTM to peptide mapping * @param modificationProfile the modification profile of the identification * * @return a list of potential modification sites * * @throws IOException exception thrown whenever an error occurred while * reading a protein sequence * @throws IllegalArgumentException exception thrown whenever an error * occurred while reading a protein sequence * @throws InterruptedException exception thrown whenever an error occurred * while reading a protein sequence * @throws ClassNotFoundException if a ClassNotFoundException occurs * @throws SQLException if an SQLException occurs * @throws FileNotFoundException if a FileNotFoundException occurs */ public ArrayList<Integer> getPotentialModificationSites(Double ptmMass, SequenceMatchingPreferences sequenceMatchingPreferences, SequenceMatchingPreferences ptmSequenceMatchingPreferences, PtmSettings modificationProfile) throws IOException, IllegalArgumentException, InterruptedException, FileNotFoundException, ClassNotFoundException, SQLException { ArrayList<Integer> sites = new ArrayList<Integer>(); for (String ptmName : modificationProfile.getAllNotFixedModifications()) { PTM ptm = PTMFactory.getInstance().getPTM(ptmName); if (ptm.getMass() == ptmMass) { //@TODO: use a mass tolerance for (int site : getPotentialModificationSites(ptm, sequenceMatchingPreferences, ptmSequenceMatchingPreferences)) { if (!sites.contains(site)) { sites.add(site); } } } } return sites; } /** * Returns the potential modification sites as an ordered list of sites. 1 * is the first amino acid. An empty list is returned if no possibility was * found. 
* * @param ptm the PTM considered * @param sequenceMatchingPreferences the sequence matching preferences for * peptide to protein mapping * @param ptmSequenceMatchingPreferences the sequence matching preferences * for PTM to peptide mapping * * @return a list of potential modification sites * * @throws IOException exception thrown whenever an error occurred while * interacting with a file while mapping potential modification sites * @throws InterruptedException exception thrown whenever a threading issue * occurred while mapping potential modification sites * @throws ClassNotFoundException exception thrown whenever an error * occurred while deserializing an object from the ProteinTree * @throws SQLException exception thrown whenever an error occurred while * interacting with the ProteinTree */ public ArrayList<Integer> getPotentialModificationSites(PTM ptm, SequenceMatchingPreferences sequenceMatchingPreferences, SequenceMatchingPreferences ptmSequenceMatchingPreferences) throws IOException, InterruptedException, ClassNotFoundException, SQLException { ArrayList<Integer> possibleSites = new ArrayList<Integer>(1); switch (ptm.getType()) { case PTM.MODAA: AminoAcidPattern pattern = ptm.getPattern(); int patternLength = pattern.length(); int target = pattern.getTarget(); if (target >= 0 && patternLength - target <= 1) { return pattern.getIndexes(sequence, ptmSequenceMatchingPreferences); } else { SequenceFactory sequenceFactory = SequenceFactory.getInstance(); for (String accession : parentProteins) { Protein protein = sequenceFactory.getProtein(accession); for (int index : protein.getPeptideStart(sequence, sequenceMatchingPreferences)) { int beginIndex = index - target - 1; int endIndex = index + sequence.length() - 2 + patternLength - target; if (endIndex < protein.getLength()) { String tempSequence = protein.getSequence().substring(beginIndex, endIndex); if (pattern.matchesIn(tempSequence, ptmSequenceMatchingPreferences)) { for (int tempIndex : 
pattern.getIndexes(tempSequence, ptmSequenceMatchingPreferences)) { Integer sequenceIndex = tempIndex - target; if (!possibleSites.contains(sequenceIndex)) { possibleSites.add(tempIndex); } } } } } } } return possibleSites; case PTM.MODC: if (isCterm(sequenceMatchingPreferences).isEmpty()) { return possibleSites; } case PTM.MODCP: possibleSites.add(sequence.length()); return possibleSites; case PTM.MODN: if (isNterm(sequenceMatchingPreferences).isEmpty()) { return possibleSites; } case PTM.MODNP: possibleSites.add(1); return possibleSites; case PTM.MODCAA: if (isCterm(sequenceMatchingPreferences).isEmpty()) { return possibleSites; } case PTM.MODCPAA: pattern = ptm.getPattern(); patternLength = pattern.length(); target = pattern.getTarget(); if (target == patternLength - 1 && sequence.length() >= patternLength) { if (pattern.isEnding(sequence, ptmSequenceMatchingPreferences)) { possibleSites.add(sequence.length()); } return possibleSites; } else { SequenceFactory sequenceFactory = SequenceFactory.getInstance(); Protein protein; for (String accession : parentProteins) { protein = sequenceFactory.getProtein(accession); for (int index : protein.getPeptideStart(sequence, sequenceMatchingPreferences)) { int beginIndex = index - target - 1; int endIndex = index + sequence.length() - 2 + patternLength - target; if (endIndex < protein.getLength()) { String tempSequence = protein.getSequence().substring(beginIndex, endIndex); if (pattern.isEnding(tempSequence, ptmSequenceMatchingPreferences)) { possibleSites.add(sequence.length()); return possibleSites; } } } } return possibleSites; } case PTM.MODNAA: if (isNterm(sequenceMatchingPreferences).isEmpty()) { return possibleSites; } case PTM.MODNPAA: pattern = ptm.getPattern(); patternLength = pattern.length(); target = pattern.getTarget(); if (target == 0 && sequence.length() >= patternLength) { if (pattern.isStarting(sequence, ptmSequenceMatchingPreferences)) { possibleSites.add(1); } } else { SequenceFactory sequenceFactory = 
SequenceFactory.getInstance(); Protein protein; for (String accession : parentProteins) { protein = sequenceFactory.getProtein(accession); for (int index : protein.getPeptideStart(sequence, sequenceMatchingPreferences)) { int beginIndex = index - target - 1; int endIndex = index + sequence.length() - 2 + patternLength - target; if (endIndex < protein.getLength()) { String tempSequence = protein.getSequence().substring(beginIndex, endIndex); if (pattern.isStarting(tempSequence, ptmSequenceMatchingPreferences)) { possibleSites.add(1); return possibleSites; } } } } } return possibleSites; default: throw new UnsupportedOperationException("Modification site not implemented for modification of type " + ptm.getType() + "."); } } /** * Returns the potential modification sites as an ordered list of sites. No * amino acid combination is tested. 1 is the first amino acid. An empty * list is returned if no possibility was found. No peptide to protein * mapping is done. The index on the protein must be provided with 0 as * first amino acid. 
* * @param ptm the PTM considered * @param proteinSequence the protein sequence * @param indexOnProtein the index of the peptide on the protein * * @return a list of potential modification sites */ public ArrayList<Integer> getPotentialModificationSitesNoCombination(PTM ptm, String proteinSequence, Integer indexOnProtein) { ArrayList<Integer> possibleSites = new ArrayList<Integer>(1); switch (ptm.getType()) { case PTM.MODAA: AminoAcidPattern aminoAcidPattern = ptm.getPattern(); HashSet<Character> targetedAA = aminoAcidPattern.getAminoAcidsAtTargetSet(); if (aminoAcidPattern.length() == 1) { for (int i = 0; i < sequence.length(); i++) { Character aa = sequence.charAt(i); if (targetedAA.contains(aa)) { possibleSites.add(i + 1); } } } else { for (int i = 0; i < sequence.length(); i++) { Character aa = sequence.charAt(i); if (targetedAA.contains(aa) && aminoAcidPattern.matchesAt(proteinSequence, SequenceMatchingPreferences.defaultStringMatching, indexOnProtein + i)) { possibleSites.add(i + 1); } } } return possibleSites; case PTM.MODC: int peptideLength = sequence.length(); if (indexOnProtein + peptideLength == proteinSequence.length()) { possibleSites.add(peptideLength); } return possibleSites; case PTM.MODCP: possibleSites.add(sequence.length()); return possibleSites; case PTM.MODN: if (indexOnProtein == 0) { possibleSites.add(1); } return possibleSites; case PTM.MODNP: possibleSites.add(1); return possibleSites; case PTM.MODCAA: aminoAcidPattern = ptm.getPattern(); targetedAA = aminoAcidPattern.getAminoAcidsAtTargetSet(); peptideLength = sequence.length(); if (indexOnProtein + peptideLength == proteinSequence.length()) { Character aa = sequence.charAt(peptideLength - 1); if (aminoAcidPattern.length() == 1) { if (targetedAA.contains(aa)) { possibleSites.add(peptideLength); } } else { if (targetedAA.contains(aa) && aminoAcidPattern.matchesAt(proteinSequence, SequenceMatchingPreferences.defaultStringMatching, indexOnProtein + peptideLength)) { 
possibleSites.add(peptideLength); } } } return possibleSites; case PTM.MODCPAA: aminoAcidPattern = ptm.getPattern(); targetedAA = aminoAcidPattern.getAminoAcidsAtTargetSet(); peptideLength = sequence.length(); Character aa = sequence.charAt(peptideLength - 1); if (aminoAcidPattern.length() == 1) { if (targetedAA.contains(aa)) { possibleSites.add(peptideLength); } } else { if (targetedAA.contains(aa) && aminoAcidPattern.matchesAt(proteinSequence, SequenceMatchingPreferences.defaultStringMatching, indexOnProtein + peptideLength)) { possibleSites.add(peptideLength); } } return possibleSites; case PTM.MODNAA: aminoAcidPattern = ptm.getPattern(); targetedAA = aminoAcidPattern.getAminoAcidsAtTargetSet(); peptideLength = sequence.length(); if (indexOnProtein == 0) { aa = sequence.charAt(0); if (aminoAcidPattern.length() == 1) { if (targetedAA.contains(aa)) { possibleSites.add(peptideLength); } } else { if (targetedAA.contains(aa) && aminoAcidPattern.matchesAt(proteinSequence, SequenceMatchingPreferences.defaultStringMatching, 0)) { possibleSites.add(peptideLength); } } } return possibleSites; case PTM.MODNPAA: aminoAcidPattern = ptm.getPattern(); targetedAA = aminoAcidPattern.getAminoAcidsAtTargetSet(); peptideLength = sequence.length(); aa = sequence.charAt(0); if (aminoAcidPattern.length() == 1) { if (targetedAA.contains(aa)) { possibleSites.add(peptideLength); } } else { if (targetedAA.contains(aa) && aminoAcidPattern.matchesAt(proteinSequence, SequenceMatchingPreferences.defaultStringMatching, 0)) { possibleSites.add(peptideLength); } } return possibleSites; default: throw new UnsupportedOperationException("Modification site not implemented for modification of type " + ptm.getType() + "."); } } /** * Indicates whether another peptide has the same sequence and modification * status without accounting for modification localization. 
* * @param anotherPeptide the other peptide to compare to this instance * @param sequenceMatchingPreferences the sequence matching preferences * * @return a boolean indicating whether the other peptide has the same * sequence and modification status. */ public boolean isSameSequenceAndModificationStatus(Peptide anotherPeptide, SequenceMatchingPreferences sequenceMatchingPreferences) { return isSameSequence(anotherPeptide, sequenceMatchingPreferences) && isSameModificationStatus(anotherPeptide); } /** * Returns a boolean indicating whether another peptide has the same * sequence as the given peptide * * @param anotherPeptide the other peptide to compare * @param sequenceMatchingPreferences the sequence matching preferences * * @return a boolean indicating whether the other peptide has the same * sequence */ public boolean isSameSequence(Peptide anotherPeptide, SequenceMatchingPreferences sequenceMatchingPreferences) { AminoAcidSequence pattern = new AminoAcidSequence(anotherPeptide.getSequence()); return pattern.matches(sequence, sequenceMatchingPreferences); } /** * Indicates whether another peptide has the same variable modifications as * this peptide. The localization of the PTM is not accounted for. * Modifications are considered equal when of same mass. Modifications * should be loaded in the PTM factory. 
* * @param anotherPeptide the other peptide * @return a boolean indicating whether the other peptide has the same * variable modifications as the peptide of interest */ public boolean isSameModificationStatus(Peptide anotherPeptide) { if (!isModified() && !anotherPeptide.isModified()) { return true; } if (getNModifications() != anotherPeptide.getNModifications()) { return false; } PTMFactory ptmFactory = PTMFactory.getInstance(); ArrayList<String> modifications1 = getModificationFamily(getKey()); HashMap<Double, Integer> masses1 = new HashMap<Double, Integer>(); for (String modName : modifications1) { PTM ptm = ptmFactory.getPTM(modName); double tempMass = ptm.getMass(); Integer occurrence = masses1.get(tempMass); if (occurrence == null) { masses1.put(tempMass, 1); } else { masses1.put(tempMass, occurrence + 1); } } ArrayList<String> modifications2 = getModificationFamily(anotherPeptide.getKey()); HashMap<Double, Integer> masses2 = new HashMap<Double, Integer>(); for (String modName : modifications2) { PTM ptm = ptmFactory.getPTM(modName); double tempMass = ptm.getMass(); Integer occurrence = masses2.get(tempMass); if (occurrence == null) { masses2.put(tempMass, 1); } else { masses2.put(tempMass, occurrence + 1); } } if (masses1.size() != masses2.size()) { return false; } for (Double tempMass : masses1.keySet()) { Integer occurrence1 = masses1.get(tempMass); Integer occurrence2 = masses2.get(tempMass); if (occurrence2 == null || occurrence2.intValue() != occurrence1) { return false; } } return true; } /** * Indicates whether another peptide has the same modifications at the same * localization as this peptide. This method comes as a complement of * isSameAs, here the localization of all PTMs is taken into account. * Modifications are considered equal when of same mass. Modifications * should be loaded in the PTM factory. 
* * @param anotherPeptide another peptide * @param ptms the PTMs * @return true if the other peptide has the same positions at the same * location as the considered peptide */ public boolean sameModificationsAs(Peptide anotherPeptide, ArrayList<String> ptms) { if (!isModified() && !anotherPeptide.isModified()) { return true; } if (getNModifications() != anotherPeptide.getNModifications()) { return false; } HashMap<Double, ArrayList<Integer>> ptmToPositionsMap1 = new HashMap<Double, ArrayList<Integer>>(); HashMap<Double, ArrayList<Integer>> ptmToPositionsMap2 = new HashMap<Double, ArrayList<Integer>>(); PTMFactory ptmFactory = PTMFactory.getInstance(); for (ModificationMatch modificationMatch : modifications) { String modName = modificationMatch.getTheoreticPtm(); if (ptms.contains(modName)) { double tempMass = ptmFactory.getPTM(modName).getMass(); ArrayList<Integer> sites = ptmToPositionsMap1.get(tempMass); if (sites == null) { sites = new ArrayList<Integer>(); ptmToPositionsMap1.put(tempMass, sites); } int position = modificationMatch.getModificationSite(); sites.add(position); } } for (ModificationMatch modificationMatch : anotherPeptide.getModificationMatches()) { String modName = modificationMatch.getTheoreticPtm(); if (ptms.contains(modName)) { double tempMass = ptmFactory.getPTM(modName).getMass(); ArrayList<Integer> sites = ptmToPositionsMap2.get(tempMass); if (sites == null) { sites = new ArrayList<Integer>(); ptmToPositionsMap2.put(tempMass, sites); } int position = modificationMatch.getModificationSite(); sites.add(position); } } for (Double tempMass : ptmToPositionsMap1.keySet()) { ArrayList<Integer> sites1 = ptmToPositionsMap1.get(tempMass); ArrayList<Integer> sites2 = ptmToPositionsMap2.get(tempMass); if (sites2 == null || sites1.size() != sites2.size()) { return false; } Collections.sort(sites1); Collections.sort(sites2); for (int i = 0; i < sites1.size(); i++) { if (sites1.get(i).intValue() != sites2.get(i)) { return false; } } } return true; } /** * 
Indicates whether another peptide has the same modifications at the same * localization as this peptide. This method comes as a complement of * isSameAs, here the localization of all PTMs is taken into account. * Modifications are considered equal when of same mass. Modifications * should be loaded in the PTM factory. * * @param anotherPeptide another peptide * @return true if the other peptide has the same positions at the same * location as the considered peptide */ public boolean sameModificationsAs(Peptide anotherPeptide) { if (!isModified() && !anotherPeptide.isModified()) { return true; } if (getNModifications() != anotherPeptide.getNModifications()) { return false; } ArrayList<String> ptms = new ArrayList<String>(); for (ModificationMatch modificationMatch : modifications) { String modName = modificationMatch.getTheoreticPtm(); if (!ptms.contains(modName)) { ptms.add(modName); } } for (ModificationMatch modificationMatch : anotherPeptide.getModificationMatches()) { String modName = modificationMatch.getTheoreticPtm(); if (!ptms.contains(modName)) { ptms.add(modName); } } return sameModificationsAs(anotherPeptide, ptms); } /** * Returns the N-terminal of the peptide as a String. Returns "NH2" if the * terminal is not modified, otherwise returns the name of the modification. * /!\ this method will work only if the PTM found in the peptide are in the * PTMFactory. * * @return the N-terminal of the peptide as a String, e.g., "NH2" */ public String getNTerminal() { String nTerm = "NH2"; PTMFactory ptmFactory = PTMFactory.getInstance(); if (modifications != null) { for (ModificationMatch modificationMatch : modifications) { if (modificationMatch.getModificationSite() == 1) { PTM ptm = ptmFactory.getPTM(modificationMatch.getTheoreticPtm()); if (ptm.getType() != PTM.MODAA && ptm.getType() != PTM.MODMAX) { nTerm = ptm.getShortName(); } } } } nTerm = nTerm.replaceAll("-", " "); return nTerm; } /** * Returns the C-terminal of the peptide as a String. 
Returns "COOH" if the
     * terminal is not modified, otherwise returns the short name of a
     * modification located on the last amino acid whose type is neither
     * PTM.MODAA nor PTM.MODMAX. If several modifications qualify the last one
     * is used. Dashes are replaced by spaces in the result. Note: this method
     * only works if the PTMs found in the peptide are in the PTMFactory.
     *
     * @return the C-terminal of the peptide as a String, e.g., "COOH"
     */
    public String getCTerminal() {

        PTMFactory ptmFactory = PTMFactory.getInstance();
        String terminus = "COOH";

        if (modifications != null) {
            for (ModificationMatch candidate : modifications) {
                if (candidate.getModificationSite() != sequence.length()) {
                    continue;
                }
                PTM ptm = ptmFactory.getPTM(candidate.getTheoreticPtm());
                int ptmType = ptm.getType();
                if (ptmType == PTM.MODAA || ptmType == PTM.MODMAX) {
                    continue;
                }
                terminus = ptm.getShortName();
            }
        }

        return terminus.replace('-', ' ');
    }

    /**
     * Returns the modified sequence as a tagged string with potential
     * modification sites color coded or with PTM tags, e.g, &lt;mox&gt;. Note:
     * this method will work only if the PTM found in the peptide are in the
     * PTMFactory. Note: this method uses the modifications as set in the
     * modification matches of this peptide and displays all of them.
* * @param modificationProfile the modification profile of the search * @param useHtmlColorCoding if true, color coded HTML is used, otherwise * PTM tags, e.g, <mox>, are used * @param includeHtmlStartEndTags if true, start and end HTML tags are added * @param useShortName if true the short names are used in the tags * @param excludeAllFixedPtms if true, all fixed PTMs are excluded * @return the modified sequence as a tagged string */ public String getTaggedModifiedSequence(PtmSettings modificationProfile, boolean useHtmlColorCoding, boolean includeHtmlStartEndTags, boolean useShortName, boolean excludeAllFixedPtms) { HashMap<Integer, ArrayList<String>> confidentModificationSites = new HashMap<Integer, ArrayList<String>>(); HashMap<Integer, ArrayList<String>> representativeModificationSites = new HashMap<Integer, ArrayList<String>>(); HashMap<Integer, ArrayList<String>> secondaryModificationSites = new HashMap<Integer, ArrayList<String>>(); HashMap<Integer, ArrayList<String>> fixedModificationSites = new HashMap<Integer, ArrayList<String>>(); if (modifications != null) { for (ModificationMatch modMatch : modifications) { String modName = modMatch.getTheoreticPtm(); int modSite = modMatch.getModificationSite(); if (modMatch.isVariable()) { if (modMatch.isConfident()) { if (!confidentModificationSites.containsKey(modSite)) { confidentModificationSites.put(modSite, new ArrayList<String>(1)); } confidentModificationSites.get(modSite).add(modName); } else { if (!representativeModificationSites.containsKey(modSite)) { representativeModificationSites.put(modSite, new ArrayList<String>(1)); } representativeModificationSites.get(modSite).add(modName); } } else if (!excludeAllFixedPtms) { if (!fixedModificationSites.containsKey(modSite)) { fixedModificationSites.put(modSite, new ArrayList<String>(1)); } fixedModificationSites.get(modSite).add(modName); } } } return getTaggedModifiedSequence(modificationProfile, this, confidentModificationSites, 
representativeModificationSites, secondaryModificationSites, fixedModificationSites, useHtmlColorCoding, includeHtmlStartEndTags, useShortName); } /** * Returns the modified sequence as an tagged string with potential * modification sites color coded or with PTM tags, e.g, <mox>. /!\ * this method will work only if the PTM found in the peptide are in the * PTMFactory. /!\ This method uses the modifications as set in the * modification matches of this peptide and displays all of them. * * @param modificationProfile the modification profile of the search * @param useHtmlColorCoding if true, color coded HTML is used, otherwise * PTM tags, e.g, <mox>, are used * @param includeHtmlStartEndTags if true, start and end HTML tags are added * @param useShortName if true the short names are used in the tags * @return the modified sequence as a tagged string */ public String getTaggedModifiedSequence(PtmSettings modificationProfile, boolean useHtmlColorCoding, boolean includeHtmlStartEndTags, boolean useShortName) { return getTaggedModifiedSequence(modificationProfile, useHtmlColorCoding, includeHtmlStartEndTags, useShortName, false); } /** * Returns the modified sequence as an tagged string with potential * modification sites color coded or with PTM tags, e.g, <mox>. /!\ * This method will work only if the PTM found in the peptide are in the * PTMFactory. 
* * @param modificationProfile the modification profile of the search * @param includeHtmlStartEndTags if true, start and end HTML tags are added * @param peptide the peptide to annotate * @param confidentModificationSites the confidently localized variable * modification sites in a map: aa number > list of modifications (1 is * the first AA) (can be null) * @param representativeAmbiguousModificationSites the representative site * of the ambiguously localized variable modifications in a map: aa number * > list of modifications (1 is the first AA) (can be null) * @param secondaryAmbiguousModificationSites the secondary sites of the * ambiguously localized variable modifications in a map: aa number > * list of modifications (1 is the first AA) (can be null) * @param fixedModificationSites the fixed modification sites in a map: aa * number > list of modifications (1 is the first AA) (can be null) * @param useHtmlColorCoding if true, color coded HTML is used, otherwise * PTM tags, e.g, <mox>, are used * @param useShortName if true the short names are used in the tags * @return the tagged modified sequence as a string */ public static String getTaggedModifiedSequence(PtmSettings modificationProfile, Peptide peptide, HashMap<Integer, ArrayList<String>> confidentModificationSites, HashMap<Integer, ArrayList<String>> representativeAmbiguousModificationSites, HashMap<Integer, ArrayList<String>> secondaryAmbiguousModificationSites, HashMap<Integer, ArrayList<String>> fixedModificationSites, boolean useHtmlColorCoding, boolean includeHtmlStartEndTags, boolean useShortName) { if (confidentModificationSites == null) { confidentModificationSites = new HashMap<Integer, ArrayList<String>>(0); } if (representativeAmbiguousModificationSites == null) { representativeAmbiguousModificationSites = new HashMap<Integer, ArrayList<String>>(0); } if (secondaryAmbiguousModificationSites == null) { secondaryAmbiguousModificationSites = new HashMap<Integer, ArrayList<String>>(0); } if 
(fixedModificationSites == null) { fixedModificationSites = new HashMap<Integer, ArrayList<String>>(0); } String modifiedSequence = ""; if (useHtmlColorCoding && includeHtmlStartEndTags) { modifiedSequence += "<html>"; } modifiedSequence += peptide.getNTerminal() + "-"; modifiedSequence += AminoAcidSequence.getTaggedModifiedSequence(modificationProfile, peptide.sequence, confidentModificationSites, representativeAmbiguousModificationSites, secondaryAmbiguousModificationSites, fixedModificationSites, useHtmlColorCoding, useShortName); modifiedSequence += "-" + peptide.getCTerminal(); if (useHtmlColorCoding && includeHtmlStartEndTags) { modifiedSequence += "</html>"; } return modifiedSequence; } /** * Returns the peptide modifications as a string. * * @param peptide the peptide * @param variablePtms if true, only variable PTMs are shown, false return * only the fixed PTMs * * @return the peptide modifications as a string */ public static String getPeptideModificationsAsString(Peptide peptide, boolean variablePtms) { StringBuilder result = new StringBuilder(); HashMap<String, ArrayList<Integer>> modMap = new HashMap<String, ArrayList<Integer>>(); if (peptide.isModified()) { for (ModificationMatch modificationMatch : peptide.getModificationMatches()) { if ((variablePtms && modificationMatch.isVariable()) || (!variablePtms && !modificationMatch.isVariable())) { if (!modMap.containsKey(modificationMatch.getTheoreticPtm())) { modMap.put(modificationMatch.getTheoreticPtm(), new ArrayList<Integer>()); } modMap.get(modificationMatch.getTheoreticPtm()).add(modificationMatch.getModificationSite()); } } } boolean first = true, first2; ArrayList<String> mods = new ArrayList<String>(modMap.keySet()); Collections.sort(mods); for (String mod : mods) { if (first) { first = false; } else { result.append(", "); } first2 = true; result.append(mod); result.append(" ("); for (int aa : modMap.get(mod)) { if (first2) { first2 = false; } else { result.append(", "); } result.append(aa); } 
result.append(")"); } return result.toString(); } /** * Returns the indexes of the residues in the peptide that contain at least * one variable modification. * * @return the indexes of the modified residues */ public ArrayList<Integer> getModifiedIndexes() { return getModifiedIndexes(true); } /** * Returns the indexes of the residues in the peptide that contain at least * one modification. * * @param excludeFixed exclude fixed PTMs * @return the indexes of the modified residues */ public ArrayList<Integer> getModifiedIndexes(boolean excludeFixed) { if (modifications == null) { return new ArrayList<Integer>(0); } ArrayList<Integer> modifiedResidues = new ArrayList<Integer>(modifications.size()); PTMFactory ptmFactory = PTMFactory.getInstance(); for (int i = 0; i < sequence.length(); i++) { for (int j = 0; j < modifications.size(); j++) { PTM ptm = ptmFactory.getPTM(modifications.get(j).getTheoreticPtm()); if (ptm.getType() == PTM.MODAA && (modifications.get(j).isVariable() || !excludeFixed)) { if (modifications.get(j).getModificationSite() == (i + 1)) { modifiedResidues.add(i + 1); } } } } return modifiedResidues; } /** * Returns an indexed map of all fixed modifications amino acid, (1 is the * first) > list of modification names. * * @return an indexed map of all fixed modifications amino acid */ public HashMap<Integer, ArrayList<String>> getIndexedFixedModifications() { if (modifications == null) { return new HashMap<Integer, ArrayList<String>>(0); } HashMap<Integer, ArrayList<String>> result = new HashMap<Integer, ArrayList<String>>(modifications.size()); for (ModificationMatch modificationMatch : modifications) { if (!modificationMatch.isVariable()) { int aa = modificationMatch.getModificationSite(); if (!result.containsKey(aa)) { result.put(aa, new ArrayList<String>()); } result.get(aa).add(modificationMatch.getTheoreticPtm()); } } return result; } /** * Estimates the theoretic mass of the peptide. The previous version is * silently overwritten. 
* * @throws IllegalArgumentException if the peptide sequence contains unknown * amino acids */ public synchronized void estimateTheoreticMass() throws IllegalArgumentException { if (mass == null) { Double tempMass = Atom.H.getMonoisotopicMass(); char[] sequenceAsCharArray = sequence.toCharArray(); for (char aa : sequenceAsCharArray) { try { AminoAcid currentAA = AminoAcid.getAminoAcid(aa); tempMass += currentAA.getMonoisotopicMass(); } catch (NullPointerException e) { throw new IllegalArgumentException("Unknown amino acid: " + aa + "."); } } tempMass += Atom.H.getMonoisotopicMass() + Atom.O.getMonoisotopicMass(); if (modifications != null) { PTMFactory ptmFactory = PTMFactory.getInstance(); for (ModificationMatch ptmMatch : modifications) { tempMass += ptmFactory.getPTM(ptmMatch.getTheoreticPtm()).getMass(); } } mass = tempMass; } } /** * Returns the sequence of this peptide as AminoAcidPattern. * * @return the sequence of this peptide as AminoAcidPattern */ public AminoAcidPattern getSequenceAsPattern() { return getSequenceAsPattern(sequence); } /** * Returns the given sequence as AminoAcidPattern. * * @param sequence the sequence of interest * @return the sequence as AminoAcidPattern */ public static AminoAcidPattern getSequenceAsPattern(String sequence) { return AminoAcidPattern.getAminoAcidPatternFromString(sequence); } /** * Returns the sequence of this peptide as AminoAcidSequence. * * @return the sequence of this peptide as AminoAcidSequence */ public AminoAcidSequence getSequenceAsAminoAcidSequence() { return getSequenceAsAminoAcidSequence(sequence); } /** * Returns the given sequence as AminoAcidSequence. * * @param sequence the sequence of interest * * @return the sequence as AminoAcidSequence */ public static AminoAcidSequence getSequenceAsAminoAcidSequence(String sequence) { return new AminoAcidSequence(sequence); } /** * Indicates whether a peptide can be derived from a decoy protein. 
* * @param sequenceMatchingPreferences the sequence matching preferences * * @return whether a peptide can be derived from a decoy protein * * @throws IOException exception thrown whenever an error occurred while * reading a protein sequence * @throws InterruptedException exception thrown whenever an error occurred * while reading a protein sequence * @throws ClassNotFoundException if a ClassNotFoundException occurs * @throws SQLException if an SQLException occurs */ public boolean isDecoy(SequenceMatchingPreferences sequenceMatchingPreferences) throws IOException, InterruptedException, SQLException, ClassNotFoundException { if (parentProteins == null) { getParentProteins(sequenceMatchingPreferences); } for (String accession : parentProteins) { if (SequenceFactory.getInstance().isDecoyAccession(accession)) { return true; } } return false; } /** * Returns a version of the peptide which does not contain the inspected * PTMs. * * @param peptide the original peptide * @param ptms list of inspected PTMs * * @return a not modified version of the peptide * * @throws IOException exception thrown whenever an error occurred while * reading a protein sequence * @throws InterruptedException exception thrown whenever an error occurred * while reading a protein sequence * @throws ClassNotFoundException if a ClassNotFoundException occurs * @throws SQLException if an SQLException occurs */ public static Peptide getNoModPeptide(Peptide peptide, ArrayList<PTM> ptms) throws IOException, SQLException, ClassNotFoundException, InterruptedException { Peptide noModPeptide = new Peptide(peptide.getSequence(), new ArrayList<ModificationMatch>()); noModPeptide.setParentProteins(peptide.getParentProteinsNoRemapping()); if (peptide.isModified()) { for (ModificationMatch modificationMatch : peptide.getModificationMatches()) { boolean found = false; for (PTM ptm : ptms) { if (modificationMatch.getTheoreticPtm().equals(ptm.getName())) { found = true; break; } } if (!found) { 
noModPeptide.addModificationMatch(modificationMatch); } } } return noModPeptide; } }