package com.compomics.util.experiment.biology;

import com.compomics.util.experiment.personalization.ExperimentObject;
import com.compomics.util.pride.CvTerm;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;

/**
 * This class models an enzyme.
 *
 * An enzyme is described by four sets of single-letter amino acid codes: the
 * residues allowed before and after the cleavage site, and the residues that
 * forbid cleavage when found before or after the site.
 *
 * @author Marc Vaudel
 * @author Harald Barsnes
 */
public class Enzyme extends ExperimentObject {

    /**
     * The version UID for Serialization/Deserialization compatibility.
     */
    static final long serialVersionUID = -1852087173903613377L;
    /**
     * The enzyme id.
     *
     * @deprecated use the name as identifier.
     */
    private int id;
    /**
     * The enzyme name.
     */
    private String name;
    /**
     * The amino acids before cleavage.
     *
     * @deprecated use the set instead
     */
    private ArrayList<Character> aminoAcidBefore = new ArrayList<Character>(0);
    /**
     * The amino acids after cleavage.
     *
     * @deprecated use the set instead
     */
    private ArrayList<Character> aminoAcidAfter = new ArrayList<Character>(0);
    /**
     * The restriction amino acids before cleavage.
     *
     * @deprecated use the set instead
     */
    private ArrayList<Character> restrictionBefore = new ArrayList<Character>(0);
    /**
     * The restriction amino acids after cleavage.
     *
     * @deprecated use the set instead
     */
    private ArrayList<Character> restrictionAfter = new ArrayList<Character>(0);
    /**
     * The amino acids before cleavage.
     */
    private HashSet<Character> aminoAcidBeforeSet = new HashSet<Character>(0);
    /**
     * The amino acids after cleavage.
     */
    private HashSet<Character> aminoAcidAfterSet = new HashSet<Character>(0);
    /**
     * The restriction amino acids before cleavage.
     */
    private HashSet<Character> restrictionBeforeSet = new HashSet<Character>(0);
    /**
     * The restriction amino acids after cleavage.
     */
    private HashSet<Character> restrictionAfterSet = new HashSet<Character>(0);
    /**
     * If true, the enzyme is considered as semi-specific, meaning that only one
     * end of the resulting peptide has to be enzymatic.
     *
     * @deprecated use the digestion preferences instead
     */
    private Boolean isSemiSpecific = false;
    /**
     * If true, the enzyme does not cleave, i.e., the whole protein sequence is
     * used.
     *
     * @deprecated use the digestion preferences instead
     */
    private Boolean isWholeProtein = false;
    /**
     * The CV term associated to this enzyme.
     */
    private CvTerm cvTerm;

    /**
     * Constructor for an Enzyme.
     *
     * @param name the name of the enzyme
     */
    public Enzyme(String name) {
        this.name = name;
    }

    /**
     * Get the enzyme name.
     *
     * @return the enzyme name as String
     */
    public String getName() {
        return name;
    }

    /**
     * Get the enzyme id.
     *
     * @return the enzyme number
     */
    public int getId() {
        return id;
    }

    /**
     * Converts a list based object (utilities older than 0.8.4) to a set based.
     * Every set that is null is rebuilt from the corresponding deprecated list
     * when that list is available; sets that are already present are left
     * untouched.
     */
    public void backwardCompatibilityFix() {
        if (aminoAcidAfterSet == null && aminoAcidAfter != null) {
            aminoAcidAfterSet = new HashSet<Character>(aminoAcidAfter);
        }
        if (aminoAcidBeforeSet == null && aminoAcidBefore != null) {
            aminoAcidBeforeSet = new HashSet<Character>(aminoAcidBefore);
        }
        if (restrictionAfterSet == null && restrictionAfter != null) {
            restrictionAfterSet = new HashSet<Character>(restrictionAfter);
        }
        if (restrictionBeforeSet == null && restrictionBefore != null) {
            restrictionBeforeSet = new HashSet<Character>(restrictionBefore);
        }
    }

    /**
     * Adds an amino acid to the list of allowed amino acids after the cleavage
     * site.
     *
     * @param aminoAcid an amino acid represented by its single amino acid code.
     */
    public void addAminoAcidAfter(Character aminoAcid) {
        aminoAcidAfterSet.add(aminoAcid);
    }

    /**
     * Getter for the amino acids potentially following the cleavage. The
     * internal set is returned directly (no copy); it is empty if no amino
     * acid was added.
     *
     * @return the amino acids potentially following the cleavage
     */
    public HashSet<Character> getAminoAcidAfter() {
        return aminoAcidAfterSet;
    }

    /**
     * Adds an amino acid to the list of allowed amino acids before the cleavage
     * site.
     *
     * @param aminoAcid an amino acid represented by its single amino acid code.
     */
    public void addAminoAcidBefore(Character aminoAcid) {
        aminoAcidBeforeSet.add(aminoAcid);
    }

    /**
     * Getter for the amino acids potentially preceding the cleavage. The
     * internal set is returned directly (no copy); it is empty if no amino
     * acid was added.
     *
     * @return the amino acids potentially preceding the cleavage
     */
    public HashSet<Character> getAminoAcidBefore() {
        return aminoAcidBeforeSet;
    }

    /**
     * Adds an amino acid to the list of forbidden amino acids after the
     * cleavage site.
     *
     * @param aminoAcid an amino acid represented by its single amino acid code.
     */
    public void addRestrictionAfter(Character aminoAcid) {
        restrictionAfterSet.add(aminoAcid);
    }

    /**
     * Getter for the amino acids restricting when following the cleavage. The
     * internal set is returned directly (no copy); it is empty if no amino
     * acid was added.
     *
     * @return the amino acids restricting when following the cleavage
     */
    public HashSet<Character> getRestrictionAfter() {
        return restrictionAfterSet;
    }

    /**
     * Adds an amino acid to the list of forbidden amino acids before the
     * cleavage site.
     *
     * @param aminoAcid an amino acid represented by its single amino acid code.
     */
    public void addRestrictionBefore(Character aminoAcid) {
        restrictionBeforeSet.add(aminoAcid);
    }

    /**
     * Getter for the amino acids restricting when preceding the cleavage. The
     * internal set is returned directly (no copy); it is empty if no amino
     * acid was added.
     *
     * @return the amino acids restricting when preceding the cleavage
     */
    public HashSet<Character> getRestrictionBefore() {
        return restrictionBeforeSet;
    }

    /**
     * Returns a boolean indicating whether the junction between two sequences
     * represents a cleavage site. Only the last residue of the first sequence
     * and the first residue of the second sequence are inspected. Trypsin
     * example: (D, E) returns false (R, D) returns true. Note: returns true if
     * either sequence is empty, and false if no cleavage site is implemented.
     *
     * @param aaBefore the amino acid sequence before the cleavage site
     * @param aaAfter the amino acid sequence after the cleavage site
     *
     * @return true if the amino acid combination can represent a cleavage site
     */
    public boolean isCleavageSite(String aaBefore, String aaAfter) {
        // An empty flank is accepted as a cleavage site without further checks.
        if (aaBefore.length() == 0 || aaAfter.length() == 0) {
            return true;
        }
        return isCleavageSite(aaBefore.charAt(aaBefore.length() - 1), aaAfter.charAt(0));
    }

    /**
     * Returns a boolean indicating whether the given amino acids represent a
     * cleavage site. Amino acid combinations are extended to find possible
     * restrictions or cleavage sites. Trypsin example: (D, E) returns false (R,
     * D) returns true Note: returns false if no cleavage site is implemented.
     *
     * @param aaBefore the amino acid before the cleavage site
     * @param aaAfter the amino acid after the cleavage site
     *
     * @return true if the amino acid combination can represent a cleavage site
     */
    public boolean isCleavageSite(Character aaBefore, Character aaAfter) {

        AminoAcid aminoAcid1 = AminoAcid.getAminoAcid(aaBefore);
        AminoAcid aminoAcid2 = AminoAcid.getAminoAcid(aaAfter);

        // Rule 1: a sub amino acid of aaBefore is a cleavage residue and no
        // sub amino acid of aaAfter is a restriction.
        for (char possibleAaBefore : aminoAcid1.getSubAminoAcids()) {
            if (aminoAcidBeforeSet.contains(possibleAaBefore)) {
                boolean restriction = false;
                for (char possibleAaAfter : aminoAcid2.getSubAminoAcids()) {
                    if (restrictionAfterSet.contains(possibleAaAfter)) {
                        restriction = true;
                        break;
                    }
                }
                if (!restriction) {
                    return true;
                }
            }
        }

        // Rule 2: a sub amino acid of aaAfter is a cleavage residue and no
        // sub amino acid of aaBefore is a restriction.
        for (char possibleAaAfter : aminoAcid2.getSubAminoAcids()) {
            if (aminoAcidAfterSet.contains(possibleAaAfter)) {
                boolean restriction = false;
                for (char possibleAaBefore : aminoAcid1.getSubAminoAcids()) {
                    if (restrictionBeforeSet.contains(possibleAaBefore)) {
                        restriction = true;
                        break;
                    }
                }
                if (!restriction) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Returns a boolean indicating whether the given amino acids represent a
     * cleavage site. This method does not support amino acid combinations.
     * Trypsin example: (D, E) returns false (R, D) returns true Note: returns
     * false if no cleavage site is implemented.
     *
     * @param aaBefore the amino acid before the cleavage site
     * @param aaAfter the amino acid after the cleavage site
     *
     * @return true if the amino acid combination can represent a cleavage site
     */
    public boolean isCleavageSiteNoCombination(Character aaBefore, Character aaAfter) {
        return (aminoAcidBeforeSet.contains(aaBefore) && !restrictionAfterSet.contains(aaAfter))
                || (aminoAcidAfterSet.contains(aaAfter) && !restrictionBeforeSet.contains(aaBefore));
    }

    /**
     * Returns the number of missed cleavages in an amino acid sequence, i.e.,
     * the number of adjacent residue pairs that form a cleavage site.
     *
     * @param sequence the amino acid sequence as a string.
     *
     * @return the number of missed cleavages
     */
    public int getNmissedCleavages(String sequence) {
        int result = 0;
        // The loop bounds already cover sequences of length 0 and 1.
        for (int i = 0; i < sequence.length() - 1; i++) {
            if (isCleavageSite(sequence.charAt(i), sequence.charAt(i + 1))) {
                result++;
            }
        }
        return result;
    }

    /**
     * Digests a protein sequence in a list of expected peptide sequences. The
     * sequence is cut at every cleavage site, and every run of at most
     * nMissedCleavages + 1 consecutive fragments is reported when its length
     * lies within the given bounds. An empty sequence yields an empty result.
     *
     * @param sequence the protein sequence
     * @param nMissedCleavages the maximum number of missed cleavages
     * @param nMin the minimal size for a peptide (inclusive, ignored if null)
     * @param nMax the maximal size for a peptide (inclusive, ignored if null)
     *
     * @return a list of expected peptide sequences
     */
    public HashSet<String> digest(String sequence, int nMissedCleavages, Integer nMin, Integer nMax) {

        ArrayList<String> fragments = cleave(sequence);
        // A negative number of missed cleavages behaves like zero.
        int maxMissed = Math.max(nMissedCleavages, 0);
        HashSet<String> results = new HashSet<String>();

        for (int first = 0; first < fragments.size(); first++) {
            StringBuilder candidate = new StringBuilder();
            for (int last = first; last < fragments.size() && last - first <= maxMissed; last++) {
                candidate.append(fragments.get(last));
                if ((nMin == null || candidate.length() >= nMin)
                        && (nMax == null || candidate.length() <= nMax)) {
                    results.add(candidate.toString());
                }
            }
        }

        return results;
    }

    /**
     * Digests a protein sequence in a list of expected peptide sequences. The
     * sequence is cut at every cleavage site, and every run of at most
     * nMissedCleavages + 1 consecutive fragments is reported when its mass lies
     * within the given bounds. The mass of a peptide is computed here as the
     * sum of the monoisotopic masses returned by AminoAcid for its residues.
     * An empty sequence yields an empty result.
     *
     * @param sequence the protein sequence
     * @param nMissedCleavages the maximum number of missed cleavages
     * @param massMin the minimal mass for a peptide (inclusive, ignored if
     * null)
     * @param massMax the maximal mass for a peptide (inclusive, ignored if
     * null)
     *
     * @return a list of expected peptide sequences
     */
    public HashSet<String> digest(String sequence, int nMissedCleavages, Double massMin, Double massMax) {

        ArrayList<String> fragments = cleave(sequence);

        // Mass of every fragment on its own, so that masses never leak from
        // one fragment into the next.
        double[] fragmentMasses = new double[fragments.size()];
        for (int i = 0; i < fragments.size(); i++) {
            fragmentMasses[i] = getResiduesMass(fragments.get(i));
        }

        // A negative number of missed cleavages behaves like zero.
        int maxMissed = Math.max(nMissedCleavages, 0);
        HashSet<String> results = new HashSet<String>();

        for (int first = 0; first < fragments.size(); first++) {
            StringBuilder candidate = new StringBuilder();
            double candidateMass = 0.0;
            for (int last = first; last < fragments.size() && last - first <= maxMissed; last++) {
                candidate.append(fragments.get(last));
                candidateMass += fragmentMasses[last];
                if ((massMin == null || candidateMass >= massMin)
                        && (massMax == null || candidateMass <= massMax)) {
                    results.add(candidate.toString());
                }
            }
        }

        return results;
    }

    /**
     * Cuts a sequence at every cleavage site and returns the fragments in
     * sequence order. Returns an empty list for an empty sequence.
     *
     * @param sequence the sequence to cut
     *
     * @return the fragments obtained without any missed cleavage
     */
    private ArrayList<String> cleave(String sequence) {
        ArrayList<String> fragments = new ArrayList<String>();
        int fragmentStart = 0;
        for (int i = 1; i < sequence.length(); i++) {
            if (isCleavageSite(sequence.charAt(i - 1), sequence.charAt(i))) {
                fragments.add(sequence.substring(fragmentStart, i));
                fragmentStart = i;
            }
        }
        if (fragmentStart < sequence.length()) {
            fragments.add(sequence.substring(fragmentStart));
        }
        return fragments;
    }

    /**
     * Sums the monoisotopic masses of the residues of a peptide.
     *
     * @param peptide the peptide sequence
     *
     * @return the sum of the residue masses
     */
    private static double getResiduesMass(String peptide) {
        double mass = 0.0;
        for (int i = 0; i < peptide.length(); i++) {
            mass += AminoAcid.getAminoAcid(peptide.charAt(i)).getMonoisotopicMass();
        }
        return mass;
    }

    /**
     * Returns true of the two enzymes are identical, i.e., they share the same
     * id, the same name (case insensitive) and the same cleavage and
     * restriction sets. Note that this method takes an Enzyme and is therefore
     * an overload of, not an override for, Object.equals(Object).
     *
     * @param otherEnzyme the enzyme to compare against.
     * @return true of the two enzymes are identical
     */
    public boolean equals(Enzyme otherEnzyme) {
        if (otherEnzyme == null) {
            return false;
        }
        return this.getId() == otherEnzyme.getId()
                && this.getName().equalsIgnoreCase(otherEnzyme.getName())
                && this.getAminoAcidBefore().equals(otherEnzyme.getAminoAcidBefore())
                && this.getRestrictionBefore().equals(otherEnzyme.getRestrictionBefore())
                && this.getAminoAcidAfter().equals(otherEnzyme.getAminoAcidAfter())
                && this.getRestrictionAfter().equals(otherEnzyme.getRestrictionAfter());
    }

    /**
     * Returns the description of the cleavage of this enzyme.
     *
     * @return the description of the cleavage of this enzyme
     */
    public String getDescription() {

        StringBuilder description = new StringBuilder("Cleaves ");

        if (!getAminoAcidBefore().isEmpty()) {
            description.append("after ");
            for (Character aa : getAminoAcidBefore()) {
                description.append(aa);
            }
            if (!getAminoAcidAfter().isEmpty()) {
                description.append(" and ");
            }
        }
        if (!getAminoAcidAfter().isEmpty()) {
            description.append("before ");
            // List the residues following the cleavage site, not the ones
            // preceding it.
            for (Character aa : getAminoAcidAfter()) {
                description.append(aa);
            }
        }
        if (!getRestrictionBefore().isEmpty()) {
            description.append(" not preceeded by ");
            for (Character aa : getRestrictionBefore()) {
                description.append(aa);
            }
            if (!getRestrictionAfter().isEmpty()) {
                description.append(" and ");
            }
        }
        if (!getRestrictionAfter().isEmpty()) {
            description.append(" not followed by ");
            for (Character aa : getRestrictionAfter()) {
                description.append(aa);
            }
        }

        return description.toString();
    }

    /**
     * Returns the CV term associated with this enzyme.
     *
     * @return the CV term associated with this enzyme
     */
    public CvTerm getCvTerm() {
        return cvTerm;
    }

    /**
     * Sets the CV term associated with this enzyme.
     *
     * @param cvTerm the CV term associated with this enzyme
     */
    public void setCvTerm(CvTerm cvTerm) {
        this.cvTerm = cvTerm;
    }

    /**
     * Returns true if the enzyme is unspecific, i.e., cleaves at every residue.
     * The check is based on the enzyme id being 17.
     *
     * @deprecated use the digestion preferences instead
     *
     * @return true if the enzyme is unspecific
     */
    public boolean isUnspecific() {
        return id == 17;
    }

    /**
     * Returns true if the enzyme is semi-specific. A missing (null) value is
     * replaced by false.
     *
     * @deprecated use the digestion preferences instead
     *
     * @return true if the enzyme is semi-specific
     */
    public boolean isSemiSpecific() {
        if (isSemiSpecific == null) {
            isSemiSpecific = false;
        }
        return isSemiSpecific;
    }

    /**
     * Returns true if the enzyme does not cleave at all, i.e., the whole
     * protein is used. A missing (null) value is inferred from the enzyme
     * name.
     *
     * @deprecated use the digestion preferences instead
     *
     * @return true if the enzyme does not cleave at all
     */
    public boolean isWholeProtein() {
        if (isWholeProtein == null) {
            isWholeProtein = name.equalsIgnoreCase("Whole Protein") || name.equalsIgnoreCase("Top-Down");
        }
        return isWholeProtein;
    }
}