package com.compomics.util.experiment.identification.matches; import com.compomics.util.experiment.biology.Enzyme; import com.compomics.util.experiment.biology.Peptide; import com.compomics.util.experiment.biology.Protein; import com.compomics.util.experiment.identification.IdentificationMatch; import com.compomics.util.experiment.identification.protein_sequences.SequenceFactory; import com.compomics.util.preferences.SequenceMatchingPreferences; import java.io.FileNotFoundException; import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; /** * This class models a protein match. * * @author Marc Vaudel */ public class ProteinMatch extends IdentificationMatch { /** * The version UID for Serialization/Deserialization compatibility. */ static final long serialVersionUID = -6061842447053092696L; /** * The matching protein(s) accessions. */ private ArrayList<String> theoreticProtein = new ArrayList<String>(); /** * The accession of the retained protein after protein inference resolution. */ private String mainMatch; /** * The corresponding peptide match keys. */ private ArrayList<String> peptideMatchesKeys = new ArrayList<String>(); /** * The splitter in the key between protein accessions. */ public static final String PROTEIN_KEY_SPLITTER = "_cus_"; /** * Map of the most complex groups: key | proteins. */ private static HashMap<String, String[]> proteinGroupCache = new HashMap<String, String[]>(1000); /** * Size of the protein groups cache. */ private static int cacheSize = 1000; /** * The minimal group size to include a protein in the cache. */ private static int sizeOfProteinsInCache = 10; /** * Constructor for the protein match. */ public ProteinMatch() { } /** * Constructor for the protein match. * * @param proteinAccession the matching protein * @throws IllegalArgumentException if an IllegalArgumentException occurs */ public ProteinMatch(String proteinAccession) throws IllegalArgumentException { if (proteinAccession.contains(PROTEIN_KEY_SPLITTER)) { throw new IllegalArgumentException("Protein accession containing '" + PROTEIN_KEY_SPLITTER + "' are not supported. Conflicting accession: " + mainMatch); } theoreticProtein.add(proteinAccession); mainMatch = proteinAccession; } /** * Constructor for the protein match. Note: proteins must be set for the * peptide. * * @param peptide the corresponding peptide match * @param peptideMatchKey the key of the peptide match * * @throws IOException if an IOException occurs * @throws SQLException if an SQLException occurs * @throws ClassNotFoundException if a ClassNotFoundException occurs * @throws InterruptedException if an InterruptedException occurs */ public ProteinMatch(Peptide peptide, String peptideMatchKey) throws IOException, SQLException, ClassNotFoundException, InterruptedException { ArrayList<String> parentProteins = peptide.getParentProteinsNoRemapping(); if (parentProteins == null || parentProteins.isEmpty()) { throw new IllegalArgumentException("Peptide " + peptide.getSequence() + " presents no parent protein."); } Collections.sort(parentProteins); for (String protein : parentProteins) { if (!theoreticProtein.contains(protein)) { theoreticProtein.add(protein); } } mainMatch = parentProteins.get(0); peptideMatchesKeys.add(peptideMatchKey); } /** * Returns the accessions of the possible theoretic proteins. * * @return the accessions of the possible theoretic proteins */ public ArrayList<String> getTheoreticProteinsAccessions() { return theoreticProtein; } /** * Setter for the matching protein. * * @param proteinAccession the matching protein */ public void addTheoreticProtein(String proteinAccession) { theoreticProtein.add(proteinAccession); } /** * Returns the main match accession after protein inference. * * @return the main match accession after protein inference */ public String getMainMatch() { return mainMatch; } /** * Sets the main protein accession after protein inference. * * @param mainMatch the main match */ public void setMainMatch(String mainMatch) { this.mainMatch = mainMatch; } /** * Getter for the peptide keys. * * @return subordinated peptide keys */ public ArrayList<String> getPeptideMatchesKeys() { return peptideMatchesKeys; } /** * Add a subordinated peptide key. * * @param peptideMatchKey a peptide key */ public void addPeptideMatchKey(String peptideMatchKey) { if (!peptideMatchesKeys.contains(peptideMatchKey)) { peptideMatchesKeys.add(peptideMatchKey); } } /** * Sets the peptide keys for this protein match. * * @param peptideMatchKeys the peptide match keys */ public void setPeptideKeys(ArrayList<String> peptideMatchKeys) { peptideMatchesKeys = peptideMatchKeys; } /** * Returns the number of peptides found. * * @return the number of peptides found */ public int getPeptideCount() { return peptideMatchesKeys.size(); } /** * Method indicates if the protein match is a decoy one. * * @return boolean indicating if the protein match is a decoy one */ public boolean isDecoy() { for (String accession : theoreticProtein) { if (SequenceFactory.getInstance().isDecoyAccession(accession)) { return true; } } return false; } /** * Convenience method indicating whether a match is decoy based on the match * key. A match is considered decoy if at least one of its accessions is * decoy. * * Note: the sequence database should be loaded in the sequence factory * * @param key the match key * @return a boolean indicating whether a match is decoy */ public static boolean isDecoy(String key) { SequenceFactory sequenceFactory = SequenceFactory.getInstance(); for (String accession : getAccessions(key)) { if (sequenceFactory.isDecoyAccession(accession)) { return true; } } return false; } @Override public String getKey() { Collections.sort(theoreticProtein); StringBuilder result = new StringBuilder(); for (String accession : theoreticProtein) { if (result.length() != 0) { result.append(PROTEIN_KEY_SPLITTER); } result.append(accession); } return result.toString(); } /** * Convenience method which returns the protein key of a peptide. Note: * proteins must be set for the peptide. * * @param peptide the considered peptide * @return the protein match key * * @throws IOException if an IOException occurs * @throws SQLException if an SQLException occurs * @throws ClassNotFoundException if a ClassNotFoundException occurs * @throws InterruptedException if an InterruptedException occurs */ public static String getProteinMatchKey(Peptide peptide) throws IOException, SQLException, ClassNotFoundException, InterruptedException { ArrayList<String> accessions = peptide.getParentProteinsNoRemapping(); if (accessions == null) { throw new IllegalArgumentException("Proteins not set for peptide " + peptide.getKey() + "."); } HashSet<String> uniqueAccessions = new HashSet<String>(accessions); accessions = new ArrayList<String>(uniqueAccessions); Collections.sort(accessions); StringBuilder key = new StringBuilder(accessions.size() * 6); for (String accession : accessions) { if (key.length() > 0) { key.append(PROTEIN_KEY_SPLITTER); } key.append(accession); } return key.toString(); } /** * Returns the number of proteins for the match corresponding to the given. * key. * * @param matchKey the given key * @return the number of proteins for this match */ public static int getNProteins(String matchKey) { return getAccessions(matchKey).length; } /** * Returns the number of proteins for this match. * * @return the number of proteins for this match */ public int getNProteins() { return theoreticProtein.size(); } /** * Returns a boolean indicating whether a protein match contains another set * of matches. * * @param sharedKey the key of the protein of interest * @param uniqueKey the key of the protein supposedly contained * @return a boolean indicating whether a protein match contains another set * of matches. */ public static boolean contains(String sharedKey, String uniqueKey) { List<String> sharedAccessions = Arrays.asList(getAccessions(sharedKey)); for (String uniqueAccession : getAccessions(uniqueKey)) { if (!sharedAccessions.contains(uniqueAccession)) { return false; } } return true; } /** * Returns a boolean indicating whether a protein match contains another set * of matches. * * @param sharedAccessions the accessions of the shared protein match * @param uniqueKeys the keys of the unique protein match * * @return a boolean indicating whether a protein match contains another set * of matches. */ public static boolean contains(HashSet<String> sharedAccessions, ArrayList<String> uniqueKeys) { for (String uniqueAccession : uniqueKeys) { if (!sharedAccessions.contains(uniqueAccession)) { return false; } } return true; } /** * Returns the proteins in a group (group1) which are not in another group * (group2). * * @param group1 the key of the shared group * @param group2 the key of the unique group * @return list of the accessions in the search group which are not in the * unique group */ public static ArrayList<String> getOtherProteins(String group1, String group2) { String[] group1Proteins = getAccessions(group1); List<String> group2Proteins = Arrays.asList(getAccessions(group2)); ArrayList<String> result = new ArrayList<String>(); for (String sharedAccession : group1Proteins) { if (!group2Proteins.contains(sharedAccession)) { result.add(sharedAccession); } } return result; } /** * Returns the common proteins between two protein groups. * * @param group1 key of the first group * @param group2 key of the second group * * @return a list of common keys */ public static ArrayList<String> getCommonProteins(String group1, String group2) { String[] group1Proteins = getAccessions(group1); List<String> group2Proteins = Arrays.asList(getAccessions(group2)); ArrayList<String> result = new ArrayList<String>(); for (String sharedAccession : group1Proteins) { if (group2Proteins.contains(sharedAccession)) { result.add(sharedAccession); } } return result; } /** * Returns a boolean indicating whether the protein match contains another * set of theoretic proteins. * * @param proteinMatch another protein match * @return a boolean indicating whether the protein match contains another * set of theoretic proteins */ public boolean contains(ProteinMatch proteinMatch) { if (getKey().equals(proteinMatch.getKey())) { return false; } for (String accession : proteinMatch.getTheoreticProteinsAccessions()) { if (!theoreticProtein.contains(accession)) { return false; } } return true; } /** * Returns a boolean indicating whether a protein was found in this protein * match. * * @param aProtein the inspected protein * @return a boolean indicating whether a protein was found in this protein * match */ public boolean contains(String aProtein) { return theoreticProtein.contains(aProtein); } /** * Returns a list of accessions from the given key. * * @param groupKey the given key * * @return the corresponding list of accessions */ public static String[] getAccessions(String groupKey) { String[] result = proteinGroupCache.get(groupKey); if (result == null) { result = groupKey.split(PROTEIN_KEY_SPLITTER); if (result.length > sizeOfProteinsInCache) { proteinGroupCache.put(groupKey, result); if (proteinGroupCache.size() > cacheSize) { int smallestSize = sizeOfProteinsInCache; String smallestGroup = null; for (String key : proteinGroupCache.keySet()) { String[] group = proteinGroupCache.get(key); if (smallestGroup == null || group.length < smallestSize) { smallestGroup = key; smallestSize = group.length; } } proteinGroupCache.remove(smallestGroup); sizeOfProteinsInCache = smallestSize; } } } return result; } /** * Clears the cache. */ public static void clearCache() { proteinGroupCache.clear(); sizeOfProteinsInCache = 10; } /** * Indicates whether the protein group has an enzymatic peptide when * considering the given accession as main accession. * * @param accession the candidate main accession * @param enzymes the enzymes used * @param sequenceMatchingPreferences the sequence matching preferences * * @throws IOException if an IOException occurs * @throws ClassNotFoundException if a ClassNotFoundException occurs * @throws InterruptedException if an InterruptedException occurs * * @return true if the main accession generates an enzymatic peptide */ public boolean hasEnzymaticPeptide(String accession, ArrayList<Enzyme> enzymes, SequenceMatchingPreferences sequenceMatchingPreferences) throws IOException, InterruptedException, ClassNotFoundException { SequenceFactory sequenceFactory = SequenceFactory.getInstance(); for (String peptideKey : peptideMatchesKeys) { String peptideSequence = Peptide.getSequence(peptideKey); Protein protein = sequenceFactory.getProtein(accession); if (protein.isEnzymaticPeptide(peptideSequence, enzymes, sequenceMatchingPreferences)) { return true; } } return false; } @Override public MatchType getType() { return MatchType.Protein; } }