package com.compomics.util.experiment.identification.filtering;
import com.compomics.util.experiment.identification.identification_parameters.PtmSettings;
import com.compomics.util.Util;
import com.compomics.util.experiment.biology.Enzyme;
import com.compomics.util.experiment.biology.PTM;
import com.compomics.util.experiment.biology.PTMFactory;
import com.compomics.util.experiment.biology.Peptide;
import com.compomics.util.experiment.identification.spectrum_assumptions.PeptideAssumption;
import com.compomics.util.experiment.identification.identification_parameters.SearchParameters;
import com.compomics.util.experiment.identification.protein_sequences.SequenceFactory;
import com.compomics.util.experiment.identification.matches.ModificationMatch;
import com.compomics.util.experiment.identification.protein_inference.PeptideMapper;
import com.compomics.util.experiment.massspectrometry.SpectrumFactory;
import com.compomics.util.preferences.DigestionPreferences;
import com.compomics.util.preferences.SequenceMatchingPreferences;
import java.io.IOException;
import java.io.Serializable;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import uk.ac.ebi.jmzml.xml.io.MzMLUnmarshallerException;
/**
* This class filters peptide assumptions based on various properties.
*
* @author Marc Vaudel
* @author Harald Barsnes
*/
public class PeptideAssumptionFilter implements Serializable {
/**
* Serial number for backward compatibility.
*/
static final long serialVersionUID = 8416219001106063781L;
/**
* The minimal peptide length allowed.
*/
private int minPepLength;
/**
* The maximal peptide length allowed.
*/
private int maxPepLength;
/**
* The maximal m/z deviation allowed.
*/
private double maxMassDeviation;
/**
* Boolean indicating the unit of the allowed m/z deviation (true: ppm,
* false: Da).
*/
private boolean isPpm;
/**
* Boolean indicating whether peptides presenting unknown PTMs should be
* ignored.
*/
private boolean unknownPtm;
/**
* The minimum number of missed cleavages allowed. Null means no lower
* limit.
*/
private Integer minMissedCleavages;
/**
* The maximum number of missed cleavages allowed. Null means no upper
* limit.
*/
private Integer maxMissedCleavages;
/**
* The minimum number of isotopes allowed. Null means no lower limit.
*/
private Integer minIsotopes;
/**
* The maximum number of isotopes allowed. Null means no upper limit.
*/
private Integer maxIsotopes;
/**
* Constructor with default settings.
*/
public PeptideAssumptionFilter() {
minPepLength = 8;
maxPepLength = 30;
maxMassDeviation = -1;
isPpm = true;
unknownPtm = true;
minMissedCleavages = null;
maxMissedCleavages = null;
minIsotopes = null;
maxIsotopes = null;
}
/**
* Constructor for an Identification filter.
*
* @param minPepLength the minimal peptide length allowed (0 or less for
* disabled)
* @param maxPepLength the maximal peptide length allowed (0 or less for
* disabled)
* @param maxMzDeviation the maximal m/z deviation allowed (0 or less for
* disabled)
* @param isPpm boolean indicating the unit of the allowed m/z deviation
* (true: ppm, false: Da)
* @param unknownPTM shall peptides presenting unknownPTMs be ignored
* @param minMissedCleavages the minimum number of missed cleavages allowed
* (null for disabled)
* @param maxMissedCleavages the maximum number of missed cleavages allowed
* (null for disabled)
* @param minIsotopes the minimum number of isotopes allowed (null for
* disabled)
* @param maxIsotopes the maximum number of isotopes allowed (null for
* disabled)
*/
public PeptideAssumptionFilter(int minPepLength, int maxPepLength, double maxMzDeviation, boolean isPpm, boolean unknownPTM, Integer minMissedCleavages, Integer maxMissedCleavages, Integer minIsotopes, Integer maxIsotopes) {
this.minPepLength = minPepLength;
this.maxPepLength = maxPepLength;
this.maxMassDeviation = maxMzDeviation;
this.isPpm = isPpm;
this.unknownPtm = unknownPTM;
this.minMissedCleavages = minMissedCleavages;
this.maxMissedCleavages = maxMissedCleavages;
this.minIsotopes = minIsotopes;
this.maxIsotopes = maxIsotopes;
}
/**
* Updates the filter based on the search parameters.
*
* @param searchParameters the search parameters where to take the
* information from
*/
public void setFilterFromSearchParameters(SearchParameters searchParameters) {
this.isPpm = searchParameters.isPrecursorAccuracyTypePpm();
this.maxMassDeviation = searchParameters.getPrecursorAccuracy();
this.minIsotopes = searchParameters.getMinIsotopicCorrection();
this.maxIsotopes = searchParameters.getMaxIsotopicCorrection();
this.unknownPtm = true;
}
/**
* Validates the peptide based on the peptide length, the share of X's in
* the sequence and the allowed number of missed cleavages.
*
* @param peptide the peptide to validate
* @param sequenceMatchingPreferences the sequence matching preferences
* containing the maximal share of X's allowed
* @param digestionPreferences the digestion preferences
*
* @return a boolean indicating whether the peptide passed the test
*/
public boolean validatePeptide(Peptide peptide, SequenceMatchingPreferences sequenceMatchingPreferences, DigestionPreferences digestionPreferences) {
String peptideSequence = peptide.getSequence();
int sequenceLength = peptideSequence.length();
if ((maxPepLength > 0 && sequenceLength > maxPepLength)
|| (minPepLength > 0 && sequenceLength < minPepLength)) {
return false;
}
double xShare = ((double) Util.getOccurrence(peptideSequence, 'X')) / sequenceLength;
if (sequenceMatchingPreferences.hasLimitX() && xShare > sequenceMatchingPreferences.getLimitX()) {
return false;
}
if (minMissedCleavages != null || maxMissedCleavages != null) {
Integer peptideMinMissedCleavages = peptide.getNMissedCleavages(digestionPreferences);
if (minMissedCleavages != null && peptideMinMissedCleavages != null && peptideMinMissedCleavages < minMissedCleavages) {
return false;
}
if (maxMissedCleavages != null && peptideMinMissedCleavages != null && peptideMinMissedCleavages > maxMissedCleavages) {
return false;
}
}
return true;
}
/**
* Validates a peptide depending on its protein inference status. Maps the
* peptide to proteins in case it was not done before using the default
* protein tree of the sequence factory
*
* @param peptide the peptide
* @param sequenceMatchingPreferences the sequence matching preferences
* @return a boolean indicating whether the peptide passed the test
*
* @throws IOException if an IOException occurs
* @throws SQLException if an SQLException occurs
* @throws ClassNotFoundException if a ClassNotFoundException occurs
* @throws InterruptedException if an InterruptedException occurs
*/
public boolean validateProteins(Peptide peptide, SequenceMatchingPreferences sequenceMatchingPreferences) throws IOException, SQLException, ClassNotFoundException, InterruptedException {
return validateProteins(peptide, sequenceMatchingPreferences, SequenceFactory.getInstance().getDefaultPeptideMapper());
}
/**
* Validates a peptide depending on its protein inference status. Maps the
* peptide to proteins in case it was not done before
*
* @param peptide the peptide
* @param sequenceMatchingPreferences the sequence matching preferences
* @param peptideMapper the peptide mapper to use for peptide to protein
* mapping
*
* @return a boolean indicating whether the peptide passed the test
*
* @throws IOException if an IOException occurs
* @throws SQLException if an SQLException occurs
* @throws ClassNotFoundException if a ClassNotFoundException occurs
* @throws InterruptedException if an InterruptedException occurs
*/
public boolean validateProteins(Peptide peptide, SequenceMatchingPreferences sequenceMatchingPreferences, PeptideMapper peptideMapper)
throws IOException, SQLException, ClassNotFoundException, InterruptedException {
ArrayList<String> accessions = peptide.getParentProteins(sequenceMatchingPreferences, peptideMapper);
if (accessions != null && accessions.size() > 1) {
boolean target = false;
boolean decoy = false;
for (String accession : accessions) {
if (SequenceFactory.getInstance().isDecoyAccession(accession)) {
decoy = true;
} else {
target = true;
}
}
if (target && decoy) {
return false;
}
}
return true;
}
/**
* Validates the modifications of a peptide.
*
* @param peptide the peptide of interest
* @param sequenceMatchingPreferences the sequence matching preferences for
* peptide to protein mapping
* @param ptmSequenceMatchingPreferences the sequence matching preferences
* for PTM to peptide mapping
* @param modificationProfile the modification profile of the identification
*
* @return a boolean indicating whether the peptide passed the test
*/
public boolean validateModifications(Peptide peptide, SequenceMatchingPreferences sequenceMatchingPreferences,
SequenceMatchingPreferences ptmSequenceMatchingPreferences, PtmSettings modificationProfile) {
// check if it is an unknown peptide
if (unknownPtm) {
ArrayList<ModificationMatch> modificationMatches = peptide.getModificationMatches();
if (modificationMatches != null) {
for (ModificationMatch modMatch : modificationMatches) {
String ptmName = modMatch.getTheoreticPtm();
if (ptmName.equals(PTMFactory.unknownPTM.getName())) {
return false;
}
}
}
}
PTMFactory ptmFactory = PTMFactory.getInstance();
// get the variable ptms and the number of times they occur
HashMap<Double, Integer> modMatches = new HashMap<Double, Integer>(peptide.getNModifications());
if (peptide.isModified()) {
for (ModificationMatch modMatch : peptide.getModificationMatches()) {
if (modMatch.isVariable()) {
String modName = modMatch.getTheoreticPtm();
PTM ptm = ptmFactory.getPTM(modName);
double mass = ptm.getMass();
if (!modMatches.containsKey(mass)) {
modMatches.put(mass, 1);
} else {
modMatches.put(mass, modMatches.get(mass) + 1);
}
}
}
}
// check if there are more ptms than ptm sites
for (double mass : modMatches.keySet()) {
try {
ArrayList<Integer> possiblePositions = peptide.getPotentialModificationSites(mass, sequenceMatchingPreferences, ptmSequenceMatchingPreferences, modificationProfile);
if (possiblePositions.size() < modMatches.get(mass)) {
return false;
}
} catch (Exception e) {
e.printStackTrace();
return false;
}
}
return true;
}
/**
* Validates the mass deviation of a peptide assumption.
*
* @param assumption the considered peptide assumption
* @param spectrumKey the key of the spectrum used to get the precursor the
* precursor should be accessible via the spectrum factory
* @param spectrumFactory the spectrum factory
* @param searchParameters the search parameters
*
* @return a boolean indicating whether the given assumption passes the
* filter
*
* @throws IOException if an error occurs while reading the spectrum
* @throws MzMLUnmarshallerException if an MzMLUnmarshallerException occurs
* reading while the spectrum
*/
public boolean validatePrecursor(PeptideAssumption assumption, String spectrumKey, SpectrumFactory spectrumFactory, SearchParameters searchParameters) throws IOException, MzMLUnmarshallerException {
double precursorMz = spectrumFactory.getPrecursorMz(spectrumKey);
int isotopeNumber = assumption.getIsotopeNumber(precursorMz, searchParameters.getMinIsotopicCorrection(), searchParameters.getMaxIsotopicCorrection());
if (minIsotopes != null && isotopeNumber < minIsotopes) {
return false;
}
if (maxIsotopes != null && isotopeNumber > maxIsotopes) {
return false;
}
Double mzDeviation = assumption.getDeltaMass(precursorMz, isPpm, searchParameters.getMinIsotopicCorrection(), searchParameters.getMaxIsotopicCorrection());
return (maxMassDeviation <= 0 || Math.abs(mzDeviation) <= maxMassDeviation);
}
/**
* Returns a boolean indicating whether unknown PTMs shall be removed.
*
* @return a boolean indicating whether unknown PTMs shall be removed
*/
public boolean removeUnknownPTMs() {
return unknownPtm;
}
/**
* Set whether unknown PTMs shall be removed.
*
* @param unknownPtm whether unknown PTMs shall be removed
*/
public void setRemoveUnknownPTMs(boolean unknownPtm) {
this.unknownPtm = unknownPtm;
}
/**
* Indicates whether the mass tolerance is in ppm (true) or Dalton (false).
*
* @return a boolean indicating whether the mass tolerance is in ppm (true)
* or Dalton (false)
*/
public boolean isIsPpm() {
return isPpm;
}
/**
* Sets whether the mass tolerance is in ppm (true) or Dalton (false).
*
* @param isPpm a boolean indicating whether the mass tolerance is in ppm
* (true) or Dalton (false)
*/
public void setIsPpm(boolean isPpm) {
this.isPpm = isPpm;
}
/**
* Returns the maximal m/z deviation allowed.
*
* @return the maximal mass deviation allowed
*/
public double getMaxMzDeviation() {
return maxMassDeviation;
}
/**
* Sets the maximal m/z deviation allowed.
*
* @param maxMzDeviation the maximal mass deviation allowed
*/
public void setMaxMzDeviation(double maxMzDeviation) {
this.maxMassDeviation = maxMzDeviation;
}
/**
* Returns the maximal peptide length allowed.
*
* @return the maximal peptide length allowed
*/
public int getMaxPepLength() {
return maxPepLength;
}
/**
* Sets the maximal peptide length allowed.
*
* @param maxPepLength the maximal peptide length allowed
*/
public void setMaxPepLength(int maxPepLength) {
this.maxPepLength = maxPepLength;
}
/**
* Returns the maximal peptide length allowed.
*
* @return the maximal peptide length allowed
*/
public int getMinPepLength() {
return minPepLength;
}
/**
* Sets the maximal peptide length allowed.
*
* @param minPepLength the maximal peptide length allowed
*/
public void setMinPepLength(int minPepLength) {
this.minPepLength = minPepLength;
}
/**
* Returns the minimal number of isotopes allowed (inclusive).
*
* @return the minimal number of isotopes allowed
*/
public Integer getMinIsotopes() {
return minIsotopes;
}
/**
* Sets the minimal number of isotopes allowed (inclusive).
*
* @param minIsotopes the minimal number of isotopes allowed
*/
public void setMinIsotopes(Integer minIsotopes) {
this.minIsotopes = minIsotopes;
}
/**
* Returns the maximal number of isotopes allowed (inclusive).
*
* @return the maximal number of isotopes allowed
*/
public Integer getMaxIsotopes() {
return maxIsotopes;
}
/**
* Sets the maximal number of isotopes allowed (inclusive).
*
* @param maxIsotopes the maximal number of isotopes allowed
*/
public void setMaxIsotopes(Integer maxIsotopes) {
this.maxIsotopes = maxIsotopes;
}
/**
* Indicates whether this filter is the same as another one.
*
* @param anotherFilter another filter
* @return a boolean indicating that the filters have the same parameters
*/
public boolean isSameAs(PeptideAssumptionFilter anotherFilter) {
if (minMissedCleavages != null && anotherFilter.getMinMissedCleavages() != null) {
if (!minMissedCleavages.equals(anotherFilter.getMinMissedCleavages())) {
return false;
}
}
if (minMissedCleavages != null && anotherFilter.getMinMissedCleavages() == null) {
return false;
}
if (minMissedCleavages == null && anotherFilter.getMinMissedCleavages() != null) {
return false;
}
if (maxMissedCleavages != null && anotherFilter.getMaxMissedCleavages() != null) {
if (maxMissedCleavages.equals(anotherFilter.getMaxMissedCleavages())) {
return false;
}
}
if (maxMissedCleavages != null && anotherFilter.getMaxMissedCleavages() == null) {
return false;
}
if (maxMissedCleavages == null && anotherFilter.getMaxMissedCleavages() != null) {
return false;
}
if (minIsotopes != null && anotherFilter.getMinIsotopes() != null) {
if (!minIsotopes.equals(anotherFilter.getMinIsotopes())) {
return false;
}
}
if (minIsotopes != null && anotherFilter.getMinIsotopes() == null) {
return false;
}
if (minIsotopes == null && anotherFilter.getMinIsotopes() != null) {
return false;
}
if (maxIsotopes != null && anotherFilter.getMaxIsotopes() != null) {
if (!maxIsotopes.equals(anotherFilter.getMaxIsotopes())) {
return false;
}
}
if (maxIsotopes != null && anotherFilter.getMaxIsotopes() == null) {
return false;
}
if (maxIsotopes == null && anotherFilter.getMaxIsotopes() != null) {
return false;
}
return isPpm == anotherFilter.isPpm
&& unknownPtm == anotherFilter.removeUnknownPTMs()
&& minPepLength == anotherFilter.getMinPepLength()
&& maxPepLength == anotherFilter.getMaxPepLength()
&& maxMassDeviation == anotherFilter.getMaxMzDeviation();
}
/**
* Returns a short description of the parameters.
*
* @return a short description of the parameters
*/
public String getShortDescription() {
String newLine = System.getProperty("line.separator");
StringBuilder output = new StringBuilder();
output.append("Peptide Length: ").append(minPepLength).append("-").append(maxPepLength).append(".").append(newLine);
if (maxMassDeviation >= 0) {
output.append("Precursor m/z Deviation: ").append(maxMassDeviation);
if (isPpm) {
output.append(" ppm.").append(newLine);
} else {
output.append(" Da.").append(newLine);
}
}
output.append("Ignore Unknown PTMs: ").append(unknownPtm).append(".").append(newLine);
if (minMissedCleavages != null || maxMissedCleavages != null) {
output.append("Missed Cleavages: ");
if (minMissedCleavages != null) {
output.append(minMissedCleavages);
} else {
output.append("0");
}
output.append("-");
if (maxMissedCleavages != null) {
output.append(maxMissedCleavages);
} else {
output.append("n");
}
output.append(".").append(newLine);
}
if (minIsotopes != null || maxIsotopes != null) {
output.append("Isotopes: ");
if (minIsotopes != null) {
output.append(minIsotopes);
} else {
output.append("n");
}
output.append("-");
if (maxIsotopes != null) {
output.append(maxIsotopes);
} else {
output.append("n");
}
output.append(".").append(newLine);
}
return output.toString();
}
/**
* Returns the minimum number of missed cleavages. Null means no limit.
*
* @return the minMissedCleavages
*/
public Integer getMinMissedCleavages() {
return minMissedCleavages;
}
/**
* Set the minimum number of missed cleavages. Null means no limit.
*
* @param minMissedCleavages the minMissedCleavages to set
*/
public void setMinMissedCleavages(Integer minMissedCleavages) {
this.minMissedCleavages = minMissedCleavages;
}
/**
* Returns the maximum number of missed cleavages. Null means no limit.
*
* @return the maxMissedCleavages
*/
public Integer getMaxMissedCleavages() {
return maxMissedCleavages;
}
/**
* Set the maximum number of missed cleavages. Null means no limit.
*
* @param maxMissedCleavages the maxMissedCleavages to set
*/
public void setMaxMissedCleavages(Integer maxMissedCleavages) {
this.maxMissedCleavages = maxMissedCleavages;
}
}