package com.compomics.util.experiment.io.identifications.idfilereaders; import com.compomics.util.Util; import com.compomics.util.experiment.biology.AminoAcid; import com.compomics.util.experiment.biology.AminoAcidSequence; import com.compomics.util.experiment.biology.Atom; import com.compomics.util.experiment.biology.Peptide; import com.compomics.util.experiment.identification.Advocate; import com.compomics.util.experiment.identification.spectrum_assumptions.PeptideAssumption; import com.compomics.util.experiment.identification.identification_parameters.SearchParameters; import com.compomics.util.experiment.identification.SpectrumIdentificationAssumption; import com.compomics.util.experiment.identification.matches.ModificationMatch; import com.compomics.util.experiment.identification.matches.SpectrumMatch; import com.compomics.util.experiment.io.identifications.IdfileReader; import com.compomics.util.experiment.massspectrometry.Charge; import com.compomics.util.experiment.massspectrometry.Spectrum; import com.compomics.util.experiment.massspectrometry.SpectrumFactory; import com.compomics.util.preferences.SequenceMatchingPreferences; import com.compomics.util.waiting.WaitingHandler; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedList; import javax.xml.bind.JAXBException; import org.xmlpull.v1.XmlPullParser; import org.xmlpull.v1.XmlPullParserException; import org.xmlpull.v1.XmlPullParserFactory; /** * Simple IdfileReader for Pepxml files. * * @author Marc Vaudel * @author Harald Barsnes */ public class PepxmlIdfileReader implements IdfileReader { /** * List of the spectrum matches in the file. */ private LinkedList<SpectrumMatch> spectrumMatches = null; /** * The name of the search engine which was used to create the file. */ private String searchEngine = null; /** * The version of the search engine which was used to create the file. */ private String searchEngineVersion = null; /** * The file to parse. */ private File idFile; /** * The name of the spectrum file. */ private String inputFileName; /** * The spectrum factory used to retrieve spectrum titles. */ private SpectrumFactory spectrumFactory = SpectrumFactory.getInstance(); /** * Stores the mass differences of the fixed modifications. The key is the * amino acid residue as a single upper case character and the element is * the list of the mass differences of the masses targeting that residue. */ private HashMap<Character, ArrayList<Double>> fixedModificationsMassDiffs; /** * Stores the masses of the fixed modifications. */ private ArrayList<Double> fixedModificationMasses; /** * Stores the masses of the fixed n-terminal modifications. */ private ArrayList<Double> fixedNTerminalModifications = new ArrayList<Double>(); /** * Stores the masses of the fixed c-terminal modifications. */ private ArrayList<Double> fixedCTerminalModifications = new ArrayList<Double>(); /** * Blank constructor for instantiation purposes. */ public PepxmlIdfileReader() { } /** * Constructor. * * @param idFile the file to parse */ public PepxmlIdfileReader(File idFile) { this.idFile = idFile; } /** * Parses the identification file. * * @param waitingHandler waiting handler returning information about the * progress and allowing canceling the parsing. * @param expandAaCombinations if true the combinations of amino acids will * be expanded * @param overwriteExtension if true, the extension of the input file will * be overwritten to mgf * * @throws XmlPullParserException * @throws FileNotFoundException * @throws IOException * @throws SQLException * @throws ClassNotFoundException * @throws InterruptedException */ private void parseFile(WaitingHandler waitingHandler, boolean expandAaCombinations, boolean overwriteExtension) throws XmlPullParserException, FileNotFoundException, IOException, SQLException, ClassNotFoundException, InterruptedException { // Create the pull parser. XmlPullParserFactory factory = XmlPullParserFactory.newInstance(System.getProperty(XmlPullParserFactory.PROPERTY_NAME), null); factory.setNamespaceAware(true); XmlPullParser parser = factory.newPullParser(); // Create a reader for the input file. BufferedReader br = new BufferedReader(new FileReader(idFile)); try { // Set the XML Pull Parser to read from this reader. parser.setInput(br); // Start the parsing. int type; boolean hasMatch = false; HashMap<String, SpectrumMatch> spectrumMatchesMap = new HashMap<String, SpectrumMatch>(); spectrumMatches = new LinkedList<SpectrumMatch>(); SpectrumMatch currentMatch = null; Integer currentCharge = null; // Go through the whole document. while ((type = parser.next()) != XmlPullParser.END_DOCUMENT) { String tagName = parser.getName(); if (type == XmlPullParser.START_TAG && tagName.equals("msms_run_summary")) { parseRunSummary(parser, overwriteExtension); if (waitingHandler != null && spectrumFactory.fileLoaded(inputFileName)) { waitingHandler.setMaxSecondaryProgressCounter(spectrumFactory.getNSpectra(inputFileName)); waitingHandler.setSecondaryProgressCounter(0); } } if (type == XmlPullParser.START_TAG && tagName.equals("search_summary")) { parseSearchSummary(parser); } if (type == XmlPullParser.START_TAG && tagName.equals("spectrum_query")) { currentMatch = parseSpectrumQuery(parser); SpectrumMatch previousMatch = spectrumMatchesMap.get(currentMatch.getKey()); if (previousMatch != null) { currentMatch = previousMatch; } for (int i = 0; i < parser.getAttributeCount(); i++) { String attributeName = parser.getAttributeName(i); if (attributeName.equals("assumed_charge")) { String value = parser.getAttributeValue(i); try { currentCharge = new Integer(value.trim()); } catch (Exception e) { throw new IllegalArgumentException("Charge " + value + " could not be parsed. Integer expected."); } } } } if (type == XmlPullParser.START_TAG && tagName.equals("search_hit")) { if (currentMatch == null) { throw new IllegalArgumentException("No spectrum match when parsing search hit."); } if (currentCharge == null) { throw new IllegalArgumentException("No charge found when parsing search hit of spectrum " + currentMatch.getKey() + "."); } PeptideAssumption peptideAssumption = parseSearchHit(parser, currentCharge); Peptide peptide = peptideAssumption.getPeptide(); String peptideSequence = peptide.getSequence(); hasMatch = true; boolean found = false; if (currentMatch.getAllAssumptions() != null) { for (SpectrumIdentificationAssumption tempAssumption : currentMatch.getAllAssumptions()) { PeptideAssumption tempPeptideAssumption = (PeptideAssumption) tempAssumption; Peptide tempPeptide = tempPeptideAssumption.getPeptide(); if (peptide.getSequence().equals(tempPeptide.getSequence())) { boolean sameModifications = peptide.getNModifications() == tempPeptide.getNModifications(); if (sameModifications && peptide.isModified()) { for (ModificationMatch originalMatch : peptide.getModificationMatches()) { boolean ptmFound = false; for (ModificationMatch otherMatch : tempPeptide.getModificationMatches()) { if (originalMatch.getTheoreticPtm().equals(otherMatch.getTheoreticPtm()) && originalMatch.getModificationSite() == otherMatch.getModificationSite()) { ptmFound = true; break; } } if (!ptmFound) { sameModifications = false; break; } } } if (sameModifications) { found = true; break; } } } } if (!found) { Advocate advocate = Advocate.getAdvocate(searchEngine); if (expandAaCombinations && AminoAcidSequence.hasCombination(peptideSequence)) { ArrayList<ModificationMatch> previousModificationMatches = peptide.getModificationMatches(), newModificationMatches = null; if (previousModificationMatches != null) { newModificationMatches = new ArrayList<ModificationMatch>(previousModificationMatches.size()); } for (StringBuilder expandedSequence : AminoAcidSequence.getCombinations(peptide.getSequence())) { Peptide newPeptide = new Peptide(expandedSequence.toString(), newModificationMatches, true); if (previousModificationMatches != null) { for (ModificationMatch modificationMatch : previousModificationMatches) { newPeptide.addModificationMatch(new ModificationMatch(modificationMatch.getTheoreticPtm(), modificationMatch.isVariable(), modificationMatch.getModificationSite())); } } PeptideAssumption newAssumption = new PeptideAssumption(newPeptide, peptideAssumption.getRank(), peptideAssumption.getAdvocate(), peptideAssumption.getIdentificationCharge(), peptideAssumption.getScore(), peptideAssumption.getIdentificationFile()); currentMatch.addHit(advocate.getIndex(), newAssumption, false); } } else { currentMatch.addHit(advocate.getIndex(), peptideAssumption, false); } } } if (type == XmlPullParser.END_TAG && tagName.equals("spectrum_query")) { if (hasMatch) { String key = currentMatch.getKey(); if (!spectrumMatchesMap.containsKey(key)) { spectrumMatchesMap.put(key, currentMatch); spectrumMatches.add(currentMatch); } hasMatch = false; currentMatch = null; currentCharge = null; } if (waitingHandler != null && spectrumFactory.fileLoaded(inputFileName)) { waitingHandler.increaseSecondaryProgressCounter(); } } } spectrumMatchesMap.clear(); } finally { br.close(); } } /** * Parses a search hit. * * @param parser the XML parser * @param charge the charge of the hit * * @return the peptide assumption in the search hit * * @throws XmlPullParserException * @throws IOException */ private PeptideAssumption parseSearchHit(XmlPullParser parser, Integer charge) throws XmlPullParserException, IOException { Integer rank = null; String sequence = null; ArrayList<ModificationMatch> modificationMatches = new ArrayList<ModificationMatch>(); Double score = null; for (int i = 0; i < parser.getAttributeCount(); i++) { String name = parser.getAttributeName(i); if (name.equals("hit_rank")) { String value = parser.getAttributeValue(i); try { rank = new Integer(value.trim()); } catch (Exception e) { throw new IllegalArgumentException("An error occurred while parsing rank " + value + ". Integer expected."); } } else if (name.equals("peptide")) { sequence = parser.getAttributeValue(i).trim(); } } int type; while ((type = parser.next()) != XmlPullParser.START_TAG) { } String tagName = parser.getName(); if (tagName.equals("modification_info")) { for (int i = 0; i < parser.getAttributeCount(); i++) { String attributeName = parser.getAttributeName(i); if (attributeName.equals("mod_nterm_mass") || attributeName.equals("mod_cterm_mass")) { String value = parser.getAttributeValue(i).trim(); Double terminalMass = null; try { terminalMass = new Double(value); } catch (Exception e) { throw new IllegalArgumentException("An error occurred while parsing modification terminal mass " + value + ". Number expected."); } // check if the terminal modification is fixed or variable boolean variableModification; if (attributeName.equals("mod_nterm_mass")) { variableModification = !fixedNTerminalModifications.contains(terminalMass); } else { variableModification = !fixedCTerminalModifications.contains(terminalMass); } int site; if (attributeName.equals("mod_nterm_mass")) { site = 1; terminalMass -= Atom.H.getMonoisotopicMass(); } else { // c-term site = sequence.length(); terminalMass -= (Atom.O.getMonoisotopicMass() + Atom.H.getMonoisotopicMass()); // fix for older comet pepxml files if (searchEngine != null && searchEngine.equalsIgnoreCase("Comet") && searchEngineVersion != null && !searchEngineVersion.equalsIgnoreCase("2015.02 rev. 4") && !searchEngineVersion.equalsIgnoreCase("2015.02 rev. 5")) { // @TODO: make more generic... terminalMass -= Atom.H.getMonoisotopicMass(); } } char aa = sequence.charAt(site - 1); terminalMass = Util.roundDouble(terminalMass, 2); String tempModificationName = terminalMass + "@" + aa; ModificationMatch modificationMatch = new ModificationMatch(tempModificationName, variableModification, site); modificationMatches.add(modificationMatch); } } while ((type = parser.next()) != XmlPullParser.END_DOCUMENT) { tagName = parser.getName(); if (tagName != null) { if (tagName.equals("mod_aminoacid_mass")) { Integer site = null; for (int i = 0; i < parser.getAttributeCount(); i++) { String attributeName = parser.getAttributeName(i); if (attributeName.equals("position")) { String value = parser.getAttributeValue(i); try { site = new Integer(value); } catch (Exception e) { throw new IllegalArgumentException("An error occurred while parsing modification position " + value + ". Integer expected."); } } } if (site != null) { Double modifiedAaMass = null; for (int i = 0; i < parser.getAttributeCount(); i++) { String attributeName = parser.getAttributeName(i); if (attributeName.equals("mass")) { String value = parser.getAttributeValue(i); try { modifiedAaMass = new Double(value); } catch (Exception e) { throw new IllegalArgumentException("An error occurred while parsing modification mass " + value + ". Number expected."); } } } if (modifiedAaMass != null) { char aa = sequence.charAt(site - 1); AminoAcid aminoAcid = AminoAcid.getAminoAcid(aa); // see if the amino acid also has a fixed modification // // example: // carbamidomethyl _and_ pyrolidone from carbamidomethylated c: // // <modification_info modified_peptide="C[143]EQALLQVAK"> // <mod_aminoacid_mass position="1" mass="143.004100"/> // </modification_info> // double fixedModificationMass = 0; boolean variableModification; if (fixedModificationMasses.contains(modifiedAaMass)) { variableModification = false; } else { if (fixedModificationsMassDiffs.get(aa) != null) { for (Double tempMassDiff : fixedModificationsMassDiffs.get(aa)) { fixedModificationMass += tempMassDiff; } } variableModification = true; } if (variableModification) { double modificationMass = modifiedAaMass - fixedModificationMass - aminoAcid.getMonoisotopicMass(); modificationMass = Util.roundDouble(modificationMass, 2); String tempModificationName = modificationMass + "@" + aa; ModificationMatch modificationMatch = new ModificationMatch(tempModificationName, true, site); modificationMatches.add(modificationMatch); } } } } else if (type == XmlPullParser.END_TAG && parser.getName().equals("modification_info")) { while ((type = parser.next()) != XmlPullParser.START_TAG) { } break; } } } } while (type != XmlPullParser.END_DOCUMENT) { tagName = parser.getName(); if (tagName != null) { if (type == XmlPullParser.START_TAG && parser.getName().equals("search_score")) { String name = null; String value = null; for (int i = 0; i < parser.getAttributeCount(); i++) { String attributeName = parser.getAttributeName(i); if (attributeName.equals("name")) { name = parser.getAttributeValue(i); } else if (attributeName.equals("value")) { value = parser.getAttributeValue(i); } } if (name != null && value != null) { if (name.equals("expect") || name.equals("Morpheus Score")) { try { score = new Double(value); } catch (Exception e) { throw new IllegalArgumentException("Impossible to parse expectation value " + value + ". Number expected."); } } } } else if (type == XmlPullParser.END_TAG && tagName.equals("search_hit")) { break; } } type = parser.next(); } Peptide peptide = new Peptide(sequence, modificationMatches, true); Advocate advocate = Advocate.getAdvocate(searchEngine); return new PeptideAssumption(peptide, rank, advocate.getIndex(), new Charge(Charge.PLUS, charge), score, idFile.getName()); } /** * Parses a spectrum query. * * @param parser the XML parser * * @return the spectrum match in this spectrum query * * @throws XmlPullParserException * @throws IOException */ private SpectrumMatch parseSpectrumQuery(XmlPullParser parser) throws XmlPullParserException, IOException { Integer index = null; String spectrumId = null; String spectrumNativeID = null; for (int i = 0; i < parser.getAttributeCount(); i++) { String name = parser.getAttributeName(i); if (name.equals("spectrum")) { spectrumId = parser.getAttributeValue(i); } else if (name.equals("index")) { String value = parser.getAttributeValue(i); try { index = new Integer(value.trim()); } catch (Exception e) { throw new IllegalArgumentException("An error occurred while parsing index " + value + ". Integer expected."); } } else if (name.equals("spectrumNativeID")) { spectrumNativeID = parser.getAttributeValue(i); } } if (index == null) { throw new IllegalArgumentException("No index found for spectrum " + spectrumId + "."); } String spectrumTitle; if (spectrumNativeID != null) { spectrumTitle = spectrumNativeID; } else { spectrumTitle = index + ""; if (spectrumFactory.fileLoaded(inputFileName)) { spectrumTitle = spectrumFactory.getSpectrumTitle(inputFileName, index); } } String spectrumKey = Spectrum.getSpectrumKey(inputFileName, spectrumTitle); SpectrumMatch spectrumMatch = new SpectrumMatch(spectrumKey); spectrumMatch.setSpectrumNumber(index); return spectrumMatch; } /** * Parses the run summary. * * @param parser the XML parser * @param overwriteExtension if true, the extension of the input file will * be overwritten to mgf * * @throws XmlPullParserException * @throws IOException */ private void parseRunSummary(XmlPullParser parser, boolean overwriteExtension) throws XmlPullParserException, IOException { // Something like <msms_run_summary base_name="D:\path\filename" raw_data="extention"> is expected String path = ""; for (int i = 0; i < parser.getAttributeCount(); i++) { String name = parser.getAttributeName(i); if (name.equals("base_name")) { path += parser.getAttributeValue(i); } else if (!overwriteExtension && name.equals("raw_data")) { path += parser.getAttributeValue(i); } } if (overwriteExtension) { path += ".mgf"; } File spectrumFile = new File(path); inputFileName = Util.getFileName(spectrumFile); } /** * Parses the search summary. * * @param parser the XML parser * * @throws XmlPullParserException * @throws IOException */ private void parseSearchSummary(XmlPullParser parser) throws XmlPullParserException, IOException { for (int i = 0; i < parser.getAttributeCount(); i++) { String name = parser.getAttributeName(i); if (name.equals("search_engine")) { searchEngine = parser.getAttributeValue(i); } else if (name.equals("search_engine_version")) { searchEngineVersion = parser.getAttributeValue(i); } } // extract the required information about the modifications fixedModificationsMassDiffs = new HashMap<Character, ArrayList<Double>>(); fixedModificationMasses = new ArrayList<Double>(); fixedNTerminalModifications = new ArrayList<Double>(); fixedCTerminalModifications = new ArrayList<Double>(); int type; while ((type = parser.next()) != XmlPullParser.END_DOCUMENT) { if (type == XmlPullParser.END_TAG && parser.getName() != null) { if (parser.getName().equals("search_summary")) { break; } } if (type == XmlPullParser.START_TAG) { String tagName = parser.getName(); if (type == XmlPullParser.START_TAG && tagName.equals("aminoacid_modification")) { Character aminoacid = null; Boolean variable = null; Double massDiff = null; Double mass = null; for (int i = 0; i < parser.getAttributeCount(); i++) { String name = parser.getAttributeName(i); if (name.equals("aminoacid")) { aminoacid = parser.getAttributeValue(i).charAt(0); } else if (name.equals("massdiff")) { massDiff = new Double(parser.getAttributeValue(i)); } else if (name.equals("mass")) { mass = new Double(parser.getAttributeValue(i)); } else if (name.equals("variable")) { String variableAsString = parser.getAttributeValue(i); if (variableAsString.equalsIgnoreCase("Y")) { variable = true; } else if (variableAsString.equalsIgnoreCase("N")) { variable = false; } } } if (variable != null && massDiff != null && mass != null && aminoacid != null) { if (!variable) { ArrayList<Double> massDiffs = fixedModificationsMassDiffs.get(aminoacid); if (massDiffs == null) { massDiffs = new ArrayList<Double>(); } massDiffs.add(massDiff); fixedModificationsMassDiffs.put(aminoacid, massDiffs); fixedModificationMasses.add(mass); } } else { throw new IllegalArgumentException("An error occurred while parsing aminoacid_modification element. Missing values."); } } else if (type == XmlPullParser.START_TAG && tagName.equals("terminal_modification")) { Boolean variable = null; Double mass = null; String terminus = null; for (int i = 0; i < parser.getAttributeCount(); i++) { String name = parser.getAttributeName(i); if (name.equals("terminus")) { String terminusAsString = parser.getAttributeValue(i); if (terminusAsString.equalsIgnoreCase("N") || terminusAsString.equalsIgnoreCase("C")) { terminus = terminusAsString; } } else if (name.equals("mass")) { mass = new Double(parser.getAttributeValue(i)); } else if (name.equals("variable")) { String variableAsString = parser.getAttributeValue(i); if (variableAsString.equalsIgnoreCase("Y")) { variable = true; } else if (variableAsString.equalsIgnoreCase("N")) { variable = false; } } } if (variable != null && mass != null && terminus != null) { if (!variable) { if (terminus.equalsIgnoreCase("N")) { fixedNTerminalModifications.add(mass); } else { fixedCTerminalModifications.add(mass); } } } else { throw new IllegalArgumentException("An error occurred while parsing terminal_modification element. Missing values."); } } } } } @Override public HashMap<String, ArrayList<String>> getSoftwareVersions() { ArrayList<String> versions = new ArrayList<String>(1); versions.add(searchEngineVersion); HashMap<String, ArrayList<String>> result = new HashMap<String, ArrayList<String>>(1); result.put(searchEngine, versions); return result; } @Override public String getExtension() { return ".pep.xml"; } @Override public void close() throws IOException { // nothing to do here } @Override public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters) throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException, XmlPullParserException { return getAllSpectrumMatches(waitingHandler, searchParameters, null, true); } @Override public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters, SequenceMatchingPreferences sequenceMatchingPreferences, boolean expandAaCombinations) throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException, XmlPullParserException { if (spectrumMatches == null) { parseFile(waitingHandler, expandAaCombinations, true); } return spectrumMatches; } @Override public HashMap<String, LinkedList<SpectrumMatch>> getTagsMap() { return new HashMap<String, LinkedList<SpectrumMatch>>(0); } @Override public void clearTagsMap() { // Nothing to do here } @Override public boolean hasDeNovoTags() { return false; } }