package com.compomics.util.experiment.io.identifications.idfilereaders; import com.compomics.util.Util; import com.compomics.util.experiment.biology.AminoAcid; import com.compomics.util.experiment.biology.AminoAcidSequence; import com.compomics.util.experiment.biology.Atom; import com.compomics.util.experiment.biology.ions.ElementaryIon; import com.compomics.util.experiment.identification.Advocate; import com.compomics.util.experiment.identification.identification_parameters.SearchParameters; import com.compomics.util.experiment.identification.protein_sequences.SequenceFactory; import com.compomics.util.experiment.identification.spectrum_assumptions.TagAssumption; import com.compomics.util.experiment.identification.identification_parameters.tool_specific.PepnovoParameters; import com.compomics.util.experiment.identification.matches.ModificationMatch; import com.compomics.util.experiment.identification.matches.SpectrumMatch; import com.compomics.util.experiment.identification.amino_acid_tags.Tag; import com.compomics.util.experiment.identification.protein_inference.PeptideMapperType; import com.compomics.util.experiment.identification.protein_inference.proteintree.ProteinTree; import com.compomics.util.experiment.io.identifications.IdfileReader; import com.compomics.util.experiment.massspectrometry.Charge; import com.compomics.util.experiment.massspectrometry.Spectrum; import com.compomics.util.experiment.personalization.ExperimentObject; import com.compomics.util.experiment.refinementparameters.PepnovoAssumptionDetails; import com.compomics.util.preferences.SequenceMatchingPreferences; import com.compomics.util.waiting.WaitingHandler; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URLDecoder; import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedList; import java.util.Map; import javax.xml.bind.JAXBException; import uk.ac.ebi.pride.tools.braf.BufferedRandomAccessFile; /** * This class can be used to parse PepNovo identification files. * * @author Marc Vaudel */ public class PepNovoIdfileReader extends ExperimentObject implements IdfileReader { /** * A map of all spectrum titles and the associated index in the random * access file. */ private HashMap<String, Long> index; /** * The result file in random access. */ private BufferedRandomAccessFile bufferedRandomAccessFile = null; /** * The name of the result file. */ private String fileName; /** * The standard format. */ public static final String tableHeader = "#Index RnkScr PnvScr N-Gap C-Gap [M+H] Charge Sequence"; /** * The mass to add to the C-terminal gap so that is corresponds to a peptide * fragment. */ public final double cTermCorrection = Atom.O.getMonoisotopicMass() + 2 * Atom.H.getMonoisotopicMass() + ElementaryIon.proton.getTheoreticMass(); /** * The mass to add to the N-terminal gap so that is corresponds to a peptide * fragment. */ public final double nTermCorrection = 0; /** * Map of the tags found indexed by amino acid sequence. */ private HashMap<String, LinkedList<SpectrumMatch>> tagsMap; /** * Default constructor for the purpose of instantiation. */ public PepNovoIdfileReader() { } /** * Constructor, initiate the parser. Displays the progress using the waiting * handler. The close() method shall be used when the file reader is no * longer used. * * @param identificationFile the identification file to parse * * @throws FileNotFoundException exception thrown whenever the provided file * was not found * @throws IOException exception thrown whenever an error occurred while * reading the file */ public PepNovoIdfileReader(File identificationFile) throws FileNotFoundException, IOException { this(identificationFile, null); } /** * Constructor, initiate the parser. Displays the progress using the waiting * handler. The close() method shall be used when the file reader is no * longer used. * * @param identificationFile the identification file to parse * @param waitingHandler a waiting handler providing progress feedback to * the user * * @throws FileNotFoundException exception thrown whenever the provided file * was not found * @throws IOException exception thrown whenever an error occurred while * reading the file */ public PepNovoIdfileReader(File identificationFile, WaitingHandler waitingHandler) throws FileNotFoundException, IOException { bufferedRandomAccessFile = new BufferedRandomAccessFile(identificationFile, "r", 1024 * 100); fileName = Util.getFileName(identificationFile); if (waitingHandler != null) { waitingHandler.resetSecondaryProgressCounter(); waitingHandler.setMaxSecondaryProgressCounter(100); } long progressUnit = bufferedRandomAccessFile.length() / 100; if (progressUnit == 0) { progressUnit = 1; } index = new HashMap<String, Long>(); String line; while ((line = bufferedRandomAccessFile.readLine()) != null) { if (line.startsWith(">>")) { long currentIndex = bufferedRandomAccessFile.getFilePointer(); String[] temp = line.split("\\s+"); String formatted = ""; for (int i = 3; i < temp.length; i++) { formatted += (temp[i] + " "); } int endIndex = formatted.lastIndexOf("#Problem"); if (endIndex == -1) { endIndex = formatted.lastIndexOf("(SQS"); } // Condition: Skip problematic spectra not containing (SQS) at the end of the line. if (endIndex > -1) { String spectrumTitle = formatted.substring(0, endIndex).trim(); index.put(spectrumTitle, currentIndex); } if (waitingHandler != null) { if (waitingHandler.isRunCanceled()) { break; } waitingHandler.setSecondaryProgressCounter((int) (currentIndex / progressUnit)); } } } } @Override public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters) throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException { return getAllSpectrumMatches(waitingHandler, searchParameters, null, false); } @Override public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters, SequenceMatchingPreferences sequenceMatchingPreferences, boolean expandAaCombinations) throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException { int tagMapKeyLength = 0; if (sequenceMatchingPreferences != null && sequenceMatchingPreferences.getPeptideMapperType() == PeptideMapperType.tree) { SequenceFactory sequenceFactory = SequenceFactory.getInstance(); tagMapKeyLength = ((ProteinTree) sequenceFactory.getDefaultPeptideMapper()).getInitialTagSize(); tagsMap = new HashMap<String, LinkedList<SpectrumMatch>>(1024); } if (bufferedRandomAccessFile == null) { throw new IllegalStateException("The identification file was not set. Please use the appropriate constructor."); } LinkedList<SpectrumMatch> spectrumMatches = new LinkedList<SpectrumMatch>(); if (waitingHandler != null) { waitingHandler.setSecondaryProgressCounterIndeterminate(false); waitingHandler.resetSecondaryProgressCounter(); waitingHandler.setMaxSecondaryProgressCounter(index.size()); } for (String title : index.keySet()) { // remove any html from the title String decodedTitle = URLDecoder.decode(title, "utf-8"); SpectrumMatch currentMatch = new SpectrumMatch(Spectrum.getSpectrumKey(getMgfFileName(), decodedTitle)); int cpt = 1; bufferedRandomAccessFile.seek(index.get(title)); String line = bufferedRandomAccessFile.getNextLine().trim(); boolean solutionsFound = true; if (line.startsWith("# No") || line.startsWith("# Charge") || line.startsWith("#Problem") || line.startsWith("# too")) { solutionsFound = false; } else if (!line.equals(tableHeader)) { throw new IllegalArgumentException("Unrecognized table format. Expected: \"" + tableHeader + "\", found:\"" + line + "\"."); } while ((line = bufferedRandomAccessFile.getNextLine()) != null && !line.equals("") && !line.startsWith(">>")) { currentMatch.addHit(Advocate.pepnovo.getIndex(), getAssumptionFromLine(line, cpt), true); cpt++; } if (solutionsFound) { if (sequenceMatchingPreferences != null && sequenceMatchingPreferences.getPeptideMapperType() == PeptideMapperType.tree) { HashMap<Integer, HashMap<String, ArrayList<TagAssumption>>> matchTagMap = currentMatch.getTagAssumptionsMap(tagMapKeyLength, sequenceMatchingPreferences); for (HashMap<String, ArrayList<TagAssumption>> advocateMap : matchTagMap.values()) { for (String key : advocateMap.keySet()) { LinkedList<SpectrumMatch> tagMatches = tagsMap.get(key); if (tagMatches == null) { tagMatches = new LinkedList<SpectrumMatch>(); tagsMap.put(key, tagMatches); } tagMatches.add(currentMatch); } } } spectrumMatches.add(currentMatch); } if (waitingHandler != null) { if (waitingHandler.isRunCanceled()) { break; } waitingHandler.increaseSecondaryProgressCounter(); } } return spectrumMatches; } /** * Returns the spectrum file name. This method assumes that the PepNovo * output file is the mgf file name + ".out" * * @return the spectrum file name */ public String getMgfFileName() { return fileName.substring(0, fileName.length() - 4); } @Override public String getExtension() { return ".out"; } @Override public void close() throws IOException { bufferedRandomAccessFile.close(); } /** * Returns a Peptide Assumption from a PepNovo result line. The rank score * is taken as reference score. All additional parameters are attached as * PeptideAssumptionDetails. Note: fixed PTMs are not annotated, variable * PTMs are marked with the PepNovo PTM tag (see PepnovoParameters to * retrieve utilities names) * * @param line the line to parse * @param rank the rank of the assumption * @return the corresponding assumption */ private TagAssumption getAssumptionFromLine(String line, int rank) { String[] lineComponents = line.trim().split("\t"); Double rankScore = new Double(lineComponents[1]); Double pepNovoScore = new Double(lineComponents[2]); Double nGap = new Double(lineComponents[3]); Double cGap = new Double(lineComponents[4]); if (cGap > 0 && cGap < cTermCorrection) { throw new IllegalArgumentException("Incompatible c-term gap " + cGap); } else if (cGap > 0) { cGap -= cTermCorrection; } Double mH = new Double(lineComponents[5]); Integer charge = new Integer(lineComponents[6]); String pepNovoSequence = lineComponents[7]; String sequence = ""; ArrayList<ModificationMatch> modificationMatches = new ArrayList<ModificationMatch>(); String modificationMass = "", currentAA = ""; int currentPtmLocation = 0; boolean nTermPtm = false; boolean cTermPtm = false; String ptmTag = ""; // find and add the variable ptms for (int i = 0; i < pepNovoSequence.length(); i++) { String aa = pepNovoSequence.charAt(i) + ""; if (aa.equals("^") || aa.equals("$")) { ptmTag = aa; if (aa.equals("^")) { nTermPtm = true; } else { cTermPtm = true; } } else { if (aa.equals("+") || aa.equals("-")) { modificationMass += aa; } else { try { new Integer(aa); modificationMass += aa; } catch (Exception e) { if (!modificationMass.equals("")) { String pepNovoPtmTag = ""; if (nTermPtm || cTermPtm) { pepNovoPtmTag += ptmTag; } else { pepNovoPtmTag += currentAA; } pepNovoPtmTag += modificationMass; ModificationMatch modMatch = new ModificationMatch(pepNovoPtmTag, true, currentPtmLocation); modMatch.setConfident(true); modificationMatches.add(modMatch); modificationMass = ""; nTermPtm = false; } AminoAcid aminoAcid = AminoAcid.getAminoAcid(aa); if (aminoAcid == null) { throw new IllegalArgumentException("Attempting to parse " + aa + " as amino acid in " + pepNovoSequence + "."); } sequence += aa; currentAA = aa; currentPtmLocation++; } } } } if (!modificationMass.equals("")) { String pepNovoPtmTag = ""; if (nTermPtm || cTermPtm) { pepNovoPtmTag += ptmTag; } else { pepNovoPtmTag += currentAA; } pepNovoPtmTag += modificationMass; ModificationMatch modMatch = new ModificationMatch(pepNovoPtmTag, true, currentPtmLocation); modificationMatches.add(modMatch); } AminoAcidSequence aminoAcidSequence = new AminoAcidSequence(sequence); for (ModificationMatch modificationMatch : modificationMatches) { aminoAcidSequence.addModificationMatch(modificationMatch.getModificationSite(), modificationMatch); } Tag tag = new Tag(nGap, aminoAcidSequence, cGap); TagAssumption tagAssumption = new TagAssumption(Advocate.pepnovo.getIndex(), rank, tag, new Charge(Charge.PLUS, charge), pepNovoScore); PepnovoAssumptionDetails pepnovoAssumptionDetails = new PepnovoAssumptionDetails(); pepnovoAssumptionDetails.setRankScore(rankScore); pepnovoAssumptionDetails.setMH(mH); tagAssumption.addUrParam(pepnovoAssumptionDetails); return tagAssumption; } /** * Get a PTM. * * @param pepnovoParameters the PepNovo parameters * @param pepNovoModification the PepNovo modification * * @return the PTM as a string */ public static String getPTM(PepnovoParameters pepnovoParameters, String pepNovoModification) { Map<String, String> invertedPtmMap = pepnovoParameters.getPepNovoPtmMap(); if (invertedPtmMap == null) { // @TODO: possible to rescue these? throw new IllegalArgumentException("Unsupported de novo search result. Please reprocess the data."); } String utilitesPtmName = invertedPtmMap.get(pepNovoModification); if (utilitesPtmName != null) { return utilitesPtmName; } else { throw new IllegalArgumentException("An error occurred while parsing the modification " + pepNovoModification + "."); } } @Override public HashMap<String, ArrayList<String>> getSoftwareVersions() { HashMap<String, ArrayList<String>> result = new HashMap<String, ArrayList<String>>(); ArrayList<String> versions = new ArrayList<String>(); versions.add("3.1 (beta)"); result.put("PepNovo+", versions); return result; } @Override public HashMap<String, LinkedList<SpectrumMatch>> getTagsMap() { return tagsMap; } @Override public void clearTagsMap() { if (tagsMap != null) { tagsMap.clear(); } } @Override public boolean hasDeNovoTags() { return true; } }