package com.compomics.util.experiment.io.identifications.idfilereaders;

import com.compomics.util.Util;
import com.compomics.util.experiment.biology.AminoAcidSequence;
import com.compomics.util.experiment.biology.Peptide;
import com.compomics.util.experiment.identification.Advocate;
import com.compomics.util.experiment.identification.identification_parameters.SearchParameters;
import com.compomics.util.experiment.identification.identification_parameters.tool_specific.NovorParameters;
import com.compomics.util.experiment.identification.matches.ModificationMatch;
import com.compomics.util.experiment.identification.matches.SpectrumMatch;
import com.compomics.util.experiment.identification.spectrum_assumptions.PeptideAssumption;
import com.compomics.util.experiment.io.identifications.IdfileReader;
import com.compomics.util.experiment.massspectrometry.Charge;
import com.compomics.util.experiment.massspectrometry.Spectrum;
import com.compomics.util.experiment.massspectrometry.SpectrumFactory;
import com.compomics.util.experiment.personalization.ExperimentObject;
import com.compomics.util.preferences.SequenceMatchingPreferences;
import com.compomics.util.waiting.WaitingHandler;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import javax.xml.bind.JAXBException;
import uk.ac.ebi.pride.tools.braf.BufferedRandomAccessFile;

/**
 * This IdfileReader reads identifications from a Novor csv result file.
 *
 * The file is expected to start with a block of '#' prefixed header lines
 * (software version, input file, fixed and variable modifications), followed
 * by a '# id, ...' column header line and one ", " separated line per
 * peptide spectrum match.
 *
 * @author Harald Barsnes
 */
public class NovorIdfileReader extends ExperimentObject implements IdfileReader {

    /**
     * Prefix of the header line naming the spectrum file Novor was run on.
     */
    private static final String INPUT_FILE_TAG = "# input file = ";
    /**
     * Prefix of the header line listing the fixed modifications.
     */
    private static final String FIXED_MODIFICATIONS_TAG = "# fixedModifications = ";
    /**
     * Prefix of the header line listing the variable modifications.
     */
    private static final String VARIABLE_MODIFICATIONS_TAG = "# variableModifications = ";
    /**
     * Prefix of the line holding the column headers.
     */
    private static final String COLUMN_HEADER_TAG = "# id,";
    /**
     * Prefix (lower case) marking an N-terminal modification in a sequence.
     */
    private static final String N_TERM_PREFIX = "n-term|";
    /**
     * Prefix (lower case) marking a C-terminal modification in a sequence.
     */
    private static final String C_TERM_PREFIX = "c-term|";

    /**
     * The software name.
     */
    private String softwareName = "Novor";
    /**
     * The softwareVersion.
     */
    private String softwareVersion = null;
    /**
     * The Novor csv file.
     */
    private File novorCsvFile;
    /**
     * Map of the tags found indexed by amino acid sequence.
     */
    private HashMap<String, LinkedList<SpectrumMatch>> tagsMap;
    /**
     * The spectrum factory used to retrieve spectrum titles.
     */
    private SpectrumFactory spectrumFactory = SpectrumFactory.getInstance();

    /**
     * Default constructor for the purpose of instantiation.
     */
    public NovorIdfileReader() {
    }

    /**
     * Constructor for an Novor csv result file reader.
     *
     * @param novorCsvFile the Novor csv file
     * @throws FileNotFoundException if a FileNotFoundException occurs
     * @throws IOException if an IOException occurs
     */
    public NovorIdfileReader(File novorCsvFile) throws FileNotFoundException, IOException {
        this(novorCsvFile, null);
    }

    /**
     * Constructor for an Novor csv result file reader. The software version
     * is extracted from the file header on construction.
     *
     * @param novorCsvFile the Novor csv file
     * @param waitingHandler the waiting handler (not used by this constructor)
     * @throws FileNotFoundException if a FileNotFoundException occurs
     * @throws IOException if an IOException occurs
     */
    public NovorIdfileReader(File novorCsvFile, WaitingHandler waitingHandler) throws FileNotFoundException, IOException {
        this.novorCsvFile = novorCsvFile;

        // get the novor version number
        extractVersionNumber();
    }

    /**
     * Extracts the Novor version number from the leading '#' header lines.
     * The first header line containing " v" is used, minus its first
     * character and surrounding white space. The version is left unchanged
     * if no such line is found, including when the file is empty.
     *
     * @throws IOException if the file cannot be read
     */
    private void extractVersionNumber() throws IOException {

        BufferedRandomAccessFile bufferedRandomAccessFile = new BufferedRandomAccessFile(novorCsvFile, "r", 1024 * 100);

        try {
            String line = bufferedRandomAccessFile.readLine();

            // readLine() returns null at the end of the file, which happens for
            // empty files and for files consisting of header lines only
            while (line != null && line.startsWith("#")) {
                if (line.contains(" v")) {
                    softwareVersion = line.substring(1).trim();
                    break;
                }
                line = bufferedRandomAccessFile.readLine();
            }
        } finally {
            // always release the file handle, also when reading fails
            bufferedRandomAccessFile.close();
        }
    }

    @Override
    public String getExtension() {
        return ".novor.csv";
    }

    @Override
    public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters)
            throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {
        return getAllSpectrumMatches(waitingHandler, searchParameters, null, true);
    }

    /**
     * Parses the Novor csv file and returns one spectrum match per spectrum,
     * each holding the peptide assumptions found on consecutive lines for
     * that spectrum.
     *
     * @param waitingHandler the waiting handler used to report progress and
     * to check for cancellation, can be null
     * @param searchParameters the search parameters, used to retrieve the
     * Novor specific parameters
     * @param sequenceMatchingPreferences the sequence matching preferences
     * (currently only referenced by the disabled tag mapping code)
     * @param expandAaCombinations if true, sequences for which
     * AminoAcidSequence.hasCombination returns true are replaced by one
     * assumption per sequence returned by AminoAcidSequence.getCombinations
     *
     * @return the spectrum matches found in the file
     *
     * @throws IOException if the file cannot be read
     * @throws IllegalArgumentException if mandatory header information or
     * columns are missing, a line is malformed or an unknown PTM is found
     * @throws SQLException declared for interface compatibility
     * @throws ClassNotFoundException declared for interface compatibility
     * @throws InterruptedException declared for interface compatibility
     * @throws JAXBException declared for interface compatibility
     */
    @Override
    public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters,
            SequenceMatchingPreferences sequenceMatchingPreferences, boolean expandAaCombinations)
            throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {

//        int tagMapKeyLength = 0;
//        if (sequenceMatchingPreferences != null) {
//            SequenceFactory sequenceFactory = SequenceFactory.getInstance();
//            tagMapKeyLength = sequenceFactory.getDefaultProteinTree().getInitialTagSize();
//            tagsMap = new HashMap<String, LinkedList<SpectrumMatch>>(1024);
//        }
        NovorParameters novorParameters = (NovorParameters) searchParameters.getIdentificationAlgorithmParameter(Advocate.novor.getIndex());

        LinkedList<SpectrumMatch> result = new LinkedList<SpectrumMatch>();

        BufferedRandomAccessFile bufferedRandomAccessFile = new BufferedRandomAccessFile(novorCsvFile, "r", 1024 * 100);

        try {
            if (waitingHandler != null) {
                waitingHandler.resetSecondaryProgressCounter();
                waitingHandler.setMaxSecondaryProgressCounter(100);
            }

            long progressUnit = bufferedRandomAccessFile.length() / 100;

            String inputFile = null;
            String fixedModificationsLine = null;
            String variableModificationsLine = null;

            // read until we find the header line
            String line;
            while ((line = bufferedRandomAccessFile.readLine()) != null && !line.startsWith(COLUMN_HEADER_TAG)) {
                if (line.startsWith(INPUT_FILE_TAG)) {
                    inputFile = line.substring(INPUT_FILE_TAG.length()).trim();
                }
                if (line.startsWith(FIXED_MODIFICATIONS_TAG)) {
                    fixedModificationsLine = line.substring(FIXED_MODIFICATIONS_TAG.length()).trim();
                }
                if (line.startsWith(VARIABLE_MODIFICATIONS_TAG)) {
                    variableModificationsLine = line.substring(VARIABLE_MODIFICATIONS_TAG.length()).trim();
                }
            }

            if (inputFile == null) {
                throw new IllegalArgumentException("Mandatory header information is missing in the Novor csv file (the input file tag). Please check the file!");
            }
            if (fixedModificationsLine == null) {
                throw new IllegalArgumentException("Mandatory header information is missing in the Novor csv file (the fixedModifications tag). Please check the file!");
            }
            if (variableModificationsLine == null) {
                throw new IllegalArgumentException("Mandatory header information is missing in the Novor csv file (the variableModifications tag). Please check the file!");
            }
            if (line == null) {
                // the end of the file was reached without finding the column header line
                throw new IllegalArgumentException("Mandatory header information is missing in the Novor csv file (the column header line). Please check the file!");
            }

            // get the spectrum file name
            String spectrumFileName = new File(inputFile).getName();

            // get the variable modifications, indexed from zero in the order listed
            HashMap<Integer, String> variableModificationsMap = new HashMap<Integer, String>();
            String[] tempVariable = variableModificationsLine.split(", ");
            for (int i = 0; i < tempVariable.length; i++) {
                variableModificationsMap.put(i, tempVariable[i]);
            }

            // get the fixed modifications, the indexes continue after the variable ones
            HashMap<Integer, String> fixedModificationsMap = new HashMap<Integer, String>();
            String[] tempFixed = fixedModificationsLine.split(", ");
            for (int i = 0; i < tempFixed.length; i++) {
                fixedModificationsMap.put(variableModificationsMap.size() + i, tempFixed[i]);
            }

            // remove the leading '#' and any trailing comma from the header line
            String headerString = line.substring(1).trim();
            if (headerString.endsWith(",")) {
                headerString = headerString.substring(0, headerString.length() - 1);
            }

            // parse the header line and get the column index of the headers
            String[] headers = headerString.split(", ");
            int idIndex = findColumn(headers, "id");
            int chargeIndex = findColumn(headers, "z");
            int scoreIndex = findColumn(headers, "score");
            int peptideIndex = findColumn(headers, "peptide");
            int aaScoreIndex = findColumn(headers, "aaScore");

            // columns that are not read below but still have to be present
            String[] otherMandatoryColumns = {"scanNum", "RT", "mz(data)", "pepMass(denovo)", "err(data-denovo)", "ppm(1e6*err/(mz*z))"};

            // check if all the required header are found
            boolean columnMissing = idIndex == -1 || chargeIndex == -1 || scoreIndex == -1 || peptideIndex == -1 || aaScoreIndex == -1;
            for (String columnName : otherMandatoryColumns) {
                if (findColumn(headers, columnName) == -1) {
                    columnMissing = true;
                }
            }
            if (columnMissing) {
                throw new IllegalArgumentException("Mandatory columns are missing in the Novor csv file. Please check the file!");
            }

            // the highest column index read from every data line
            int lastColumnRead = Math.max(Math.max(idIndex, chargeIndex), Math.max(Math.max(scoreIndex, peptideIndex), aaScoreIndex));

            String currentSpectrumTitle = null;
            SpectrumMatch currentMatch = null;

            // get the psms
            while ((line = bufferedRandomAccessFile.readLine()) != null) {

                if (line.trim().isEmpty()) {
                    continue;
                }

                String[] elements = line.split(", ");

                if (elements.length <= lastColumnRead) {
                    throw new IllegalArgumentException("Unexpected number of columns in the Novor csv file line '" + line + "'. Please check the file!");
                }

                int id = Integer.parseInt(elements[idIndex]);
                int charge = Integer.parseInt(elements[chargeIndex]);
                String peptideSequenceWithMods = elements[peptideIndex];

                // get the novor score
                String scoreAsText = elements[scoreIndex];
                double novorScore = Util.readDoubleAsString(scoreAsText);

                // get the novor e-value
                //double novorEValue = Math.pow(10, -novorScore); // convert novor score to e-value // @TODO: is this correct?
                //
                // amino acids scores, separated by '-'
                String aminoAcidScoresAsString = elements[aaScoreIndex];
                String[] tempAminoAcidScores = aminoAcidScoresAsString.split("-");
                double[] aminoAcidScoresAsList = new double[tempAminoAcidScores.length];
                for (int i = 0; i < tempAminoAcidScores.length; i++) {
                    aminoAcidScoresAsList[i] = Double.parseDouble(tempAminoAcidScores[i]);
                }
                ArrayList<double[]> aminoAcidScores = new ArrayList<double[]>(1);
                aminoAcidScores.add(aminoAcidScoresAsList);

                // get the spectrum title, defaults to the id if the spectrum file is not loaded
                String spectrumTitle = id + "";
                if (spectrumFactory.fileLoaded(spectrumFileName)) {
                    spectrumTitle = spectrumFactory.getSpectrumTitle(spectrumFileName, id);
                }

                // set up the yet empty spectrum match, or add to the current match
                if (currentMatch == null || (currentSpectrumTitle != null && !currentSpectrumTitle.equalsIgnoreCase(spectrumTitle))) {

                    // add the previous match, if any
                    if (currentMatch != null) {
                        result.add(currentMatch);
                    }

                    currentMatch = new SpectrumMatch(Spectrum.getSpectrumKey(spectrumFileName, spectrumTitle));
                    currentMatch.setSpectrumNumber(id);
                    currentSpectrumTitle = spectrumTitle;
                }

                // get the modifications and the unmodified sequence
                ArrayList<ModificationMatch> utilitiesModifications = new ArrayList<ModificationMatch>();
                String peptideSequence = extractSequenceAndModifications(peptideSequenceWithMods,
                        variableModificationsMap, fixedModificationsMap, novorParameters, utilitiesModifications);

                // set up the charge
                Charge peptideCharge = new Charge(Charge.PLUS, charge);

                //@TODO: do we want to leave the option of using tags?
                // create the tag assumption
//                AminoAcidSequence aminoAcidSequence = new AminoAcidSequence(peptideSequence);
//                for (ModificationMatch modificationMatch : utilitiesModifications) {
//                    aminoAcidSequence.addModificationMatch(modificationMatch.getModificationSite(), modificationMatch);
//                }
//                Tag tag = new Tag(0, aminoAcidSequence, 0);
//                TagAssumption tagAssumption = new TagAssumption(Advocate.novor.getIndex(), 1, tag, peptideCharge, novorScore);
//                tagAssumption.setAminoAcidScores(aminoAcidScores);
////                //tagAssumption.setRawScore(novorScore);
//
//                currentMatch.addHit(Advocate.novor.getIndex(), tagAssumption, true);
//
//                if (sequenceMatchingPreferences != null) {
//                    HashMap<Integer, HashMap<String, ArrayList<TagAssumption>>> matchTagMap = currentMatch.getTagAssumptionsMap(tagMapKeyLength, sequenceMatchingPreferences);
//                    for (HashMap<String, ArrayList<TagAssumption>> advocateMap : matchTagMap.values()) {
//                        for (String key : advocateMap.keySet()) {
//                            LinkedList<SpectrumMatch> tagMatches = tagsMap.get(key);
//                            if (tagMatches == null) {
//                                tagMatches = new LinkedList<SpectrumMatch>();
//                                tagsMap.put(key, tagMatches);
//                            }
//                            tagMatches.add(currentMatch);
//                        }
//                    }
//                }
                // Create the peptide assumption
                Peptide peptide = new Peptide(peptideSequence, utilitiesModifications, true);
                PeptideAssumption peptideAssumption = new PeptideAssumption(peptide, 1, Advocate.novor.getIndex(), peptideCharge, novorScore, novorCsvFile.getName());
                peptideAssumption.setAminoAcidScores(aminoAcidScores);
                //peptideAssumption.setRawScore(novorScore);

                if (expandAaCombinations && AminoAcidSequence.hasCombination(peptideAssumption.getPeptide().getSequence())) {

                    ArrayList<ModificationMatch> previousModificationMatches = peptide.getModificationMatches();

                    for (StringBuilder expandedSequence : AminoAcidSequence.getCombinations(peptide.getSequence())) {

                        // Use a fresh list for every expanded peptide. Whether Peptide keeps a reference to the
                        // list it is given is not visible from here; if it does, a shared list would make the
                        // modifications accumulate across the expanded peptides.
                        ArrayList<ModificationMatch> newModificationMatches = null;
                        if (previousModificationMatches != null) {
                            newModificationMatches = new ArrayList<ModificationMatch>(previousModificationMatches.size());
                        }

                        Peptide newPeptide = new Peptide(expandedSequence.toString(), newModificationMatches, true);

                        if (previousModificationMatches != null) {
                            for (ModificationMatch modificationMatch : previousModificationMatches) {
                                newPeptide.addModificationMatch(new ModificationMatch(modificationMatch.getTheoreticPtm(), modificationMatch.isVariable(), modificationMatch.getModificationSite()));
                            }
                        }

                        PeptideAssumption newAssumption = new PeptideAssumption(newPeptide, peptideAssumption.getRank(), peptideAssumption.getAdvocate(),
                                peptideAssumption.getIdentificationCharge(), peptideAssumption.getScore(), peptideAssumption.getIdentificationFile());
                        currentMatch.addHit(Advocate.novor.getIndex(), newAssumption, true);
                    }
                } else {
                    currentMatch.addHit(Advocate.novor.getIndex(), peptideAssumption, true);
                }

                if (waitingHandler != null && progressUnit != 0) {
                    waitingHandler.setSecondaryProgressCounter((int) (bufferedRandomAccessFile.getFilePointer() / progressUnit));
                    if (waitingHandler.isRunCanceled()) {
                        // the file is closed in the finally block below
                        break;
                    }
                }
            }

            // add the last match, if any
            if (currentMatch != null) {
                result.add(currentMatch);
            }

        } finally {
            // always release the file handle, also when the parsing fails
            bufferedRandomAccessFile.close();
        }

        return result;
    }

    /**
     * Returns the index of the column with the given name, ignoring case. If
     * the name occurs more than once the last occurrence is returned.
     *
     * @param headers the column headers
     * @param columnName the name of the column to look for
     *
     * @return the index of the column, -1 if not found
     */
    private static int findColumn(String[] headers, String columnName) {
        int columnIndex = -1;
        for (int i = 0; i < headers.length; i++) {
            if (headers[i].equalsIgnoreCase(columnName)) {
                columnIndex = i;
            }
        }
        return columnIndex;
    }

    /**
     * Splits a Novor peptide sequence with inline modifications, e.g.
     * (N-term|0)S(1)EQUENCES(1)(C-term|2), into the plain sequence and the
     * corresponding modification matches. The text between parentheses is
     * parsed as an integer index, optionally prefixed by "N-term|" or
     * "C-term|", and looked up in the variable and then the fixed
     * modifications map. All characters outside parentheses are copied to the
     * returned sequence as they are.
     *
     * @param peptideSequenceWithMods the sequence as found in the Novor file
     * @param variableModificationsMap the variable modifications indexed by
     * their Novor index
     * @param fixedModificationsMap the fixed modifications indexed by their
     * Novor index
     * @param novorParameters the Novor parameters, can be null
     * @param modificationMatches the list the modification matches found are
     * added to
     *
     * @return the sequence without the modifications
     *
     * @throws IllegalArgumentException if a parenthesis is not closed, a
     * modification index is not a number or the modification is unknown
     */
    private static String extractSequenceAndModifications(String peptideSequenceWithMods,
            HashMap<Integer, String> variableModificationsMap, HashMap<Integer, String> fixedModificationsMap,
            NovorParameters novorParameters, ArrayList<ModificationMatch> modificationMatches) {

        if (!peptideSequenceWithMods.contains("(") && !peptideSequenceWithMods.contains("[")) {
            return peptideSequenceWithMods;
        }

        StringBuilder sequence = new StringBuilder(peptideSequenceWithMods.length());

        for (int i = 0; i < peptideSequenceWithMods.length(); i++) {

            char currentChar = peptideSequenceWithMods.charAt(i);

            if (currentChar != '(') {
                sequence.append(currentChar);
                continue;
            }

            int modEnd = peptideSequenceWithMods.indexOf(")", i + 1);
            if (modEnd == -1) {
                throw new IllegalArgumentException("Unbalanced parenthesis in the peptide sequence '" + peptideSequenceWithMods + "'. Please check the Novor results file.");
            }

            String currentMod = peptideSequenceWithMods.substring(i + 1, modEnd);
            String currentModLowerCase = currentMod.toLowerCase();

            String modificationIndexAsText;
            int modificationSite;

            if (currentModLowerCase.startsWith(N_TERM_PREFIX)) {
                modificationIndexAsText = currentMod.substring(N_TERM_PREFIX.length());
                modificationSite = 1;
            } else if (currentModLowerCase.startsWith(C_TERM_PREFIX)) {
                modificationIndexAsText = currentMod.substring(C_TERM_PREFIX.length());
                modificationSite = sequence.length();
            } else {
                // the modification applies to the residue right before the parenthesis
                modificationIndexAsText = currentMod;
                modificationSite = sequence.length();
            }

            // throws NumberFormatException, an IllegalArgumentException, if the index is not a number
            int modificationIndex = Integer.parseInt(modificationIndexAsText);

            ModificationMatch modificationMatch = createModificationMatch(modificationIndex, modificationSite,
                    variableModificationsMap, fixedModificationsMap, novorParameters);
            if (modificationMatch != null) {
                modificationMatches.add(modificationMatch);
            }

            // continue after the closing parenthesis
            i = modEnd;
        }

        return sequence.toString();
    }

    /**
     * Creates the modification match for the given Novor modification index.
     * The variable modifications take precedence over the fixed ones.
     *
     * @param modificationIndex the Novor index of the modification
     * @param modificationSite the site of the modification on the peptide
     * @param variableModificationsMap the variable modifications indexed by
     * their Novor index
     * @param fixedModificationsMap the fixed modifications indexed by their
     * Novor index
     * @param novorParameters the Novor parameters, can be null
     *
     * @return the modification match, null if the index is in neither map but
     * the Novor parameters provide a PTM map
     *
     * @throws IllegalArgumentException if the index is in neither map and no
     * Novor PTM map is available
     */
    private static ModificationMatch createModificationMatch(int modificationIndex, int modificationSite,
            HashMap<Integer, String> variableModificationsMap, HashMap<Integer, String> fixedModificationsMap,
            NovorParameters novorParameters) {

        if (variableModificationsMap.containsKey(modificationIndex)) {
            return new ModificationMatch(variableModificationsMap.get(modificationIndex), true, modificationSite);
        }
        if (fixedModificationsMap.containsKey(modificationIndex)) {
            return new ModificationMatch(fixedModificationsMap.get(modificationIndex), false, modificationSite);
        }
        if (novorParameters == null || novorParameters.getNovorPtmMap() == null) {
            throw new IllegalArgumentException("Unknown PTM! Please check the Novor results file.");
        }

        // unknown index but a Novor PTM map is available: the modification is skipped
        return null;
    }

    @Override
    public void close() throws IOException {
        novorCsvFile = null;
    }

    @Override
    public HashMap<String, ArrayList<String>> getSoftwareVersions() {
        HashMap<String, ArrayList<String>> result = new HashMap<String, ArrayList<String>>();
        ArrayList<String> versions = new ArrayList<String>();
        // note: the version is null if it could not be extracted from the file
        versions.add(softwareVersion);
        result.put(softwareName, versions);
        return result;
    }

    @Override
    public HashMap<String, LinkedList<SpectrumMatch>> getTagsMap() {
        if (tagsMap == null) {
            return new HashMap<String, LinkedList<SpectrumMatch>>(0);
        }
        return tagsMap;
    }

    @Override
    public void clearTagsMap() {
        if (tagsMap != null) {
            tagsMap.clear();
        }
    }

    @Override
    public boolean hasDeNovoTags() {
        return false;
    }
}