package com.compomics.util.experiment.io.massspectrometry; import com.compomics.util.experiment.massspectrometry.Charge; import com.compomics.util.experiment.massspectrometry.MSnSpectrum; import com.compomics.util.experiment.massspectrometry.Peak; import com.compomics.util.experiment.massspectrometry.Precursor; import com.compomics.util.preferences.UtilitiesUserPreferences; import com.compomics.util.waiting.WaitingHandler; import java.io.*; import java.net.URLDecoder; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashSet; import uk.ac.ebi.pride.tools.braf.BufferedRandomAccessFile; /** * This class will read an MGF file. * * @author Marc Vaudel * @author Harald Barsnes */ public class MgfReader { /** * General constructor for an mgf reader. */ public MgfReader() { } /** * Returns the next spectrum found in the mgf file. Null if none found. * * @param br a buffered reader * @param fileName the name of the mgf file * * @return the next spectrum found in the mgf file * * @throws IOException if an IOException occurs */ public static MSnSpectrum getSpectrum(BufferedReader br, String fileName) throws IOException { String line; HashMap<Double, Peak> spectrum = new HashMap<Double, Peak>(); double precursorMz = 0; double precursorIntensity = 0; double rt = -1.0; double rt1 = -1.0; double rt2 = -1.0; ArrayList<Charge> precursorCharges = new ArrayList<Charge>(); String scanNumber = ""; String spectrumTitle = ""; boolean insideSpectrum = false; while ((line = br.readLine()) != null) { // fix for lines ending with \r if (line.endsWith("\r")) { line = line.replace("\r", ""); } if (line.startsWith("BEGIN IONS")) { // reset the spectrum details insideSpectrum = true; } else if (line.startsWith("TITLE")) { spectrumTitle = line.substring(line.indexOf('=') + 1); try { spectrumTitle = URLDecoder.decode(spectrumTitle, "utf-8"); } catch (UnsupportedEncodingException e) { System.out.println("An exception was thrown when trying to decode the mgf title '" + spectrumTitle + "'."); e.printStackTrace(); } } else if (line.startsWith("CHARGE")) { precursorCharges = parseCharges(line); } else if (line.startsWith("PEPMASS")) { String temp = line.substring(line.indexOf("=") + 1); String[] values = temp.split("\\s"); precursorMz = Double.parseDouble(values[0]); if (values.length > 1) { precursorIntensity = Double.parseDouble(values[1]); } else { precursorIntensity = 0.0; } } else if (line.startsWith("RTINSECONDS")) { String rtInput = line.substring(line.indexOf('=') + 1); try { String[] rtWindow = rtInput.split("-"); if (rtWindow.length == 1) { String tempRt = rtWindow[0]; // possible fix for values like RTINSECONDS=PT121.250000S if (tempRt.startsWith("PT") && tempRt.endsWith("S")) { tempRt = tempRt.substring(2, tempRt.length() - 1); } rt = new Double(tempRt); } else if (rtWindow.length == 2) { rt1 = new Double(rtWindow[0]); rt2 = new Double(rtWindow[1]); } } catch (Exception e) { System.out.println("An exception was thrown when trying to decode the retention time " + rtInput + " in spectrum " + spectrumTitle + "."); e.printStackTrace(); // ignore exception, RT will not be parsed } } else if (line.startsWith("TOLU")) { // peptide tolerance unit not implemented } else if (line.startsWith("TOL")) { // peptide tolerance not implemented } else if (line.startsWith("SEQ")) { // sequence qualifier not implemented } else if (line.startsWith("COMP")) { // composition qualifier not implemented } else if (line.startsWith("ETAG")) { // error tolerant search sequence tag not implemented } else if (line.startsWith("TAG")) { // sequence tag not implemented } else if (line.startsWith("SCANS")) { try { scanNumber = line.substring(line.indexOf('=') + 1); } catch (NumberFormatException e) { throw new IllegalArgumentException("Cannot parse scan number."); } } else if (line.startsWith("TAG")) { // sequence tag not implemented } else if (line.startsWith("RAWSCANS")) { // raw scans not implemented } else if (line.startsWith("END IONS")) { insideSpectrum = false; Precursor precursor; if (rt1 != -1 && rt2 != -1) { precursor = new Precursor(precursorMz, precursorIntensity, precursorCharges, rt1, rt2); } else { precursor = new Precursor(rt, precursorMz, precursorIntensity, precursorCharges); } MSnSpectrum msnSpectrum = new MSnSpectrum(2, precursor, spectrumTitle, spectrum, fileName); if (scanNumber.length() > 0) { msnSpectrum.setScanNumber(scanNumber); } return msnSpectrum; } else if (insideSpectrum && !line.equals("")) { try { String values[] = line.split("\\s+"); Double mz = new Double(values[0]); Double intensity = new Double(values[1]); spectrum.put(mz, new Peak(mz, intensity)); } catch (Exception e1) { // ignore comments and all other lines } } } return null; } /** * Reads an MGF file and retrieves a list of spectra. * * @param aFile the mgf file * @return list of MSnSpectra imported from the file * @throws FileNotFoundException Exception thrown if a problem is * encountered reading the file * @throws IOException Exception thrown if a problem is encountered reading * the file * @throws IllegalArgumentException thrown when a parameter in the file * cannot be parsed correctly */ public ArrayList<MSnSpectrum> getSpectra(File aFile) throws FileNotFoundException, IOException, IllegalArgumentException { ArrayList<MSnSpectrum> spectra = new ArrayList<MSnSpectrum>(); BufferedReader br = new BufferedReader(new FileReader(aFile)); try { MSnSpectrum spectrum; while ((spectrum = getSpectrum(br, aFile.getName())) != null) { spectra.add(spectrum); } } finally { br.close(); } return spectra; } /** * Returns the index of all spectra in the given mgf file. * * @param mgfFile the given mgf file * @return the index of all spectra * @throws FileNotFoundException Exception thrown whenever the file is not * found * @throws IOException Exception thrown whenever an error occurs while * reading the file */ public static MgfIndex getIndexMap(File mgfFile) throws FileNotFoundException, IOException { return getIndexMap(mgfFile, null); } /** * Returns the index of all spectra in the given MGF file. * * @param mgfFile the given MGF file * @param waitingHandler a waitingHandler showing the progress * @return the index of all spectra * @throws FileNotFoundException Exception thrown whenever the file is not * found * @throws IOException Exception thrown whenever an error occurs while * reading the file */ public static MgfIndex getIndexMap(File mgfFile, WaitingHandler waitingHandler) throws FileNotFoundException, IOException { HashMap<String, Long> indexes = new HashMap<String, Long>(); HashMap<String, Integer> spectrumIndexes = new HashMap<String, Integer>(); HashMap<Integer, Double> precursorMzMap = new HashMap<Integer, Double>(); LinkedHashSet<String> spectrumTitles = new LinkedHashSet<String>(); HashMap<String, Integer> duplicateTitles = new HashMap<String, Integer>(); BufferedRandomAccessFile bufferedRandomAccessFile = new BufferedRandomAccessFile(mgfFile, "r", 1024 * 100); long currentIndex = 0; String title = null; int spectrumCounter = 0; double maxRT = -1, minRT = Double.MAX_VALUE, maxMz = -1, maxIntensity = 0; int maxCharge = 0, maxPeakCount = 0, peakCount = 0; boolean peakPicked = true; boolean precursorChargesMissing = false; if (waitingHandler != null) { waitingHandler.setSecondaryProgressCounterIndeterminate(false); waitingHandler.setMaxSecondaryProgressCounter(100); waitingHandler.setSecondaryProgressCounter(0); } long progressUnit = bufferedRandomAccessFile.length() / 100; String line; boolean insideSpectrum = false; boolean chargeTagFound = false; while ((line = bufferedRandomAccessFile.getNextLine()) != null) { // fix for lines ending with \r if (line.endsWith("\r")) { line = line.replace("\r", ""); } if (line.equals("BEGIN IONS")) { insideSpectrum = true; chargeTagFound = false; currentIndex = bufferedRandomAccessFile.getFilePointer(); spectrumCounter++; peakCount = 0; if (waitingHandler != null) { if (waitingHandler.isRunCanceled()) { break; } waitingHandler.setSecondaryProgressCounter((int) (currentIndex / progressUnit)); } } else if (line.startsWith("TITLE")) { title = line.substring(line.indexOf('=') + 1); try { title = URLDecoder.decode(title, "utf-8"); } catch (UnsupportedEncodingException e) { if (waitingHandler != null) { waitingHandler.appendReport("An exception was thrown when trying to decode an mgf title: " + title, true, true); } System.out.println("An exception was thrown when trying to decode an mgf title: " + title); e.printStackTrace(); } Integer nDuplicates = duplicateTitles.get(title); if (nDuplicates != null || spectrumTitles.contains(title)) { if (nDuplicates == null) { nDuplicates = 0; System.err.println("Warning: Spectrum title " + title + " is not unique in " + mgfFile.getName() + "!"); } duplicateTitles.put(title, ++nDuplicates); title += "_" + nDuplicates; } spectrumTitles.add(title); indexes.put(title, currentIndex); spectrumIndexes.put(title, spectrumCounter - 1); } else if (line.startsWith("CHARGE")) { ArrayList<Charge> precursorCharges = parseCharges(line); for (Charge charge : precursorCharges) { if (charge.value > maxCharge) { maxCharge = charge.value; } } chargeTagFound = true; } else if (line.startsWith("PEPMASS")) { String temp = line.substring(line.indexOf("=") + 1); String[] values = temp.split("\\s"); double precursorMz = Double.parseDouble(values[0]); if (precursorMz > maxMz) { maxMz = precursorMz; } if (values.length > 1) { double precursorIntensity = Double.parseDouble(values[1]); if (precursorIntensity > maxIntensity) { maxIntensity = precursorIntensity; } } precursorMzMap.put(spectrumCounter - 1, precursorMz); } else if (line.startsWith("RTINSECONDS")) { String rtInput = ""; try { rtInput = line.substring(line.indexOf('=') + 1); String[] rtWindow = rtInput.split("-"); if (rtWindow.length == 1) { String tempRt = rtWindow[0]; // possible fix for values like RTINSECONDS=PT121.250000S if (tempRt.startsWith("PT") && tempRt.endsWith("S")) { tempRt = tempRt.substring(2, tempRt.length() - 1); } double rt = new Double(tempRt); if (rt > maxRT) { maxRT = rt; } if (rt < minRT) { minRT = rt; } } else if (rtWindow.length == 2 && !rtWindow[0].equals("")) { double rt1 = new Double(rtWindow[0]); if (rt1 > maxRT) { maxRT = rt1; } if (rt1 < minRT) { minRT = rt1; } double rt2 = new Double(rtWindow[1]); if (rt2 > maxRT) { maxRT = rt2; } if (rt2 < minRT) { minRT = rt2; } } } catch (NumberFormatException e) { throw new IllegalArgumentException("Cannot parse retention time: " + rtInput); } } else if (line.startsWith("END IONS")) { insideSpectrum = false; if (title != null) { if (peakCount > maxPeakCount) { maxPeakCount = peakCount; } } title = null; if (!chargeTagFound) { precursorChargesMissing = true; } } else if (insideSpectrum && !line.equals("")) { try { String values[] = line.split("\\s+"); //Double mz = new Double(values[0]); Double intensity = new Double(values[1]); if (peakPicked && intensity == 0) { peakPicked = false; } peakCount++; } catch (Exception e1) { // ignore comments and all other lines } } } if (waitingHandler != null) { waitingHandler.setSecondaryProgressCounterIndeterminate(true); } bufferedRandomAccessFile.close(); if (minRT == Double.MAX_VALUE) { minRT = 0; } // convert the spectrum titles to an arraylist ArrayList<String> spectrumTitlesAsArrayList = new ArrayList<String>(); // @TODO: is there a faster way of doing this? for (String temp : spectrumTitles) { spectrumTitlesAsArrayList.add(temp); } return new MgfIndex(spectrumTitlesAsArrayList, duplicateTitles, indexes, spectrumIndexes, precursorMzMap, mgfFile.getName(), minRT, maxRT, maxMz, maxIntensity, maxCharge, maxPeakCount, peakPicked, precursorChargesMissing, mgfFile.lastModified(), spectrumCounter); } /** * Removes duplicate spectrum titles (the first occurrence is kept). * * @param mgfFile the MGF file to validate * @param waitingHandler a waitingHandler showing the progress, can be null * * @throws FileNotFoundException Exception thrown whenever the file is not * found * @throws IOException Exception thrown whenever an error occurs while * reading the file * @throws UnsupportedEncodingException if the decoding of a spectrum title * fails */ public static void removeDuplicateSpectrumTitles(File mgfFile, WaitingHandler waitingHandler) throws FileNotFoundException, IOException, UnsupportedEncodingException { ArrayList<String> spectrumTitles = new ArrayList<String>(); File tempSpectrumFile = new File(mgfFile.getParentFile(), mgfFile.getName() + "_temp"); if (waitingHandler != null) { waitingHandler.setSecondaryProgressCounterIndeterminate(false); waitingHandler.setMaxSecondaryProgressCounter(100); waitingHandler.setSecondaryProgressCounter(0); } BufferedRandomAccessFile br = new BufferedRandomAccessFile(mgfFile, "r", 1024 * 100); String lineBreak = System.getProperty("line.separator"); try { long progressUnit = br.length() / 100; FileWriter fw = new FileWriter(tempSpectrumFile); try { BufferedWriter bw = new BufferedWriter(fw); try { String line; String currentSpectrum = ""; boolean includeSpectrum = true; while ((line = br.readLine()) != null) { if (line.startsWith("BEGIN IONS")) { currentSpectrum = line + lineBreak; if (waitingHandler != null) { if (waitingHandler.isRunCanceled()) { break; } waitingHandler.setSecondaryProgressCounter((int) (br.getFilePointer() / progressUnit)); } } else if (line.startsWith("TITLE")) { currentSpectrum += line + lineBreak; String title = line.substring(line.indexOf('=') + 1); try { title = URLDecoder.decode(title, "utf-8"); } catch (UnsupportedEncodingException e) { e.printStackTrace(); throw new UnsupportedEncodingException("An exception was thrown when trying to decode an mgf title: " + title); } if (!spectrumTitles.contains(title)) { spectrumTitles.add(title); includeSpectrum = true; } else { includeSpectrum = false; } } else if (line.startsWith("END IONS")) { currentSpectrum += line + lineBreak; if (includeSpectrum) { bw.write(currentSpectrum); bw.newLine(); } } else { currentSpectrum += line + lineBreak; } } } finally { bw.close(); } } finally { fw.close(); } } finally { br.close(); } if (waitingHandler != null) { waitingHandler.setSecondaryProgressCounterIndeterminate(true); } // replace the old file String orignalFilePath = mgfFile.getAbsolutePath(); boolean fileDeleted = mgfFile.delete(); if (!fileDeleted) { throw new IOException("Failed to delete the original spectrum file."); } boolean fileRenamed = tempSpectrumFile.renameTo(new File(orignalFilePath)); if (!fileRenamed) { throw new IOException("Failed to replace the original spectrum file."); } } /** * Adds missing spectrum titles. * * @param mgfFile the MGF file to fix * @param waitingHandler a waitingHandler showing the progress, can be null * * @throws FileNotFoundException Exception thrown whenever the file is not * found * @throws IOException Exception thrown whenever an error occurs while * reading the file * @throws UnsupportedEncodingException if the decoding of a spectrum title * fails */ public static void addMissingSpectrumTitles(File mgfFile, WaitingHandler waitingHandler) throws FileNotFoundException, IOException, UnsupportedEncodingException { ArrayList<String> spectrumTitles = new ArrayList<String>(); File tempSpectrumFile = new File(mgfFile.getParentFile(), mgfFile.getName() + "_temp"); if (waitingHandler != null) { waitingHandler.setSecondaryProgressCounterIndeterminate(false); waitingHandler.setMaxSecondaryProgressCounter(100); waitingHandler.setSecondaryProgressCounter(0); } BufferedRandomAccessFile br = new BufferedRandomAccessFile(mgfFile, "r", 1024 * 100); String lineBreak = System.getProperty("line.separator"); try { long progressUnit = br.length() / 100; FileWriter fw = new FileWriter(tempSpectrumFile); try { BufferedWriter bw = new BufferedWriter(fw); try { String line; String currentSpectrum = ""; String title = null; int spectrumCounter = 0; while ((line = br.readLine()) != null) { if (line.startsWith("BEGIN IONS")) { spectrumCounter++; if (waitingHandler != null) { if (waitingHandler.isRunCanceled()) { break; } waitingHandler.setSecondaryProgressCounter((int) (br.getFilePointer() / progressUnit)); } } else if (line.startsWith("TITLE")) { currentSpectrum += line + lineBreak; title = line.substring(line.indexOf('=') + 1); try { title = URLDecoder.decode(title, "utf-8"); } catch (UnsupportedEncodingException e) { e.printStackTrace(); throw new UnsupportedEncodingException("An exception was thrown when trying to decode an mgf title: " + title); } spectrumTitles.add(title); } else if (line.startsWith("END IONS")) { bw.write("BEGIN IONS" + lineBreak); if (title == null) { title = "Spectrum " + spectrumCounter; while (spectrumTitles.contains(title)) { title = "Spectrum " + ++spectrumCounter; } spectrumTitles.add(title); bw.write("TITLE=" + title + lineBreak); } bw.write(currentSpectrum); bw.write("END IONS" + lineBreak); currentSpectrum = ""; title = null; } else { currentSpectrum += line + lineBreak; } } } finally { bw.close(); } } finally { fw.close(); } } finally { br.close(); } if (waitingHandler != null) { waitingHandler.setSecondaryProgressCounterIndeterminate(true); } // replace the old file String orignalFilePath = mgfFile.getAbsolutePath(); boolean fileDeleted = mgfFile.delete(); if (!fileDeleted) { throw new IOException("Failed to delete the original spectrum file."); } boolean fileRenamed = tempSpectrumFile.renameTo(new File(orignalFilePath)); if (!fileRenamed) { throw new IOException("Failed to replace the original spectrum file."); } } /** * Add missing precursor charges. * * @param mgfFile the MGF file to fix * @param waitingHandler a waitingHandler showing the progress, can be null * * @throws FileNotFoundException Exception thrown whenever the file is not * found * @throws IOException Exception thrown whenever an error occurs while * reading the file * @throws UnsupportedEncodingException if the decoding of a spectrum title * fails */ public static void addMissingPrecursorCharges(File mgfFile, WaitingHandler waitingHandler) throws FileNotFoundException, IOException, UnsupportedEncodingException { File tempSpectrumFile = new File(mgfFile.getParentFile(), mgfFile.getName() + "_temp"); if (waitingHandler != null) { waitingHandler.setSecondaryProgressCounterIndeterminate(false); waitingHandler.setMaxSecondaryProgressCounter(100); waitingHandler.setSecondaryProgressCounter(0); } UtilitiesUserPreferences userPreferences = UtilitiesUserPreferences.loadUserPreferences(); BufferedRandomAccessFile br = new BufferedRandomAccessFile(mgfFile, "r", 1024 * 100); String lineBreak = System.getProperty("line.separator"); try { long progressUnit = br.length() / 100; FileWriter fw = new FileWriter(tempSpectrumFile); try { BufferedWriter bw = new BufferedWriter(fw); try { String line; boolean chargeFound = false; boolean insideSpectrum = false; while ((line = br.readLine()) != null) { if (line.startsWith("BEGIN IONS")) { insideSpectrum = true; chargeFound = false; if (waitingHandler != null) { if (waitingHandler.isRunCanceled()) { break; } waitingHandler.setSecondaryProgressCounter((int) (br.getFilePointer() / progressUnit)); } } else if (line.startsWith("END IONS")) { insideSpectrum = false; } else if (line.startsWith("CHARGE")) { chargeFound = true; } else if (!line.equals("")) { if (insideSpectrum && !chargeFound) { try { String values[] = line.split("\\s+"); new Double(values[0]); new Double(values[1]); // we're inside the peak list bw.write("CHARGE="); for (int i = userPreferences.getMinSpectrumChargeRange(); i <= userPreferences.getMaxSpectrumChargeRange(); i++) { if (i > userPreferences.getMinSpectrumChargeRange()) { bw.write(" and "); } bw.write(i + "+"); } bw.write(lineBreak); chargeFound = true; } catch (Exception e1) { // ignore comments and all other lines } } } bw.write(line); bw.write(lineBreak); } } finally { bw.close(); } } finally { fw.close(); } } finally { br.close(); } if (waitingHandler != null) { waitingHandler.setSecondaryProgressCounterIndeterminate(true); } // replace the old file String orignalFilePath = mgfFile.getAbsolutePath(); boolean fileDeleted = mgfFile.delete(); if (!fileDeleted) { throw new IOException("Failed to delete the original spectrum file."); } boolean fileRenamed = tempSpectrumFile.renameTo(new File(orignalFilePath)); if (!fileRenamed) { throw new IOException("Failed to replace the original spectrum file."); } } /** * Removes zero intensity peaks. * * @param mgfFile the MGF file to fix * @param waitingHandler a waitingHandler showing the progress, can be null * * @throws FileNotFoundException Exception thrown whenever the file is not * found * @throws IOException Exception thrown whenever an error occurs while * reading the file * @throws UnsupportedEncodingException if the decoding of a spectrum title * fails */ public static void removeZeroes(File mgfFile, WaitingHandler waitingHandler) throws FileNotFoundException, IOException, UnsupportedEncodingException { File tempSpectrumFile = new File(mgfFile.getParentFile(), mgfFile.getName() + "_temp"); if (waitingHandler != null) { waitingHandler.setSecondaryProgressCounterIndeterminate(false); waitingHandler.setMaxSecondaryProgressCounter(100); waitingHandler.setSecondaryProgressCounter(0); } BufferedRandomAccessFile br = new BufferedRandomAccessFile(mgfFile, "r", 1024 * 100); try { long progressUnit = br.length() / 100; FileWriter fw = new FileWriter(tempSpectrumFile); try { BufferedWriter bw = new BufferedWriter(fw); try { String line; boolean spectrum = false; while ((line = br.readLine()) != null) { if (line.startsWith("BEGIN IONS")) { spectrum = true; if (waitingHandler != null) { if (waitingHandler.isRunCanceled()) { break; } waitingHandler.setSecondaryProgressCounter((int) (br.getFilePointer() / progressUnit)); } } else if (line.startsWith("END IONS")) { spectrum = false; } boolean peak = true; boolean zero = false; String[] split = line.split(" "); if (split.length != 2 && split.length != 3) { split = line.split("\t"); if (split.length != 2 && split.length != 3) { peak = false; } } if (peak) { try { new Double(split[0]); } catch (Exception e) { peak = false; } if (peak) { try { Double intensity = new Double(split[1]); if (intensity == 0.0) { zero = true; } } catch (Exception e) { throw new IllegalArgumentException("Line not recognized:\n" + line); } } } if (!spectrum || !peak || !zero) { bw.write(line); bw.newLine(); } } } finally { bw.close(); } } finally { fw.close(); } } finally { br.close(); } if (waitingHandler != null) { waitingHandler.setSecondaryProgressCounterIndeterminate(true); } // replace the old file String orignalFilePath = mgfFile.getAbsolutePath(); boolean fileDeleted = mgfFile.delete(); if (!fileDeleted) { throw new IOException("Failed to delete the original spectrum file."); // can sometimes happeen of the file is loaded twice in the gui, e.g., once with cancel for zero removal and one with ok } boolean fileRenamed = tempSpectrumFile.renameTo(new File(orignalFilePath)); if (!fileRenamed) { throw new IOException("Failed to replace the original spectrum file."); } } /** * Renames duplicate spectrum titles. Adds (2), (3) etc, behind the * duplicate spectrum titles. * * @param mgfFile the MGF file to validate * @param waitingHandler a waitingHandler showing the progress * @throws FileNotFoundException Exception thrown whenever the file is not * found * @throws IOException Exception thrown whenever an error occurs while * reading the file * @throws UnsupportedEncodingException if the decoding of a spectrum title * fails */ public static void renameDuplicateSpectrumTitles(File mgfFile, WaitingHandler waitingHandler) throws FileNotFoundException, IOException, UnsupportedEncodingException { ArrayList<String> spectrumTitles = new ArrayList<String>(); File tempSpectrumFile = new File(mgfFile.getParentFile(), mgfFile.getName() + "_temp"); FileWriter fw = new FileWriter(tempSpectrumFile); BufferedWriter bw = new BufferedWriter(fw); FileReader fr = new FileReader(mgfFile); BufferedReader br = new BufferedReader(fr); String lineBreak = System.getProperty("line.separator"); String line = br.readLine(); while (line != null) { if (line.startsWith("TITLE")) { if (waitingHandler != null) { if (waitingHandler.isRunCanceled()) { break; } //waitingHandler.setSecondaryProgressCounter((int) (br.getFilePointer() / progressUnit)); // @TODO: use the waitingHandler?? } String originalTitle = line.substring(line.indexOf('=') + 1); try { originalTitle = URLDecoder.decode(originalTitle, "utf-8"); } catch (UnsupportedEncodingException e) { e.printStackTrace(); throw new UnsupportedEncodingException("An exception was thrown when trying to decode an mgf title: " + originalTitle); } String tempTitle = originalTitle; int counter = 2; while (spectrumTitles.contains(tempTitle)) { tempTitle = originalTitle + " (" + counter++ + ")"; } spectrumTitles.add(tempTitle); bw.write("TITLE=" + tempTitle + lineBreak); } else { bw.write(line + lineBreak); } line = br.readLine(); } br.close(); fr.close(); bw.close(); fw.close(); // replace the old file String orignalFilePath = mgfFile.getAbsolutePath(); boolean fileDeleted = mgfFile.delete(); if (!fileDeleted) { throw new IOException("Failed to delete the original spectrum file."); } boolean fileRenamed = tempSpectrumFile.renameTo(new File(orignalFilePath)); if (!fileRenamed) { throw new IOException("Failed to replace the original spectrum file."); } } /** * Splits an mgf file into smaller ones and returns the indexes of the * generated files. * * @param mgfFile the mgf file to split * @param nSpectra the number of spectra allowed in the smaller files * @param waitingHandler the waitingHandler showing the progress * @return a list of indexes of the generated files * @throws FileNotFoundException exception thrown whenever a file was not * found * @throws IOException exception thrown whenever a problem occurred while * reading/writing a file */ public ArrayList<MgfIndex> splitFile(File mgfFile, int nSpectra, WaitingHandler waitingHandler) throws FileNotFoundException, IOException { String fileName = mgfFile.getName(); if (fileName.toLowerCase().endsWith(".mgf")) { if (waitingHandler != null) { waitingHandler.setSecondaryProgressCounterIndeterminate(false); waitingHandler.setMaxSecondaryProgressCounter(100); waitingHandler.setSecondaryProgressCounter(0); } String splittedName = fileName.substring(0, fileName.lastIndexOf(".")); ArrayList<File> splittedFiles = new ArrayList<File>(); int fileCounter = 1, spectrumCounter = 0; String currentName = splittedName + "_" + fileCounter + ".mgf"; File testFile = new File(mgfFile.getParent(), currentName); splittedFiles.add(testFile); BufferedRandomAccessFile writeBufferedRandomAccessFile = new BufferedRandomAccessFile(testFile, "rw", 1024 * 100); BufferedRandomAccessFile readBufferedRandomAccessFile = new BufferedRandomAccessFile(mgfFile, "r", 1024 * 100); String lineBreak = System.getProperty("line.separator"); long sizeOfReadAccessFile = readBufferedRandomAccessFile.length(), lastIndex = 0; long progressUnit = sizeOfReadAccessFile / 100; String line; while ((line = readBufferedRandomAccessFile.getNextLine()) != null) { if (line.startsWith("BEGIN IONS")) { spectrumCounter++; long readIndex = readBufferedRandomAccessFile.getFilePointer(); if (spectrumCounter > nSpectra) { if (sizeOfReadAccessFile - readIndex > (readIndex - lastIndex) / 2) { // try to avoid small leftovers writeBufferedRandomAccessFile.close(); currentName = splittedName + "_" + ++fileCounter + ".mgf"; testFile = new File(mgfFile.getParent(), currentName); splittedFiles.add(testFile); lastIndex = readIndex; spectrumCounter = 0; writeBufferedRandomAccessFile = new BufferedRandomAccessFile(testFile, "rw", 1024 * 100); } } if (waitingHandler != null) { if (waitingHandler.isRunCanceled()) { break; } waitingHandler.setSecondaryProgressCounter((int) (readIndex / progressUnit)); } } writeBufferedRandomAccessFile.writeBytes(line + lineBreak); } writeBufferedRandomAccessFile.close(); readBufferedRandomAccessFile.close(); // index the new files ArrayList<MgfIndex> mgfIndexes = new ArrayList<MgfIndex>(); for (int i = 0; i < splittedFiles.size(); i++) { File newFile = splittedFiles.get(i); if (waitingHandler != null) { waitingHandler.setWaitingText("Indexing New Files " + (i + 1) + "/" + splittedFiles.size() + ". Please Wait..."); } mgfIndexes.add(getIndexMap(newFile, waitingHandler)); if (waitingHandler.isRunCanceled()) { break; } } if (waitingHandler != null) { waitingHandler.setSecondaryProgressCounterIndeterminate(true); } return mgfIndexes; } else { throw new IllegalArgumentException("Spectrum file format not supported."); } } /** * Returns the next spectrum starting from the given index. * * @param bufferedRandomAccessFile The random access file of the inspected * mgf file * @param index The index where to start looking for the spectrum * @param fileName The name of the MGF file * @return The next spectrum encountered * @throws IOException Exception thrown whenever an error is encountered * while reading the spectrum * @throws IllegalArgumentException Exception thrown whenever the file is * not of a compatible format */ public static MSnSpectrum getSpectrum(BufferedRandomAccessFile bufferedRandomAccessFile, long index, String fileName) throws IOException, IllegalArgumentException { // @TODO get fileName from the random access file? bufferedRandomAccessFile.seek(index); double precursorMz = 0, precursorIntensity = 0, rt = -1.0, rt1 = -1, rt2 = -1; ArrayList<Charge> precursorCharges = new ArrayList<Charge>(); String scanNumber = "", spectrumTitle = ""; HashMap<Double, Peak> spectrum = new HashMap<Double, Peak>(); String line; boolean insideSpectrum = false; while ((line = bufferedRandomAccessFile.getNextLine()) != null) { // fix for lines ending with \r if (line.endsWith("\r")) { line = line.replace("\r", ""); } if (line.startsWith("BEGIN IONS")) { insideSpectrum = true; spectrum = new HashMap<Double, Peak>(); } else if (line.startsWith("TITLE")) { insideSpectrum = true; spectrumTitle = line.substring(line.indexOf('=') + 1); try { spectrumTitle = URLDecoder.decode(spectrumTitle, "utf-8"); } catch (UnsupportedEncodingException e) { System.out.println("An exception was thrown when trying to decode an mgf title: " + spectrumTitle); e.printStackTrace(); } } else if (line.startsWith("CHARGE")) { precursorCharges = parseCharges(line); } else if (line.startsWith("PEPMASS")) { String temp = line.substring(line.indexOf("=") + 1); String[] values = temp.split("\\s"); precursorMz = Double.parseDouble(values[0]); if (values.length > 1) { precursorIntensity = Double.parseDouble(values[1]); } else { precursorIntensity = 0.0; } } else if (line.startsWith("RTINSECONDS")) { try { String rtInput = line.substring(line.indexOf('=') + 1); String[] rtWindow = rtInput.split("-"); if (rtWindow.length == 1) { String tempRt = rtWindow[0]; // possible fix for values like RTINSECONDS=PT121.250000S if (tempRt.startsWith("PT") && tempRt.endsWith("S")) { tempRt = tempRt.substring(2, tempRt.length() - 1); } rt = new Double(tempRt); } else if (rtWindow.length == 2) { rt1 = new Double(rtWindow[0]); rt2 = new Double(rtWindow[1]); } } catch (Exception e) { System.out.println("An exception was thrown when trying to decode the retention time: " + spectrumTitle); e.printStackTrace(); // ignore exception, RT will not be parsed } } else if (line.startsWith("TOLU")) { // peptide tolerance unit not implemented } else if (line.startsWith("TOL")) { // peptide tolerance not implemented } else if (line.startsWith("SEQ")) { // sequence qualifier not implemented } else if (line.startsWith("COMP")) { // composition qualifier not implemented } else if (line.startsWith("ETAG")) { // error tolerant search sequence tag not implemented } else if (line.startsWith("TAG")) { // sequence tag not implemented } else if (line.startsWith("SCANS")) { try { scanNumber = line.substring(line.indexOf('=') + 1); } catch (Exception e) { throw new IllegalArgumentException("Cannot parse scan number."); } } else if (line.startsWith("INSTRUMENT")) { // ion series not implemented } else if (line.startsWith("END IONS")) { insideSpectrum = false; Precursor precursor; if (rt1 != -1 && rt2 != -1) { precursor = new Precursor(precursorMz, precursorIntensity, precursorCharges, rt1, rt2); } else { precursor = new Precursor(rt, precursorMz, precursorIntensity, precursorCharges); } MSnSpectrum msnSpectrum = new MSnSpectrum(2, precursor, spectrumTitle, spectrum, fileName); msnSpectrum.setScanNumber(scanNumber); return msnSpectrum; } else if (insideSpectrum && !line.equals("")) { try { String values[] = line.split("\\s+"); Double mz = new Double(values[0]); Double intensity = new Double(values[1]); spectrum.put(mz, new Peak(mz, intensity)); } catch (Exception e1) { // ignore comments and all other lines } } } throw new IllegalArgumentException("End of the file reached before encountering the tag \"END IONS\"."); } /** * Parses the charge line of an MGF files. * * @param chargeLine the charge line * @return the possible charges found * @throws IllegalArgumentException */ private static ArrayList<Charge> parseCharges(String chargeLine) throws IllegalArgumentException { ArrayList<Charge> result = new ArrayList<Charge>(1); String tempLine = chargeLine.substring(chargeLine.indexOf("=") + 1); String[] chargesAnd = tempLine.split(" and "); ArrayList<String> chargesAsString = new ArrayList<String>(); for (String charge : chargesAnd) { for (String charge2 : charge.split(",")) { chargesAsString.add(charge2.trim()); } } for (String chargeAsString : chargesAsString) { Integer value; chargeAsString = chargeAsString.trim(); if (!chargeAsString.isEmpty()) { try { if (chargeAsString.endsWith("+")) { value = new Integer(chargeAsString.substring(0, chargeAsString.length() - 1)); result.add(new Charge(Charge.PLUS, value)); } else if (chargeAsString.endsWith("-")) { value = new Integer(chargeAsString.substring(0, chargeAsString.length() - 1)); result.add(new Charge(Charge.MINUS, value)); } else if (!chargeAsString.equalsIgnoreCase("Mr")) { result.add(new Charge(Charge.PLUS, new Integer(chargeAsString))); } } catch (NumberFormatException e) { e.printStackTrace(); throw new IllegalArgumentException("\'" + chargeAsString + "\' could not be processed as a valid precursor charge!"); } } } // if empty, add a default charge of 1 if (result.isEmpty()) { result.add(new Charge(Charge.PLUS, 1)); } return result; } /** * Returns the next precursor starting from the given index. * * @param bufferedRandomAccessFile The random access file of the inspected * mgf file * @param index The index where to start looking for the spectrum * @param fileName The name of the mgf file * @return The next spectrum encountered * @throws IOException Exception thrown whenever an error is encountered * while reading the spectrum * @throws IllegalArgumentException Exception thrown whenever the file is * not of a compatible format */ public static Precursor getPrecursor(BufferedRandomAccessFile bufferedRandomAccessFile, Long index, String fileName) throws IOException, IllegalArgumentException { // @TODO: get fileName from the random access file? bufferedRandomAccessFile.seek(index); String line, title = null; double precursorMz = 0, precursorIntensity = 0, rt = -1.0, rt1 = -1, rt2 = -1; ArrayList<Charge> precursorCharges = new ArrayList<Charge>(1); while ((line = bufferedRandomAccessFile.getNextLine()) != null) { // fix for lines ending with \r if (line.endsWith("\r")) { line = line.replace("\r", ""); } if (line.startsWith("TITLE")) { title = line.substring(line.indexOf("=") + 1); try { title = URLDecoder.decode(title, "utf-8"); } catch (UnsupportedEncodingException e) { System.out.println("An exception was thrown when trying to decode an mgf title: " + title); e.printStackTrace(); } } else if (line.startsWith("CHARGE")) { precursorCharges = parseCharges(line); } else if (line.startsWith("PEPMASS")) { String temp = line.substring(line.indexOf("=") + 1); String[] values = temp.split("\\s"); precursorMz = Double.parseDouble(values[0]); if (values.length > 1) { precursorIntensity = Double.parseDouble(values[1]); } else { precursorIntensity = 0.0; } } else if (line.startsWith("RTINSECONDS")) { try { String rtInput = line.substring(line.indexOf('=') + 1); String[] rtWindow = rtInput.split("-"); if (rtWindow.length == 1) { String tempRt = rtWindow[0]; if (tempRt.startsWith("PT") && tempRt.endsWith("S")) { // possible fix for values like RTINSECONDS=PT121.250000S tempRt = tempRt.substring(2, tempRt.length() - 1); } rt = new Double(tempRt); } else if (rtWindow.length == 2) { rt1 = new Double(rtWindow[0]); rt2 = new Double(rtWindow[1]); } } catch (Exception e) { System.out.println("An exception was thrown when trying to decode the retention time: " + title); e.printStackTrace(); // ignore exception, RT will not be parsed } } else if (!line.isEmpty()) { if (line.startsWith("END IONS") || (!line.contains("#") && !line.contains("="))) { if (rt1 != -1 && rt2 != -1) { return new Precursor(precursorMz, precursorIntensity, precursorCharges, rt1, rt2); } return new Precursor(rt, precursorMz, precursorIntensity, precursorCharges); } } } throw new IllegalArgumentException("End of the file reached before encountering the tag \"END IONS\". File: " + fileName + ", title: " + title); } /** * Writes an apl file from an MGF file. @TODO: move to * massspectrometry.export * * @param mgfFile the mgf file * @param aplFile the target apl file * @param fragmentation the fragmentation method used * @throws FileNotFoundException exception thrown whenever a file was not * found * @throws IOException exception thrown whenever an error occurred while * reading/writing a file * @throws IllegalArgumentException exception thrown whenever the mgf file * is truncated in the middle of a spectrum */ public static void writeAplFile(File mgfFile, File aplFile, String fragmentation) throws FileNotFoundException, IOException, IllegalArgumentException { if (fragmentation == null) { fragmentation = "Unknown"; } Writer aplWriter = new BufferedWriter(new FileWriter(aplFile)); MgfIndex mgfIndex = getIndexMap(mgfFile); HashMap<Double, ArrayList<String>> spectrumTitleMap = new HashMap<Double, ArrayList<String>>(); BufferedRandomAccessFile mgfRFile = new BufferedRandomAccessFile(mgfFile, "r", 1024 * 100); for (String title : mgfIndex.getSpectrumTitles()) { Precursor precursor = getPrecursor(mgfRFile, mgfIndex.getIndex(title), mgfFile.getName()); if (!spectrumTitleMap.containsKey(precursor.getMz())) { spectrumTitleMap.put(precursor.getMz(), new ArrayList<String>()); } spectrumTitleMap.get(precursor.getMz()).add(title); } ArrayList<Double> masses = new ArrayList<Double>(spectrumTitleMap.keySet()); Collections.sort(masses); for (double mz : masses) { for (String title : spectrumTitleMap.get(mz)) { MSnSpectrum spectrum = getSpectrum(mgfRFile, mgfIndex.getIndex(title), mgfFile.getName()); aplWriter.write("peaklist start\n"); aplWriter.write("mz=" + mz + "\n"); aplWriter.write("fragmentation=" + fragmentation + "\n"); aplWriter.write("charge=" + spectrum.getPrecursor().getPossibleCharges().get(0).value + "\n"); //@TODO what if many/no charge is present? aplWriter.write("header=" + spectrum.getSpectrumTitle() + "\n"); HashMap<Double, Peak> peakMap = spectrum.getPeakMap(); ArrayList<Double> fragmentMasses = new ArrayList<Double>(peakMap.keySet()); Collections.sort(fragmentMasses); for (double fragmentMass : fragmentMasses) { aplWriter.write(fragmentMass + "\t" + peakMap.get(fragmentMass).intensity + "\n"); } aplWriter.write("peaklist end\n\n"); } } mgfRFile.close(); aplWriter.close(); } }