/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.toolbox.proteomics.filehandler; import java.math.*; import java.io.*; import java.util.ArrayList; import java.util.List; import org.apache.log4j.Logger; import net.sourceforge.sashimi.schemaRevision.mzXML21.*; import org.w3c.dom.Node; import org.w3c.dom.Attr; import org.apache.xmlbeans.XmlOptions; import org.apache.xmlbeans.GDuration; import org.systemsbiology.jrap.stax.SoftwareInfo; import org.systemsbiology.jrap.stax.MZXMLFileInfo; import org.systemsbiology.jrap.stax.ParentFile; import org.systemsbiology.jrap.stax.MSInstrumentInfo; import org.fhcrc.cpl.toolbox.proteomics.MassCalibrationUtilities; import org.fhcrc.cpl.toolbox.proteomics.MSRun; import org.fhcrc.cpl.toolbox.ApplicationContext; import org.fhcrc.cpl.toolbox.datastructure.Pair; /** * A restrictive wrapper for writing mzXml files based on a single run. We take advantage of XmlBeans to build * the structure of the file, and to build individual spectrum_queries (for features) * and search_summaries (for modifications), but we stitch the XmlBeans XML output for features together by * hand, writing out to a file as we go, so that we don't have to hold the whole structure * in memory. * A special wrinkle of mzXML files is that the file must contain offsets for finding individual * scans, and even for finding the index that holds those offsets. For that reason we have to * maintain a _currentFilePosition variable that tells us how many bytes we've written so far. * * Also, XmlBeans likes namespaces. A lot. I can't figure out how to get XmlBeans to stop prefixing * every tag with <mzx:>. This causes problems for jrap. So I'm manually stripping out mzx: every time * I write anything to the file. This could conceivably cause problems. * * For extra special fun, we can calibrate the spectra masses as we write out the file. That is, * for every m/z value we write out, perform a specified linear transformation on it. We do * NOT calibrate MS/MS precursor masses here. That can be done elsewhere. The spectra can't * be done elsewhere, since they're only soft-referenced in Java, and if we try to do them * in place in memory the changes will get lost. * * dhmay, 2008-12-31: changing the scan-writing behavior to preserve the numbering of scans as * received. Previously, scans were re-numbered starting with 1, because our mzXML-parsing software * had trouble with non-sequential scans. This has been addressed. */ public class MzXmlWriter { static Logger _log = Logger.getLogger(MzXmlWriter.class); //doc representation protected MsRunDocument _xmlBeansMsRunDoc = null; //Index representation protected MzXMLDocument.MzXML.Index _xmlBeansIndex = null; //MsRun representation protected MsRunDocument.MsRun _xmlBeansMsRun = null; //MSRun that we want to write out MSRun _run = null; //Strings of xml representing the structure before and after the scan content protected String _documentPrefix = null; protected String _documentPostscript = null; //encapsulates printing options for all fragments protected XmlOptions _optionsForPrinting = null; //current position in the output file protected long _currentFilePosition = 0; //attribute name indicating precursor mz has been corrected public static final String PRECURSORMZ_ATTR_MSINSPECT_CORRECTED = "msInspect_corrected"; public static final double UNSET_WAVELENGTH_OR_OFFSET = 999999999; public static final double UNSET_INTENSITY_SCALE = -1; //for mass recalibration protected Pair<Integer, Pair<Double, Double>>[] _massCalibrationParameters = null; //Should we calibrate precursor masses? protected boolean _shouldCalibratePrecursorMz = false; //should we calibrate the spectra themselves? If not, and if calibration //parameters are set, only calibrate precursor masses protected boolean _shouldCalibrateSpectra = false; //for intensity scaling protected double intensityScaleFactor = UNSET_INTENSITY_SCALE; //optionally exclude MS1 scans protected boolean excludeMS1Scans = false; /** * Constructor creates the XmlBeans representing the shell of a mzXml document, and * creates the "prefix" and "postscript" strings representing that shell */ public MzXmlWriter() { //initialize basic document structure and printing options _xmlBeansMsRunDoc = MsRunDocument.Factory.newInstance(); //Construct generic document structure _xmlBeansMsRun = _xmlBeansMsRunDoc.addNewMsRun(); //oddly, there's no Index element in the xsd under MsRun, even though that's where we want it MzXMLDocument fakeMzXmlDoc = MzXMLDocument.Factory.newInstance(); _xmlBeansIndex = fakeMzXmlDoc.addNewMzXML().addNewIndex(); //set printing options for xml fragments _optionsForPrinting = new XmlOptions(); _optionsForPrinting.setSaveOuter(); _optionsForPrinting.setSavePrettyPrint(); _optionsForPrinting.setSavePrettyPrintOffset(0); } /** * Create doc structure, populate features and modifications * @param run */ public MzXmlWriter(MSRun run) { this(); setRun(run); } /** * This should only be called after the run is set. This creates the whole XML shell around * the scans and index, which thankfully go right next to each other */ protected void createDocumentShellXml() { //add a sentinel node that tells us where to split the document to insert scans, Node runNode = _xmlBeansMsRun.getDomNode(); Node scanLocationNode = runNode.getOwnerDocument().createElement("SENTINEL_SCAN_LOCATION"); runNode.appendChild(scanLocationNode); //create and break up the xml that defines the document structure String documentShell = _xmlBeansMsRunDoc.xmlText(_optionsForPrinting); String[] halves = documentShell.split("<SENTINEL_SCAN_LOCATION[^\\/]*\\/>"); if (halves.length != 2) { _log.error("Failed to create document shell for writing"); return; } _documentPrefix = removeNamespaceColon("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + halves[0]); _documentPostscript = removeNamespaceColon(halves[1]); //remove our dummy node runNode.removeChild(scanLocationNode); } /** * Does all the processing to create an XmlBeans structure that * represents the run. Doesn't do anything with the scans. * if restricted is true, the scan count is set to the passed-in numScans, * otherwise it's gathered from the run */ public void buildDocStructure(int firstScanNum, int lastScanNum, boolean restricted) { MZXMLFileInfo fileInfo = _run.getHeaderInfo(); int numMs1ScansToWrite = _run.getScanCount(); int numMs2ScansToWrite = 0; int numMs3ScansToWrite = 0; if (_run.getMS2Scans() != null) numMs2ScansToWrite = _run.getMS2Scans().length; if (_run.getMS3Scans() != null) numMs3ScansToWrite = _run.getMS3Scans().length; if (restricted) { //since we're given scan numbers, not indexes, we don't immediately know how many //scans we're actually writing int firstScanIndex = _run.getIndexForScanNum(firstScanNum); int lastScanIndex = _run.getIndexForScanNum(lastScanNum); numMs1ScansToWrite = lastScanIndex-firstScanIndex+1; if (excludeMS1Scans) numMs1ScansToWrite = 0; numMs2ScansToWrite = 0; if (_run.getMS2Scans() != null) { for (int i=0; i<_run.getMS2Scans().length; i++) { int currentMS2ScanNum = _run.getMS2Scans()[i].getNum(); if (currentMS2ScanNum >= firstScanNum) { if (currentMS2ScanNum <= lastScanNum) { numMs2ScansToWrite++; } else break; } } } numMs3ScansToWrite = 0; if (_run.getMS3Scans() != null) { for (int i=0; i<_run.getMS3Scans().length; i++) { int currentMS3ScanNum = _run.getMS3Scans()[i].getNum(); if (currentMS3ScanNum >= firstScanNum) { if (currentMS3ScanNum <= lastScanNum) { numMs3ScansToWrite++; } else break; } } } } _xmlBeansMsRun.setScanCount(BigInteger.valueOf(numMs1ScansToWrite + numMs2ScansToWrite + numMs3ScansToWrite)); MsRunDocument.MsRun.DataProcessing dataProcessing = _xmlBeansMsRun.addNewDataProcessing(); List<SoftwareInfo> jrapSoftwareInfoList = fileInfo.getDataProcessing().getSoftwareUsed(); for (int i=0; i<jrapSoftwareInfoList.size(); i++) { SoftwareInfo jrapSoftwareInfo = jrapSoftwareInfoList.get(i); SoftwareDocument.Software xmlBeansSoftware = dataProcessing.addNewSoftware(); xmlBeansSoftware.setName(jrapSoftwareInfo.name); xmlBeansSoftware.setType(SoftwareDocument.Software.Type.Enum.forString(jrapSoftwareInfo.type)); xmlBeansSoftware.setVersion(jrapSoftwareInfo.version); } List<ParentFile> jrapParentFileArray = fileInfo.getParentFiles(); for (int i=0; i<jrapParentFileArray.size(); i++) { ParentFile jrapParentFile = jrapParentFileArray.get(i); MsRunDocument.MsRun.ParentFile xmlBeansParentFile = _xmlBeansMsRun.addNewParentFile(); //is this right?! ParentFile doesn't seem to have a Name xmlBeansParentFile.setFileName(jrapParentFile.getURI()); xmlBeansParentFile.setFileSha1(jrapParentFile.getSha1()); xmlBeansParentFile.setFileType(MsRunDocument.MsRun.ParentFile.FileType.Enum.forString(jrapParentFile.getType())); } //for some reason we don't always seem to capture this, so null-checking MSInstrumentInfo jrapInstrumentInfo = fileInfo.getInstrumentInfo(); if (jrapInstrumentInfo != null) { MsRunDocument.MsRun.MsInstrument xmlBeansMsInstrument = _xmlBeansMsRun.addNewMsInstrument(); MsRunDocument.MsRun.MsInstrument.MsManufacturer xmlBeansManufacturer = xmlBeansMsInstrument.addNewMsManufacturer(); xmlBeansManufacturer.setCategory("msManufacturer"); xmlBeansManufacturer.setValue(jrapInstrumentInfo.getManufacturer()); OntologyEntryType xmlBeansMsModel = xmlBeansMsInstrument.addNewMsModel(); xmlBeansMsModel.setValue(jrapInstrumentInfo.getModel()); SoftwareInfo jrapInstrumentSoftwareInfo = jrapInstrumentInfo.getSoftwareInfo(); if (jrapInstrumentSoftwareInfo != null) { SoftwareDocument.Software xmlBeansSoftware = dataProcessing.addNewSoftware(); xmlBeansSoftware.setName(jrapInstrumentSoftwareInfo.name); xmlBeansSoftware.setType(SoftwareDocument.Software.Type.Enum.forString(jrapInstrumentSoftwareInfo.type)); xmlBeansSoftware.setVersion(jrapInstrumentSoftwareInfo.version); xmlBeansMsInstrument.setSoftware(xmlBeansSoftware); } } //TODO: I'm not at all sure that this is correct, but these items are required //and they're not stored on the run String runStartTime = "PT0.0S"; String runEndTime = "PT0.0S"; try { MSRun.MSScan firstScan = _run.getScan(0); if (restricted) { int firstScanIndex = _run.getIndexForScanNum(firstScanNum); if (firstScanIndex < 0) firstScanIndex = -firstScanIndex; firstScan = _run.getScan(firstScanIndex); } runStartTime = firstScan.getRetentionTime(); MSRun.MSScan lastScan = _run.getScan(_run.getScanCount()-1); //if restricting scans, then the last scan isn't the last scan of the run int lastScanIndex = _run.getIndexForScanNum(lastScanNum); if (lastScanIndex < 0) lastScanIndex = -lastScanIndex; if (restricted) lastScan = _run.getScan(lastScanIndex); runEndTime = lastScan.getRetentionTime(); } catch (Exception e) { ApplicationContext.infoMessage("Warning: Unable to get first and last scan information from run. Defaulting run start/end times to 0."); } _xmlBeansMsRun.setStartTime(new GDuration(runStartTime)); _xmlBeansMsRun.setEndTime(new GDuration(runEndTime)); createDocumentShellXml(); } /** * setter for the run. * @param run */ public void setRun(MSRun run) { _run = run; } /** * Write out the index * @param pw */ public void writeIndex(PrintWriter pw) { long indexOffset = _currentFilePosition; String indexFragment = removeNamespaceColon(_xmlBeansIndex.xmlText(_optionsForPrinting)); pw.print(indexFragment); _currentFilePosition += indexFragment.length(); String indexOffsetFragment = removeNamespaceColon("<mzx:indexOffset>" + indexOffset + "</mzx:indexOffset>"); pw.print(indexOffsetFragment); _currentFilePosition += indexOffsetFragment.length(); pw.flush(); } /** * Return the index of the next MS2 scan _after_ index oldMs2ScanIndex that falls into * the given range (if restrict is true) * @param oldMs2ScanIndex * @param ms2Scans * @param firstScan * @param lastScan * @param restrict * @return The index of the next MS2 scan for writing. If none, return -1 */ protected int queueNextMs2ScanIndex(int oldMs2ScanIndex, MSRun.MSScan[] ms2Scans, int firstScan, int lastScan, boolean restrict) { int result = -1; if (ms2Scans == null || ms2Scans.length==0) return result; int currentScanIndex = oldMs2ScanIndex+1; while (currentScanIndex < ms2Scans.length) { if (!restrict || (ms2Scans[currentScanIndex].getNum() >= firstScan && ms2Scans[currentScanIndex].getNum() <= lastScan)) { result = currentScanIndex; break; } currentScanIndex++; } return result; } /** * Return the index of the next MS3 scan _after_ index oldMs3ScanIndex that falls into * the given range (if restrict is true) * This could really be folded into queueNextMs2ScanIndex, but it's just easier not to * @param oldMs3ScanIndex * @param ms3Scans * @param firstScan * @param lastScan * @param restrict * @return The index of the next MS2 scan for writing. If none, return -1 */ protected int queueNextMs3ScanIndex(int oldMs3ScanIndex, MSRun.MSScan[] ms3Scans, int firstScan, int lastScan, boolean restrict) { int result = -1; if (ms3Scans == null || ms3Scans.length==0) return result; int currentScanIndex = oldMs3ScanIndex+1; while (currentScanIndex < ms3Scans.length) { if (!restrict || (ms3Scans[currentScanIndex].getNum() >= firstScan && ms3Scans[currentScanIndex].getNum() <= lastScan)) { result = currentScanIndex; break; } currentScanIndex++; } return result; } /** * Write out scans immediately. Either write out all scans, or just a subregion, depending * on the value of the restrict argument. If restrict == false, ignore the int and float args * * @param pw * @param firstScan * @param lastScan * @param lowMz * @param highMz * @param restrict */ public void writeScans(PrintWriter pw, int firstScan, int lastScan, float lowMz, float highMz, boolean restrict) { _log.debug("writeScans start"); if (_run == null) return; MSRun.MSScan[] ms2Scans = _run.getMS2Scans(); MSRun.MSScan[] ms3Scans = _run.getMS3Scans(); int nextMs2ScanIndex = queueNextMs2ScanIndex(-1, ms2Scans, firstScan, lastScan, restrict); int nextMs3ScanIndex = queueNextMs3ScanIndex(-1, ms3Scans, firstScan, lastScan, restrict); int ms1ScanCount = _run.getScanCount(); _log.debug("MS1 scans: " + ms1ScanCount); if (ms2Scans !=null) _log.debug("MS2 scans: " + ms2Scans.length); if (ms3Scans !=null) _log.debug("MS3 scans: " + ms3Scans.length); int nextCalibChangeIndex = 0; double massCalibrationWavelength = UNSET_WAVELENGTH_OR_OFFSET; double massCalibrationOffset = UNSET_WAVELENGTH_OR_OFFSET; int nextCalibChangeScan = 0; if (_massCalibrationParameters != null && (_shouldCalibrateSpectra || _shouldCalibratePrecursorMz)) { nextCalibChangeScan = _massCalibrationParameters[0].first; } for (int i=0; i<ms1ScanCount; i++) { if (i % (ms1ScanCount / 100) == 0) ApplicationContext.setMessage((i * 100 / ms1ScanCount) + " % complete"); MSRun.MSScan ms1Scan = _run.getScan(i); if (_massCalibrationParameters != null && (_shouldCalibrateSpectra || _shouldCalibratePrecursorMz)) { if (nextCalibChangeScan <= ms1Scan.getNum()) { massCalibrationWavelength = _massCalibrationParameters[nextCalibChangeIndex].second.first; massCalibrationOffset = _massCalibrationParameters[nextCalibChangeIndex].second.second; if (_massCalibrationParameters.length >= nextCalibChangeIndex+2) { nextCalibChangeIndex++; nextCalibChangeScan = _massCalibrationParameters[nextCalibChangeIndex].first; } } } //System.err.println("*****MS1 scan: " + ms1Scan.getNum()); //Write out all MS2 scans that should be written out _before_ this ms1 scan is //written. //check that the next ms2 scan exists, //and that its number is less than the next ms1 scan's number, //and that it's number is in the range we want to write while (nextMs2ScanIndex > -1 && ms1Scan.getNum() > ms2Scans[nextMs2ScanIndex].getNum()) { MSRun.MSScan ms2Scan = ms2Scans[nextMs2ScanIndex]; //System.err.println(" *****MS2 scan: " + ms2Scan.getNum()); //Write out all MS3 scans that should be written out _before_ this ms1 scan is //written. //check that the next ms3 scan exists, //and that its number is less than the next ms2 scan's number, //and that it's number is in the range we want to write while (nextMs3ScanIndex > -1 && ms2Scan.getNum() > ms3Scans[nextMs3ScanIndex].getNum()) { writeScan(ms3Scans[nextMs3ScanIndex], ms3Scans[nextMs3ScanIndex].getNum(), pw, lowMz, highMz, restrict, massCalibrationWavelength, massCalibrationOffset); nextMs3ScanIndex = queueNextMs3ScanIndex(nextMs3ScanIndex, ms3Scans, firstScan, lastScan, restrict); } writeScan(ms2Scan, ms2Scan.getNum(), pw, lowMz, highMz, restrict, massCalibrationWavelength, massCalibrationOffset); nextMs2ScanIndex = queueNextMs2ScanIndex(nextMs2ScanIndex, ms2Scans, firstScan, lastScan, restrict); } if (restrict) { //if we're restricting the scan and mz window, need to check whether this scan //is in the window. Since restriction is based on scan number, not index, //need to check the scan to get the scan number int scanNum = ms1Scan.getNum(); if (scanNum < firstScan) continue; //scan numbers are monotonically increasing with scan index, so if we're out of //range already, we're done if (scanNum > lastScan) break; } //if we get here, this is a scan we want to write if (!excludeMS1Scans) writeScan(ms1Scan, ms1Scan.getNum(), pw, lowMz, highMz, restrict, massCalibrationWavelength, massCalibrationOffset); } //write out any remaining MS2 scans with higher indexes than the ms1 scans //we're writing, and any interstitial MS3 scans. while (nextMs2ScanIndex > -1) { MSRun.MSScan ms2Scan = ms2Scans[nextMs2ScanIndex]; while (nextMs3ScanIndex > -1 && ms2Scan.getNum() > ms3Scans[nextMs3ScanIndex].getNum()) { writeScan(ms3Scans[nextMs3ScanIndex], ms3Scans[nextMs3ScanIndex].getNum(), pw, lowMz, highMz, restrict, massCalibrationWavelength, massCalibrationOffset); nextMs3ScanIndex = queueNextMs3ScanIndex(nextMs3ScanIndex, ms3Scans, firstScan, lastScan, restrict); } //no recalibration for MS/MS scans writeScan(ms2Scan, ms2Scan.getNum(), pw, lowMz, highMz, restrict); nextMs2ScanIndex = queueNextMs2ScanIndex(nextMs2ScanIndex, ms2Scans, firstScan, lastScan, restrict); } //write out any remaining MS3 scans after all ms1 and ms2 scans. //No calibration for MS3 scans while (nextMs3ScanIndex > -1) { writeScan(ms3Scans[nextMs3ScanIndex], ms3Scans[nextMs3ScanIndex].getNum(), pw, lowMz, highMz, restrict); nextMs3ScanIndex = queueNextMs3ScanIndex(nextMs3ScanIndex, ms3Scans, firstScan, lastScan, restrict); } ApplicationContext.setMessage("100% complete"); } protected void writeScan(MSRun.MSScan scan, int scanNumber, PrintWriter pw, float lowMz, float highMz, boolean restrict) { writeScan(scan, scanNumber, pw, lowMz, highMz, restrict, UNSET_WAVELENGTH_OR_OFFSET, UNSET_WAVELENGTH_OR_OFFSET); } /** * Add a MsScan representing the passed-in MSScan to the MsRun. If restrict==false, then only * write out a subregion of the scan's mz values * Write out the XML for that search result * @param scan * @param scanNumber * @param pw * @param lowMz * @param highMz * @param restrict * @param wavelengthForCalibration * @param offsetForCalibration */ public void writeScan(MSRun.MSScan scan, int scanNumber, PrintWriter pw, float lowMz, float highMz, boolean restrict, double wavelengthForCalibration, double offsetForCalibration) { ScanDocument.Scan xmlBeansScan = _xmlBeansMsRun.addNewScan(); //scan information //this often seems to be null if (scan.getScanType() != null) { //some mzXml files have been seen to have bad scanType values, i.e., values not //in the list of allowed values, which includes Full, SRM, CRM, etc. //In those cases, just trapping the exception and refusing to write it out. try { xmlBeansScan.setScanType(ScanDocument.Scan.ScanType.Enum.forString( scan.getScanType())); } catch (Exception e) { _log.debug("Error setting scan type, " + e.getMessage()); } } xmlBeansScan.setNum(BigInteger.valueOf(scanNumber)); xmlBeansScan.setMsLevel(BigInteger.valueOf(scan.getMsLevel())); //Hack for "removing" scans with no peaks //TODO: make this hack ACTUALLY, OPTIONALLY remove those scans //if (scan.getPeaksCount() == 0 || scan.getSpectrum().length == 0) //{ // xmlBeansScan.setMsLevel(BigInteger.valueOf(4)); // System.err.println("setting mslevel of scan " + scan.getNum() + " to 4"); //} xmlBeansScan.setRetentionTime(new GDuration(scan.getRetentionTime())); //These will be overridden if we're restricting float basePeakMz = scan.getBasePeakMz(); float basePeakIntensity = scan.getBasePeakIntensity(); //peaks ScanDocument.Scan.Peaks xmlBeansPeaks = xmlBeansScan.addNewPeaks(); float[][] spectrum = null; try { //getSpectrum() can fail with an NPE if a scan is empty. System.err.println("BEFORE GETSPEC"); spectrum = scan.getSpectrum(); System.err.println("AFTER GETSPEC!"); } catch (NullPointerException e) { spectrum = new float[2][0]; System.err.println("CAUGHT NPE!"); } float[] mzSpectrum = spectrum[0]; float[] intensitySpectrum = spectrum[1]; byte[] spectrumArray = null; int peaksCount = scan.getPeaksCount(); //get the low and high mz values from the scan. If we're restricting, these will be changed float scanLowMz = scan.getLowMz(); float scanHighMz = scan.getHighMz(); //These are the MZ range imposed on the scan by the machine. In the case of restriction, //should be further constrained by the window we're restricting to float scanStartMz = scan.getStartMz(); float scanEndMz = scan.getEndMz(); Float[] totIonCurrent = new Float[1]; totIonCurrent[0] = scan.getTotIonCurrent(); float precursorMz = scan.getPrecursorMz(); //scale intensities, if specified if (intensityScaleFactor != UNSET_INTENSITY_SCALE) { for (int i=0; i<intensitySpectrum.length; i++) intensitySpectrum[i] *= intensitySpectrum.length; } //calibrate masses, if specified if (wavelengthForCalibration != UNSET_WAVELENGTH_OR_OFFSET && offsetForCalibration != UNSET_WAVELENGTH_OR_OFFSET) { if (_shouldCalibrateSpectra) { for (int i=0; i<mzSpectrum.length; i++) mzSpectrum[i] += mzSpectrum[i] * (MassCalibrationUtilities.DEFAULT_THEORETICAL_MASS_WAVELENGTH - wavelengthForCalibration) - offsetForCalibration; } if (_shouldCalibratePrecursorMz) { precursorMz += precursorMz * (MassCalibrationUtilities.DEFAULT_THEORETICAL_MASS_WAVELENGTH - wavelengthForCalibration) - offsetForCalibration; } } if (xmlBeansScan.getMsLevel().intValue() == 2) { ScanDocument.Scan.PrecursorMz xmlBeansPrecursorMz = xmlBeansScan.addNewPrecursorMz(); xmlBeansPrecursorMz.setPrecursorCharge(BigInteger.valueOf(scan.getPrecursorCharge())); xmlBeansPrecursorMz.setPrecursorScanNum(BigInteger.valueOf(scan.getPrecursorScanNum())); xmlBeansPrecursorMz.setFloatValue(precursorMz); if (scan.isPrecursorMzCorrected()) { Attr correctedAttribute = _xmlBeansMsRun.getDomNode().getOwnerDocument().createAttribute( PRECURSORMZ_ATTR_MSINSPECT_CORRECTED); correctedAttribute.setValue("true"); xmlBeansPrecursorMz.getDomNode().getAttributes().setNamedItem(correctedAttribute); // xmlBeansPrecursorMz.getDomNode().appendChild(correctedAttribute); } } if (restrict) { //this will hold the lowest and highest mz values that we find in the scan Pair<Float, Float> lowAndHighFoundMz = new Pair<Float, Float>(lowMz, highMz); //this will hold the mz value of the peak with the highest intensity Pair<Float, Float> basePeakMzIntensity = new Pair<Float, Float>(0f, 0f); spectrumArray = createRestrictedByteArrayForSpectrum(mzSpectrum, intensitySpectrum, lowMz, highMz, lowAndHighFoundMz, basePeakMzIntensity, totIonCurrent); //if we're restricting, need to recalculate peaksCount. every 8 bytes in the //spectrum array represent one peak peaksCount = spectrumArray.length / 8; //record the actual found lowest and highest MZ values scanStartMz = Math.max(lowMz,scanLowMz); scanEndMz = Math.min(highMz,scanHighMz); //record the base (highest-intensity) peak mz and intensity basePeakMz = basePeakMzIntensity.first; basePeakIntensity = basePeakMzIntensity.second; //Set the scan _range_ appropriately. The most restrictive window defined by //the scan's low and high values and the low and high values we're restricting to here scanLowMz = lowAndHighFoundMz.first; scanHighMz = lowAndHighFoundMz.second; } else { spectrumArray = createByteArrayForSpectrum(mzSpectrum, intensitySpectrum, scan.getPeaksCount()); } xmlBeansScan.setLowMz(scanLowMz); xmlBeansScan.setHighMz(scanHighMz); xmlBeansScan.setStartMz(scanStartMz); xmlBeansScan.setEndMz(scanEndMz); xmlBeansScan.setTotIonCurrent(totIonCurrent[0]); xmlBeansScan.setBasePeakMz(basePeakMz); xmlBeansScan.setBasePeakIntensity(basePeakIntensity); xmlBeansScan.setPeaksCount(BigInteger.valueOf(peaksCount)); xmlBeansPeaks.setPrecision(BigInteger.valueOf(scan.getPrecision())); xmlBeansPeaks.setByteArrayValue(spectrumArray); try { String fragment = removeNamespaceColon(_xmlBeansMsRun.getScanArray(0).xmlText(_optionsForPrinting)); MzXMLDocument.MzXML.Index.Offset scanOffset = _xmlBeansIndex.addNewOffset(); scanOffset.setId(BigInteger.valueOf(scanNumber)); scanOffset.setLongValue(_currentFilePosition); pw.print(fragment); _currentFilePosition += fragment.length(); pw.flush(); } catch (Exception e) { e.printStackTrace(System.err); } _xmlBeansMsRun.removeScan(0); } /** * Creates a base-64 byte array, with alternating mz values and intensities, to represent * the two float arrays we store in MSScan. * If we got REALLY parsimonious wrt memory, we could change things so that we * don't even store this whole thing all at once. That'd get hairy, though. * @return */ protected byte[] createByteArrayForSpectrum(float[] mzSpectrum, float[]intensitySpectrum, int peaksCount) { byte[] result = new byte[peaksCount * 2 * 4]; for (int i=0; i<mzSpectrum.length; i++) { //populate 4 bytes representing the mz fillByteArrayForFloat(mzSpectrum[i], result, 8*i); //populate 4 bytes representing the intensity fillByteArrayForFloat(intensitySpectrum[i], result, 8*i+4); } return result; } /** * Separate handling for restricted spectra, since we have to build an ArrayList of bytes * because we don't know in itially how many bytes we're writing out. * Populate lowAndHighFoundMz with the actual lowest and highest values we find * @param mzSpectrum * @param intensitySpectrum * @param lowMz * @param highMz * @param outLowAndHighFoundMz will contain the lowest and highest found mz values * @param outBasePeakMzIntensity will contain the mz and intensity of the base peak * @param outTotIonCurrent will contain the sum of all intensities as its first element (hack) * @return */ protected byte[] createRestrictedByteArrayForSpectrum(float[] mzSpectrum, float[]intensitySpectrum, float lowMz, float highMz, Pair outLowAndHighFoundMz, Pair outBasePeakMzIntensity, Float[] outTotIonCurrent) { ArrayList<Byte> resultList = new ArrayList<Byte>(); //initialize the lowest and highest MZ values so that they're sure to get overwritten float lowestMz = highMz; float highestMz = lowMz; //initialize the base peak (peak with highest intensity) values so that they're sure to get //overwritten float basePeakMz = 0f; float basePeakIntensity = 0f; float totIonCurrent = 0f; byte[] byteHolder = new byte[4]; for (int i=0; i<mzSpectrum.length; i++) { if (mzSpectrum[i] < lowMz) continue; if (mzSpectrum[i] > highMz) break; //populate 4 bytes representing the mz fillByteArrayForFloat(mzSpectrum[i], byteHolder, 0); resultList.add(byteHolder[0]); resultList.add(byteHolder[1]); resultList.add(byteHolder[2]); resultList.add(byteHolder[3]); //populate 4 bytes representing the intensity fillByteArrayForFloat(intensitySpectrum[i], byteHolder, 0); resultList.add(byteHolder[0]); resultList.add(byteHolder[1]); resultList.add(byteHolder[2]); resultList.add(byteHolder[3]); if (mzSpectrum[i] < lowestMz) lowestMz = mzSpectrum[i]; if (mzSpectrum[i] > highestMz) highestMz = mzSpectrum[i]; if (intensitySpectrum[i] > basePeakIntensity) { basePeakIntensity = intensitySpectrum[i]; basePeakMz = mzSpectrum[i]; } totIonCurrent += intensitySpectrum[i]; } //TODO: This is probably very memory-inefficient byte[] result = new byte[resultList.size()]; for (int i=0; i<resultList.size(); i++) result[i] = resultList.get(i); outLowAndHighFoundMz.first = Float.valueOf(lowestMz); outLowAndHighFoundMz.second = Float.valueOf(highestMz); outBasePeakMzIntensity.first = Float.valueOf(basePeakMz); outBasePeakMzIntensity.second = Float.valueOf(basePeakIntensity); outTotIonCurrent[0]=totIonCurrent; return result; } /** * Given a float, populate the given byte array with four bytes representing the float. * The first byte will go in at the offset given * @param floatVal * @param bytes */ protected void fillByteArrayForFloat(float floatVal, byte[] bytes, int offset) { assert(bytes.length >= (offset + 4)); int intBits = Float.floatToIntBits(floatVal); bytes[offset] = (byte)(intBits >> 24); bytes[offset + 1] = (byte)(intBits >> 16); bytes[offset + 2] = (byte)(intBits >> 8); bytes[offset + 3] = (byte)(intBits); } /** * Remove all occurrences of the namespace prefix from an xml fragment. * TODO: Boy, I'd sure like XmlBeans to do this for me. * @param input * @return */ protected String removeNamespaceColon(String input) { return input.replaceAll("mzx:",""); } /** * Write out the full document, with all modifications and features, to a file * @param file * @throws IOException */ public void write(File file) throws IOException { _write(file, 0,0,0,0,false); } /** * Write out just a subregion * @param file * @param firstScan * @param lastScan * @param lowMz * @param highMz */ public void writeSubregion(File file, int firstScan, int lastScan, float lowMz, float highMz) throws IOException { _write(file,firstScan,lastScan,lowMz,highMz,true); } /** * Workhorse writing method. Can write just a subregion, or the whole file * @param file * @param firstScanNum * @param lastScanNum * @param lowMz * @param highMz */ protected void _write(File file, int firstScanNum, int lastScanNum, float lowMz, float highMz, boolean restrict) throws IOException { //build the XML structure of the document buildDocStructure(firstScanNum, lastScanNum, restrict); PrintWriter pw = new PrintWriter(file); _log.debug("Writing document start"); printDocPrefix(pw); writeScans(pw, firstScanNum, lastScanNum, lowMz, highMz, restrict); _log.debug("Writing index"); writeIndex(pw); _log.debug("Finishing document"); printDocPostscript(pw); pw.flush(); _log.debug("Done."); } public void printDocPrefix(PrintWriter pw) { _currentFilePosition = 0; pw.print(_documentPrefix); _currentFilePosition += _documentPrefix.length(); } public void printDocPostscript(PrintWriter pw) { pw.print(_documentPostscript); _currentFilePosition += _documentPostscript.length(); } public void setMassCalibrationParameters(Pair<Integer,Pair<Double,Double>>[] newParameters) { _massCalibrationParameters = newParameters; _shouldCalibrateSpectra = true; } public void setMassCalibrationWavelengthOffset(double massCalibrationWavelength, double massCalibrationOffset) { _massCalibrationParameters = (Pair<Integer, Pair<Double,Double>>[]) new Pair[1]; _massCalibrationParameters[0] = new Pair<Integer, Pair<Double,Double>>(0, new Pair<Double,Double>(massCalibrationWavelength, massCalibrationOffset)); } public boolean shouldCalibrateSpectra() { return _shouldCalibrateSpectra; } public void setShouldCalibrateSpectra(boolean _shouldCalibrateSpectra) { this._shouldCalibrateSpectra = _shouldCalibrateSpectra; } public boolean isShouldCalibratePrecursorMz() { return _shouldCalibratePrecursorMz; } public void setShouldCalibratePrecursorMasses(boolean _shouldCalibratePrecursorMasses) { this._shouldCalibratePrecursorMz = _shouldCalibratePrecursorMasses; } public double getIntensityScaleFactor() { return intensityScaleFactor; } public void setIntensityScaleFactor(double intensityScaleFactor) { this.intensityScaleFactor = intensityScaleFactor; } public boolean isExcludeMS1Scans() { return excludeMS1Scans; } public void setExcludeMS1Scans(boolean excludeMS1Scans) { this.excludeMS1Scans = excludeMS1Scans; } }