MzXmlWriter.java example

Explorer
msInspect-master
/*
 * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fhcrc.cpl.toolbox.proteomics.filehandler;

import java.math.*;
import java.io.*;
import java.util.ArrayList;
import java.util.List;

import org.apache.log4j.Logger;

import net.sourceforge.sashimi.schemaRevision.mzXML21.*;
import org.w3c.dom.Node;
import org.w3c.dom.Attr;
import org.apache.xmlbeans.XmlOptions;
import org.apache.xmlbeans.GDuration;
import org.systemsbiology.jrap.stax.SoftwareInfo;
import org.systemsbiology.jrap.stax.MZXMLFileInfo;
import org.systemsbiology.jrap.stax.ParentFile;
import org.systemsbiology.jrap.stax.MSInstrumentInfo;
import org.fhcrc.cpl.toolbox.proteomics.MassCalibrationUtilities;
import org.fhcrc.cpl.toolbox.proteomics.MSRun;
import org.fhcrc.cpl.toolbox.ApplicationContext;
import org.fhcrc.cpl.toolbox.datastructure.Pair;


/**
 * A restrictive wrapper for writing mzXml files based on a single run.  We take advantage of XmlBeans to build
 * the structure of the file, and to build individual spectrum_queries (for features)
 * and search_summaries (for modifications), but we stitch the XmlBeans XML output for features together by
 * hand, writing out to a file as we go, so that we don't have to hold the whole structure
 * in memory.
 * A special wrinkle of mzXML files is that the file must contain offsets for finding individual
 * scans, and even for finding the index that holds those offsets.  For that reason we have to
 * maintain a _currentFilePosition variable that tells us how many bytes we've written so far.
 *
 * Also, XmlBeans likes namespaces.  A lot.  I can't figure out how to get XmlBeans to stop prefixing
 * every tag with <mzx:>.  This causes problems for jrap.  So I'm manually stripping out mzx: every time
 * I write anything to the file.  This could conceivably cause problems.
 *
 * For extra special fun, we can calibrate the spectra masses as we write out the file.  That is,
 * for every m/z value we write out, perform a specified linear transformation on it.  We do
 * NOT calibrate MS/MS precursor masses here.  That can be done elsewhere.  The spectra can't
 * be done elsewhere, since they're only soft-referenced in Java, and if we try to do them
 * in place in memory the changes will get lost.
 *
 * dhmay, 2008-12-31: changing the scan-writing behavior to preserve the numbering of scans as
 * received.  Previously, scans were re-numbered starting with 1, because our mzXML-parsing software
 * had trouble with non-sequential scans.  This has been addressed.
 */
public class MzXmlWriter
{
    static Logger _log = Logger.getLogger(MzXmlWriter.class);

    //doc representation
    protected MsRunDocument _xmlBeansMsRunDoc = null;
    //Index representation
    protected MzXMLDocument.MzXML.Index _xmlBeansIndex = null;
    //MsRun representation
    protected MsRunDocument.MsRun _xmlBeansMsRun = null;

    //MSRun that we want to write out
    MSRun _run = null;

    //Strings of xml representing the structure before and after the scan content
    protected String _documentPrefix = null;
    protected String _documentPostscript = null;

    //encapsulates printing options for all fragments
    protected XmlOptions _optionsForPrinting = null;

    //current position in the output file
    protected long _currentFilePosition = 0;

    //attribute name indicating precursor mz has been corrected
    public static final String PRECURSORMZ_ATTR_MSINSPECT_CORRECTED = "msInspect_corrected";

    public static final double UNSET_WAVELENGTH_OR_OFFSET = 999999999;
    public static final double UNSET_INTENSITY_SCALE = -1;

    //for mass recalibration
    protected Pair<Integer, Pair<Double, Double>>[] _massCalibrationParameters = null;

    //Should we calibrate precursor masses?
    protected boolean _shouldCalibratePrecursorMz = false;

    //should we calibrate the spectra themselves?  If not, and if calibration
    //parameters are set, only calibrate precursor masses
    protected boolean _shouldCalibrateSpectra = false;

    //for intensity scaling
    protected double intensityScaleFactor = UNSET_INTENSITY_SCALE;

    //optionally exclude MS1 scans
    protected boolean excludeMS1Scans = false;

    /**
     * Constructor creates the XmlBeans representing the shell of a mzXml document, and
     * creates the "prefix" and "postscript" strings representing that shell
     */
    public MzXmlWriter()
    {
        //initialize basic document structure and printing options

        _xmlBeansMsRunDoc = MsRunDocument.Factory.newInstance();

        //Construct generic document structure
        _xmlBeansMsRun = _xmlBeansMsRunDoc.addNewMsRun();

        //oddly, there's no Index element in the xsd under MsRun, even though that's where we want it
        MzXMLDocument fakeMzXmlDoc = MzXMLDocument.Factory.newInstance();
        _xmlBeansIndex = fakeMzXmlDoc.addNewMzXML().addNewIndex();

        //set printing options for xml fragments
        _optionsForPrinting = new XmlOptions();
        _optionsForPrinting.setSaveOuter();
        _optionsForPrinting.setSavePrettyPrint();
        _optionsForPrinting.setSavePrettyPrintOffset(0);
    }

    /**
     * Create doc structure, populate features and modifications
     * @param run
     */
    public MzXmlWriter(MSRun run)
    {
        this();
        setRun(run);
    }

    /**
     * This should only be called after the run is set.  This creates the whole XML shell around
     * the scans and index, which thankfully go right next to each other
     */
    protected void createDocumentShellXml()
    {
        //add a sentinel node that tells us where to split the document to insert scans,
        Node runNode = _xmlBeansMsRun.getDomNode();
        Node scanLocationNode = runNode.getOwnerDocument().createElement("SENTINEL_SCAN_LOCATION");
        runNode.appendChild(scanLocationNode);

        //create and break up the xml that defines the document structure
        String documentShell = _xmlBeansMsRunDoc.xmlText(_optionsForPrinting);
        String[] halves = documentShell.split("<SENTINEL_SCAN_LOCATION[^\\/]*\\/>");
        if (halves.length != 2)
        {
            _log.error("Failed to create document shell for writing");
            return;
        }

        _documentPrefix = removeNamespaceColon("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + halves[0]);
        _documentPostscript = removeNamespaceColon(halves[1]);

        //remove our dummy node
        runNode.removeChild(scanLocationNode);
    }

    /**
     * Does all the processing to create an XmlBeans structure that
     * represents the run.  Doesn't do anything with the scans.
     * if restricted is true, the scan count is set to the passed-in numScans,
     * otherwise it's gathered from the run
     */
    public void buildDocStructure(int firstScanNum, int lastScanNum, boolean restricted)
    {
        MZXMLFileInfo fileInfo = _run.getHeaderInfo();

        int numMs1ScansToWrite = _run.getScanCount();
        int numMs2ScansToWrite = 0;
        int numMs3ScansToWrite = 0;
        if (_run.getMS2Scans() != null)
            numMs2ScansToWrite = _run.getMS2Scans().length;
        if (_run.getMS3Scans() != null)
            numMs3ScansToWrite = _run.getMS3Scans().length;

        if (restricted)
        {
            //since we're given scan numbers, not indexes, we don't immediately know how many
            //scans we're actually writing
            int firstScanIndex = _run.getIndexForScanNum(firstScanNum);
            int lastScanIndex = _run.getIndexForScanNum(lastScanNum);
            numMs1ScansToWrite = lastScanIndex-firstScanIndex+1;
            if (excludeMS1Scans)
                numMs1ScansToWrite = 0;

            numMs2ScansToWrite = 0;
            if (_run.getMS2Scans() != null)
            {
                for (int i=0; i<_run.getMS2Scans().length; i++)
                {
                    int currentMS2ScanNum = _run.getMS2Scans()[i].getNum();
                    if (currentMS2ScanNum >= firstScanNum)
                    {
                        if (currentMS2ScanNum <= lastScanNum)
                        {
                            numMs2ScansToWrite++;
                        }
                        else
                            break;
                    }
                }
            }
            numMs3ScansToWrite = 0;
            if (_run.getMS3Scans() != null)
            {
                for (int i=0; i<_run.getMS3Scans().length; i++)
                {
                    int currentMS3ScanNum = _run.getMS3Scans()[i].getNum();
                    if (currentMS3ScanNum >= firstScanNum)
                    {
                        if (currentMS3ScanNum <= lastScanNum)
                        {
                            numMs3ScansToWrite++;
                        }
                        else
                            break;
                    }
                }
            }
        }
        _xmlBeansMsRun.setScanCount(BigInteger.valueOf(numMs1ScansToWrite + numMs2ScansToWrite +
                                                       numMs3ScansToWrite));

        MsRunDocument.MsRun.DataProcessing dataProcessing = _xmlBeansMsRun.addNewDataProcessing();
        List<SoftwareInfo> jrapSoftwareInfoList = fileInfo.getDataProcessing().getSoftwareUsed();
        for (int i=0; i<jrapSoftwareInfoList.size(); i++)
        {
            SoftwareInfo jrapSoftwareInfo = jrapSoftwareInfoList.get(i);
            SoftwareDocument.Software xmlBeansSoftware = dataProcessing.addNewSoftware();
            xmlBeansSoftware.setName(jrapSoftwareInfo.name);
            xmlBeansSoftware.setType(SoftwareDocument.Software.Type.Enum.forString(jrapSoftwareInfo.type));
            xmlBeansSoftware.setVersion(jrapSoftwareInfo.version);
        }

        List<ParentFile> jrapParentFileArray = fileInfo.getParentFiles();
        for (int i=0; i<jrapParentFileArray.size(); i++)
        {
            ParentFile jrapParentFile = jrapParentFileArray.get(i);
            MsRunDocument.MsRun.ParentFile xmlBeansParentFile = _xmlBeansMsRun.addNewParentFile();
            //is this right?! ParentFile doesn't seem to have a Name
            xmlBeansParentFile.setFileName(jrapParentFile.getURI());
            xmlBeansParentFile.setFileSha1(jrapParentFile.getSha1());
            xmlBeansParentFile.setFileType(MsRunDocument.MsRun.ParentFile.FileType.Enum.forString(jrapParentFile.getType()));
        }

        //for some reason we don't always seem to capture this, so null-checking
        MSInstrumentInfo jrapInstrumentInfo = fileInfo.getInstrumentInfo();
        if (jrapInstrumentInfo != null)
        {
            MsRunDocument.MsRun.MsInstrument xmlBeansMsInstrument = _xmlBeansMsRun.addNewMsInstrument();
            MsRunDocument.MsRun.MsInstrument.MsManufacturer xmlBeansManufacturer =
                    xmlBeansMsInstrument.addNewMsManufacturer();
            xmlBeansManufacturer.setCategory("msManufacturer");            
            xmlBeansManufacturer.setValue(jrapInstrumentInfo.getManufacturer());
            OntologyEntryType xmlBeansMsModel = xmlBeansMsInstrument.addNewMsModel();
            xmlBeansMsModel.setValue(jrapInstrumentInfo.getModel());
            SoftwareInfo jrapInstrumentSoftwareInfo = jrapInstrumentInfo.getSoftwareInfo();
            if (jrapInstrumentSoftwareInfo != null)
            {
                SoftwareDocument.Software xmlBeansSoftware = dataProcessing.addNewSoftware();
                xmlBeansSoftware.setName(jrapInstrumentSoftwareInfo.name);
                xmlBeansSoftware.setType(SoftwareDocument.Software.Type.Enum.forString(jrapInstrumentSoftwareInfo.type));
                xmlBeansSoftware.setVersion(jrapInstrumentSoftwareInfo.version);
                xmlBeansMsInstrument.setSoftware(xmlBeansSoftware);
            }
        }

        //TODO: I'm not at all sure that this is correct, but these items are required
        //and they're not stored on the run
        String runStartTime = "PT0.0S";
        String runEndTime = "PT0.0S";

        try
        {
            MSRun.MSScan firstScan = _run.getScan(0);
            if (restricted)
            {
                int firstScanIndex = _run.getIndexForScanNum(firstScanNum);
                if (firstScanIndex < 0)
                    firstScanIndex = -firstScanIndex;
                firstScan = _run.getScan(firstScanIndex);

            }
            runStartTime = firstScan.getRetentionTime();

            MSRun.MSScan lastScan = _run.getScan(_run.getScanCount()-1);
            //if restricting scans, then the last scan isn't the last scan of the run
            int lastScanIndex = _run.getIndexForScanNum(lastScanNum);
            if (lastScanIndex < 0)
                lastScanIndex = -lastScanIndex;
            if (restricted)
                lastScan = _run.getScan(lastScanIndex);
            runEndTime = lastScan.getRetentionTime();
        }
        catch (Exception e)
        {
            ApplicationContext.infoMessage("Warning:  Unable to get first and last scan information from run.  Defaulting run start/end times to 0.");
        }
        _xmlBeansMsRun.setStartTime(new GDuration(runStartTime));
        _xmlBeansMsRun.setEndTime(new GDuration(runEndTime));

        createDocumentShellXml();
    }


    /**
     * setter for the run.
     * @param run
     */
    public void setRun(MSRun run)
    {
        _run = run;
    }

    /**
     * Write out the index
     * @param pw
     */
    public void writeIndex(PrintWriter pw)
    {
        long indexOffset = _currentFilePosition;

        String indexFragment = removeNamespaceColon(_xmlBeansIndex.xmlText(_optionsForPrinting));
        pw.print(indexFragment);

        _currentFilePosition += indexFragment.length();

        String indexOffsetFragment = removeNamespaceColon("<mzx:indexOffset>" + indexOffset + "</mzx:indexOffset>");
        pw.print(indexOffsetFragment);

        _currentFilePosition += indexOffsetFragment.length();

        pw.flush();
    }

    /**
     * Return the index of the next MS2 scan _after_ index oldMs2ScanIndex that falls into
     * the given range (if restrict is true)
     * @param oldMs2ScanIndex
     * @param ms2Scans
     * @param firstScan
     * @param lastScan
     * @param restrict
     * @return The index of the next MS2 scan for writing.  If none, return -1 
     */
    protected int queueNextMs2ScanIndex(int oldMs2ScanIndex, MSRun.MSScan[] ms2Scans,
                                        int firstScan, int lastScan,
                                        boolean restrict)
    {
        int result = -1;
        if (ms2Scans == null || ms2Scans.length==0)
            return result;
        int currentScanIndex = oldMs2ScanIndex+1;
        while (currentScanIndex < ms2Scans.length)
        {
            if (!restrict ||
                (ms2Scans[currentScanIndex].getNum() >= firstScan &&
                 ms2Scans[currentScanIndex].getNum() <= lastScan))
            {
                result = currentScanIndex;
                break;
            }
            currentScanIndex++;
        }
        return result;
    }

    /**
     * Return the index of the next MS3 scan _after_ index oldMs3ScanIndex that falls into
     * the given range (if restrict is true)
     * This could really be folded into queueNextMs2ScanIndex, but it's just easier not to
     * @param oldMs3ScanIndex
     * @param ms3Scans
     * @param firstScan
     * @param lastScan
     * @param restrict
     * @return The index of the next MS2 scan for writing.  If none, return -1
     */
    protected int queueNextMs3ScanIndex(int oldMs3ScanIndex, MSRun.MSScan[] ms3Scans,
                                        int firstScan, int lastScan,
                                        boolean restrict)
    {
        int result = -1;
        if (ms3Scans == null || ms3Scans.length==0)
            return result;
        int currentScanIndex = oldMs3ScanIndex+1;
        while (currentScanIndex < ms3Scans.length)
        {
            if (!restrict ||
                (ms3Scans[currentScanIndex].getNum() >= firstScan &&
                 ms3Scans[currentScanIndex].getNum() <= lastScan))
            {
                result = currentScanIndex;
                break;
            }
            currentScanIndex++;
        }
        return result;
    }


    /**
     * Write out scans immediately.  Either write out all scans, or just a subregion, depending
     * on the value of the restrict argument.  If restrict == false, ignore the int and float args
     *
     * @param pw
     * @param firstScan
     * @param lastScan
     * @param lowMz
     * @param highMz
     * @param restrict
     */
    public void writeScans(PrintWriter pw, int firstScan, int lastScan, float lowMz, float highMz,
                              boolean restrict)
    {
        _log.debug("writeScans start");

        if (_run == null)
            return;
        MSRun.MSScan[] ms2Scans = _run.getMS2Scans();
        MSRun.MSScan[] ms3Scans = _run.getMS3Scans();

        int nextMs2ScanIndex = queueNextMs2ScanIndex(-1, ms2Scans, firstScan, lastScan,
                                                              restrict);
        int nextMs3ScanIndex = queueNextMs3ScanIndex(-1, ms3Scans, firstScan, lastScan,
                                                              restrict);

        int ms1ScanCount = _run.getScanCount();

        _log.debug("MS1 scans: " + ms1ScanCount);
        if (ms2Scans !=null) _log.debug("MS2 scans: " + ms2Scans.length);
        if (ms3Scans !=null) _log.debug("MS3 scans: " + ms3Scans.length);


        int nextCalibChangeIndex = 0;
        double massCalibrationWavelength = UNSET_WAVELENGTH_OR_OFFSET;
        double massCalibrationOffset = UNSET_WAVELENGTH_OR_OFFSET;

        int nextCalibChangeScan = 0;
        if (_massCalibrationParameters != null &&
                (_shouldCalibrateSpectra || _shouldCalibratePrecursorMz))
        {
            nextCalibChangeScan = _massCalibrationParameters[0].first;
        }
        for (int i=0; i<ms1ScanCount; i++)
        {
            if (i % (ms1ScanCount / 100) == 0)
                ApplicationContext.setMessage((i * 100 / ms1ScanCount) + " % complete");
            MSRun.MSScan ms1Scan = _run.getScan(i);

            if (_massCalibrationParameters != null &&
                (_shouldCalibrateSpectra || _shouldCalibratePrecursorMz))
            {
                if (nextCalibChangeScan <= ms1Scan.getNum())
                {
                    massCalibrationWavelength = _massCalibrationParameters[nextCalibChangeIndex].second.first;
                    massCalibrationOffset = _massCalibrationParameters[nextCalibChangeIndex].second.second;

                    if (_massCalibrationParameters.length >= nextCalibChangeIndex+2)
                    {
                        nextCalibChangeIndex++;
                        nextCalibChangeScan = _massCalibrationParameters[nextCalibChangeIndex].first;
                    }
                }
            }

//System.err.println("*****MS1 scan: " + ms1Scan.getNum());

            //Write out all MS2 scans that should be written out _before_ this ms1 scan is
            //written.
            //check that the next ms2 scan exists,
            //and that its number is less than the next ms1 scan's number,
            //and that it's number is in the range we want to write
            while (nextMs2ScanIndex > -1 &&
                   ms1Scan.getNum() > ms2Scans[nextMs2ScanIndex].getNum())
            {
                MSRun.MSScan ms2Scan = ms2Scans[nextMs2ScanIndex];
//System.err.println("  *****MS2 scan: " + ms2Scan.getNum());
                //Write out all MS3 scans that should be written out _before_ this ms1 scan is
                //written.
                //check that the next ms3 scan exists,
                //and that its number is less than the next ms2 scan's number,
                //and that it's number is in the range we want to write
                while (nextMs3ScanIndex > -1 &&
                        ms2Scan.getNum() > ms3Scans[nextMs3ScanIndex].getNum())
                {
                    writeScan(ms3Scans[nextMs3ScanIndex], ms3Scans[nextMs3ScanIndex].getNum(), pw, lowMz,
                              highMz, restrict, massCalibrationWavelength, massCalibrationOffset);
                    nextMs3ScanIndex = queueNextMs3ScanIndex(nextMs3ScanIndex, ms3Scans,
                                                             firstScan, lastScan, restrict);
                }

                writeScan(ms2Scan, ms2Scan.getNum(), pw, lowMz, highMz, restrict,
                          massCalibrationWavelength, massCalibrationOffset);
                nextMs2ScanIndex = queueNextMs2ScanIndex(nextMs2ScanIndex, ms2Scans,
                                                         firstScan, lastScan, restrict);
            }

            if (restrict)
            {
                //if we're restricting the scan and mz window, need to check whether this scan
                //is in the window.  Since restriction is based on scan number, not index,
                //need to check the scan to get the scan number
                int scanNum = ms1Scan.getNum();
                if (scanNum < firstScan)
                    continue;
                //scan numbers are monotonically increasing with scan index, so if we're out of
                //range already, we're done
                if (scanNum > lastScan)
                    break;
            }


            //if we get here, this is a scan we want to write
            if (!excludeMS1Scans)
                writeScan(ms1Scan, ms1Scan.getNum(), pw, lowMz, highMz, restrict,
                      massCalibrationWavelength, massCalibrationOffset);

        }

        //write out any remaining MS2 scans with higher indexes than the ms1 scans
        //we're writing, and any interstitial MS3 scans.
        while (nextMs2ScanIndex > -1)
        {
            MSRun.MSScan ms2Scan = ms2Scans[nextMs2ScanIndex];
            while (nextMs3ScanIndex > -1 &&
                    ms2Scan.getNum() > ms3Scans[nextMs3ScanIndex].getNum())
            {
                writeScan(ms3Scans[nextMs3ScanIndex], ms3Scans[nextMs3ScanIndex].getNum(), pw, lowMz, highMz,
                          restrict,
                          massCalibrationWavelength, massCalibrationOffset);
                nextMs3ScanIndex = queueNextMs3ScanIndex(nextMs3ScanIndex, ms3Scans,
                        firstScan, lastScan, restrict);
            }
            //no recalibration for MS/MS scans
            writeScan(ms2Scan, ms2Scan.getNum(), pw, lowMz, highMz, restrict);
            nextMs2ScanIndex = queueNextMs2ScanIndex(nextMs2ScanIndex, ms2Scans,
                                                     firstScan, lastScan, restrict);
        }

        //write out any remaining MS3 scans after all ms1 and ms2 scans.
        //No calibration for MS3 scans
        while (nextMs3ScanIndex > -1)
        {
            writeScan(ms3Scans[nextMs3ScanIndex], ms3Scans[nextMs3ScanIndex].getNum(), pw,
                    lowMz, highMz, restrict);
            nextMs3ScanIndex = queueNextMs3ScanIndex(nextMs3ScanIndex, ms3Scans,
                                                     firstScan, lastScan, restrict);
        }

        ApplicationContext.setMessage("100% complete");
    }


    protected void writeScan(MSRun.MSScan scan, int scanNumber, PrintWriter pw,
                             float lowMz, float highMz,
                             boolean restrict)
    {
        writeScan(scan, scanNumber, pw, lowMz, highMz, restrict,
                  UNSET_WAVELENGTH_OR_OFFSET, UNSET_WAVELENGTH_OR_OFFSET);
    }

    /**
     * Add a MsScan representing the passed-in MSScan to the MsRun.  If restrict==false, then only
     * write out a subregion of the scan's mz values
     * Write out the XML for that search result
     * @param scan
     * @param scanNumber
     * @param pw
     * @param lowMz
     * @param highMz
     * @param restrict
     * @param wavelengthForCalibration
     * @param offsetForCalibration
     */
    public void writeScan(MSRun.MSScan scan, int scanNumber, PrintWriter pw, float lowMz, float highMz,
                             boolean restrict, double wavelengthForCalibration, double offsetForCalibration)
    {
        ScanDocument.Scan xmlBeansScan = _xmlBeansMsRun.addNewScan();

        //scan information

        //this often seems to be null
        if (scan.getScanType() != null)
        {
            //some mzXml files have been seen to have bad scanType values, i.e., values not
            //in the list of allowed values, which includes Full, SRM, CRM, etc.
            //In those cases, just trapping the exception and refusing to write it out.
            try
            {
                xmlBeansScan.setScanType(ScanDocument.Scan.ScanType.Enum.forString(
                                               scan.getScanType()));
            }
            catch (Exception e)
            {
                _log.debug("Error setting scan type, " + e.getMessage());
            }
        }
        xmlBeansScan.setNum(BigInteger.valueOf(scanNumber));
        xmlBeansScan.setMsLevel(BigInteger.valueOf(scan.getMsLevel()));

//Hack for "removing" scans with no peaks
//TODO: make this hack ACTUALLY, OPTIONALLY remove those scans        
//if (scan.getPeaksCount() == 0 || scan.getSpectrum().length == 0)
//{
//    xmlBeansScan.setMsLevel(BigInteger.valueOf(4));
//    System.err.println("setting mslevel of scan " + scan.getNum() + " to 4");
//}
        xmlBeansScan.setRetentionTime(new GDuration(scan.getRetentionTime()));

        //These will be overridden if we're restricting
        float basePeakMz = scan.getBasePeakMz();
        float basePeakIntensity = scan.getBasePeakIntensity();

        //peaks
        ScanDocument.Scan.Peaks xmlBeansPeaks = xmlBeansScan.addNewPeaks();

        float[][] spectrum = null;
        try
        {
            //getSpectrum() can fail with an NPE if a scan is empty.
System.err.println("BEFORE GETSPEC");            
            spectrum = scan.getSpectrum();
  System.err.println("AFTER GETSPEC!");            

        }
        catch (NullPointerException e)
        {
            spectrum = new float[2][0];
System.err.println("CAUGHT NPE!");            
        }

        float[] mzSpectrum = spectrum[0];
        float[] intensitySpectrum = spectrum[1];

        byte[] spectrumArray = null;
        int peaksCount = scan.getPeaksCount();

        //get the low and high mz values from the scan.  If we're restricting, these will be changed
        float scanLowMz = scan.getLowMz();
        float scanHighMz = scan.getHighMz();

        //These are the MZ range imposed on the scan by the machine.  In the case of restriction,
        //should be further constrained by the window we're restricting to
        float scanStartMz = scan.getStartMz();
        float scanEndMz = scan.getEndMz();

        Float[] totIonCurrent = new Float[1];
        totIonCurrent[0] = scan.getTotIonCurrent();

        float precursorMz = scan.getPrecursorMz();

        //scale intensities, if specified
        if (intensityScaleFactor != UNSET_INTENSITY_SCALE)
        {
            for (int i=0; i<intensitySpectrum.length; i++)
                intensitySpectrum[i] *= intensitySpectrum.length;
        }

        //calibrate masses, if specified
        if (wavelengthForCalibration != UNSET_WAVELENGTH_OR_OFFSET &&
                offsetForCalibration != UNSET_WAVELENGTH_OR_OFFSET)
        {
            if (_shouldCalibrateSpectra)
            {
                for (int i=0; i<mzSpectrum.length; i++)
                    mzSpectrum[i] +=
                            mzSpectrum[i] *
                                    (MassCalibrationUtilities.DEFAULT_THEORETICAL_MASS_WAVELENGTH -
                                            wavelengthForCalibration) -
                                    offsetForCalibration;
            }
            if (_shouldCalibratePrecursorMz)
            {
                precursorMz += precursorMz * (MassCalibrationUtilities.DEFAULT_THEORETICAL_MASS_WAVELENGTH -
                                            wavelengthForCalibration) -
                                    offsetForCalibration;
            }
        }


        if (xmlBeansScan.getMsLevel().intValue() == 2)
        {
            ScanDocument.Scan.PrecursorMz xmlBeansPrecursorMz =
                    xmlBeansScan.addNewPrecursorMz();
            xmlBeansPrecursorMz.setPrecursorCharge(BigInteger.valueOf(scan.getPrecursorCharge()));
            xmlBeansPrecursorMz.setPrecursorScanNum(BigInteger.valueOf(scan.getPrecursorScanNum()));
            xmlBeansPrecursorMz.setFloatValue(precursorMz);

            if (scan.isPrecursorMzCorrected())
            {
                Attr correctedAttribute = _xmlBeansMsRun.getDomNode().getOwnerDocument().createAttribute(
                        PRECURSORMZ_ATTR_MSINSPECT_CORRECTED);
                correctedAttribute.setValue("true");
                xmlBeansPrecursorMz.getDomNode().getAttributes().setNamedItem(correctedAttribute);
//                xmlBeansPrecursorMz.getDomNode().appendChild(correctedAttribute);
            }

        }

        if (restrict)
        {
            //this will hold the lowest and highest mz values that we find in the scan
            Pair<Float, Float> lowAndHighFoundMz =
                    new Pair<Float, Float>(lowMz, highMz);
            //this will hold the mz value of the peak with the highest intensity
            Pair<Float, Float> basePeakMzIntensity =
                    new Pair<Float, Float>(0f, 0f);
            spectrumArray = createRestrictedByteArrayForSpectrum(mzSpectrum, intensitySpectrum,
                                                                 lowMz, highMz, lowAndHighFoundMz,
                                                                 basePeakMzIntensity, totIonCurrent);
            //if we're restricting, need to recalculate peaksCount.  every 8 bytes in the
            //spectrum array represent one peak
            peaksCount = spectrumArray.length / 8;

            //record the actual found lowest and highest MZ values
            scanStartMz = Math.max(lowMz,scanLowMz);
            scanEndMz = Math.min(highMz,scanHighMz);

            //record the base (highest-intensity) peak mz and intensity
            basePeakMz = basePeakMzIntensity.first;
            basePeakIntensity = basePeakMzIntensity.second;

            //Set the scan _range_ appropriately.  The most restrictive window defined by
            //the scan's low and high values and the low and high values we're restricting to here
            scanLowMz = lowAndHighFoundMz.first;
            scanHighMz = lowAndHighFoundMz.second;
        }
        else
        {
            spectrumArray = createByteArrayForSpectrum(mzSpectrum, intensitySpectrum,
                                                       scan.getPeaksCount());
        }

        xmlBeansScan.setLowMz(scanLowMz);
        xmlBeansScan.setHighMz(scanHighMz);
        xmlBeansScan.setStartMz(scanStartMz);
        xmlBeansScan.setEndMz(scanEndMz);

        xmlBeansScan.setTotIonCurrent(totIonCurrent[0]);
        xmlBeansScan.setBasePeakMz(basePeakMz);
        xmlBeansScan.setBasePeakIntensity(basePeakIntensity);

        xmlBeansScan.setPeaksCount(BigInteger.valueOf(peaksCount));
        xmlBeansPeaks.setPrecision(BigInteger.valueOf(scan.getPrecision()));
        xmlBeansPeaks.setByteArrayValue(spectrumArray);


        try
        {
            String fragment = removeNamespaceColon(_xmlBeansMsRun.getScanArray(0).xmlText(_optionsForPrinting));
            MzXMLDocument.MzXML.Index.Offset scanOffset = _xmlBeansIndex.addNewOffset();
            scanOffset.setId(BigInteger.valueOf(scanNumber));
            scanOffset.setLongValue(_currentFilePosition);
            pw.print(fragment);
            _currentFilePosition += fragment.length();
            pw.flush();
        }
        catch (Exception e)
        {
            e.printStackTrace(System.err);
        }

        _xmlBeansMsRun.removeScan(0);
    }

    /**
     * Creates a base-64 byte array, with alternating mz values and intensities, to represent
     * the two float arrays we store in MSScan.
     * If we got REALLY parsimonious wrt memory, we could change things so that we
     * don't even store this whole thing all at once.  That'd get hairy, though.
     * @return
     */
    protected byte[] createByteArrayForSpectrum(float[] mzSpectrum, float[]intensitySpectrum,
                                                int peaksCount)
    {
        byte[] result = new byte[peaksCount * 2 * 4];
        for (int i=0; i<mzSpectrum.length; i++)
        {
            //populate 4 bytes representing the mz
            fillByteArrayForFloat(mzSpectrum[i], result, 8*i);
            //populate 4 bytes representing the intensity
            fillByteArrayForFloat(intensitySpectrum[i], result, 8*i+4);
        }

        return result;
    }

    /**
     * Separate handling for restricted spectra, since we have to build an ArrayList of bytes
     * because we don't know in itially how many bytes we're writing out.
     * Populate lowAndHighFoundMz with the actual lowest and highest values we find
     * @param mzSpectrum
     * @param intensitySpectrum
     * @param lowMz
     * @param highMz
     * @param outLowAndHighFoundMz will contain the lowest and highest found mz values
     * @param outBasePeakMzIntensity will contain the mz and intensity of the base peak
     * @param outTotIonCurrent will contain the sum of all intensities as its first element (hack)
     * @return
     */
    protected byte[] createRestrictedByteArrayForSpectrum(float[] mzSpectrum,
                                                          float[]intensitySpectrum,
                                                          float lowMz, float highMz,
                                                          Pair outLowAndHighFoundMz,
                                                          Pair outBasePeakMzIntensity,
                                                          Float[] outTotIonCurrent)
    {
        ArrayList<Byte> resultList = new ArrayList<Byte>();

        //initialize the lowest and highest MZ values so that they're sure to get overwritten
        float lowestMz = highMz;
        float highestMz = lowMz;

        //initialize the base peak (peak with highest intensity) values so that they're sure to get
        //overwritten
        float basePeakMz = 0f;
        float basePeakIntensity = 0f;

        float totIonCurrent = 0f;

        byte[] byteHolder = new byte[4];

        for (int i=0; i<mzSpectrum.length; i++)
        {
            if (mzSpectrum[i] < lowMz)
                continue;
            if (mzSpectrum[i] > highMz)
                break;

            //populate 4 bytes representing the mz
            fillByteArrayForFloat(mzSpectrum[i], byteHolder, 0);
            resultList.add(byteHolder[0]);
            resultList.add(byteHolder[1]);
            resultList.add(byteHolder[2]);
            resultList.add(byteHolder[3]);
            //populate 4 bytes representing the intensity
            fillByteArrayForFloat(intensitySpectrum[i], byteHolder, 0);
            resultList.add(byteHolder[0]);
            resultList.add(byteHolder[1]);
            resultList.add(byteHolder[2]);
            resultList.add(byteHolder[3]);

            if (mzSpectrum[i] < lowestMz)
                lowestMz = mzSpectrum[i];
            if (mzSpectrum[i] > highestMz)
                highestMz = mzSpectrum[i];

            if (intensitySpectrum[i] > basePeakIntensity)
            {
                basePeakIntensity = intensitySpectrum[i];
                basePeakMz = mzSpectrum[i];
            }
            totIonCurrent += intensitySpectrum[i];
        }

        //TODO: This is probably very memory-inefficient
        byte[] result = new byte[resultList.size()];
        for (int i=0; i<resultList.size(); i++)
            result[i] = resultList.get(i);
        outLowAndHighFoundMz.first = Float.valueOf(lowestMz);
        outLowAndHighFoundMz.second = Float.valueOf(highestMz);

        outBasePeakMzIntensity.first = Float.valueOf(basePeakMz);
        outBasePeakMzIntensity.second = Float.valueOf(basePeakIntensity);

        outTotIonCurrent[0]=totIonCurrent;
        return result;
    }

    /**
     * Given a float, populate the given byte array with four bytes representing the float.
     * The first byte will go in at the offset given
     * @param floatVal
     * @param bytes
     */
    protected void fillByteArrayForFloat(float floatVal, byte[] bytes, int offset)
    {
        assert(bytes.length >= (offset + 4));

        int intBits = Float.floatToIntBits(floatVal);
        bytes[offset] = (byte)(intBits >> 24);
        bytes[offset + 1] = (byte)(intBits >> 16);
        bytes[offset + 2] = (byte)(intBits >> 8);
        bytes[offset + 3] = (byte)(intBits);
    }


    /**
     * Remove all occurrences of the namespace prefix from an xml fragment.
     * TODO: Boy, I'd sure like XmlBeans to do this for me.
     * @param input
     * @return
     */
    protected String removeNamespaceColon(String input)
    {
        return input.replaceAll("mzx:","");
    }



    /**
     * Write out the full document, with all modifications and features, to a file
     * @param file
     * @throws IOException
     */
    public void write(File file) throws IOException
    {
       _write(file, 0,0,0,0,false);
    }


    /**
     * Write out just a subregion
     * @param file
     * @param firstScan
     * @param lastScan
     * @param lowMz
     * @param highMz
     */
    public void writeSubregion(File file, int firstScan, int lastScan, float lowMz, float highMz)
            throws IOException
    {
        _write(file,firstScan,lastScan,lowMz,highMz,true);
    }

    /**
     * Workhorse writing method.  Can write just a subregion, or the whole file
     * @param file
     * @param firstScanNum
     * @param lastScanNum
     * @param lowMz
     * @param highMz
     */
    protected void _write(File file, int firstScanNum, int lastScanNum, float lowMz, float highMz,
                          boolean restrict)
            throws IOException
    {
        //build the XML structure of the document
        buildDocStructure(firstScanNum, lastScanNum, restrict);
        PrintWriter pw = new PrintWriter(file);
        _log.debug("Writing document start");
        printDocPrefix(pw);

        writeScans(pw, firstScanNum, lastScanNum, lowMz, highMz, restrict);
        _log.debug("Writing index");
        writeIndex(pw);
        _log.debug("Finishing document");
        printDocPostscript(pw);

        pw.flush();
        _log.debug("Done.");
    }

    public void printDocPrefix(PrintWriter pw)
    {
        _currentFilePosition = 0;
        pw.print(_documentPrefix);
        _currentFilePosition += _documentPrefix.length();
    }

    public void printDocPostscript(PrintWriter pw)
    {
        pw.print(_documentPostscript);
        _currentFilePosition += _documentPostscript.length();
    }

    public void setMassCalibrationParameters(Pair<Integer,Pair<Double,Double>>[] newParameters)
    {
        _massCalibrationParameters = newParameters;
        _shouldCalibrateSpectra = true;
    }

    public void setMassCalibrationWavelengthOffset(double massCalibrationWavelength,
                                                   double massCalibrationOffset)
    {
        _massCalibrationParameters = (Pair<Integer, Pair<Double,Double>>[]) new Pair[1];
        _massCalibrationParameters[0] =
                new Pair<Integer, Pair<Double,Double>>(0,
                        new Pair<Double,Double>(massCalibrationWavelength,
                                                massCalibrationOffset));
    }

    public boolean shouldCalibrateSpectra()
    {
        return _shouldCalibrateSpectra;
    }

    public void setShouldCalibrateSpectra(boolean _shouldCalibrateSpectra)
    {
        this._shouldCalibrateSpectra = _shouldCalibrateSpectra;
    }


    public boolean isShouldCalibratePrecursorMz()
    {
        return _shouldCalibratePrecursorMz;
    }

    public void setShouldCalibratePrecursorMasses(boolean _shouldCalibratePrecursorMasses)
    {
        this._shouldCalibratePrecursorMz = _shouldCalibratePrecursorMasses;
    }

    public double getIntensityScaleFactor()
    {
        return intensityScaleFactor;
    }

    public void setIntensityScaleFactor(double intensityScaleFactor)
    {
        this.intensityScaleFactor = intensityScaleFactor;
    }


    public boolean isExcludeMS1Scans()
    {
        return excludeMS1Scans;
    }

    public void setExcludeMS1Scans(boolean excludeMS1Scans)
    {
        this.excludeMS1Scans = excludeMS1Scans;
    }
}