MSRun.java example

Explorer
msInspect-master
/*
 * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fhcrc.cpl.toolbox.proteomics;

import org.apache.log4j.Logger;
import org.fhcrc.cpl.toolbox.CPUTimer;
import org.fhcrc.cpl.toolbox.proteomics.gui.IntensityPlot;
import org.fhcrc.cpl.toolbox.proteomics.feature.Spectrum;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet;
import org.fhcrc.cpl.toolbox.*;
import org.fhcrc.cpl.toolbox.gui.chart.PanelWithHistogram;
import org.fhcrc.cpl.toolbox.gui.chart.PanelWithScatterPlot;
import org.fhcrc.cpl.toolbox.datastructure.FloatArray;
import org.fhcrc.cpl.toolbox.datastructure.FloatRange;
import org.systemsbiology.jrap.stax.Base64;
import org.systemsbiology.jrap.stax.MSXMLParser;
import org.systemsbiology.jrap.stax.MZXMLFileInfo;
import org.systemsbiology.jrap.stax.ScanHeader;

import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.lang.ref.SoftReference;
import java.nio.ByteBuffer;
import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.ClosedChannelException;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.TreeMap;
import java.util.Map;


/**
 * User: mbellew
 * Date: May 20, 2004
 * Time: 9:54:13 AM
 */
public class MSRun implements Serializable
{
    //flag to control the use of the sequential parser, which doesn't require an mzXML index
    protected boolean useSequentialParser = true;
//    protected boolean showTimeCharts = false;

    private static Logger _log = Logger.getLogger(MSRun.class);
    private static final int FLOATBYTES = 4;

    //	private static final long serialVersionUID = 8280766319681981127L;
    private static final long serialVersionUID = 8280766319681981128L;
    static float IMAGE_THRESHOLD = 10;

    // source file info
    String _filename;
    long _lastModified;
    long _filelength;
    private transient File _file;

    // for retrieving spectrum data
    transient FileChannel _fileChannel;
    transient MSXMLParser parser;

    MSScan[] _scans;
    FloatRange _mzRange;
    transient float[] _templateSpectrum = null;

    // BufferedImage is not serializable
    transient BufferedImage _image;
    float[] _scanArray;
    float[] _mzArray;
    float[] _intensityArray;

    // OK, these are redundant, but it's makes getIndex() easier
    transient int[] _indexMap;
    transient double[] _timeMap;

    //dhmay adding for ms2 scan indexing
    transient int[] _ms2IndexMap;

    transient MZXMLFileInfo _fileInfo = null;
    //dhmay removing "transient" 2/8/06 -- need MS2 data for AMT, and Mark thinks
    //MS3 data might be useful in the future
    MSScan[] _scans2 = null;
    MSScan[] _scans3 = null;
    TreeMap<Integer,MSScan> _allScans = null;

    private static boolean showIndexBuilderProgress = true;

    // used by Scan.getSpectrum()
    static CPUTimer readTimer = new CPUTimer("scan read");
    static CPUTimer decodeTimer = new CPUTimer("scan decode");
    static CPUTimer toFloatTimer = new CPUTimer("scan toFloat");
    transient private byte[] encodedData = null;



    private MSRun(String path) throws IOException
    {
        File f = new File(path);
        if (!f.exists())
            throw new FileNotFoundException(path);

        _file = f;
        _filename = f.getName();
        _lastModified = f.lastModified();
        _filelength = f.length();

        // data to generate BufferedImage
        FloatArray scanArray = new FloatArray();
        FloatArray mzArray = new FloatArray();
        FloatArray intensityArray = new FloatArray();
        _allScans = new TreeMap<Integer,MSScan>();
        try
        {                                           
            ApplicationContext.setMessage("Building index file: first pass");
            if (null != ApplicationContext.getFrame())
                ApplicationContext.getFrame().setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));

            parser = new MSXMLParser(path, useSequentialParser);
            int count = parser.getScanCount();
            _log.debug("JRAP scan count: " + count);
            int percent = Math.max(1, count / 100);
            float min = Float.MAX_VALUE;
            float max = 0.0F;
            int precursorScan = 0;
            //_scans = new MSScan[count];
            ArrayList list = new ArrayList();
            ArrayList list2 = new ArrayList();
            ArrayList list3 = new ArrayList();

            int currentNumScansPresent = 0;
            //long start = -1L;
            //long finish = -1L;
            //dhmay switching on sequential parser
            int loopMax = useSequentialParser ? count : parser.getMaxScanNumber();

            long jrapStart = System.currentTimeMillis();
            //for bookkeeping.  Not populated if showTimeCharts is false;
            java.util.List<Float> scanLoadTimes = new ArrayList<Float>();
            java.util.List<Float> scanSizes = new ArrayList<Float>();

            for (int i = 1; i <= loopMax; i++)
            {
                if (0 == (i % percent))
                {
                    if (showIndexBuilderProgress)
                        ApplicationContext.setMessage("Building index file: " + (currentNumScansPresent * 100 / Math.max(1,count)) + "%");
                    Thread.yield();
                }
                //start = System.currentTimeMillis();
                //dhmay switching on sequential parser
                ScanHeader scan = useSequentialParser ? parser.nextHeader() : parser.rapHeader(i);
                //finish = System.currentTimeMillis();
/*
                if (showTimeCharts)
                {
                    scanLoadTimes.add((float) (finish-start));
                    scanSizes.add((float)scan.getPeaksCount());
                }
*/
                //System.out.println("get scanheader for scan "+(i-1)+" "+(finish-start)+" ms");
//System.err.println("Scan index " + i);
                if (scan == null)
                    continue;
//System.err.println("\tScan #" + scan.getNum());
                currentNumScansPresent++;
                MSScan msScan = new MSScan(scan);
                _allScans.put(i,msScan);
                // ???? Consider: might be better to accept only msLevel=1 scanType=Full|unspecified
                if ("calibration".equals(scan.getScanType()))
                    continue;
                if ("zoom".equals(scan.getScanType()))
                    continue;
//System.err.println("*2");
                // Some ABI 4800 files contain partially filled headers when a scan detects no peaks
                // allowing ms2 scans with 0 peaks for MRM analyses
                if (scan.getMsLevel() == 1 && scan.getPeaksCount() <= 0)
                    continue;

//  System.err.println("*3, level=" + scan.getMsLevel() + ", num=" + scan.getNum());

                if (scan.getMsLevel() == 1)
                    precursorScan = scan.getNum();
                else
                {
                    if (-1 == scan.getPrecursorScanNum())
                        scan.setPrecursorScanNum(precursorScan);
                    if (scan.getMsLevel() == 2)
                        list2.add(msScan);
                    else if (scan.getMsLevel() == 3)
                        list3.add(msScan);
                    continue;
                }
                int index = list.size();
                list.add(msScan);
//   System.err.println("List size: " + index + ", added");

                float[][] spectrum = msScan.getSpectrum();
//   System.err.println("*4, " + spectrum.length);

                if (scan.getMsLevel() == 1 && spectrum[0].length > 0)
                {
                    min = Math.min(min, spectrum[0][0]);
                    max = Math.max(max, spectrum[0][spectrum[0].length - 1]);
                    if (min < 0 || max > 1e5)
                    {
                        RuntimeException x = new RuntimeException("Suspect mzxml data file, scan=" + i + " mz=" + (min < 0 ? min : max));
                        ApplicationContext.errorMessage(null, x);
                        throw x;
                    }
                }
                _computeImagePoints(index, spectrum, scanArray, mzArray, intensityArray);
            }
/*
            //dhmay adding conditional showing of charts
            if (showTimeCharts)
            {
                System.err.println("Total ms in JRAP: " + (System.currentTimeMillis() - jrapStart));
                PanelWithHistogram pwh = new PanelWithHistogram(scanLoadTimes, "Scan load ms"); pwh.setBreaks(200); pwh.displayInTab();
                new PanelWithScatterPlot(scanSizes, scanLoadTimes, "scan size (x) vs load time (y)").displayInTab();
            }
*/          _log.debug("Total ms in JRAP: "+(System.currentTimeMillis() - jrapStart));

            _scans = (MSScan[])list.toArray(new MSScan[0]);
            _scans2 = (MSScan[])list2.toArray(new MSScan[0]);
            _scans3 = (MSScan[])list3.toArray(new MSScan[0]);
            _mzRange = new FloatRange(min, max);

            _scanArray = scanArray.toArray(null);
            _mzArray = mzArray.toArray(null);
            _intensityArray = intensityArray.toArray(null);

            _initMaps();
        }
        finally
        {
            if (null != ApplicationContext.getFrame())
                ApplicationContext.getFrame().setCursor(Cursor.getDefaultCursor());
            ApplicationContext.setMessage("");
        }
    }


    private void _initMaps()
    {
        _indexMap = new int[_scans.length];
        _timeMap = new double[_scans.length];
        for (int i = 0; i < _scans.length; i++)
        {
            _indexMap[i] = _scans[i].getNum();
            _timeMap[i] = _scans[i].getDoubleRetentionTime();
        }

        //dhmay adding for ms2 scan indexing
        _ms2IndexMap = new int[_scans2.length];
        for (int i = 0; i < _scans2.length; i++)
        {
            _ms2IndexMap[i] = _scans2[i].getNum();
        }
    }

    /**
     * Turn index builder progress on or off (useful for batch scripts)
     */
    public static void setShowIndexBuilderProgress(boolean b)
    {
        showIndexBuilderProgress = b;
    }

    public String getFileName()
    {
        return _filename;
    }


    /* We have a bit of a mismatch with IntensityPlot
         * reduce and convert spectrum to expected format
         */
    private void _computeImagePoints(float scan, float[][] spectrum, FloatArray scanArray, FloatArray mzArray, FloatArray intensityArray)
    {
        int len = spectrum[0].length;
        if (len == 0) return;
        int mzCurrent = (int)spectrum[0][0];
        float intensityMax = (int)spectrum[1][0];
        for (int i = 1; i < len; i++)
        {
            int mz = (int)spectrum[0][i];
            float intensity = spectrum[1][i];
            if (mz == mzCurrent)
            {
                intensityMax = intensityMax > intensity ? intensityMax : intensity;
            }
            else
            {
                if (intensityMax > IMAGE_THRESHOLD)
                {
                    scanArray.add(scan);
                    mzArray.add((float)mzCurrent);
                    intensityArray.add(intensityMax);
                }
                mzCurrent = mz;
                intensityMax = intensity;
            }
        }
        scanArray.add(scan);
        mzArray.add((float)mzCurrent);
        intensityArray.add(intensityMax);
    }


    public static String _indexName(String filename)
    {
        File f = new File(filename);
        String s = f.getName() + ".inspect";
        if (null != f.getParent())
            s = f.getParent() + "/" + s;
        return s;
    }


/*	private static String _imageName(String filename)
		{
		File f = new File(filename);
		String s = f.getTextCode() + ".png";
		if (null != f.getParent())
			s = f.getParent() + "/" + s;
		return s;
		} */


    static MSRun _loadFromIndex(String path, String indexname)
    {
        File sourceFile = new File(path);
        File indexFile = new File(indexname);
        if (!indexFile.exists() || !sourceFile.exists())
            return null;
        _log.debug("Found .inspect file " + indexFile.getAbsolutePath() + ", checking...");
        if (indexFile.lastModified() < sourceFile.lastModified())
        {
            _log.debug(".inspect file is older than mzXML file, not using.");
            return null;
        }
        FileInputStream in = null;
        try
        {
            in = new FileInputStream(indexFile);
            ObjectInputStream ois = new ObjectInputStream(in);
            Object o = ois.readObject();
            MSRun run = (MSRun)o;

            //  don't check to make it easier to rename both files together
            //if (!sourceFile.getTextCode().equals(run._filename))
            //	return null;
            if (sourceFile.lastModified() != run._lastModified)
            {
                _log.debug("Incorrect modified date for mzXML file in .inspect file");
                return null;
            }
            if (sourceFile.length() != run._filelength)
            {
                _log.debug("Bad source file length in .inspect file");
                return null;
            }

            if (null == run._scanArray || null == run._mzArray || null == run._intensityArray)
                return null;
            if (null == run._mzRange)
            {
                _log.debug("Null _mzRange in .inspect file");
                return null;
            }
            // If index file contained no scans, assume it is busted.
            // dhmay 9/19/2007 removing a check for at least one MS1 scan, which fails on MRM files
            if (null == run._scans)
            {
                _log.debug("Null _scans in .inspect file");
                return null;
            }
            // dhmay adding 2/8/06.  If _null_ ms2/3 scans (empty is ok), assume it is busted
            if (null == run._scans2)
            {
                _log.debug("Null _scans2 in .inspect file");
                return null;
            }
            if (null == run._scans3)
            {
                _log.debug("Null _scans3 in .inspect file");
                return null;
            }

            run._initMaps();
            run._file = sourceFile;
            run._filename = sourceFile.getName();

            _log.debug("Index file loaded successfully");

            return run;
        }
        catch (FileNotFoundException x)
        {
            _log.debug("FileNotFoundException on .inspect file");
        }
        catch (IOException x)
        {
            _log.debug("IOException on index file");
        }
        catch (ClassNotFoundException x)
        {
            _log.debug("ClassNotFoundException on .inspect file");
        }
        catch (ClassCastException x)
        {
            _log.debug("ClassCastException on .inspect file");
        }
        finally
        {
            if (null != in)
                try
                {
                    in.close();
                }
                catch (IOException x)
                {
                }
        }
        return null;
    }


    boolean _writeIndex(String indexname)
    {
        File f = new File(indexname);
        try
        {
            // CONSIDER: use Deflater
            if (f.exists())
                f.delete();
            f.createNewFile();
            FileOutputStream out = new FileOutputStream(indexname);
            ObjectOutputStream oos = new ObjectOutputStream(out);
            oos.writeObject(this);
            out.close();
        }
        catch (FileNotFoundException x)
        {
            ApplicationContext.infoMessage(TextProvider.getText("WARNING_FAILED_TO_WRITE_AUXILIARY_FILE_FILE", f.getAbsolutePath()));
        }
        catch (IOException x)
        {
            ApplicationContext.infoMessage(TextProvider.getText("WARNING_FAILED_TO_WRITE_AUXILIARY_FILE_FILE", f.getAbsolutePath()));
        }
        return false;
    }

    public static MSRun load(String filename) throws IOException
    {
        return load(filename, true); // Write both .inspect and .ms2.tsv files unless run was read from index file
    }

    public static MSRun load(String filename, boolean writeIndex) throws IOException
    {
        if (!(new File(filename).exists()))
            throw new FileNotFoundException();

        String indexName = _indexName(filename);
        MSRun run = _loadFromIndex(filename, indexName);
        if (null == run)
        {
            _log.debug("No valid index file found, loading from mzXML");
            run = new MSRun(filename);
            if (writeIndex)
            {
                ApplicationContext.setMessage("Writing .inspect file...");
                run._writeIndex(indexName);

                /* NOTE: should really make saving these an Action, but since we've just read the .mzxml file,
                * I'd hate to have to read it again.
                */
                FeatureSet fs = run.getTandemFeatureSet(2);
                if (null != fs)
                {
                    ApplicationContext.setMessage("Writing MS2 features...");
                    File ms2File = new File(run.getFile().getPath() + ".ms2.tsv");
                    try
                    {
                        fs.save(ms2File);
                    }
                    catch (IOException e)
                    {
                        ApplicationContext.infoMessage(
                                TextProvider.getText(
                                        "WARNING_FAILED_TO_WRITE_AUXILIARY_FILE_FILE",
                                        ms2File.getAbsolutePath()));
                    }
                }
                fs = run.getTandemFeatureSet(3);
                if (null != fs)
                {
                    ApplicationContext.setMessage("Writing MS3 features...");
                    File ms3File = new File(run.getFile().getPath() + ".ms2.tsv");
                    try
                    {
                        fs.save(new File(run.getFile().getPath() + ".ms3.tsv"));
                    }
                    catch (IOException e)
                    {
                        ApplicationContext.infoMessage(
                                TextProvider.getText(
                                        "WARNING_FAILED_TO_WRITE_AUXILIARY_FILE_FILE",
                                        ms3File.getAbsolutePath()));
                    }
                }
            }
            ApplicationContext.setMessage("");
        }
        return run;
    }


    private long _checkIO() throws IOException
    {
        if (null != _fileChannel && _fileChannel.isOpen())
        {
            try
            {
                long size = _fileChannel.size();
                return size;
            }
            catch (IOException x)
            {
            }
        }
        close();
        _initIO();
        return _fileChannel.size();
    }


    private void _initIO()
    {
        String path = _file.getPath();
        try
        {
            _fileChannel = new RandomAccessFile(path, "r").getChannel();
        }
        catch (Exception x)
        {
            close();
            ApplicationContext.errorMessage(null, x);
        }
    }


    public void close()
    {
        if (null != _fileChannel)
            try
            {
                _fileChannel.close();
            }
            catch (IOException x)
            {
            }
        _fileChannel = null;
    }


    protected void finalize()
    {
        close();
    }

    public void invalidateImage()
    {
        _image = null;
    }

    public BufferedImage getImage(String colorScheme)
    {
        if (null == _image)
        {
            try
            {
                ApplicationContext.setMessage("Building image...");
                IntensityPlot plot = new IntensityPlot();
                float median = Spectrum.MedianSampled(_intensityArray, false);
                float threshold = median / 2;
                plot.setData(FloatArray.asFloatArray(_scanArray),
                        FloatArray.asFloatArray(_mzArray),
                        FloatArray.asFloatArray(_intensityArray));

                _image = plot.plot(threshold, true, colorScheme);

                float maxTIC = 0.0F;
                for (int s = 0; s < getScanCount(); s++)
                {
                    MSScan scan = getScan(s);
                    maxTIC = Math.max(maxTIC, scan.getTotIonCurrent());
                }

                // TIC Chart
                if (true)
                {
                    Graphics g = _image.getGraphics();
                    g.setColor(Color.BLACK);
                    int height = _image.getHeight();
                    int yPrev = 0;
                    for (int s = 0; s < getScanCount(); s++)
                    {
                        MSScan scan = getScan(s);
                        int y = (int)(99 * scan.getTotIonCurrent() / maxTIC);
                        if (s > 0)
                            g.drawLine(s - 1, height - yPrev - 1, s, height - y - 1);
                        yPrev = y;
                    }
                }

                ApplicationContext.setMessage("");
            }
            catch (Exception x)
            {
                ApplicationContext.errorMessage("Error generating image", x);
            }
        }
        return _image;
    }


    public int getScanCount()
    {
        return _scans.length;
    }

    public Map<Integer,MSScan> getAllScans()
    {
        return _allScans;
    }

    public MSScan getScanByNum(int num) {
        return this.getAllScans().get(num);
    }

    public MSScan[] getScans()
    {
        return _scans;
    }

    /**
     * Get a scan by index (NOT scan number)
     * @param scanIndex
     * @return
     */
    public MSScan getScan(int scanIndex)
    {
        return _scans[scanIndex];
    }

    public MSScan[] getMS2Scans()
    {
        return _scans2;
    }

    /**
     * get an MS2 scan by index (NOT scan number)
     * @param scanIndex
     * @return
     */
    public MSScan getMS2Scan(int scanIndex)
    {
        return _scans2[scanIndex];
    }

    public MSScan[] getMS3Scans()
    {
        return _scans3;
    }


    // this is the actual range, not the range reported by scan.getLowMz() and scan.getHighMz()
    public FloatRange getMzRange()
    {
        return _mzRange;
    }


    public float[] getTemplateSpectrum()
    {
        if (null == _templateSpectrum)
        {
            // since it's easy find spectrum with most data points
            int scanMax = 0;
            int countMax = 0;
            long totalPeaks = 0;
            for (int s = 0; s < _scans.length; s++)
            {
                totalPeaks += _scans[s].getPeaksCount();
                if (_scans[s].getPeaksCount() > countMax)
                {
                    countMax = _scans[s].getPeaksCount();
                    scanMax = s;
                }
            }
            _log.info(this.getFileName() + " " + totalPeaks + " total peaks");

            float[][] spectrum = _scans[scanMax].getSpectrum();
            _templateSpectrum = Spectrum.GenerateSpectrumTemplate(spectrum[0], getMzRange());
        }
        return _templateSpectrum;
    }


    public int getIndexForFeature(Feature f)
    {
        if (f.getTime() > 0)
            return getIndexForTime(f.getTime());

        int index = getIndexForScanNum(f.getScan());
        if (index<0)
            index= -(index+1);
        return index;
    }


    // maps a scan num in this run to an index
    public int getIndexForScanNum(int num)
    {
        return Arrays.binarySearch(_indexMap, num);
    }

    // maps an MS2 scan num in this run to an index
    public int getIndexForMS2ScanNum(int num)
    {
        return Arrays.binarySearch(_ms2IndexMap, num);
    }

    public int getScanNumForIndex(int index)
    {
        return _indexMap[index];
    }

    public int getScanNumForMS2Index(int index)
    {
        return _ms2IndexMap[index];
    }

    /*
	 * maps a scan num in this run to an index. If returnPrecursor is true, returns the
	 * index of the *previous* scan when the exact scan number is not present; useful for
	 * overlaying MS2 features
	 */
    public int getIndexForScanNum(int num, boolean returnPrecursor)
    {
        int i = Arrays.binarySearch(_indexMap, num);
        if (returnPrecursor && i < 0)
            i = -(i + 1);
        return i;
    }

    // map a time to a location in the scan array
    // useful for overlaying feature sets
    public int getIndexForTime(double time)
    {
        int i = Arrays.binarySearch(_timeMap, time);
        if (i < 0)
            i = -(i+1);
        i = Math.min(i, _scans.length-1);
        if (i == 0)
            return 0;
        double prev = _scans[i-1].getDoubleRetentionTime();
        double next = _scans[i].getDoubleRetentionTime();
        assert prev <= time && time <= next;
        return time-prev < next-time ? i-1 : i;
    }


    public class MSScan implements Serializable, Scan
    {
        //private static final long serialVersionUID = 3460298621454357258L;
        private static final long serialVersionUID = 3460298621454357259L;

        //Was the precursor m/z of this scan corrected by some program (namely msInspect)?
        //dhmay adding 3/31/2008
        protected boolean precursorMzCorrected = false;

        public MSScan(org.systemsbiology.jrap.stax.Scan s)
        {
            _scan = s.getHeader();
            float[][] spectrum = convertSpectrumToFloatArray(s.getMassIntensityList());
            if (null != spectrum)
                _spectrumRef = new SoftReference(spectrum);
        }


        public MSScan(ScanHeader s)
        {
            _scan = s;
        }


        public int getIndex()
        {
            return getIndexForScanNum(getNum());
        }


        /** For performance, use this method for caching of resampled spectrum
         public synchronized float[][] getResampledSpectrum()
         {
         FloatRange r = new FloatRange(getLowMz(), getHighMz());
         float[] s = _resampleRef != null ? (float[])_resampleRef.get() : null;
         if (null == s)
         {
         s = Spectrum.Resample(getSpectrum(), r, 36);
         _resampleRef = new SoftReference(s);
         }
         // CONSIDER: cache shared domain array
         float[] domain = new float[s.length];
         for (int i=0 ; i<domain.length ; i++)
         domain[i] = r.min + i/36.0F;
         return new float[][] {domain, s};
         }
         */

        /**
         * Return spectrum for this scan. Sychronize on run, so that different threads
         * can request spectra. Optionally cache spectra for future use.
         */
        public float[][] getSpectrum()
        {
            synchronized(MSRun.this)
            {
                return _getSpectrumInternal();
            }
        }                                    

        /**
         * can return NULL if thread is interrupted
         */
        private synchronized float[][] _getSpectrumInternal()
        {
//System.err.println("_getSpectrumInternal 1");
            float[][] spectrum = null == _spectrumRef ? null : (float[][])_spectrumRef.get();
            if (null != spectrum)
                return spectrum;
//  System.err.println("_getSpectrumInternal 1, null, will read");        `

            if (null == _fileChannel)
                _initIO();

            Throwable throwable = null;

            for (int ttry = 0 ; null == spectrum && ttry<2 ; ttry++)
            {
                try
                {
//System.err.println("Calling _getSpectrum()");
                    //First try without JRAP.  However,
                    //if it's not an mzXML file (i.e., it's an mzML file) we have to use JRAP, because
                    //the local code here doesn't know how to handle mzML
                    spectrum = _getSpectrum(!_filename.toUpperCase().contains(".MZXML"));
                    break;
                }
                catch (ClosedByInterruptException x)
                {
                }
                catch (ClosedChannelException x)
                {
                }
                catch (IOException x)
                {
                    _log.error(x);
                }
                try
                {
                    _checkIO();
                    continue;
                }
                catch (IOException x)
                {
                    throwable = x;
                }
                if (Thread.currentThread().isInterrupted())
                    return null;
            }
            // end of retry loop
            if (null != throwable)
                _log.error(null, throwable);

            if (null == spectrum)
            {
                //Second try.  Whether or not we tried JRAP before, we're trying it now
                try
                {
                    spectrum = _getSpectrum(true);
                    assert null != spectrum;
                    if (null == spectrum)
                        throw new NullPointerException();
                }
                catch (Throwable x)
                {
                    _log.fatal(x);
                   // System.exit(1);
                }
            }

            // Some files have zero MZ values at beginning and/or end!
            int length = spectrum[0].length;
            if (length > 0 && (spectrum[0][0] == 0.0 || spectrum[0][length-1] == 0.0))
            {
                while (length > 0 && spectrum[0][length-1] == 0.0)
                    length--;
                int start;
                for (start=0 ; start<length && spectrum[0][start] == 0.0 ; start++)
                    ;

                float[][] copy = new float[2][length-start];
                System.arraycopy(spectrum[0], start, copy[0], 0, copy[0].length);
                System.arraycopy(spectrum[1], start, copy[1], 0, copy[1].length);
                spectrum = copy;
            }

            _spectrumRef = new SoftReference(spectrum);
            return spectrum;
        }

        /**
         * use getSpectrum(false) when scanning large portions of the file serially
         * to first try avoiding the XML parser.
         * 
         * dhmay changing 20100727.  This had been checking useSequentialParser and passing it into JRAP,
         * but that's not appropriate here.  Sequential parser is only appropriate for full-file read, and this
         * method is only for reads of partial files.
         */
        private synchronized float[][] _getSpectrum(boolean jrap) throws IOException
        {
            float[][] spectrum = null;

            _log.debug("_getSpectrum, scan " + _scan.getNum());
            long offset = _scan.getScanOffset();            
            _log.debug("_getSpectrum, got scan offset, " + offset + ", jrap? " + jrap);
            if (!jrap && offset > 0)
            {
                // PERF HACK: avoid XML parser at all costs
                int count = _scan.getPeaksCount();
                long len = count * 2 * FLOATBYTES;
                long lenEnc = len / 3 * 4;
                lenEnc += 2048; // room for header
                lenEnc = Math.min(lenEnc, _fileChannel.size() - offset);

                ByteBuffer fileBuf = _fileChannel.map(FileChannel.MapMode.READ_ONLY, offset, lenEnc);
                fileBuf.position(0);
                byte[] buf = new byte[2048];
                fileBuf.get(buf);

                int i;
                findPeakList:
                for (i = 0; i < buf.length-6; i++)
                {
                    if (buf[i] != '<')
                        continue;
                    if (buf[i + 1] != 'p' || buf[i + 2] != 'e' || buf[i + 3] != 'a' || buf[i + 4] != 'k' || buf[i + 5] != 's')
                        continue;
                    for (i += 6; i < buf.length; i++)
                        if (buf[i] == '>')
                            break findPeakList;
                }
                if (i < buf.length)
                {
                    fileBuf.position(i + 1);
                    spectrum = _parseSpectrum(fileBuf, count);
                    if (null != spectrum)
                        return spectrum;
                }
            }
            _log.debug("About to try using JRAP, scan " + _scan.getNum() );

            // retry using JRAP parser
            for (int retry=0 ; retry<2 && null == spectrum; retry++)
            {
                //dhmay adding 20091028.  This null-check was missing, so would get NPE every time we reopened an
                //already-indexed file and encountered a scan with spectrum length 2048 (forcing us to hit the parser).
                //Not sure how this bug lasted this long... possibly introduced recently somehow?
                if (parser == null)
                    parser = new MSXMLParser(_file.getAbsolutePath(), false);
                org.systemsbiology.jrap.stax.Scan tmp = parser.rap(_scan.getNum());

                if (null != tmp)
                {
                    spectrum = convertSpectrumToFloatArray(tmp.getMassIntensityList());
                }
                else
                    parser = new MSXMLParser(_file.getAbsolutePath(), false);
            }
//System.err.println("end");
            return spectrum;
        }

        /**
         * JRAP used to deal with spectra as float[][], now it uses double[][].  This method converts one to
         * the other, which is a temporary waste of space and of CPU time.
         *
         * It might be better to move to storing spectra as double[][].  But this would take a lot of work, and
         * would increase the storage requirements for spectra significantly.
         *
         * TODO: move to storing spectra as double[][]?
         * @param spectrumDouble
         * @return
         */
        protected float[][] convertSpectrumToFloatArray(double[][] spectrumDouble)
        {
            if (spectrumDouble == null)
                return null;
            
            float[][] spectrum = new float[spectrumDouble.length][spectrumDouble[0].length];
            for (int i=0; i<spectrumDouble.length; i++)
            {
                for (int j=0; j<spectrumDouble[0].length; j++)
                    spectrum[i][j] = (float) spectrumDouble[i][j];
            }
            return spectrum;
        }


        private ScanHeader _scan;
        transient SoftReference _spectrumRef = null;
        transient SoftReference _resampleRef = null;

        public int getNum()
        {
            return _scan.getNum();
        }

        public int getMsLevel()
        {
            return _scan.getMsLevel();
        }

        public int getPeaksCount()
        {
            return _scan.getPeaksCount();
        }

        public String getPolarity()
        {
            return _scan.getPolarity();
        }

        public String getScanType()
        {
            return _scan.getScanType();
        }

        public int getCentroided()
        {
            return _scan.getCentroided();
        }

        public int getDeisotoped()
        {
            return _scan.getDeisotoped();
        }

        public int getChargeDeconvoluted()
        {
            return _scan.getChargeDeconvoluted();
        }

        public String getRetentionTime()
        {
            return _scan.getRetentionTime();
        }

        public float getStartMz()
        {
            return _scan.getStartMz();
        }

        public float getEndMz()
        {
            return _scan.getEndMz();
        }

        public float getLowMz()
        {
            return _scan.getLowMz();
        }

        public float getHighMz()
        {
            return _scan.getHighMz();
        }

        public float getBasePeakMz()
        {
            return _scan.getBasePeakMz();
        }

        public float getBasePeakIntensity()
        {
            return _scan.getBasePeakIntensity();
        }

        public float getTotIonCurrent()
        {
            return _scan.getTotIonCurrent();
        }

        public float getPrecursorMz()
        {
            return _scan.getPrecursorMz();
        }

        public void setPrecursorMz(float precursorMz)
        {
            _scan.setPrecursorMz(precursorMz);
        }

        public int getPrecursorScanNum()
        {
            return _scan.getPrecursorScanNum();
        }

        public int getPrecursorCharge()
        {
            return _scan.getPrecursorCharge();
        }

        public void setPrecursorCharge(int charge)
        {
            _scan.setPrecursorCharge(charge);
        }

        public float getCollisionEnergy()
        {
            return _scan.getCollisionEnergy();
        }

        public float getIonisationEnergy()
        {
            return _scan.getIonisationEnergy();
        }

        public int getPrecision()
        {
            return _scan.getPrecision();
        }

        public double getDoubleRetentionTime()
        {
            return _scan.getDoubleRetentionTime();
        }

        public String getFilterLine()
        {
            return _scan.getFilterLine();
        }


        public boolean isPrecursorMzCorrected()
        {
            return precursorMzCorrected;
        }

        public void setPrecursorMzCorrected(boolean precursorMzCorrected)
        {
            this.precursorMzCorrected = precursorMzCorrected;
        }

        public String toString()
        {
            return "MSScan(" + MSRun.this._filename + "," + getNum() + ")";
        }

        private float[][] _parseSpectrum(ByteBuffer buf, int count)
        {
            int len = count * 2 * FLOATBYTES;
            int lenEnc = len / 3 * 4;
            if (0 != len % 3)
                lenEnc += 4;

            // validate buffer size and position
            if (buf.position() + lenEnc + 1 >= buf.limit())
                return null;

            byte trailingByte = buf.get(buf.position() + lenEnc);
            // we expect '<' here, if not, then revert to safe parse
            if ('<' != trailingByte)
                return null;

            // seems to be faster to get byte[] than use buf
            assert readTimer.start();
            if (encodedData == null || encodedData.length < lenEnc)
                encodedData = new byte[Math.max(lenEnc,encodedData==null?128*1024:encodedData.length+4*1024)];
            buf.get(encodedData, 0, lenEnc);
            assert readTimer.stop();

            assert decodeTimer.start();
            byte[] byteData = encodedData; // HACK, we can use same array for output (decoded data is always shorter)
            int lenDecode = Base64.decode(encodedData, 0, lenEnc, byteData);
            assert decodeTimer.stop();

            if (null == byteData) // IO error or bad encoding
                return null;

            if (lenDecode % (FLOATBYTES * 2) != 0)
                return null;

            if (lenDecode / FLOATBYTES / 2 != count)
                return null;
            assert toFloatTimer.start();

            float[][] peakList = new float[2][count];
            for (int i = 0, p = 0, intBits = 0; p < count; p++)
            {
                intBits =
                        (((int)byteData[i]) << 24) |
                                ((((int)byteData[i + 1]) & 0xff) << 16) |
                                ((((int)byteData[i + 2]) & 0xff) << 8) |
                                (((int)byteData[i + 3]) & 0xff);
                i += FLOATBYTES;
                peakList[0][p] = Float.intBitsToFloat(intBits);
                intBits =
                        (((int)byteData[i]) << 24) |
                                ((((int)byteData[i + 1]) & 0xff) << 16) |
                                ((((int)byteData[i + 2]) & 0xff) << 8) |
                                (((int)byteData[i + 3]) & 0xff);
                i += FLOATBYTES;
                peakList[1][p] = Float.intBitsToFloat(intBits);
            }
            assert toFloatTimer.stop();

            return peakList;
        }
    }


    public File getFile()
    {
        return _file;
    }


    public void setFile(File file)
    {
        _file = file;
    }


    public String toString()
    {
        return "MSRun(" + _filename + ")";
    }


    public MZXMLFileInfo getHeaderInfo()
    {
        if (null == _fileInfo)
        {
            try
            {
                MSXMLParser parser = new MSXMLParser(_file.getPath(), useSequentialParser);
                _fileInfo = parser.rapFileHeader();
            }
            catch (IOException e)
            {
                throw new RuntimeException("Failed to parse mzXML file", e);
            }
        }
        return _fileInfo;
    }


    public FeatureSet getTandemFeatureSet(int level)
    {
        if (level != 2 && level != 3)
            throw new IllegalArgumentException();

        MSScan[] scans = level == 2 ? _scans2 : _scans3;
        if (null == scans || 0 == scans.length)
            return null;

        ArrayList list = new ArrayList(scans.length);
        for (int i = 0; i < scans.length; i++)
        {
            MSScan msScan = scans[i];
            Feature f = new Feature(msScan.getNum(), msScan.getPrecursorMz(), msScan.getBasePeakIntensity());
            f.setTime((float)msScan.getDoubleRetentionTime());
            if (msScan.getPrecursorCharge() > 0)
            {
                // If we know precursor charge we can fix up the mass as well
                f.setCharge(msScan.getPrecursorCharge());
                f.updateMass();
            }
            list.add(f);
        }
        Feature[] features = (Feature[])list.toArray(new Feature[0]);
        return new FeatureSet(features);
    }



    /**
     * Helper method to return a subset of the scans of this run
     * @param scanStart
     * @param scanEnd
     * @return the partial array of scans
     */
    public MSRun.MSScan[] getPartialScanArray(int scanStart, int scanEnd)
    {
        if (scanEnd<scanStart)
            return null;
        MSRun.MSScan[] result = new MSRun.MSScan[scanEnd - scanStart + 1];
        for (int scannum = scanStart; scannum <= scanEnd; scannum++)
        {
            int index = this.getIndexForScanNum(scannum);
//TODO: need to add handling for pepXml, negative indexes

            if (index < 0)
                result[scannum - scanStart] = this.getScan((-index) - 1);
            else
                result[scannum - scanStart] = this.getScan(index);
        }
        return result;
    }


}