/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.toolbox.proteomics; import org.apache.log4j.Logger; import org.fhcrc.cpl.toolbox.CPUTimer; import org.fhcrc.cpl.toolbox.proteomics.gui.IntensityPlot; import org.fhcrc.cpl.toolbox.proteomics.feature.Spectrum; import org.fhcrc.cpl.toolbox.proteomics.feature.Feature; import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet; import org.fhcrc.cpl.toolbox.*; import org.fhcrc.cpl.toolbox.gui.chart.PanelWithHistogram; import org.fhcrc.cpl.toolbox.gui.chart.PanelWithScatterPlot; import org.fhcrc.cpl.toolbox.datastructure.FloatArray; import org.fhcrc.cpl.toolbox.datastructure.FloatRange; import org.systemsbiology.jrap.stax.Base64; import org.systemsbiology.jrap.stax.MSXMLParser; import org.systemsbiology.jrap.stax.MZXMLFileInfo; import org.systemsbiology.jrap.stax.ScanHeader; import java.awt.*; import java.awt.image.BufferedImage; import java.io.*; import java.lang.ref.SoftReference; import java.nio.ByteBuffer; import java.nio.channels.ClosedByInterruptException; import java.nio.channels.ClosedChannelException; import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Arrays; import java.util.TreeMap; import java.util.Map; /** * User: mbellew * Date: May 20, 2004 * Time: 9:54:13 AM */ public class MSRun implements Serializable { //flag to control the use of the sequential parser, which doesn't require an mzXML index protected boolean useSequentialParser = true; // protected boolean showTimeCharts = false; private static Logger _log = Logger.getLogger(MSRun.class); private static final int FLOATBYTES = 4; // private static final long serialVersionUID = 8280766319681981127L; private static final long serialVersionUID = 8280766319681981128L; static float IMAGE_THRESHOLD = 10; // source file info String _filename; long _lastModified; long _filelength; private transient File _file; // for retrieving spectrum data transient FileChannel _fileChannel; transient MSXMLParser parser; MSScan[] _scans; FloatRange _mzRange; transient float[] _templateSpectrum = null; // BufferedImage is not serializable transient BufferedImage _image; float[] _scanArray; float[] _mzArray; float[] _intensityArray; // OK, these are redundant, but it's makes getIndex() easier transient int[] _indexMap; transient double[] _timeMap; //dhmay adding for ms2 scan indexing transient int[] _ms2IndexMap; transient MZXMLFileInfo _fileInfo = null; //dhmay removing "transient" 2/8/06 -- need MS2 data for AMT, and Mark thinks //MS3 data might be useful in the future MSScan[] _scans2 = null; MSScan[] _scans3 = null; TreeMap<Integer,MSScan> _allScans = null; private static boolean showIndexBuilderProgress = true; // used by Scan.getSpectrum() static CPUTimer readTimer = new CPUTimer("scan read"); static CPUTimer decodeTimer = new CPUTimer("scan decode"); static CPUTimer toFloatTimer = new CPUTimer("scan toFloat"); transient private byte[] encodedData = null; private MSRun(String path) throws IOException { File f = new File(path); if (!f.exists()) throw new FileNotFoundException(path); _file = f; _filename = f.getName(); _lastModified = f.lastModified(); _filelength = f.length(); // data to generate BufferedImage FloatArray scanArray = new FloatArray(); FloatArray mzArray = new FloatArray(); FloatArray intensityArray = new FloatArray(); _allScans = new TreeMap<Integer,MSScan>(); try { ApplicationContext.setMessage("Building index file: first pass"); if (null != ApplicationContext.getFrame()) ApplicationContext.getFrame().setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR)); parser = new MSXMLParser(path, useSequentialParser); int count = parser.getScanCount(); _log.debug("JRAP scan count: " + count); int percent = Math.max(1, count / 100); float min = Float.MAX_VALUE; float max = 0.0F; int precursorScan = 0; //_scans = new MSScan[count]; ArrayList list = new ArrayList(); ArrayList list2 = new ArrayList(); ArrayList list3 = new ArrayList(); int currentNumScansPresent = 0; //long start = -1L; //long finish = -1L; //dhmay switching on sequential parser int loopMax = useSequentialParser ? count : parser.getMaxScanNumber(); long jrapStart = System.currentTimeMillis(); //for bookkeeping. Not populated if showTimeCharts is false; java.util.List<Float> scanLoadTimes = new ArrayList<Float>(); java.util.List<Float> scanSizes = new ArrayList<Float>(); for (int i = 1; i <= loopMax; i++) { if (0 == (i % percent)) { if (showIndexBuilderProgress) ApplicationContext.setMessage("Building index file: " + (currentNumScansPresent * 100 / Math.max(1,count)) + "%"); Thread.yield(); } //start = System.currentTimeMillis(); //dhmay switching on sequential parser ScanHeader scan = useSequentialParser ? parser.nextHeader() : parser.rapHeader(i); //finish = System.currentTimeMillis(); /* if (showTimeCharts) { scanLoadTimes.add((float) (finish-start)); scanSizes.add((float)scan.getPeaksCount()); } */ //System.out.println("get scanheader for scan "+(i-1)+" "+(finish-start)+" ms"); //System.err.println("Scan index " + i); if (scan == null) continue; //System.err.println("\tScan #" + scan.getNum()); currentNumScansPresent++; MSScan msScan = new MSScan(scan); _allScans.put(i,msScan); // ???? Consider: might be better to accept only msLevel=1 scanType=Full|unspecified if ("calibration".equals(scan.getScanType())) continue; if ("zoom".equals(scan.getScanType())) continue; //System.err.println("*2"); // Some ABI 4800 files contain partially filled headers when a scan detects no peaks // allowing ms2 scans with 0 peaks for MRM analyses if (scan.getMsLevel() == 1 && scan.getPeaksCount() <= 0) continue; // System.err.println("*3, level=" + scan.getMsLevel() + ", num=" + scan.getNum()); if (scan.getMsLevel() == 1) precursorScan = scan.getNum(); else { if (-1 == scan.getPrecursorScanNum()) scan.setPrecursorScanNum(precursorScan); if (scan.getMsLevel() == 2) list2.add(msScan); else if (scan.getMsLevel() == 3) list3.add(msScan); continue; } int index = list.size(); list.add(msScan); // System.err.println("List size: " + index + ", added"); float[][] spectrum = msScan.getSpectrum(); // System.err.println("*4, " + spectrum.length); if (scan.getMsLevel() == 1 && spectrum[0].length > 0) { min = Math.min(min, spectrum[0][0]); max = Math.max(max, spectrum[0][spectrum[0].length - 1]); if (min < 0 || max > 1e5) { RuntimeException x = new RuntimeException("Suspect mzxml data file, scan=" + i + " mz=" + (min < 0 ? min : max)); ApplicationContext.errorMessage(null, x); throw x; } } _computeImagePoints(index, spectrum, scanArray, mzArray, intensityArray); } /* //dhmay adding conditional showing of charts if (showTimeCharts) { System.err.println("Total ms in JRAP: " + (System.currentTimeMillis() - jrapStart)); PanelWithHistogram pwh = new PanelWithHistogram(scanLoadTimes, "Scan load ms"); pwh.setBreaks(200); pwh.displayInTab(); new PanelWithScatterPlot(scanSizes, scanLoadTimes, "scan size (x) vs load time (y)").displayInTab(); } */ _log.debug("Total ms in JRAP: "+(System.currentTimeMillis() - jrapStart)); _scans = (MSScan[])list.toArray(new MSScan[0]); _scans2 = (MSScan[])list2.toArray(new MSScan[0]); _scans3 = (MSScan[])list3.toArray(new MSScan[0]); _mzRange = new FloatRange(min, max); _scanArray = scanArray.toArray(null); _mzArray = mzArray.toArray(null); _intensityArray = intensityArray.toArray(null); _initMaps(); } finally { if (null != ApplicationContext.getFrame()) ApplicationContext.getFrame().setCursor(Cursor.getDefaultCursor()); ApplicationContext.setMessage(""); } } private void _initMaps() { _indexMap = new int[_scans.length]; _timeMap = new double[_scans.length]; for (int i = 0; i < _scans.length; i++) { _indexMap[i] = _scans[i].getNum(); _timeMap[i] = _scans[i].getDoubleRetentionTime(); } //dhmay adding for ms2 scan indexing _ms2IndexMap = new int[_scans2.length]; for (int i = 0; i < _scans2.length; i++) { _ms2IndexMap[i] = _scans2[i].getNum(); } } /** * Turn index builder progress on or off (useful for batch scripts) */ public static void setShowIndexBuilderProgress(boolean b) { showIndexBuilderProgress = b; } public String getFileName() { return _filename; } /* We have a bit of a mismatch with IntensityPlot * reduce and convert spectrum to expected format */ private void _computeImagePoints(float scan, float[][] spectrum, FloatArray scanArray, FloatArray mzArray, FloatArray intensityArray) { int len = spectrum[0].length; if (len == 0) return; int mzCurrent = (int)spectrum[0][0]; float intensityMax = (int)spectrum[1][0]; for (int i = 1; i < len; i++) { int mz = (int)spectrum[0][i]; float intensity = spectrum[1][i]; if (mz == mzCurrent) { intensityMax = intensityMax > intensity ? intensityMax : intensity; } else { if (intensityMax > IMAGE_THRESHOLD) { scanArray.add(scan); mzArray.add((float)mzCurrent); intensityArray.add(intensityMax); } mzCurrent = mz; intensityMax = intensity; } } scanArray.add(scan); mzArray.add((float)mzCurrent); intensityArray.add(intensityMax); } public static String _indexName(String filename) { File f = new File(filename); String s = f.getName() + ".inspect"; if (null != f.getParent()) s = f.getParent() + "/" + s; return s; } /* private static String _imageName(String filename) { File f = new File(filename); String s = f.getTextCode() + ".png"; if (null != f.getParent()) s = f.getParent() + "/" + s; return s; } */ static MSRun _loadFromIndex(String path, String indexname) { File sourceFile = new File(path); File indexFile = new File(indexname); if (!indexFile.exists() || !sourceFile.exists()) return null; _log.debug("Found .inspect file " + indexFile.getAbsolutePath() + ", checking..."); if (indexFile.lastModified() < sourceFile.lastModified()) { _log.debug(".inspect file is older than mzXML file, not using."); return null; } FileInputStream in = null; try { in = new FileInputStream(indexFile); ObjectInputStream ois = new ObjectInputStream(in); Object o = ois.readObject(); MSRun run = (MSRun)o; // don't check to make it easier to rename both files together //if (!sourceFile.getTextCode().equals(run._filename)) // return null; if (sourceFile.lastModified() != run._lastModified) { _log.debug("Incorrect modified date for mzXML file in .inspect file"); return null; } if (sourceFile.length() != run._filelength) { _log.debug("Bad source file length in .inspect file"); return null; } if (null == run._scanArray || null == run._mzArray || null == run._intensityArray) return null; if (null == run._mzRange) { _log.debug("Null _mzRange in .inspect file"); return null; } // If index file contained no scans, assume it is busted. // dhmay 9/19/2007 removing a check for at least one MS1 scan, which fails on MRM files if (null == run._scans) { _log.debug("Null _scans in .inspect file"); return null; } // dhmay adding 2/8/06. If _null_ ms2/3 scans (empty is ok), assume it is busted if (null == run._scans2) { _log.debug("Null _scans2 in .inspect file"); return null; } if (null == run._scans3) { _log.debug("Null _scans3 in .inspect file"); return null; } run._initMaps(); run._file = sourceFile; run._filename = sourceFile.getName(); _log.debug("Index file loaded successfully"); return run; } catch (FileNotFoundException x) { _log.debug("FileNotFoundException on .inspect file"); } catch (IOException x) { _log.debug("IOException on index file"); } catch (ClassNotFoundException x) { _log.debug("ClassNotFoundException on .inspect file"); } catch (ClassCastException x) { _log.debug("ClassCastException on .inspect file"); } finally { if (null != in) try { in.close(); } catch (IOException x) { } } return null; } boolean _writeIndex(String indexname) { File f = new File(indexname); try { // CONSIDER: use Deflater if (f.exists()) f.delete(); f.createNewFile(); FileOutputStream out = new FileOutputStream(indexname); ObjectOutputStream oos = new ObjectOutputStream(out); oos.writeObject(this); out.close(); } catch (FileNotFoundException x) { ApplicationContext.infoMessage(TextProvider.getText("WARNING_FAILED_TO_WRITE_AUXILIARY_FILE_FILE", f.getAbsolutePath())); } catch (IOException x) { ApplicationContext.infoMessage(TextProvider.getText("WARNING_FAILED_TO_WRITE_AUXILIARY_FILE_FILE", f.getAbsolutePath())); } return false; } public static MSRun load(String filename) throws IOException { return load(filename, true); // Write both .inspect and .ms2.tsv files unless run was read from index file } public static MSRun load(String filename, boolean writeIndex) throws IOException { if (!(new File(filename).exists())) throw new FileNotFoundException(); String indexName = _indexName(filename); MSRun run = _loadFromIndex(filename, indexName); if (null == run) { _log.debug("No valid index file found, loading from mzXML"); run = new MSRun(filename); if (writeIndex) { ApplicationContext.setMessage("Writing .inspect file..."); run._writeIndex(indexName); /* NOTE: should really make saving these an Action, but since we've just read the .mzxml file, * I'd hate to have to read it again. */ FeatureSet fs = run.getTandemFeatureSet(2); if (null != fs) { ApplicationContext.setMessage("Writing MS2 features..."); File ms2File = new File(run.getFile().getPath() + ".ms2.tsv"); try { fs.save(ms2File); } catch (IOException e) { ApplicationContext.infoMessage( TextProvider.getText( "WARNING_FAILED_TO_WRITE_AUXILIARY_FILE_FILE", ms2File.getAbsolutePath())); } } fs = run.getTandemFeatureSet(3); if (null != fs) { ApplicationContext.setMessage("Writing MS3 features..."); File ms3File = new File(run.getFile().getPath() + ".ms2.tsv"); try { fs.save(new File(run.getFile().getPath() + ".ms3.tsv")); } catch (IOException e) { ApplicationContext.infoMessage( TextProvider.getText( "WARNING_FAILED_TO_WRITE_AUXILIARY_FILE_FILE", ms3File.getAbsolutePath())); } } } ApplicationContext.setMessage(""); } return run; } private long _checkIO() throws IOException { if (null != _fileChannel && _fileChannel.isOpen()) { try { long size = _fileChannel.size(); return size; } catch (IOException x) { } } close(); _initIO(); return _fileChannel.size(); } private void _initIO() { String path = _file.getPath(); try { _fileChannel = new RandomAccessFile(path, "r").getChannel(); } catch (Exception x) { close(); ApplicationContext.errorMessage(null, x); } } public void close() { if (null != _fileChannel) try { _fileChannel.close(); } catch (IOException x) { } _fileChannel = null; } protected void finalize() { close(); } public void invalidateImage() { _image = null; } public BufferedImage getImage(String colorScheme) { if (null == _image) { try { ApplicationContext.setMessage("Building image..."); IntensityPlot plot = new IntensityPlot(); float median = Spectrum.MedianSampled(_intensityArray, false); float threshold = median / 2; plot.setData(FloatArray.asFloatArray(_scanArray), FloatArray.asFloatArray(_mzArray), FloatArray.asFloatArray(_intensityArray)); _image = plot.plot(threshold, true, colorScheme); float maxTIC = 0.0F; for (int s = 0; s < getScanCount(); s++) { MSScan scan = getScan(s); maxTIC = Math.max(maxTIC, scan.getTotIonCurrent()); } // TIC Chart if (true) { Graphics g = _image.getGraphics(); g.setColor(Color.BLACK); int height = _image.getHeight(); int yPrev = 0; for (int s = 0; s < getScanCount(); s++) { MSScan scan = getScan(s); int y = (int)(99 * scan.getTotIonCurrent() / maxTIC); if (s > 0) g.drawLine(s - 1, height - yPrev - 1, s, height - y - 1); yPrev = y; } } ApplicationContext.setMessage(""); } catch (Exception x) { ApplicationContext.errorMessage("Error generating image", x); } } return _image; } public int getScanCount() { return _scans.length; } public Map<Integer,MSScan> getAllScans() { return _allScans; } public MSScan getScanByNum(int num) { return this.getAllScans().get(num); } public MSScan[] getScans() { return _scans; } /** * Get a scan by index (NOT scan number) * @param scanIndex * @return */ public MSScan getScan(int scanIndex) { return _scans[scanIndex]; } public MSScan[] getMS2Scans() { return _scans2; } /** * get an MS2 scan by index (NOT scan number) * @param scanIndex * @return */ public MSScan getMS2Scan(int scanIndex) { return _scans2[scanIndex]; } public MSScan[] getMS3Scans() { return _scans3; } // this is the actual range, not the range reported by scan.getLowMz() and scan.getHighMz() public FloatRange getMzRange() { return _mzRange; } public float[] getTemplateSpectrum() { if (null == _templateSpectrum) { // since it's easy find spectrum with most data points int scanMax = 0; int countMax = 0; long totalPeaks = 0; for (int s = 0; s < _scans.length; s++) { totalPeaks += _scans[s].getPeaksCount(); if (_scans[s].getPeaksCount() > countMax) { countMax = _scans[s].getPeaksCount(); scanMax = s; } } _log.info(this.getFileName() + " " + totalPeaks + " total peaks"); float[][] spectrum = _scans[scanMax].getSpectrum(); _templateSpectrum = Spectrum.GenerateSpectrumTemplate(spectrum[0], getMzRange()); } return _templateSpectrum; } public int getIndexForFeature(Feature f) { if (f.getTime() > 0) return getIndexForTime(f.getTime()); int index = getIndexForScanNum(f.getScan()); if (index<0) index= -(index+1); return index; } // maps a scan num in this run to an index public int getIndexForScanNum(int num) { return Arrays.binarySearch(_indexMap, num); } // maps an MS2 scan num in this run to an index public int getIndexForMS2ScanNum(int num) { return Arrays.binarySearch(_ms2IndexMap, num); } public int getScanNumForIndex(int index) { return _indexMap[index]; } public int getScanNumForMS2Index(int index) { return _ms2IndexMap[index]; } /* * maps a scan num in this run to an index. If returnPrecursor is true, returns the * index of the *previous* scan when the exact scan number is not present; useful for * overlaying MS2 features */ public int getIndexForScanNum(int num, boolean returnPrecursor) { int i = Arrays.binarySearch(_indexMap, num); if (returnPrecursor && i < 0) i = -(i + 1); return i; } // map a time to a location in the scan array // useful for overlaying feature sets public int getIndexForTime(double time) { int i = Arrays.binarySearch(_timeMap, time); if (i < 0) i = -(i+1); i = Math.min(i, _scans.length-1); if (i == 0) return 0; double prev = _scans[i-1].getDoubleRetentionTime(); double next = _scans[i].getDoubleRetentionTime(); assert prev <= time && time <= next; return time-prev < next-time ? i-1 : i; } public class MSScan implements Serializable, Scan { //private static final long serialVersionUID = 3460298621454357258L; private static final long serialVersionUID = 3460298621454357259L; //Was the precursor m/z of this scan corrected by some program (namely msInspect)? //dhmay adding 3/31/2008 protected boolean precursorMzCorrected = false; public MSScan(org.systemsbiology.jrap.stax.Scan s) { _scan = s.getHeader(); float[][] spectrum = convertSpectrumToFloatArray(s.getMassIntensityList()); if (null != spectrum) _spectrumRef = new SoftReference(spectrum); } public MSScan(ScanHeader s) { _scan = s; } public int getIndex() { return getIndexForScanNum(getNum()); } /** For performance, use this method for caching of resampled spectrum public synchronized float[][] getResampledSpectrum() { FloatRange r = new FloatRange(getLowMz(), getHighMz()); float[] s = _resampleRef != null ? (float[])_resampleRef.get() : null; if (null == s) { s = Spectrum.Resample(getSpectrum(), r, 36); _resampleRef = new SoftReference(s); } // CONSIDER: cache shared domain array float[] domain = new float[s.length]; for (int i=0 ; i<domain.length ; i++) domain[i] = r.min + i/36.0F; return new float[][] {domain, s}; } */ /** * Return spectrum for this scan. Sychronize on run, so that different threads * can request spectra. Optionally cache spectra for future use. */ public float[][] getSpectrum() { synchronized(MSRun.this) { return _getSpectrumInternal(); } } /** * can return NULL if thread is interrupted */ private synchronized float[][] _getSpectrumInternal() { //System.err.println("_getSpectrumInternal 1"); float[][] spectrum = null == _spectrumRef ? null : (float[][])_spectrumRef.get(); if (null != spectrum) return spectrum; // System.err.println("_getSpectrumInternal 1, null, will read"); ` if (null == _fileChannel) _initIO(); Throwable throwable = null; for (int ttry = 0 ; null == spectrum && ttry<2 ; ttry++) { try { //System.err.println("Calling _getSpectrum()"); //First try without JRAP. However, //if it's not an mzXML file (i.e., it's an mzML file) we have to use JRAP, because //the local code here doesn't know how to handle mzML spectrum = _getSpectrum(!_filename.toUpperCase().contains(".MZXML")); break; } catch (ClosedByInterruptException x) { } catch (ClosedChannelException x) { } catch (IOException x) { _log.error(x); } try { _checkIO(); continue; } catch (IOException x) { throwable = x; } if (Thread.currentThread().isInterrupted()) return null; } // end of retry loop if (null != throwable) _log.error(null, throwable); if (null == spectrum) { //Second try. Whether or not we tried JRAP before, we're trying it now try { spectrum = _getSpectrum(true); assert null != spectrum; if (null == spectrum) throw new NullPointerException(); } catch (Throwable x) { _log.fatal(x); // System.exit(1); } } // Some files have zero MZ values at beginning and/or end! int length = spectrum[0].length; if (length > 0 && (spectrum[0][0] == 0.0 || spectrum[0][length-1] == 0.0)) { while (length > 0 && spectrum[0][length-1] == 0.0) length--; int start; for (start=0 ; start<length && spectrum[0][start] == 0.0 ; start++) ; float[][] copy = new float[2][length-start]; System.arraycopy(spectrum[0], start, copy[0], 0, copy[0].length); System.arraycopy(spectrum[1], start, copy[1], 0, copy[1].length); spectrum = copy; } _spectrumRef = new SoftReference(spectrum); return spectrum; } /** * use getSpectrum(false) when scanning large portions of the file serially * to first try avoiding the XML parser. * * dhmay changing 20100727. This had been checking useSequentialParser and passing it into JRAP, * but that's not appropriate here. Sequential parser is only appropriate for full-file read, and this * method is only for reads of partial files. */ private synchronized float[][] _getSpectrum(boolean jrap) throws IOException { float[][] spectrum = null; _log.debug("_getSpectrum, scan " + _scan.getNum()); long offset = _scan.getScanOffset(); _log.debug("_getSpectrum, got scan offset, " + offset + ", jrap? " + jrap); if (!jrap && offset > 0) { // PERF HACK: avoid XML parser at all costs int count = _scan.getPeaksCount(); long len = count * 2 * FLOATBYTES; long lenEnc = len / 3 * 4; lenEnc += 2048; // room for header lenEnc = Math.min(lenEnc, _fileChannel.size() - offset); ByteBuffer fileBuf = _fileChannel.map(FileChannel.MapMode.READ_ONLY, offset, lenEnc); fileBuf.position(0); byte[] buf = new byte[2048]; fileBuf.get(buf); int i; findPeakList: for (i = 0; i < buf.length-6; i++) { if (buf[i] != '<') continue; if (buf[i + 1] != 'p' || buf[i + 2] != 'e' || buf[i + 3] != 'a' || buf[i + 4] != 'k' || buf[i + 5] != 's') continue; for (i += 6; i < buf.length; i++) if (buf[i] == '>') break findPeakList; } if (i < buf.length) { fileBuf.position(i + 1); spectrum = _parseSpectrum(fileBuf, count); if (null != spectrum) return spectrum; } } _log.debug("About to try using JRAP, scan " + _scan.getNum() ); // retry using JRAP parser for (int retry=0 ; retry<2 && null == spectrum; retry++) { //dhmay adding 20091028. This null-check was missing, so would get NPE every time we reopened an //already-indexed file and encountered a scan with spectrum length 2048 (forcing us to hit the parser). //Not sure how this bug lasted this long... possibly introduced recently somehow? if (parser == null) parser = new MSXMLParser(_file.getAbsolutePath(), false); org.systemsbiology.jrap.stax.Scan tmp = parser.rap(_scan.getNum()); if (null != tmp) { spectrum = convertSpectrumToFloatArray(tmp.getMassIntensityList()); } else parser = new MSXMLParser(_file.getAbsolutePath(), false); } //System.err.println("end"); return spectrum; } /** * JRAP used to deal with spectra as float[][], now it uses double[][]. This method converts one to * the other, which is a temporary waste of space and of CPU time. * * It might be better to move to storing spectra as double[][]. But this would take a lot of work, and * would increase the storage requirements for spectra significantly. * * TODO: move to storing spectra as double[][]? * @param spectrumDouble * @return */ protected float[][] convertSpectrumToFloatArray(double[][] spectrumDouble) { if (spectrumDouble == null) return null; float[][] spectrum = new float[spectrumDouble.length][spectrumDouble[0].length]; for (int i=0; i<spectrumDouble.length; i++) { for (int j=0; j<spectrumDouble[0].length; j++) spectrum[i][j] = (float) spectrumDouble[i][j]; } return spectrum; } private ScanHeader _scan; transient SoftReference _spectrumRef = null; transient SoftReference _resampleRef = null; public int getNum() { return _scan.getNum(); } public int getMsLevel() { return _scan.getMsLevel(); } public int getPeaksCount() { return _scan.getPeaksCount(); } public String getPolarity() { return _scan.getPolarity(); } public String getScanType() { return _scan.getScanType(); } public int getCentroided() { return _scan.getCentroided(); } public int getDeisotoped() { return _scan.getDeisotoped(); } public int getChargeDeconvoluted() { return _scan.getChargeDeconvoluted(); } public String getRetentionTime() { return _scan.getRetentionTime(); } public float getStartMz() { return _scan.getStartMz(); } public float getEndMz() { return _scan.getEndMz(); } public float getLowMz() { return _scan.getLowMz(); } public float getHighMz() { return _scan.getHighMz(); } public float getBasePeakMz() { return _scan.getBasePeakMz(); } public float getBasePeakIntensity() { return _scan.getBasePeakIntensity(); } public float getTotIonCurrent() { return _scan.getTotIonCurrent(); } public float getPrecursorMz() { return _scan.getPrecursorMz(); } public void setPrecursorMz(float precursorMz) { _scan.setPrecursorMz(precursorMz); } public int getPrecursorScanNum() { return _scan.getPrecursorScanNum(); } public int getPrecursorCharge() { return _scan.getPrecursorCharge(); } public void setPrecursorCharge(int charge) { _scan.setPrecursorCharge(charge); } public float getCollisionEnergy() { return _scan.getCollisionEnergy(); } public float getIonisationEnergy() { return _scan.getIonisationEnergy(); } public int getPrecision() { return _scan.getPrecision(); } public double getDoubleRetentionTime() { return _scan.getDoubleRetentionTime(); } public String getFilterLine() { return _scan.getFilterLine(); } public boolean isPrecursorMzCorrected() { return precursorMzCorrected; } public void setPrecursorMzCorrected(boolean precursorMzCorrected) { this.precursorMzCorrected = precursorMzCorrected; } public String toString() { return "MSScan(" + MSRun.this._filename + "," + getNum() + ")"; } private float[][] _parseSpectrum(ByteBuffer buf, int count) { int len = count * 2 * FLOATBYTES; int lenEnc = len / 3 * 4; if (0 != len % 3) lenEnc += 4; // validate buffer size and position if (buf.position() + lenEnc + 1 >= buf.limit()) return null; byte trailingByte = buf.get(buf.position() + lenEnc); // we expect '<' here, if not, then revert to safe parse if ('<' != trailingByte) return null; // seems to be faster to get byte[] than use buf assert readTimer.start(); if (encodedData == null || encodedData.length < lenEnc) encodedData = new byte[Math.max(lenEnc,encodedData==null?128*1024:encodedData.length+4*1024)]; buf.get(encodedData, 0, lenEnc); assert readTimer.stop(); assert decodeTimer.start(); byte[] byteData = encodedData; // HACK, we can use same array for output (decoded data is always shorter) int lenDecode = Base64.decode(encodedData, 0, lenEnc, byteData); assert decodeTimer.stop(); if (null == byteData) // IO error or bad encoding return null; if (lenDecode % (FLOATBYTES * 2) != 0) return null; if (lenDecode / FLOATBYTES / 2 != count) return null; assert toFloatTimer.start(); float[][] peakList = new float[2][count]; for (int i = 0, p = 0, intBits = 0; p < count; p++) { intBits = (((int)byteData[i]) << 24) | ((((int)byteData[i + 1]) & 0xff) << 16) | ((((int)byteData[i + 2]) & 0xff) << 8) | (((int)byteData[i + 3]) & 0xff); i += FLOATBYTES; peakList[0][p] = Float.intBitsToFloat(intBits); intBits = (((int)byteData[i]) << 24) | ((((int)byteData[i + 1]) & 0xff) << 16) | ((((int)byteData[i + 2]) & 0xff) << 8) | (((int)byteData[i + 3]) & 0xff); i += FLOATBYTES; peakList[1][p] = Float.intBitsToFloat(intBits); } assert toFloatTimer.stop(); return peakList; } } public File getFile() { return _file; } public void setFile(File file) { _file = file; } public String toString() { return "MSRun(" + _filename + ")"; } public MZXMLFileInfo getHeaderInfo() { if (null == _fileInfo) { try { MSXMLParser parser = new MSXMLParser(_file.getPath(), useSequentialParser); _fileInfo = parser.rapFileHeader(); } catch (IOException e) { throw new RuntimeException("Failed to parse mzXML file", e); } } return _fileInfo; } public FeatureSet getTandemFeatureSet(int level) { if (level != 2 && level != 3) throw new IllegalArgumentException(); MSScan[] scans = level == 2 ? _scans2 : _scans3; if (null == scans || 0 == scans.length) return null; ArrayList list = new ArrayList(scans.length); for (int i = 0; i < scans.length; i++) { MSScan msScan = scans[i]; Feature f = new Feature(msScan.getNum(), msScan.getPrecursorMz(), msScan.getBasePeakIntensity()); f.setTime((float)msScan.getDoubleRetentionTime()); if (msScan.getPrecursorCharge() > 0) { // If we know precursor charge we can fix up the mass as well f.setCharge(msScan.getPrecursorCharge()); f.updateMass(); } list.add(f); } Feature[] features = (Feature[])list.toArray(new Feature[0]); return new FeatureSet(features); } /** * Helper method to return a subset of the scans of this run * @param scanStart * @param scanEnd * @return the partial array of scans */ public MSRun.MSScan[] getPartialScanArray(int scanStart, int scanEnd) { if (scanEnd<scanStart) return null; MSRun.MSScan[] result = new MSRun.MSScan[scanEnd - scanStart + 1]; for (int scannum = scanStart; scannum <= scanEnd; scannum++) { int index = this.getIndexForScanNum(scannum); //TODO: need to add handling for pepXml, negative indexes if (index < 0) result[scannum - scanStart] = this.getScan((-index) - 1); else result[scannum - scanStart] = this.getScan(index); } return result; } }