/*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU Lesser General Public License as * * published by the Free Software Foundation; either version 2 of the * * License, or (at your option) any later version. * * * ***************************************************************************/ /******************************************************************************* * --------------------------------------------------------------------------- * * File: * @(#) MLScanHeaderParser.java * Author: * Ning Zhang * nzhang@systemsbiology.org * ****************************************************************************** * This software is provided ``AS IS'' and any express or implied * * * warranties, including, but not limited to, the implied warranties of * * * merchantability and fitness for a particular purpose, are disclaimed. * * In * no event shall the authors or the Institute for Systems Biology * * liable * for any direct, indirect, incidental, special, exemplary, or * * * consequential damages (including, but not limited to, procurement of * * * substitute goods or services; loss of use, data, or profits; or * * business * interruption) however caused and on any theory of liability, * * whether in * contract, strict liability, or tort (including negligence * * or otherwise) * arising in any way out of the use of this software, even * * if advised of * the possibility of such damage. * * * * ******************************************************************************/ //support mzML parsing package org.systemsbiology.jrap.stax; import java.io.FileInputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.zip.DataFormatException; import java.util.zip.Inflater; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; /** * dhmay changing 2009/10/21, incorporating Vagisha's changes and rebuilding my changes from 03/2009. * mzML 1.1 changes the way scan IDs are stored. They are now stored in * the "id" attribute of "spectrum", which is being used to contain multiple name-value pairs; the * name of the name-value pair containing the scan number is "scan", so I'm knocking off everything but that pair. * Also changing to cobble together Scan.massIntensityList from its components, which was missed earlier. * Also calling tmpScanHeader.setRetentionTime(), which was previously not set. * * F Levander changing 2010-02-11. Changed parsing of CVparams from names to accession numbers since those are stable. */ public class MLScanAndHeaderParser { public ScanHeader tmpScanHeader; public Scan tmpScan; FileInputStream fileIN = null; boolean isScan = false; public void setIsScan(boolean isScan) { this.isScan = isScan; } public void setFileInputStream(FileInputStream in) { try { this.fileIN = in; } catch(Exception e) { e.printStackTrace(); } } public ScanHeader getHeader() { return tmpScanHeader; } public Scan getScan() { return tmpScan; } /** * dhmay: mzML 1.1 changes the way scan IDs are stored. They are now stored in * the "id" attribute of "spectrum", which is being used to contain multiple name-value pairs; the * name of the name-value pair containing the scan number is "scan", so I'm knocking off everything but that pair. * @param idString * @return The scan number or if a numeric value couldn't be parsed. */ protected int parseScanNumberFromSpectrumIdField(String idString) { int retval=-1; if (idString.contains("scan=")) idString = idString.substring(idString.indexOf("scan=") + "scan=".length()); if (idString.contains("scanId=")) idString = idString.substring(idString.indexOf("scanId=") + "scanId=".length()); if (idString.contains(" ")) idString = idString.substring(0, idString.indexOf(" ")); try { retval=Integer.parseInt(idString); } catch(Exception e) { e.printStackTrace(); } return retval; } public void parseMLScanAndHeader() { XMLStreamReader xmlSR = null; try{ XMLInputFactory inputFactory = XMLInputFactory.newInstance(); xmlSR = inputFactory.createXMLStreamReader(fileIN,"ISO-8859-1"); parseMLScanAndHeader(xmlSR); } catch(Exception e) { String exception1=e.getMessage(); if(!exception1.equals("ScanHeaderEndFoundException")) { if(!exception1.equals("ScanEndFoundException")) e.printStackTrace(); } } finally { if(xmlSR != null) { try { xmlSR.close(); } catch (XMLStreamException e) { e.printStackTrace(); } } } } public void parseMLScanAndHeader(XMLStreamReader xmlSR) throws XMLStreamException { boolean inSpectrum = false; boolean inPeaks = false; boolean isPeaks = false; String elementName = null; String attriName = null; String attriValue = null; StringBuffer peaksBuffer = null; int count = 0; while(xmlSR.hasNext()) { int event = xmlSR.next(); if(event == xmlSR.START_ELEMENT) { elementName = xmlSR.getLocalName(); if(elementName.equals("spectrum")) { inSpectrum = true; count=0; tmpScanHeader = new ScanHeader(); //dhmay changing 2009/03/09. mzML 1.1 changes the way scan IDs are stored tmpScanHeader.setID(getStringValue(xmlSR, "id")); // dont bother with Num too much, the valuable stuff is in ID tmpScanHeader.setNum(getIntValue(xmlSR, "index")+1); tmpScanHeader.setPeaksCount(getIntValue(xmlSR, "defaultArrayLength")); } if(elementName.equals("cvParam")) { attriName = xmlSR.getAttributeValue(null,"name"); String attriAccession = xmlSR.getAttributeValue(null,"accession"); if(inSpectrum) { if(attriAccession.equals("MS:1000511")) tmpScanHeader.setMsLevel(getIntValue(xmlSR, "value")); if(attriAccession.equals("MS:1000127")) tmpScanHeader.setCentroided(1); if(attriAccession.equals("MS:1000504")) tmpScanHeader.setBasePeakMz(getFloatValue(xmlSR, "value")); if(attriAccession.equals("MS:1000505")) tmpScanHeader.setBasePeakIntensity(getFloatValue(xmlSR, "value")); if(attriAccession.equals("MS:1000285")) tmpScanHeader.setTotIonCurrent(getFloatValue(xmlSR, "value")); if(attriAccession.equals("MS:1000528")) tmpScanHeader.setStartMz(getFloatValue(xmlSR, "value")); if(attriAccession.equals("MS:1000527")) tmpScanHeader.setEndMz(getFloatValue(xmlSR, "value")); if(attriAccession.equals("MS:1000501")) tmpScanHeader.setLowMz(getFloatValue(xmlSR, "value")); if(attriAccession.equals("MS:1000500" )) tmpScanHeader.setHighMz(getFloatValue(xmlSR, "value")); if(attriAccession.equals("MS:1000512")) tmpScanHeader.setFilterLine(getStringValue(xmlSR,"value")); if(attriAccession.equals("MS:1000498")) tmpScanHeader.setScanType("full scan"); if(attriAccession.equals("MS:1000130")) tmpScanHeader.setPolarity("+"); //dhmay changed this for mzML 1.1.0RC5,a nd then again for RC6. //Hopefully the name of this attribute will settle down. if(attriAccession.equals("MS:1000016")) { String timeType = xmlSR.getAttributeValue(null,"unitName"); double rt = Double.parseDouble(xmlSR.getAttributeValue(null, "value")); // flevander changed if(timeType.equals("minute")) rt = rt * 60; tmpScanHeader.setRT(rt); //dhmay adding for backward compatibility. Probably this should be rewired so that //getDoubleRetentionTime just accesses the rt variable, but I don't want to sort out //that tangle tmpScanHeader.setRetentionTime("PT" + rt + "S"); } // Precursor m/z from isolation window target m/z or selected ion m/z // Selected m7z comes afterwards and will have precedence if(attriAccession.equals("MS:1000827") || attriAccession.equals("MS:1000744")) tmpScanHeader.setPrecursorMz(getFloatValue(xmlSR,"value")); if(attriAccession.equals("MS:1000042")) tmpScanHeader.setPrecursorIntensity(getFloatValue(xmlSR,"value")); if(attriAccession.equals("MS:1000041")) tmpScanHeader.setPrecursorCharge(getIntValue(xmlSR,"value")); if(attriAccession.equals("MS:1000045")) tmpScanHeader.setCollisionEnergy(getFloatValue(xmlSR,"value")); } if(inPeaks) { if(attriAccession.equals("MS:1000523")) { if(count == 1) tmpScanHeader.setMassPrecision(64); if(count == 2) tmpScanHeader.setIntenPrecision(64); } if(attriAccession.equals("MS:1000521")) { if(count == 1) tmpScanHeader.setMassPrecision(32); if(count == 2) tmpScanHeader.setIntenPrecision(32); } if(attriAccession.equals("MS:1000576")) { if(count == 1) { tmpScanHeader.setMassCompressionType("None"); } if(count == 2) { tmpScanHeader.setIntenCompressionType("None"); } } if(attriAccession.equals("MS:1000574")) { if(count == 1) { tmpScanHeader.setMassCompressionType("zlib"); } if(count == 2) { tmpScanHeader.setIntenCompressionType("zlib"); } } } } if(elementName.equals("binaryDataArrayList")) { if(isScan) { tmpScan = new Scan(); tmpScan.setHeader(tmpScanHeader); } else throw new XMLStreamException("ScanHeaderEndFoundException"); } if(elementName.equals("binaryDataArray")) { inPeaks = true; count++; //System.out.println("count "+count); if(count == 1) tmpScanHeader.setMassCompressedLen(getIntValue(xmlSR, "encodedLength")); if(count == 2) tmpScanHeader.setIntenCompressedLen(getIntValue(xmlSR, "encodedLength")); } if(elementName.equals("binary")) { inPeaks = false; isPeaks = true; peaksBuffer = new StringBuffer(); } } if(event == xmlSR.CHARACTERS) { if(isPeaks) peaksBuffer.append(xmlSR.getText()); } if(event ==xmlSR.END_ELEMENT) { elementName = xmlSR.getLocalName(); if(elementName.equals("spectrumDescription")) { inSpectrum = false; } if(elementName.equals("binary")) { getPeaks(peaksBuffer.toString(),count); isPeaks = false; peaksBuffer = null; } if(elementName.equals("binaryDataArrayList")) { throw new XMLStreamException("ScanEndFoundException"); } } } } public String getStringValue(XMLStreamReader xmlSR, String name) { String value=""; try{ if(xmlSR.getAttributeValue(null,name) == null) value=""; else value=xmlSR.getAttributeValue(null,name); } catch(Exception e) { e.printStackTrace(); } return value; } public int getIntValue(XMLStreamReader xmlSR, String name) { int value=-1; try{ if(xmlSR.getAttributeValue(null,name) == null) value = -1; else value = Integer.parseInt(xmlSR.getAttributeValue(null,name)); } catch(Exception e) { e.printStackTrace(); } return value; } public float getFloatValue(XMLStreamReader xmlSR, String name) { float value=-1f; try{ if(xmlSR.getAttributeValue(null,name) == null) value= -1f; else value=Float.parseFloat(xmlSR.getAttributeValue(null,name)); } catch(Exception e) { e.printStackTrace(); } return value; } public void getPeaks(String peakData, int count) { int precision = -1; if(count == 1) { precision = tmpScanHeader.getMassPrecision(); } if(count == 2) { precision = tmpScanHeader.getIntenPrecision(); } //support non-zlib byte[] peakArray = peakData.getBytes(); byte[] outPeakArray = peakArray; int outpos = Base64.decode(peakArray,0,peakArray.length,outPeakArray); double[] doubleMassList = null; double[] doubleIntenList = null; ByteBuffer peakBuffer = null; //check if it's compressed byte[] result=null; int unCompLen = outpos; String compressType = "None"; if(count == 1) compressType = tmpScanHeader.getMassCompressionType(); if(count == 2) compressType = tmpScanHeader.getIntenCompressionType(); if(compressType.equals("zlib")) { try{ Inflater decompresser = new Inflater(); decompresser.setInput(outPeakArray, 0, outpos); unCompLen = (tmpScanHeader.getPeaksCount())*(precision/4); result = new byte[unCompLen]; decompresser.inflate(result); decompresser.end(); } catch(DataFormatException e) { e.printStackTrace(); } peakBuffer = ByteBuffer.wrap(result); peakBuffer.order(ByteOrder.LITTLE_ENDIAN); } else { peakBuffer = ByteBuffer.wrap(outPeakArray,0,outpos); peakBuffer.order(ByteOrder.LITTLE_ENDIAN); } if(precision == 64) { if(count == 1) { doubleMassList = new double[unCompLen/8]; int i=0; while(peakBuffer.hasRemaining()) { doubleMassList[i] = peakBuffer.getDouble(); i++; } tmpScan.setDoubleMassList(doubleMassList); //System.out.println("massList size "+tmpScan.getDoubleMassList().length); } if(count == 2) { doubleIntenList = new double[unCompLen/8]; int i=0; while(peakBuffer.hasRemaining()) { doubleIntenList[i] = peakBuffer.getDouble(); i++; } tmpScan.setDoubleIntensityList(doubleIntenList); } } else { if(count == 1) { doubleMassList = new double[unCompLen/4]; int i=0; while(peakBuffer.hasRemaining()) { doubleMassList[i] = (double)peakBuffer.getFloat(); i++; } tmpScan.setDoubleMassList(doubleMassList); } if(count == 2) { doubleIntenList = new double[unCompLen/4]; int i=0; while(peakBuffer.hasRemaining()) { doubleIntenList[i] = (double)peakBuffer.getFloat(); i++; } tmpScan.setDoubleIntensityList(doubleIntenList); //System.out.println("intenList size "+tmpScan.getFloatIntensityList().length); } } //dhmay fixing up the massIntensityList, 2009/03/09. This seems to have been missed initially if (count == 2) { //System.err.println("****Setting mass-int list"); double[][] massIntensityList = new double[2][]; massIntensityList[0] = tmpScan.getDoubleMassList(); massIntensityList[1] = tmpScan.getDoubleIntensityList(); tmpScan.setMassIntensityList(massIntensityList); } } }