/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.lcms;
import org.w3c.dom.Document;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathException;
import java.io.OutputStreamWriter;
import java.util.Iterator;
import java.util.regex.Matcher;
/**
 * mzML parser that turns each MS2 spectrum entry into an {@link LCMS2MZSelection}.
 *
 * <p>Spectrum entries whose 'ms level' is not 2, or that lack any of the fields read below, are
 * skipped: {@link #handleSpectrumEntry(Document)} returns {@code null} for them.
 */
public class LCMS2mzMLParser extends MzMLParser<LCMS2MZSelection> {
  /** XPath to the value of the spectrum's 'ms level' cvParam. */
  public static final String SPECTRUM_PATH_MS_LEVEL = "/spectrum/cvParam[@name='ms level']/@value";
  /** XPath to the value of the precursor's 'isolation window target m/z' cvParam. */
  public static final String SPECTRUM_PATH_ISOLATION_WINDOW_TARGET =
      "/spectrum/precursorList/precursor/isolationWindow/cvParam[@name='isolation window target m/z']/@value";
  /** XPath to the value of the precursor's 'selected ion m/z' cvParam. */
  public static final String SPECTRUM_PATH_SELECTED_ION_MZ =
      "/spectrum/precursorList/precursor/selectedIonList/selectedIon/cvParam[@name='selected ion m/z']/@value";
  /** XPath to the value of the precursor's 'collision energy' cvParam. */
  public static final String SPECTRUM_PATH_COLLISION_ENERGY =
      "/spectrum/precursorList/precursor/activation/cvParam[@name='collision energy']/@value";

  /**
   * Extracts the MS2 selection described by a single spectrum document.
   *
   * <p>XPath evaluation never reports an absent node as {@code null}: a {@code NUMBER} result is
   * {@code NaN} and a {@code STRING} result is the empty string. Both are treated as "missing"
   * here so that incomplete entries are skipped rather than returned with NaN/empty fields.
   *
   * @param doc the spectrum document to read; the XPath expressions used are rooted at
   *     {@code /spectrum}
   * @return the parsed selection, or {@code null} if the entry is not an MS2 spectrum, its id does
   *     not match the inherited extraction regex, or a required value is missing
   * @throws XPathException if one of the XPath expressions cannot be evaluated
   * @throws NumberFormatException if the scan group captured from the spectrum id is not an integer
   */
  @Override
  protected LCMS2MZSelection handleSpectrumEntry(Document doc) throws XPathException {
    XPath xpath = XPATH_FACTORY.get().newXPath();

    Double spectrumIndexD = (Double) xpath.evaluate(SPECTRUM_PATH_INDEX, doc, XPathConstants.NUMBER);
    if (isMissing(spectrumIndexD)) {
      System.err.format("WARNING: found spectrum document without index attribute.\n");
      return null;
    }
    Integer spectrumIndex = spectrumIndexD.intValue();

    Double msLevelD = (Double) xpath.evaluate(SPECTRUM_PATH_MS_LEVEL, doc, XPathConstants.NUMBER);
    if (isMissing(msLevelD) || msLevelD.intValue() != 2) {
      // Not MS2 spectrum data (or no 'ms level' present at all): silently skip it.
      return null;
    }

    String spectrumId = (String) xpath.evaluate(SPECTRUM_PATH_ID, doc, XPathConstants.STRING);
    if (isMissing(spectrumId)) {
      System.err.format("WARNING: no spectrum id found for documnt %d\n", spectrumIndex);
      return null;
    }

    Matcher matcher = SPECTRUM_EXTRACTION_REGEX.matcher(spectrumId);
    if (!matcher.find()) {
      System.err.format("WARNING: spectrum id for documnt %d did not match regex: %s\n", spectrumIndex, spectrumId);
      return null;
    }
    // Only the third capture group (the scan number) is used by the returned selection.
    Integer spectrumScan = Integer.parseInt(matcher.group(3));

    Double scanStartTime = (Double) xpath.evaluate(SPECTRUM_PATH_SCAN_START_TIME, doc, XPathConstants.NUMBER);
    if (isMissing(scanStartTime)) {
      System.err.format("WARNING: no scan start time found for spectrum document %d\n", spectrumIndex);
      return null;
    }

    String scanStartTimeUnit =
        (String) xpath.evaluate(SPECTRUM_PATH_SCAN_START_TIME_UNIT, doc, XPathConstants.STRING);
    if (isMissing(scanStartTimeUnit)) {
      System.err.format("WARNING: no scan start time unit found for spectrum document %d\n", spectrumIndex);
      return null;
    }

    Double isolationWindowTarget =
        (Double) xpath.evaluate(SPECTRUM_PATH_ISOLATION_WINDOW_TARGET, doc, XPathConstants.NUMBER);
    if (isMissing(isolationWindowTarget)) {
      System.err.format("WARNING: no isolation window target found for spectrum document %d\n", spectrumIndex);
      return null;
    }

    Double selectedIonMZ = (Double) xpath.evaluate(SPECTRUM_PATH_SELECTED_ION_MZ, doc, XPathConstants.NUMBER);
    if (isMissing(selectedIonMZ)) {
      System.err.format("WARNING: no selection ion m/z found for spectrum document %d\n", spectrumIndex);
      return null;
    }

    Double collisionEnergy = (Double) xpath.evaluate(SPECTRUM_PATH_COLLISION_ENERGY, doc, XPathConstants.NUMBER);
    if (isMissing(collisionEnergy)) {
      System.err.format("WARNING: no collision energy found for spectrum document %d\n", spectrumIndex);
      return null;
    }

    return new LCMS2MZSelection(spectrumIndex, scanStartTime, scanStartTimeUnit, spectrumScan,
        isolationWindowTarget, selectedIonMZ, collisionEnergy);
  }

  /** Returns true if a numeric XPath result is absent, i.e. {@code null} or {@code NaN}. */
  private static boolean isMissing(Double value) {
    return value == null || value.isNaN();
  }

  /** Returns true if a string XPath result is absent, i.e. {@code null} or empty. */
  private static boolean isMissing(String value) {
    return value == null || value.isEmpty();
  }

  /**
   * Command-line entry point: parses the mzML file named by the first argument and prints one line
   * per selection to standard output.
   *
   * <p>Each line holds the index, scan, time value multiplied by 60, selected ion m/z, isolation
   * window target m/z, and collision energy, in that order.
   *
   * @param args command-line arguments; {@code args[0]} is passed to {@code getIterator}
   * @throws Exception if parsing fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 1) {
      System.err.format("Usage: %s <mzML file>\n", LCMS2mzMLParser.class.getName());
      System.exit(1);
    }

    Iterator<LCMS2MZSelection> selections = new LCMS2mzMLParser().getIterator(args[0]);
    while (selections.hasNext()) {
      LCMS2MZSelection s = selections.next();
      System.out.format("%d: %d %f %f %f %f\n", s.getIndex(), s.getScan(), s.getTimeVal() * 60, s.getSelectedIonMZ(),
          s.getIsolationWindowTargetMZ(), s.getCollisionEnergy());
    }
  }
}