/*
 * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fhcrc.cpl.viewer.util;

import org.fhcrc.cpl.toolbox.filehandler.SimpleXMLEventRewriter;
import org.apache.log4j.Logger;

import java.io.IOException;
import java.util.ArrayList;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import javax.xml.stream.events.Attribute;

/**
 * Split the "heavy" or "light" channel of a pepXML file into a new pepXML
 * file.  Strips out analysis results as well (since some, like PeptideProphet,
 * will no longer be valid for a subset).
 *
 * TODO: emits one blank line for each excluded analysis/spectrum block
 */
public class PepXmlChannelSplitter
{
    private static final Logger _log = Logger.getLogger(PepXmlChannelSplitter.class);

    /**
     * Rewrite <code>inFilename</code> to <code>outFilename</code>, keeping only
     * the spectrum queries that belong to the requested channel.
     *
     * @param inFilename      path of the pepXML file to read
     * @param labeledResidue  one-letter code of the residue that carries the label
     * @param preserveLabeled <code>true</code> to keep fully labeled ("heavy")
     *                        hits, <code>false</code> to keep unlabeled-but-labelable
     *                        ("light") hits; hits without the residue are always kept
     * @param outFilename     path of the pepXML file to write
     * @throws IOException        propagated from the rewriter
     * @throws XMLStreamException propagated from the rewriter
     */
    public static void split(String inFilename, char labeledResidue, boolean preserveLabeled, String outFilename)
        throws IOException, XMLStreamException
    {
        PepXmlSplitRewriter splitter =
            new PepXmlSplitRewriter(inFilename, labeledResidue, preserveLabeled, outFilename);
        try
        {
            splitter.rewrite();
        }
        finally
        {
            // Always release the rewriter, even when rewriting fails part way.
            splitter.close();
        }
    }

    /**
     * Event rewriter that drops analysis blocks and buffers every
     * <code>spectrum_query</code> element until its closing tag, at which point
     * the buffered events are either written out or discarded depending on the
     * label state of the search hit seen inside it.
     *
     * NOTE(review): the handle* methods are presumably callbacks invoked by
     * SimpleXMLEventRewriter.rewrite(); confirm against the base class.
     */
    static class PepXmlSplitRewriter extends SimpleXMLEventRewriter
    {
        /** Tolerance used when comparing a modification mass to the label mass. */
        private static final float EPSILON = .001f;

        // Element names this rewriter reacts to
        private static final String ANALYSIS_SUMMARY = "analysis_summary";
        private static final String ANALYSIS_RESULT = "analysis_result";
        private static final String SPECTRUM_QUERY = "spectrum_query";
        private static final String AMINOACID_MODIFICATION = "aminoacid_modification";
        private static final String SEARCH_HIT = "search_hit";
        private static final String MOD_AMINOACID_MASS = "mod_aminoacid_mass";

        // Attribute names, built once rather than on every event
        private static final QName ATTR_AMINOACID = new QName("aminoacid");
        private static final QName ATTR_VARIABLE = new QName("variable");
        private static final QName ATTR_MASS = new QName("mass");
        private static final QName ATTR_PEPTIDE = new QName("peptide");

        String inFilename;
        String outFilename;
        char labeledResidue;
        boolean preserveLabeled;
        /** Mass taken from the last variable modification defined on the labeled residue. */
        float labeledMass;

        /** Events buffered for the spectrum_query currently being read. */
        ArrayList<XMLEvent> spectrumQueryEvents;
        boolean insideAnalysisBlock = false;
        boolean insideSpectrumQuery = false;

        /** Occurrences of the labeled residue in the current peptide. */
        int totalResidueCount;
        /** Modification masses in the current hit that match the label mass. */
        int labeledResidueCount;
        String currentPeptide;

        public PepXmlSplitRewriter(String inFilename, char labeledResidue, boolean preserveLabeled, String outFilename)
        {
            super(inFilename, outFilename);
            // Previously these two fields were declared but never assigned.
            this.inFilename = inFilename;
            this.outFilename = outFilename;
            this.labeledResidue = labeledResidue;
            this.preserveLabeled = preserveLabeled;
            spectrumQueryEvents = new ArrayList<XMLEvent>();
            totalResidueCount = 0;
            labeledResidueCount = 0;
            insideAnalysisBlock = false;
            insideSpectrumQuery = false;
        }

        /**
         * Update the parser state for an opening tag, then pass the event on
         * to {@link #conditionalAdd(XMLEvent)}.  The state is updated first so
         * that the opening tag of an analysis block is itself dropped and the
         * opening tag of a spectrum_query is itself buffered.
         */
        public void handleStartElement(StartElement event)
            throws XMLStreamException
        {
            String name = event.getName().getLocalPart();

            if (ANALYSIS_SUMMARY.equals(name) || ANALYSIS_RESULT.equals(name))
            {
                insideAnalysisBlock = true;
            }
            else if (SPECTRUM_QUERY.equals(name))
            {
                insideSpectrumQuery = true;
                // Start every query from a clean slate; otherwise a query
                // without any search_hit would be judged by the counts left
                // over from the previous query.
                resetLabelCounts();
            }
            else if (AMINOACID_MODIFICATION.equals(name))
            {
                processModificationDefinition(event);
            }
            else if (SEARCH_HIT.equals(name))
            {
                processSearchHit(event);
            }
            else if (MOD_AMINOACID_MASS.equals(name))
            {
                processModificationMass(event);
            }

            conditionalAdd(event);
        }

        /**
         * Pass the closing tag on to {@link #conditionalAdd(XMLEvent)} and
         * then update the parser state.  The order matters: the closing tag
         * must still see the "inside" flags so that it is dropped or buffered
         * together with the rest of its element.
         */
        public void handleEndElement(EndElement event)
            throws XMLStreamException
        {
            String name = event.getName().getLocalPart();

            conditionalAdd(event);

            if (SPECTRUM_QUERY.equals(name))
            {
                insideSpectrumQuery = false;
                processSpectrumQueryEvents();
                spectrumQueryEvents.clear();
            }
            else if (ANALYSIS_SUMMARY.equals(name) || ANALYSIS_RESULT.equals(name))
            {
                insideAnalysisBlock = false;
            }
        }

        /** Route every other event through {@link #conditionalAdd(XMLEvent)}. */
        public void handleDefault(XMLEvent event)
            throws XMLStreamException
        {
            conditionalAdd(event);
        }

        /**
         * Drop the event when inside an analysis block, buffer it when inside
         * a spectrum_query, and otherwise write it straight to the output.
         */
        private void conditionalAdd(XMLEvent event)
            throws XMLStreamException
        {
            if (insideAnalysisBlock)
            {
                return;
            }

            if (insideSpectrumQuery)
            {
                spectrumQueryEvents.add(event);
                return;
            }

            add(event);
        }

        /**
         * Decide whether the buffered spectrum_query belongs to the requested
         * channel and, if so, write it out.
         */
        private void processSpectrumQueryEvents()
            throws XMLStreamException
        {
            if (totalResidueCount == 0)
            {
                // Unlabeled; could come from heavy or light
                addSpectrumQueryEvents();
            }
            else if (labeledResidueCount == 0)
            {
                // Light
                if (!preserveLabeled)
                {
                    addSpectrumQueryEvents();
                }
            }
            else if (labeledResidueCount == totalResidueCount)
            {
                // Heavy
                if (preserveLabeled)
                {
                    addSpectrumQueryEvents();
                }
            }
            else
            {
                // Partial! Belongs to neither channel, so it is left out.
                if (_log.isDebugEnabled())
                {
                    _log.debug("Skipping partially labeled peptide " + currentPeptide);
                }
            }
        }

        /** Write all buffered spectrum_query events to the output, in order. */
        private void addSpectrumQueryEvents()
            throws XMLStreamException
        {
            for (XMLEvent queued : spectrumQueryEvents)
            {
                add(queued);
            }
        }

        /** Forget the peptide and label counts of the previous hit. */
        private void resetLabelCounts()
        {
            currentPeptide = null;
            totalResidueCount = 0;
            labeledResidueCount = 0;
        }

        /**
         * Record the label mass from an aminoacid_modification element that
         * is variable ("Y") and defined on the labeled residue.  When several
         * such definitions occur, the last one wins.
         */
        private void processModificationDefinition(StartElement event)
        {
            String residue = getAttribute(event, ATTR_AMINOACID);
            // An empty attribute value would make charAt(0) throw.
            if (residue == null || residue.length() == 0 || residue.charAt(0) != labeledResidue)
            {
                return;
            }

            String variable = getAttribute(event, ATTR_VARIABLE);
            String mass = getAttribute(event, ATTR_MASS);
            if ("Y".equals(variable) && mass != null)
            {
                labeledMass = Float.parseFloat(mass);
                _log.info("Using " + labeledMass + "@" + labeledResidue);
            }
        }

        /**
         * Count the number of potential labeled residues in each peptide hit.
         * The counts are restarted on every search_hit, so when a
         * spectrum_query holds several hits the last one decides.
         */
        private void processSearchHit(StartElement event)
        {
            resetLabelCounts();
            currentPeptide = getAttribute(event, ATTR_PEPTIDE);
            if (currentPeptide == null)
            {
                // No peptide attribute: nothing that could carry a label.
                return;
            }

            for (int i = 0; i < currentPeptide.length(); i++)
            {
                if (currentPeptide.charAt(i) == labeledResidue)
                {
                    totalResidueCount++;
                }
            }
        }

        /**
         * Count the number of labeled residues in each peptide hit, i.e. the
         * mod_aminoacid_mass elements whose mass is within EPSILON of the
         * label mass.
         */
        private void processModificationMass(StartElement event)
        {
            String mass = getAttribute(event, ATTR_MASS);
            if (mass != null && Math.abs(Float.parseFloat(mass) - labeledMass) < EPSILON)
            {
                labeledResidueCount++;
            }
        }

        /**
         * @return the value of the named attribute, or <code>null</code> when
         *         the element does not carry it
         */
        private String getAttribute(StartElement event, QName name)
        {
            Attribute attr = event.getAttributeByName(name);
            return attr == null ? null : attr.getValue();
        }
    }

    /**
     * Command-line entry point: <code>[--light|--heavy] inFile outFile</code>.
     * The labeled residue is fixed to 'C'.  Any first argument other than
     * "--heavy" selects the light channel.  Exits with status 1 on a usage
     * error or when the split fails.
     */
    public static void main(String[] av)
    {
        if (av.length < 3)
        {
            System.err.println("PepXmlChannelSplitter [--light|--heavy] inFile outFile");
            System.exit(1);
        }

        boolean preserveHeavy = "--heavy".equals(av[0]);

        try
        {
            PepXmlChannelSplitter.split(av[1], 'C', preserveHeavy, av[2]);
        }
        catch (Exception e)
        {
            e.printStackTrace();
            // Report the failure to the calling shell instead of exiting 0.
            System.exit(1);
        }
    }
}