/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.amt; import java.io.*; import java.util.Calendar; import java.util.List; import java.math.BigDecimal; import org.apache.log4j.Logger; import org.fhcrc.proteomics.schemaRevision.amtXml10.*; import org.fhcrc.cpl.toolbox.proteomics.MS2Modification; import org.w3c.dom.Node; import org.apache.xmlbeans.XmlOptions; /** * A restrictive wrapper for writing AmtXml files. We take advantage of XmlBeans to build * the structure of the amtxmlxml file, and to build individual peptide_entries, * but we stitch the XmlBeans XML output for features together by * hand, writing out to a file as we go, so that we don't have to hold the whole structure * in memory */ public class AmtXmlWriter { static Logger _log = Logger.getLogger(AmtXmlWriter.class); //document shell structure AmtDatabaseDocument mXmlBeansAmtDatabaseDoc = null; AmtDatabaseDocument.AmtDatabase mXmlBeansAmtDatabase = null; //all entries to be written protected AmtDatabase mAmtDatabase; //Strings of xml representing the structure before and after the feature content protected String _documentPrefix = null; protected String _documentPostscript = null; //encapsulates printing options for all fragments protected XmlOptions _optionsForPrinting = null; /** * Constructor creates the XmlBeans representing the shell of a AmtXml document, and * creates the "prefix" and "postscript" strings representing that shell */ public AmtXmlWriter() { init(); generateStubs(); } protected void init() { //Construct generic document structure mXmlBeansAmtDatabaseDoc = AmtDatabaseDocument.Factory.newInstance(); mXmlBeansAmtDatabase = mXmlBeansAmtDatabaseDoc.addNewAmtDatabase(); } protected void generateStubs() { //add a sentinel node that tells us where to split the document to insert entries Node amtDatabaseNode = mXmlBeansAmtDatabase.getDomNode(); Node dummyEntryLocationNode = amtDatabaseNode.getOwnerDocument().createElement("SENTINEL_FEATURE_LOCATION"); amtDatabaseNode.appendChild(dummyEntryLocationNode); //set printing options for xml fragments _optionsForPrinting = new XmlOptions(); _optionsForPrinting.setSaveOuter(); _optionsForPrinting.setSavePrettyPrint(); _optionsForPrinting.setSavePrettyPrintOffset(0); //create and break up the xml that defines the document structure String documentShell = mXmlBeansAmtDatabaseDoc.xmlText(_optionsForPrinting); String[] halves = documentShell.split("<SENTINEL_FEATURE_LOCATION[^\\/]*\\/>"); if (halves.length != 2) { _log.error("Failed to create document shell for writing"); return; } _documentPrefix = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + halves[0]; _documentPostscript = halves[1]; //remove our dummy node amtDatabaseNode.removeChild(dummyEntryLocationNode); } public AmtXmlWriter(AmtDatabase amtDatabase) { setAmtDatabase(amtDatabase); } public void setAmtDatabase(AmtDatabase database) { init(); mAmtDatabase = database; mXmlBeansAmtDatabase.setHydrophobicityAlgorithm(database.getHydrophobicityAlgorithmName()); mXmlBeansAmtDatabase.setHydrophobicityAlgVersion(database.getHydrophobicityAlgorithmVersion()); generateStubs(); } protected void writeEntries(PrintWriter pw) { AmtPeptideEntry[] entries = mAmtDatabase.getEntries(); for (int i=0; i<entries.length; i++) writeEntry(entries[i], pw); } /** * Write out a single peptide entry * @param entry * @param pw */ protected void writeEntry(AmtPeptideEntry entry, PrintWriter pw) { AmtDatabaseDocument.AmtDatabase.PeptideEntry xmlBeansEntry = mXmlBeansAmtDatabase.addNewPeptideEntry(); xmlBeansEntry.setPeptideSequence(entry.getPeptideSequence()); xmlBeansEntry.setCalculatedHydrophobicity(BigDecimal.valueOf(entry.getPredictedHydrophobicity())); xmlBeansEntry.setMedianObservedHydrophobicity(BigDecimal.valueOf(entry.getMedianObservedHydrophobicity())); xmlBeansEntry.setMedianPeptideProphet(BigDecimal.valueOf(entry.getMedianPeptideProphet())); for (AmtPeptideEntry.AmtPeptideModificationStateEntry modState : entry.getModificationStateEntries()) { AmtDatabaseDocument.AmtDatabase.PeptideEntry.ModificationStateEntry xmlBeansModEntry = xmlBeansEntry.addNewModificationStateEntry(); xmlBeansModEntry.setModifiedSequence(modState.getModifiedSequence()); xmlBeansModEntry.setModifiedMass(BigDecimal.valueOf(modState.getModifiedMass())); xmlBeansModEntry.setMedianObservedHydrophobicity(BigDecimal.valueOf(modState.getMedianObservedHydrophobicity())); xmlBeansModEntry.setMedianPeptideProphet(BigDecimal.valueOf(modState.getMedianPeptideProphet())); List<MS2Modification>[] modifications = modState.getModifications(); if (modifications != null) { for (int i=0; i<modifications.length; i++) { List<MS2Modification> modificationList = modifications[i]; if (modificationList != null) { for (MS2Modification modification : modificationList) { int modId = mAmtDatabase.getSequenceForAminoacidModification(modification); AmtDatabaseDocument.AmtDatabase.PeptideEntry.ModificationStateEntry.AminoacidModInstance xmlBeansModInstance = xmlBeansModEntry.addNewAminoacidModInstance(); xmlBeansModInstance.setModificationId(modId); xmlBeansModInstance.setPosition(i); } } } } for (AmtPeptideEntry.AmtPeptideObservation observation : modState.getObservations()) { AmtDatabaseDocument.AmtDatabase.PeptideEntry.ModificationStateEntry.Observation xmlBeansObservation = xmlBeansModEntry.addNewObservation(); xmlBeansObservation.setObservedHydrophobicity(BigDecimal.valueOf(observation.getObservedHydrophobicity())); xmlBeansObservation.setPeptideProphet(BigDecimal.valueOf(observation.getPeptideProphet())); //TODO: getSequenceForRun could return -1, in which case the run isn't found. That should throw an exception xmlBeansObservation.setRunId(mAmtDatabase.getSequenceForRun(observation.getRunEntry())); xmlBeansObservation.setTimeInRun(BigDecimal.valueOf(observation.getTimeInRun())); if (observation.hasSpectralCount()) xmlBeansObservation.setSpectralCount(observation.getSpectralCount()); } } try { String fragment = mXmlBeansAmtDatabase.getPeptideEntryArray(0).xmlText(_optionsForPrinting); pw.print(fragment); pw.flush(); } catch (Exception e) { e.printStackTrace(System.err); } mXmlBeansAmtDatabase.removePeptideEntry(0); } protected void writeModifications(PrintWriter pw) { if (mAmtDatabase.numAminoacidModifications() == 0) return; for (MS2Modification mod : mAmtDatabase.getAminoacidModifications()) writeModification(mod, pw); } protected void writeModification(MS2Modification ms2Modification, PrintWriter pw) { AmtDatabaseDocument.AmtDatabase.AminoacidModification xmlBeansMod = mXmlBeansAmtDatabase.addNewAminoacidModification(); xmlBeansMod.setResidue(ms2Modification.getAminoAcid()); xmlBeansMod.setMassDifference(BigDecimal.valueOf(ms2Modification.getMassDiff())); xmlBeansMod.setVariableFlag(ms2Modification.getVariable()); xmlBeansMod.setModificationId(mAmtDatabase.getSequenceForAminoacidModification(ms2Modification)); try { String fragment = mXmlBeansAmtDatabase.getAminoacidModificationArray(0).xmlText(_optionsForPrinting); pw.print(fragment); pw.flush(); } catch (Exception e) { e.printStackTrace(System.err); } mXmlBeansAmtDatabase.removeAminoacidModification(0); } /** * Write out all the runs * @param pw */ protected void writeRuns(PrintWriter pw) { for (AmtRunEntry run : mAmtDatabase.getRuns()) { writeRun(run, pw); } } /** * Write out a single run * @param pw */ protected void writeRun(AmtRunEntry run, PrintWriter pw) { AmtDatabaseDocument.AmtDatabase.Run xmlBeansRun = mXmlBeansAmtDatabase.addNewRun(); //set required attributes xmlBeansRun.setRunId(mAmtDatabase.getSequenceForRun(run)); double[] coeffs = run.getTimeHydMapCoefficients(); for (int i=0; i<coeffs.length; i++) { AmtDatabaseDocument.AmtDatabase.Run.TimeHydroMappingCoeff xmlBeansCoeff = xmlBeansRun.addNewTimeHydroMappingCoeff(); xmlBeansCoeff.setDegree(i); xmlBeansCoeff.setValue(BigDecimal.valueOf(coeffs[i])); } Calendar timeAddedCalendar = Calendar.getInstance(); timeAddedCalendar.setTime(run.getTimeAdded()); xmlBeansRun.setTimeAdded(timeAddedCalendar); //set modifications, if any MS2Modification[] modifications = run.getModifications(); if (modifications != null && modifications.length > 0) { for (MS2Modification mod : modifications) { AmtDatabaseDocument.AmtDatabase.Run.AminoacidModUsage modUsage = xmlBeansRun.addNewAminoacidModUsage(); modUsage.setModificationId(mAmtDatabase.getSequenceForAminoacidModification(mod)); } } //set optional attributes if present if (run.getMzXmlFilename() != null) xmlBeansRun.setMzxmlFilename(run.getMzXmlFilename()); if (run.getPepXmlFilename() != null) xmlBeansRun.setPepxmlFilename(run.getPepXmlFilename()); if (run.getLSID() != null) xmlBeansRun.setLSID(run.getLSID()); if (run.getMinPeptideProphet() > 0.0) xmlBeansRun.setMinPeptideProphet(BigDecimal.valueOf(run.getMinPeptideProphet())); if (run.getTimeAnalyzed() != null) { Calendar timeAnalyzedCalendar = Calendar.getInstance(); timeAnalyzedCalendar.setTime(run.getTimeAnalyzed()); xmlBeansRun.setTimeAnalyzed(timeAnalyzedCalendar); } try { String fragment = mXmlBeansAmtDatabase.getRunArray(0).xmlText(_optionsForPrinting); pw.print(fragment); pw.flush(); } catch (Exception e) { e.printStackTrace(System.err); } mXmlBeansAmtDatabase.removeRun(0); } /** * Write out the full document, with all modifications and features, to a file * @param file * @throws IOException */ public void write(File file) throws IOException { PrintWriter pw = new PrintWriter(file); pw.print(_documentPrefix); writeModifications(pw); pw.println(""); writeRuns(pw); pw.println(""); writeEntries(pw); pw.print(_documentPostscript); pw.flush(); } }