AmtXmlWriter.java example

Explorer
msInspect-master
/*
 * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fhcrc.cpl.viewer.amt;

import java.io.*;
import java.util.Calendar;
import java.util.List;
import java.math.BigDecimal;

import org.apache.log4j.Logger;

import org.fhcrc.proteomics.schemaRevision.amtXml10.*;
import org.fhcrc.cpl.toolbox.proteomics.MS2Modification;
import org.w3c.dom.Node;
import org.apache.xmlbeans.XmlOptions;

/**
 * A restrictive wrapper for writing AmtXml files.  We take advantage of XmlBeans to build
 * the structure of the AmtXml file, and to build the individual modification, run and
 * peptide entry elements, but we stitch the XmlBeans XML output for those elements
 * together by hand, writing out to a file as we go, so that we don't have to hold the
 * whole structure in memory.
 */
public class AmtXmlWriter
{
    static Logger _log = Logger.getLogger(AmtXmlWriter.class);

    //name of the temporary element that marks where the per-element content gets spliced in
    private static final String SENTINEL_ELEMENT_NAME = "SENTINEL_FEATURE_LOCATION";

    //regular expression matching the serialized (self-closing) sentinel element
    private static final String SENTINEL_SPLIT_REGEX = "<SENTINEL_FEATURE_LOCATION[^\\/]*\\/>";

    //character encoding of the output file; must agree with the XML declaration
    //placed at the start of _documentPrefix
    private static final String OUTPUT_ENCODING = "UTF-8";

    //document shell structure
    AmtDatabaseDocument mXmlBeansAmtDatabaseDoc = null;
    AmtDatabaseDocument.AmtDatabase mXmlBeansAmtDatabase = null;

    //all entries to be written
    protected AmtDatabase mAmtDatabase;

    //Strings of xml representing the structure before and after the element content.
    //Both are null if the shell could not be generated
    protected String _documentPrefix = null;
    protected String _documentPostscript = null;

    //encapsulates printing options for all fragments
    protected XmlOptions _optionsForPrinting = null;


    /**
     * Constructor creates the XmlBeans representing the shell of a AmtXml document, and
     * creates the "prefix" and "postscript" strings representing that shell.  No database
     * is associated with the writer until {@link #setAmtDatabase(AmtDatabase)} is called
     */
    public AmtXmlWriter()
    {
        init();
        generateStubs();
    }

    /**
     * Creates a writer for the given database
     * @param amtDatabase the database to be written; see {@link #setAmtDatabase(AmtDatabase)}
     */
    public AmtXmlWriter(AmtDatabase amtDatabase)
    {
        setAmtDatabase(amtDatabase);
    }

    /**
     * Creates a fresh, empty XmlBeans document with a single root AmtDatabase element.
     * Any previously built document is discarded
     */
    protected void init()
    {
        //Construct generic document structure
        mXmlBeansAmtDatabaseDoc = AmtDatabaseDocument.Factory.newInstance();
        mXmlBeansAmtDatabase = mXmlBeansAmtDatabaseDoc.addNewAmtDatabase();
    }

    /**
     * Serializes the (empty) document shell and splits it into the text that goes before
     * and after the individually-written elements.  A sentinel element is temporarily
     * appended to the root element to mark the split point, and is always removed again
     * before this method returns.
     *
     * If the serialized shell can't be split into exactly two pieces, an error is logged
     * and _documentPrefix and _documentPostscript are left null, so that a stale shell
     * from an earlier call is never written out
     */
    protected void generateStubs()
    {
        //invalidate any shell left over from an earlier call
        _documentPrefix = null;
        _documentPostscript = null;

        //add a sentinel node that tells us where to split the document to insert entries
        Node amtDatabaseNode = mXmlBeansAmtDatabase.getDomNode();
        Node dummyEntryLocationNode =
                amtDatabaseNode.getOwnerDocument().createElement(SENTINEL_ELEMENT_NAME);
        amtDatabaseNode.appendChild(dummyEntryLocationNode);

        try
        {
            //set printing options for xml fragments
            _optionsForPrinting = new XmlOptions();
            _optionsForPrinting.setSaveOuter();
            _optionsForPrinting.setSavePrettyPrint();
            _optionsForPrinting.setSavePrettyPrintOffset(0);

            //create and break up the xml that defines the document structure
            String documentShell = mXmlBeansAmtDatabaseDoc.xmlText(_optionsForPrinting);
            String[] halves = documentShell.split(SENTINEL_SPLIT_REGEX);
            if (halves.length != 2)
            {
                _log.error("Failed to create document shell for writing");
                return;
            }

            _documentPrefix = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + halves[0];
            _documentPostscript = halves[1];
        }
        finally
        {
            //remove our dummy node, whether or not the split succeeded
            amtDatabaseNode.removeChild(dummyEntryLocationNode);
        }
    }

    /**
     * Associates a database with this writer.  Rebuilds the document shell so that the
     * root element carries the database's hydrophobicity algorithm name and version
     * @param database the database to be written
     */
    public void setAmtDatabase(AmtDatabase database)
    {
        init();
        mAmtDatabase = database;
        mXmlBeansAmtDatabase.setHydrophobicityAlgorithm(database.getHydrophobicityAlgorithmName());
        mXmlBeansAmtDatabase.setHydrophobicityAlgVersion(database.getHydrophobicityAlgorithmVersion());

        generateStubs();
    }

    /**
     * Write out all the peptide entries of the database, in the order returned by
     * the database
     * @param pw writer to print the entries to
     */
    protected void writeEntries(PrintWriter pw)
    {
        for (AmtPeptideEntry entry : mAmtDatabase.getEntries())
            writeEntry(entry, pw);
    }

    /**
     * Write out a single peptide entry, including all of its modification states and
     * their observations.  The entry is built as a temporary child of the shell's root
     * element, serialized, and then removed again, so the shell never holds more than
     * one entry at a time.  A failure while serializing or printing is logged and
     * otherwise ignored
     * @param entry the peptide entry to write
     * @param pw writer to print the entry's xml to
     */
    protected void writeEntry(AmtPeptideEntry entry, PrintWriter pw)
    {
        AmtDatabaseDocument.AmtDatabase.PeptideEntry xmlBeansEntry =
                mXmlBeansAmtDatabase.addNewPeptideEntry();
        try
        {
            xmlBeansEntry.setPeptideSequence(entry.getPeptideSequence());
            xmlBeansEntry.setCalculatedHydrophobicity(BigDecimal.valueOf(entry.getPredictedHydrophobicity()));
            xmlBeansEntry.setMedianObservedHydrophobicity(BigDecimal.valueOf(entry.getMedianObservedHydrophobicity()));
            xmlBeansEntry.setMedianPeptideProphet(BigDecimal.valueOf(entry.getMedianPeptideProphet()));

            for (AmtPeptideEntry.AmtPeptideModificationStateEntry modState :
                    entry.getModificationStateEntries())
            {
                AmtDatabaseDocument.AmtDatabase.PeptideEntry.ModificationStateEntry xmlBeansModEntry =
                        xmlBeansEntry.addNewModificationStateEntry();
                xmlBeansModEntry.setModifiedSequence(modState.getModifiedSequence());
                xmlBeansModEntry.setModifiedMass(BigDecimal.valueOf(modState.getModifiedMass()));
                xmlBeansModEntry.setMedianObservedHydrophobicity(BigDecimal.valueOf(modState.getMedianObservedHydrophobicity()));
                xmlBeansModEntry.setMedianPeptideProphet(BigDecimal.valueOf(modState.getMedianPeptideProphet()));

                addModificationInstances(modState, xmlBeansModEntry);
                addObservations(modState, xmlBeansModEntry);
            }

            try
            {
                //the shell is empty between calls, so the entry just added is at index 0
                String fragment = mXmlBeansAmtDatabase.getPeptideEntryArray(0).xmlText(_optionsForPrinting);
                pw.print(fragment);
                pw.flush();
            }
            catch (Exception e)
            {
                _log.error("Failed to write peptide entry", e);
            }
        }
        finally
        {
            //always take the temporary entry back out of the shell, even if building it
            //failed; otherwise the next call would serialize this stale entry instead
            mXmlBeansAmtDatabase.removePeptideEntry(0);
        }
    }

    /**
     * Adds one AminoacidModInstance element per modification of the given modification
     * state.  The index of each list within the array returned by getModifications() is
     * written as the instance's position; null arrays and null lists are skipped
     * @param modState source of the modifications
     * @param xmlBeansModEntry element that receives the new children
     */
    private void addModificationInstances(
            AmtPeptideEntry.AmtPeptideModificationStateEntry modState,
            AmtDatabaseDocument.AmtDatabase.PeptideEntry.ModificationStateEntry xmlBeansModEntry)
    {
        List<MS2Modification>[] modifications = modState.getModifications();
        if (modifications == null)
            return;

        for (int position = 0; position < modifications.length; position++)
        {
            List<MS2Modification> modificationList = modifications[position];
            if (modificationList == null)
                continue;

            for (MS2Modification modification : modificationList)
            {
                int modId = mAmtDatabase.getSequenceForAminoacidModification(modification);

                AmtDatabaseDocument.AmtDatabase.PeptideEntry.ModificationStateEntry.AminoacidModInstance
                        xmlBeansModInstance = xmlBeansModEntry.addNewAminoacidModInstance();
                xmlBeansModInstance.setModificationId(modId);
                xmlBeansModInstance.setPosition(position);
            }
        }
    }

    /**
     * Adds one Observation element per observation of the given modification state.
     * The spectral count is only written if the observation has one
     * @param modState source of the observations
     * @param xmlBeansModEntry element that receives the new children
     */
    private void addObservations(
            AmtPeptideEntry.AmtPeptideModificationStateEntry modState,
            AmtDatabaseDocument.AmtDatabase.PeptideEntry.ModificationStateEntry xmlBeansModEntry)
    {
        for (AmtPeptideEntry.AmtPeptideObservation observation : modState.getObservations())
        {
            AmtDatabaseDocument.AmtDatabase.PeptideEntry.ModificationStateEntry.Observation
                    xmlBeansObservation = xmlBeansModEntry.addNewObservation();
            xmlBeansObservation.setObservedHydrophobicity(BigDecimal.valueOf(observation.getObservedHydrophobicity()));
            xmlBeansObservation.setPeptideProphet(BigDecimal.valueOf(observation.getPeptideProphet()));
            //TODO: getSequenceForRun could return -1, in which case the run isn't found.  That should throw an exception
            xmlBeansObservation.setRunId(mAmtDatabase.getSequenceForRun(observation.getRunEntry()));
            xmlBeansObservation.setTimeInRun(BigDecimal.valueOf(observation.getTimeInRun()));
            if (observation.hasSpectralCount())
                xmlBeansObservation.setSpectralCount(observation.getSpectralCount());
        }
    }

    /**
     * Write out all the amino acid modifications of the database.  Does nothing if the
     * database reports zero modifications
     * @param pw writer to print the modifications to
     */
    protected void writeModifications(PrintWriter pw)
    {
        if (mAmtDatabase.numAminoacidModifications() == 0)
            return;
        for (MS2Modification mod : mAmtDatabase.getAminoacidModifications())
            writeModification(mod, pw);
    }

    /**
     * Write out a single amino acid modification.  The element is built as a temporary
     * child of the shell's root element, serialized, and then removed again.  A failure
     * while serializing or printing is logged and otherwise ignored
     * @param ms2Modification the modification to write
     * @param pw writer to print the modification's xml to
     */
    protected void writeModification(MS2Modification ms2Modification, PrintWriter pw)
    {
        AmtDatabaseDocument.AmtDatabase.AminoacidModification xmlBeansMod =
                mXmlBeansAmtDatabase.addNewAminoacidModification();
        try
        {
            xmlBeansMod.setResidue(ms2Modification.getAminoAcid());
            xmlBeansMod.setMassDifference(BigDecimal.valueOf(ms2Modification.getMassDiff()));
            xmlBeansMod.setVariableFlag(ms2Modification.getVariable());
            xmlBeansMod.setModificationId(mAmtDatabase.getSequenceForAminoacidModification(ms2Modification));

            try
            {
                //the shell is empty between calls, so the element just added is at index 0
                String fragment = mXmlBeansAmtDatabase.getAminoacidModificationArray(0).xmlText(_optionsForPrinting);
                pw.print(fragment);
                pw.flush();
            }
            catch (Exception e)
            {
                _log.error("Failed to write aminoacid modification", e);
            }
        }
        finally
        {
            //always take the temporary element back out of the shell
            mXmlBeansAmtDatabase.removeAminoacidModification(0);
        }
    }

    /**
     * Write out all the runs
     * @param pw writer to print the runs to
     */
    protected void writeRuns(PrintWriter pw)
    {
        for (AmtRunEntry run : mAmtDatabase.getRuns())
        {
            writeRun(run, pw);
        }
    }

    /**
     * Write out a single run.  The run id, the time-to-hydrophobicity mapping
     * coefficients (one element per coefficient, with its array index as the degree)
     * and the time added are always written; modification usages, file names, LSID,
     * minimum PeptideProphet and time analyzed are written only when present.  The
     * element is built as a temporary child of the shell's root element, serialized,
     * and then removed again.  A failure while serializing or printing is logged and
     * otherwise ignored
     * @param run the run to write
     * @param pw writer to print the run's xml to
     */
    protected void writeRun(AmtRunEntry run, PrintWriter pw)
    {
        AmtDatabaseDocument.AmtDatabase.Run xmlBeansRun =
            mXmlBeansAmtDatabase.addNewRun();
        try
        {
            //set required attributes
            xmlBeansRun.setRunId(mAmtDatabase.getSequenceForRun(run));
            double[] coeffs = run.getTimeHydMapCoefficients();
            for (int degree = 0; degree < coeffs.length; degree++)
            {
                AmtDatabaseDocument.AmtDatabase.Run.TimeHydroMappingCoeff xmlBeansCoeff =
                        xmlBeansRun.addNewTimeHydroMappingCoeff();
                xmlBeansCoeff.setDegree(degree);
                xmlBeansCoeff.setValue(BigDecimal.valueOf(coeffs[degree]));
            }
            Calendar timeAddedCalendar = Calendar.getInstance();
            timeAddedCalendar.setTime(run.getTimeAdded());
            xmlBeansRun.setTimeAdded(timeAddedCalendar);

            //set modifications, if any
            MS2Modification[] modifications = run.getModifications();
            if (modifications != null)
            {
                for (MS2Modification mod : modifications)
                {
                    AmtDatabaseDocument.AmtDatabase.Run.AminoacidModUsage modUsage =
                            xmlBeansRun.addNewAminoacidModUsage();
                    modUsage.setModificationId(mAmtDatabase.getSequenceForAminoacidModification(mod));
                }
            }

            //set optional attributes if present
            if (run.getMzXmlFilename() != null)
                xmlBeansRun.setMzxmlFilename(run.getMzXmlFilename());
            if (run.getPepXmlFilename() != null)
                xmlBeansRun.setPepxmlFilename(run.getPepXmlFilename());
            if (run.getLSID() != null)
                xmlBeansRun.setLSID(run.getLSID());
            if (run.getMinPeptideProphet() > 0.0)
                xmlBeansRun.setMinPeptideProphet(BigDecimal.valueOf(run.getMinPeptideProphet()));
            if (run.getTimeAnalyzed() != null)
            {
                Calendar timeAnalyzedCalendar = Calendar.getInstance();
                timeAnalyzedCalendar.setTime(run.getTimeAnalyzed());
                xmlBeansRun.setTimeAnalyzed(timeAnalyzedCalendar);
            }

            try
            {
                //the shell is empty between calls, so the run just added is at index 0
                String fragment = mXmlBeansAmtDatabase.getRunArray(0).xmlText(_optionsForPrinting);
                pw.print(fragment);
                pw.flush();
            }
            catch (Exception e)
            {
                _log.error("Failed to write run", e);
            }
        }
        finally
        {
            //always take the temporary run back out of the shell
            mXmlBeansAmtDatabase.removeRun(0);
        }
    }


    /**
     * Write out the full document, with all modifications, runs and peptide entries,
     * to a file.  The file is written as UTF-8, matching the encoding named in the XML
     * declaration, and the underlying writer is always closed before returning
     * @param file the file to write to
     * @throws IOException if the file can't be opened, if the document shell could not
     * be generated, or if the writer reported an error while writing
     * @throws IllegalStateException if no database has been set on this writer
     */
    public void write(File file) throws IOException
    {
        //fail before touching the file, rather than leaving a truncated document behind
        if (mAmtDatabase == null)
            throw new IllegalStateException("No AMT database has been set on this writer");
        if (_documentPrefix == null || _documentPostscript == null)
            throw new IOException("Document shell could not be generated; unable to write " + file);

        PrintWriter pw = new PrintWriter(file, OUTPUT_ENCODING);
        try
        {
            pw.print(_documentPrefix);
            writeModifications(pw);
            pw.println("");
            writeRuns(pw);
            pw.println("");
            writeEntries(pw);
            pw.print(_documentPostscript);

            //PrintWriter never throws on write failures; checkError() flushes the
            //stream and reports whether any error occurred along the way
            if (pw.checkError())
                throw new IOException("Error while writing AMT XML to " + file);
        }
        finally
        {
            pw.close();
        }
    }
}