/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.toolbox.proteomics.feature;
import java.io.*;
import java.util.*;
import java.math.BigInteger;
import org.apache.log4j.Logger;
import net.systemsbiology.regisWeb.pepXML.*;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.MS2ExtraInfoDef;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.IsotopicLabelExtraInfoDef;
import org.fhcrc.cpl.toolbox.proteomics.MS2Modification;
import org.fhcrc.cpl.toolbox.proteomics.ModifiedAminoAcid;
import org.fhcrc.cpl.toolbox.proteomics.PeptideGenerator;
import org.fhcrc.cpl.toolbox.proteomics.QuantitationUtilities;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet;
import org.fhcrc.cpl.toolbox.proteomics.feature.AnalyzeICAT;
import org.fhcrc.cpl.toolbox.proteomics.filehandler.Q3Handler;
import org.fhcrc.cpl.toolbox.proteomics.filehandler.XPressHandler;
import org.fhcrc.cpl.toolbox.proteomics.filehandler.BasePepXmlWriter;
import org.fhcrc.cpl.toolbox.proteomics.filehandler.RelativeQuantAnalysisResult;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Attr;
/**
* A restrictive wrapper for writing PepXml files, using a Feature array to populate the spectrum_queries
*/
public class FeaturePepXmlWriter extends BasePepXmlWriter
{
static Logger _log = Logger.getLogger(FeaturePepXmlWriter.class);
//all features and modifications to be written
protected Feature[] _features = null;
protected boolean writeIntensitiesAsXpressResults = false;
protected int firstSpectrumQueryIndex = 1;
public final int RATIO_MODE_XPRESS = 0;
public final int RATIO_MODE_Q3 = 1;
//we have to write out ratios, whose provenance we don't know, as either Q3 or XPress.
//This variable determines which
protected int ratioMode = RATIO_MODE_Q3;
//keep track of whether the FeatureSet thinks it has a ratio declaration, because we should write that
//out even if no features have it
protected boolean featureSetHasRatioDeclaration = false;
protected AnalyzeICAT.IsotopicLabel _isotopicLabel = null;
//spectrum names that have been written to the file. Must keep track to preserve uniqueness
protected Set<String> assignedSpectrumNames = new HashSet<String>();
/**
* Create doc structure, populate features and modifications
* @param features
* @param modifications
*/
public FeaturePepXmlWriter(Feature[] features, MS2Modification[] modifications)
{
super(modifications);
setFeatures(features);
}
public FeaturePepXmlWriter(FeatureSet featureSet)
{
super();
setModifications(MS2ExtraInfoDef.getFeatureSetModifications(featureSet));
setSearchDatabase(MS2ExtraInfoDef.getFeatureSetSearchDatabasePath(featureSet));
String quantitationAlgorithm = IsotopicLabelExtraInfoDef.getFeatureSetAlgorithm(featureSet);
if (quantitationAlgorithm != null)
{
featureSetHasRatioDeclaration = true;
if (quantitationAlgorithm.equals(QuantitationUtilities.ALGORITHM_Q3))
ratioMode = RATIO_MODE_Q3;
else if (quantitationAlgorithm.equals(QuantitationUtilities.ALGORITHM_XPRESS))
ratioMode = RATIO_MODE_XPRESS;
else throw new IllegalArgumentException("Unknown quantitation algorithm " + quantitationAlgorithm);
_log.debug("Quantitation Algorithm: " + quantitationAlgorithm);
}
else
_log.debug("No quantitation algorithm found");
String baseName = MS2ExtraInfoDef.getFeatureSetBaseName(featureSet);
if (featureSet != null)
setBaseName(baseName);
int maxCleavages =
MS2ExtraInfoDef.getFeatureSetSearchConstraintMaxIntCleavages(featureSet);
int minTermini =
MS2ExtraInfoDef.getFeatureSetSearchConstraintMinTermini(featureSet);
setSearchConstraints(maxCleavages, minTermini);
setFeatures(featureSet.getFeatures());
}
/**
* setter for features
* @param features
*/
public void setFeatures(Feature[] features)
{
_features = features;
}
public boolean willWriteIntensitiesAsXpressResults()
{
return writeIntensitiesAsXpressResults;
}
public void setWriteIntensitiesAsXpressResults(boolean writeIntensitiesAsXpressResults)
{
this.writeIntensitiesAsXpressResults = writeIntensitiesAsXpressResults;
}
/**
* Write out all features immediately
* @param pw
*/
protected void writeSpectrumQueries(PrintWriter pw)
{
if (_features == null)
return;
for (int i=0; i<_features.length; i++)
{
//only write features with peptide identifications
if (MS2ExtraInfoDef.getFirstPeptide(_features[i]) != null)
writeFeature(i, pw);
}
}
/**
* Add a SearchResult representing the passed-in feature to the first run of the file.
* Write out the XML for that search result
*/
protected void writeFeature(int featureIndex, PrintWriter pw)
{
Feature feature = _features[featureIndex];
MsmsPipelineAnalysisDocument.MsmsPipelineAnalysis.MsmsRunSummary.SpectrumQuery spectrumQuery =
addSpectrumQuery(feature.getScan(),
feature.getScan(),
feature.getCharge(), featureIndex + firstSpectrumQueryIndex);
// int indexAttribute = featureIndex + firstSpectrumQueryIndex;
//dhmay changing the contents of the spectrum attribute, 20100419. We've had reports that
//the TPP can't open these files unless the format matches [basename].[start_scan].[end_scan].[assumed_charge]
//dhmay changing again. Sometimes, the spectrumName can be non-unique. That is death for ProteinProphet.
//Strategy: perturb the base by adding a number at the end. Ensure uniqueness.
//todo: is it OK to mess with the base in this way? Base won't always be the same.
String spectrumName = _spectrumBaseString + feature.getScanFirst() + "." + feature.getScanLast() +
"." + feature.charge;
int baseSuffix=0;
while (assignedSpectrumNames.contains(spectrumName))
{
String tempBase = _spectrumBaseString;
if (tempBase.endsWith("."))
tempBase = tempBase.substring(0, tempBase.length()-1);
tempBase = tempBase + baseSuffix++ + ".";
spectrumName = tempBase + feature.getScanFirst() + "." + feature.getScanLast() +
"." + feature.charge;
}
spectrumQuery.setSpectrum(spectrumName);
assignedSpectrumNames.add(spectrumName);
float precursorNeutralMass = feature.getMass() + MS2ExtraInfoDef.getDeltaMass(feature);
spectrumQuery.setPrecursorNeutralMass(precursorNeutralMass);
//dhmay adding 7/1/08. retention_time_sec isn't defined in the pepXml spec, but it's used by
//various folks
if (feature.getTime() > 0)
{
Attr retentionTimeAttr =
spectrumQuery.getDomNode().getOwnerDocument().createAttribute("retention_time_sec");
retentionTimeAttr.setValue("" + feature.getTime());
spectrumQuery.getDomNode().getAttributes().setNamedItem(retentionTimeAttr);
}
MsmsPipelineAnalysisDocument.MsmsPipelineAnalysis.MsmsRunSummary.SpectrumQuery.SearchResult.SearchHit searchHit =
spectrumQuery.addNewSearchResult().addNewSearchHit();
searchHit.setPeptide(MS2ExtraInfoDef.getFirstPeptide(feature));
searchHit.setCalcNeutralPepMass(feature.getMass());
searchHit.setHitRank(1);
searchHit.setNumMatchedIons(BigInteger.valueOf(1));
searchHit.setNumTotProteins(1);
searchHit.setMassdiff(Float.toString(MS2ExtraInfoDef.getDeltaMass(feature)));
searchHit.setNumMissedCleavages(BigInteger.valueOf(0));
searchHit.setIsRejected(BigInteger.valueOf(0));
//num_tol_term is important to ProteinProphet. From David Shteynburg:
//"In general, having NTT information is very important for distinguishing correct from
// incorrect assignments both on the peptide and on the protein levels and this information
// should be given whenever available."
//If numTolTerm is unspecified but prevAA and nextAA are specified, ProteinProphet will
//calculate numTolTerm, assuming trypsin
searchHit.setNumTolTerm(BigInteger.valueOf(2));
Character prevAA = MS2ExtraInfoDef.getPrevAminoAcid(feature);
Character nextAA = MS2ExtraInfoDef.getNextAminoAcid(feature);
if (prevAA != null)
searchHit.setPeptidePrevAa("" + prevAA);
if (nextAA != null)
searchHit.setPeptideNextAa("" + nextAA);
if (MS2ExtraInfoDef.hasNumEnzymaticEnds(feature))
searchHit.setNumTolTerm(BigInteger.valueOf(MS2ExtraInfoDef.getNumEnzymaticEnds(feature)));
//Properly, we should actually carry the previous and next AAs forward
//through the Feature
//Should we do this? The goal would be to get ProteinProphet to specify
//n_enzymatic_termini, but that doesn't seem to work
// searchHit.setPeptidePrevAa("K");
// searchHit.setPeptideNextAa("A");
List<String> proteins =
MS2ExtraInfoDef.getProteinList(feature);
if (proteins != null && proteins.size() > 0)
{
searchHit.setProtein(proteins.get(0));
List<Integer> altProteinNTTs = MS2ExtraInfoDef.getAltProteinNTTs(feature);
for (int i=1; i<proteins.size(); i++)
{
MsmsPipelineAnalysisDocument.MsmsPipelineAnalysis.MsmsRunSummary.SpectrumQuery.SearchResult.SearchHit.AlternativeProtein altProtein =
searchHit.addNewAlternativeProtein();
altProtein.setProtein(proteins.get(i));
altProtein.setProteinDescr("(dummy description) Alternative Protein");
int altProteinNTT = MS2ExtraInfoDef.getNumEnzymaticEnds(feature);
if (altProteinNTTs != null && altProteinNTTs.size() >= i)
altProteinNTT = altProteinNTTs.get(i-1);
if (MS2ExtraInfoDef.hasNumEnzymaticEnds(feature))
altProtein.setNumTolTerm(BigInteger.valueOf(altProteinNTT));
}
}
//Peptide Prophet
if (MS2ExtraInfoDef.hasPeptideProphet(feature))
{
addPeptideProphet(searchHit,
(float) MS2ExtraInfoDef.getPeptideProphet(feature),
MS2ExtraInfoDef.getAllNttProb(feature),
(float) MS2ExtraInfoDef.getFval(feature));
}
//search scores
Map<String,String> searchScoreMap = MS2ExtraInfoDef.getSearchScores(feature);
if (searchScoreMap != null)
{
for (String searchScoreType : searchScoreMap.keySet())
{
addSearchScore(searchHit, searchScoreType, searchScoreMap.get(searchScoreType));
}
}
//intensity
// if (writeIntensitiesAsXpressResults && feature.getIntensity() > 0)
// {
// XPressHandler.XPressResult xpressAnalysisResult = new XPressHandler.XPressResult();
// MsmsPipelineAnalysisDocument.MsmsPipelineAnalysis.MsmsRunSummary.SpectrumQuery.SearchResult.SearchHit.AnalysisResult xmlBeansXpressAnalysisResult =
// searchHit.addNewAnalysisResult();
// xmlBeansXpressAnalysisResult.setAnalysis(xpressAnalysisResult.getAnalysisType());
//
// Element arElement =
// xmlBeansXpressAnalysisResult.getDomNode().getOwnerDocument().createElement("xpressratio_result");
// arElement.setAttribute("ratio", feature.getIntensity() + ":1");
// arElement.setAttribute("heavy_area", "1");
// arElement.setAttribute("light_area","" + feature.getIntensity());
// xmlBeansXpressAnalysisResult.getDomNode().appendChild(arElement);
// }
//NOTE: we don't distinguish between Q3 and XPress results
if (IsotopicLabelExtraInfoDef.hasRatio(feature))
{
String resultElementTagName = null;
RelativeQuantAnalysisResult myAnalysisResult = null;
switch (ratioMode)
{
case RATIO_MODE_Q3:
myAnalysisResult = new Q3Handler.Q3Result();
resultElementTagName = "q3ratio_result";
break;
default:
myAnalysisResult = new XPressHandler.XPressResult();
resultElementTagName = "xpressratio_result";
break;
}
MsmsPipelineAnalysisDocument.MsmsPipelineAnalysis.MsmsRunSummary.SpectrumQuery.SearchResult.SearchHit.AnalysisResult xmlBeansAnalysisResult =
searchHit.addNewAnalysisResult();
xmlBeansAnalysisResult.setAnalysis(myAnalysisResult.getAnalysisType());
Element arElement =
xmlBeansAnalysisResult.getDomNode().getOwnerDocument().createElement(resultElementTagName);
double ratio = IsotopicLabelExtraInfoDef.getRatio(feature);
arElement.setAttribute("decimal_ratio", "" + ratio);
arElement.setAttribute("heavy_area", "" + IsotopicLabelExtraInfoDef.getHeavyIntensity(feature));
arElement.setAttribute("light_area", "" + IsotopicLabelExtraInfoDef.getLightIntensity(feature));
float lightMass = (float) IsotopicLabelExtraInfoDef.getLightMass(feature);
float heavyMass = (float) IsotopicLabelExtraInfoDef.getHeavyMass(feature);
//if we haven't stored light and heavy masses, take our best guess
if (lightMass == 0 || heavyMass == 0)
{
//singly protonated mass. This is OK if there's no n-terminal label.
//TODO: store light and heavy mass, pass them through
lightMass = precursorNeutralMass +
(float) PeptideGenerator.getMasses(true)['h'];
float massDiff = 0;
if (IsotopicLabelExtraInfoDef.hasLabel(feature))
{
int labelCount = IsotopicLabelExtraInfoDef.getLabelCount(feature);
//HACK... sometimes this value doesn't get stored correctly on features
if (labelCount == 0) labelCount++;
massDiff = labelCount * (IsotopicLabelExtraInfoDef.getLabel(feature).getHeavy() -
IsotopicLabelExtraInfoDef.getLabel(feature).getLight());
}
else
{
if (_isotopicLabel != null)
massDiff = _isotopicLabel.getHeavy() - _isotopicLabel.getLight();
arElement.setAttribute("heavy_mass", "" + (lightMass + massDiff));
}
heavyMass = lightMass + massDiff;
}
arElement.setAttribute("light_mass", "" + lightMass);
arElement.setAttribute("heavy_mass", "" + heavyMass);
arElement.setAttribute("heavy_firstscan", "" + IsotopicLabelExtraInfoDef.getLightFirstScan(feature));
arElement.setAttribute("heavy_lastscan", "" + IsotopicLabelExtraInfoDef.getLightLastScan(feature));
arElement.setAttribute("light_firstscan", "" + IsotopicLabelExtraInfoDef.getHeavyFirstScan(feature));
arElement.setAttribute("light_lastscan", "" + IsotopicLabelExtraInfoDef.getHeavyLastScan(feature));
switch (ratioMode)
{
case RATIO_MODE_Q3:
arElement.setAttribute("q2_light_area", "" + IsotopicLabelExtraInfoDef.getLightIntensity(feature));
arElement.setAttribute("q2_heavy_area", "" + IsotopicLabelExtraInfoDef.getHeavyIntensity(feature));
break;
default:
double heavy2LightRatio = 999;
if (ratio != 0)
heavy2LightRatio = 1.0 / ratio;
arElement.setAttribute("heavy2light_ratio", "" + heavy2LightRatio);
break;
}
xmlBeansAnalysisResult.getDomNode().appendChild(arElement);
}
List<ModifiedAminoAcid>[] modifiedAminoAcids = MS2ExtraInfoDef.getModifiedAminoAcids(feature);
addModificationsToSearchHit(searchHit, modifiedAminoAcids,
MS2ExtraInfoDef.getNtermModMass(feature), MS2ExtraInfoDef.getCtermModMass(feature));
try
{
String fragment = _firstRunSummary.getSpectrumQueryArray(0).xmlText(_optionsForPrinting);
// String[] pieces = fragment.split("<[\\/]*xml-fragment[^>]*>");
// fragment = pieces[pieces.length-1];
fragment = fragment.replaceAll("<pep:","<");
fragment = fragment.replaceAll("</pep:","</");
//Empty namespace attributes are created, and I don't know of a cleaner way to get rid of them
//TODO: find a cleaner way to get rid of xmlns attrs on q3ratio_summary, peptideprophet_result
fragment = fragment.replaceAll("xmlns=\"\"", "");
fragment = fragment + "\n";
pw.print(fragment);
pw.flush();
}
catch (Exception e)
{
e.printStackTrace(System.err);
}
_xmlBeansRunSummaryArray[0].removeSpectrumQuery(0);
}
/**
* Make absolutely sure that we write a quantitative summary declaration at the top of the file, if either:
* -the incoming FeatureFile told us it had ratios, or
* -any feature in the file has ratios
*/
protected void preWrite()
{
super.preWrite();
boolean shouldWriteRatioDeclaration = featureSetHasRatioDeclaration;
for (Feature feature : _features)
{
if (IsotopicLabelExtraInfoDef.hasRatio(feature))
{
shouldWriteRatioDeclaration = true;
_isotopicLabel = IsotopicLabelExtraInfoDef.getLabel(feature);
break;
}
}
if (shouldWriteRatioDeclaration)
{
MsmsPipelineAnalysisDocument.MsmsPipelineAnalysis.AnalysisSummary quantAnalysisSummary =
_xmlBeansAnalysis.addNewAnalysisSummary();
switch (ratioMode)
{
case RATIO_MODE_Q3:
quantAnalysisSummary.setAnalysis("q3");
Element ratioSummaryElement =
quantAnalysisSummary.getDomNode().getOwnerDocument().createElement("q3ratio_summary");
ratioSummaryElement.setAttribute("version", "1.2");
ratioSummaryElement.setAttribute("author","Marc Coram");
if (_isotopicLabel != null)
{
ratioSummaryElement.setAttribute("labeled_residues", "" + _isotopicLabel.getResidue());
ratioSummaryElement.setAttribute("massdiff", "" +
(_isotopicLabel.getHeavy() - _isotopicLabel.getLight()));
}
//TODO: fix this HACK
ratioSummaryElement.setAttribute("massTol", ".25");
quantAnalysisSummary.getDomNode().appendChild(ratioSummaryElement);
//HACK
MsmsPipelineAnalysisDocument.MsmsPipelineAnalysis.MsmsRunSummary.AnalysisTimestamp q3Timestamp =
_xmlBeansRunSummaryArray[0].addNewAnalysisTimestamp();
q3Timestamp.setAnalysis("q3");
q3Timestamp.setTime(new GregorianCalendar());
q3Timestamp.setId(1);
break;
case RATIO_MODE_XPRESS:
quantAnalysisSummary.setAnalysis("xpress");
XpressratioSummaryDocument summaryDoc = XpressratioSummaryDocument.Factory.newInstance();
XpressratioSummaryDocument.XpressratioSummary xpressRatioSummaryOtherDoc =
summaryDoc.addNewXpressratioSummary();
if (_isotopicLabel != null)
{
xpressRatioSummaryOtherDoc.setXpressLight((long) _isotopicLabel.getLight());
xpressRatioSummaryOtherDoc.setMassdiff("" + (_isotopicLabel.getHeavy() - _isotopicLabel.getLight()));
xpressRatioSummaryOtherDoc.setLabeledResidues("" + _isotopicLabel.getResidue());
}
Node ratioSummaryNode =
quantAnalysisSummary.getDomNode().getOwnerDocument().importNode(xpressRatioSummaryOtherDoc.getDomNode(), true);
quantAnalysisSummary.getDomNode().appendChild(ratioSummaryNode);
break;
}
}
//Stuff specific to trypsin
//TODO: expose this, make it pluggable?
MsmsPipelineAnalysisDocument.MsmsPipelineAnalysis.MsmsRunSummary.SampleEnzyme
xmlBeansSampleEnzyme = _firstRunSummary.addNewSampleEnzyme();
xmlBeansSampleEnzyme.setName("trypsin");
MsmsPipelineAnalysisDocument.MsmsPipelineAnalysis.MsmsRunSummary.SampleEnzyme.Specificity
xmlBeansSpecificity = xmlBeansSampleEnzyme.addNewSpecificity();
xmlBeansSpecificity.setCut("KR");
xmlBeansSpecificity.setNoCut("P");
xmlBeansSpecificity.setSense(MsmsPipelineAnalysisDocument.MsmsPipelineAnalysis.MsmsRunSummary.SampleEnzyme.Specificity.Sense.Enum.forString("C"));
}
/**
* Write out the full document, with all modifications and features, to a file
*
* It's too bad I have to override the superclass version here. The only reason
* that's necessary is to strip out the "pep:" stuff, because programs like RefreshParser
* only _pretend_ to parse XML. In practice they're looking for actual strings like
* "/msms_run_summary"
* @throws IOException
*/
// public void write(File file) throws IOException
// {
// preWrite();
//
// //add a sentinel node that tells us where to split the document to insert features and modifications,
// //which, conveniently, is the same place for both
// Node runSummaryNode = _firstRunSummary.getDomNode();
// Node featureLocationNode = runSummaryNode.getOwnerDocument().createElement("SENTINEL_FEATURE_LOCATION");
//
// runSummaryNode.appendChild(featureLocationNode);
// //create and break up the xml that defines the document structure
// String documentShell = _xmlBeansPepXmlDoc.xmlText(_optionsForPrinting);
//
// String[] halves = documentShell.split("<SENTINEL_FEATURE_LOCATION[^\\/]*\\/>");
// if (halves.length != 2)
// {
// _log.error("Failed to create document shell for writing");
// return;
// }
//
// _documentPrefix = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + halves[0];
// _documentPostscript = halves[1];
//
// //remove our dummy node
// runSummaryNode.removeChild(featureLocationNode);
//
//
// PrintWriter pw = new PrintWriter(file);
// pw.print(_documentPrefix);
// writeModifications(pw);
// writeSpectrumQueries(pw);
// pw.print(_documentPostscript);
// pw.flush();
// }
public int getFirstSpectrumQueryIndex()
{
return firstSpectrumQueryIndex;
}
public void setFirstSpectrumQueryIndex(int firstSpectrumQueryIndex)
{
this.firstSpectrumQueryIndex = firstSpectrumQueryIndex;
}
public int getRatioMode()
{
return ratioMode;
}
public void setRatioMode(int ratioMode)
{
this.ratioMode = ratioMode;
}
}