/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.toolbox.proteomics.filehandler; import org.fhcrc.cpl.toolbox.filehandler.SimpleXMLStreamReader; import javax.xml.stream.XMLStreamException; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.util.*; public class ProtXmlReader { private File _file; public ProtXmlReader(File file) { _file = file; } public ProteinGroupIterator iterator() throws FileNotFoundException, XMLStreamException { return new ProteinGroupIterator(); } public class ProteinGroupIterator implements Iterator<ProteinGroup> { private SimpleXMLStreamReader _parser; private ProteinGroup _nextProteinGroup = null; private FileInputStream _fIn; public ProteinGroupIterator() throws FileNotFoundException, XMLStreamException { _fIn = new FileInputStream(_file); _parser = new SimpleXMLStreamReader(_fIn); } public void close() { try { _fIn.close(); } catch (IOException e) {} try { _parser.close(); } catch (XMLStreamException e) {} } public boolean hasNext() { try { if (null == _nextProteinGroup) _nextProteinGroup = ProteinGroup.getProteinGroup(_parser); } catch (XMLStreamException e) { throw new RuntimeException(e); } return null != _nextProteinGroup; } public ProteinGroup next() { ProteinGroup currentProteinGroup = _nextProteinGroup; _nextProteinGroup = null; return currentProteinGroup; } public void remove() { throw new UnsupportedOperationException(); } public SimpleXMLStreamReader getReader() { return _parser; } } private static int parseIntAllowingNulls(String s) { if (s == null) { return 0; } return Integer.parseInt(s); } private static float parseFloatAllowingNulls(String s) { if (s == null) { return 0; } return Float.parseFloat(s); } public static class Protein implements Cloneable { private String _proteinName; private float _probability; private Float _percentCoverage; private int _totalNumberPeptides; private int _uniquePeptidesCount; private Float _pctSpectrumIds; private List<Peptide> _peptides = new ArrayList<Peptide>(); private List<String> _indistinguishableProteinNames = new ArrayList<String>(); private QuantitationRatio _quantRatio; public Protein() { super(); } public Protein(SimpleXMLStreamReader parser) throws XMLStreamException { String spectrumIdsString = parser.getAttributeValue(null, "pct_spectrum_ids"); Float spectrumIds = null; if (spectrumIdsString != null) { spectrumIds = Float.parseFloat(spectrumIdsString) / 100f; } setPctSpectrumIds(spectrumIds); String coverageString = parser.getAttributeValue(null, "percent_coverage"); Float coverage = null; if (coverageString != null) { coverage = Float.parseFloat(coverageString) / 100f; } setPercentCoverage(coverage); setProbability(Float.parseFloat(parser.getAttributeValue(null, "probability"))); setProteinName(parser.getAttributeValue(null, "protein_name")); setTotalNumberPeptides(parseIntAllowingNulls(parser.getAttributeValue(null, "total_number_peptides"))); String uniqueStrippedPeptides = parser.getAttributeValue(null, "unique_stripped_peptides"); int uniquePeptidesCount = 1; int index = uniqueStrippedPeptides.indexOf('+'); while (index != -1) { uniquePeptidesCount++; index = uniqueStrippedPeptides.indexOf('+', index + 1); } setUniquePeptidesCount(uniquePeptidesCount); loadChildren(parser); } private void loadChildren(SimpleXMLStreamReader parser) throws XMLStreamException { while (parser.hasNext() && !(parser.isEndElement() && "protein".equals(parser.getLocalName()))) { parser.next(); if (parser.isStartElement()) { String name = parser.getLocalName(); if ("peptide".equals(name)) _peptides.add(new Peptide(parser)); else if ("indistinguishable_protein".equals(name)) { for (int i = 0; i < parser.getAttributeCount(); i++) { if ("protein_name".equals(parser.getAttributeLocalName(i))) { _indistinguishableProteinNames.add(parser.getAttributeValue(i)); } } } else if ("XPressRatio".equals(name) || "Q3Ratio".equals(name)) { _quantRatio = new QuantitationRatio(parser); } } } } public Protein clone() { try { return (Protein)super.clone(); } catch (CloneNotSupportedException e) { throw new Error("This shouldn't happen", e); } } public String getProteinName() { return _proteinName; } public void setProteinName(String proteinName) { this._proteinName = proteinName; } public Float getPctSpectrumIds() { return _pctSpectrumIds; } public void setPctSpectrumIds(Float pctSpectrumIds) { this._pctSpectrumIds = pctSpectrumIds; } public Float getPercentCoverage() { return _percentCoverage; } public void setPercentCoverage(Float percentCoverage) { this._percentCoverage = percentCoverage; } public float getProbability() { return _probability; } public void setProbability(float probability) { this._probability = probability; } public int getTotalNumberPeptides() { return _totalNumberPeptides; } public void setTotalNumberPeptides(int totalNumberPeptides) { this._totalNumberPeptides = totalNumberPeptides; } public List<Peptide> getPeptides() { return _peptides; } public String toString() { return _proteinName + " " + _probability; } public List<String> getIndistinguishableProteinNames() { return _indistinguishableProteinNames; } public int getUniquePeptidesCount() { return _uniquePeptidesCount; } public void setUniquePeptidesCount(int uniquePeptidesCount) { _uniquePeptidesCount = uniquePeptidesCount; } public QuantitationRatio getQuantitationRatio() { return _quantRatio; } } public static class Peptide { private String _peptideSequence; private int _charge; private float _nspAdjustedProbability; private float _weight; private boolean _isNondegenerateEvidence; private int _enzymaticTermini; private float _siblingPeptides; private int _siblingPeptidesBin; private int _instances; private boolean _contributingEvidence; private float _calcNeutralPepMass; public Peptide() { super(); } public Peptide(SimpleXMLStreamReader parser) { setCalcNeutralPepMass(Float.parseFloat(parser.getAttributeValue(null, "calc_neutral_pep_mass"))); setCharge(Integer.parseInt(parser.getAttributeValue(null, "charge"))); setContributingEvidence("Y".equals(parser.getAttributeValue(null, "is_contributing_evidence"))); //dhmay adding default: //In the case of an empty string here, which we've seen, fill in 2 as default //value for number of tryptic ends. I checked with Jimmy, and he said //this would be the least harmful default. Obviously it would be better //just to carry forward the ends correctly String enzymaticTerminiString = parser.getAttributeValue(null, "n_enzymatic_termini"); if (enzymaticTerminiString == null || enzymaticTerminiString.length() == 0) enzymaticTerminiString = "2"; setEnzymaticTermini(Integer.parseInt(enzymaticTerminiString)); setInstances(Integer.parseInt(parser.getAttributeValue(null, "n_instances"))); setNondegenerateEvidence("Y".equals(parser.getAttributeValue(null, "is_nondegenerate_evidence"))); setNspAdjustedProbability(Float.parseFloat(parser.getAttributeValue(null, "nsp_adjusted_probability"))); setPeptideSequence(parser.getAttributeValue(null, "peptide_sequence")); setSiblingPeptides(Float.parseFloat(parser.getAttributeValue(null, "n_sibling_peptides"))); setSiblingPeptidesBin(Integer.parseInt(parser.getAttributeValue(null, "n_sibling_peptides_bin"))); setWeight(Float.parseFloat(parser.getAttributeValue(null, "weight"))); } public float getCalcNeutralPepMass() { return _calcNeutralPepMass; } public void setCalcNeutralPepMass(float calcNeutralPepMass) { this._calcNeutralPepMass = calcNeutralPepMass; } public int getCharge() { return _charge; } public void setCharge(int charge) { this._charge = charge; } public void setContributingEvidence(boolean contributingEvidence) { _contributingEvidence = contributingEvidence; } public boolean isNondegenerateEvidence() { return _isNondegenerateEvidence; } public void setNondegenerateEvidence(boolean nondegenerateEvidence) { _isNondegenerateEvidence = nondegenerateEvidence; } public int getEnzymaticTermini() { return _enzymaticTermini; } public void setEnzymaticTermini(int enzymaticTermini) { this._enzymaticTermini = enzymaticTermini; } public int getInstances() { return _instances; } public void setInstances(int instances) { this._instances = instances; } public float getSiblingPeptides() { return _siblingPeptides; } public void setSiblingPeptides(float nSiblingPeptides) { this._siblingPeptides = nSiblingPeptides; } public int getSiblingPeptidesBin() { return _siblingPeptidesBin; } public void setSiblingPeptidesBin(int siblingPeptidesBin) { this._siblingPeptidesBin = siblingPeptidesBin; } public float getNspAdjustedProbability() { return _nspAdjustedProbability; } public void setNspAdjustedProbability(float nspAdjustedProbability) { this._nspAdjustedProbability = nspAdjustedProbability; } public String getPeptideSequence() { return _peptideSequence; } public void setPeptideSequence(String peptideSequence) { this._peptideSequence = peptideSequence; } public float getWeight() { return _weight; } public void setWeight(float weight) { this._weight = weight; } public String toString() { return _peptideSequence; } public boolean isContributingEvidence() { return _contributingEvidence; } } public static class QuantitationRatio { private int _proteinGroupId; private float _ratioMean; private float _ratioStandardDev; private int _ratioNumberPeptides; private float _heavy2lightRatioMean; private float _heavy2lightRatioStandardDev; //dhmay adding 2008/11/10 private List<String> _peptides; public QuantitationRatio() { super(); } public QuantitationRatio(SimpleXMLStreamReader parser) { setHeavy2lightRatioMean(Float.parseFloat(parser.getAttributeValue(null, "heavy2light_ratio_mean"))); setHeavy2lightRatioStandardDev(Float.parseFloat(parser.getAttributeValue(null, "heavy2light_ratio_standard_dev"))); setRatioMean(Float.parseFloat(parser.getAttributeValue(null, "ratio_mean"))); setRatioStandardDev(Float.parseFloat(parser.getAttributeValue(null, "ratio_standard_dev"))); setRatioNumberPeptides(Integer.parseInt(parser.getAttributeValue(null, "ratio_number_peptides"))); String peptideString = parser.getAttributeValue(null, "peptide_string"); if (peptideString == null) _peptides = new ArrayList<String>(); else _peptides = Arrays.asList(peptideString.split("\\+")); } public int getProteinGroupId() { return _proteinGroupId; } public void setProteinGroupId(int proteinGroupId) { _proteinGroupId = proteinGroupId; } public float getRatioMean() { return _ratioMean; } public void setRatioMean(float ratioMean) { _ratioMean = ratioMean; } public float getRatioStandardDev() { return _ratioStandardDev; } public void setRatioStandardDev(float ratioStandardDev) { _ratioStandardDev = ratioStandardDev; } public int getRatioNumberPeptides() { return _ratioNumberPeptides; } public void setRatioNumberPeptides(int ratioNumberPeptides) { _ratioNumberPeptides = ratioNumberPeptides; } public float getHeavy2lightRatioMean() { return _heavy2lightRatioMean; } public void setHeavy2lightRatioMean(float heavy2lightRatioMean) { _heavy2lightRatioMean = heavy2lightRatioMean; } public float getHeavy2lightRatioStandardDev() { return _heavy2lightRatioStandardDev; } public void setHeavy2lightRatioStandardDev(float heavy2lightRatioStandardDev) { _heavy2lightRatioStandardDev = heavy2lightRatioStandardDev; } public List<String> getPeptides() { return _peptides; } public void setPeptides(List<String> _peptides) { this._peptides = _peptides; } } }