/*
 * Copyright (C) 2010-2012  "Oh no sequences!"
 *
 * This is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>
 */
package com.ohnosequences.xml.model.uniprot;

import com.ohnosequences.xml.model.go.GoTermXML;
import com.ohnosequences.xml.api.model.XMLElement;
import com.ohnosequences.xml.api.model.XMLElementException;
import java.util.ArrayList;
import java.util.List;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.filter.Filters;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;

/**
 * XML wrapper around a {@code <protein>} element.
 *
 * <p>Simple properties (id, names, sequence, ...) are stored as the text of
 * direct child nodes of the root element. Collections (keywords, comments,
 * features, interactions, GO terms, ...) are stored inside container child
 * elements that are created on demand the first time something is added.
 *
 * @author Pablo Pareja Tobes
 */
public class ProteinXML extends XMLElement {

    public static final String TAG_NAME = "protein";

    public static final String ID_TAG_NAME = "id";
    public static final String NAME_TAG_NAME = "name";
    public static final String FULL_NAME_TAG_NAME = "full_name";
    public static final String SHORT_NAME_TAG_NAME = "short_name";
    public static final String SEQUENCE_TAG_NAME = "sequence";
    public static final String LENGTH_TAG_NAME = "length";
    public static final String ORGANISM_TAG_NAME = "organism";
    public static final String KEYWORDS_TAG_NAME = "keywords";
    public static final String INTERPROS_TAG_NAME = "interpros";
    public static final String COMMENTS_TAG_NAME = "comments";
    public static final String SUBCELLULAR_LOCATIONS_TAG_NAME = "subcellular_locations";
    public static final String ARTICLE_CITATIONS_TAG_NAME = "article_citations";

    public static final String SIGNAL_PEPTIDE_FEATURES = "signal_peptide_features";
    public static final String SPLICE_VARIANT_FEATURES = "splice_variant_features";
    public static final String TRANSMEMBRANE_REGION_FEATURES = "transmembrane_region_features";
    public static final String ACTIVE_SITE_FEATURES = "active_site_features";

    public static final String PROTEIN_PROTEIN_OUTGOING_INTERACTIONS_TAG_NAME = "protein_protein_outgoing_interactions";
    public static final String PROTEIN_PROTEIN_INCOMING_INTERACTIONS_TAG_NAME = "protein_protein_incoming_interactions";
    public static final String PROTEIN_ISOFORM_OUTGOING_INTERACTIONS_TAG_NAME = "protein_isoform_outgoing_interactions";
    public static final String PROTEIN_ISOFORM_INCOMING_INTERACTIONS_TAG_NAME = "protein_isoform_incoming_interactions";

    public static final String PROTEIN_COVERAGE_ABSOLUTE = "protein_coverage_absolute";
    public static final String PROTEIN_COVERAGE_PERCENTAGE = "protein_coverage_percentage";
    public static final String NUMBER_OF_ISOTIGS = "number_of_isotigs";

    public static final String GO_TERMS_TAG_NAME = "go_terms";
    public static final String PROCESS_GO_TERMS_TAG_NAME = "biological_process";
    public static final String FUNCTION_GO_TERMS_TAG_NAME = "molecular_function";
    public static final String COMPONENT_GO_TERMS_TAG_NAME = "cellular_component";

    /**
     * Creates a wrapper around a brand-new, empty {@code <protein>} element.
     */
    public ProteinXML() {
        super(new Element(TAG_NAME));
    }

    /**
     * Wraps an existing JDOM element.
     *
     * @param elem element to wrap; its name must be {@link #TAG_NAME}
     * @throws XMLElementException if the element name is not {@link #TAG_NAME}
     */
    public ProteinXML(Element elem) throws XMLElementException {
        super(elem);
        if (!elem.getName().equals(TAG_NAME)) {
            throw new XMLElementException(XMLElementException.WRONG_TAG_NAME, new XMLElement(elem));
        }
    }

    /**
     * Builds a wrapper from a string handed to the superclass constructor.
     *
     * @param value string representation passed to {@code XMLElement(String)}
     * @throws Exception if the superclass constructor fails, or an
     *         {@link XMLElementException} if the resulting root element is
     *         not named {@link #TAG_NAME}
     */
    public ProteinXML(String value) throws Exception {
        super(value);
        if (!root.getName().equals(TAG_NAME)) {
            throw new XMLElementException(XMLElementException.WRONG_TAG_NAME, new XMLElement(value));
        }
    }

    //----------------SETTERS-------------------

    /** Stores {@code value} as the text of the {@code id} node. */
    public void setId(String value) {
        setNodeText(ID_TAG_NAME, value);
    }

    /** Stores {@code value} (as decimal text) in the {@code length} node. */
    public void setLength(int value) {
        setNodeText(LENGTH_TAG_NAME, String.valueOf(value));
    }

    /** Stores {@code value} (as decimal text) in the {@code protein_coverage_absolute} node. */
    public void setProteinCoverageAbsolute(int value) {
        setNodeText(PROTEIN_COVERAGE_ABSOLUTE, String.valueOf(value));
    }

    /** Stores {@code value} (via {@link String#valueOf(double)}) in the {@code protein_coverage_percentage} node. */
    public void setProteinCoveragePercentage(double value) {
        setNodeText(PROTEIN_COVERAGE_PERCENTAGE, String.valueOf(value));
    }

    /** Stores {@code value} (as decimal text) in the {@code number_of_isotigs} node. */
    public void setNumberOfIsotigs(int value) {
        setNodeText(NUMBER_OF_ISOTIGS, String.valueOf(value));
    }

    /** Stores {@code value} as the text of the {@code name} node. */
    public void setProteinName(String value) {
        setNodeText(NAME_TAG_NAME, value);
    }

    /** Stores {@code value} as the text of the {@code full_name} node. */
    public void setFullName(String value) {
        setNodeText(FULL_NAME_TAG_NAME, value);
    }

    /** Stores {@code value} as the text of the {@code short_name} node. */
    public void setShortName(String value) {
        setNodeText(SHORT_NAME_TAG_NAME, value);
    }

    /** Stores {@code value} as the text of the {@code sequence} node. */
    public void setSequence(String value) {
        setNodeText(SEQUENCE_TAG_NAME, value);
    }

    /** Stores {@code value} as the text of the {@code organism} node. */
    public void setOrganism(String value) {
        setNodeText(ORGANISM_TAG_NAME, value);
    }

    //----------------GETTERS---------------------

    /** @return the text of the {@code id} node, as returned by {@code getNodeText} */
    public String getId() {
        return getNodeText(ID_TAG_NAME);
    }

    /**
     * @return the {@code length} node text parsed with {@link Integer#parseInt(String)}
     * @throws NumberFormatException if the node text is not a valid integer
     *         (NOTE(review): presumably also when the node is missing - confirm
     *         what {@code getNodeText} returns in that case)
     */
    public int getLength() {
        return Integer.parseInt(getNodeText(LENGTH_TAG_NAME));
    }

    /**
     * @return the {@code protein_coverage_absolute} node text parsed with {@link Integer#parseInt(String)}
     * @throws NumberFormatException if the node text is not a valid integer
     */
    public int getProteinCoverageAbsolute() {
        return Integer.parseInt(getNodeText(PROTEIN_COVERAGE_ABSOLUTE));
    }

    /**
     * @return the {@code protein_coverage_percentage} node text parsed with {@link Double#parseDouble(String)}
     * @throws NumberFormatException if the node text is not a valid double
     */
    public double getProteinCoveragePercentage() {
        return Double.parseDouble(getNodeText(PROTEIN_COVERAGE_PERCENTAGE));
    }

    /**
     * @return the {@code number_of_isotigs} node text parsed with {@link Integer#parseInt(String)}
     * @throws NumberFormatException if the node text is not a valid integer
     */
    public int getNumberOfIsotigs() {
        return Integer.parseInt(getNodeText(NUMBER_OF_ISOTIGS));
    }

    /** @return the text of the {@code name} node */
    public String getProteinName() {
        return getNodeText(NAME_TAG_NAME);
    }

    /** @return the text of the {@code full_name} node */
    public String getFullName() {
        return getNodeText(FULL_NAME_TAG_NAME);
    }

    /** @return the text of the {@code short_name} node */
    public String getShortName() {
        return getNodeText(SHORT_NAME_TAG_NAME);
    }

    /** @return the text of the {@code sequence} node */
    public String getSequence() {
        return getNodeText(SEQUENCE_TAG_NAME);
    }

    /** @return the text of the {@code organism} node */
    public String getOrganism() {
        return getNodeText(ORGANISM_TAG_NAME);
    }

    //----------------COLLECTION ADDERS---------------------

    /** Appends the article's element to {@code article_citations}, creating the container if absent. */
    public void addArticleCitation(ArticleXML article) {
        initTag(ARTICLE_CITATIONS_TAG_NAME).addContent(article.asJDomElement());
    }

    /** Appends the feature's element to {@code signal_peptide_features}, creating the container if absent. */
    public void addSignalPeptideFeature(FeatureXML feature) {
        initTag(SIGNAL_PEPTIDE_FEATURES).addContent(feature.asJDomElement());
    }

    /** Appends the feature's element to {@code splice_variant_features}, creating the container if absent. */
    public void addSpliceVariantFeature(FeatureXML feature) {
        initTag(SPLICE_VARIANT_FEATURES).addContent(feature.asJDomElement());
    }

    /** Appends the feature's element to {@code transmembrane_region_features}, creating the container if absent. */
    public void addTransmembraneRegionFeature(FeatureXML feature) {
        initTag(TRANSMEMBRANE_REGION_FEATURES).addContent(feature.asJDomElement());
    }

    /** Appends the feature's element to {@code active_site_features}, creating the container if absent. */
    public void addActiveSiteFeature(FeatureXML feature) {
        initTag(ACTIVE_SITE_FEATURES).addContent(feature.asJDomElement());
    }

    /** Appends the keyword's element to {@code keywords}, creating the container if absent. */
    public void addKeyword(KeywordXML keyword) {
        initTag(KEYWORDS_TAG_NAME).addContent(keyword.asJDomElement());
    }

    /** Appends the interpro's element to {@code interpros}, creating the container if absent. */
    public void addInterpro(InterproXML interpro) {
        initTag(INTERPROS_TAG_NAME).addContent(interpro.asJDomElement());
    }

    /** Appends the comment's element to {@code comments}, creating the container if absent. */
    public void addComment(CommentXML comment) {
        initTag(COMMENTS_TAG_NAME).addContent(comment.asJDomElement());
    }

    /** Appends the protein's element to {@code protein_protein_outgoing_interactions}, creating the container if absent. */
    public void addProteinProteinOutgoingInteraction(ProteinXML prot) {
        initTag(PROTEIN_PROTEIN_OUTGOING_INTERACTIONS_TAG_NAME).addContent(prot.asJDomElement());
    }

    /** Appends the protein's element to {@code protein_protein_incoming_interactions}, creating the container if absent. */
    public void addProteinProteinIncomingInteraction(ProteinXML prot) {
        initTag(PROTEIN_PROTEIN_INCOMING_INTERACTIONS_TAG_NAME).addContent(prot.asJDomElement());
    }

    /** Appends the isoform's element to {@code protein_isoform_outgoing_interactions}, creating the container if absent. */
    public void addProteinIsoformOutgoingInteraction(IsoformXML iso) {
        initTag(PROTEIN_ISOFORM_OUTGOING_INTERACTIONS_TAG_NAME).addContent(iso.asJDomElement());
    }

    /** Appends the isoform's element to {@code protein_isoform_incoming_interactions}, creating the container if absent. */
    public void addProteinIsoformIncomingInteraction(IsoformXML iso) {
        initTag(PROTEIN_ISOFORM_INCOMING_INTERACTIONS_TAG_NAME).addContent(iso.asJDomElement());
    }

    /** Appends the location's element to {@code subcellular_locations}, creating the container if absent. */
    public void addSubcellularLocation(SubcellularLocationXML subCell) {
        initTag(SUBCELLULAR_LOCATIONS_TAG_NAME).addContent(subCell.asJDomElement());
    }

    //----------------COLLECTION GETTERS---------------------

    /**
     * Returns the keywords stored under the {@code keywords} container.
     *
     * @return a new list with one {@link KeywordXML} per keyword child; empty
     *         (never {@code null}) when the protein has no {@code keywords}
     *         container at all
     * @throws XMLElementException if a child cannot be wrapped as a keyword
     */
    public List<KeywordXML> getKeywords() throws XMLElementException {
        List<KeywordXML> list = new ArrayList<KeywordXML>();
        Element keywords = root.getChild(KEYWORDS_TAG_NAME);
        if (keywords == null) {
            // No keyword was ever added: previously this dereferenced null.
            return list;
        }
        for (Element elem : keywords.getChildren(KeywordXML.TAG_NAME)) {
            list.add(new KeywordXML(elem));
        }
        return list;
    }

    /**
     * @return the GO terms under {@code go_terms/molecular_function}, or
     *         {@code null} when either container element is missing
     */
    public List<GoTermXML> getMolecularFunctionGoTerms() {
        return getGoTermsUnder(FUNCTION_GO_TERMS_TAG_NAME);
    }

    /**
     * @return the GO terms under {@code go_terms/biological_process}, or
     *         {@code null} when either container element is missing
     */
    public List<GoTermXML> getBiologicalProcessGoTerms() {
        return getGoTermsUnder(PROCESS_GO_TERMS_TAG_NAME);
    }

    /**
     * @return the GO terms under {@code go_terms/cellular_component}, or
     *         {@code null} when either container element is missing
     */
    public List<GoTermXML> getCellularComponentGoTerms() {
        return getGoTermsUnder(COMPONENT_GO_TERMS_TAG_NAME);
    }

    /**
     * Shared implementation of the three per-aspect GO term getters.
     *
     * @param aspectTagName name of the aspect container below {@code go_terms}
     * @return a new list wrapping every GO term child of that container, or
     *         {@code null} (kept for backward compatibility with existing
     *         callers) when {@code go_terms} or the aspect container is absent
     */
    private List<GoTermXML> getGoTermsUnder(String aspectTagName) {
        Element goTerms = root.getChild(GO_TERMS_TAG_NAME);
        if (goTerms == null) {
            return null;
        }
        Element aspectContainer = goTerms.getChild(aspectTagName);
        if (aspectContainer == null) {
            return null;
        }
        List<GoTermXML> result = new ArrayList<GoTermXML>();
        for (Element elem : aspectContainer.getChildren(GoTermXML.TAG_NAME)) {
            result.add(new GoTermXML(elem));
        }
        return result;
    }

    //----------------GO TERMS---------------------

    /**
     * Adds a GO term to this protein.
     *
     * <p>The {@code go_terms} container is always created if absent. When
     * {@code basedOnAspect} is {@code false} the term is appended directly to
     * it. Otherwise the term is appended to the sub-container matching its
     * aspect (created on demand); a term whose aspect matches none of the
     * three known aspect constants is not added anywhere.
     *
     * @param term          GO term to add
     * @param basedOnAspect whether to file the term under its aspect container
     */
    public void addGoTerm(GoTermXML term, boolean basedOnAspect) {
        Element goTerms = initTag(GO_TERMS_TAG_NAME);

        if (!basedOnAspect) {
            goTerms.addContent(term.asJDomElement());
            return;
        }

        if (term.getAspect().equals(GoTermXML.ASPECT_COMPONENT)) {
            initGoAspectTag(COMPONENT_GO_TERMS_TAG_NAME).addContent(term.asJDomElement());
        } else if (term.getAspect().equals(GoTermXML.ASPECT_FUNCTION)) {
            initGoAspectTag(FUNCTION_GO_TERMS_TAG_NAME).addContent(term.asJDomElement());
        } else if (term.getAspect().equals(GoTermXML.ASPECT_PROCESS)) {
            initGoAspectTag(PROCESS_GO_TERMS_TAG_NAME).addContent(term.asJDomElement());
        }
    }

    /**
     * Moves the GO terms of this protein into their per-aspect containers.
     *
     * <p>The three aspect containers are created first (component, function,
     * process). Then three XPath queries, evaluated against {@code doc}, select
     * every GO term element below any {@code protein} element whose id text
     * equals {@link #getId()}, one query per aspect. All queries run before
     * anything is moved; each match is then detached and re-added through
     * {@link #addGoTerm(GoTermXML, boolean)} with aspect-based filing.
     *
     * <p>NOTE(review): the id is interpolated into the XPath unescaped, so an
     * id containing a single quote would yield a malformed expression.
     * NOTE(review): {@code doc} is taken from {@code root.getDocument()} when
     * unset; behavior for a root not attached to any document is not handled
     * here - confirm callers always attach the element first.
     *
     * @throws JDOMException declared for XPath-related failures
     */
    public void clasifyGoTermsByAspect() throws JDOMException {

        initGoAspectTag(COMPONENT_GO_TERMS_TAG_NAME);
        initGoAspectTag(FUNCTION_GO_TERMS_TAG_NAME);
        initGoAspectTag(PROCESS_GO_TERMS_TAG_NAME);

        if (doc == null) {
            doc = root.getDocument();
        }

        // Everything up to the aspect literal is identical for the three queries.
        String xpathPrefix = "//protein[id/text()='" + getId() + "']//" + GoTermXML.TAG_NAME
                + "[" + GoTermXML.ASPECT_TAG_NAME + "/text()='";
        XPathFactory xpathFactory = XPathFactory.instance();

        XPathExpression<Element> xpProcess =
                xpathFactory.compile(xpathPrefix + GoTermXML.ASPECT_PROCESS + "']", Filters.element());
        List<Element> processGoTerms = xpProcess.evaluate(doc);

        XPathExpression<Element> xpFunction =
                xpathFactory.compile(xpathPrefix + GoTermXML.ASPECT_FUNCTION + "']", Filters.element());
        List<Element> functionGoTerms = xpFunction.evaluate(doc);

        XPathExpression<Element> xpComponent =
                xpathFactory.compile(xpathPrefix + GoTermXML.ASPECT_COMPONENT + "']", Filters.element());
        List<Element> componentGoTerms = xpComponent.evaluate(doc);

        relocateByAspect(processGoTerms);
        relocateByAspect(componentGoTerms);
        relocateByAspect(functionGoTerms);
    }

    /** Detaches each element from its current parent and re-adds it under its aspect container. */
    private void relocateByAspect(List<Element> goTermElements) {
        for (Element goTerm : goTermElements) {
            goTerm.detach();
            this.addGoTerm(new GoTermXML(goTerm), true);
        }
    }

    //----------------CONTAINER HELPERS---------------------

    /**
     * Returns the aspect container named {@code aspectTagName} below
     * {@code go_terms}, creating {@code go_terms} and/or the aspect container
     * when they do not exist yet.
     */
    private Element initGoAspectTag(String aspectTagName) {
        return ensureChild(initTag(GO_TERMS_TAG_NAME), aspectTagName);
    }

    /**
     * Returns the direct child of the root element named {@code tagName},
     * appending a new empty one first when none exists.
     */
    private Element initTag(String tagName) {
        return ensureChild(root, tagName);
    }

    /**
     * Returns the first child of {@code parent} named {@code tagName},
     * appending a new empty element with that name when there is none.
     */
    private static Element ensureChild(Element parent, String tagName) {
        Element child = parent.getChild(tagName);
        if (child == null) {
            child = new Element(tagName);
            parent.addContent(child);
        }
        return child;
    }
}