/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package act.installer.metacyc; import org.biopax.paxtools.model.Model; import org.biopax.paxtools.model.BioPAXElement; // Entities import org.biopax.paxtools.model.level3.Protein; import org.biopax.paxtools.model.level3.Rna; import org.biopax.paxtools.model.level3.ProteinReference; import org.biopax.paxtools.model.level3.RnaReference; import org.biopax.paxtools.model.level3.SequenceEntityReference; import org.biopax.paxtools.model.level3.SmallMolecule; import org.biopax.paxtools.model.level3.SmallMoleculeReference; import org.biopax.paxtools.model.level3.Complex; import org.biopax.paxtools.model.level3.ChemicalStructure; // Processes import org.biopax.paxtools.model.level3.Pathway; import org.biopax.paxtools.model.level3.BiochemicalPathwayStep; import org.biopax.paxtools.model.level3.Catalysis; // a type of process that appears as a step import org.biopax.paxtools.model.level3.Modulation; // another process that appears as a step import org.biopax.paxtools.model.level3.BiochemicalReaction; import org.biopax.paxtools.model.level3.TransportWithBiochemicalReaction; import org.biopax.paxtools.model.level3.Transport; import org.biopax.paxtools.model.level3.ComplexAssembly; import org.biopax.paxtools.model.level3.Conversion; // Annotations import org.biopax.paxtools.model.level3.CellularLocationVocabulary; import org.biopax.paxtools.model.level3.EvidenceCodeVocabulary; import org.biopax.paxtools.model.level3.RelationshipTypeVocabulary; import org.biopax.paxtools.model.level3.ControlledVocabulary; import org.biopax.paxtools.model.level3.Stoichiometry; import org.biopax.paxtools.model.level3.DeltaG; import org.biopax.paxtools.model.level3.BioSource; // References import org.biopax.paxtools.model.level3.Evidence; import org.biopax.paxtools.model.level3.Provenance; import org.biopax.paxtools.model.level3.PublicationXref; import org.biopax.paxtools.model.level3.RelationshipXref; import org.biopax.paxtools.model.level3.UnificationXref; import org.biopax.paxtools.io.BioPAXIOHandler; import org.biopax.paxtools.io.SimpleIOHandler; import org.biopax.paxtools.controller.Visitor; import org.biopax.paxtools.controller.Traverser; import org.biopax.paxtools.controller.PropertyEditor; import org.biopax.paxtools.controller.SimpleEditorMap; import org.biopax.paxtools.model.BioPAXFactory; import org.biopax.paxtools.model.BioPAXLevel; import org.biopax.paxtools.model.level3.Xref; import org.biopax.paxtools.model.level3.Interaction; import org.biopax.paxtools.model.level3.Entity; import org.biopax.paxtools.model.level3.StructureFormatType; // enums: import org.biopax.paxtools.model.level3.StepDirection; import org.biopax.paxtools.model.level3.CatalysisDirectionType; import org.biopax.paxtools.model.level3.ConversionDirectionType; import org.biopax.paxtools.model.level3.ControlType; // things that go into BPElement import org.biopax.paxtools.model.level3.Named; import org.biopax.paxtools.model.level3.XReferrable; import org.biopax.paxtools.model.level3.Level3Element; import org.biopax.paxtools.model.level3.Observable; import java.io.FileInputStream; import java.util.HashSet; import java.util.Set; import java.util.Map; import java.util.Collection; import java.util.HashMap; public class BioPaxFile { static final boolean quiet = true; OrganismComposition organism; public BioPaxFile(OrganismComposition o) { this.organism = o; } public void initFrom(FileInputStream is) { BioPAXIOHandler handler = new SimpleIOHandler(); Model model = handler.convertFromOWL(is); Set<BioPAXElement> objects = model.getObjects(); populateOrganismModel(objects); } private void populateOrganismModel(Set<BioPAXElement> all) { for (BioPAXElement e : all) { BPElement basic = getBasicData(e); if (!quiet) System.out.println("/--- ID: " + e.getRDFId().substring("http://biocyc.org/biopax/biopax-level3".length())); // Entities if (e instanceof Protein) addProtein(basic, (Protein)e); else if (e instanceof Rna) addRna(basic, (Rna)e); else if (e instanceof ProteinReference) addProteinRnaReference(basic, (ProteinReference)e); else if (e instanceof RnaReference) addProteinRnaReference(basic, (RnaReference)e); else if (e instanceof SmallMolecule) addSmallMolecule(basic, (SmallMolecule)e); else if (e instanceof SmallMoleculeReference) addSmallMoleculeReference(basic, (SmallMoleculeReference)e); else if (e instanceof Complex) addComplex(basic, (Complex)e); else if (e instanceof ChemicalStructure) addChemicalStructure(basic, (ChemicalStructure)e); // Processes else if (e instanceof Pathway) addPathway(basic, (Pathway)e); else if (e instanceof BiochemicalPathwayStep) addBiochemicalPathwayStep(basic, (BiochemicalPathwayStep)e); else if (e instanceof Catalysis) addCatalysis(basic, (Catalysis)e); else if (e instanceof Modulation) addModulation(basic, (Modulation)e); else if (e instanceof BiochemicalReaction) addConversion(basic, (BiochemicalReaction)e); else if (e instanceof TransportWithBiochemicalReaction) addConversion(basic, (TransportWithBiochemicalReaction)e); else if (e instanceof Transport) addConversion(basic, (Transport)e); else if (e instanceof ComplexAssembly) addConversion(basic, (ComplexAssembly)e); // Annotations else if (e instanceof CellularLocationVocabulary) addTerm(basic, (CellularLocationVocabulary)e); else if (e instanceof EvidenceCodeVocabulary) addTerm(basic, (EvidenceCodeVocabulary)e); else if (e instanceof RelationshipTypeVocabulary) addTerm(basic, (RelationshipTypeVocabulary)e); else if (e instanceof Stoichiometry) addStoichiometry(basic, (Stoichiometry)e); else if (e instanceof DeltaG) addDeltaG(basic, (DeltaG)e); else if (e instanceof BioSource) addBioSource(basic, (BioSource)e); // References else if (e instanceof Evidence) addEvidence(basic, (Evidence)e); else if (e instanceof Provenance) addProvenance(basic, (Provenance)e); else if (e instanceof PublicationXref) addPublicationXref(basic, (PublicationXref)e); else if (e instanceof RelationshipXref) addRelationshipXref(basic, (RelationshipXref)e); else if (e instanceof UnificationXref) addUnificationXref(basic, (UnificationXref)e); // Extra elements that we currently do not handle else if (SeenButNotHandled.haveSeen(e)) addGeneric(basic); else { System.err.println("Unexpected BioPAX element: " + e + "\nOf class: " + e.getClass()); System.exit(-1); } } } void addProtein(BPElement basics, Protein p) { Resource refToSeq; BioPAXElement entityRef = p.getEntityReference(); if (entityRef != null) { refToSeq = new Resource(id(entityRef)); } else { refToSeq = null; // Encountered for Example-Complex1: We cannot seem to locate the ref for the complex // that makes up a protein. The documentation says it should be in entityRef // and not the deprecated "getMemberPhysicalEntity" (which is also empty, btw) // "Please avoid using this property in your BioPAX L3 models unless absolutely // sure/required, for there is an alternative way (using // PhysicalEntity/entityReference/memberEntityReference), and this will // probably be deprecated in the future BioPAX releases." // Where is the reference to the Complex?? BUG? System.out.println("---- There might be a BioPAX bug when the protein is a complex."); System.out.println("---- We cannot find the entityReference to the Complex that the"); System.out.println("---- Protein is made of (instead of sequence), even though the"); System.out.println("---- documentation says it should be there."); } act.installer.metacyc.entities.Protein protein = new act.installer.metacyc.entities.Protein(basics, refToSeq); this.organism.add(protein.getID(), protein); } void addRna(BPElement basics, Rna r) { Resource refToSeq = new Resource(id(r.getEntityReference())); Resource localization; if (r.getCellularLocation() == null) { localization = null; // compartment is not necessarily specified } else { localization = new Resource( id(r.getCellularLocation()) ); } act.installer.metacyc.entities.RNA rna = new act.installer.metacyc.entities.RNA(basics, refToSeq, localization); this.organism.add(rna.getID(), rna); } // Both ProteinReference and RnaReference are subclasses of // SequenceEntityReference, which provides the getSeq, getOrg methods void addProteinRnaReference(BPElement basics, SequenceEntityReference e) { String seq = e.getSequence(); Set<String> comments = e.getComment(); String name = e.getStandardName(); Resource org = e.getOrganism() != null ? new Resource( id(e.getOrganism()) ) : null; Set<Resource> memRef = mapToPtrs( e.getMemberEntityReference() ); act.installer.metacyc.entities.ProteinRNARef ref = new act.installer.metacyc.entities.ProteinRNARef(basics, org, seq, name, comments, memRef); this.organism.add(ref.getID(), ref); if (!quiet) System.out.println(ref.getStandardName()); } void addSmallMolecule(BPElement basics, SmallMolecule e) { Resource smRef = mapToPtrs( e.getEntityReference() ); Resource loc = mapToPtrs( e.getCellularLocation() ); act.installer.metacyc.entities.SmallMolecule sm = new act.installer.metacyc.entities.SmallMolecule(basics, smRef, loc); this.organism.add(sm.getID(), sm); if (!quiet) System.out.println(sm.getStandardName()); } void addSmallMoleculeReference(BPElement basics, SmallMoleculeReference e) { Set<Resource> memRefs = mapToPtrs( e.getMemberEntityReference() ); Resource struc = mapToPtrs( e.getStructure() ); Float molw = e.getMolecularWeight(); act.installer.metacyc.entities.SmallMoleculeRef smref = new act.installer.metacyc.entities.SmallMoleculeRef(basics, memRefs, struc, molw); this.organism.add(smref.getID(), smref); if (!quiet) System.out.println(smref.getStandardName()); } void addComplex(BPElement basics, Complex e) { Set<Resource> stoi = mapToPtrs( e.getComponentStoichiometry() ); Set<Resource> comp = mapToPtrs( e.getComponent() ); act.installer.metacyc.entities.Complex complex = new act.installer.metacyc.entities.Complex(basics, stoi, comp); this.organism.add(complex.getID(), complex); if (!quiet) System.out.println(complex.getStandardName()); } void addChemicalStructure(BPElement basics, ChemicalStructure e) { StructureFormatType format = e.getStructureFormat(); String data = e.getStructureData(); data = data.replaceAll("<", "<"); data = data.replaceAll(">", "<"); data = data.replaceAll(""", "\""); act.installer.metacyc.entities.ChemicalStructure struc = new act.installer.metacyc.entities.ChemicalStructure(basics, format, data); this.organism.add(struc.getID(), struc); if (!quiet) System.out.println(struc.getSMILES()); } void addPathway(BPElement basics, Pathway e) { Set<Resource> order = mapToPtrs( e.getPathwayOrder() ); Set<Resource> components = mapToPtrs( e.getPathwayComponent() ); act.installer.metacyc.processes.Pathway pathway = new act.installer.metacyc.processes.Pathway(basics, order, components); this.organism.add(pathway.getID(), pathway); if (!quiet) System.out.println(pathway.getStandardName()); } void addBiochemicalPathwayStep(BPElement basics, BiochemicalPathwayStep e) { StepDirection dir = e.getStepDirection(); Resource conv = mapToPtrs( e.getStepConversion() ); Set<Resource> proc = mapToPtrs( e.getStepProcess() ); Set<Resource> next = mapToPtrs( e.getNextStep() ); act.installer.metacyc.processes.BiochemicalPathwayStep step = new act.installer.metacyc.processes.BiochemicalPathwayStep(basics, dir, conv, proc, next); this.organism.add(step.getID(), step); if (!quiet) System.out.println(step.getConversion()); } void addCatalysis(BPElement basics, Catalysis e) { CatalysisDirectionType dir = e.getCatalysisDirection(); ControlType typ = e.getControlType(); Set<Resource> controller = mapToPtrs( e.getController() ); Set<Resource> controlled = mapToPtrs( e.getControlled() ); Set<Resource> cofactors = mapToPtrs( e.getCofactor() ); act.installer.metacyc.processes.Catalysis catalysis = new act.installer.metacyc.processes.Catalysis(basics, dir, typ, controller, controlled, cofactors); this.organism.add(catalysis.getID(), catalysis); if (!quiet) System.out.println(catalysis.getStandardName()); } void addModulation(BPElement basics, Modulation e) { ControlType typ = e.getControlType(); Set<Resource> controller = mapToPtrs( e.getController() ); Set<Resource> controlled = mapToPtrs( e.getControlled() ); act.installer.metacyc.processes.Modulation modulate = new act.installer.metacyc.processes.Modulation(basics, typ, controller, controlled); this.organism.add(modulate.getID(), modulate); if (!quiet) System.out.println(modulate.getStandardName()); } // BiochemicalReaction, Transport, TransportWithBiochemicalReaction, ComplexAssembly are // subclasses of Conversion, in model.level3, and in our datamodel they are // annotated as different types of conversions using an Enum in Conversion void addConversion(BPElement basics, Conversion e) { Set<Resource> left = mapToPtrs( e.getLeft() ); Set<Resource> right = mapToPtrs( e.getRight() ); Set<Resource> stoi = mapToPtrs( e.getParticipantStoichiometry() ); ConversionDirectionType dir = e.getConversionDirection(); Boolean spont; /* Behold and despair! A lamentable hack required thanks to the Biopax parser's weird handling of <bp:spontaneous> * tags! For additional, see the discussion that culminates in the approach below: * https://github.com/20n/act/issues/541#issuecomment-265618708 */ if (e.getSpontaneous() == null) { // If there is no spontaneous tag, then assume the reaction in not spontaneous; spont = Boolean.FALSE; } else if (!e.getSpontaneous()) { /* If there is a spontaneous tag, the Biopax parser *always* sets spontaneous to false! However, it is always * non-null when a spontaneous tag exists, and I have to assume that the presence of a tag indicates some manner * of spontaneity in a reaction. Alas, the directionality information that appears as the tag text seems to be * lost by the parser. No matter--we press onward! */ spont = Boolean.TRUE; } else { /* Here is the truly weird case: the parser decides to set spontaneous to true. I've confirmed in actv01 that * this has not happened on the most recent installer run, and so will assume here that this case should not be * possible under the current parser version and data model. As such, we make it a *hard assumption* by crashing * the installer if ever we discover that we have in fact found one of these impossible `true` spontaneous values. */ throw new RuntimeException(String.format("Found a 'true' spontaneous reaction, which should be impossible: %s", e.toString())); // This will print the reaction identifier for easier debugging. } // ec, deltaG are only set for BiochemicalReaction, for others they are empty Set<String> ec = new HashSet<String>(); Set<Resource> deltaG = new HashSet<Resource>(); act.installer.metacyc.processes.Conversion.TYPE type = null; if (e instanceof BiochemicalReaction) { ec = ((BiochemicalReaction)e).getECNumber(); deltaG = mapToPtrs( ((BiochemicalReaction)e).getDeltaG() ); type = act.installer.metacyc.processes.Conversion.TYPE.BIOCHEMICAL_RXN; } else if (e instanceof Transport) { type = act.installer.metacyc.processes.Conversion.TYPE.TRANSPORT; } else if (e instanceof TransportWithBiochemicalReaction) { type = act.installer.metacyc.processes.Conversion.TYPE.TRANSPORT_W_BIOCHEMICAL_RXN; } else if (e instanceof ComplexAssembly) { type = act.installer.metacyc.processes.Conversion.TYPE.COMPLEX_ASSEMBLY; } BPElement rxn = new act.installer.metacyc.processes.Conversion(basics, left, right, stoi, dir, spont, ec, deltaG, type); this.organism.add(rxn.getID(), rxn); if (!quiet) System.out.println(rxn.getStandardName()); } // CellularLocationVocabulary, EvidenceCodeVocabulary, RelationshipTypeVocabulary // are subclasses of ControlledVocabulary, which provides the Set<String> getTerm fn void addTerm(BPElement basics, ControlledVocabulary e) { Set<String> terms = e.getTerm(); act.installer.metacyc.annotations.Term t = new act.installer.metacyc.annotations.Term(basics, terms); this.organism.add(t.getID(), t); if (!quiet) System.out.println(terms); } void addStoichiometry(BPElement basics, Stoichiometry e) { Resource physicalEntity = mapToPtrs( e.getPhysicalEntity() ); float coeff = e.getStoichiometricCoefficient(); act.installer.metacyc.annotations.Stoichiometry s = new act.installer.metacyc.annotations.Stoichiometry(basics, physicalEntity, coeff); this.organism.add(s.getID(), s); if (!quiet) System.out.println("coeff: " + s.getCoefficient() + " on " + s.getPhysicalEntity()); } void addDeltaG(BPElement basics, DeltaG e) { Float dg = e.getDeltaGPrime0(); act.installer.metacyc.annotations.DeltaG deltaG = new act.installer.metacyc.annotations.DeltaG(basics, dg); this.organism.add(deltaG.getID(), deltaG); if (!quiet) System.out.println(dg); } void addBioSource(BPElement basics, BioSource e) { act.installer.metacyc.annotations.BioSource org = new act.installer.metacyc.annotations.BioSource(basics); this.organism.add(org.getID(), org); if (!quiet) System.out.println(org.getName()); } void addEvidence(BPElement basics, Evidence e) { Set<Resource> codes = mapToPtrs( e.getEvidenceCode() ); act.installer.metacyc.references.Evidence evidence = new act.installer.metacyc.references.Evidence(basics, codes); this.organism.add(evidence.getID(), evidence); if (!quiet) System.out.println(evidence.getStandardName()); } void addProvenance(BPElement basics, Provenance e) { act.installer.metacyc.references.Provenance provenance = new act.installer.metacyc.references.Provenance(basics); this.organism.add(provenance.getID(), provenance); if (!quiet) System.out.println(provenance.getStandardName()); } void addPublicationXref(BPElement basics, PublicationXref e) { int yr = e.getYear(); String title = e.getTitle(); Set<String> src = e.getSource(); String db = e.getDb(); String id = e.getId(); Set<String> auth = e.getAuthor(); act.installer.metacyc.references.Publication pubs = new act.installer.metacyc.references.Publication(basics, yr, title, src, db, id, auth); this.organism.add(pubs.getID(), pubs); if (!quiet) System.out.println(title + " by " + auth); } void addRelationshipXref(BPElement basics, RelationshipXref e) { Resource term = mapToPtrs( e.getRelationshipType() ); String db = e.getDb(); String id = e.getId(); act.installer.metacyc.references.Relationship reln = new act.installer.metacyc.references.Relationship(basics, term, db, id); this.organism.add(reln.getID(), reln); // if (!quiet) System.out.format("reln %s:%s of type: %s\n", db, id, term); } void addUnificationXref(BPElement basics, UnificationXref e) { String db = e.getDb(); String id = e.getId(); act.installer.metacyc.references.Unification unificationXref = new act.installer.metacyc.references.Unification(basics, db, id); this.organism.add(unificationXref.getID(), unificationXref); // if (!quiet) System.out.format("unif %s:%s\n", db, id); } void addGeneric(BPElement basics) { BPElement bpe = new BPElement(basics); this.organism.add(bpe.getID(), bpe); if (!quiet) System.out.println("Added without reading details: " + bpe.getID()); } static String id(BioPAXElement bpe) { return bpe.getRDFId(); } private BPElement getBasicData(BioPAXElement e) { Resource id = new Resource( id(e) ); String standardName = null; // getStandardName() in level3.Named String displayName = null; // getDisplayName() in level3.Named Set<String> name = null; // getName() in level3.Named Set<Resource> xrefs = null; // getXref() in level3.XReferrable Set<Resource> dataSource = null; // getDataSource() in level3.Entity Set<Resource> evidence = null; // getEvidence() in level3.Observable Set<String> comment = null; // getComment in level3.Level3Element if (e instanceof Named) { Named n = (Named)e; standardName = n.getStandardName(); displayName = n.getDisplayName(); name = n.getName(); } if (e instanceof XReferrable) { xrefs = mapToPtrs( ((XReferrable)e).getXref() ); } if (e instanceof Entity) { dataSource = mapToPtrs( ((Entity)e).getDataSource() ); // returns the Provenance entries } if (e instanceof Observable) { evidence = mapToPtrs( ((Observable)e).getEvidence() ); // returns the Evidence entries } if (e instanceof Level3Element) { comment = ((Level3Element)e).getComment(); } return new BPElement(id, standardName, displayName, name, xrefs, dataSource, evidence, comment); } private Resource mapToPtrs(BioPAXElement e) { if (e == null) return null; return new Resource(e.getRDFId()); } private Set<Resource> mapToPtrs(Set s) { if (s == null) return null; Set<Resource> r = new HashSet<Resource>(); for (Object e : s) r.add(new Resource(((BioPAXElement)e).getRDFId())); return r; } } /* =========================================================================== Example-Complex1: Source: ano2cyc/biopax-level3.owl <bp:Protein rdf:ID="Protein53715"> <bp:xref rdf:resource="#UnificationXref53716"/> <bp:standardName rdf:datatype="http://www.w3.org/2001/XMLSchema#string">a glucosyl-glycogenin</bp:standardName> <bp:entityReference> <bp:Complex rdf:ID="Complex53711"> <bp:xref rdf:resource="#UnificationXref53713"/> <bp:xref rdf:resource="#UnificationXref53712"/> <bp:standardName rdf:datatype="http://www.w3.org/2001/XMLSchema#string">a glycogenin</bp:standardName> <bp:dataSource rdf:resource="#Provenance30449"/> </bp:Complex> </bp:entityReference> <bp:dataSource rdf:resource="#Provenance30449"/> <bp:cellularLocation rdf:resource="#CellularLocationVocabulary30461"/> </bp:Protein> =========================================================================== =========================================================================== =========================================================================== */