/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package act.installer.metacyc; import org.biopax.paxtools.model.BioPAXElement; import org.biopax.paxtools.model.level3.EntityFeature; import org.biopax.paxtools.model.level3.ModificationFeature; import org.biopax.paxtools.model.level3.SequenceSite; import org.biopax.paxtools.model.level3.SequenceModificationVocabulary; import org.biopax.paxtools.model.level3.PhysicalEntity; public class SeenButNotHandled { // NOTE: IF THIS IS FOR leishcyc/biopax-level3.owl: // has two very bad data cases Example4 and Example5. // We actually edit those bad annotation out. The diff is in Example(4,5)diff // Do not put exceptions here for such bad data. 
Instead just tweak the datafile public static boolean haveSeen(BioPAXElement e) { // biopax standard is all encompassing, // metacyc references a portion of it, // we handle most of metacyc, but there might be features that we intend // to handle later, and are annotations we have understood where they // appear in the level3 files (and we ret = true for them). // // Once things are handled specifically, we remove them from this function if (e instanceof EntityFeature || e instanceof ModificationFeature || e instanceof SequenceModificationVocabulary || e instanceof SequenceSite) { // protein annotations such as phosphorylation appear as ModificationFeature's (Example1) // and these annotations might be specified to apply on a site as SequenceSite (Example3) return true; } return false; // default, this thing was never seen before } } /* =========================================================================== Example1: Source: aara574087cyc/biopax-level3.owl: <bp:ModificationFeature rdf:ID="ModificationFeature26252"> <bp:modificationType> <bp:SequenceModificationVocabulary rdf:ID="SequenceModificationVocabulary26253"> <bp:xref rdf:resource="#UnificationXref26254"/> <bp:term rdf:datatype="http://www.w3.org/2001/XMLSchema#string">phosphorylation</bp:term> </bp:SequenceModificationVocabulary> </bp:modificationType> </bp:ModificationFeature> =========================================================================== Example3: Source: ecocyc/biopax-level3.owl: Comment: Illustrates SequenceSite annotation to modification (e.g., of type phosphorylation) <bp:Protein rdf:ID="Protein147222"> <bp:xref rdf:resource="#RelationshipXref147209"/> <bp:standardName rdf:datatype="http://www.w3.org/2001/ XMLSchema#string">QseC sensory histidine kinase</bp:standardName> <bp:notFeature> <bp:ModificationFeature rdf:ID="ModificationFeature147212"> <bp:modificationType> <bp:SequenceModificationVocabulary rdf: ID="SequenceModificationVocabulary77431"> <bp:xref 
rdf:resource="#UnificationXref77432"/>
          <bp:term rdf:datatype="http://www.w3.org/2001/XMLSchema#string">phosphorylation</bp:term>
        </bp:SequenceModificationVocabulary>
      </bp:modificationType>
      <bp:featureLocation>
        <bp:SequenceSite rdf:ID="SequenceSite147213">
          <bp:sequencePosition rdf:datatype="http://www.w3.org/2001/XMLSchema#int">246</bp:sequencePosition>
          <bp:positionStatus rdf:datatype="http://www.w3.org/2001/XMLSchema#string">EQUAL</bp:positionStatus>
        </bp:SequenceSite>
      </bp:featureLocation>
      <bp:evidence>
        <bp:Evidence rdf:ID="Evidence145993">
          <bp:xref rdf:resource="#PublicationXref145992"/>
          <bp:evidenceCode>
            <bp:EvidenceCodeVocabulary rdf:ID="EvidenceCodeVocabulary79023">
              <bp:xref rdf:resource="#UnificationXref79024"/>
              <bp:term rdf:datatype="http://www.w3.org/2001/XMLSchema#string">EV-COMP-HINF</bp:term>
            </bp:EvidenceCodeVocabulary>
          </bp:evidenceCode>
        </bp:Evidence>
      </bp:evidence>
    </bp:ModificationFeature>
  </bp:notFeature>

===========================================================================
Example4:
Source: leishcyc/biopax-level3.owl:

// Physical entities in and of themselves are completely fine; it's just that
// we handle their subclasses (Protein, SmallMolecule, Rna, Dna, etc.).
// But in just one file, leishcyc/biopax-level3.owl, a Relationship that is
// typically a (type, id, db) tuple has its id given as a PhysicalEntity that is
// simply called rdf:ID="Protein", with no numeric id etc. Really bad data. (Example4)

  <bp:relationshipType rdf:resource="#RelationshipTypeVocabulary14033"/>
  <bp:id>
    <bp:PhysicalEntity rdf:ID="Protein">
      <bp:comment rdf:datatype="http://www.w3.org/2001/XMLSchema#string">A physical entity consisting of a sequence of amino-acids; a protein monomer; a single polypeptide chain.
An example is the EGFR protein.</bp:comment> </bp:PhysicalEntity> </bp:id> <bp:db rdf:datatype="http://www.w3.org/2001/XMLSchema#string">LeishCyc</bp:db> </bp:RelationshipXref> =========================================================================== Example5: Source: leishcyc/biopax-level3.owl: <bp:left rdf:resource="#SmallMolecule25819"/> is referenced in a reaction as a reactant, but its definition does not contain a SmallMoleculeRef, and instead is just junk that says it is DNA as below. So we just remove the above reference from the reaction! <bp:SmallMolecule rdf:ID="SmallMolecule25819"> <bp:xref rdf:resource="#RelationshipXref25820"/> <bp:standardName rdf:datatype="http://www.w3.org/2001/XMLSchema#string">a deoxyribonucleic acid</bp:standardName> <bp:entityReference rdf:datatype="http://www.w3.org/2001/XMLSchema#string">NIL</bp:entityReference> <bp:dataSource rdf:resource="#Provenance14019"/> <bp:comment rdf:datatype="http://www.w3.org/2001/XMLSchema#string">DNA is a high molecular weight linear polymer composed of nucleotides containing deoxyribose and linked by phosphodiester bonds.</bp:comment> <bp:cellularLocation rdf:resource="#CellularLocationVocabulary14046"/> </bp:SmallMolecule> =========================================================================== Example(4,5)diff: Source: leishcyc/biopax-level3.owl: 19921c19921,19926 < <bp:id rdf:datatype="http://www.w3.org/2001/XMLSchema#string">A physical entity consisting of a sequence of amino-acids; a protein monomer; a single polypeptide chain. An example is the EGFR protein.</bp:id> --- > <bp:id> > <bp:PhysicalEntity rdf:ID="Protein"> > <bp:comment rdf:datatype="http://www.w3.org/2001/XMLSchema#string">A physical entity consisting of a sequence of amino-acids; a protein monomer; a single polypeptide chain. 
An example is > the EGFR protein.</bp:comment> > </bp:PhysicalEntity> > </bp:id> 126569a126575 > <bp:left rdf:resource="#SmallMolecule25819"/> =========================================================================== ExampleN: Source: XXXXXXXXXXXXX/biopax-level3.owl: =========================================================================== */