/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package act.installer.sequence;
import java.util.List;
import java.util.Set;
import act.server.MongoDB;
import com.mongodb.DBObject;
import act.shared.Seq;
import org.json.JSONObject;
/**
 * A single sequence record that can be written to the act sequence DB.
 *
 * <p>Concrete subclasses provide the individual parts of the record through
 * the abstract accessors; {@code writeToDB} collects them and forwards them
 * to {@code MongoDB.submitToActSeqDB}.
 */
public abstract class SequenceEntry {

  // ==== Fields of importance in SwissProt ====
  // (See sample.json for example)
  // data.sequence.content: "MSTAGKVIKCKAAV.."
  // data.organism.dbReference.{id: 9606, type: "NCBI Taxonomy"}
  // data.organism.name{[{content:Homo sapiens, type:scientific}, {content: Human, type: common}]
  // data.proteinExistence: { type: "evidence at protein level" }
  // data.gene.name: [{content: ADH1B, type: primary}, {content: ADH2, type: synonym}]
  // data.name: "ADH1B_HUMAN"
  // data.protein.recommendedName.fullName: "Alcohol dehydrogenase 1B"
  // data.protein.recommendedName.ecNumber: 1.1.1.1
  // data.accession: [ list of acc#s ]
  //
  // data.reference.[ {citation: {type: "journal article", dbReference.{id:, type:PubMed}, title:XYA } ... ]
  // data.reference.[ {citation: {type: "submission", db:"EMBL/Genbank/DDBJ databases" } ... ]
  //
  // data.dbReference.[{id:x.x.x.x, type:"EC"}...]
  // data.dbReference.[{id:REACT_34, type: Reactome}]
  // data.dbReference.[{id:MetaCyc:MONOMER66-321, type: BioCyc}]
  // also GO, Pfam
  //
  // data.comment.[{ type:"catalytic activity", text: "An alcohol + NAD(+) = an aldehyde or ketone + NADH." }..]
  //
  // data.feature: descriptive notations on sublocation's functions
  // data.comment: extra notes

  /** @return the organism id for this entry; may be {@code null} (writeToDB checks for it). */
  abstract Long getOrgId();

  /** @return the value passed as the EC argument when the entry is written. */
  abstract String getEc();

  /** @return the value passed as the sequence argument when the entry is written. */
  abstract String getSeq();

  /** @return the references passed along when the entry is written. */
  abstract List<JSONObject> getRefs();

  /** @return the reaction ids passed along when the entry is written. */
  abstract Set<Long> getCatalyzedRxns();

  /** @return the metadata object passed along when the entry is written. */
  abstract DBObject getMetadata();

  /**
   * Submits this entry to the sequence DB.
   *
   * <p>The full data is installed as "metadata" in the db. The values
   * extracted separately here are just things we might want to use as keys
   * and join against other collections, e.g., (ec+org+pmid) can be used to
   * assign sequences to brenda actfamilies.
   *
   * @param db  database handle used to look up the organism name and to
   *            submit the entry
   * @param src source database of the entry (genbank, uniprot, swissprot,
   *            trembl, embl)
   * @return the value returned by {@code db.submitToActSeqDB}
   */
  public int writeToDB(MongoDB db, Seq.AccDB src) {
    final Long organismId = getOrgId();

    // Only resolve the organism name when an organism id is present.
    String organismName = null;
    if (organismId != null) {
      organismName = db.getOrganismNameFromId(organismId);
    }

    // Accessors are invoked in the same order as the submit call's arguments.
    final String ecNumber = getEc();
    final String sequence = getSeq();
    final List<JSONObject> references = getRefs();
    final Set<Long> catalyzedRxns = getCatalyzedRxns();
    final DBObject metadata = getMetadata();

    return db.submitToActSeqDB(
        src,
        ecNumber,
        organismName,
        organismId,
        sequence,
        references,
        catalyzedRxns,
        metadata);
  }
}