/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package act.installer.sequence;
import act.server.MongoDB;
import act.shared.Organism;
import act.shared.Reaction;
import act.shared.Seq;
import act.shared.helpers.MongoDBToJSON;
import com.act.biointerpretation.Utils.OrgMinimalPrefixGenerator;
import com.act.biointerpretation.test.util.MockedMongoDB;
import com.act.utils.parser.GenbankInterpreter;
import com.mongodb.DBObject;
import org.biojava.nbio.core.sequence.features.FeatureInterface;
import org.biojava.nbio.core.sequence.template.AbstractSequence;
import org.biojava.nbio.core.sequence.template.Compound;
import org.json.JSONArray;
import org.json.JSONObject;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import static org.junit.Assert.assertEquals;
public class GenbankSeqEntryTest {
private ArrayList<GenbankSeqEntry> proteinSeqEntries;
private ArrayList<GenbankSeqEntry> dnaSeqEntries;
private ArrayList<String> sequences;
@Before
public void setUp() throws Exception {
MockedMongoDB mockAPI = new MockedMongoDB();
Map<Long, String> organismNames = new HashMap<>();
organismNames.put(4000000648L, "Bacillus cereus");
organismNames.put(4000002681L, "Homo sapiens");
organismNames.put(4000005381L, "Rhodobacter capsulatus");
// only information needed for these set of tests is a db with organism id's.
mockAPI.installMocks(new ArrayList<Reaction>(), new ArrayList<Seq>(), organismNames, new HashMap<>());
MongoDB mockDb = mockAPI.getMockMongoDB();
// manually assemble an Org Iterator since you can't mock DBCollection in getDbIteratorOverOrgs()
List<Organism> orgs = new ArrayList<>();
for (Map.Entry<Long, String> orgName : organismNames.entrySet()) {
orgs.add(new Organism(orgName.getKey(), orgName.getValue()));
}
Iterator<Organism> orgIterator = orgs.iterator();
OrgMinimalPrefixGenerator prefixGenerator = new OrgMinimalPrefixGenerator(orgIterator);
Map<String, String> minimalPrefixMapping = prefixGenerator.getMinimalPrefixMapping();
dnaSeqEntries = new ArrayList<>();
proteinSeqEntries = new ArrayList<>();
sequences = new ArrayList<>();
GenbankInterpreter giProtein =
new GenbankInterpreter(new File(this.getClass().getResource("genbank_test_protein.gb").getFile()), "Protein");
giProtein.init();
sequences.add(giProtein.getSequences().get(0).getSequenceAsString());
GenbankSeqEntry seqEntry =
new GenbankSeqEntryFactory().createFromProteinSequenceReference(giProtein.getSequences().get(0), mockDb,
minimalPrefixMapping);
proteinSeqEntries.add(seqEntry);
giProtein =
new GenbankInterpreter(new File(this.getClass().getResource("genbank_test_protein_2.gb").getFile()), "Protein");
giProtein.init();
sequences.add(giProtein.getSequences().get(0).getSequenceAsString());
seqEntry =
new GenbankSeqEntryFactory().createFromProteinSequenceReference(giProtein.getSequences().get(0), mockDb,
minimalPrefixMapping);
proteinSeqEntries.add(seqEntry);
GenbankInterpreter giDna =
new GenbankInterpreter(new File(this.getClass().getResource("genbank_test_dna.gb").getFile()), "DNA");
giDna.init();
AbstractSequence sequence = giDna.getSequences().get(0);
List<FeatureInterface<AbstractSequence<Compound>, Compound>> features = sequence.getFeatures();
for (FeatureInterface<AbstractSequence<Compound>, Compound> feature : features) {
if (feature.getType().equals("CDS") && feature.getQualifiers().containsKey("EC_number")) {
sequences.add(feature.getQualifiers().get("translation").get(0).getValue());
seqEntry =
new GenbankSeqEntryFactory().createFromDNASequenceReference(sequence, feature.getQualifiers(), mockDb,
minimalPrefixMapping);
dnaSeqEntries.add(seqEntry);
}
}
}
@Test
public void testMetadata() {
ArrayList<DBObject> metadatas = new ArrayList<>();
List<String> geneSynonyms = Arrays.asList("STP", "STP1", "SULT1A1");
List<String> emptyGeneSynonyms = new ArrayList<>();
JSONObject obj = new JSONObject();
JSONObject accessionObject = new JSONObject();
accessionObject.put("genbank_protein", new JSONArray(Collections.singletonList("CUB13083")));
obj.put("xref", new JSONObject());
obj.put("synonyms", emptyGeneSynonyms);
obj.put("product_names", Collections.singletonList("Arylamine N-acetyltransferase"));
obj.put("accession", accessionObject);
metadatas.add(MongoDBToJSON.conv(obj));
obj = new JSONObject();
accessionObject = new JSONObject();
accessionObject.put("genbank_protein", new JSONArray(Collections.singletonList("P50225")));
obj.put("xref", new JSONObject());
obj.put("name", "ST1A1_HUMAN");
obj.put("synonyms", geneSynonyms);
obj.put("product_names", Collections.singletonList("Sulfotransferase 1A1"));
obj.put("accession", accessionObject);
metadatas.add(MongoDBToJSON.conv(obj));
obj = new org.json.JSONObject();
accessionObject = new JSONObject();
accessionObject.put("genbank_protein", new JSONArray(Collections.singletonList("BAB21065")));
accessionObject.put("genbank_nucleotide", new JSONArray(Collections.singletonList("AB006984")));
obj.put("xref", new JSONObject());
obj.put("name", "ureA");
obj.put("synonyms", emptyGeneSynonyms);
obj.put("product_names", Collections.singletonList("gamma subunit of urase"));
obj.put("accession", accessionObject);
metadatas.add(MongoDBToJSON.conv(obj));
obj = new org.json.JSONObject();
accessionObject = new JSONObject();
accessionObject.put("genbank_protein", new JSONArray(Collections.singletonList("BAB21066")));
accessionObject.put("genbank_nucleotide", new JSONArray(Collections.singletonList("AB006984")));
obj.put("xref", new JSONObject());
obj.put("name", "ureB");
obj.put("synonyms", emptyGeneSynonyms);
obj.put("product_names", Collections.singletonList("beta subunit of urease"));
obj.put("accession", accessionObject);
metadatas.add(MongoDBToJSON.conv(obj));
obj = new org.json.JSONObject();
accessionObject = new JSONObject();
accessionObject.put("genbank_protein", new JSONArray(Collections.singletonList("BAB21067")));
accessionObject.put("genbank_nucleotide", new JSONArray(Collections.singletonList("AB006984")));
obj.put("xref", new JSONObject());
obj.put("name", "ureC");
obj.put("synonyms", emptyGeneSynonyms);
obj.put("product_names", Collections.singletonList("alpha subunit of urease"));
obj.put("accession", accessionObject);
metadatas.add(MongoDBToJSON.conv(obj));
assertEquals("tests whether metadata is extracted accurately", metadatas.get(0),
proteinSeqEntries.get(0).getMetadata());
assertEquals("tests whether metadata is extracted accurately", metadatas.get(1),
proteinSeqEntries.get(1).getMetadata());
assertEquals("tests whether metadata is extracted accurately", metadatas.get(2),
dnaSeqEntries.get(0).getMetadata());
assertEquals("tests whether metadata is extracted accurately", metadatas.get(3),
dnaSeqEntries.get(1).getMetadata());
assertEquals("tests whether metadata is extracted accurately", metadatas.get(4),
dnaSeqEntries.get(2).getMetadata());
}
@Test
public void testAccession() {
JSONObject accessionObject = new JSONObject();
accessionObject.put("genbank_protein", new JSONArray(Collections.singletonList("CUB13083")));
assertEquals("tests whether accession ID is extracted accurately", accessionObject.toString(),
proteinSeqEntries.get(0).getAccession().toString());
accessionObject = new JSONObject();
accessionObject.put("genbank_protein", new JSONArray(Collections.singletonList("P50225")));
assertEquals("tests whether accession ID is extracted accurately", accessionObject.toString(),
proteinSeqEntries.get(1).getAccession().toString());
accessionObject = new JSONObject();
accessionObject.put("genbank_protein", new JSONArray(Collections.singletonList("BAB21065")));
accessionObject.put("genbank_nucleotide", new JSONArray(Collections.singletonList("AB006984")));
assertEquals("tests whether accession ID is extracted accurately", accessionObject.toString(),
dnaSeqEntries.get(0).getAccession().toString());
accessionObject = new JSONObject();
accessionObject.put("genbank_protein", new JSONArray(Collections.singletonList("BAB21066")));
accessionObject.put("genbank_nucleotide", new JSONArray(Collections.singletonList("AB006984")));
assertEquals("tests whether accession ID is extracted accurately", accessionObject.toString(),
dnaSeqEntries.get(1).getAccession().toString());
accessionObject = new JSONObject();
accessionObject.put("genbank_protein", new JSONArray(Collections.singletonList("BAB21067")));
accessionObject.put("genbank_nucleotide", new JSONArray(Collections.singletonList("AB006984")));
assertEquals("tests whether accession ID is extracted accurately", accessionObject.toString(),
dnaSeqEntries.get(2).getAccession().toString());
}
@Test
public void testGeneName() {
assertEquals("tests whether gene name is extracted accurately", null,
proteinSeqEntries.get(0).getGeneName());
assertEquals("tests whether gene name is extracted accurately", "ST1A1_HUMAN",
proteinSeqEntries.get(1).getGeneName());
assertEquals("tests whether gene name is extracted accurately", "ureA", dnaSeqEntries.get(0).getGeneName());
assertEquals("tests whether gene name is extracted accurately", "ureB", dnaSeqEntries.get(1).getGeneName());
assertEquals("tests whether gene name is extracted accurately", "ureC", dnaSeqEntries.get(2).getGeneName());
}
@Test
public void testGeneSynonyms() {
List<String> geneSynonyms = Arrays.asList("STP", "STP1", "SULT1A1");
assertEquals("tests whether gene synonyms are extracted accurately", geneSynonyms,
proteinSeqEntries.get(1).getGeneSynonyms());
geneSynonyms = new ArrayList<>();
assertEquals("tests whether gene synonyms are extracted accurately", geneSynonyms,
proteinSeqEntries.get(0).getGeneSynonyms());
assertEquals("tests whether gene synonyms are extrated accurately", geneSynonyms,
dnaSeqEntries.get(0).getGeneSynonyms());
assertEquals("tests whether gene synonyms are extrated accurately", geneSynonyms,
dnaSeqEntries.get(1).getGeneSynonyms());
assertEquals("tests whether gene synonyms are extrated accurately", geneSynonyms,
dnaSeqEntries.get(2).getGeneSynonyms());
}
@Test
public void testProductName() {
assertEquals("tests whether product names are extracted accurately",
Collections.singletonList("Arylamine N-acetyltransferase"), proteinSeqEntries.get(0).getProductName());
assertEquals("tests whether product names are extracted accurately",
Collections.singletonList("Sulfotransferase 1A1"), proteinSeqEntries.get(1).getProductName());
assertEquals("tests whether product names are extracted accurately",
Collections.singletonList("gamma subunit of urase"), dnaSeqEntries.get(0).getProductName());
assertEquals("tests whether product names are extracted accurately",
Collections.singletonList("beta subunit of urease"), dnaSeqEntries.get(1).getProductName());
assertEquals("tests whether product names are extracted accurately",
Collections.singletonList("alpha subunit of urease"), dnaSeqEntries.get(2).getProductName());
}
@Test
public void testOrgId() {
assertEquals("tests whether organism ids are extracted accurately", (Long) 4000000648L,
proteinSeqEntries.get(0).getOrgId());
assertEquals("tests whether organism ids are extracted accurately", (Long) 4000002681L,
proteinSeqEntries.get(1).getOrgId());
assertEquals("tests whether organism ids are extracted accurately", (Long) 4000005381L,
dnaSeqEntries.get(0).getOrgId());
assertEquals("tests whether organism ids are extracted accurately", (Long) 4000005381L,
dnaSeqEntries.get(1).getOrgId());
assertEquals("tests whether organism ids are extracted accurately", (Long) 4000005381L,
dnaSeqEntries.get(2).getOrgId());
}
@Test
public void testOrg() {
assertEquals("tests whether organism names are extracted accurately", "Bacillus cereus",
proteinSeqEntries.get(0).getOrg());
assertEquals("tests whether organism names are extracted accurately", "Homo sapiens",
proteinSeqEntries.get(1).getOrg());
assertEquals("tests whether organism names are extracted accurately", "Rhodobacter capsulatus",
dnaSeqEntries.get(0).getOrg());
assertEquals("tests whether organism names are extracted accurately", "Rhodobacter capsulatus",
dnaSeqEntries.get(1).getOrg());
assertEquals("tests whether organism names are extracted accurately", "Rhodobacter capsulatus",
dnaSeqEntries.get(2).getOrg());
}
@Test
public void testSeq() {
assertEquals("tests whether sequences are extracted accurately", sequences.get(0),
proteinSeqEntries.get(0).getSeq());
assertEquals("tests whether sequences are extracted accurately", sequences.get(1),
proteinSeqEntries.get(1).getSeq());
assertEquals("tests whether sequences are extracted accurately", sequences.get(2),
dnaSeqEntries.get(0).getSeq());
assertEquals("tests whether sequences are extracted accurately", sequences.get(3),
dnaSeqEntries.get(1).getSeq());
assertEquals("tests whether sequences are extracted accurately", sequences.get(4),
dnaSeqEntries.get(2).getSeq());
}
@Test
public void testEc() {
assertEquals("tests whether ec_numbers are extracted accurately", "2.3.1.5",
proteinSeqEntries.get(0).getEc());
assertEquals("tests whether ec_numbers are extracted accurately", "2.8.2.1",
proteinSeqEntries.get(1).getEc());
assertEquals("tests whether ec_numbers are extracted accurately", "3.5.1.5",
dnaSeqEntries.get(0).getEc());
assertEquals("tests whether ec_numbers are extracted accurately", "3.5.1.5",
dnaSeqEntries.get(1).getEc());
assertEquals("tests whether ec_numbers are extracted accurately", "3.5.1.5",
dnaSeqEntries.get(2).getEc());
}
@Test
public void testPMID() {
List<JSONObject> pmidRefs = new ArrayList<>();
assertEquals("tests whether PMIDs were assigned accurately", pmidRefs, proteinSeqEntries.get(0).getPmids());
List<String> pmids = Arrays.asList("8363592", "8484775", "8423770", "8033246", "7864863", "7695643", "7581483",
"8912648", "8924211", "9855620", "15616553", "15489334", "8288252", "8093002", "8033270", "24275569",
"25944712", "12471039", "16221673", "20417180", "21723874", "22069470", "9345314", "10762004", "21269460");
for (String pmid : pmids) {
JSONObject obj = new JSONObject();
obj.put("val", pmid);
obj.put("src", "PMID");
pmidRefs.add(obj);
}
assertEquals("tests whether PMIDs were assigned accurately", pmidRefs.toString(),
proteinSeqEntries.get(1).getPmids().toString());
pmidRefs = new ArrayList<>();
JSONObject obj = new JSONObject();
obj.put("src", "PMID");
obj.put("val", "9484481");
pmidRefs.add(obj);
assertEquals("tests whether PMIDs were assigned accurately", pmidRefs.toString(),
dnaSeqEntries.get(0).getPmids().toString());
assertEquals("tests whether PMIDs were assigned accurately", pmidRefs.toString(),
dnaSeqEntries.get(1).getPmids().toString());
assertEquals("tests whether PMIDs were assigned accurately", pmidRefs.toString(),
dnaSeqEntries.get(2).getPmids().toString());
}
@Test
public void testPatents() {
List<JSONObject> patentRefs = new ArrayList<>();
assertEquals("tests whether Patent references were assigned accurately", patentRefs,
proteinSeqEntries.get(0).getPatents());
JSONObject obj = new JSONObject();
obj.put("src", "Patent");
obj.put("country_code", "JP");
obj.put("patent_number", "2008518610");
obj.put("patent_year", "2008");
patentRefs.add(obj);
assertEquals("tests whether Patent references were assigned accurately", patentRefs.toString(),
proteinSeqEntries.get(1).getPatents().toString());
JSONObject obj2 = new JSONObject();
obj2.put("src", "Patent");
obj2.put("country_code", "EP");
obj2.put("patent_number", "2904117");
obj2.put("patent_year", "2015");
patentRefs.add(obj2);
assertEquals("tests whether Patent references were assigned accurately", patentRefs.toString(),
dnaSeqEntries.get(0).getPatents().toString());
assertEquals("tests whether Patent references were assigned accurately", patentRefs.toString(),
dnaSeqEntries.get(1).getPatents().toString());
assertEquals("tests whether Patent references were assigned accurately", patentRefs.toString(),
dnaSeqEntries.get(2).getPatents().toString());
}
}