/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package act.installer;
import act.server.MongoDB;
import act.shared.Organism;
import act.shared.Reaction;
import act.shared.Seq;
import act.shared.helpers.MongoDBToJSON;
import com.act.biointerpretation.Utils.OrgMinimalPrefixGenerator;
import com.act.biointerpretation.test.util.MockedMongoDB;
import org.json.JSONArray;
import org.json.JSONObject;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import static org.junit.Assert.assertEquals;
public class GenbankInstallerTest {
// Mocked database wrapper; re-created in setUp() and read by the tests through getSeqMap().
private MockedMongoDB mockAPI;
// Sequence shared by the seeded entries 91973 and 91974 (seeded with no references and accession-only metadata).
private static final String protSeqNullNull = "MMTNLQKEFFKRLKIPAKEITFNDLDEILLKMGLTLPYENLDIMAGTIKDISKNNLVEKILIQKRGGL" +
"CYELNSLLYYFLMDCGFQVYKVAGTVYDLYDNKWKPDDGHVIIVLTHNNKDYVIDAGFASHLPLHPVPFNGEVISSQTGEYRIRKRTTRKGTHILEMRKGANGESTNFLQ" +
"SEPSHEWKVGYAFTLDPIDEKKVNNIQKVIVEHKESPFNKGAITCKLTDYGHVSLTNKNYTETFKGTKNKRPIESKDYAHILRESFGITQVKYVGKTLERG";
// Sequence of the seeded entry 29034 (seeded with no references and accession-only metadata).
private static final String protSeqNullFull = "MELIQDTSRPPLEYVKGVPLIKYFAEALGPLQSFQARPDDLLISTYPKSGTTWVSQILDMIYQGGDLE" +
"KCHRAPIFMRVPFLEFKAPGIPSGMETLKDTPAPRLLKTHLPLALLPQTLLDQKVKVVYVARNAKDVAVSYYHFYHMAKVHPEPGTWDSFLEKFMVGEVSYGSWYQHVQE" +
"WWELSRTHPVLYLFYEDMKENPKREIQKILEFVGRSLPEETVDFVVQHTSFKEMKKNPMTNYTTVPQEFMDHSISPFMRKGMAGDWKTTFTVAQNERFDADYAEKMAGCS" +
"LSFRSEL";
// Sequence of the seeded entry 93766 (seeded with references, synonyms, product names and a name).
private static final String protSeqFullNull = "MMTNLQKEFFKRLKIPAKEITFNDLDEILLKMGLTLPYENLDIMAGTIKDISKNNLVEKILIQKRGGL" +
"CYELNSLLYYFLMDCGFQVYKVAGTVYDLYDNKWKPDDGHVIIVLTHNNKDYVIDAGFASHLPLHPVPFNGEVISSQTGEYRIRKRTTRKGT";
// Sequence of the seeded entry 82754 (seeded with references, synonyms, product names and a name).
private static final String protSeqFullFull = "MDNKDEYLLNFKGYNFQKTLVKMEVVENIENYEIRDDDIFIVTYPKSGTIWTQQILSLIYFEGHRNRT" +
"ENIETIDRAPFFEYNIHKLDYAKMPSPRIFSSHIPYYLVPKGLKDKKAKILYMYRNPKDVLISYFHFSNLMLIFQNPDTVESFMQTFLDGDVVGSLWFDHIRGWYEHRHD" +
"FNIMFMSFEDMKKDFRSSVLKICSFLEKELSEEDVDAVVRQATFQKMKADPRANYEHIIKDELGTRNEMGSFLRKGVVGAWKHYLTVDQSERFDKIFHRNMKNIPLKFIW" +
"DINEE";
// Not seeded in setUp(); testProteinEcSeqOrgQuery locates the corresponding entry by this sequence.
private static final String protSeqEcSeqOrgQuery = "MDLLPREKDKLLLFTAALLAERRRARGLKLNYPEAIAFISSAVVEGAREGRTVAELMCYGATL" +
"LTREDVMDGVAEMIHDIQVEATFADGTKLVTVHNPIP";
// Sequence of the seeded entry 89045, which is seeded with a null ec.
private static final String protSeqAccQuery1 = "MKWGPCKAFFTKLANFLWMLSRSSWCPLLISLYFWPFCLASPSPVGWWSFASDWFAPRYSVRALPFT" +
"LSNYRRSYEAFLSQCQVDIPTWGTKHPLGMLWHHKVSTLIDEMVSRRMYRIMEKAGQAAWKQVVSEATLSRISSLDVVAHFQHLAAIEAETCKYLASRLPMLHNLRMTGS" +
"NVTIVYNSTLNQVFAIFPTPGSRPKLNDFQQWLIAVHSSIFSSVAASCTLFVVLWLRVPILRTVFGFRWLGAIFLSNSQ";
// Not seeded in setUp(); testProteinAccessionQuery locates the corresponding entry by this sequence.
private static final String protSeqAccQuery2 = "MTTRRRKLSELEGISLGIIYKQQPCTAYRIRSELKEAPSSHWRASAGSLYPLLVRLEAEGLVASTTD" +
"KNDGRGRKLLKVTPQGRQSLKAWVMAGADQQLISSVTDPIRSRTFFLNVLAAPKRREYLDNLIVLTESYLSETKDHLEQKKMTGELFDYLGSLGAMKVTEARLDWLRVVR" +
"KQS";
// Sequences used by testDnaInstall. dnaSeq1..dnaSeq4 belong to the seeded entries 84937, 84938, 84939 and 23849;
// dnaSeq5 and dnaSeq6 are not seeded in setUp() and are looked up by sequence.
// NOTE(review): despite the "dna" prefix these look like amino-acid strings, presumably the protein translations
// listed in the DNA fixture file - confirm against genbank_installer_test_dna.gb.
private static final String dnaSeq1 = "MNLSPREKEKLLVSLAAMVARNRLARGVKLNHPEAIAIISDFVVEGAREGRSVADLMEAGAQVITRDQCMEGIAEM" +
"IHSIQVEATFPDGTKLVTVHHPIR";
private static final String dnaSeq2 = "MIPGEIFPAEGDIELNAGAATITLMVANTGDRPVQVGSHYHFAETNPGLVFDRTAARGYRLDIAAGTAVRFEPGQS" +
"REVQLVPLSGARRVFGFNAKVMGEL";
private static final String dnaSeq3 = "MPRLISRATYADMFGPTTGDKVRLADTDLIIEVEKDLTTYGEEVKFGGGKVIRDGMGQSQIPRSGGAMDTVITNAL" +
"IVDHTGIYKADVGLRDGRIAGIGKAGNPDTQPGVTLIIGPGTEVIAGEGKILTAGGIDTHIHFICPQQIEDALASGITTMLGGGTGPAHGTLATTCTPGPWHISRMLQSF" +
"EAFPMNLALAGKGNASLPEGLVEQVKAGACALKLHEDWGTTPAAIDCCLTVAEDMDVQVMIHTDTLNESGFVENTLAAFKGRTIHAFHTEGAGGGHAPDILKVVSSQNVI" +
"PSSTNPTRPYTKNTVEEHLDMLMVCHHLDNKVPEDVAFAESRIRKETIAAEDILHDMGAMAVISSDSQAMGRVGEIIIRCWQTADKMRKQRGRLAEETGANDNFRVRRYI" +
"AKYTINPAITHGLAEHVGSVEVGKRADLVLWHPAFFGAKPEMVLMGGMIVAAQMGDPNGSIPAQPFYTRPMFGAFGKALSNSAVTFVSAAAEAEGVAGKLGLSKTVLPVK" +
"GTRTIGKASMRLNSATPQIEVDPETYEVRADGEILTCEPAETLPLAQRYFLY";
private static final String dnaSeq4 = "MFDSATKPRLQRSHGQAAVAFEGARLKGLVQRGSAKALLPHVRGVPEVVFLNTSGGLTAGDTLRYGLDLDAGAKVV" +
"ATTQAAERAYRAEGEAARVSVAHRVGQGGWLDWLPQETILFDRARLHRETTVDLAEDAGCLLLEAVVLGRAAMGETLHDLHFSDMRRINRSGKPVFLEPFLQNSNLLAKG" +
"PRGALLGSARAFATLALCAQGAEDAVGPARAALTVPGVQAAASGFDGKCVVRLLAEDGWPLRQQILQLMGALRRGAPPPRVWQT";
private static final String dnaSeq5 = "MTNGPLRVGIGGPVGAGKTTLTEQLCRALAGRLSMAVVTNDIYTREDAEALMRAQVLPADRIRGVETGGCPHTAIR" +
"EDASINLAAIADLTRAHPDLELILIESGGDNLAATFSPELADLTIYVIDTAAGQDIPRKRGPGVTRSDLLVVNKTDLAPHVGVDPVLLEADTQRARGPRPYVMAQLRHGV" +
"GIDEIVAFLIREGGLEQASAPA";
private static final String dnaSeq6 = "MASERQALMLILLTTFFFTIKPSQASTTGGITIYWGQNIDDGTLTSTCDTGNFEIVNLAFLNAFGCGITPSWNFAG" +
"HCGDWNPCSILEPQIQYCQQKGVKVFLSLGGAKGTYSLCSPEDAKEVANYLYQNFLSGKPGPLGSVTLEGIDFDIELGSNLYWGDLAKELDALRHQNDHYFYLSAAPQCF" +
"MPDYHLDNAIKTGLFDHVNVQFYNNPPCQYSPGNTQLLFNSWDDWTSNVLPNNSVFFGLPASPDAAPSGGYIPPQVLISEVLPYVKQASNYGGVMLWDRYHDVLNYHSDQ" +
"IKDYVPKYAMRFVTAVSDAIYESVSARTHRILQKKPY";
/**
 * Builds the fixture shared by every test. Ten sequence entries and five organism names are installed into a
 * fresh {@link MockedMongoDB}; a {@link GenbankInstaller} is then initialised against the protein fixture file
 * and, after that, against the DNA fixture file, both writing to the same mocked database.
 *
 * @throws Exception if a fixture file cannot be located or an installer fails
 */
@Before
public void setUp() throws Exception {
  // Entries seeded with an empty reference list and accession-only metadata.
  Seq emptyTestSeq = new Seq(91973L, "2.3.1.5", 4000000648L, "Bacillus cereus", protSeqNullNull, new ArrayList<>(),
      MongoDBToJSON.conv(seedAccessionMetadata("CUB13083", null)), Seq.AccDB.genbank);
  Seq emptyTestSeq3 = new Seq(91974L, "2.3.1.5", 4000000648L, "Bacillus cereus", protSeqNullNull, new ArrayList<>(),
      MongoDBToJSON.conv(seedAccessionMetadata("CUB13083", null)), Seq.AccDB.genbank);
  Seq emptyTestSeq2 = new Seq(29034L, "2.8.2.1", 4000002681L, "Homo sapiens", protSeqNullFull, new ArrayList<>(),
      MongoDBToJSON.conv(seedAccessionMetadata("P50225", null)), Seq.AccDB.genbank);

  // Reference list shared by the two entries that are seeded with pre-existing references and metadata.
  List<JSONObject> references = new ArrayList<>();
  List<String> pmids = Arrays.asList("8363592", "8484775", "8423770", "8033246", "7864863", "7695643", "7581483",
      "8912648", "8924211", "9855620");
  for (String pmid : pmids) {
    JSONObject pmidRef = new JSONObject();
    pmidRef.put("src", "PMID");
    pmidRef.put("val", pmid);
    references.add(pmidRef);
  }
  String[][] patents = {
      {"JP", "2008518610", "2008"},
      {"EP", "2904117", "2015"},
      {"EP", "1731531", "2006"},
  };
  for (String[] patent : patents) {
    JSONObject patentRef = new JSONObject();
    patentRef.put("src", "Patent");
    patentRef.put("country_code", patent[0]);
    patentRef.put("patent_number", patent[1]);
    patentRef.put("patent_year", patent[2]);
    references.add(patentRef);
  }

  JSONObject metadata = seedAccessionMetadata("NUR84963", null);
  metadata.put("synonyms", Arrays.asList("STP", "STP1", "ST1A1"));
  metadata.put("product_names", Collections.singletonList("Sulfotransferase 1A1"));
  metadata.put("name", "SULT1A1");
  Seq fullTestSeq = new Seq(93766L, "2.4.1.8", 4000006340L, "Thermus sp.", protSeqFullNull, references,
      MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);

  metadata = seedAccessionMetadata("O35403", null);
  metadata.put("synonyms", Arrays.asList("STP", "STP1", "ST1A1"));
  metadata.put("product_names", Arrays.asList("Sulfotransferase 1A1"));
  metadata.put("name", "SULT1A1");
  Seq fullTestSeq2 = new Seq(82754L, "2.8.2.3", 4000003474L, "Mus musculus", protSeqFullFull, references,
      MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);

  // Seeded with a null ec and with an organism that is absent from orgNames below.
  Seq proteinAccessionTestQuery = new Seq(89045L, null, 5L,
      "Porcine reproductive and respiratory syndrome virus", protSeqAccQuery1, new ArrayList<>(),
      MongoDBToJSON.conv(seedAccessionMetadata("AKJ32561", null)), Seq.AccDB.genbank);

  // Entries that carry both a protein and a nucleotide accession.
  Seq dnaTestSeq1 = new Seq(84937L, "3.5.1.5", 4000005381L, "Rhodobacter capsulatus", dnaSeq1, new ArrayList<>(),
      MongoDBToJSON.conv(seedAccessionMetadata("BAB21065", "AB006984")), Seq.AccDB.genbank);
  Seq dnaTestSeq2 = new Seq(84938L, "3.5.1.5", 4000005381L, "Rhodobacter capsulatus", dnaSeq2, new ArrayList<>(),
      MongoDBToJSON.conv(seedAccessionMetadata("BAB21066", "AB006984")), Seq.AccDB.genbank);
  Seq dnaTestSeq3 = new Seq(84939L, "3.5.1.5", 4000005381L, "Rhodobacter capsulatus", dnaSeq3, new ArrayList<>(),
      MongoDBToJSON.conv(seedAccessionMetadata("BAB21067", "AB006984")), Seq.AccDB.genbank);
  Seq dnaTestSeq4 = new Seq(23849L, null, 4000005381L, "Rhodobacter capsulatus", dnaSeq4, new ArrayList<>(),
      MongoDBToJSON.conv(seedAccessionMetadata("BAB21064", "AB006984")), Seq.AccDB.genbank);

  mockAPI = new MockedMongoDB();
  Map<Long, String> orgNames = new HashMap<>();
  orgNames.put(4000005381L, "Rhodobacter capsulatus");
  orgNames.put(4000003474L, "Mus musculus");
  orgNames.put(4000006340L, "Thermus sp.");
  orgNames.put(4000002681L, "Homo sapiens");
  orgNames.put(4000000648L, "Bacillus cereus");
  mockAPI.installMocks(new ArrayList<Reaction>(),
      Arrays.asList(emptyTestSeq, emptyTestSeq2, emptyTestSeq3, fullTestSeq, fullTestSeq2, proteinAccessionTestQuery,
          dnaTestSeq1, dnaTestSeq2, dnaTestSeq3, dnaTestSeq4),
      orgNames, new HashMap<>());
  MongoDB mockDb = mockAPI.getMockMongoDB();

  // manually assemble an Org Iterator since you can't mock DBCollection in getDbIteratorOverOrgs()
  List<Organism> orgs = new ArrayList<>();
  for (Map.Entry<Long, String> orgName : orgNames.entrySet()) {
    orgs.add(new Organism(orgName.getKey(), orgName.getValue()));
  }
  Iterator<Organism> orgIterator = orgs.iterator();
  OrgMinimalPrefixGenerator prefixGenerator = new OrgMinimalPrefixGenerator(orgIterator);
  Map<String, String> minimalPrefixMapping = prefixGenerator.getMinimalPrefixMapping();

  GenbankInstaller genbankInstaller = new GenbankInstaller(
      seedFixtureFile("genbank_installer_test_protein.gb"), "Protein", mockDb, minimalPrefixMapping);
  genbankInstaller.init();
  genbankInstaller = new GenbankInstaller(
      seedFixtureFile("genbank_installer_test_dna.gb"), "DNA", mockDb, minimalPrefixMapping);
  genbankInstaller.init();
}

/**
 * Creates a metadata object holding only an "accession" entry.
 *
 * @param proteinAccession value stored as the single element of "genbank_protein"
 * @param nucleotideAccession value stored as the single element of "genbank_nucleotide", or null to omit the key
 * @return a new metadata object; callers may add further keys to it
 */
private static JSONObject seedAccessionMetadata(String proteinAccession, String nucleotideAccession) {
  JSONObject accession = new JSONObject();
  accession.put("genbank_protein", new JSONArray(Collections.singletonList(proteinAccession)));
  if (nucleotideAccession != null) {
    accession.put("genbank_nucleotide", new JSONArray(Collections.singletonList(nucleotideAccession)));
  }
  JSONObject metadata = new JSONObject();
  metadata.put("accession", accession);
  return metadata;
}

/**
 * Resolves a fixture that lives next to this test class to a file on disk.
 *
 * @param resourceName resource name, resolved relative to this class
 * @return the file backing the resource
 * @throws Exception if the resource location cannot be converted to a file URI
 */
private File seedFixtureFile(String resourceName) throws Exception {
  java.net.URL location = getClass().getResource(resourceName);
  if (location == null) {
    throw new IllegalStateException("test resource not found on the classpath: " + resourceName);
  }
  // URL.getFile() keeps percent-escapes (a space stays "%20"), which yields a non-existent path; going through
  // the URI decodes them.
  return new File(location.toURI());
}
/**
 * Covers the situation in which the database entry starts with an empty reference list and accession-only
 * metadata, and the protein file contributes nothing further. Two seeded entries (91973 and 91974) are checked
 * against the same expectation, which also exercises an ec, seq, org query matching several sequences.
 */
@Test
public void testProteinNullNull() {
  final String context = "for testProteinNullNull (query by ec, seq, org; database match exists)";

  JSONObject accession = new JSONObject();
  accession.put("genbank_protein", new JSONArray(Collections.singletonList("CUB13083")));
  JSONObject expectedMetadata = new JSONObject();
  expectedMetadata.put("accession", accession);

  Map<Long, Seq> installed = mockAPI.getSeqMap();
  Seq expected = new Seq(91973L, "2.3.1.5", 4000000648L, "Bacillus cereus", protSeqNullNull, new ArrayList<>(),
      MongoDBToJSON.conv(expectedMetadata), Seq.AccDB.genbank);

  // Both seeded entries must look the same after the install.
  for (long seqId : new long[] {91973L, 91974L}) {
    compareSeqs(context, expected, installed.get(seqId));
  }
}
/**
 * Covers the situation in which the database entry starts with an empty reference list and accession-only
 * metadata while the protein file supplies every field.
 */
@Test
public void testProteinNullFull() {
  Map<Long, Seq> installed = mockAPI.getSeqMap();

  // Expected references: ten PMIDs followed by three patents, in this order.
  List<JSONObject> expectedRefs = new ArrayList<>();
  String[] pmids = {"8363592", "8484775", "8423770", "8033246", "7864863", "7695643", "7581483",
      "8912648", "8924211", "9855620"};
  for (String pmid : pmids) {
    JSONObject pmidRef = new JSONObject();
    pmidRef.put("src", "PMID");
    pmidRef.put("val", pmid);
    expectedRefs.add(pmidRef);
  }
  String[][] patents = {
      {"JP", "2008518610", "2008"},
      {"EP", "2904117", "2015"},
      {"EP", "1731531", "2006"},
  };
  for (String[] patent : patents) {
    JSONObject patentRef = new JSONObject();
    patentRef.put("src", "Patent");
    patentRef.put("country_code", patent[0]);
    patentRef.put("patent_number", patent[1]);
    patentRef.put("patent_year", patent[2]);
    expectedRefs.add(patentRef);
  }

  JSONObject accession = new JSONObject();
  accession.put("genbank_protein", new JSONArray(Collections.singletonList("P50225")));
  JSONObject expectedMetadata = new JSONObject();
  expectedMetadata.put("accession", accession);
  expectedMetadata.put("synonyms", Arrays.asList("STP", "STP1", "ST1A1"));
  expectedMetadata.put("product_names", Arrays.asList("Sulfotransferase 1A1"));
  expectedMetadata.put("name", "SULT1A1");

  Seq expected = new Seq(29034L, "2.8.2.1", 4000002681L, "Homo sapiens", protSeqNullFull, expectedRefs,
      MongoDBToJSON.conv(expectedMetadata), Seq.AccDB.genbank);
  compareSeqs("for testProteinNullFull; (query by ec, seq, org; database match exists)", expected,
      installed.get(29034L));
}
/**
 * Covers the situation in which the database entry already holds references and metadata while the protein file
 * contributes nothing further, so the entry is expected to keep exactly what it was seeded with.
 */
@Test
public void testProteinFullNull() {
  Map<Long, Seq> installed = mockAPI.getSeqMap();

  // Expected references: ten PMIDs followed by three patents, in this order.
  List<JSONObject> expectedRefs = new ArrayList<>();
  String[] pmids = {"8363592", "8484775", "8423770", "8033246", "7864863", "7695643", "7581483",
      "8912648", "8924211", "9855620"};
  for (String pmid : pmids) {
    JSONObject pmidRef = new JSONObject();
    pmidRef.put("src", "PMID");
    pmidRef.put("val", pmid);
    expectedRefs.add(pmidRef);
  }
  String[][] patents = {
      {"JP", "2008518610", "2008"},
      {"EP", "2904117", "2015"},
      {"EP", "1731531", "2006"},
  };
  for (String[] patent : patents) {
    JSONObject patentRef = new JSONObject();
    patentRef.put("src", "Patent");
    patentRef.put("country_code", patent[0]);
    patentRef.put("patent_number", patent[1]);
    patentRef.put("patent_year", patent[2]);
    expectedRefs.add(patentRef);
  }

  JSONObject accession = new JSONObject();
  accession.put("genbank_protein", new JSONArray(Collections.singletonList("NUR84963")));
  JSONObject expectedMetadata = new JSONObject();
  expectedMetadata.put("accession", accession);
  expectedMetadata.put("synonyms", Arrays.asList("STP", "STP1", "ST1A1"));
  expectedMetadata.put("product_names", Arrays.asList("Sulfotransferase 1A1"));
  expectedMetadata.put("name", "SULT1A1");

  Seq expected = new Seq(93766L, "2.4.1.8", 4000006340L, "Thermus sp.", protSeqFullNull, expectedRefs,
      MongoDBToJSON.conv(expectedMetadata), Seq.AccDB.genbank);
  compareSeqs("for testProteinFullNull (query by ec, seq, org; database match exists)", expected,
      installed.get(93766L));
}
/**
 * Covers the situation in which the database entry already holds references and metadata and the protein file
 * supplies every field as well. The expectation lists the seeded references first, followed by one further PMID
 * and one further patent, and metadata with extra synonyms and product names.
 */
@Test
public void testProteinFullFull() {
  Map<Long, Seq> installed = mockAPI.getSeqMap();

  List<JSONObject> expectedRefs = new ArrayList<>();
  String[] seededPmids = {"8363592", "8484775", "8423770", "8033246", "7864863", "7695643", "7581483",
      "8912648", "8924211", "9855620"};
  for (String pmid : seededPmids) {
    JSONObject pmidRef = new JSONObject();
    pmidRef.put("src", "PMID");
    pmidRef.put("val", pmid);
    expectedRefs.add(pmidRef);
  }
  String[][] seededPatents = {
      {"JP", "2008518610", "2008"},
      {"EP", "2904117", "2015"},
      {"EP", "1731531", "2006"},
  };
  for (String[] patent : seededPatents) {
    JSONObject patentRef = new JSONObject();
    patentRef.put("src", "Patent");
    patentRef.put("country_code", patent[0]);
    patentRef.put("patent_number", patent[1]);
    patentRef.put("patent_year", patent[2]);
    expectedRefs.add(patentRef);
  }

  // References expected in addition to the seeded ones.
  JSONObject addedPmid = new JSONObject();
  addedPmid.put("src", "PMID");
  addedPmid.put("val", "9647753");
  expectedRefs.add(addedPmid);
  JSONObject addedPatent = new JSONObject();
  addedPatent.put("src", "Patent");
  addedPatent.put("country_code", "WO");
  addedPatent.put("patent_number", "8472927");
  addedPatent.put("patent_year", "2009");
  expectedRefs.add(addedPatent);

  JSONObject accession = new JSONObject();
  accession.put("genbank_protein", new JSONArray(Collections.singletonList("O35403")));
  JSONObject expectedMetadata = new JSONObject();
  expectedMetadata.put("accession", accession);
  expectedMetadata.put("synonyms", Arrays.asList("STP", "STP1", "ST1A1", "St3a1", "Sult3a1", "ST3A1_MOUSE"));
  expectedMetadata.put("product_names", Arrays.asList("Sulfotransferase 1A1", "Amine sulfotransferase"));
  expectedMetadata.put("name", "SULT1A1");

  Seq expected = new Seq(82754L, "2.8.2.3", 4000003474L, "Mus musculus", protSeqFullFull, expectedRefs,
      MongoDBToJSON.conv(expectedMetadata), Seq.AccDB.genbank);
  compareSeqs("for testProteinFullFull (query by ec, seq, org; database match exists)", expected,
      installed.get(82754L));
}
/**
 * Tests the case where the protein file does have an EC_number listed and so a normal query to the database is
 * performed, but no database match exists. The entry is not seeded in setUp(), so it is located in the sequence
 * map by its sequence; the test fails if no such entry is present.
 */
@Test
public void testProteinEcSeqOrgQuery() {
  Map<Long, Seq> seqs = mockAPI.getSeqMap();

  JSONObject accessionObject = new JSONObject();
  accessionObject.put("genbank_protein", new JSONArray(Collections.singletonList("AKK24634")));
  JSONObject metadata = new JSONObject();
  metadata.put("accession", accessionObject);
  metadata.put("synonyms", new ArrayList<String>());
  metadata.put("product_names", Collections.singletonList("urease subunit gamma"));
  metadata.put("xref", new JSONObject());
  Seq proteinEcSeqOrgTestQuery = new Seq(89342L, "3.5.1.5", 7L,
      "Pandoraea oxalativorans", protSeqEcSeqOrgQuery, new ArrayList<>(),
      MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);

  boolean matchFound = false;
  for (Seq installedSeq : seqs.values()) {
    if (protSeqEcSeqOrgQuery.equals(installedSeq.getSequence())) {
      matchFound = true;
      compareSeqs("for testProteinEcSeqOrgQuery (query by ec, org, seq with no database match)",
          proteinEcSeqOrgTestQuery, installedSeq);
    }
  }
  // Without this check the test would pass vacuously when nothing matched inside the loop.
  if (!matchFound) {
    throw new AssertionError("no entry with the expected sequence was found in the database "
        + "for testProteinEcSeqOrgQuery (query by ec, org, seq with no database match)");
  }
}
/**
 * Tests the case where the protein file doesn't have an EC_number listed and instead the query to the database must
 * be performed by accession number, both in the case when a database match exists and when it doesn't. Also tests the
 * addition of more than one new organism to the database and the assignment of orgId.
 *
 * The entry with a database match is read by its seeded id (89045). The other entry is not seeded in setUp(), so
 * it is located by its sequence; the test fails if no such entry is present.
 */
@Test
public void testProteinAccessionQuery() {
  Map<Long, Seq> seqs = mockAPI.getSeqMap();

  // Expectation for the entry that already exists in the database.
  List<JSONObject> references = new ArrayList<>();
  JSONObject refObj = new JSONObject();
  refObj.put("src", "PMID");
  refObj.put("val", "26889041");
  references.add(refObj);
  JSONObject accessionObject = new JSONObject();
  accessionObject.put("genbank_protein", new JSONArray(Collections.singletonList("AKJ32561")));
  JSONObject metadata = new JSONObject();
  metadata.put("accession", accessionObject);
  metadata.put("product_names", Collections.singletonList("envelope glycoprotein GP2"));
  metadata.put("name", "ORF2");
  Seq proteinAccessionTestQuery1 = new Seq(89045L, null, 5L,
      "Porcine reproductive and respiratory syndrome virus", protSeqAccQuery1, references,
      MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);

  // Expectation for the entry that has no database match.
  references = new ArrayList<>();
  refObj = new JSONObject();
  refObj.put("src", "PMID");
  refObj.put("val", "27268727");
  references.add(refObj);
  accessionObject = new JSONObject();
  accessionObject.put("genbank_protein", new JSONArray(Collections.singletonList("AEJ31929")));
  metadata = new JSONObject();
  metadata.put("accession", accessionObject);
  metadata.put("synonyms", new ArrayList<String>());
  metadata.put("product_names", Collections.singletonList("transcriptional regulator PadR-like family protein"));
  metadata.put("xref", new JSONObject());
  Seq proteinAccessionTestQuery2 = new Seq(79542L, null, 6L, "uncultured microorganism", protSeqAccQuery2,
      references, MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);

  compareSeqs("for testProteinAccessionQuery (query by accession; database match exists)", proteinAccessionTestQuery1,
      seqs.get(89045L));

  boolean matchFound = false;
  for (Seq installedSeq : seqs.values()) {
    if (protSeqAccQuery2.equals(installedSeq.getSequence())) {
      matchFound = true;
      compareSeqs("for testProteinAccessionQuery (query by accession with no database match)",
          proteinAccessionTestQuery2, installedSeq);
    }
  }
  // Without this check the second half of the test would pass vacuously when nothing matched inside the loop.
  if (!matchFound) {
    throw new AssertionError("no entry with the expected sequence was found in the database "
        + "for testProteinAccessionQuery (query by accession with no database match)");
  }
}
/**
 * Checks the six entries expected after the DNA fixture has been processed. Four of them are seeded in setUp()
 * and are read by id (84937, 84938, 84939, 23849). The remaining two are not seeded, so they are located by
 * sequence; the test fails if either of them is absent. All six share one expected reference list.
 */
@Test
public void testDnaInstall() {
  Map<Long, Seq> seqs = mockAPI.getSeqMap();

  List<JSONObject> references = new ArrayList<>();
  JSONObject refObj = new JSONObject();
  refObj.put("src", "PMID");
  refObj.put("val", "9484481");
  references.add(refObj);
  refObj = new JSONObject();
  refObj.put("src", "Patent");
  refObj.put("country_code", "JP");
  refObj.put("patent_number", "2008518610");
  refObj.put("patent_year", "2008");
  references.add(refObj);
  refObj = new JSONObject();
  refObj.put("src", "Patent");
  refObj.put("country_code", "EP");
  refObj.put("patent_number", "2904117");
  refObj.put("patent_year", "2015");
  references.add(refObj);

  JSONObject metadata = new JSONObject();
  metadata.put("accession", dnaInstallAccession("BAB21065"));
  metadata.put("product_names", Collections.singletonList("gamma subunit of urase"));
  metadata.put("name", "ureA");
  Seq dnaTestSeq1 = new Seq(84937L, "3.5.1.5", 4000005381L, "Rhodobacter capsulatus", dnaSeq1, references,
      MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);

  metadata = new JSONObject();
  metadata.put("accession", dnaInstallAccession("BAB21066"));
  metadata.put("product_names", Collections.singletonList("beta subunit of urease"));
  metadata.put("name", "ureB");
  Seq dnaTestSeq2 = new Seq(84938L, "3.5.1.5", 4000005381L, "Rhodobacter capsulatus", dnaSeq2, references,
      MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);

  metadata = new JSONObject();
  metadata.put("accession", dnaInstallAccession("BAB21067"));
  metadata.put("product_names", Collections.singletonList("alpha subunit of urease"));
  metadata.put("name", "ureC");
  Seq dnaTestSeq3 = new Seq(84939L, "3.5.1.5", 4000005381L, "Rhodobacter capsulatus", dnaSeq3, references,
      MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);

  metadata = new JSONObject();
  metadata.put("accession", dnaInstallAccession("BAB21064"));
  metadata.put("name", "ureD");
  Seq dnaTestSeq4 = new Seq(23849L, null, 4000005381L, "Rhodobacter capsulatus", dnaSeq4, references,
      MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);

  metadata = new JSONObject();
  metadata.put("accession", dnaInstallAccession("BAB21071"));
  metadata.put("name", "ureG");
  metadata.put("xref", new JSONObject());
  metadata.put("synonyms", new ArrayList<String>());
  metadata.put("product_names", new ArrayList<String>());
  Seq dnaTestSeq5 = new Seq(23894L, null, 4000005381L, "Rhodobacter capsulatus", dnaSeq5, references,
      MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);

  metadata = new JSONObject();
  metadata.put("accession", dnaInstallAccession("BAA25015"));
  metadata.put("xref", new JSONObject());
  metadata.put("synonyms", new ArrayList<String>());
  metadata.put("product_names", Collections.singletonList("class III acidic endochitinase"));
  Seq dnaTestSeq6 = new Seq(89345L, "3.2.1.14", 4000005381L, "Rhodobacter capsulatus", dnaSeq6, references,
      MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);

  compareSeqs("for testDnaInstall (query by ec, seq, org; database match exists)", dnaTestSeq1, seqs.get(84937L));
  compareSeqs("for testDnaInstall (query by ec, seq, org; database match exists)", dnaTestSeq2, seqs.get(84938L));
  compareSeqs("for testDnaInstall (query by ec, seq, org; database match exists)", dnaTestSeq3, seqs.get(84939L));
  compareSeqs("for testDnaInstall (query by accession; database match exists)", dnaTestSeq4, seqs.get(23849L));

  boolean accessionMatchFound = false;
  boolean ecSeqOrgMatchFound = false;
  for (Seq installedSeq : seqs.values()) {
    String sequence = installedSeq.getSequence();
    if (dnaSeq5.equals(sequence)) {
      accessionMatchFound = true;
      compareSeqs("for testDnaInstall (query by accession with no database match)", dnaTestSeq5, installedSeq);
    } else if (dnaSeq6.equals(sequence)) {
      ecSeqOrgMatchFound = true;
      compareSeqs("for testDnaInstall (query by ec, seq, org with no database match)", dnaTestSeq6,
          installedSeq);
    }
  }
  // Without these checks the two "no database match" cases would pass vacuously when nothing matched above.
  if (!accessionMatchFound) {
    throw new AssertionError("no entry with the expected sequence was found in the database "
        + "for testDnaInstall (query by accession with no database match)");
  }
  if (!ecSeqOrgMatchFound) {
    throw new AssertionError("no entry with the expected sequence was found in the database "
        + "for testDnaInstall (query by ec, seq, org with no database match)");
  }
}

/**
 * Builds the accession object used by every expectation in testDnaInstall: the given protein accession together
 * with the nucleotide accession "AB006984".
 */
private static JSONObject dnaInstallAccession(String proteinAccession) {
  JSONObject accession = new JSONObject();
  accession.put("genbank_protein", new JSONArray(Collections.singletonList(proteinAccession)));
  accession.put("genbank_nucleotide", new JSONArray(Collections.singletonList("AB006984")));
  return accession;
}
/**
 * Asserts that two sequence entries agree on ec, org id, organism name, sequence, references, metadata and
 * source database. References and metadata are compared through their {@code toString()} renderings.
 *
 * @param message context appended to every failure message
 * @param expectedSeq the expectation built by the test
 * @param testSeq the entry read from the mocked database; a null value fails the comparison
 */
private void compareSeqs(String message, Seq expectedSeq, Seq testSeq) {
  // A lookup by id yields null when the entry is missing; report that instead of a bare NullPointerException.
  if (testSeq == null) {
    throw new AssertionError("no sequence entry found in the database " + message);
  }
  assertEquals("comparing ec " + message, expectedSeq.getEc(), testSeq.getEc());
  assertEquals("comparing org_id " + message, expectedSeq.getOrgId(), testSeq.getOrgId());
  assertEquals("comparing organism " + message, expectedSeq.getOrgName(), testSeq.getOrgName());
  assertEquals("comparing sequence " + message, expectedSeq.getSequence(), testSeq.getSequence());
  assertEquals("comparing references " + message, expectedSeq.getReferences().toString(),
      testSeq.getReferences().toString());
  assertEquals("comparing metadata " + message, expectedSeq.getMetadata().toString(),
      testSeq.getMetadata().toString());
  assertEquals("comparing src db " + message, expectedSeq.getSrcdb(), testSeq.getSrcdb());
}
}