/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package act.installer; import act.server.MongoDB; import act.shared.Organism; import act.shared.Seq; import act.shared.helpers.MongoDBToJSON; import com.act.biointerpretation.Utils.OrgMinimalPrefixGenerator; import com.act.biointerpretation.test.util.MockedMongoDB; import com.mongodb.BasicDBObject; import org.json.JSONObject; import org.junit.Before; import org.junit.Test; import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import static org.junit.Assert.assertEquals; public class UniprotInstallerTest { private MockedMongoDB mockAPI; private String protSeqNullNull = "MFTQYRKTLLAGTLALTFGLAAGNSLAAGFQPAQPAGKLGAIVVDPYGNAPLTALVELDS" + "HVISDVKVTVHGKGEKGVPVTYTVGKESLATYDGIPIFGLYQKFANKVTVEYKENGKAMK" + "DDYVVQTSAIVNHYMDNRSISDLQQTKVIKVAPGFEDRLYLVNTHTFTPQGAEFHWHGEK" + "DKNAGILDAGPAGGALPFDIAPFTFVVDTEGEYRWWLDQDTFYDGHDMDINKRGYLMGIR" + "ETPRGTFTAVQGQHWYEFDMLGQILADHKLPRGFLDASHESVETVNGTVLLRVGKRDYRK" + "EDGLHVHTIRDQIIEVDKSGRVVDVWDLTQILDPMRDALLGALDAGAVCVNVDLAHAGQQ" + "AKLEPDTPYGDALGVGAGRNWAHVNSIAYDAKDDSIILSSRHQGVVKIGRDKQVKWILAP" + "SKGWNKALASKLLKPVDDKGNALKCDENGKCENTDFDFTYTQHTAWLSSKGTLTIFDNGD" + "GRGLEQPALPTMKYSRFVEYKIDEKKGTVQQVWEYGKERGYDFYSPITSVIEYQKDRDTM" + "FGFGGSINLFDVGQPTIGKINEIDYKTKEVKVEIDVLSDKPNQTHYRALLVRPQQMFK"; private String protSeqFullNull = "MADLPTASIDMILCDLPYGTTANAWDKVIPFEYLWGQYERLIKPQGAIVLTATERFSADL" + "VQSNPALYRYKWVWIKNTVTNFVNAKNRPLSRFEEILVFSKSGTANFGNSPDTRGMNYFP" + "QGLLPYNKTVNSRKYENANQMHPWNAPDSYTQEWTKYPSDVLNYKSDRTGWHPTQKPVDL" + "FAYLIKTYTQPGEIVLDNCMGSGTTAIAAMDTDRHFIGYEISEEYWRRALDRIKHHHATQ" + "TELF"; private String protSeqFullFull = "MDNKDEYLLNFKGYNFQKTLVKMEVVENIENYEIRDDDIFIVTYPKSGTIWTQQILSLIYFEGHRNRTENIETIDRAPFF" + "EYNIHKLDYAKMPSPRIFSSHIPYYLVPKGLKDKKAKILYMYRNPKDVLISYFHFSNLMLIFQNPDTVESFMQTFLDGDVVGSLWFDHIRGWYEHRHDFNIMFMSFEDM" + "KKDFRSSVLKICSFLEKELSEEDVDAVVRQATFQKMKADPRANYEHIIKDELGTRNEMGSFLRKGVVGAWKHYLTVDQSERFDKIFHRNMKNIPLKFIWDINEE"; private String protSeqNullFull = "MMTNLQKEFFKRLKIPAKEITFNDLDEILLKMGLTLPYENLDIMAGTIKDISKNNLVEKI" + "LIQKRGGLCYELNSLLYYFLMDCGFQVYKVAGTVYDLYDNKWKPDDGHVIIVLTHNNKDY" + "VIDAGFASHLPLHPVPFNGEVISSQTGEYRIRKRTTRKGTHILEMRKGANGESTNFLQSE" + "PSHEWKVGYAFTLDPIDEKKVNNIQKVIVEHKESPFNKGAITCKLTDYGHVSLTNKNYTE" + "TFKGTKNKRPIESKDYAHILRESFGITQVKYVGKTLERG"; private String protSeqAccQuery = "MPSVAAVLLWHVIALLLVANLGYASSHDAKRLRAEVIYARNGAVATDDRRCSRIGKDILL" + "EGGHAADAAVAAALCLGVVSPASSGLGGGAFMLLRQANGESKAFDMRETAPALASKDMYG" + "GNTTLKAQGGLSVAVPGELAGLHEAWKQYGKLPWKRLVNPAENLARRGFKISAYLHMQMK" + "STESDILQDKGLRSILAPNGKLLNIGDTCYNKKLADTLRAISVFGPKAFYDGLIGHNLVK" + "DVQNAGGILTTKDLKNYTVNQKKPLSTNVLGLNLLAMPPPSGGPPMILLLNILDQYKLPS" + "GLSGALGIHREIEALKHVFAVRMNLGDPDFVNITEVVSDMLSRRFATVLKNDINDNKTFS" + "PTHYGGKWNQIHDHGTSHLCVIDLERNAISMTTTVNAYFGSKILSPSTGIVLNNEMDDFS" + "IPRNVSKDVPPPAPSNFIMPGKRPLSSMSPTIALKDGKLKAVVGASGGAFIIGGTSEVLL" + "NHFGKGLDPFSSVTAPRVYHQLIPNVVNYENWTTVTGDHFELGADIRKVLRSKGHVLQSL" + "AGGTICQFIVVENSVSSRKTKVTGIERLVAVSDPRKGGLPAGF"; private String nucSeqAccQuery = "FCSAADLMELDMAMEPDRKAAVSHWQQQSYLDSGIHSGATTTAPSLSGKGNPEEEDVDTS" + "QVLYEWEQGFSQSFTQEQVADIDGQYAMTRAQRVRAAMFPETLDEGMQIPSTQFDAAHPT" + "NVQRLAEPSQMLKHAVVNLINYQDDAELATRAIPELTKLLNDEDQVVVNKAAVMVHQLSK" + "KEASRHAIMRSPQMVSAIVRTMQNTNDVETARCTAGTLHNLSHHREGLLAIFKSGGIPAL" + "VKMLGSPVDSVLFYAITTLHNLLLHQEGAKMAVRLAGGLQKMVALLNKTNVKFLAITTDC" + "LQILAYGNQESKLIILASGGPQALVNIMRTYTYEKLLWTTSRVLKVLSVCSSNKPAIVEA" + "GGMQALGLHLTDPSQRLVQNCLWTLRNLSDAATKQEGMEGLLGTLVQLLGSDDINVVTCA" + "AGILSNLTCNNYKNKMMVCQVGGIEALVRTVLRAGDREDITEPAICALRHLTSRHQEAEM" + "AQNAVRLHYGLPVVVKLLHPPSHWPLIKATVGLIRNLALCPANHAPLREQGAIPRLVQLL" + "VRAHQDTQRRTSMGGTQQQFVEGVRMEEIVEGCTGALHILARDVHNRIVIRGLNTIPLFV" + "QLLYSPIENIQRVAAGVLCELAQDKEAAEAIEAEGATAPLTELLHSRNEGVATYAAAVLF" + "RMSEDKPQDYKKRLSVELTSSLFRTEPMAWNETADLGLDIGAQGEPLGYRQDDPSYRSFH" + "SGGYGQDTLGMDPMMEHEMGGHHPGADYPVDGLPDLGHAQDLMDGLPPGDSNQLAWFDTD" + "L"; @Before public void setUp() throws Exception { Seq nullNullTestSeq = new Seq(21389L, "2.8.2.22", 4000001398L, "Citrobacter freundii", protSeqNullNull, new ArrayList<>(), new BasicDBObject(), Seq.AccDB.uniprot); JSONObject accessions = new JSONObject(); accessions.put(Seq.AccType.uniprot.toString(), Collections.singletonList("234890")); JSONObject metadata = new JSONObject(); metadata.put("accession", accessions); metadata.put("synonyms", Arrays.asList("HYT1", "HYT")); metadata.put("product_names", Collections.singletonList("Methyltransferase")); metadata.put("name", "N422"); List<JSONObject> references = new ArrayList<>(); JSONObject obj = new JSONObject(); obj.put("src", "PMID"); obj.put("val", "24435875"); references.add(obj); Seq fullNullTestSeq = new Seq(93482L, "2.1.1.1", 4000008473L, "Lactobacillus casei 5b", protSeqFullNull, references, MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); accessions = new JSONObject(); accessions.put(Seq.AccType.uniprot.toString(), Collections.singletonList("NUR84963")); metadata = new JSONObject(); metadata.put("accession", accessions); metadata.put("synonyms", Arrays.asList("STP", "STP1", "ST1A1")); metadata.put("product_names", Collections.singletonList("Sulfotransferase 1A1")); metadata.put("name", "SULT1A1"); references = new ArrayList<>(); List<String> pmids = Arrays.asList("8363592", "8484775", "8423770", "8033246", "7864863", "7695643", "7581483", "8912648", "8924211", "9855620"); for (String pmid : pmids) { obj = new JSONObject(); obj.put("src", "PMID"); obj.put("val", pmid); references.add(obj); } Seq fullFullTestSeq = new Seq(93766L, "2.8.2.3", 4000003474L, "Mus musculus", protSeqFullFull, references, MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); accessions = new JSONObject(); accessions.put(Seq.AccType.genbank_protein.toString(), Collections.singletonList("CUB13083")); metadata = new JSONObject(); metadata.put("accession", accessions); Seq nullFullTestSeq = new Seq(38942L, "2.3.1.5", 4000000648L, "Bacillus cereus", protSeqNullFull, new ArrayList<>(), MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); accessions = new JSONObject(); accessions.put(Seq.AccType.genbank_protein.toString(), Collections.singletonList("ESW35608")); metadata = new JSONObject(); metadata.put("accession", accessions); Seq protAccessionQueryTestSeq = new Seq(23894L, null, 4000004746L, "Phaseolus vulgaris", protSeqAccQuery, new ArrayList<>(), MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); accessions = new JSONObject(); accessions.put(Seq.AccType.uniprot.toString(), Collections.singletonList("H0UZN6")); accessions.put(Seq.AccType.genbank_nucleotide.toString(), Collections.singletonList("AAKN02012235")); metadata = new JSONObject(); metadata.put("accession", accessions); Seq nucAccessionQueryTestSeq = new Seq(58923L, null, 4000001225L, "Cavia porcellus", nucSeqAccQuery, new ArrayList<>(), MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); mockAPI = new MockedMongoDB(); Map<Long, String> orgNames = new HashMap<>(); orgNames.put(4000003474L, "Mus musculus"); orgNames.put(4000000648L, "Bacillus cereus"); orgNames.put(4000004746L, "Phaseolus vulgaris"); orgNames.put(4000001225L, "Cavia porcellus"); orgNames.put(4000001398L, "Citrobacter freundii"); orgNames.put(4000008473L, "Lactobacillus casei 5b"); // manually assemble an Org Iterator since you can't mock DBCollection in getDbIteratorOverOrgs() List<Organism> orgs = new ArrayList<>(); for (Map.Entry<Long, String> orgName : orgNames.entrySet()) { orgs.add(new Organism(orgName.getKey(), orgName.getValue())); } Iterator<Organism> orgIterator = orgs.iterator(); OrgMinimalPrefixGenerator prefixGenerator = new OrgMinimalPrefixGenerator(orgIterator); Map<String, String> minimalPrefixMapping = prefixGenerator.getMinimalPrefixMapping(); mockAPI.installMocks(new ArrayList<>(), Arrays.asList(nullNullTestSeq, fullNullTestSeq, fullFullTestSeq, nullFullTestSeq, protAccessionQueryTestSeq, nucAccessionQueryTestSeq), orgNames, new HashMap<>()); MongoDB mockDb = mockAPI.getMockMongoDB(); // loading test file for testProteinEcSeqOrgQuery UniprotInstaller uniprotInstaller = new UniprotInstaller( new File(this.getClass().getResource("uniprot_installer_test_1.xml").getFile()), mockDb, minimalPrefixMapping); uniprotInstaller.init(); // loading test file for testProteinFullFull uniprotInstaller = new UniprotInstaller( new File(this.getClass().getResource("uniprot_installer_test_2.xml").getFile()), mockDb, minimalPrefixMapping); uniprotInstaller.init(); // loading test file for testProteinNullFull uniprotInstaller = new UniprotInstaller( new File(this.getClass().getResource("uniprot_installer_test_3.xml").getFile()), mockDb, minimalPrefixMapping); uniprotInstaller.init(); // loading test file for testProteinAccessionQuery with database match uniprotInstaller = new UniprotInstaller( new File(this.getClass().getResource("uniprot_installer_test_4.xml").getFile()), mockDb, minimalPrefixMapping); uniprotInstaller.init(); // loading test file for testNucleotideAccessionQuery with database match uniprotInstaller = new UniprotInstaller( new File(this.getClass().getResource("uniprot_installer_test_5.xml").getFile()), mockDb, minimalPrefixMapping); uniprotInstaller.init(); // loading test file for testProteinAccessionQuery without database match uniprotInstaller = new UniprotInstaller( new File(this.getClass().getResource("uniprot_installer_test_6.xml").getFile()), mockDb, minimalPrefixMapping); uniprotInstaller.init(); // loading test file for testNucleotideAccessionQuery without database match uniprotInstaller = new UniprotInstaller( new File(this.getClass().getResource("uniprot_installer_test_7.xml").getFile()), mockDb, minimalPrefixMapping); uniprotInstaller.init(); // loading test file for testProteinNullNull uniprotInstaller = new UniprotInstaller( new File(this.getClass().getResource("uniprot_installer_test_8.xml").getFile()), mockDb, minimalPrefixMapping); uniprotInstaller.init(); // loading test file for testProteinFullNull uniprotInstaller = new UniprotInstaller( new File(this.getClass().getResource("uniprot_installer_test_9.xml").getFile()), mockDb, minimalPrefixMapping); uniprotInstaller.init(); } /** * Tests the case where the existing reference list and metadata json object in the database are null and the * information acquired from the protein file is also null. */ @Test public void testProteinNullNull() { Map<Long, Seq> seqs = mockAPI.getSeqMap(); JSONObject accessions = new JSONObject(); accessions.put(Seq.AccType.uniprot.toString(), new ArrayList()); accessions.put(Seq.AccType.genbank_nucleotide.toString(), new ArrayList()); accessions.put(Seq.AccType.genbank_protein.toString(), new ArrayList()); JSONObject metadata = new JSONObject(); metadata.put("accession", accessions); Seq nullNullTestSeq = new Seq(21389L, "2.8.2.22", 4000001398L, "Citrobacter freundii", protSeqNullNull, new ArrayList<>(), MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); compareSeqs("for testProteinNullNull; (query by ec, seq, org; database match exists)", nullNullTestSeq, seqs.get(21389L)); } /** * Tests the case where the existing reference list and metadata json object in the database are null but * the protein file has all fields of information */ @Test public void testProteinNullFull() { List<String> oldAccessions = Collections.singletonList("CUB13083"); List<String> uniprotAccessions = Collections.singletonList("A0A0K6JCJ7"); List<String> genbankNucleotideAccessions = Collections.singletonList("CYHI01000402"); JSONObject accessions = new JSONObject(); accessions.put(Seq.AccType.uniprot.toString(), uniprotAccessions); accessions.put(Seq.AccType.genbank_nucleotide.toString(), genbankNucleotideAccessions); accessions.put(Seq.AccType.genbank_protein.toString(), oldAccessions); JSONObject metadata = new JSONObject(); metadata.put("accession", accessions); metadata.put("product_names", Collections.singletonList("Arylamine N-acetyltransferase")); metadata.put("name", "nat_1"); metadata.put("catalytic_activity", "An aryl sulfate + a phenol = a phenol + an aryl sulfate."); List<String> pmids = Collections.singletonList("8493748"); List<JSONObject> references = new ArrayList<>(); for (String pmid : pmids) { JSONObject obj = new JSONObject(); obj.put("src", "PMID"); obj.put("val", pmid); references.add(obj); } Map<Long, Seq> seqs = mockAPI.getSeqMap(); Seq nullFullTestSeq = new Seq(38942L, "2.3.1.5", 4000000648L, "Bacillus cereus", protSeqNullFull, references, MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); compareSeqs("for testProteinNullFull; (query by ec, seq, org; database match exists)", nullFullTestSeq, seqs.get(38942L)); } /** * Tests the case where the existing reference list and metadata json object in the database are not null but * the information acquired from the protein file is null */ @Test public void testProteinFullNull() { Map<Long, Seq> seqs = mockAPI.getSeqMap(); JSONObject accessions = new JSONObject(); accessions.put(Seq.AccType.uniprot.toString(), Collections.singletonList("234890")); JSONObject metadata = new JSONObject(); metadata.put("accession", accessions); metadata.put("synonyms", Arrays.asList("HYT1", "HYT")); metadata.put("product_names", Collections.singletonList("Methyltransferase")); metadata.put("name", "N422"); List<JSONObject> references = new ArrayList<>(); JSONObject obj = new JSONObject(); obj.put("src", "PMID"); obj.put("val", "24435875"); references.add(obj); Seq fullNullTestSeq = new Seq(93482L, "2.1.1.1", 4000008473L, "Lactobacillus casei 5b", protSeqFullNull, references, MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); compareSeqs("for testProteinFullNull (query by ec, seq, org; database match exists)", fullNullTestSeq, seqs.get(93482L)); } /** * Tests the case where the existing reference list and metadata json object in the database are not null and * the protein file has all fields of information */ @Test public void testProteinFullFull() { List<String> uniprotAccessions = Arrays.asList("NUR84963", "O35403"); List<String> genbankNucleotideAccessions = Collections.singletonList("AF026075"); List<String> genbankProteinAccessions = Collections.singletonList("AAB82293"); JSONObject accessions = new JSONObject(); accessions.put(Seq.AccType.uniprot.toString(), uniprotAccessions); accessions.put(Seq.AccType.genbank_nucleotide.toString(), genbankNucleotideAccessions); accessions.put(Seq.AccType.genbank_protein.toString(), genbankProteinAccessions); JSONObject metadata = new JSONObject(); metadata.put("accession", accessions); metadata.put("synonyms", Arrays.asList("STP", "STP1", "ST1A1", "St3a1", "Sult3a1")); metadata.put("product_names", Arrays.asList("Sulfotransferase 1A1", "Amine sulfotransferase", "SULT-X2", "Sulfotransferase 3A1")); metadata.put("name", "SULT1A1"); metadata.put("catalytic_activity", "3'-phosphoadenylyl sulfate + an amine = adenosine 3',5'-bisphosphate + a sulfamate."); Map<Long, Seq> seqs = mockAPI.getSeqMap(); List<JSONObject> references = new ArrayList<>(); List<String> oldPmids = Arrays.asList("8363592", "8484775", "8423770", "8033246", "7864863", "7695643", "7581483", "8912648", "8924211", "9855620"); List<String> newPmids = Collections.singletonList("9647753"); List<String> pmids = new ArrayList<>(); pmids.addAll(oldPmids); pmids.addAll(newPmids); for (String pmid : pmids) { JSONObject obj = new JSONObject(); obj.put("src", "PMID"); obj.put("val", pmid); references.add(obj); } Seq fullTestSeq2 = new Seq(93766L, "2.8.2.3", 4000003474L, "Mus musculus", protSeqFullFull, references, MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); compareSeqs("for testProteinFullFull (query by ec, seq, org; database match exists)", fullTestSeq2, seqs.get(93766L)); } /** * Tests the case where the protein file does have an EC_number listed and so a normal query to the database is * performed, but no database match exists. Also tests the addition of more than one new organism to the database * and the assignment of orgId. */ @Test public void testProteinEcSeqOrgQuery() { String protSeqEcSeqOrgQuery = "MSTTGQIIRCKAAVAWEAGKPLVIEEVEVAPPQKHEVRIKILFTSLCHTDVYFWEAKGQT" + "PLFPRIFGHEAGGIVESVGEGVTDLQPGDHVLPIFTGECGECRHCHSEESNMCDLLRINT" + "ERGGMIHDGESRFSINGKPIYHFLGTSTFSEYTVVHSGQVAKINPDAPLDKVCIVSCGLS" + "TGLGATLNVAKPKKGQSVAIFGLGAVGLGAAEGARIAGASRIIGVDFNSKRFDQAKEFGV" + "TECVNPKDHDKPIQQVIAEMTDGGVDRSVECTGSVQAMIQAFECVHDGWGVAVLVGVPSK" + "DDAFKTHPMNFLNERTLKGTFFGNYKPKTDIPGVVEKYMNKELELEKFITHTVPFSEINK" + "AFDYMLKGESIRCIITMGA"; List<String> uniprotAccessions = Arrays.asList("P06525", "O04080", "O04713", "O04717", "O04868", "O23821", "Q8LA61", "Q94AY6", "Q9CAZ2", "Q9CAZ3", "Q9SX08"); List<String> genbankNucleotideAccessions = Arrays.asList("M12196", "X77943", "D84240", "D84241", "D84242", "D84243", "D84244", "D84245", "D84246", "D84247", "D84248", "D84249", "D63460", "D63461", "D63462", "D63463", "D63464", "AF110456", "AB048394", "AB048395", "AY536888", "AC002291", "CP002684", "AY045612", "AY090330", "AY088010", "AF056557"); List<String> genbankProteinAccessions = Arrays.asList("AAA32728", "CAA54911", "BAA19615", "BAA19616", "BAA19617", "BAA19618", "BAA19619", "BAA19620", "BAA19621", "BAA19622", "BAA19623", "BAA19624", "BAA22983", "BAA22979", "BAA22980", "BAA22981", "BAA22982", "AAF23554", "BAB32568", "BAB32569", "AAS45601", "AAC00625", "AEE35937", "AAK73970", "AAL90991", "AAM65556", "AAD41572"); JSONObject accessions = new JSONObject(); accessions.put(Seq.AccType.uniprot.toString(), uniprotAccessions); accessions.put(Seq.AccType.genbank_nucleotide.toString(), genbankNucleotideAccessions); accessions.put(Seq.AccType.genbank_protein.toString(), genbankProteinAccessions); JSONObject metadata = new JSONObject(); metadata.put("xref", new JSONObject()); metadata.put("accession", accessions); metadata.put("synonyms", Arrays.asList("ADH")); metadata.put("product_names", Arrays.asList("Alcohol dehydrogenase class-P")); metadata.put("name", "ADH1"); metadata.put("catalytic_activity", "An alcohol + NAD(+) = an aldehyde or ketone + NADH."); Map<Long, Seq> seqs = mockAPI.getSeqMap(); List<JSONObject> references = new ArrayList<>(); List<String> pmids = Arrays.asList("2937058", "7851777", "8844162", "8587508", "11018155", "11158375", "11130712", "14593172", "10382288", "3377754", "2277648", "12231733", "8787023", "9522467", "9611167", "9880346", "11402191", "11402202", "11987307", "12509334", "12857811", "16055689", "18433157", "18441225", "19245862", "20508152", "22223895", "23707506", "24395201", "26566261", "25447145"); for (String pmid : pmids) { JSONObject obj = new JSONObject(); obj.put("src", "PMID"); obj.put("val", pmid); references.add(obj); } Seq proteinEcSeqOrgTestQuery = new Seq(82934L, "1.1.1.1", 6L, "Arabidopsis thaliana", protSeqEcSeqOrgQuery, references, MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); for (Map.Entry<Long, Seq> seqentry : seqs.entrySet()) { if (seqentry.getValue().getSequence().equals(protSeqEcSeqOrgQuery)) { compareSeqs("for testProteinEcSeqOrgQuery (query by ec, org, seq with no database match)", proteinEcSeqOrgTestQuery, seqentry.getValue()); } } } /** * Tests the case where the protein file doesn't have an EC_number listed and instead the query to the database must * be performed by Genbank protein accession number, both in the case when a database match exists and when it * doesn't. */ @Test public void testProteinAccessionQuery() { Map<Long, Seq> seqs = mockAPI.getSeqMap(); List<String> oldAccessions = Collections.singletonList("ESW35608"); List<String> uniprotAccessions = Collections.singletonList("V7D1Q1"); List<String> genbankNucleotideAccessions = Collections.singletonList("CM002288"); JSONObject accessions = new JSONObject(); accessions.put(Seq.AccType.uniprot.toString(), uniprotAccessions); accessions.put(Seq.AccType.genbank_nucleotide.toString(), genbankNucleotideAccessions); accessions.put(Seq.AccType.genbank_protein.toString(), oldAccessions); JSONObject metadata = new JSONObject(); metadata.put("accession", accessions); Seq protAccessionQueryTestSeq = new Seq(23894L, null, 4000004746L, "Phaseolus vulgaris", protSeqAccQuery, new ArrayList<>(), MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); String protSeqAccessionQuery = "MAPAPSLLHYPIIVCHLLFFAELTTGMSASTERPYVSSESPIRISVSTEGANTSSSTSTS" + "TTGTSHLIKCAEKEKTFCVNGGECFMVKDLSNPSRYLCKCQPGFTGARCTENVPMKVQTQ" + "EKAEELYQKRVLTITGICIALLVVGIMCVVAYCKTKKQRQKLHDRLRQSLRSERNNMVNI" + "ANGPHHPNPPPENVQLVNQYVSKNVISSEHIVEREVETSFSTSHYTSTAHHSTTVTQTPS" + "HSWSNGHTESIISESHSVIMMSSVESSRHSSPAGGPRGRLHGLGGPRECNSFLRHARETP" + "DSYRDSPHSER"; uniprotAccessions = Collections.singletonList("Q3TD94"); List<String> genbankProteinAccessions = Collections.singletonList("BAE41710"); genbankNucleotideAccessions = Collections.singletonList("AK170314"); accessions = new JSONObject(); accessions.put(Seq.AccType.uniprot.toString(), uniprotAccessions); accessions.put(Seq.AccType.genbank_nucleotide.toString(), genbankNucleotideAccessions); accessions.put(Seq.AccType.genbank_protein.toString(), genbankProteinAccessions); List<String> pmids = Arrays.asList("10349636", "11042159", "11076861", "11217851", "12466851", "16141073"); List<JSONObject> references = new ArrayList<>(); for (String pmid : pmids) { JSONObject obj = new JSONObject(); obj.put("src", "PMID"); obj.put("val", pmid); references.add(obj); } metadata = new JSONObject(); metadata.put("accession", accessions); metadata.put("synonyms", new ArrayList()); metadata.put("product_names", new ArrayList()); metadata.put("xref", new JSONObject()); metadata.put("name", "Nrg1"); Seq protAccessionQueryTestSeq2 = new Seq(48922, null, 4000003474L, "Mus musculus", protSeqAccessionQuery, references, MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); compareSeqs("for testProteinAccessionQuery (query by protein accession; database match exists)", protAccessionQueryTestSeq, seqs.get(23894L)); for (Map.Entry<Long, Seq> seqentry : seqs.entrySet()) { if (seqentry.getValue().getSequence().equals(protSeqAccessionQuery)) { compareSeqs("for testProteinAccessionQuery (query by protein accession with no database match)", protAccessionQueryTestSeq2, seqentry.getValue()); } } } /** * Tests the case where the protein file doesn't have an EC_number listed or a genbank protein accession number * referenced, in which case it queries using the Genbank nucleotide accession number and sequence, both in the case * when a database match exists and when it doesn't. */ @Test public void testNucleotideAccessionQuery() { Map<Long, Seq> seqs = mockAPI.getSeqMap(); List<String> uniprotAccessions = Collections.singletonList("H0UZN6"); List<String> genbankNucleotideAccessions = Collections.singletonList("AAKN02012235"); JSONObject accessions = new JSONObject(); accessions.put(Seq.AccType.uniprot.toString(), uniprotAccessions); accessions.put(Seq.AccType.genbank_nucleotide.toString(), genbankNucleotideAccessions); JSONObject metadata = new JSONObject(); metadata.put("accession", accessions); metadata.put("name", "CTNNB1"); List<JSONObject> references = new ArrayList<>(); JSONObject obj = new JSONObject(); obj.put("src", "PMID"); obj.put("val", "21993624"); references.add(obj); Seq nucAccessionQueryTestSeq = new Seq(58923L, null, 4000001225L, "Cavia porcellus", nucSeqAccQuery, references, MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); String nucSeqAccQuery2 = "FLTADLMELDMAMEPDRKAAVSHWQQQSYLDSGIHSGATTTAPSLSGKGNPEEEDVDTTQ" + "VLYEWEQGFSQSFTQEQVADIDGQYAMTRAQRVRAAMFPETLDEGMQIPSTQFDAAHPTN" + "VQRLAEPSQMLKHAVVNLINYQDDAELATRAIPELTKLLNDEDQVVVNKAAVMVHQLSKK" + "EASRHAIMRSPQMVSAIVRTMQNTNDVETARCTAGTLHNLSHHREGLLAIFKSGGIPALV" + "KMLGSPVDSVLFYAITTLHNLLLHQEGAKMAVRLAGGLQKMVALLNKTNVKFLAITTDCL" + "QILAYGNQESKLIILASGGPQALVNIMRTYTYEKLLWTTSRVLKVLSVCSSNKPAIVEAG" + "GMQALGLHLTDPSQRLVQNCLWTLRNLSDAATKQEGMEGLLGTLVQLLGSDDINVVTCAA" + "GILSNLTCNNYKNKMMVCQVGGIEALVRTVLRAGDREDITEPAICALRHLTSRHQEAEMA" + "QNAVRLHYGLPVVVKLLHPPSHWPLIKATVGLIRNLALCPANHAPLREQGAIPRLVQLLV" + "RAHQDTQRRTSMGGTQQQFVEGVRMEEIVEGCTGALHILARDVHNRIVIRGLNTIPLFVQ" + "LLYSPIENIQRVAAGVLCELAQDKEAAEAIEAEGATAPLTELLHSRNEGVATYAAAVLFR" + "MSEDKPQDYKKRLSVELTSSLFRTEPMAWNETADLGLDIGAQGEPLGYRPDDPSYRSFHS" + "GGYGQDALGMDPMMEHEMGGHHPGADYPVDGLPDLGHAQDLMDGLPPGDSNQLAWFDTDL"; uniprotAccessions = Collections.singletonList("H0Z303"); genbankNucleotideAccessions = Collections.singletonList("ABQF01014180"); accessions = new JSONObject(); accessions.put(Seq.AccType.uniprot.toString(), uniprotAccessions); accessions.put(Seq.AccType.genbank_nucleotide.toString(), genbankNucleotideAccessions); accessions.put(Seq.AccType.genbank_protein.toString(), new ArrayList()); metadata = new JSONObject(); metadata.put("accession", accessions); metadata.put("synonyms", new ArrayList()); metadata.put("product_names", new ArrayList()); metadata.put("xref", new JSONObject()); metadata.put("name", "CTNNB1"); references = new ArrayList<>(); obj = new JSONObject(); obj.put("src", "PMID"); obj.put("val", "20360741"); references.add(obj); Seq nucAccessionQueryTestSeq2 = new Seq(94032L, null, 7L, "Taeniopygia guttata", nucSeqAccQuery2, references, MongoDBToJSON.conv(metadata), Seq.AccDB.uniprot); compareSeqs("for testNucleotideAccessionQuery (query by nucleotide accession and seq; database match exists)", nucAccessionQueryTestSeq, seqs.get(58923L)); for (Map.Entry<Long, Seq> seqentry : seqs.entrySet()) { if (seqentry.getValue().getSequence().equals(nucSeqAccQuery2)) { compareSeqs("for testNucleotideAccessionQuery (query by nucleotide accession and seq with no database match)", nucAccessionQueryTestSeq2, seqentry.getValue()); } } } private void compareSeqs(String message, Seq expectedSeq, Seq testSeq) { assertEquals("comparing ec " + message, expectedSeq.getEc(), testSeq.getEc()); assertEquals("comparing org_id " + message, expectedSeq.getOrgId(), testSeq.getOrgId()); assertEquals("comparing organism " + message, expectedSeq.getOrgName(), testSeq.getOrgName()); assertEquals("comparing sequence " + message, expectedSeq.getSequence(), testSeq.getSequence()); assertEquals("comparing references " + message, expectedSeq.getReferences().toString(), testSeq.getReferences().toString()); assertEquals("comparing metadata " + message, expectedSeq.getMetadata().toString(), testSeq.getMetadata().toString()); assertEquals("comapring src db " + message, expectedSeq.getSrcdb(), testSeq.getSrcdb()); } }