/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.biointerpretation.sequencemerging;

import act.server.MongoDB;
import act.server.NoSQLAPI;
import act.shared.Organism;
import act.shared.Reaction;
import act.shared.Seq;
import act.shared.helpers.MongoDBToJSON;
import com.act.biointerpretation.test.util.MockedNoSQLAPI;
import org.biopax.paxtools.model.level3.ConversionDirectionType;
import org.biopax.paxtools.model.level3.StepDirection;
import org.json.JSONArray;
import org.json.JSONObject;
import org.junit.Before;
import org.junit.Test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static org.junit.Assert.assertEquals;

/**
 * Tests for {@link SequenceMerger}, run against a {@link MockedNoSQLAPI}.
 *
 * <p>{@link #setUp()} installs one reaction, four sequences and four organisms into the mocked
 * read DB and runs the merger once; each test then inspects what was written to the mocked
 * write DB.
 */
public class SequenceMergerTest {

  private MockedNoSQLAPI mockAPI;

  /**
   * Builds the fixture data, installs it into a fresh {@link MockedNoSQLAPI} and runs
   * {@link SequenceMerger} over it.
   */
  @Before
  public void setUp() throws Exception {
    // ==========================================
    // assembling reaction
    // ==========================================
    List<Reaction> testReactions = new ArrayList<>();

    Reaction reaction = new Reaction(789345L,
        new Long[]{1L, 2L, 3L}, new Long[]{4L, 5L, 6L},
        new Long[]{}, new Long[]{}, new Long[]{},
        "1.1.1.1", ConversionDirectionType.LEFT_TO_RIGHT, StepDirection.LEFT_TO_RIGHT,
        "test reaction", Reaction.RxnDetailType.CONCRETE);

    Set<JSONObject> proteinData = new HashSet<>();
    proteinData.add(
        newProteinData(new HashSet<>(Arrays.asList(85932L, 7589L, 84321L)), 4000003475L, 829334L));
    reaction.setProteinData(proteinData);

    reaction.addProteinData(
        newProteinData(new HashSet<>(Arrays.asList(85932L, 7589L, 78643L)), 4000003474L, 91923L));

    testReactions.add(reaction);

    // ========================================
    // assembling sequences
    // ========================================
    List<Seq> testSequences = new ArrayList<>();

    // NOTE: `metadata` and `commentObject` are deliberately reused and mutated between sequences;
    // each Seq receives the result of MongoDBToJSON.conv(metadata) taken at construction time.
    JSONObject metadata = new JSONObject();
    metadata.put("proteinExistence", new JSONObject());

    JSONObject commentObject = new JSONObject();
    commentObject.put("text", 128930);
    commentObject.put("type", "brenda_id");
    metadata.put("comment", new JSONArray(Collections.singletonList(commentObject)));
    metadata.put("name", "ADH1");

    JSONObject accessions = new JSONObject();
    List<String> genbankProteinAccs = Collections.singletonList("PAH84937");
    List<String> genbankNucAccs = Arrays.asList("P04839", "N84937");
    accessions.put(Seq.AccType.genbank_protein.toString(), genbankProteinAccs);
    accessions.put(Seq.AccType.genbank_nucleotide.toString(), genbankNucAccs);
    metadata.put("accession", accessions);

    List<JSONObject> references = new ArrayList<>();
    // The same PMID object instance is shared by sequence1 and sequence2 below.
    JSONObject pmid = newPmidReference("2423423");
    references.add(pmid);
    references.add(newPatentReference("JP", "2008518610", "2008"));

    Seq sequence1 = new Seq(85932L, "1.1.1.1", 4000003474L, "Mus musculus", "AJKFLGKJDFS",
        references, MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);
    sequence1.addReactionsCatalyzed(789345L);
    testSequences.add(sequence1);

    commentObject.put("text", 128931);
    metadata.put("comment", new JSONArray(Collections.singletonList(commentObject)));
    metadata.put("name", "ADH1");
    metadata.put("synonyms", Arrays.asList("ADH2", "ADH3"));
    metadata.put("product_names", Arrays.asList("Alcohol dehydrogenase"));

    accessions = new JSONObject();
    genbankProteinAccs = Arrays.asList("PAH84937", "JFH67382");
    genbankNucAccs = Arrays.asList("B83472", "P04839");
    List<String> uniprotAccs = Arrays.asList("O0ABC3", "B8NJH9");
    accessions.put(Seq.AccType.genbank_protein.toString(), genbankProteinAccs);
    accessions.put(Seq.AccType.genbank_nucleotide.toString(), genbankNucAccs);
    accessions.put(Seq.AccType.uniprot.toString(), uniprotAccs);
    metadata.put("accession", accessions);

    references = new ArrayList<>();
    references.add(pmid);
    references.add(newPatentReference("EP", "2904117", "2015"));

    Seq sequence2 = new Seq(7589L, "1.1.1.1", 4000003475L, "Mus musculus sp.", "AJKFLGKJDFS",
        references, MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);
    sequence2.addReactionsCatalyzed(789345L);
    testSequences.add(sequence2);

    commentObject.put("text", 128932);
    metadata.put("comment", new JSONArray(Collections.singletonList(commentObject)));
    metadata.put("name", "ADH4");
    metadata.put("synonyms", Arrays.asList("ADH3", "ADH5"));
    metadata.put("product_names", Arrays.asList("Alcohol dehydrogenase", "Alcohol de-hydrogenase"));

    accessions = new JSONObject();
    uniprotAccs = Arrays.asList("B3NTY7", "O0ABC3");
    accessions.put(Seq.AccType.uniprot.toString(), uniprotAccs);
    metadata.put("accession", accessions);

    references = new ArrayList<>();
    pmid = newPmidReference("218394");
    references.add(pmid);
    references.add(newPatentReference("JP", "2008518610", "2008"));

    Seq sequence3 = new Seq(84321L, "1.1.1.1", 4000003476L, "Mus musculus sp. 123", "AJKFLGKJDFS",
        references, MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);
    sequence3.addReactionsCatalyzed(789345L);
    testSequences.add(sequence3);

    // sequence4 intentionally reuses sequence3's references and metadata but differs in
    // EC number and organism.
    Seq sequence4 = new Seq(78643L, "1.1.1.2", 4000008594L, "Homo sapiens", "AJKFLGKJDFS",
        references, MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);
    sequence4.addReactionsCatalyzed(789345L);
    testSequences.add(sequence4);

    // ========================================
    // assembling organisms
    // ========================================
    Map<Long, String> testOrgNames = new HashMap<>();
    testOrgNames.put(4000003474L, "Mus musculus");
    testOrgNames.put(4000003475L, "Mus musculus sp.");
    testOrgNames.put(4000003476L, "Mus musculus sp. 123");
    // Must match the organism id used by sequence4 above (was 400008594L, one zero short).
    testOrgNames.put(4000008594L, "Homo sapiens");

    // ========================================
    // installing and merging all data
    // ========================================
    mockAPI = new MockedNoSQLAPI();
    mockAPI.installMocks(testReactions, testSequences, testOrgNames, new HashMap<>());

    NoSQLAPI noSQLAPI = mockAPI.getMockNoSQLAPI();

    SequenceMerger seqMerger = new SequenceMerger(noSQLAPI);
    seqMerger.init();
    seqMerger.run();
  }

  /**
   * Compares the sequence and reaction written with id 1 against the hand-built expected
   * merged sequence and migrated reaction.
   */
  @Test
  public void testMergeEndToEnd() {
    List<JSONObject> references = new ArrayList<>();
    references.add(newPmidReference("2423423"));
    references.add(newPatentReference("JP", "2008518610", "2008"));
    references.add(newPatentReference("EP", "2904117", "2015"));
    references.add(newPmidReference("218394"));

    JSONObject metadata = new JSONObject();

    JSONObject xrefObject = new JSONObject();
    xrefObject.put("brenda_id", new JSONArray(Arrays.asList(128931, 128930, 128932)));
    metadata.put("xref", xrefObject);

    metadata.put("name", "ADH1");
    metadata.put("synonyms", Arrays.asList("ADH2", "ADH3", "ADH5", "ADH4"));
    metadata.put("product_names", Arrays.asList("Alcohol dehydrogenase", "Alcohol de-hydrogenase"));

    JSONObject accessions = new JSONObject();
    List<String> genbankProteinAccs = Arrays.asList("PAH84937", "JFH67382");
    List<String> genbankNucAccs = Arrays.asList("P04839", "N84937", "B83472");
    List<String> uniprotAccs = Arrays.asList("O0ABC3", "B8NJH9", "B3NTY7");
    accessions.put(Seq.AccType.genbank_protein.toString(), genbankProteinAccs);
    accessions.put(Seq.AccType.genbank_nucleotide.toString(), genbankNucAccs);
    accessions.put(Seq.AccType.uniprot.toString(), uniprotAccs);
    metadata.put("accession", accessions);

    metadata.put("source_sequence_ids", Arrays.asList(84321, 7589L, 85932));

    Seq mergedSeq = new Seq(1L, "1.1.1.1", MongoDB.ORG_ID_BASE, "Mus musculus", "AJKFLGKJDFS",
        references, MongoDBToJSON.conv(metadata), Seq.AccDB.genbank);
    mergedSeq.addReactionsCatalyzed(1L);

    Reaction reaction = new Reaction(1L,
        new Long[]{1L, 2L, 3L}, new Long[]{4L, 5L, 6L},
        new Long[]{}, new Long[]{}, new Long[]{},
        "1.1.1.1", ConversionDirectionType.LEFT_TO_RIGHT, StepDirection.LEFT_TO_RIGHT,
        "test reaction", Reaction.RxnDetailType.CONCRETE);

    reaction.addProteinData(
        newProteinData(new HashSet<>(Arrays.asList(1L, 2L)), MongoDB.ORG_ID_BASE, 789345L));
    reaction.addProteinData(
        newProteinData(new HashSet<>(Collections.singletonList(1L)), MongoDB.ORG_ID_BASE, 789345L));

    Seq testSeq = mockAPI.getMockWriteMongoDB().getSeqFromID(1L);
    Reaction testReaction = mockAPI.getMockWriteMongoDB().getReactionFromUUID(1L);

    // NOTE(review): these null guards let the test pass vacuously when nothing was written under
    // id 1. Consider replacing them with assertNotNull once it is confirmed that the mocked write
    // DB always returns the written entities here.
    if (testSeq != null) {
      compareSeqs(" for testMergeEndToEnd", mergedSeq, testSeq);
    }

    if (testReaction != null) {
      compareReactions(" for testMergeEndToEnd", reaction, testReaction);
    }
  }

  /**
   * Checks the organisms written by the merger against the expected list
   * ("Mus musculus", then "Homo sapiens"), pairing them up by iteration position.
   */
  @Test
  public void testOrgPrefixMatching() {
    List<Organism> organismList = new ArrayList<>();
    Organism musMusculus = new Organism(0L + MongoDB.ORG_ID_BASE, "Mus musculus");
    organismList.add(musMusculus);
    Organism homoSapiens = new Organism(1L + MongoDB.ORG_ID_BASE, "Homo sapiens");
    organismList.add(homoSapiens);

    Map<Long, String> writtenOrganisms = mockAPI.getWrittenOrganismNames();

    // NOTE(review): entries are matched to the expected list by position, so this relies on the
    // iteration order of the map returned by the mock, and checks nothing if the map is empty.
    Iterator<Map.Entry<Long, String>> organismIterator = writtenOrganisms.entrySet().iterator();
    int iteratorIndex = 0;
    while (organismIterator.hasNext()) {
      Map.Entry<Long, String> pair = organismIterator.next();
      compareOrgs("for testOrgPrefixMatching", organismList.get(iteratorIndex), pair);
      iteratorIndex++;
    }
  }

  /**
   * Builds a PMID reference object of the form {@code {"src": "PMID", "val": val}}.
   */
  private static JSONObject newPmidReference(String val) {
    JSONObject pmid = new JSONObject();
    pmid.put("src", "PMID");
    pmid.put("val", val);
    return pmid;
  }

  /**
   * Builds a patent reference object with the "src", "country_code", "patent_number" and
   * "patent_year" fields.
   */
  private static JSONObject newPatentReference(String countryCode, String patentNumber, String patentYear) {
    JSONObject patent = new JSONObject();
    patent.put("src", "Patent");
    patent.put("country_code", countryCode);
    patent.put("patent_number", patentNumber);
    patent.put("patent_year", patentYear);
    return patent;
  }

  /**
   * Builds a reaction protein-data object with the "sequences", "organism" and
   * "source_reaction_id" fields.
   */
  private static JSONObject newProteinData(Set<Long> sequences, long organismId, long sourceReactionId) {
    JSONObject proteinData = new JSONObject();
    proteinData.put("sequences", sequences);
    proteinData.put("organism", organismId);
    proteinData.put("source_reaction_id", sourceReactionId);
    return proteinData;
  }

  /**
   * Asserts that two sequences agree on EC number, organism id and name, protein sequence,
   * references, metadata, source DB and catalyzed reactions. References and metadata are
   * compared through their string representations. The sequence ids themselves are not compared.
   */
  private void compareSeqs(String message, Seq expectedSeq, Seq testSeq) {
    assertEquals("comparing ec " + message, expectedSeq.getEc(), testSeq.getEc());
    assertEquals("comparing org_id " + message, expectedSeq.getOrgId(), testSeq.getOrgId());
    assertEquals("comparing organism " + message, expectedSeq.getOrgName(), testSeq.getOrgName());
    assertEquals("comparing sequence " + message, expectedSeq.getSequence(), testSeq.getSequence());
    assertEquals("comparing references " + message, expectedSeq.getReferences().toString(),
        testSeq.getReferences().toString());
    assertEquals("comparing metadata " + message, expectedSeq.getMetadata().toString(),
        testSeq.getMetadata().toString());
    assertEquals("comparing src db " + message, expectedSeq.getSrcdb(), testSeq.getSrcdb());
    assertEquals("comparing rxn_refs" + message, expectedSeq.getReactionsCatalyzed(),
        testSeq.getReactionsCatalyzed());
  }

  /**
   * Asserts that two reactions agree on EC number and protein data. Protein data is held in a
   * set, so both sides are first sorted with {@link #compareProteinData} to obtain a
   * deterministic order and then compared through their string representations.
   */
  private void compareReactions(String message, Reaction expectedReaction, Reaction testReaction) {
    assertEquals("comparing ec " + message, expectedReaction.getECNum(), testReaction.getECNum());

    List<JSONObject> sortedExpectedData = new ArrayList<>(expectedReaction.getProteinData());
    List<JSONObject> sortedActualData = new ArrayList<>(testReaction.getProteinData());

    Comparator<JSONObject> proteinDataComparator = this::compareProteinData;
    Collections.sort(sortedExpectedData, proteinDataComparator);
    Collections.sort(sortedActualData, proteinDataComparator);

    // Sanity checks on the ordering helper itself.
    assertEquals("testing comparison function",
        -1, compareSeqLists(Arrays.asList(1L, 2L, 5L), Arrays.asList(1L, 2L, 4L)));
    assertEquals("testing comparison function",
        0, compareSeqLists(Arrays.asList(1L, 2L, 4L), Arrays.asList(1L, 2L, 4L)));
    assertEquals("testing comparison function",
        1, compareSeqLists(Arrays.asList(1L, 2L, 4L), Arrays.asList(1L, 3L, 4L)));

    assertEquals("comparing protein data " + message, sortedExpectedData.toString(), sortedActualData.toString());
  }

  /**
   * Orders two protein-data objects by their "sequences" arrays: the object with more sequence
   * ids comes first; ties on size are broken by {@link #compareSeqLists} on the sorted ids.
   */
  private int compareProteinData(JSONObject o1, JSONObject o2) {
    List<Long> o1Seqs = sortedSequenceIds(o1);
    List<Long> o2Seqs = sortedSequenceIds(o2);

    int size1 = o1Seqs.size();
    int size2 = o2Seqs.size();
    if (size1 != size2) {
      return size1 > size2 ? -1 : 1;
    }
    return compareSeqLists(o1Seqs, o2Seqs);
  }

  /**
   * Reads the "sequences" array of a protein-data object into a list sorted in ascending order.
   */
  private static List<Long> sortedSequenceIds(JSONObject proteinData) {
    JSONArray seqArray = proteinData.getJSONArray("sequences");
    List<Long> seqIds = new ArrayList<>();
    for (int i = 0; i < seqArray.length(); i++) {
      seqIds.add(seqArray.getLong(i));
    }
    Collections.sort(seqIds);
    return seqIds;
  }

  /**
   * Asserts that a written organism entry has the id and name of the expected organism.
   *
   * @param writtenOrg entry whose key is compared to the expected organism's UUID and whose
   *                   value is compared to its name
   */
  private void compareOrgs(String message, Organism expectedOrg, Map.Entry<?, ?> writtenOrg) {
    assertEquals("comparing org_id " + message, expectedOrg.getUUID(), writtenOrg.getKey());
    assertEquals("comparing organism name " + message, expectedOrg.getName(), writtenOrg.getValue());
  }

  /**
   * Compares two sorted, equally sized lists of Seq IDs element by element to decide which one
   * should be ordered first. The ordering is descending: at the first position where the lists
   * differ, the list holding the larger id is ordered first.
   *
   * @param o1Seqs first list of sequence ids
   * @param o2Seqs second list of sequence ids, same length as {@code o1Seqs}
   * @return -1 if {@code o1Seqs} should come first, 1 if {@code o2Seqs} should come first,
   *         0 if the lists are element-wise equal
   * @throws IllegalArgumentException if the lists differ in length
   */
  private int compareSeqLists(List<Long> o1Seqs, List<Long> o2Seqs) {
    if (o1Seqs.size() != o2Seqs.size()) {
      throw new IllegalArgumentException(
          "seq lists are not the same length, violates assumption for this comparison");
    }

    for (int i = 0; i < o1Seqs.size(); i++) {
      long first = o1Seqs.get(i);
      long second = o2Seqs.get(i);
      if (first > second) {
        return -1;
      } else if (first < second) {
        return 1;
      }
    }

    return 0;
  }
}