/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.biointerpretation;
import act.shared.Chemical;
import act.shared.Reaction;
import act.shared.Seq;
import com.act.biointerpretation.test.util.MockedNoSQLAPI;
import com.mongodb.BasicDBObject;
import org.biopax.paxtools.model.level3.ConversionDirectionType;
import org.biopax.paxtools.model.level3.StepDirection;
import org.json.JSONArray;
import org.json.JSONObject;
import org.junit.Before;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Ensures that the default methods of {@link BiointerpretationProcessor} do not change any data when copying it
 * from one DB to another.
 *
 * <p>This test exists in the hope of preventing a bug like the one fixed at
 * https://github.com/20n/act/pull/470/files#diff-bdab45f1dc0188bd80c9a412b3689380R2775,
 * where sequences were being written into the reactions collection.
 *
 * <p>A mocked DB is populated with a reaction, the chemicals it involves, an organism, and a sequence. The default
 * processor is then run, and the test verifies that (a) each type of object lives in its appropriate collection,
 * and (b) the only things that have materially changed are the ids.
 */
public class BiointerpretationProcessorTest {
  private static final String POTATO = "Solanum tuberosum";

  // Fixture chemicals; setup() assigns them ids 1..4 in array order.
  private static final String[] INCHIS = new String[] {
      "InChI=1S/12CN.2Fe.2H/c12*1-2;;;;/q12*-1;+2;+3;;",
      "InChI=1S/CH5O4P/c1-5-6(2,3)4/h1H3,(H2,2,3,4)",
      "InChI=1S/C7H5Cl3/c8-4-5-2-1-3-6(9)7(5)10/h1-3H,4H2",
      "InChI=1S/C8H9NO2/c1-6(10)9-7-2-4-8(11)5-3-7/h2-5,11H,1H3,(H,9,10)",
  };

  private MockedNoSQLAPI noSQLAPI;

  /**
   * Installs one reaction, one sequence, one organism, and four chemicals into a fresh mocked DB. This is just
   * enough to touch all relevant collections.
   *
   * @throws Exception if constructing the fixtures or installing the mocks fails
   */
  @Before
  public void setup() throws Exception {
    // Chemical ids are 1-based positions in INCHIS.
    Map<Long, String> inchiMap = new HashMap<>();
    for (int i = 0; i < INCHIS.length; i++) {
      inchiMap.put(i + 1L, INCHIS[i]);
    }

    // The reaction uses chemicals 1 and 2 as substrates and 3 and 4 as products (these are the sets the test
    // asserts against); every other chemical list is left empty.
    Reaction testReaction = new Reaction(
        100L, new Long[]{1L, 2L}, new Long[]{3L, 4L}, new Long[0], new Long[0], new Long[0],
        "1.1.1.1", ConversionDirectionType.LEFT_TO_RIGHT, StepDirection.LEFT_TO_RIGHT, "Reaction 1",
        Reaction.RxnDetailType.CONCRETE);

    Seq testSeq = new Seq(1000L, "1.1.1.1", 10L,
        POTATO,
        "TATERISAWARE", // This is a valid amino acid sequence.
        new ArrayList<>(),
        new BasicDBObject(),
        Seq.AccDB.trembl // Tremble before the mighty POTATO!
    );
    testSeq.setReactionsCatalyzed(Collections.singleton(100L));

    Map<Long, String> organismMap = new HashMap<>();
    organismMap.put(10L, POTATO);

    // Link the reaction to the organism and the sequence through a protein entry.
    testReaction.addProteinData(new JSONObject().
        put("datasource", "FAKE!").
        put("organisms", new JSONArray(Collections.singletonList(10L))).
        put("sequences", new JSONArray(Collections.singletonList(Long.valueOf(testSeq.getUUID()))))
    );

    noSQLAPI = new MockedNoSQLAPI();
    noSQLAPI.installMocks(
        Collections.singletonList(testReaction),
        Collections.singletonList(testSeq),
        organismMap,
        inchiMap
    );
  }

  /**
   * Runs a minimal {@link BiointerpretationProcessor} over the mocked DB and checks that the written reaction,
   * chemicals, organism, and sequence still carry the same biological data and still reference one another.
   *
   * @throws Exception if the processor fails to initialize or run
   */
  @Test
  public void testBiointerpretationProcessor() throws Exception {
    /* The BiointerpretationProcessor's default run() method should move data from the read to the write DB without
     * altering anything but the ids.  We run the processor and then verify that all of the biological data is where
     * it should be. */
    BiointerpretationProcessor processor = new BiointerpretationProcessor(noSQLAPI.getMockNoSQLAPI()) {
      @Override
      public String getName() {
        return "testProcessor";
      }

      @Override
      public void init() throws Exception {
        this.initCalled = true;
      }
    };

    // Must call init or run() will throw an exception.
    processor.init();
    // Do the thing!
    processor.run();

    // Check that we didn't break the data.
    List<Reaction> reactions = noSQLAPI.getWrittenReactions();
    Map<Long, Chemical> chemicals = noSQLAPI.getWrittenChemicals();
    Map<Long, Seq> seqs = noSQLAPI.getWrittenSequences();
    Map<Long, String> orgNames = noSQLAPI.getWrittenOrganismNames();

    // Make sure the reactions are reactions.
    assertEquals("One reaction written to DB", 1, reactions.size());
    Reaction r = reactions.get(0);
    assertTrue("Reaction is of type reaction", r instanceof Reaction);
    assertEquals("EC num matches expected", "1.1.1.1", r.getECNum());

    // And that the chemicals are all there.  Ids may change during migration, so compare by InChI.
    Set<String> substrates = new HashSet<>();
    Collections.addAll(substrates, INCHIS[0], INCHIS[1]);
    Set<String> products = new HashSet<>();
    Collections.addAll(products, INCHIS[2], INCHIS[3]);

    for (Long id : r.getSubstrates()) {
      assertTrue("Substrate appears in expected set", substrates.contains(chemicals.get(id).getInChI()));
    }

    // Iterate the products, not the product cofactors: the fixture reaction is built with no cofactors, so a
    // loop over them would be expected to check nothing.
    for (Long id : r.getProducts()) {
      assertTrue("Product appears in expected set", products.contains(chemicals.get(id).getInChI()));
    }

    // And that the protein(s) was/were migrated for the one reaction, along with its/their organism(s).
    assertEquals("Reaction has one protein", 1, r.getProteinData().size());
    JSONObject protein = r.getProteinData().iterator().next();
    assertEquals("Protein organism maps to expected name",
        POTATO, orgNames.get(protein.getJSONArray("organisms").getLong(0)));

    // Finally ensure the single sequence was migrated, its ids were updated, and its organism was copied.  Phew!
    assertEquals("One seq written to DB", 1, seqs.size());
    Seq seq = seqs.values().iterator().next();
    assertEquals("Protein links to single seq in DB", seq, seqs.get(protein.getJSONArray("sequences").getLong(0)));
    assertEquals("Sequence EC number is expected", "1.1.1.1", seq.getEc());
    assertEquals("Sequence organisms is expected", POTATO, seq.getOrgName());
    assertEquals("Sequence refers to one reaction", 1, seq.getReactionsCatalyzed().size());
    assertEquals("Sequence refers to reaction correctly",
        Long.valueOf(r.getUUID()), seq.getReactionsCatalyzed().iterator().next());
  }
}