/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package com.act.biointerpretation.desalting; import com.act.biointerpretation.Utils.ReactionProjector; import com.act.utils.TSVParser; import org.apache.commons.lang3.tuple.Pair; import org.junit.Test; import java.io.BufferedReader; import java.io.InputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; /** * Note: to use the Chemaxon desalter, you'll need to have a Chemaxon license file installed in your home directory. * To do this, run (after connecting to the NAS): * $ MNT_SHARED_DATA/3rdPartySoftware/Chemaxon/marvinbeans/bin/license [path to a valid license file] * This will copy the license to ~/.chemaxon/license.cxl, which the Chemaxon libraries will find automatically when * the license manager is invoked. */ public class DesalterTest { private final DesaltingROCorpus DESALTING_CORPUS_ROS = new DesaltingROCorpus(); @Test public void testDesalting() throws Exception { List<DesaltingRO> tests = DESALTING_CORPUS_ROS.getDesaltingROS().getRos(); Desalter desalter = new Desalter(new ReactionProjector()); desalter.initReactors(); //Test all the things that should get cleaned for proper cleaning for (DesaltingRO ro : tests) { for (int i = 0; i < ro.getTestCases().size(); i++) { String input = ro.getTestCases().get(i).getInput(); String expectedOutput = ro.getTestCases().get(i).getExpected(); String name = ro.getTestCases().get(i).getLabel(); Map<String, Integer> results = desalter.desaltInchi(input); assertNotNull(results); assertEquals(String.format("Desalting RO Test: %s", name), results.size(), 1); String desaltedCompound = results.keySet().iterator().next(); assertEquals(String.format("Desalting RO Test: %s", name), expectedOutput, desaltedCompound); } } } @Test public void testDesaltingConstants() throws Exception { BufferedReader desaltConstantsReader = DESALTING_CORPUS_ROS.getDesalterConstantsReader(); Desalter desalter = new Desalter(new ReactionProjector()); desalter.initReactors(); String inchi = null; while ((inchi = desaltConstantsReader.readLine()) != null) { Map<String, Integer> results = desalter.desaltInchi(inchi); assertTrue(results.size() == 1); String desaltedMolecule = results.keySet().iterator().next(); assertEquals(inchi, desaltedMolecule); } desaltConstantsReader.close(); } @Test public void testDesaltingDetectsAndCountsRepeatedFragments() throws Exception { List<Pair<String, Map<String, Integer>>> testCases = new ArrayList<Pair<String, Map<String, Integer>>>() {{ add(Pair.of( // Phenanthroline! "InChI=1S/2C12H8N2.2ClH.Ru/c2*1-3-9-5-6-10-4-2-8-14-12(10)11(9)13-7-1;;;/h2*1-8H;2*1H;/q2*-2;;;+9", new HashMap<String, Integer>() {{ put("InChI=1S/C12H8N2/c1-3-9-5-6-10-4-2-8-14-12(10)11(9)13-7-1/h1-8H/q-2", 2); }} )); add(Pair.of( // Cyanide! "InChI=1S/12CN.2Fe.2H/c12*1-2;;;;/q12*-1;+2;+3;;", new HashMap<String, Integer>() {{ put("InChI=1S/CN/c1-2/q-1", 12); }} )); add(Pair.of( // Bypyradine! "InChI=1S/2C10H10N2.2ClH.Ru/c2*1-3-7-11-9(5-1)10-6-2-4-8-12-10;;;/h2*1-10H;2*1H;/q2*-2;;;+8/p-2", new HashMap<String, Integer>() {{ put("InChI=1S/C10H10N2/c1-3-7-11-9(5-1)10-6-2-4-8-12-10/h1-10H/q-2", 2); }} )); add(Pair.of( // Cyclopentadien! "InChI=1S/2C5H5.F6P.Fe/c2*1-2-4-5-3-1;1-7(2,3,4,5)6;/h2*1-5H;;/q3*-1;+3", new HashMap<String, Integer>() {{ put("InChI=1S/C5H5/c1-2-4-5-3-1/h1-5H/q-1", 2); }} )); add(Pair.of( // Citrate! (Bonus: multiple copper ions.) "InChI=1S/2C6H8O7.3Cu/c2*7-3(8)1-6(13,5(11)12)2-4(9)10;;;/h2*13H,1-2H2,(H,7,8)(H,9,10)(H,11,12);;;/q;;3*+2/p-6", new HashMap<String, Integer>() {{ put("InChI=1S/C6H8O7/c7-3(8)1-6(13,5(11)12)2-4(9)10/h13H,1-2H2,(H,7,8)(H,9,10)(H,11,12)", 2); }} )); }}; Desalter desalter = new Desalter(new ReactionProjector()); desalter.initReactors(); for (Pair<String, Map<String, Integer>> testCase : testCases) { String inchi = testCase.getLeft(); Map<String, Integer> expectedFragmentCounts = testCase.getRight(); Map<String, Integer> actual = desalter.desaltInchi(inchi); assertEquals(String.format("Fragments and counts match for %s", inchi), expectedFragmentCounts, actual); } } /** * This test is odd in that it's a test of consistency rather than of correctness. The input dataset is ~1000 InChIs * from the production DB that were run through both the current and previous implementations of the desalter and * found to be sufficiently equivalent in all but a few (now understood) cases. The included InChIs were either * modified or unaltered by the desalter; complex InChIs are not considered at the moment. * * This test ensures that the set of InChIs that were used to evaluate the behavior of the desalter in its conversion * to Chemaxon's libraries are treated consistently as the desalter evolves. * * If this test fails, **do not panic.** It is possible you've actually improved the desalter's behavior, so this * test may be telling you that your changes are doing the /right/ thing. If the difference in InChIs (which you can * determine by using the ReactionDesalter on the list of test InChIs) looks good, update the data file to match your * new output. * * TODO: add some complex InChIs that can be split into components and/or desalted. * * TODO: if this test gets in the way of forward progress, remove it. * * @throws Exception */ @Test public void testDesaltingOnKnownInChIs() throws Exception { InputStream testInChIsStream = getClass().getResourceAsStream("desalter_test_cases.txt"); TSVParser parser = new TSVParser(); parser.parse(testInChIsStream); Desalter desalter = new Desalter(new ReactionProjector()); desalter.initReactors(); int i = 0; for (Map<String, String> row : parser.getResults()) { i++; String input = row.get("input"); String expected = row.get("expected_output"); Map<String, Integer> results = desalter.desaltInchi(input); assertNotNull(String.format("Case %d: desalter results are not null", i), results); assertEquals(String.format("Case %d: only one desalted molecule is produced", i), 1, results.size()); assertEquals(String.format("Case %d: desalter produces expected results", i), expected, results.keySet().iterator().next()); } } }