package org.genedb.db.loading;

import org.gmod.schema.feature.Chromosome;

import org.apache.log4j.Logger;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.transaction.annotation.Transactional;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;

/**
 * Exercises the orthologue loader in implicit-cluster mode.
 * <p>
 * Implicit-cluster mode applies when the input file contains no explicit
 * clusters but an algorithm is specified, i.e. the orthologues are
 * algorithmically predicted.
 * <p>
 * This is an integration test rather than a unit test: it depends on the
 * EMBL loader to load the genes before their orthologue data is loaded.
 *
 * @author rh11
 *
 */
public class OrthologueLoaderImplicitClusterTest {

    private static final Logger logger = TestLogger.getLogger(OrthologueLoaderImplicitClusterTest.class);

    /** Analysis properties handed to the loader and expected back by the tester. */
    private static final String PROGRAM = "fasta";
    private static final String PROGRAM_VERSION = "3.4t26";
    private static final String ALGORITHM = "Reciprocal best match";

    private static final String DATASET_NAME = "test";

    /** EMBL files to load, in order, as {file name, organism common name} pairs. */
    private static final String[][] EMBL_FIXTURES = {
        {"test/data/MRSA252_subset.embl", "Saureus_MRSA252"},
        {"test/data/MSSA476_subset.embl", "Saureus_MSSA476"},
        {"test/data/EMRSA15_subset.embl", "Saureus_EMRSA15"},
    };

    private static ApplicationContext applicationContext;
    private static OrthologueTester tester;

    /**
     * Builds the Spring context, loads the three EMBL files, then loads the
     * gene-name orthologue file (geneNames = true) followed by the
     * transcript-name orthologue file (geneNames = false), and finally looks
     * up the tester bean.
     *
     * @throws IOException if one of the input files cannot be read
     * @throws ParsingException if one of the input files cannot be parsed
     */
    @BeforeClass
    public static void setup() throws IOException, ParsingException {
        applicationContext = new ClassPathXmlApplicationContext(new String[] {"Load.xml", "Test.xml"});

        for (String[] fixture : EMBL_FIXTURES) {
            loadEmblFile(fixture[0], fixture[1]);
        }

        loadOrthologues("test/data/Saureus_subset_genenames.ortho",
                PROGRAM, PROGRAM_VERSION, ALGORITHM, true);
        loadOrthologues("test/data/Saureus_subset_transcriptnames.ortho",
                PROGRAM, PROGRAM_VERSION, ALGORITHM, false);

        // The tester is only fetched once every load has succeeded, so it
        // stays null if anything above throws.
        tester = applicationContext.getBean("orthologueTester", OrthologueTester.class);
    }

    /**
     * Delegates clean-up to the tester, if there is one.
     */
    @AfterClass
    public static void cleanUp() {
        if (tester != null) {
            tester.cleanUp();
            return;
        }
        // This can happen if there's an error in setup:
        // JUnit still calls us even if setup threw an exception.
        logger.error("Tester is null in cleanUp");
    }

    /**
     * Loads a single EMBL file using the "emblLoader" bean, with sloppy
     * controlled curation enabled and {@link Chromosome} as the top-level
     * feature class.
     *
     * @param filename path of the EMBL file
     * @param organismCommonName common name of the organism to load into
     * @throws IOException if the file cannot be read or closed
     * @throws ParsingException if the loader rejects the file
     */
    private static void loadEmblFile(String filename, String organismCommonName)
            throws IOException, ParsingException {
        logger.trace(String.format("Loading '%s' into organism '%s'", filename, organismCommonName));

        EmblLoader emblLoader = applicationContext.getBean("emblLoader", EmblLoader.class);
        emblLoader.setOrganismCommonName(organismCommonName);
        emblLoader.setSloppyControlledCuration(true);
        emblLoader.setTopLevelFeatureClass(Chromosome.class);

        File emblPath = new File(filename);
        Reader emblReader = new FileReader(emblPath);
        try {
            EmblFile emblFile = new EmblFile(emblPath, emblReader);
            emblLoader.load(emblFile);
        } finally {
            emblReader.close();
        }
    }

    /**
     * Loads one orthologue file using the "orthologuesLoader" bean.
     *
     * @param filename path of the orthologue file
     * @param program analysis program name passed to the loader
     * @param programVersion analysis program version passed to the loader
     * @param algorithm analysis algorithm passed to the loader
     * @param geneNames value passed to {@code setGeneNames} on the loader
     * @throws IOException if the file cannot be read or closed
     * @throws ParsingException if the file cannot be parsed
     */
    private static void loadOrthologues(String filename, String program, String programVersion,
            String algorithm, boolean geneNames) throws IOException, ParsingException {
        OrthologuesLoader loader = applicationContext.getBean("orthologuesLoader", OrthologuesLoader.class);

        File orthoPath = new File(filename);
        Reader orthoReader = new FileReader(orthoPath);
        try {
            // The file is parsed before the loader is configured.
            OrthologueFile orthologueFile = new OrthologueFile(orthoPath, orthoReader);

            loader.setAnalysisProperties(program, programVersion, algorithm);
            loader.setDatasetName(DATASET_NAME);
            loader.setGeneNames(geneNames);
            loader.load(orthologueFile);
        } finally {
            orthoReader.close();
        }
    }

    /**
     * Asks the tester to check one orthologue group against the analysis
     * properties used by this test.
     *
     * @param identity expected identity value; passed through unchanged, and
     *                 may be {@code null}
     * @param polypeptideUniqueNames unique names of the polypeptides in the group
     */
    private void expectGroup(Double identity, String... polypeptideUniqueNames) {
        tester.orthologueGroup(PROGRAM, PROGRAM_VERSION, ALGORITHM, identity, polypeptideUniqueNames);
    }

    /**
     * Checks the first set of expected orthologue groups (presumably those
     * defined in the gene-name input file).
     */
    @Transactional
    @Test
    public void testGeneNameOrthologueGroups() {
        expectGroup(100.0, "SAEMRSA1513290.1:pep", "SAR1478.1:pep");
        expectGroup(92.9, "SAEMRSA1516500.1:pep", "SAR1820.1:pep");
        expectGroup(100.0, "SAEMRSA1519750.1:pep", "SAR2153.1:pep");
        expectGroup(95.7, "SAEMRSA1519820.1:pep", "SAR2160.1:pep");
        expectGroup(null, "SAEMRSA1521630.1:pep", "SAR2349.1:pep");
        expectGroup(99.2, "SAEMRSA1502320.1:pep", "SAR0270.1:pep");
        expectGroup(96.6, "SAEMRSA1523410.1:pep", "SAR2531.1:pep");
        expectGroup(100.0, "SAEMRSA1525060.1:pep", "SAR2680.1:pep");
        expectGroup(96.5, "SAEMRSA1503370.1:pep", "SAR0403.1:pep");
        expectGroup(null, "SAEMRSA1504360.1:pep", "SAR0511.1:pep");
        expectGroup(100.0, "SAEMRSA1507570.1:pep", "SAR0889.1:pep");
        expectGroup(100.0, "SAEMRSA1511870.1:pep", "SAS1280.1:pep");
        expectGroup(99.5, "SAEMRSA1517490.1:pep", "SAS1765.1:pep");
        expectGroup(99.6, "SAEMRSA1518070.1:pep", "SAS1823.1:pep");
        expectGroup(null, "SAEMRSA1520150.1:pep", "SAS2010.1:pep");
        expectGroup(98.5, "SAEMRSA1523990.1:pep", "SAS2388.1:pep");
        expectGroup(98.6, "SAEMRSA1524970.1:pep", "SAS2480.1:pep");
        expectGroup(99.2, "SAEMRSA1503330.1:pep", "SAS0357.1:pep");
        expectGroup(98.9, "SAEMRSA1504320.1:pep", "SAS0463.1:pep");
        expectGroup(100.0, "SAEMRSA1504870.1:pep", "SAS0518.1:pep");
        expectGroup(98.6, "SAEMRSA1505550.1:pep", "SAS0595.1:pep");
        expectGroup(98.7, "SAEMRSA1500750.1:pep", "SAS0083.1:pep");
        expectGroup(93.0, "SAEMRSA1508090.1:pep", "SAS0850.1:pep");
        expectGroup(99.4, "SAR1647.1:pep", "SAS1508.1:pep");
        expectGroup(100.0, "SAR1712.1:pep", "SAS1568.1:pep");
        expectGroup(99.4, "SAR2389.1:pep", "SAS2196.1:pep");
        expectGroup(100.0, "SAR2601.1:pep", "SAS2406.1:pep");
        expectGroup(97.1, "SAR1812.1:pep", "SAS1660.1:pep");
        expectGroup(99.0, "SAR0736.1:pep", "SAS0648.1:pep");
        expectGroup(99.2, "SAR0156.1:pep", "SAS0129.1:pep");
        expectGroup(97.9, "SAR1639.1:pep", "SAS1500.1:pep");
        expectGroup(100.0, "SAR0015.1:pep", "SAS0015.1:pep");
        expectGroup(100.0, "SAR1663.1:pep", "SAS1523.1:pep");
        expectGroup(100.0, "SAR1939.1:pep", "SAS1769.1:pep");
    }

    /**
     * Checks the second set of expected orthologue groups (presumably those
     * defined in the transcript-name input file).
     */
    @Transactional
    @Test
    public void testPepNameOrthologueGroups() {
        expectGroup(53.0, "SAEMRSA1511480.1:pep", "SAR1311.1:pep");
        expectGroup(96.9, "SAEMRSA1512940.1:pep", "SAR1444.1:pep");
        expectGroup(99.4, "SAEMRSA1513440.1:pep", "SAR1493.1:pep");
        expectGroup(100.0, "SAEMRSA1514830.1:pep", "SAR1640.1:pep");
        expectGroup(null, "SAEMRSA1514980.1:pep", "SAR1655.1:pep");
        expectGroup(99.6, "SAEMRSA1515150.1:pep", "SAR1673.1:pep");
        expectGroup(85.5, "SAEMRSA1516810.1:pep", "SAR0692.1:pep");
        expectGroup(96.2, "SAEMRSA1516860.1:pep", "SAR1859.1:pep");
        expectGroup(98.5, "SAEMRSA1517760.1:pep", "SAR1959.1:pep");
        expectGroup(null, "SAEMRSA1518420.1:pep", "SAR2018.1:pep");
        expectGroup(99.7, "SAEMRSA1520260.1:pep", "SAR2206.1:pep");
        expectGroup(98.5, "SAEMRSA1521860.1:pep", "SAR2373.1:pep");
        expectGroup(98.5, "SAEMRSA1525490.1:pep", "SAR2723.1:pep");
        expectGroup(100.0, "SAEMRSA1508900.1:pep", "SAR1032.1:pep");
        expectGroup(null, "SAEMRSA1509280.1:pep", "SAR1072.1:pep");
        expectGroup(98.9, "SAEMRSA1510800.1:pep", "SAS1181.1:pep");
        expectGroup(98.9, "SAEMRSA1514330.1:pep", "SAS1451.1:pep");
        expectGroup(99.5, "SAEMRSA1501520.1:pep", "SAS0162.1:pep");
        expectGroup(100.0, "SAEMRSA1519310.1:pep", "SAS1928.1:pep");
        expectGroup(99.7, "SAEMRSA1520710.1:pep", "SAS2067.1:pep");
        expectGroup(95.0, "SAEMRSA1522400.1:pep", "SAS2234a.1:pep");
        expectGroup(99.1, "SAEMRSA1522570.1:pep", "SAS2250.1:pep");
        expectGroup(88.3, "SAEMRSA1523030.1:pep", "SAS2295.1:pep");
        expectGroup(100.0, "SAEMRSA1523490.1:pep", "SAS2341.1:pep");
        expectGroup(99.5, "SAEMRSA1525780.1:pep", "SAS2557.1:pep");
        expectGroup(98.4, "SAEMRSA1504600.1:pep", "SAS0491.1:pep");
        expectGroup(100.0, "SAEMRSA1504710.1:pep", "SAS0502.1:pep");
        expectGroup(98.5, "SAEMRSA1505740.1:pep", "SAS0613.1:pep");
        expectGroup(100.0, "SAR1035.1:pep", "SAS0997.1:pep");
        expectGroup(88.4, "SAR1141.1:pep", "SAS1101.1:pep");
        expectGroup(100.0, "SAR1187.1:pep", "SAS1145.1:pep");
        expectGroup(99.8, "SAR0014.1:pep", "SAS0014.1:pep");
        expectGroup(100.0, "SAR1512.1:pep", "SAS0939.1:pep");
        expectGroup(100.0, "SAR1729.1:pep", "SAS1585.1:pep");
        expectGroup(89.6, "SAR0628.1:pep", "SAS0587.1:pep");
        expectGroup(100.0, "SAR0772.1:pep", "SAS0684.1:pep");
        expectGroup(99.4, "SAR0883.1:pep", "SAS0791.1:pep");
        expectGroup(99.8, "SAR2691.1:pep", "SAS2498.1:pep");
        expectGroup(99.2, "SAR0864.1:pep", "SAS0773.1:pep");
        expectGroup(99.4, "SAR0234.1:pep", "SAS0217.1:pep");
        expectGroup(99.4, "SAR2454.1:pep", "SAS2256.1:pep");
        expectGroup(98.8, "SAR0594.1:pep", "SAS0547.1:pep");
    }
}