package org.nextprot.api.core.utils.exon;

import com.google.common.base.Preconditions;
import org.junit.Assert;
import org.junit.Test;
import org.nextprot.api.core.domain.AminoAcid;
import org.nextprot.api.core.domain.Exon;

import java.util.ArrayList;
import java.util.List;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

/**
 * Tests for {@link TranscriptExonsAnalyser}.
 *
 * <p>Each test feeds mocked exons (only their first/last positions on the gene are stubbed),
 * an isoform sequence and two gene positions to the analyser, and checks the events recorded
 * by a listener: the exon category plus the first and last amino acid (base, position, phase).
 *
 * Created by fnikitin on 22/07/15.
 */
public class TranscriptExonsAnalyserTest {

    @Test
    public void testExtractInfosNX_Q9Y281_3() throws Exception {

        InfoCollectorAnalysis collector = analyse(
                "MASGVTVNDEVIKVFNDMKVRKSSTQEEIKKRKKAVLFCLSDDKRQIIVEEAKQILVGDIGDTVEDPYTSFVKLLPLNDCRYALYDATYETKESKKEDLVFIFWAPESAPLKSKMIYASSKDAIKKKFTGIKHEWQVNGLDDIKDRSTLGEKLGGNVVVSLEGKPL",
                284, 1956,
                134, 286, 1263, 1570, 1688, 1764, 1847, 4437);

        /*
         * Reference data:
         *
         * name          positions                              gene_id  sequence
         * NX_Q9Y281-3   [1311=1570, 1688=1764, 1847=1956]      266594   MKVRKSSTQEEIKKRKKAVLFCLSDDKRQIIVEEAKQILVGDIGDTVEDPYTSFVKLLPLNDCRYALYDATYETKESKKEDLVFIFWAPESAPLKSKMIYASSKDAIKKKFTGIKHEWQVNGLDDIKDRSTLGEKLGGNVVVSLEGKPL
         *
         * iso           transcript            exons                                                                                  accession
         * NX_Q9Y281-3   NX_ENST00000298159    [gene-pos=[134,286], gene-pos=[1263,1570], gene-pos=[1688,1764], gene-pos=[1847,4437]]  ENST00000298159
         */
        Assert.assertEquals(4, collector.size());
        assertInfoEquals(collector.getInfoAt(0), 'M', 1, 0, 'M', 1, 0, ExonCategory.START);
        assertInfoEquals(collector.getInfoAt(1), 'A', 2, 0, 'W', 104, 2, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(2), 'W', 104, 2, 'G', 130, 1, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(3), 'G', 130, 1, 'L', 166, 0, ExonCategory.STOP);
    }

    @Test
    public void testExtractInfosNX_Q96M20() throws Exception {

        InfoCollectorAnalysis collector = analyse(
                "MRRHMVTYAWQLLKKELGLYQLAMDIIIMIRVCKMFRQGLRGFREYQIIETAHWKHPIFSFWDKKMQSRVTFDTMDFIAEEGHFPPKAIQIMQKKPSWRTEDEIQAVCNILQVLDSYRNYAEPLQLLLAKVMRFERFGRRRVIIKKGQKGNSFYFIYLGTVAITKDEDGSSAFLDPHPKLLHKGSCFGEMDVLHASVRRSTIVCMEETEFLVVDREDFFANKLDQEVQKDAQYRFEFFRKMELFASWSDEKLWQLVAMAKIERFSYGQLISKDFGESPFIMFISKGSCEVLRLLDLGASPSYRRWIWQHLELIDGRPLKTHLSEYSPMERFKEFQIKSYPLQDFSSLKLPHLKKAWGLQGTSFSRKIRTSGDTLPKMLGPKIQSRPAQSIKCAMINIKPGELPKEAAVGAYVKVHTVEQGEIL",
                174, 61767,
                34, 224, 4040, 4177, 7360, 7413, 11870, 12033, 15393, 15549, 16038, 16189,
                18806, 18944, 26449, 26563, 39708, 39885, 42548, 42668, 61768, 62080);

        /*
         * Reference data:
         *
         * name          positions  gene_id  sequence
         * NX_Q96M20-3   [174=224, 4040=4177, 7360=7413, 11870=12033, 15393=15549, 16038=16189, 18806=18944, 26449=26563, 39708=39885, 42548=42668, 61768=61767]
         *               507928
         *               MRRHMVTYAWQLLKKELGLYQLAMDIIIMIRVCKMFRQGLRGFREYQIIETAHWKHPIFSFWDKKMQSRVTFDTMDFIAEEGHFPPKAIQIMQKKPSWRTEDEIQAVCNILQVLDSYRNYAEPLQLLLAKVMRFERFGRRRVIIKKGQKGNSFYFIYLGTVAITKDEDGSSAFLDPHPKLLHKGSCFGEMDVLHASVRRSTIVCMEETEFLVVDREDFFANKLDQEVQKDAQYRFEFFRKMELFASWSDEKLWQLVAMAKIERFSYGQLISKDFGESPFIMFISKGSCEVLRLLDLGASPSYRRWIWQHLELIDGRPLKTHLSEYSPMERFKEFQIKSYPLQDFSSLKLPHLKKAWGLQGTSFSRKIRTSGDTLPKMLGPKIQSRPAQSIKCAMINIKPGELPKEAAVGAYVKVHTVEQGEIL
         *
         * iso           transcript            exons  accession
         * NX_Q96M20-3   NX_ENST00000538900    [gene-pos=[34,224], gene-pos=[4040,4177], gene-pos=[7360,7413], gene-pos=[11870,12033], gene-pos=[15393,15549], gene-pos=[16038,16189], gene-pos=[18806,18944], gene-pos=[26449,26563], gene-pos=[39708,39885], gene-pos=[42548,42668], gene-pos=[61768,62080]]
         *               ENST00000538900
         */
        Assert.assertEquals(11, collector.size());
        assertInfoEquals(collector.getInfoAt(0), 'M', 1, 0, 'L', 17, 0, ExonCategory.START);
        assertInfoEquals(collector.getInfoAt(1), 'G', 18, 0, 'D', 63, 0, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(2), 'K', 64, 0, 'E', 81, 0, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(3), 'G', 82, 0, 'R', 136, 2, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(4), 'R', 136, 2, 'G', 188, 0, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(5), 'E', 189, 0, 'R', 239, 2, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(6), 'R', 239, 2, 'K', 285, 0, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(7), 'G', 286, 0, 'E', 324, 1, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(8), 'E', 324, 1, 'Q', 383, 2, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(9), 'Q', 383, 2, 'L', 423, 0, ExonCategory.CODING);
        // The last exon is expected to be reported without any amino acid.
        assertInfoWithoutAminoAcids(collector.getInfoAt(10), ExonCategory.STOP_ONLY);
    }

    @Test
    public void testExtractInfosNX_P20592_2() throws Exception {

        InfoCollectorAnalysis collector = analyse(
                "MSKAHKPWPYRRRSQFSSRKYLKKEMNSFQQQPPPFGTVPPQMMFPPNWQGAEKDAAFLAKDFNFLTLNNQPPPGNRSQPRAMGPENNLYSQYEQKVRPCIDLIDSLRALGVEQDLALPAIAVIGDQSSGKSSVLEALSGVALPRGSAQNVMAGNGRGISHELISLEITSPEVPDLTIIDLPGITRVAVDNQPRDIGLQVS",
                14965, 33650,
                8143, 8407, 14894, 15213, 15847, 16039, 20468, 20622, 33645, 33843, 35671, 35749, 36955, 37000);

        Assert.assertEquals(7, collector.size());
        assertInfoWithoutAminoAcids(collector.getInfoAt(0), ExonCategory.NOT_CODING_PRE);
        assertInfoEquals(collector.getInfoAt(1), 'M', 1, 0, 'M', 83, 0, ExonCategory.START);
        assertInfoEquals(collector.getInfoAt(2), 'G', 84, 0, 'A', 148, 1, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(3), 'A', 148, 1, 'Q', 199, 0, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(4), 'V', 200, 0, 'S', 201, 0, ExonCategory.STOP);
        assertInfoWithoutAminoAcids(collector.getInfoAt(5), ExonCategory.NOT_CODING_POST);
        assertInfoWithoutAminoAcids(collector.getInfoAt(6), ExonCategory.NOT_CODING_POST);
    }

    @Test
    public void testExtractInfosMonoNX_O15541() throws Exception {

        InfoCollectorAnalysis collector = analyse(
                "MAEQLSPGKAVDQVCTFLFKKPGRKGAAGRRKRPACDPEPGESGSSSDEGCTVVRPEKKRVTHNPMIQKTRDSGKQKAAYGDLSSEEEEENEPESLGVVYKSTRSAKPVGPEDMGATAVYELDTEKERDAQAIFERSQKIQEELRGKEDDKIYRGINNYQKYMKPKDTSMGNASSGMVRKGPIRAPEHLRATVRWDYQPDICKDYKETGFCGFGDSCKFLHDRSDYKHGWQIERELDEGRYGVYEDENYEVGSDDEEIPFKCFICRQSFQNPVVTKCRHYFCESCALQHFRTTPRCYVCDQQTNGVFNPAKELIAKLEKHRATGEGGASDLPEDPDEDAIPIT",
                216, 1244,
                1, 1295);

        Assert.assertEquals(1, collector.size());
        assertInfoEquals(collector.getInfoAt(0), 'M', 1, 0, 'T', 343, 0, ExonCategory.MONO);
    }

    @Test
    public void testExtractInfosNX_Q8NFW8_2() throws Exception {

        InfoCollectorAnalysis collector = analyse(
                "MDSVEKGAATSVSNPRGRPSRGRPPKLQRNSRGGQGRGVEKPPHLAALILARGGSKGIPLKNIKHLAGVPLIGWVLRAALDSGAFQSVWVSTDHDEIENVAKQFGAQVHRRSSEVSKDSSTSLDAIIEFLNYHNEVDIVGNIQATSPCLHPTDLQKVAEMIREEGYDSVFSVVRRHQFRWSEIQKGVREVTEPLNLNPAKRPRRQDWDGELYENGSFYFAKRHLIEMGYLQGGKMAYYEMRAEHSVDIDVDIDWPIAEQRVLR",
                131, 16108,
                52, 390, 8976, 9118, 9282, 9437, 12394, 12527, 14659, 14753, 16108, 16261, 18948, 19501);

        Assert.assertEquals(7, collector.size());
        assertInfoEquals(collector.getInfoAt(0), 'M', 1, 0, 'S', 87, 2, ExonCategory.START);
        assertInfoEquals(collector.getInfoAt(1), 'S', 87, 2, 'E', 135, 1, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(2), 'E', 135, 1, 'V', 187, 1, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(3), 'V', 187, 1, 'Q', 231, 0, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(4), 'G', 232, 0, 'R', 263, 2, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(5), 'R', 263, 2, 'R', 263, 0, ExonCategory.STOP);
        assertInfoWithoutAminoAcids(collector.getInfoAt(6), ExonCategory.NOT_CODING_POST);
    }

    @Test
    public void testExtractInfosMiniExons() throws Exception {

        // Exons of 1 and 2 nucleotides ([100,100] and [150,151]) sit between the two larger ones.
        InfoCollectorAnalysis collector = analyse(
                "MRTEQ",
                10, 209,
                1, 11, 100, 100, 150, 151, 200, 300);

        Assert.assertEquals(4, collector.size());
        assertInfoEquals(collector.getInfoAt(0), 'M', 1, 0, 'M', 1, 2, ExonCategory.START);
        assertInfoEquals(collector.getInfoAt(1), 'M', 1, 2, 'M', 1, 0, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(2), 'R', 2, 0, 'R', 2, 2, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(3), 'R', 2, 2, 'Q', 5, 0, ExonCategory.STOP);
    }

    @Test
    public void testExtractInfosNX_Q5JQC4_1_ENST00000416816AndException() throws Exception {

        InfoCollectorAnalysis collector = analyse(
                "MSATGDRHPTQGDQEAPVSQEGAQAEAAGAGNQEGGDSGPDSSDVVPAAEVVGVAGPVEGLGEEEGEQAAGLAAVPRGGSAEEDSDIGPATEEEEEEEGNEAANFDLAVVARRYPASGIHFVLLDMVHSLLHRLSHNDHILIENRQLSRLMVGPHAAARNLWGNLPPLLLPQRLGAGAAARAGEGLGLIQEAASVPEPAVPADLAEMAREPAEEAAEEKLSEEATEEPDAEEPATEEPTAQEATAPEEVTKSQPEKWDEEAQDAAGEEEKEQEKEKDAENKVKNSKGT",
                256, 53495,
                1, 997, 1885, 2040, 54668, 54808);

        // Only the first exon is expected to be collected, and the listener must have seen a failure.
        Assert.assertEquals(1, collector.size());
        assertInfoEquals(collector.getInfoAt(0), 'M', 1, 0, 'E', 248, 1, ExonCategory.START);
        // Disabled expectations for the remaining exons (they are not collected, see size above):
        //assertInfoEquals(collector.getInfoAt(1), 'E', 248, 1, 'T', 288, 1, ExonCategory.CODING);
        //Assert.assertEquals(ExonCategory.NOT_CODING_POST, collector.getInfoAt(2).getExonCategory());
        //Assert.assertNull(collector.getInfoAt(2).getFirstAA());
        //Assert.assertNull(collector.getInfoAt(2).getLastAA());
        Assert.assertTrue(collector.hasError());
    }

    // NOTE(review): the two tests below are disabled. They rely on TranscriptInfoCollector and
    // TranscriptInfosExtractor, which are not used anywhere else in this file (presumably an
    // earlier API - confirm). The first one also asserts a size of 5 while reading index 5.
    // Kept for their reference data: port them to TranscriptExonsAnalyser or remove them.
    /*@Test
    public void testExtractInfosNX_Q658P3Iso3() throws Exception {

        List<Exon> exons = createMockExonList(1, 81, 6813, 7227, 21682, 22181, 23872, 24399, 30877, 31041, 39250, 41842);

        TranscriptInfoCollector collector = new TranscriptInfoCollector();
        TranscriptInfosExtractor extractor = new TranscriptInfosExtractor(collector);

        extractor.extract("NX_Q658P3-3.ENST00000354888", "MPEEMDKPLISLHLVDSDSSLAKVPDEAPKVGILGSGDFARSLATRLVGSGFKVVVGSRNPKRTARLFPSAAQVTFQEEAVSSPEVIFVAVFREHYSSLCSLSDQLAGKILVDVSNPTEQEHLQHRESNAEYLASLFPTCTVVKAFNVISAWTLQAGPRDGNRQVPICGDQPEAKRAVSEMALAMGFMPVDMGSLASAWEVEAMPLRLLPAWKVPTLLALGLFVCFYAYNFVRDVLQPYVQESQNKFFKLPVSVVNTTLPCVAYVLLSLVYLPGVLAAALQLRRGTKYQRFPDWLDHWLQHRKQIGLLSFFCAALHALYSFCLPLRRAHRYDLVNLAVKQVLANKSHLWVEEVWRMEIYLSLGVLALGTLSLLAVTSLPSIANSLNWREFSFVQSSLGFVALVLSTLHTLTYGWTRAFEESRYKFYLPPTFTLTLLVPCVVILAKALFLLPCISRRLARIRRGWERESTIKFTLPTDHALAEKTSHV", 21690, 39528, exons);

        Assert.assertEquals(5, collector.size());
        Assert.assertEquals(ExonCategory.NOT_CODING_PRE, collector.getInfoAt(0).getExonCategory());
        Assert.assertNull(collector.getInfoAt(0).getFirstAA());
        Assert.assertNull(collector.getInfoAt(0).getLastAA());
        assertInfoEquals(collector.getInfoAt(1), 'M', 1, 0, 'Q', 164, 0, ExonCategory.START);
        assertInfoEquals(collector.getInfoAt(2), 'V', 165, 0, 'Q', 340, 0, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(3), 'V', 341, 0, 'V', 350, 0, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(4), 'E', 351, 0, 'Q', 394, 0, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(5), 'S', 395, 0, 'V', 487, 0, ExonCategory.STOP);
    }

    @Test
    public void testExtractInfosNX_Q658P3Iso3np1() throws Exception {

        List<Exon> exons = createMockExonList(46, 81, 21682, 22181, 23872, 24399, 30877, 30906, 30910, 31041, 39250, 39891);

        TranscriptInfoCollector collector = new TranscriptInfoCollector();
        TranscriptInfosExtractor extractor = new TranscriptInfosExtractor(collector);

        extractor.extract("NX_Q658P3-3.ENST00000354888", "MPEEMDKPLISLHLVDSDSSLAKVPDEAPKVGILGSGDFARSLATRLVGSGFKVVVGSRNPKRTARLFPSAAQVTFQEEAVSSPEVIFVAVFREHYSSLCSLSDQLAGKILVDVSNPTEQEHLQHRESNAEYLASLFPTCTVVKAFNVISAWTLQAGPRDGNRQVPICGDQPEAKRAVSEMALAMGFMPVDMGSLASAWEVEAMPLRLLPAWKVPTLLALGLFVCFYAYNFVRDVLQPYVQESQNKFFKLPVSVVNTTLPCVAYVLLSLVYLPGVLAAALQLRRGTKYQRFPDWLDHWLQHRKQIGLLSFFCAALHALYSFCLPLRRAHRYDLVNLAVKQVLANKSHLWVEEVWRMEIYLSLGVLALGTLSLLAVTSLPSIANSLNWREFSFVQSSLGFVALVLSTLHTLTYGWTRAFEESRYKFYLPPTFTLTLLVPCVVILAKALFLLPCISRRLARIRRGWERESTIKFTLPTDHALAEKTSHV", 21690, 39528, exons);

        Assert.assertEquals(6, collector.size());
        Assert.assertEquals(ExonCategory.NOT_CODING_PRE, collector.getInfoAt(0).getExonCategory());
        Assert.assertNull(collector.getInfoAt(0).getFirstAA());
        Assert.assertNull(collector.getInfoAt(0).getLastAA());
        assertInfoEquals(collector.getInfoAt(1), 'M', 1, 0, 'Q', 164, 0, ExonCategory.START);
        assertInfoEquals(collector.getInfoAt(2), 'V', 165, 0, 'Q', 340, 0, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(3), 'V', 341, 0, 'V', 350, 0, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(4), 'E', 351, 0, 'Q', 394, 0, ExonCategory.CODING);
        assertInfoEquals(collector.getInfoAt(5), 'S', 395, 0, 'V', 487, 0, ExonCategory.STOP);
    }*/

    /**
     * Runs a {@link TranscriptExonsAnalyser} over mocked exons and returns the listener that
     * recorded the analysis events.
     *
     * @param isoformSequence first argument forwarded to {@code extract}
     * @param startPositionOnGene second argument forwarded to {@code extract}
     *                            (presumably the first coding position on the gene - confirm)
     * @param endPositionOnGene third argument forwarded to {@code extract}
     *                          (presumably the last coding position on the gene - confirm)
     * @param exonStartEnds flat list of exon first/last gene positions, see {@link #createMockExonList(int...)}
     * @return the collector that listened to the analysis
     */
    private static InfoCollectorAnalysis analyse(String isoformSequence, int startPositionOnGene,
                                                 int endPositionOnGene, int... exonStartEnds) throws Exception {

        InfoCollectorAnalysis collector = new InfoCollectorAnalysis();
        TranscriptExonsAnalyser analyser = new TranscriptExonsAnalyser(collector);

        analyser.extract(isoformSequence, startPositionOnGene, endPositionOnGene, createMockExonList(exonStartEnds));

        return collector;
    }

    /**
     * Asserts the category of {@code info} and, for both its first and last amino acid,
     * the base, the position and the phase.
     */
    private static void assertInfoEquals(ExonInfo info, char firstAA, int firstPos, int startPhase,
                                         char lastAA, int lastPos, int endPhase, ExonCategory type) {

        Assert.assertEquals(type, info.getExonCategory());

        Assert.assertEquals(firstAA, info.getFirstAA().getBase());
        Assert.assertEquals(firstPos, info.getFirstAA().getPosition());
        Assert.assertEquals(startPhase, info.getFirstAA().getPhase());

        Assert.assertEquals(lastAA, info.getLastAA().getBase());
        Assert.assertEquals(lastPos, info.getLastAA().getPosition());
        Assert.assertEquals(endPhase, info.getLastAA().getPhase());
    }

    /**
     * Asserts that {@code info} has the given category and that neither a first nor a last
     * amino acid was recorded for it.
     */
    private static void assertInfoWithoutAminoAcids(ExonInfo info, ExonCategory type) {

        Assert.assertEquals(type, info.getExonCategory());
        Assert.assertNull(info.getFirstAA());
        Assert.assertNull(info.getLastAA());
    }

    /**
     * Builds mocked exons from a flat list of positions.
     *
     * @param startEnds consecutive pairs {@code first, last}: each pair stubs
     *                  {@code getFirstPositionOnGene()} and {@code getLastPositionOnGene()} of one exon
     * @return one mocked exon per pair, in the order given
     * @throws IllegalArgumentException if the number of positions is odd
     */
    private static List<Exon> createMockExonList(int... startEnds) {

        Preconditions.checkArgument(startEnds.length % 2 == 0,
                "expected start/end pairs but got an odd number of positions: %s", startEnds.length);

        List<Exon> exons = new ArrayList<>(startEnds.length / 2);

        for (int i = 0; i < startEnds.length; i += 2) {

            Exon exon = mock(Exon.class);

            when(exon.getFirstPositionOnGene()).thenReturn(startEnds[i]);
            when(exon.getLastPositionOnGene()).thenReturn(startEnds[i + 1]);

            exons.add(exon);
        }

        return exons;
    }

    /**
     * What the listener recorded for one exon: its category and, when reported,
     * its first and last amino acids (both stay {@code null} otherwise).
     */
    public static class ExonInfo {

        private ExonCategory exonCategory;
        private AminoAcid firstAA;
        private AminoAcid lastAA;

        public ExonCategory getExonCategory() {
            return exonCategory;
        }

        public void setExonCategory(ExonCategory exonCategory) {
            this.exonCategory = exonCategory;
        }

        public AminoAcid getFirstAA() {
            return firstAA;
        }

        public void setFirstAA(AminoAcid firstAA) {
            this.firstAA = firstAA;
        }

        public AminoAcid getLastAA() {
            return lastAA;
        }

        public void setLastAA(AminoAcid lastAA) {
            this.lastAA = lastAA;
        }
    }

    /**
     * Listener that stores one {@link ExonInfo} per exon for which {@code terminated(Exon)}
     * is called, and remembers whether a coding exon analysis failure was reported.
     *
     * <p>Declared static: it never needs the enclosing test instance.
     */
    private static final class InfoCollectorAnalysis implements ExonsAnalysisListener {

        private final List<ExonInfo> exonInfos;
        // Info of the exon currently being analysed; replaced on every startedExon().
        private ExonInfo exonInfo;
        private boolean error;

        private InfoCollectorAnalysis() {
            this.exonInfos = new ArrayList<>();
        }

        @Override
        public void started() {
        }

        @Override
        public void startedExon(Exon exon) {
            exonInfo = new ExonInfo();
        }

        @Override
        public void analysedCodingExon(Exon exon, AminoAcid first, AminoAcid last, ExonCategory category) {
            exonInfo.setFirstAA(first);
            exonInfo.setLastAA(last);
            exonInfo.setExonCategory(category);
        }

        @Override
        public void analysedCodingExonFailed(Exon exon, ExonOutOfBoundError exonOutOfBoundError) {
            error = true;
        }

        @Override
        public void analysedNonCodingExon(Exon exon, ExonCategory cat) {
            exonInfo.setExonCategory(cat);
        }

        @Override
        public void terminated(Exon exon) {
            exonInfos.add(exonInfo);
        }

        @Override
        public void terminated() {
        }

        /**
         * @return the info stored at {@code index}
         * @throws IndexOutOfBoundsException if {@code index} is not a valid index of the stored infos
         */
        public ExonInfo getInfoAt(int index) {
            Preconditions.checkElementIndex(index, exonInfos.size());
            return exonInfos.get(index);
        }

        /** @return {@code true} once a coding exon analysis failure has been reported */
        public boolean hasError() {
            return error;
        }

        /** @return the number of stored exon infos */
        public int size() {
            return exonInfos.size();
        }
    }
}