package org.molgenis.data.annotation.core.entity.impl; import org.molgenis.data.DataService; import org.molgenis.data.Entity; import org.molgenis.data.annotation.core.RepositoryAnnotator; import org.molgenis.data.annotation.core.entity.AnnotatorConfig; import org.molgenis.data.annotation.core.resources.Resources; import org.molgenis.data.annotation.core.resources.impl.ResourcesImpl; import org.molgenis.data.annotation.web.AnnotationService; import org.molgenis.data.annotation.web.settings.GoNLAnnotatorSettings; import org.molgenis.data.meta.model.AttributeFactory; import org.molgenis.data.meta.model.EntityType; import org.molgenis.data.meta.model.EntityTypeFactory; import org.molgenis.data.support.DynamicEntity; import org.molgenis.data.vcf.model.VcfAttributes; import org.molgenis.test.data.AbstractMolgenisSpringTest; import org.molgenis.util.ResourceUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.ApplicationContext; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; import org.springframework.test.context.ContextConfiguration; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import java.io.IOException; import java.util.Collections; import java.util.Iterator; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.molgenis.data.annotation.core.entity.impl.GoNLAnnotator.*; import static org.molgenis.data.meta.AttributeType.STRING; import static org.testng.Assert.*; @ContextConfiguration(classes = { GoNLAnnotatorTest.Config.class, GoNLAnnotator.class }) public class GoNLAnnotatorTest extends AbstractMolgenisSpringTest { @Autowired ApplicationContext context; @Autowired AttributeFactory attributeFactory; @Autowired EntityTypeFactory entityTypeFactory; @Autowired VcfAttributes vcfAttributes; private final static String GONL_TEST_PATTERN = "gonl.chr%s.snps_indels.r5.vcf.gz"; private final static String GONL_TEST_ROOT_DIRECTORY = "/gonl"; private final static String GONL_TEST_CHROMOSOMES = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X"; private final static String GONL_TEST_OVERRIDE_CHROMOSOME_FILES_PROPERTY = "X:gonl.chrX.release4.gtc.vcf.gz"; @Autowired RepositoryAnnotator annotator; private EntityType emd; @BeforeClass public void beforeClass() throws IOException { AnnotatorConfig annotatorConfig = context.getBean(AnnotatorConfig.class); annotatorConfig.init(); emd = entityTypeFactory.create().setName("gonl"); emd.addAttribute(vcfAttributes.getChromAttribute()); emd.addAttribute(vcfAttributes.getPosAttribute()); emd.addAttribute(vcfAttributes.getRefAttribute()); emd.addAttribute(vcfAttributes.getAltAttribute()); emd.addAttribute(attributeFactory.create().setName(GONL_GENOME_AF).setDataType(STRING) .setDescription("The allele frequency for variants seen in the population used for the GoNL project") .setLabel(GONL_AF_LABEL)); emd.addAttribute(attributeFactory.create().setName(GONL_GENOME_GTC).setDataType(STRING).setDescription( "GenoType Counts. For each ALT allele in the same order as listed = 0/0,0/1,1/1,0/2,1/2,2/2,0/3,1/3,2/3,3/3,etc. Phasing is ignored; hence 1/0, 0|1 and 1|0 are all counted as 0/1. When one or more alleles is not called for a genotype in a specific sample (./., ./0, ./1, ./2, etc.), that sample's genotype is completely discarded for calculating GTC.") .setLabel(GONL_GTC_LABEL)); } // 14 tests below are test cases from the "test-edgecases.vcf" @Test public void testAnnotate1() { Entity entity1 = new DynamicEntity(emd); entity1.set(VcfAttributes.CHROM, "1"); entity1.set(VcfAttributes.POS, 13380); entity1.set(VcfAttributes.REF, "C"); entity1.set(VcfAttributes.ALT, "G"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity1)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), null); assertEquals(resultEntity.get(GONL_GENOME_GTC), null); } @Test public void testAnnotate2() { Entity entity2 = new DynamicEntity(emd); entity2.set(VcfAttributes.CHROM, "1"); entity2.set(VcfAttributes.POS, 13980); entity2.set(VcfAttributes.REF, "T"); entity2.set(VcfAttributes.ALT, "C"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity2)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), "0.013052208835341365"); assertEquals(resultEntity.get(GONL_GENOME_GTC), "485|13|0"); } @Test public void testAnnotate3() { Entity entity3 = new DynamicEntity(emd); entity3.set(VcfAttributes.CHROM, "1"); entity3.set(VcfAttributes.POS, 78383467); entity3.set(VcfAttributes.REF, "G"); entity3.set(VcfAttributes.ALT, "A"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity3)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), "0.8674698795180723"); assertEquals(resultEntity.get(GONL_GENOME_GTC), "6|120|372"); } @Test public void testAnnotate4() { Entity entity4 = new DynamicEntity(emd); entity4.set(VcfAttributes.CHROM, "1"); entity4.set(VcfAttributes.POS, 231094050); entity4.set(VcfAttributes.REF, "GAA"); entity4.set(VcfAttributes.ALT, "G,GAAA,GA"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity4)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), null); assertEquals(resultEntity.get(GONL_GENOME_GTC), null); } @Test public void testAnnotate5() { Entity entity5 = new DynamicEntity(emd); entity5.set(VcfAttributes.CHROM, "2"); entity5.set(VcfAttributes.POS, 171570151); entity5.set(VcfAttributes.REF, "C"); entity5.set(VcfAttributes.ALT, "T"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity5)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), "0.30823293172690763"); assertEquals(resultEntity.get(GONL_GENOME_GTC), "241|207|50"); } @Test public void testAnnotate6() { Entity entity6 = new DynamicEntity(emd); entity6.set(VcfAttributes.CHROM, "4"); entity6.set(VcfAttributes.POS, 69964234); entity6.set(VcfAttributes.REF, "CT"); entity6.set(VcfAttributes.ALT, "CTT,CTTT,C"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity6)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), null); assertEquals(resultEntity.get(GONL_GENOME_GTC), null); } @Test public void testAnnotate7() { Entity entity7 = new DynamicEntity(emd); entity7.set(VcfAttributes.CHROM, "15"); entity7.set(VcfAttributes.POS, 66641732); entity7.set(VcfAttributes.REF, "G"); entity7.set(VcfAttributes.ALT, "A,C,T"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity7)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), ".,0.09538152610441768,."); assertEquals(resultEntity.get(GONL_GENOME_GTC), ".,412|77|9,."); } @Test public void testAnnotate8() { Entity entity8 = new DynamicEntity(emd); entity8.set(VcfAttributes.CHROM, "21"); entity8.set(VcfAttributes.POS, 46924425); entity8.set(VcfAttributes.REF, "CGGCCCCCCA"); entity8.set(VcfAttributes.ALT, "C"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity8)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), null); assertEquals(resultEntity.get(GONL_GENOME_GTC), null); } @Test public void testAnnotate9() { Entity entity9 = new DynamicEntity(emd); entity9.set(VcfAttributes.CHROM, "X"); entity9.set(VcfAttributes.POS, 79943569); entity9.set(VcfAttributes.REF, "T"); entity9.set(VcfAttributes.ALT, "C"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity9)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), "0.9989939637826962"); assertEquals(resultEntity.get(GONL_GENOME_GTC), "0|1|496"); } @Test public void testAnnotate10() { Entity entity10 = new DynamicEntity(emd); entity10.set(VcfAttributes.CHROM, "2"); entity10.set(VcfAttributes.POS, 191904021); entity10.set(VcfAttributes.REF, "G"); entity10.set(VcfAttributes.ALT, "T"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity10)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), "0.0030120481927710845"); assertEquals(resultEntity.get(GONL_GENOME_GTC), "495|3|0"); } @Test public void testAnnotate11() { Entity entity11 = new DynamicEntity(emd); entity11.set(VcfAttributes.CHROM, "3"); entity11.set(VcfAttributes.POS, 53219680); entity11.set(VcfAttributes.REF, "G"); entity11.set(VcfAttributes.ALT, "C"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity11)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), null); assertEquals(resultEntity.get(GONL_GENOME_GTC), null); } @Test public void testAnnotate12() { Entity entity12 = new DynamicEntity(emd); entity12.set(VcfAttributes.CHROM, "2"); entity12.set(VcfAttributes.POS, 219142023); entity12.set(VcfAttributes.REF, "G"); entity12.set(VcfAttributes.ALT, "A"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity12)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), "0.9969879518072289"); assertEquals(resultEntity.get(GONL_GENOME_GTC), "0|3|495"); } @Test public void testAnnotate13() { Entity entity13 = new DynamicEntity(emd); entity13.set(VcfAttributes.CHROM, "1"); entity13.set(VcfAttributes.POS, 1115548); entity13.set(VcfAttributes.REF, "G"); entity13.set(VcfAttributes.ALT, "A"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity13)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), "0.02610441767068273"); assertEquals(resultEntity.get(GONL_GENOME_GTC), "472|26|0"); } @Test public void testAnnotate14() { Entity entity14 = new DynamicEntity(emd); entity14.set(VcfAttributes.CHROM, "21"); entity14.set(VcfAttributes.POS, 45650009); entity14.set(VcfAttributes.REF, "T"); entity14.set(VcfAttributes.ALT, "TG, A, G"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity14)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), "0.22188755020080322,.,."); assertEquals(resultEntity.get(GONL_GENOME_GTC), "306|163|29,.,."); } /** * Test for bugfix where alt allele in resource was trimmed too much causing index out of bounds. */ @Test public void testAnnotate15() { Entity entity14 = new DynamicEntity(emd); entity14.set(VcfAttributes.CHROM, "1"); entity14.set(VcfAttributes.POS, 28227207); entity14.set(VcfAttributes.REF, "A"); entity14.set(VcfAttributes.ALT, "G"); Iterator<Entity> results = annotator.annotate(Collections.singletonList(entity14)); assertTrue(results.hasNext()); Entity resultEntity = results.next(); assertFalse(results.hasNext()); assertEquals(resultEntity.get(GONL_GENOME_AF), null); assertEquals(resultEntity.get(GONL_GENOME_GTC), null); } @Configuration @ComponentScan({ "org.molgenis.data.vcf.model" }) public static class Config { @Bean public Entity goNLAnnotatorSettings() { Entity settings = mock(Entity.class); when(settings.getString(GoNLAnnotatorSettings.Meta.ROOT_DIRECTORY)) .thenReturn(ResourceUtils.getFile(getClass(), GONL_TEST_ROOT_DIRECTORY).getPath()); when(settings.getString(GoNLAnnotatorSettings.Meta.CHROMOSOMES)).thenReturn(GONL_TEST_CHROMOSOMES); when(settings.getString(GoNLAnnotatorSettings.Meta.FILEPATTERN)).thenReturn(GONL_TEST_PATTERN); when(settings.getString(GoNLAnnotatorSettings.Meta.OVERRIDE_CHROMOSOME_FILES)) .thenReturn(GONL_TEST_OVERRIDE_CHROMOSOME_FILES_PROPERTY); return settings; } @Bean public DataService dataService() { return mock(DataService.class); } @Bean public AnnotationService annotationService() { return mock(AnnotationService.class); } @Bean public Resources resources() { return new ResourcesImpl(); } } }