package org.molgenis.data.annotation.core.entity.impl;

import com.google.common.base.Optional;
import com.google.common.collect.Lists;
import org.molgenis.data.Entity;
import org.molgenis.data.MolgenisDataException;
import org.molgenis.data.annotation.core.filter.MultiAllelicResultFilter;
import org.molgenis.data.meta.model.AttributeFactory;
import org.molgenis.data.meta.model.EntityType;
import org.molgenis.data.meta.model.EntityTypeFactory;
import org.molgenis.data.support.DynamicEntity;
import org.molgenis.data.vcf.model.VcfAttributes;
import org.molgenis.test.data.AbstractMolgenisSpringTest;
import org.molgenis.util.EntityUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.test.context.ContextConfiguration;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

import java.util.Arrays;
import java.util.Collections;

import static org.molgenis.data.meta.AttributeType.STRING;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

/**
 * Tests for {@link MultiAllelicResultFilter}: filtering resource entities against a source variant
 * ({@code filterResults}) and merging several single-allele resource lines ({@code merge}).
 *
 * <p>Fixtures are rebuilt before every test method. "Source" entities ({@code entityN}) only carry the
 * VCF columns; "resource" entities ({@code resultEntityN}) additionally carry an {@code annotation}
 * string whose comma-separated values line up with the comma-separated ALT alleles.
 */
@ContextConfiguration(classes = { MultiAllelicResultFilterTest.Config.class })
public class MultiAllelicResultFilterTest extends AbstractMolgenisSpringTest {
    /** Name of the attribute the filter under test is asked to update. */
    private static final String ANNOTATION = "annotation";

    @Autowired
    AttributeFactory attributeFactory;

    @Autowired
    EntityTypeFactory entityTypeFactory;

    @Autowired
    VcfAttributes vcfAttributes;

    private EntityType emd;
    private EntityType resultEmd;

    private DynamicEntity entity1;
    private DynamicEntity entity2;
    private DynamicEntity entity3;
    private DynamicEntity entity7;
    private DynamicEntity entity8;
    private DynamicEntity entity9;
    private DynamicEntity entity10;
    private DynamicEntity entityNoRef;
    private DynamicEntity entityMismatchChrom;
    private DynamicEntity entityMismatchPos;

    private DynamicEntity resultEntity1;
    private DynamicEntity resultEntity2;
    private DynamicEntity resultEntity3;
    private DynamicEntity resultEntity4;
    private DynamicEntity resultEntity5;
    private DynamicEntity resultEntity6;
    private DynamicEntity resultEntity7;
    private DynamicEntity resultEntity8;
    private DynamicEntity resultEntity9;
    private DynamicEntity resultEntity10;

    /** Builds the source and resource entity types and all fixture entities used by the tests. */
    @BeforeMethod
    public void setUp() {
        emd = entityTypeFactory.create().setName("entity");
        resultEmd = entityTypeFactory.create().setName("resultEntity");
        addVcfAttributes(emd);
        addVcfAttributes(resultEmd);
        resultEmd.addAttribute(attributeFactory.create().setName(ANNOTATION).setDataType(STRING));

        // Source variants, all located at 1:100.
        entity1 = createSource("entity1", "1", 100, "C", "A");
        entity2 = createSource("entity2", "1", 100, "C", "A,T");
        entity3 = createSource("entity3", "1", 100, "C", "A,T,G");
        entity7 = createSource("entity7", "1", 100, "TTCCTCC", "TTCC");
        entity8 = createSource("entity8", "1", 100, "TTCCTCCTCC", "TTCCTCC");
        entity9 = createSource("entity9", "1", 100, "GA", "G");
        entity10 = createSource("entity10", "1", 100, "GAA", "GA");

        // Deliberately has neither REF nor ALT set.
        entityNoRef = new DynamicEntity(emd);
        entityNoRef.set(VcfAttributes.CHROM, "1");
        entityNoRef.set(VcfAttributes.POS, 100);
        entityNoRef.set(VcfAttributes.ID, "entityNoRef");

        // Entities whose location differs from 1:100 in exactly one coordinate.
        entityMismatchChrom = createSource("entityMismatchChrom", "2", 100, "A", "C");
        entityMismatchPos = createSource("entityMismatchPos", "1", 101, "A", "C");

        // Resource entities, all located at 1:100.
        resultEntity1 = createResource("resultEntity1", "C", "A", "1");
        resultEntity2 = createResource("resultEntity2", "C", "T", "2");
        resultEntity3 = createResource("resultEntity3", "C", "A,T", "3,4");
        resultEntity4 = createResource("resultEntity4", "C", "T,A", "5,6");
        resultEntity5 = createResource("resultEntity5", "C", "G,A,T", "7,8,9");
        resultEntity6 = createResource("resultEntity6", "C", "G,A", "10,11");
        resultEntity7 = createResource("resultEntity7", "TTCCTCCTCC", "TTGGTCC,TTCCTCC", "12,13");
        resultEntity8 = createResource("resultEntity8", "TTCCTCC", "TTGGT,TTCC", "14,15");
        resultEntity9 = createResource("resultEntity9", "GAA", "GA,G", "16,17");
        resultEntity10 = createResource("resultEntity10", "GA", "GC,G", "18,19");
    }

    /** Source C&gt;A against resource C&gt;A ("1"): the single annotation is returned. */
    @Test
    public void filterResultsTest1() {
        assertAnnotation(filterSingle(resultEntity1, entity1), "1");
    }

    /** Source C&gt;A against resource C&gt;T: no allele matches, so nothing is returned. */
    @Test
    public void filterResultsTest2() {
        Optional<Entity> result2 = filterSingle(resultEntity2, entity1);
        assertEquals(result2, Optional.absent());
    }

    /** Source C&gt;A,T against resource C&gt;A,T ("3,4"): annotations are returned in the same order. */
    @Test
    public void filterResultsTest3() {
        assertAnnotation(filterSingle(resultEntity3, entity2), "3,4");
    }

    /** Source C&gt;A,T against resource C&gt;T,A ("5,6"): annotations follow the source allele order. */
    @Test
    public void filterResultsTest4() {
        assertAnnotation(filterSingle(resultEntity4, entity2), "6,5");
    }

    /** Source C&gt;A,T,G against resource C&gt;G,A,T ("7,8,9"): annotations follow the source allele order. */
    @Test
    public void filterResultsTest5() {
        assertAnnotation(filterSingle(resultEntity5, entity3), "8,9,7");
    }

    /** Source C&gt;A,T against resource C&gt;T ("2"): the allele without annotation yields ".". */
    @Test
    public void filterResultsTest6() {
        assertAnnotation(filterSingle(resultEntity2, entity2), ".,2");
    }

    /** Source C&gt;A,T,G against resource C&gt;G,A ("10,11"): the unmatched middle allele yields ".". */
    @Test
    public void filterResultsTest7() {
        assertAnnotation(filterSingle(resultEntity6, entity3), "11,.,10");
    }

    /** Source C&gt;A against resource C&gt;G,A,T ("7,8,9"): only the annotation for A is returned. */
    @Test
    public void filterResultsTest8() {
        assertAnnotation(filterSingle(resultEntity5, entity1), "8");
    }

    /** Source TTCCTCC&gt;TTCC against resource TTCCTCCTCC&gt;TTGGTCC,TTCCTCC ("12,13"): expects "13". */
    @Test
    public void filterResultsTest9() {
        assertAnnotation(filterSingle(resultEntity7, entity7), "13");
    }

    /** Source TTCCTCCTCC&gt;TTCCTCC against resource TTCCTCC&gt;TTGGT,TTCC ("14,15"): expects "15". */
    @Test
    public void filterResultsTest10() {
        assertAnnotation(filterSingle(resultEntity8, entity8), "15");
    }

    /** Source GA&gt;G against resource GAA&gt;GA,G ("16,17"): expects "16". */
    @Test
    public void filterResultsTest11() {
        assertAnnotation(filterSingle(resultEntity9, entity9), "16");
    }

    /** Source GAA&gt;GA against resource GA&gt;GC,G ("18,19"): expects "19". */
    @Test
    public void filterResultsTest12() {
        assertAnnotation(filterSingle(resultEntity10, entity10), "19");
    }

    /** A source entity without REF/ALT values produces an absent result. */
    @Test
    public void filterResultsSourceHasNoRef() {
        Optional<Entity> result = filterSingle(resultEntity10, entityNoRef);
        assertEquals(result, Optional.absent());
    }

    /** With multiline merging enabled, resource entities on different chromosomes are rejected. */
    @Test
    public void filterResultsMergeMultilineMismatchChrom() {
        MultiAllelicResultFilter filter = createMergingFilter();
        try {
            filter.filterResults(Arrays.asList(resultEntity10, entityMismatchChrom), entity10, false);
            Assert.fail("Should throw exception for mismatching chromosomes");
        } catch (MolgenisDataException actual) {
            assertEquals(actual.getMessage(),
                    "Mismatch in location! Location{chrom=1, pos=100} vs Location{chrom=2, pos=100}");
        }
    }

    /** With multiline merging enabled, resource entities on different positions are rejected. */
    @Test
    public void filterResultsMergeMultilineMismatchPos() {
        MultiAllelicResultFilter filter = createMergingFilter();
        try {
            filter.filterResults(Arrays.asList(entityMismatchPos, resultEntity10), entity10, false);
            Assert.fail("Should throw exception for mismatching positions");
        } catch (MolgenisDataException actual) {
            assertEquals(actual.getMessage(),
                    "Mismatch in location! Location{chrom=1, pos=101} vs Location{chrom=1, pos=100}");
        }
    }

    /*
     * entity list:
     * 3	300	G	A	0.2|23.1
     * 3	300	G	T	-2.4|0.123
     * 3	300	G	X	-0.002|2.3
     * 3	300	G	C	0.5|14.5
     * 3	300	GC	A	0.2|23.1
     * 3	300	GC	T	-2.4|0.123
     * 3	300	C	GX	-0.002|2.3
     * 3	300	C	GC	0.5|14.5
     *
     * should become:
     * 3	300	G	A,T,X,C	0.2|23.1,-2.4|0.123,-0.002|2.3,0.5|14.5
     * 3	300	GC	A,T	0.2|23.1,-2.4|0.123
     * 3	300	C	GX,GC	-0.002|2.3,0.5|14.5
     */
    /**
     * Merges eight single-allele lines at 3:300 and verifies <em>every</em> merged entity (one per
     * REF) as well as the number of merged entities, per the table above.
     */
    @Test
    public void testMultiLineMerge() {
        String customAttrb = "MyAnnotation";
        EntityType multiLineTestEMD = entityTypeFactory.create().setName("entity");
        multiLineTestEMD.addAttribute(vcfAttributes.getChromAttribute());
        multiLineTestEMD.addAttribute(vcfAttributes.getPosAttribute());
        multiLineTestEMD.addAttribute(vcfAttributes.getIdAttribute());
        multiLineTestEMD.addAttribute(vcfAttributes.getRefAttribute());
        multiLineTestEMD.addAttribute(vcfAttributes.getAltAttribute());
        multiLineTestEMD.addAttribute(attributeFactory.create().setName(customAttrb).setDataType(STRING));

        Iterable<Entity> multiLineInput = Arrays.<Entity>asList(
                multiLineVariant(multiLineTestEMD, customAttrb, "G", "A", "0.2|23.1"),
                multiLineVariant(multiLineTestEMD, customAttrb, "G", "T", "-2.4|0.123"),
                multiLineVariant(multiLineTestEMD, customAttrb, "G", "X", "-0.002|2.3"),
                multiLineVariant(multiLineTestEMD, customAttrb, "G", "C", "0.5|14.5"),
                multiLineVariant(multiLineTestEMD, customAttrb, "GC", "A", "0.2|23.1"),
                multiLineVariant(multiLineTestEMD, customAttrb, "GC", "T", "-2.4|0.123"),
                multiLineVariant(multiLineTestEMD, customAttrb, "C", "GX", "-0.002|2.3"),
                multiLineVariant(multiLineTestEMD, customAttrb, "C", "GC", "0.5|14.5"));

        Entity[] expectedResult = {
                multiLineVariant(multiLineTestEMD, customAttrb, "G", "A,T,X,C",
                        "0.2|23.1,-2.4|0.123,-0.002|2.3,0.5|14.5"),
                multiLineVariant(multiLineTestEMD, customAttrb, "GC", "A,T", "0.2|23.1,-2.4|0.123"),
                multiLineVariant(multiLineTestEMD, customAttrb, "C", "GX,GC", "-0.002|2.3,0.5|14.5") };

        MultiAllelicResultFilter multiAllelicResultFilter = new MultiAllelicResultFilter(
                Lists.newArrayList((multiLineTestEMD.getAttribute(customAttrb))), vcfAttributes);

        Iterable<Entity> actualResult = multiAllelicResultFilter.merge(multiLineInput);

        // Compare pairwise so that a regression in the 2nd or 3rd merged entity is detected too.
        int index = 0;
        for (Entity actual : actualResult) {
            assertTrue(index < expectedResult.length, "merge returned more entities than expected");
            assertTrue(EntityUtils.equals(actual, expectedResult[index]),
                    "merged entity at index " + index + " differs from the expected entity");
            index++;
        }
        assertEquals(index, expectedResult.length, "number of merged entities");
    }

    /** Adds the CHROM, POS, REF, ALT and ID attributes (in that order) to the given entity type. */
    private void addVcfAttributes(EntityType entityType) {
        entityType.addAttribute(vcfAttributes.getChromAttribute());
        entityType.addAttribute(vcfAttributes.getPosAttribute());
        entityType.addAttribute(vcfAttributes.getRefAttribute());
        entityType.addAttribute(vcfAttributes.getAltAttribute());
        entityType.addAttribute(vcfAttributes.getIdAttribute());
    }

    /** Creates an entity of the given type with its CHROM, POS, REF and ALT values set. */
    private static DynamicEntity newVariant(EntityType entityType, String chrom, int pos, String ref, String alt) {
        DynamicEntity variant = new DynamicEntity(entityType);
        variant.set(VcfAttributes.CHROM, chrom);
        variant.set(VcfAttributes.POS, pos);
        variant.set(VcfAttributes.REF, ref);
        variant.set(VcfAttributes.ALT, alt);
        return variant;
    }

    /** Creates a source entity (type {@code emd}) with the given id and variant coordinates. */
    private DynamicEntity createSource(String id, String chrom, int pos, String ref, String alt) {
        DynamicEntity source = newVariant(emd, chrom, pos, ref, alt);
        source.set(VcfAttributes.ID, id);
        return source;
    }

    /** Creates a resource entity (type {@code resultEmd}) at 1:100 carrying the given annotation value. */
    private DynamicEntity createResource(String id, String ref, String alt, String annotation) {
        DynamicEntity resource = newVariant(resultEmd, "1", 100, ref, alt);
        resource.set(ANNOTATION, annotation);
        resource.set(VcfAttributes.ID, id);
        return resource;
    }

    /** Creates a 3:300 entity for the multi-line merge test; the ID attribute is left unset. */
    private static DynamicEntity multiLineVariant(EntityType entityType, String attributeName, String ref,
            String alt, String value) {
        DynamicEntity variant = newVariant(entityType, "3", 300, ref, alt);
        variant.set(attributeName, value);
        return variant;
    }

    /** Creates a filter that updates the {@code annotation} attribute, using the two-argument constructor. */
    private MultiAllelicResultFilter createFilter() {
        return new MultiAllelicResultFilter(
                Collections.singletonList(attributeFactory.create().setName(ANNOTATION).setDataType(STRING)),
                vcfAttributes);
    }

    /** Creates a filter like {@link #createFilter()} but with the boolean constructor flag set to {@code true}. */
    private MultiAllelicResultFilter createMergingFilter() {
        return new MultiAllelicResultFilter(
                Collections.singletonList(attributeFactory.create().setName(ANNOTATION).setDataType(STRING)), true,
                vcfAttributes);
    }

    /** Runs a fresh default filter on a single resource entity against the given source entity. */
    private Optional<Entity> filterSingle(Entity resource, Entity source) {
        return createFilter().filterResults(Collections.singletonList(resource), source, false);
    }

    /** Asserts that the filter returned an entity and that its {@code annotation} value is as expected. */
    private static void assertAnnotation(Optional<Entity> result, String expected) {
        assertTrue(result.isPresent(), "expected the filter to return an entity");
        assertEquals(result.get().getString(ANNOTATION), expected);
    }

    @Configuration
    @ComponentScan({ "org.molgenis.data.vcf.model" })
    public static class Config {
    }
}