package org.molgenis.data.annotation.core.entity.impl.snpeff; import com.google.common.collect.Iterators; import org.mockito.InjectMocks; import org.mockito.Mock; import org.molgenis.data.Entity; import org.molgenis.data.annotation.core.effects.EffectsMetaData; import org.molgenis.data.annotation.core.utils.JarRunner; import org.molgenis.data.annotation.core.utils.JarRunnerImpl; import org.molgenis.data.meta.model.*; import org.molgenis.data.populate.IdGenerator; import org.molgenis.data.populate.UuidGenerator; import org.molgenis.data.support.DynamicEntity; import org.molgenis.data.vcf.model.VcfAttributes; import org.molgenis.test.data.AbstractMolgenisSpringTest; import org.molgenis.util.EntityUtils; import org.molgenis.util.ResourceUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.ApplicationContext; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; import org.springframework.test.context.ContextConfiguration; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; import static com.google.common.collect.Lists.newArrayList; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.molgenis.data.annotation.core.effects.EffectsMetaData.*; import static org.molgenis.data.meta.AttributeType.XREF; import static org.molgenis.data.meta.model.EntityType.AttributeRole.ROLE_ID; import static org.testng.Assert.*; @ContextConfiguration(classes = { SnpEffRunnerTest.Config.class }) public class SnpEffRunnerTest extends AbstractMolgenisSpringTest { @Autowired ApplicationContext context; @Autowired AttributeFactory attributeFactory; @Autowired EntityTypeFactory entityTypeFactory; 
@Autowired
VcfAttributes vcfAttributes;

@Autowired
EffectsMetaData effectsMetaData;

@Autowired
PackageFactory packageFactory;

// Input variants handed to the runner, grouped per scenario.
private final ArrayList<Entity> singleAlleleEntities = newArrayList();
private final ArrayList<Entity> multiAlleleEntities = newArrayList();
private final ArrayList<Entity> multiGeneEntities = newArrayList();

// Effect entities the tests expect for the corresponding input group.
private final List<Entity> expectedSingleAlleleEffectEntities = newArrayList();
private final List<Entity> expectedMultiAlleleEffectEntities = newArrayList();
private final List<Entity> expectedMultiGeneEffectEntities = newArrayList();

private final ArrayList<Entity> entities = new ArrayList<>();

// Entity type of the input variants (CHROM/POS/REF/ALT) and of the expected effect rows.
private EntityType metaDataCanAnnotate;
private EntityType effectsEMD;

// NOTE(review): no explicit Mockito annotation initialisation is visible in this part of the
// file; jarRunner and snpEffRunner are assigned by hand in beforeMethod(), but confirm that
// snpEffAnnotatorSettings is populated before it is passed to the SnpEffRunner constructor.
@InjectMocks
private SnpEffRunner snpEffRunner;

@Mock
private JarRunner jarRunner;

@Mock
private Entity snpEffAnnotatorSettings;

/**
 * Builds the shared fixtures once per test class (TestNG {@code @BeforeClass}): the runner under
 * test with a mocked {@link JarRunner}, the variant and effect entity types, the input variants
 * and the effect entities the tests compare against.
 *
 * @throws IOException declared for compatibility with the existing signature
 */
@BeforeClass
public void beforeMethod() throws IOException {
	jarRunner = mock(JarRunnerImpl.class);
	IdGenerator idGenerator = new UuidGenerator();
	snpEffRunner = new SnpEffRunner(jarRunner, snpEffAnnotatorSettings, idGenerator, vcfAttributes,
			effectsMetaData, entityTypeFactory, attributeFactory);

	// Entity type of the variants that are fed to the annotator.
	metaDataCanAnnotate = entityTypeFactory.create().setName("test").setSimpleName("test");
	Attribute attributeChrom = vcfAttributes.getChromAttribute();
	Attribute attributePos = vcfAttributes.getPosAttribute();
	Attribute attributeRef = vcfAttributes.getRefAttribute();
	Attribute attributeAlt = vcfAttributes.getAltAttribute();
	metaDataCanAnnotate.addAttribute(attributeChrom, ROLE_ID);
	metaDataCanAnnotate.addAttribute(attributePos);
	metaDataCanAnnotate.addAttribute(attributeRef);
	metaDataCanAnnotate.addAttribute(attributeAlt);

	// Entity type of the expected effect rows; VARIANT is an XREF back to the variant type.
	effectsEMD = entityTypeFactory.create().setSimpleName("test_EFFECTS");
	effectsEMD.addAttribute(attributeFactory.create().setName("ID").setAuto(true).setVisible(false));
	effectsEMD.addAttribute(effectsMetaData.getAltAttr());
	effectsEMD.addAttribute(effectsMetaData.getGeneNameAttr());
	effectsEMD.addAttribute(effectsMetaData.getAnnotationAttr());
	effectsEMD.addAttribute(effectsMetaData.getPutativeImpactAttr());
	effectsEMD.addAttribute(effectsMetaData.getGeneIdAttr());
	effectsEMD.addAttribute(effectsMetaData.getFeatureTypeAttr());
	effectsEMD.addAttribute(effectsMetaData.getFeatureIdAttr());
	effectsEMD.addAttribute(effectsMetaData.getTranscriptBiotypeAttr());
	effectsEMD.addAttribute(effectsMetaData.getRankTotalAttr());
	effectsEMD.addAttribute(effectsMetaData.getHgvsCAttr());
	effectsEMD.addAttribute(effectsMetaData.getHgvsPAttr());
	effectsEMD.addAttribute(effectsMetaData.getCdnaPositionAttr());
	effectsEMD.addAttribute(effectsMetaData.getCdsPositionAttr());
	effectsEMD.addAttribute(effectsMetaData.getProteinPositionAttr());
	effectsEMD.addAttribute(effectsMetaData.getDistanceToFeatureAttr());
	effectsEMD.addAttribute(effectsMetaData.getErrorsAttr());
	effectsEMD.addAttribute(
			attributeFactory.create().setName(EffectsMetaData.VARIANT).setNillable(false).setDataType(XREF)
					.setRefEntity(metaDataCanAnnotate));

	// ---- input variants: one ALT allele each ----
	Entity singleAlleleEntity1 = createVariantEntity("1", 13380, "C", "G");
	Entity singleAlleleEntity2 = createVariantEntity("1", 13980, "T", "C");
	Entity singleAlleleEntity3 = createVariantEntity("1", 78383467, "G", "A");
	Entity singleAlleleEntity4 = createVariantEntity("21", 46924425, "CGGCCCCCCA", "C");
	Entity singleAlleleEntity5 = createVariantEntity("X", 79943569, "T", "C");
	Entity singleAlleleEntity6 = createVariantEntity("2", 191904021, "G", "T");
	Entity singleAlleleEntity7 = createVariantEntity("3", 53219680, "G", "C");
	Entity singleAlleleEntity8 = createVariantEntity("1", 1115548, "G", "A");
	singleAlleleEntities.addAll(
			newArrayList(singleAlleleEntity1, singleAlleleEntity2, singleAlleleEntity3, singleAlleleEntity4,
					singleAlleleEntity5, singleAlleleEntity6, singleAlleleEntity7, singleAlleleEntity8));

	// ---- input variants: several comma-separated ALT alleles ----
	Entity multiAlleleEntity1 = createVariantEntity("1", 231094050, "GAA", "G,GAAA,GA");
	Entity multiAlleleEntity2 = createVariantEntity("4", 69964234, "CT", "CTT,CTTT,C");
	Entity multiAlleleEntity3 = createVariantEntity("15", 66641732, "G", "A,C,T");
	// The spaces after the commas are part of the fixture value and must be kept.
	Entity multiAlleleEntity4 = createVariantEntity("21", 45650009, "T", "TG, A, G");
	multiAlleleEntities.addAll(
			newArrayList(multiAlleleEntity1, multiAlleleEntity2, multiAlleleEntity3, multiAlleleEntity4));

	// ---- input variants: each expected to yield effects for two genes ----
	Entity multiGeneEntity1 = createVariantEntity("2", 171570151, "C", "T");
	Entity multiGeneEntity2 = createVariantEntity("2", 219142023, "G", "A");
	multiGeneEntities.addAll(newArrayList(multiGeneEntity1, multiGeneEntity2));

	// ---- expected effects: single allele ----
	Entity expectedSingleAllele1 = createExpectedEffect(singleAlleleEntity1, "G", "DDX11L1",
			"non_coding_exon_variant", "MODIFIER", "NR_046018.2", "Noncoding", "3/3", "n.623C>G");
	Entity expectedSingleAllele2 = createExpectedEffect(singleAlleleEntity2, "C", "DDX11L1",
			"non_coding_exon_variant", "MODIFIER", "NR_046018.2", "Noncoding", "3/3", "n.1223T>C");
	Entity expectedSingleAllele3 = createExpectedEffect(singleAlleleEntity3, "A", "NEXN", "intron_variant",
			"MODIFIER", "NM_144573.3", "Coding", "3/12", "c.219+25G>A");
	Entity expectedSingleAllele4 = createExpectedEffect(singleAlleleEntity4, "C", "COL18A1",
			"frameshift_variant&splice_acceptor_variant&splice_donor_variant&splice_region_variant&splice_region_variant&splice_region_variant&intron_variant",
			"HIGH", "NM_030582.3", "Coding", "33/42", "c.3364_3365-2delGGCCCCCCA", "p.Gly1122fs", "3385/5894",
			"3364/4551", "1122/1516");
	Entity expectedSingleAllele5 = createExpectedEffect(singleAlleleEntity5, "C", "BRWD3",
			"missense_variant&splice_region_variant", "MODERATE", "NM_153252.4", "Coding", "34/41", "c.3863A>G",
			"p.Lys1288Arg", "4126/12793", "3863/5409", "1288/1802");
	Entity expectedSingleAllele6 = createExpectedEffect(singleAlleleEntity6, "T", "STAT4",
			"splice_region_variant&synonymous_variant", "LOW", "NM_001243835.1", "Coding", "16/24", "c.1338C>A",
			"p.Thr446Thr", "1602/2775", "1338/2247", "446/748");
	Entity expectedSingleAllele7 = createExpectedEffect(singleAlleleEntity7, "C", "PRKCD", "missense_variant",
			"MODERATE", "NM_006254.3", "Coding", "11/19", "c.949G>C", "p.Glu317Gln", "1302/2835", "949/2031",
			"317/676");
	Entity expectedSingleAllele8 = createExpectedEffect(singleAlleleEntity8, "A", "TTLL10", "missense_variant",
			"MODERATE", "NM_001130045.1", "Coding", "6/16", "c.334G>A", "p.Gly112Arg", "485/2259", "334/2022",
			"112/673");
	expectedSingleAlleleEffectEntities.addAll(
			newArrayList(expectedSingleAllele1, expectedSingleAllele2, expectedSingleAllele3,
					expectedSingleAllele4, expectedSingleAllele5, expectedSingleAllele6, expectedSingleAllele7,
					expectedSingleAllele8));

	// ---- expected effects: multiple alleles (one effect per ALT allele) ----
	Entity expectedMultiAllele1 = createExpectedEffect(multiAlleleEntity1, "G", "TTC13",
			"splice_region_variant&intron_variant", "LOW", "NM_024525.4", "Coding", "2/22",
			"c.367-7_367-6delTT");
	Entity expectedMultiAllele2 = createExpectedEffect(multiAlleleEntity1, "GA", "TTC13",
			"splice_region_variant&intron_variant", "LOW", "NM_024525.4", "Coding", "2/22", "c.367-7delT");
	Entity expectedMultiAllele3 = createExpectedEffect(multiAlleleEntity1, "GAAA", "TTC13",
			"splice_region_variant&intron_variant", "LOW", "NM_024525.4", "Coding", "2/22",
			"c.367-8_367-7insT");
	Entity expectedMultiAllele4 = createExpectedEffect(multiAlleleEntity2, "C", "UGT2B7", "intron_variant",
			"MODIFIER", "NM_001074.2", "Coding", "1/5", "c.722-23delT");
	Entity expectedMultiAllele5 = createExpectedEffect(multiAlleleEntity2, "CTT", "UGT2B7", "intron_variant",
			"MODIFIER", "NM_001074.2", "Coding", "1/5", "c.722-23_722-22insT");
	Entity expectedMultiAllele6 = createExpectedEffect(multiAlleleEntity2, "CTTT", "UGT2B7", "intron_variant",
			"MODIFIER", "NM_001074.2", "Coding", "1/5", "c.722-23_722-22insTT");
	Entity expectedMultiAllele7 = createExpectedEffect(multiAlleleEntity3, "A", "TIPIN", "missense_variant",
			"MODERATE", "NM_017858.2", "Coding", "5/8", "c.332C>T", "p.Ala111Val", "418/1280", "332/906",
			"111/301");
	Entity expectedMultiAllele8 = createExpectedEffect(multiAlleleEntity3, "C", "TIPIN", "missense_variant",
			"MODERATE", "NM_017858.2", "Coding", "5/8", "c.332C>G", "p.Ala111Gly", "418/1280", "332/906",
			"111/301");
	Entity expectedMultiAllele9 = createExpectedEffect(multiAlleleEntity3, "T", "TIPIN", "missense_variant",
			"MODERATE", "NM_017858.2", "Coding", "5/8", "c.332C>A", "p.Ala111Glu", "418/1280", "332/906",
			"111/301");
	Entity expectedMultiAllele10 = createExpectedEffect(multiAlleleEntity4, "A", "ICOSLG", "intron_variant",
			"MODIFIER", "NM_001283050.1", "Coding", "5/6", "c.863-37A>T");
	Entity expectedMultiAllele11 = createExpectedEffect(multiAlleleEntity4, "G", "ICOSLG", "intron_variant",
			"MODIFIER", "NM_001283050.1", "Coding", "5/6", "c.863-37A>C");
	Entity expectedMultiAllele12 = createExpectedEffect(multiAlleleEntity4, "TG", "ICOSLG", "intron_variant",
			"MODIFIER", "NM_001283050.1", "Coding", "5/6", "c.863-38_863-37insC");
	expectedMultiAlleleEffectEntities.addAll(
			newArrayList(expectedMultiAllele1, expectedMultiAllele2, expectedMultiAllele3, expectedMultiAllele4,
					expectedMultiAllele5, expectedMultiAllele6, expectedMultiAllele7, expectedMultiAllele8,
					expectedMultiAllele9, expectedMultiAllele10, expectedMultiAllele11, expectedMultiAllele12));

	// ---- expected effects: multiple genes (two effects per variant) ----
	Entity expectedMultiGene1 = createExpectedEffect(multiGeneEntity1, "T", "LOC101926913", "intron_variant",
			"MODIFIER", "NR_110185.1", "Noncoding", "5/5", "n.376+9863G>A");
	Entity expectedMultiGene2 = createExpectedEffect(multiGeneEntity1, "T", "LINC01124",
			"non_coding_exon_variant", "MODIFIER", "NR_027433.1", "Noncoding", "1/1", "n.927G>A");
	Entity expectedMultiGene3 = createExpectedEffect(multiGeneEntity2, "A", "PNKD", "intron_variant",
			"MODIFIER", "NM_015488.4", "Coding", "2/9", "c.236+5751G>A");
	Entity expectedMultiGene4 = createExpectedEffect(multiGeneEntity2, "A", "TMBIM1", "intron_variant",
			"MODIFIER", "NM_022152.4", "Coding", "9/11", "c.639+66C>T");
	expectedMultiGeneEffectEntities.addAll(
			newArrayList(expectedMultiGene1, expectedMultiGene2, expectedMultiGene3, expectedMultiGene4));
}

/**
 * Creates an input variant entity of type {@code metaDataCanAnnotate}.
 *
 * @param chrom value for the CHROM attribute
 * @param pos   value for the POS attribute (stored as a boxed {@code Integer})
 * @param ref   value for the REF attribute
 * @param alt   value for the ALT attribute; may hold several comma-separated alleles
 * @return the populated variant entity
 */
private Entity createVariantEntity(String chrom, int pos, String ref, String alt) {
	Entity variant = new DynamicEntity(metaDataCanAnnotate);
	variant.set(vcfAttributes.CHROM, chrom);
	variant.set(vcfAttributes.POS, pos);
	variant.set(vcfAttributes.REF, ref);
	variant.set(vcfAttributes.ALT, alt);
	return variant;
}

/**
 * Creates an expected effect entity whose HGVS.p, cDNA, CDS and protein position values are all
 * the empty string.
 *
 * @see #createExpectedEffect(Entity, String, String, String, String, String, String, String,
 * String, String, String, String, String)
 */
private Entity createExpectedEffect(Entity variant, String alt, String gene, String annotation, String impact,
		String featureId, String biotype, String rank, String hgvsC) {
	return createExpectedEffect(variant, alt, gene, annotation, impact, featureId, biotype, rank, hgvsC, "", "",
			"", "");
}

/**
 * Creates an expected effect entity of type {@code effectsEMD}.
 * <p>
 * Values shared by every fixture are filled in here: the ID is {@code null}, the gene ID equals
 * the gene name, the feature type is {@code "transcript"}, and distance-to-feature and errors
 * are the empty string.
 *
 * @param variant         variant entity the effect refers to
 * @param alt             the single ALT allele this effect belongs to
 * @param gene            gene name, also used as gene ID
 * @param annotation      annotation value
 * @param impact          putative impact value
 * @param featureId       feature (transcript) identifier
 * @param biotype         transcript biotype value
 * @param rank            rank/total value
 * @param hgvsC           HGVS.c value
 * @param hgvsP           HGVS.p value
 * @param cdnaPosition    cDNA position value
 * @param cdsPosition     CDS position value
 * @param proteinPosition protein position value
 * @return the populated effect entity
 */
private Entity createExpectedEffect(Entity variant, String alt, String gene, String annotation, String impact,
		String featureId, String biotype, String rank, String hgvsC, String hgvsP, String cdnaPosition,
		String cdsPosition, String proteinPosition) {
	Entity effect = new DynamicEntity(effectsEMD);
	effect.set(EffectsMetaData.ID, null);
	effect.set(EffectsMetaData.ALT, alt);
	effect.set(EffectsMetaData.GENE_NAME, gene);
	effect.set(EffectsMetaData.VARIANT, variant);
	effect.set(ANNOTATION, annotation);
	effect.set(PUTATIVE_IMPACT, impact);
	effect.set(GENE_ID, gene);
	effect.set(FEATURE_TYPE, "transcript");
	effect.set(FEATURE_ID, featureId);
	effect.set(TRANSCRIPT_BIOTYPE, biotype);
	effect.set(RANK_TOTAL, rank);
	effect.set(HGVS_C, hgvsC);
	effect.set(HGVS_P, hgvsP);
	effect.set(C_DNA_POSITION, cdnaPosition);
	effect.set(CDS_POSITION, cdsPosition);
	effect.set(PROTEIN_POSITION, proteinPosition);
	effect.set(DISTANCE_TO_FEATURE, "");
	effect.set(ERRORS, "");
	return effect;
}

/**
 * Stubs the jar runner to return a canned output file and checks the number of effects returned
 * for the single-allele variants.
 *
 * @throws Exception if stubbing the jar runner fails; propagated so the test fails at the real
 *                   cause instead of continuing against an unstubbed mock
 */
@Test
public void annotateCountTest() throws Exception {
	// The null entry is part of the parameter list the stub is matched against.
	List<String> params = Arrays
			.asList("-Xmx2g", null, "hg19", "-noStats", "-noLog", "-lof", "-canon", "-ud", "0", "-spliceSiteSize",
					"5");
	when(jarRunner.runJar(SnpEffAnnotator.NAME, params, ResourceUtils.getFile(getClass(), "/test-edgecases.vcf")))
			.thenReturn(ResourceUtils.getFile(getClass(), "/snpEffOutputCount.vcf"));

	Iterator<Entity> results =
snpEffRunner.getSnpEffects(singleAlleleEntities.iterator(), ResourceUtils.getFile(getClass(), "/test-edgecases.vcf")); assertEquals(Iterators.size(results), 8); } @Test public void getInputVcfFileTest() { BufferedReader br = null; try { File singleAlleleFile = snpEffRunner.getInputVcfFile(singleAlleleEntities.iterator()); br = new BufferedReader(new FileReader(singleAlleleFile.getAbsolutePath())); assertEquals(br.readLine(), "#CHROM POS ID REF ALT QUAL FILTER INFO".replace(" ", "\t")); assertEquals(br.readLine(), "1 13380 . C G . . .".replace(" ", "\t")); assertEquals(br.readLine(), "1 13980 . T C . . .".replace(" ", "\t")); assertEquals(br.readLine(), "1 78383467 . G A . . .".replace(" ", "\t")); assertEquals(br.readLine(), "21 46924425 . CGGCCCCCCA C . . .".replace(" ", "\t")); assertEquals(br.readLine(), "X 79943569 . T C . . .".replace(" ", "\t")); assertEquals(br.readLine(), "2 191904021 . G T . . .".replace(" ", "\t")); assertEquals(br.readLine(), "3 53219680 . G C . . .".replace(" ", "\t")); assertEquals(br.readLine(), "1 1115548 . G A . . .".replace(" ", "\t")); br.close(); File multiAlleleFile = snpEffRunner.getInputVcfFile(multiAlleleEntities.iterator()); br = new BufferedReader(new FileReader(multiAlleleFile.getAbsolutePath())); assertEquals(br.readLine(), "#CHROM POS ID REF ALT QUAL FILTER INFO".replace(" ", "\t")); assertEquals(br.readLine(), "1 231094050 . GAA G,GAAA,GA . . .".replace(" ", "\t")); assertEquals(br.readLine(), "4 69964234 . CT CTT,CTTT,C . . .".replace(" ", "\t")); assertEquals(br.readLine(), "15 66641732 . G A,C,T . . .".replace(" ", "\t")); assertEquals(br.readLine(), "21\t45650009\t.\tT\tTG, A, G\t.\t\t.\t."); br.close(); File multiGeneFile = snpEffRunner.getInputVcfFile(multiGeneEntities.iterator()); br = new BufferedReader(new FileReader(multiGeneFile.getAbsolutePath())); assertEquals(br.readLine(), "#CHROM POS ID REF ALT QUAL FILTER INFO".replace(" ", "\t")); assertEquals(br.readLine(), "2 171570151 . C T . . 
.".replace(" ", "\t")); assertEquals(br.readLine(), "2 219142023 . G A . . .".replace(" ", "\t")); br.close(); } catch (Exception e) { fail(); } finally { try { br.close(); } catch (IOException e) { e.printStackTrace(); } } } @Test public void getSnpEffectsSingleAlleleTest() { try { List<String> params = Arrays .asList("-Xmx2g", null, "hg19", "-noStats", "-noLog", "-lof", "-canon", "-ud", "0", "-spliceSiteSize", "5"); when(jarRunner.runJar(SnpEffAnnotator.NAME, params, ResourceUtils.getFile(getClass(), "/test-snpeff.vcf"))) .thenReturn(ResourceUtils.getFile(getClass(), "/snpeff-single-allele-output.vcf")); } catch (Exception e) { e.printStackTrace(); } List<Entity> results = newArrayList(snpEffRunner .getSnpEffects(singleAlleleEntities.iterator(), ResourceUtils.getFile(getClass(), "/test-snpeff.vcf"))); // Set id to null to prevent matching auto generated identifiers for (Entity resultEntity : results) { resultEntity.set("id", null); } assertTrue(EntityUtils.entitiesEquals(results, expectedSingleAlleleEffectEntities)); } @Test public void getSnpEffectsMultiAlleleTest() { try { List<String> params = Arrays .asList("-Xmx2g", null, "hg19", "-noStats", "-noLog", "-lof", "-canon", "-ud", "0", "-spliceSiteSize", "5"); when(jarRunner.runJar(SnpEffAnnotator.NAME, params, ResourceUtils.getFile(getClass(), "/test-snpeff.vcf"))) .thenReturn(ResourceUtils.getFile(getClass(), "/snpeff-multi-allele-output.vcf")); } catch (Exception e) { e.printStackTrace(); } List<Entity> results = newArrayList(snpEffRunner .getSnpEffects(multiAlleleEntities.iterator(), ResourceUtils.getFile(getClass(), "/test-snpeff.vcf"))); // Set id to null to prevent matching auto generated identifiers for (Entity resultEntity : results) { resultEntity.set("id", null); } assertTrue(EntityUtils.entitiesEquals(results, expectedMultiAlleleEffectEntities)); } @Test public void getSnpEffectsMultiGeneTest() { try { List<String> params = Arrays .asList("-Xmx2g", null, "hg19", "-noStats", "-noLog", "-lof", "-canon", 
"-ud", "0", "-spliceSiteSize", "5"); when(jarRunner.runJar(SnpEffAnnotator.NAME, params, ResourceUtils.getFile(getClass(), "/test-snpeff.vcf"))) .thenReturn(ResourceUtils.getFile(getClass(), "/snpeff-multi-gene-output.vcf")); } catch (Exception e) { e.printStackTrace(); } List<Entity> results = newArrayList(snpEffRunner .getSnpEffects(multiGeneEntities.iterator(), ResourceUtils.getFile(getClass(), "/test-snpeff.vcf"))); // Set id to null to prevent matching auto generated identifiers for (Entity resultEntity : results) { resultEntity.set("id", null); } assertTrue(EntityUtils.entitiesEquals(results, expectedMultiGeneEffectEntities)); } @Test public void testGetOutputMetaData() { EntityType sourceEMD = entityTypeFactory.create().setSimpleName("source"); sourceEMD.setPackage(packageFactory.create("package").setName("package")); sourceEMD.setBackend("TestBackend"); EntityType outputEMD = snpEffRunner.getTargetEntityType(sourceEMD); assertEquals(outputEMD.getBackend(), "TestBackend"); assertEquals(outputEMD.getName(), "package_source_EFFECTS"); } @Configuration @ComponentScan({ "org.molgenis.data.vcf.model", "org.molgenis.data.annotation.core.effects" }) public static class Config { } }