package org.molgenis.annotation.test.cmd.conversion;
import org.elasticsearch.common.collect.Iterables;
import org.molgenis.annotation.cmd.conversion.EffectStructureConverter;
import org.molgenis.data.Entity;
import org.molgenis.data.meta.model.Attribute;
import org.molgenis.data.meta.model.AttributeFactory;
import org.molgenis.data.meta.model.EntityType;
import org.molgenis.data.meta.model.EntityTypeFactory;
import org.molgenis.data.support.DynamicEntity;
import org.molgenis.data.vcf.VcfRepository;
import org.molgenis.data.vcf.model.VcfAttributes;
import org.molgenis.test.data.AbstractMolgenisSpringTest;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.test.context.ContextConfiguration;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.io.IOException;
import java.util.*;
import java.util.stream.Collectors;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.molgenis.data.meta.AttributeType.*;
import static org.molgenis.data.vcf.model.VcfAttributes.*;
import static org.molgenis.data.vcf.utils.VcfWriterUtils.EFFECT;
import static org.molgenis.data.vcf.utils.VcfWriterUtils.VARIANT;
import static org.testng.Assert.*;
@ContextConfiguration(classes = { EffectStructureConverterTest.Config.class })
public class EffectStructureConverterTest extends AbstractMolgenisSpringTest
{
// Collaborators injected from the Spring context declared via @ContextConfiguration (see Config).
@Autowired
EntityTypeFactory entityTypeFactory;
@Autowired
AttributeFactory attributeFactory;
@Autowired
VcfAttributes vcfAttributes;
// The class under test.
@Autowired
EffectStructureConverter effectStructureConverter;
// Entity types assembled once in beforeClass().
// annotatedEntityType is populated there but not read by any test visible in this file.
private EntityType annotatedEntityType;
// VCF input type: the standard VCF columns plus an EFFECT string attribute and the INFO compound.
public EntityType vcfInputEntityType;
// Type returned by the mocked effect entities in testCreateVcfEntityStructure().
public EntityType effectEntityType;
// Same columns as the VCF input type, but without the EFFECT attribute.
public EntityType variantEntityType;
// Individual VCF column attributes created in beforeClass() and shared between the entity types.
public Attribute attributeChrom;
public Attribute attributePos;
public Attribute attributeRef;
public Attribute attributeAlt;
// CHROM attribute declared with data type LONG; created in beforeClass() but not added to any
// entity type or referenced by any test visible in this file.
public Attribute attributeCantAnnotateChrom;
// Input entities of type vcfInputEntityType, recreated before every test in beforeMethod().
// entity1 carries one EFFECT annotation, entity2 carries two, entity3 carries none.
public Entity entity1;
public Entity entity2;
public Entity entity3;
// entity1..entity3 in order; served through the mocked VcfRepository's spliterator.
public ArrayList<Entity> entities;
// Variant entities of type variantEntityType mirroring entity1 and entity2 without EFFECT;
// recreated before every test in beforeMethod().
private Entity variant1;
private Entity variant2;
/**
 * Builds the entity types used as fixtures by every test in this class.
 * <p>
 * The VCF input, annotated and variant types share the same identifier, CHROM/POS/REF/ALT and INFO
 * attribute instances; only the VCF input type additionally carries the EFFECT attribute. The
 * effect type gets its own auto identifier plus nine string attributes.
 */
@BeforeClass
public void beforeClass()
{
	// Attributes shared between the VCF input, annotated and variant entity types.
	Attribute idAttr = newStringAttribute("identifier").setIdAttribute(true).setVisible(false);
	Attribute infoAttr = attributeFactory.create().setName("INFO").setDataType(COMPOUND);
	Attribute acAttr = newStringAttribute("AC").setParent(infoAttr);
	Attribute anAttr = newStringAttribute("AN").setParent(infoAttr);
	Attribute gtcAttr = newStringAttribute("GTC").setParent(infoAttr);
	Attribute annoAttr = newStringAttribute("ANNO").setParent(infoAttr);

	annotatedEntityType = entityTypeFactory.create().setName("test");
	vcfInputEntityType = entityTypeFactory.create().setName("test");
	variantEntityType = entityTypeFactory.create().setName("test");
	effectEntityType = entityTypeFactory.create().setName("test");

	attributeChrom = newStringAttribute(CHROM);
	attributePos = attributeFactory.create().setName(POS).setDataType(INT);
	attributeRef = newStringAttribute(REF);
	attributeAlt = newStringAttribute(ALT);
	attributeCantAnnotateChrom = attributeFactory.create().setName(CHROM).setDataType(LONG);

	String effectDescription = "EFFECT annotations: 'Alt_Allele | Gene_Name | Annotation | Putative_impact | Gene_ID | Feature_type | Feature_ID | Transcript_biotype | Rank_total | HGVS_c | HGVS_p | cDNA_position | CDS_position | Protein_position | Distance_to_feature | Errors'";
	String filterDescription =
			"Test that description is not: '" + VcfRepository.DEFAULT_ATTRIBUTE_DESCRIPTION + "'";

	// VCF input: plain FILTER attribute and an EFFECT attribute placed right before INFO.
	addAttributesInOrder(vcfInputEntityType, idAttr, attributeChrom, attributePos, attributeRef, attributeAlt,
			newStringAttribute(ID), newStringAttribute(QUAL), newStringAttribute(FILTER),
			newStringAttribute(EFFECT).setDescription(effectDescription), infoAttr, acAttr, anAttr, gtcAttr,
			annoAttr);

	// Annotated and variant types: FILTER carries a custom description, no EFFECT attribute.
	addAttributesInOrder(annotatedEntityType, idAttr, attributeChrom, attributePos, attributeRef, attributeAlt,
			newStringAttribute(ID), newStringAttribute(QUAL),
			newStringAttribute(FILTER).setDescription(filterDescription), infoAttr, acAttr, anAttr, gtcAttr,
			annoAttr);
	addAttributesInOrder(variantEntityType, idAttr, attributeChrom, attributePos, attributeRef, attributeAlt,
			newStringAttribute(ID), newStringAttribute(QUAL),
			newStringAttribute(FILTER).setDescription(filterDescription), infoAttr, acAttr, anAttr, gtcAttr,
			annoAttr);

	// Effect type: hidden auto identifier followed by the effect columns.
	effectEntityType.addAttribute(
			newStringAttribute("identifier").setIdAttribute(true).setAuto(true).setVisible(false));
	String[] effectColumns = { "Alt_Allele", "Gene_Name", "Annotation", "Putative_impact", "Gene_ID",
			"Feature_type", "Feature_ID", "Transcript_biotype", "Rank_total" };
	for (String effectColumn : effectColumns)
	{
		effectEntityType.addAttribute(newStringAttribute(effectColumn));
	}
}

/**
 * Creates a new attribute with the given name and data type STRING.
 */
private Attribute newStringAttribute(String name)
{
	return attributeFactory.create().setName(name).setDataType(STRING);
}

/**
 * Adds the given attributes to the entity type one by one, in argument order.
 */
private static void addAttributesInOrder(EntityType target, Attribute... attributes)
{
	for (Attribute attribute : attributes)
	{
		target.addAttribute(attribute);
	}
}
/**
 * Recreates the input entities and the variant entities before every test.
 * <p>
 * entity1 gets the INFO counts and a single EFFECT annotation, entity2 gets two comma-separated
 * EFFECT annotations and entity3 gets no EFFECT value. variant1 and variant2 hold the same column
 * values as entity1 and entity2 but are based on the variant entity type.
 */
@BeforeMethod
public void beforeMethod() throws IOException
{
	entity1 = new DynamicEntity(vcfInputEntityType);
	populateVcfColumns(entity1, "variant_ID1", 10050000, "test21");
	populateInfoCounts(entity1);
	entity1.set(EFFECT,
			"A|GEN1|missense_variant|MODERATE|GEN1|transcript|NM_123456.7|Coding|4/4|c.1234C>T|p.Thr123Met|1234/5678|2345/6789|111/222||");

	entity2 = new DynamicEntity(vcfInputEntityType);
	populateVcfColumns(entity2, "variant_ID2", 10050001, "test22");
	entity2.set(EFFECT,
			"A|GEN1|missense_variant|MODERATE|GEN1|transcript|NM_123456.7|Coding|4/4|c.1234C>T|p.Thr123Met|1234/5678|2345/6789|111/222||,A|GEN2|missense_variant|MODERATE|GEN2|transcript|NM_123456.7|Coding|4/4|c.1234C>T|p.Thr123Met|1234/5678|2345/6789|111/222||");

	entity3 = new DynamicEntity(vcfInputEntityType);
	populateVcfColumns(entity3, "variant_ID3", 10050002, "test23");

	variant1 = new DynamicEntity(variantEntityType);
	populateVcfColumns(variant1, "variant_ID1", 10050000, "test21");
	populateInfoCounts(variant1);

	variant2 = new DynamicEntity(variantEntityType);
	populateVcfColumns(variant2, "variant_ID2", 10050001, "test22");

	entities = new ArrayList<>();
	entities.add(entity1);
	entities.add(entity2);
	entities.add(entity3);
}

/**
 * Sets the identifier and the fixed VCF columns on the given entity; only the identifier,
 * position and VCF ID differ between the fixtures.
 */
private static void populateVcfColumns(Entity target, String identifier, int position, String vcfId)
{
	target.set("identifier", identifier);
	target.set(CHROM, "1");
	target.set(POS, position);
	target.set(ID, vcfId);
	target.set(REF, "G");
	target.set(ALT, "A");
	target.set(QUAL, ".");
	target.set(FILTER, "PASS");
}

/**
 * Sets the AC, AN and GTC values used by the first fixture entity and its variant.
 */
private static void populateInfoCounts(Entity target)
{
	target.set("AC", "21");
	target.set("AN", "22");
	target.set("GTC", "0,1,10");
}
/**
 * Checks that the converter returns the description that beforeClass() put on the EFFECT attribute
 * of the VCF input entity type.
 */
@Test
public void testGetDescription()
{
	Attribute effectAttribute = vcfInputEntityType.getAttribute(EFFECT);
	String expectedDescription = "EFFECT annotations: 'Alt_Allele | Gene_Name | Annotation | Putative_impact | Gene_ID | Feature_type | Feature_ID | Transcript_biotype | Rank_total | HGVS_c | HGVS_p | cDNA_position | CDS_position | Protein_position | Distance_to_feature | Errors'";
	assertEquals(effectStructureConverter.getEffectDescription(effectAttribute), expectedDescription);
}
/**
 * Feeds three mocked effects (two pointing at variant1, one at variant2) into
 * createVcfEntityStructure and checks that the result contains two entities whose "EFFECT"
 * references hold effect_ID1 + effect_ID2 and effect_ID3 respectively, in that order.
 */
@Test
public void testCreateVcfEntityStructure()
{
	Entity effect1 = mockEffect("effect_ID1", variant1);
	Entity effect2 = mockEffect("effect_ID2", variant1);
	Entity effect3 = mockEffect("effect_ID3", variant2);

	Iterator<Entity> result = effectStructureConverter
			.createVcfEntityStructure(Arrays.asList(effect1, effect2, effect3).iterator());

	assertTrue(result.hasNext());
	Entity actualVariant1 = result.next();
	// TestNG argument order is (actual, expected), matching the other assertions in this class.
	assertEquals(Iterables.size(actualVariant1.getEntities("EFFECT")), 2);
	Iterator<Entity> effectsIterator = actualVariant1.getEntities("EFFECT").iterator();
	assertEquals(effectsIterator.next().getIdValue(), "effect_ID1");
	assertEquals(effectsIterator.next().getIdValue(), "effect_ID2");

	assertTrue(result.hasNext());
	Entity actualVariant2 = result.next();
	assertEquals(Iterables.size(actualVariant2.getEntities("EFFECT")), 1);
	effectsIterator = actualVariant2.getEntities("EFFECT").iterator();
	assertEquals(effectsIterator.next().getIdValue(), "effect_ID3");

	assertFalse(result.hasNext());
}

/**
 * Creates a mocked effect entity of the effect entity type with the given id, referencing the
 * given variant and returning the fixed values "1".."9" for its effect columns.
 *
 * @param effectId value returned by getIdValue()
 * @param variant  entity returned by getEntity(VARIANT)
 * @return the stubbed mock
 */
private Entity mockEffect(String effectId, Entity variant)
{
	Entity effect = mock(Entity.class);
	when(effect.getIdValue()).thenReturn(effectId);
	when(effect.getEntity(VARIANT)).thenReturn(variant);
	when(effect.getEntityType()).thenReturn(effectEntityType);
	when(effect.get("Alt_Allele")).thenReturn("1");
	when(effect.get("Gene_Name")).thenReturn("2");
	when(effect.get("Annotation")).thenReturn("3");
	when(effect.get("Putative_impact")).thenReturn("4");
	when(effect.get("Gene_ID")).thenReturn("5");
	when(effect.get("Feature_type")).thenReturn("6");
	when(effect.get("Feature_ID")).thenReturn("7");
	when(effect.get("Transcript_biotype")).thenReturn("8");
	when(effect.get("Rank_total")).thenReturn("9");
	return effect;
}
/**
 * Runs createVariantEffectStructure over a mocked VcfRepository that serves entity1..entity3 and
 * checks the three resulting effect entities: one for entity1 (gene GEN1, variant1) and two for
 * entity2 (genes GEN1 and GEN2, both variant2).
 */
@Test
public void testCreateVariantEffectStructure()
{
	VcfRepository vcfRepository = mock(VcfRepository.class);
	when(vcfRepository.getEntityType()).thenReturn(vcfInputEntityType);
	when(vcfRepository.spliterator()).thenReturn(entities.spliterator());

	List<Entity> resultEntities = effectStructureConverter
			.createVariantEffectStructure(EFFECT, Collections.emptyList(), vcfRepository)
			.collect(Collectors.toList());

	assertEquals(resultEntities.size(), 3);
	assertEffect(resultEntities.get(0), "GEN1", variant1);
	assertEffect(resultEntities.get(1), "GEN1", variant2);
	assertEffect(resultEntities.get(2), "GEN2", variant2);
}

/**
 * Asserts every effect column of a parsed effect entity. All fixture annotations share the same
 * values except for the gene, which is used for both Gene_Name and Gene_ID.
 *
 * @param effect          effect entity produced by the converter
 * @param gene            expected Gene_Name and Gene_ID
 * @param expectedVariant variant whose string form the VARIANT value must match
 */
private static void assertEffect(Entity effect, String gene, Entity expectedVariant)
{
	assertEquals(effect.get("Alt_Allele"), "A");
	assertEquals(effect.get("Gene_Name"), gene);
	assertEquals(effect.get("Annotation"), "missense_variant");
	assertEquals(effect.get("Putative_impact"), "MODERATE");
	assertEquals(effect.get("Gene_ID"), gene);
	assertEquals(effect.get("Feature_type"), "transcript");
	assertEquals(effect.get("Feature_ID"), "NM_123456.7");
	assertEquals(effect.get("Transcript_biotype"), "Coding");
	assertEquals(effect.get("Rank_total"), "4/4");
	assertEquals(effect.get("HGVS_c"), "c.1234C>T");
	assertEquals(effect.get("HGVS_p"), "p.Thr123Met");
	assertEquals(effect.get("cDNA_position"), "1234/5678");
	assertEquals(effect.get("CDS_position"), "2345/6789");
	assertEquals(effect.get("Protein_position"), "111/222");
	assertEquals(effect.get("Distance_to_feature"), "");
	assertEquals(effect.get("Errors"), "");
	assertEquals(effect.get("VARIANT").toString(), expectedVariant.toString());
}
/**
 * Spring configuration for this test, referenced by the @ContextConfiguration on the enclosing
 * class. It imports VcfAttributes and component-scans org.molgenis.annotation.cmd.conversion, the
 * package that EffectStructureConverter is imported from.
 * NOTE(review): the entity type and attribute factories are presumably contributed by
 * AbstractMolgenisSpringTest's own context; confirm there.
 */
@Configuration
@Import(VcfAttributes.class)
@ComponentScan({ "org.molgenis.annotation.cmd.conversion" })
public static class Config
{
}
}