package org.molgenis.data.annotation.core.entity.impl;
import org.molgenis.data.DataService;
import org.molgenis.data.Entity;
import org.molgenis.data.annotation.core.RepositoryAnnotator;
import org.molgenis.data.annotation.core.entity.AnnotatorConfig;
import org.molgenis.data.annotation.core.entity.AnnotatorInfo;
import org.molgenis.data.annotation.core.entity.AnnotatorInfo.Status;
import org.molgenis.data.annotation.core.entity.EntityAnnotator;
import org.molgenis.data.annotation.core.entity.impl.framework.AbstractAnnotator;
import org.molgenis.data.annotation.core.entity.impl.framework.RepositoryAnnotatorImpl;
import org.molgenis.data.annotation.core.filter.MultiAllelicResultFilter;
import org.molgenis.data.annotation.core.query.LocusQueryCreator;
import org.molgenis.data.annotation.core.resources.MultiResourceConfig;
import org.molgenis.data.annotation.core.resources.Resource;
import org.molgenis.data.annotation.core.resources.Resources;
import org.molgenis.data.annotation.core.resources.impl.MultiFileResource;
import org.molgenis.data.annotation.core.resources.impl.MultiResourceConfigImpl;
import org.molgenis.data.annotation.core.resources.impl.RepositoryFactory;
import org.molgenis.data.annotation.core.resources.impl.tabix.TabixVcfRepositoryFactory;
import org.molgenis.data.meta.model.Attribute;
import org.molgenis.data.meta.model.AttributeFactory;
import org.molgenis.data.meta.model.EntityTypeFactory;
import org.molgenis.data.vcf.model.VcfAttributes;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.List;
import static java.util.Collections.singletonList;
import static org.molgenis.data.annotation.web.settings.ThousendGenomesAnnotatorSettings.Meta.*;
import static org.molgenis.data.meta.AttributeType.DECIMAL;
import static org.molgenis.data.meta.AttributeType.STRING;
@Configuration
public class ThousandGenomesAnnotator implements AnnotatorConfig
{
public static final String NAME = "thousand_genomes";
public static final String THOUSAND_GENOME_AF = "Thousand_Genomes_AF";
public static final String THOUSAND_GENOME_AF_LABEL = "Thousand genome allele frequency";
public static final String THOUSAND_GENOME_AF_RESOURCE_ATTRIBUTE_NAME = "AF";
public static final String THOUSAND_GENOME_MULTI_FILE_RESOURCE = "thousandGenomesSources";
@Autowired
private Entity thousendGenomesAnnotatorSettings;
@Autowired
private DataService dataService;
@Autowired
private Resources resources;
@Autowired
private VcfAttributes vcfAttributes;
@Autowired
private EntityTypeFactory entityTypeFactory;
@Autowired
private AttributeFactory attributeFactory;
private RepositoryAnnotatorImpl annotator;
@Bean
public RepositoryAnnotator thousandGenomes()
{
annotator = new RepositoryAnnotatorImpl(NAME);
return annotator;
}
@Override
public void init()
{
List<Attribute> attributes = createThousandGenomesOutputAttributes();
AnnotatorInfo thousandGenomeInfo = AnnotatorInfo
.create(Status.READY, AnnotatorInfo.Type.POPULATION_REFERENCE, NAME,
"The 1000 Genomes Project is an international collaboration to produce an "
+ "extensive public catalog of human genetic variation, including SNPs and structural variants, "
+ "and their haplotype contexts. This resource will support genome-wide association studies and other "
+ "medical research studies. "
+ "The genomes of about 2500 unidentified people from about 25 populations around the world will be"
+ "sequenced using next-generation sequencing technologies. "
+ "The results of the study will be freely and publicly accessible to researchers worldwide. "
+ "Further information about the project is available in the About tab. Information about downloading, "
+ "browsing or using the 1000 Genomes data is available at: http://www.1000genomes.org/ ",
attributes);
LocusQueryCreator locusQueryCreator = new LocusQueryCreator(vcfAttributes);
MultiAllelicResultFilter multiAllelicResultFilter = new MultiAllelicResultFilter(singletonList(
attributeFactory.create().setName(THOUSAND_GENOME_AF_RESOURCE_ATTRIBUTE_NAME).setDataType(DECIMAL)),
vcfAttributes);
EntityAnnotator entityAnnotator = new AbstractAnnotator(THOUSAND_GENOME_MULTI_FILE_RESOURCE, thousandGenomeInfo,
locusQueryCreator, multiAllelicResultFilter, dataService, resources, (annotationSourceFileName) ->
{
thousendGenomesAnnotatorSettings.set(ROOT_DIRECTORY, annotationSourceFileName);
thousendGenomesAnnotatorSettings
.set(FILEPATTERN, "ALL.chr%s.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz");
thousendGenomesAnnotatorSettings
.set(CHROMOSOMES, "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22");
})
{
@Override
public List<Attribute> createAnnotatorAttributes(AttributeFactory attributeFactory)
{
return createThousandGenomesOutputAttributes();
}
@Override
protected Object getResourceAttributeValue(Attribute attr, Entity entityType)
{
String attrName = THOUSAND_GENOME_AF
.equals(attr.getName()) ? THOUSAND_GENOME_AF_RESOURCE_ATTRIBUTE_NAME : attr.getName();
return entityType.get(attrName);
}
};
annotator.init(entityAnnotator);
}
private List<Attribute> createThousandGenomesOutputAttributes()
{
return singletonList(attributeFactory.create().setName(THOUSAND_GENOME_AF).setDataType(STRING).setDescription(
"The allele frequency for variants seen in the population used for the thousand genomes project")
.setLabel(THOUSAND_GENOME_AF_LABEL));
}
@Bean
Resource thousandGenomesSources()
{
MultiResourceConfig thousandGenomeConfig = new MultiResourceConfigImpl(CHROMOSOMES, FILEPATTERN, ROOT_DIRECTORY,
OVERRIDE_CHROMOSOME_FILES, thousendGenomesAnnotatorSettings);
return new MultiFileResource(THOUSAND_GENOME_MULTI_FILE_RESOURCE, thousandGenomeConfig)
{
@Override
public RepositoryFactory getRepositoryFactory()
{
return new TabixVcfRepositoryFactory(THOUSAND_GENOME_MULTI_FILE_RESOURCE, vcfAttributes,
entityTypeFactory, attributeFactory);
}
};
}
}