package org.molgenis.data.annotation.core.entity.impl;
import org.molgenis.data.DataService;
import org.molgenis.data.Entity;
import org.molgenis.data.annotation.core.RepositoryAnnotator;
import org.molgenis.data.annotation.core.entity.AnnotatorConfig;
import org.molgenis.data.annotation.core.entity.AnnotatorInfo;
import org.molgenis.data.annotation.core.entity.EntityAnnotator;
import org.molgenis.data.annotation.core.entity.impl.framework.AbstractAnnotator;
import org.molgenis.data.annotation.core.entity.impl.framework.RepositoryAnnotatorImpl;
import org.molgenis.data.annotation.core.filter.MultiAllelicResultFilter;
import org.molgenis.data.annotation.core.query.LocusQueryCreator;
import org.molgenis.data.annotation.core.resources.Resource;
import org.molgenis.data.annotation.core.resources.Resources;
import org.molgenis.data.annotation.core.resources.impl.RepositoryFactory;
import org.molgenis.data.annotation.core.resources.impl.ResourceImpl;
import org.molgenis.data.annotation.core.resources.impl.SingleResourceConfig;
import org.molgenis.data.annotation.core.resources.impl.tabix.TabixRepositoryFactory;
import org.molgenis.data.annotation.web.settings.SingleFileLocationCmdLineAnnotatorSettingsConfigurer;
import org.molgenis.data.meta.model.Attribute;
import org.molgenis.data.meta.model.AttributeFactory;
import org.molgenis.data.meta.model.EntityType;
import org.molgenis.data.meta.model.EntityTypeFactory;
import org.molgenis.data.vcf.model.VcfAttributes;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.ArrayList;
import java.util.List;
import static org.molgenis.data.annotation.web.settings.DannAnnotatorSettings.Meta.DANN_LOCATION;
import static org.molgenis.data.meta.AttributeType.STRING;
/**
 * Spring configuration that wires up the DANN annotator.
 * <p>
 * It exposes two beans: the {@link RepositoryAnnotator} named {@value #NAME} and the {@link Resource}
 * registered under {@link #DANN_TABIX_RESOURCE}. The annotator bean is created empty by {@link #dann()} and
 * receives its actual {@link EntityAnnotator} later, in {@link #init()}.
 */
@Configuration
public class DannAnnotator implements AnnotatorConfig
{
    /** Name under which the annotator is created. */
    public static final String NAME = "dann";

    /** Name of the output attribute, and of the score column in the resource meta data. */
    public static final String DANN_SCORE = "DANN_SCORE";

    /** Label set on the {@link #DANN_SCORE} output attribute. */
    public static final String DANN_SCORE_LABEL = "DANNSCORE";

    /** Name shared by the resource bean and the entity type describing its rows. */
    public static final String DANN_TABIX_RESOURCE = "DANNTabixResource";

    /**
     * Description passed to {@link AnnotatorInfo}. The text follows the abstract of the DANN publication; line-break
     * hyphenation left over from copying it ("al- gorithm", "outper- form", ...) and the space inside the download
     * URL have been removed so the text reads correctly and the link is usable.
     */
    private static final String DESCRIPTION =
            "Annotating genetic variants, especially non-coding variants, "
                    + "for the purpose of identifying pathogenic variants remains a challenge."
                    + " Combined annotation-dependent depletion (CADD) is an algorithm designed "
                    + "to annotate both coding and non-coding variants, and has been shown to outperform "
                    + "other annotation algorithms. CADD trains a linear kernel support vector machine (SVM) "
                    + "to differentiate evolutionarily derived, likely benign, alleles from simulated, "
                    + "likely deleterious, variants. However, SVMs cannot capture non-linear relationships"
                    + " among the features, which can limit performance. To address this issue, we have"
                    + " developed DANN. DANN uses the same feature set and training data as CADD to train"
                    + " a deep neural network (DNN). DNNs can capture non-linear relationships among "
                    + "features and are better suited than SVMs for problems with a large number of samples "
                    + "and features. We exploit Compute Unified Device Architecture-compatible "
                    + "graphics processing units and deep learning techniques such as dropout and momentum "
                    + "training to accelerate the DNN training. DANN achieves about a 19% relative reduction "
                    + "in the error rate and about a 14% relative increase in the area under the curve (AUC) metric "
                    + "over CADD’s SVM methodology. "
                    + "All data and source code are available at https://cbcl.ics.uci.edu/public_data/DANN/.";

    @Autowired
    private Entity dannAnnotatorSettings;

    @Autowired
    private DataService dataService;

    @Autowired
    private Resources resources;

    @Autowired
    private VcfAttributes vcfAttributes;

    @Autowired
    private EntityTypeFactory entityTypeFactory;

    @Autowired
    private AttributeFactory attributeFactory;

    // Assigned by dann(); init() dereferences it, so dann() has to run first.
    private RepositoryAnnotatorImpl annotator;

    /**
     * Creates the (not yet initialised) annotator bean and remembers it so that {@link #init()} can complete it.
     *
     * @return the annotator named {@value #NAME}
     */
    @Bean
    public RepositoryAnnotator dann()
    {
        annotator = new RepositoryAnnotatorImpl(NAME);
        return annotator;
    }

    /**
     * Builds the {@link EntityAnnotator} for DANN and hands it to the annotator created by {@link #dann()}.
     * <p>
     * The entity annotator is bound to the {@link #DANN_TABIX_RESOURCE} resource, uses a {@link LocusQueryCreator}
     * for querying and a {@link MultiAllelicResultFilter} over the DANN output attributes for filtering results.
     */
    @Override
    public void init()
    {
        List<Attribute> attributes = createDannOutputAttributes();

        AnnotatorInfo dannInfo = AnnotatorInfo.create(AnnotatorInfo.Status.READY,
                AnnotatorInfo.Type.PATHOGENICITY_ESTIMATE, NAME, DESCRIPTION, attributes);

        EntityAnnotator entityAnnotator = new AbstractAnnotator(DANN_TABIX_RESOURCE, dannInfo,
                new LocusQueryCreator(vcfAttributes), new MultiAllelicResultFilter(attributes, vcfAttributes),
                dataService, resources,
                new SingleFileLocationCmdLineAnnotatorSettingsConfigurer(DANN_LOCATION, dannAnnotatorSettings))
        {
            @Override
            public List<Attribute> createAnnotatorAttributes(AttributeFactory attributeFactory)
            {
                // The factory argument is not used here; attributes come from the enclosing
                // configuration's own factory via createDannOutputAttributes().
                return createDannOutputAttributes();
            }
        };

        annotator.init(entityAnnotator);
    }

    /**
     * Creates a fresh list holding the single DANN output attribute ({@link #DANN_SCORE}, type STRING).
     *
     * @return a new mutable list with one newly created attribute
     */
    private List<Attribute> createDannOutputAttributes()
    {
        List<Attribute> attributes = new ArrayList<>();
        Attribute dannScore = attributeFactory.create().setName(DANN_SCORE).setDataType(STRING)
                .setDescription("deleterious score of genetic variants using neural networks.")
                .setLabel(DANN_SCORE_LABEL);
        attributes.add(dannScore);
        return attributes;
    }

    /**
     * Creates the resource bean named {@link #DANN_TABIX_RESOURCE}, configured from the {@code DANN_LOCATION}
     * setting of the DANN annotator settings entity.
     *
     * @return the resource whose repository factory is a {@link TabixRepositoryFactory}
     */
    @Bean
    Resource dannResource()
    {
        return new ResourceImpl(DANN_TABIX_RESOURCE, new SingleResourceConfig(DANN_LOCATION, dannAnnotatorSettings))
        {
            /**
             * Builds the entity type describing one row (chromosome, position, reference allele,
             * alternative allele, DANN score and a hidden id attribute) and wraps it in a tabix
             * repository factory. A new entity type is created on every call.
             */
            @Override
            public RepositoryFactory getRepositoryFactory()
            {
                String idAttrName = "id";
                EntityType repoMetaData = entityTypeFactory.create().setName(DANN_TABIX_RESOURCE);
                repoMetaData.addAttribute(vcfAttributes.getChromAttribute());
                repoMetaData.addAttribute(vcfAttributes.getPosAttribute());
                repoMetaData.addAttribute(vcfAttributes.getRefAttribute());
                repoMetaData.addAttribute(vcfAttributes.getAltAttribute());
                // Use the shared constant so the resource column and the output attribute cannot drift apart.
                repoMetaData.addAttribute(attributeFactory.create().setName(DANN_SCORE).setDataType(STRING));
                Attribute idAttribute = attributeFactory.create().setName(idAttrName).setVisible(false)
                        .setIdAttribute(true);
                repoMetaData.addAttribute(idAttribute);
                return new TabixRepositoryFactory(repoMetaData);
            }
        };
    }
}