package org.molgenis.data.annotation.core.entity.impl.gavin;

import com.google.common.collect.Iterables;
import org.molgenis.data.DataService;
import org.molgenis.data.Entity;
import org.molgenis.data.MolgenisDataException;
import org.molgenis.data.annotation.core.EffectBasedAnnotator;
import org.molgenis.data.annotation.core.effects.EffectsMetaData;
import org.molgenis.data.annotation.core.entity.AnnotatorConfig;
import org.molgenis.data.annotation.core.entity.AnnotatorInfo;
import org.molgenis.data.annotation.core.entity.EntityAnnotator;
import org.molgenis.data.annotation.core.entity.impl.framework.QueryAnnotatorImpl;
import org.molgenis.data.annotation.core.entity.impl.snpeff.Impact;
import org.molgenis.data.annotation.core.query.GeneNameQueryCreator;
import org.molgenis.data.annotation.core.resources.Resource;
import org.molgenis.data.annotation.core.resources.Resources;
import org.molgenis.data.annotation.core.resources.impl.RepositoryFactory;
import org.molgenis.data.annotation.core.resources.impl.SingleResourceConfig;
import org.molgenis.data.annotation.core.resources.impl.emx.EmxResourceImpl;
import org.molgenis.data.annotation.core.resources.impl.emx.InMemoryRepositoryFactory;
import org.molgenis.data.annotation.core.utils.AnnotatorUtils;
import org.molgenis.data.importer.emx.EmxMetaDataParser;
import org.molgenis.data.meta.EntityTypeDependencyResolver;
import org.molgenis.data.meta.model.*;
import org.molgenis.data.vcf.model.VcfAttributes;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import static org.molgenis.data.annotation.core.effects.EffectsMetaData.GENE_NAME;
import static org.molgenis.data.annotation.core.effects.EffectsMetaData.PUTATIVE_IMPACT;
import static org.molgenis.data.annotation.core.entity.AnnotatorInfo.Status.READY;
import static org.molgenis.data.annotation.core.entity.AnnotatorInfo.Type.PATHOGENICITY_ESTIMATE;
import static org.molgenis.data.annotation.core.entity.impl.CaddAnnotator.CADD_SCALED;
import static org.molgenis.data.annotation.core.entity.impl.CaddAnnotator.createCaddScaledAttr;
import static org.molgenis.data.annotation.core.entity.impl.ExacAnnotator.EXAC_AF;
import static org.molgenis.data.annotation.core.entity.impl.ExacAnnotator.getExacAFAttr;
import static org.molgenis.data.annotation.web.settings.GavinAnnotatorSettings.Meta.VARIANT_FILE_LOCATION;
import static org.molgenis.data.meta.AttributeType.STRING;
import static org.molgenis.data.meta.AttributeType.XREF;
import static org.molgenis.data.vcf.model.VcfAttributes.ALT;
import static org.molgenis.data.vcf.utils.VcfWriterUtils.VARIANT;

/**
 * Spring configuration for the "Gavin" annotator.
 *
 * <p>It declares the EMX-backed {@link Resource} named {@value #RESOURCE}, exposes an
 * {@link EffectBasedAnnotator} bean, and in {@link #init()} plugs a {@link QueryAnnotatorImpl} into that
 * bean. The query annotator writes the {@value #CLASSIFICATION}, {@value #CONFIDENCE} and {@value #REASON}
 * attributes onto each entity it processes, using {@link GavinAlgorithm} to produce the judgment.
 */
@Configuration
public class GavinAnnotator implements AnnotatorConfig {
    public static final String NAME = "Gavin";
    public static final String RESOURCE = "gavin";
    public static final String RESOURCE_ENTITY_NAME = "gavin";

    /** Names of the three output attributes written by this annotator. */
    public static final String CLASSIFICATION = "Classification";
    public static final String CONFIDENCE = "Confidence";
    public static final String REASON = "Reason";

    private final GavinAlgorithm gavinAlgorithm = new GavinAlgorithm();

    @Autowired
    private Entity gavinAnnotatorSettings;

    @Autowired
    private DataService dataService;

    @Autowired
    private Resources resources;

    @Autowired
    private VcfAttributes vcfAttributes;

    @Autowired
    private PackageFactory packageFactory;

    @Autowired
    private EntityTypeFactory entityTypeFactory;

    @Autowired
    private AttributeFactory attributeFactory;

    @Autowired
    private EffectsMetaData effectsMetaData;

    @Autowired
    private EntityTypeDependencyResolver entityTypeDependencyResolver;

    @Autowired
    GeneNameQueryCreator geneNameQueryCreator;

    /**
     * Declares the Gavin calibration resource.
     *
     * <p>The resource is configured from the {@code VARIANT_FILE_LOCATION} setting and builds its
     * repository through an {@link InMemoryRepositoryFactory} for entity {@value #RESOURCE_ENTITY_NAME}.
     *
     * @return the resource bean registered under the name {@value #RESOURCE}
     */
    @Bean
    Resource GavinResource() {
        return new EmxResourceImpl(RESOURCE,
                new SingleResourceConfig(VARIANT_FILE_LOCATION, gavinAnnotatorSettings)) {
            @Override
            public RepositoryFactory getRepositoryFactory() {
                return new InMemoryRepositoryFactory(RESOURCE_ENTITY_NAME,
                        new EmxMetaDataParser(packageFactory, attributeFactory, entityTypeFactory,
                                entityTypeDependencyResolver), entityTypeFactory, attributeFactory);
            }
        };
    }

    /** The annotator bean created by {@link #gavin()}; it is completed later by {@link #init()}. */
    private EffectBasedAnnotator annotator;

    /**
     * Creates the (not yet initialised) annotator bean and remembers it so {@link #init()} can finish it.
     *
     * @return the annotator named {@value #NAME}
     */
    @Bean
    public EffectBasedAnnotator gavin() {
        annotator = new EffectBasedAnnotator(NAME);
        return annotator;
    }

    /**
     * Builds the {@link QueryAnnotatorImpl} that performs the Gavin classification and hands it to the
     * annotator created by {@link #gavin()}.
     */
    public void init() {
        LinkedList<Attribute> attributes = createGavinOutputAttributes();
        String description = "Please note that this annotator processes the results from a SnpEff annotation\nTherefor it should be used on the result entity rather than the variant entity itself.\nThe corresponding variant entity should also be annotated with CADD and EXaC";
        AnnotatorInfo gavinInfo = AnnotatorInfo.create(READY, PATHOGENICITY_ESTIMATE, NAME, description,
                attributes);

        EntityAnnotator entityAnnotator = new QueryAnnotatorImpl(RESOURCE, gavinInfo, geneNameQueryCreator,
                dataService, resources,
                (annotationSourceFileName) -> gavinAnnotatorSettings
                        .set(VARIANT_FILE_LOCATION, annotationSourceFileName)) {
            @Override
            public List<Attribute> createAnnotatorAttributes(AttributeFactory attributeFactory) {
                return createGavinOutputAttributes();
            }

            /**
             * Lists the attributes this annotator reads: gene name, putative impact and ALT of the
             * processed entity, plus an XREF to a variant entity type that carries the CADD scaled
             * score, the ExAC allele frequency and ALT.
             */
            @Override
            public List<Attribute> getRequiredAttributes() {
                List<Attribute> requiredAttributes = new ArrayList<>();
                EntityType entityType = entityTypeFactory.create().setName(VARIANT);
                List<Attribute> refAttributesList = Arrays
                        .asList(createCaddScaledAttr(attributeFactory), getExacAFAttr(attributeFactory),
                                vcfAttributes.getAltAttribute());
                entityType.addAttributes(refAttributesList);
                Attribute refAttr = attributeFactory.create().setName(VARIANT).setDataType(XREF)
                        .setRefEntity(entityType).setDescription(
                                "This annotator needs a references to an entity containing: " + StreamSupport
                                        .stream(refAttributesList.spliterator(), false).map(Attribute::getName)
                                        .collect(Collectors.joining(", ")));

                requiredAttributes.addAll(Arrays
                        .asList(effectsMetaData.getGeneNameAttr(), effectsMetaData.getPutativeImpactAttr(),
                                refAttr, effectsMetaData.getAltAttr()));
                return requiredAttributes;
            }

            /**
             * Classifies a single entity and stores the judgment on it.
             *
             * @param entity the entity being annotated; it is modified in place
             * @param annotationSourceEntities the resource rows found for this entity; exactly one row
             *        selects the gene-specific classification, zero rows the genome-wide fallback
             * @param updateMode must be {@code false}; updating is not supported
             * @throws MolgenisDataException if {@code updateMode} is set, the ALT value contains a comma,
             *         the entity has no variant reference or no putative impact, or more than one
             *         resource row was found
             */
            @Override
            protected void processQueryResults(Entity entity, Iterable<Entity> annotationSourceEntities,
                    boolean updateMode) {
                if (updateMode) {
                    throw new MolgenisDataException("This annotator/filter does not support updating of values");
                }

                String alt = entity.getString(EffectsMetaData.ALT);
                if (alt == null) {
                    entity.set(CLASSIFICATION, "");
                    entity.set(CONFIDENCE, "");
                    entity.set(REASON, "Missing ALT allele no judgment could be determined.");
                    return;
                }
                if (alt.contains(",")) {
                    throw new MolgenisDataException(
                            "The gavin annotator only accepts single allele input ('effect entities').");
                }

                int sourceEntitiesSize = Iterables.size(annotationSourceEntities);

                Entity variantEntity = entity.getEntity(VARIANT);
                if (variantEntity == null) {
                    // Without this guard the dereferences below fail with a bare NullPointerException.
                    throw new MolgenisDataException(
                            "The gavin annotator requires a reference to a variant entity in attribute ["
                                    + VARIANT + "].");
                }

                // NOTE(review): toAlleleMap presumably pairs each ALT allele with its score - confirm.
                Map<String, Double> caddMap = AnnotatorUtils
                        .toAlleleMap(variantEntity.getString(ALT), variantEntity.getString(CADD_SCALED));
                Map<String, Double> exacMap = AnnotatorUtils
                        .toAlleleMap(variantEntity.getString(ALT), variantEntity.getString(EXAC_AF));

                String impactValue = entity.getString(PUTATIVE_IMPACT);
                if (impactValue == null) {
                    // Enum valueOf(null) would otherwise throw an uninformative NullPointerException.
                    throw new MolgenisDataException(
                            "The gavin annotator requires a value for attribute [" + PUTATIVE_IMPACT + "].");
                }
                Impact impact = Impact.valueOf(impactValue);

                Double exacMAF = exacMap.get(alt);
                Double caddScaled = caddMap.get(alt);
                String gene = entity.getString(GENE_NAME);
                if (exacMAF == null) {
                    // No ExAC frequency for this allele: use 0.0 instead of passing null on.
                    exacMAF = 0.0;
                }

                if (sourceEntitiesSize == 1) {
                    Entity annotationSourceEntity = annotationSourceEntities.iterator().next();
                    Judgment judgment = gavinAlgorithm.classifyVariant(impact, caddScaled, exacMAF, gene,
                            GavinThresholds.fromEntity(annotationSourceEntity));
                    writeJudgment(entity, judgment);
                } else if (sourceEntitiesSize == 0) {
                    // if we have no data for this gene, immediately fall back to the naive method
                    Judgment judgment = gavinAlgorithm.genomewideClassifyVariant(impact, caddScaled, exacMAF,
                            gene);
                    writeJudgment(entity, judgment);
                } else {
                    String message = "invalid number [" + sourceEntitiesSize
                            + "] of results for this gene in annotation resource";
                    entity.set(REASON, message);
                    throw new MolgenisDataException(message);
                }
            }
        };

        annotator.init(entityAnnotator);
    }

    /** Copies classification, confidence and reason of a judgment onto the given entity. */
    private static void writeJudgment(Entity entity, Judgment judgment) {
        entity.set(CLASSIFICATION, judgment.getClassification().toString());
        entity.set(CONFIDENCE, judgment.getConfidence().toString());
        entity.set(REASON, judgment.getReason());
    }

    /**
     * Creates the three STRING output attributes, in the order classification, confidence, reason.
     * Each attribute uses its name as label and description as well.
     *
     * @return a new list holding freshly created attributes
     */
    private LinkedList<Attribute> createGavinOutputAttributes() {
        LinkedList<Attribute> attributes = new LinkedList<>();
        Attribute classification = attributeFactory.create().setName(CLASSIFICATION).setDataType(STRING)
                .setDescription(CLASSIFICATION).setLabel(CLASSIFICATION);
        Attribute confidence = attributeFactory.create().setName(CONFIDENCE).setDataType(STRING)
                .setDescription(CONFIDENCE).setLabel(CONFIDENCE);
        Attribute reason = attributeFactory.create().setName(REASON).setDataType(STRING).setDescription(REASON)
                .setLabel(REASON);

        attributes.add(classification);
        attributes.add(confidence);
        attributes.add(reason);
        return attributes;
    }
}