package org.molgenis.annotation.cmd.utils; import org.molgenis.annotation.cmd.conversion.EffectStructureConverter; import org.molgenis.data.Entity; import org.molgenis.data.MolgenisInvalidFormatException; import org.molgenis.data.annotation.core.EffectBasedAnnotator; import org.molgenis.data.annotation.core.EffectCreatingAnnotator; import org.molgenis.data.annotation.core.RepositoryAnnotator; import org.molgenis.data.annotation.core.utils.AnnotatorUtils; import org.molgenis.data.meta.model.Attribute; import org.molgenis.data.meta.model.AttributeFactory; import org.molgenis.data.meta.model.EntityType; import org.molgenis.data.meta.model.EntityTypeFactory; import org.molgenis.data.vcf.VcfRepository; import org.molgenis.data.vcf.model.VcfAttributes; import org.molgenis.data.vcf.utils.VcfUtils; import org.molgenis.data.vcf.utils.VcfWriterUtils; import java.io.*; import java.util.Iterator; import java.util.List; import java.util.stream.Collectors; import java.util.stream.Stream; import static com.google.common.collect.Lists.newArrayList; import static java.nio.charset.StandardCharsets.UTF_8; import static org.molgenis.data.meta.AttributeType.MREF; public class CmdLineAnnotatorUtils { private static final String EFFECT = "EFFECT"; /** * Adds a new compound attribute to an existing CrudRepository * * @param annotator the annotator to be runned * @param vcfAttributes utility class for vcf metadata * @param entityTypeFactory factory for molgenis entityType * @param attributeFactory factory for molgenis entityType * @param effectStructureConverter utility class for converting a vcfRepo from and to the molgenis entity structure for "effects" annotations * @param inputVcfFile the vcf file to be annotated * @param outputVCFFile the resulting, annotated vcf file * @param attributesToInclude the attributes of the annotator that should be written to the result * @param update boolean indicating if values already present for the annotator attributes should be updated(true) or overwritten (false) * @return the path of the result vcf file * @throws IOException, * @throws MolgenisInvalidFormatException */ public static String annotate(RepositoryAnnotator annotator, VcfAttributes vcfAttributes, EntityTypeFactory entityTypeFactory, AttributeFactory attributeFactory, EffectStructureConverter effectStructureConverter, File inputVcfFile, File outputVCFFile, List<String> attributesToInclude, boolean update) throws IOException, MolgenisInvalidFormatException { try (BufferedWriter outputVCFWriter = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(outputVCFFile), UTF_8)); VcfRepository vcfRepo = new VcfRepository(inputVcfFile, inputVcfFile.getName(), vcfAttributes, entityTypeFactory, attributeFactory)) { List<Attribute> outputMetaData = getOutputAttributeMetadatasForAnnotator(annotator, entityTypeFactory, attributeFactory, attributesToInclude, vcfRepo); VcfWriterUtils .writeVcfHeader(inputVcfFile, outputVCFWriter, VcfUtils.getAtomicAttributesFromList(outputMetaData), attributesToInclude); Iterable<Entity> entitiesToAnnotate = addAnnotatorMetaDataToRepository(annotator, attributeFactory, effectStructureConverter, vcfRepo); Iterator<Entity> annotatedRecords = annotateRepo(annotator, effectStructureConverter, update, entitiesToAnnotate); writeAnnotationResultToVcfFile(attributesToInclude, outputVCFWriter, outputMetaData, annotatedRecords); } return outputVCFFile.getAbsolutePath(); } private static Iterator<Entity> annotateRepo(RepositoryAnnotator annotator, EffectStructureConverter effectStructureConverter, boolean update, Iterable<Entity> entitiesToAnnotate) { Iterator<Entity> annotatedRecords = annotator.annotate(entitiesToAnnotate, update); if (annotator instanceof EffectCreatingAnnotator || annotator instanceof EffectBasedAnnotator) { annotatedRecords = effectStructureConverter.createVcfEntityStructure(annotatedRecords); } return annotatedRecords; } private static Iterable<Entity> addAnnotatorMetaDataToRepository(RepositoryAnnotator annotator, AttributeFactory attributeFactory, EffectStructureConverter effectStructureConverter, VcfRepository vcfRepo) { addAnnotatorAttributesToInfoAttribute(annotator, vcfRepo); Stream<Entity> entitiesToAnnotate; // Check if annotator is annotator that annotates effects (for example Gavin) if (annotator instanceof EffectBasedAnnotator) { entitiesToAnnotate = effectStructureConverter .createVariantEffectStructure(EFFECT, annotator.getOutputAttributes(), vcfRepo); } else { AnnotatorUtils.addAnnotatorMetaDataToRepositories(vcfRepo.getEntityType(), attributeFactory, annotator); return vcfRepo; } return entitiesToAnnotate::iterator; } private static void writeAnnotationResultToVcfFile(List<String> attributesToInclude, BufferedWriter outputVCFWriter, List<Attribute> outputMetaData, Iterator<Entity> annotatedRecords) throws IOException { while (annotatedRecords.hasNext()) { // annotation starts here Entity annotatedRecord = annotatedRecords.next(); VcfWriterUtils.writeToVcf(annotatedRecord, VcfUtils.getAtomicAttributesFromList(outputMetaData), attributesToInclude, outputVCFWriter); outputVCFWriter.newLine(); } } private static void addAnnotatorAttributesToInfoAttribute(RepositoryAnnotator annotator, VcfRepository vcfRepo) { EntityType entityType = vcfRepo.getEntityType(); Attribute infoAttribute = entityType.getAttribute(VcfAttributes.INFO); for (Attribute attribute : annotator.getOutputAttributes()) { for (Attribute atomicAttribute : attribute.getChildren()) { atomicAttribute.setParent(infoAttribute); entityType.addAttribute(atomicAttribute); } } } private static List<Attribute> getOutputAttributeMetadatasForAnnotator(RepositoryAnnotator annotator, EntityTypeFactory entityTypeFactory, AttributeFactory attributeFactory, List<String> attributesToInclude, VcfRepository vcfRepo) { if (!attributesToInclude.isEmpty()) { checkSelectedOutputAttributeNames(annotator, attributesToInclude, vcfRepo); } // If the annotator e.g. SnpEff creates an external repository, collect the output metadata into an mref // entity // This allows for the header to be written as 'EFFECT annotations: <ouput_attributes> | <ouput_attributes>' List<Attribute> outputMetaData = newArrayList(); if (annotator instanceof EffectCreatingAnnotator || annotator instanceof EffectBasedAnnotator) { EntityType effectRefEntity = entityTypeFactory.create().setName(annotator.getSimpleName() + "_EFFECTS"); for (Attribute outputAttribute : annotator.getOutputAttributes()) { effectRefEntity.addAttribute(outputAttribute); } Attribute effect = attributeFactory.create().setName(EFFECT); effect.setDataType(MREF).setRefEntity(effectRefEntity); outputMetaData.add(effect); } else { outputMetaData = annotator.getOutputAttributes(); } return outputMetaData; } private static void checkSelectedOutputAttributeNames(RepositoryAnnotator annotator, List<String> attributesToInclude, VcfRepository vcfRepo) { // Check attribute names List<String> outputAttributeNames = VcfUtils.getAtomicAttributesFromList(annotator.getOutputAttributes()) .stream().map(Attribute::getName).collect(Collectors.toList()); List<String> inputAttributeNames = VcfUtils .getAtomicAttributesFromList(vcfRepo.getEntityType().getAtomicAttributes()).stream() .map(Attribute::getName).collect(Collectors.toList()); for (String attrName : attributesToInclude) { if (!outputAttributeNames.contains(attrName)) { throw new RuntimeException("Unknown output attribute '" + attrName + "'"); } else if (inputAttributeNames.contains(attrName)) { throw new RuntimeException("The output attribute '" + attrName + "' is present in the inputfile, but is deselected in the current run, this is not supported"); } } } }