package org.molgenis.data.annotation.core.entity.impl.snpeff; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.common.collect.PeekingIterator; import org.molgenis.data.Entity; import org.molgenis.data.MolgenisDataException; import org.molgenis.data.annotation.core.effects.EffectsMetaData; import org.molgenis.data.annotation.core.utils.JarRunner; import org.molgenis.data.annotation.web.settings.SnpEffAnnotatorSettings; import org.molgenis.data.meta.model.Attribute; import org.molgenis.data.meta.model.AttributeFactory; import org.molgenis.data.meta.model.EntityType; import org.molgenis.data.meta.model.EntityTypeFactory; import org.molgenis.data.populate.IdGenerator; import org.molgenis.data.support.DynamicEntity; import org.molgenis.data.vcf.VcfRepository; import org.molgenis.data.vcf.model.VcfAttributes; import org.molgenis.security.core.runas.RunAsSystemProxy; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import java.io.*; import java.util.Arrays; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.regex.Pattern; import static com.google.common.collect.Iterators.peekingIterator; import static java.io.File.createTempFile; import static org.molgenis.data.annotation.core.effects.EffectsMetaData.*; import static org.molgenis.data.meta.AttributeType.XREF; @Component public class SnpEffRunner { private final EntityTypeFactory entityTypeFactory; private final AttributeFactory attributeFactory; private final VcfAttributes vcfAttributes; private static final Logger LOG = LoggerFactory.getLogger(SnpEffAnnotator.class); private String snpEffPath; private static final String CHARSET = "UTF-8"; public static final String ENTITY_NAME_SUFFIX = "_EFFECTS"; public static final String NAME = "snpEff"; public static final String LOF = "LOF"; public static final String NMD = "NMD"; public static final String ANN = "ANN"; private EffectsMetaData effectsMetaData = new EffectsMetaData(); private final JarRunner jarRunner; private final Entity snpEffAnnotatorSettings; private final IdGenerator idGenerator; @Autowired public SnpEffRunner(JarRunner jarRunner, Entity snpEffAnnotatorSettings, IdGenerator idGenerator, VcfAttributes vcfAttributes, EffectsMetaData effectsMetaData, EntityTypeFactory entityTypeFactory, AttributeFactory attributeFactory) { this.jarRunner = jarRunner; this.snpEffAnnotatorSettings = snpEffAnnotatorSettings; this.idGenerator = idGenerator; this.vcfAttributes = vcfAttributes; this.effectsMetaData = effectsMetaData; this.entityTypeFactory = entityTypeFactory; this.attributeFactory = attributeFactory; } public Iterator<Entity> getSnpEffects(Iterable<Entity> source) { try { File inputVcf = getInputVcfFile(source.iterator()); return getSnpEffects(source.iterator(), inputVcf); } catch (IOException e) { throw new MolgenisDataException("Exception making temporary VCF file", e); } } @SuppressWarnings("resource") public Iterator<Entity> getSnpEffects(Iterator<Entity> source, final File inputVcf) { try { if (!source.hasNext()) return Iterators.emptyIterator(); // get meta data by peeking at the first entity (work-around for issue #4701) PeekingIterator<Entity> peekingSourceIterator = Iterators.peekingIterator(source); EntityType sourceEMD = peekingSourceIterator.peek().getEntityType(); List<String> params = Arrays .asList("-Xmx2g", getSnpEffPath(), "hg19", "-noStats", "-noLog", "-lof", "-canon", "-ud", "0", "-spliceSiteSize", "5"); File outputVcf = jarRunner.runJar(NAME, params, inputVcf); VcfRepository repo = new VcfRepository(outputVcf, "SNPEFF_OUTPUT_VCF_" + inputVcf.getName(), vcfAttributes, entityTypeFactory, attributeFactory); PeekingIterator<Entity> snpEffResultIterator = peekingIterator(repo.iterator()); return new Iterator<Entity>() { final LinkedList<Entity> effects = Lists.newLinkedList(); @Override public boolean hasNext() { return (peekingSourceIterator.hasNext() || !effects.isEmpty()); } @Override public Entity next() { if (effects.isEmpty()) { // go to next source entity and get effects Entity sourceEntity = peekingSourceIterator.next(); String chromosome = sourceEntity.getString(VcfAttributes.CHROM); Integer position = sourceEntity.getInt(VcfAttributes.POS); if (chromosome != null && position != null) { Entity snpEffEntity = getSnpEffEntity(snpEffResultIterator, chromosome, position); if (snpEffEntity != null) { effects.addAll(getSnpEffectsFromSnpEffEntity(sourceEntity, snpEffEntity, getTargetEntityType(sourceEMD))); } else { effects.add(getEmptyEffectsEntity(sourceEntity, getTargetEntityType(sourceEMD))); } } else { effects.add(getEmptyEffectsEntity(sourceEntity, getTargetEntityType(sourceEMD))); } } return effects.removeFirst(); } }; } catch (IOException e) { throw new UncheckedIOException(e); } catch (InterruptedException e) { throw new MolgenisDataException("Exception running SnpEff", e); } } /** * Returns the next entity containing SnpEff annotations if its Chrom and Pos match. This implementation works * because SnpEff always returns output in the same order as the input * * @param snpEffResultIterator the snpEff results * @param chromosome chromosome for the entity that is being annotated * @param position position for the entity that is being annotated * @return {@link Entity} */ private Entity getSnpEffEntity(PeekingIterator<Entity> snpEffResultIterator, String chromosome, int position) { if (snpEffResultIterator.hasNext()) { Entity entityCandidate = snpEffResultIterator.peek(); if (chromosome.equals(entityCandidate.getString(VcfAttributes.CHROM)) && position == entityCandidate .getInt(VcfAttributes.POS) && entityCandidate.getString(SnpEffRunner.ANN) != null) { snpEffResultIterator.next(); return entityCandidate; } } return null; } private Entity getEmptyEffectsEntity(Entity sourceEntity, EntityType effectsEMD) { Entity effect = new DynamicEntity(effectsEMD); effect.set(ID, idGenerator.generateId()); effect.set(VARIANT, sourceEntity); return effect; } // ANN=G|intron_variant|MODIFIER|LOC101926913|LOC101926913|transcript|NR_110185.1|Noncoding|5/5|n.376+9526G>C||||||,G|non_coding_exon_variant|MODIFIER|LINC01124|LINC01124|transcript|NR_027433.1|Noncoding|1/1|n.590G>C||||||; private List<Entity> getSnpEffectsFromSnpEffEntity(Entity sourceEntity, Entity snpEffEntity, EntityType effectsEMD) { String[] annotations = snpEffEntity.getString(SnpEffRunner.ANN).split(Pattern.quote(","), -1); List<Entity> effects = Lists.newArrayList(); for (String annotation : annotations) { String[] fields = annotation.split(Pattern.quote("|"), -1); Entity effect = new DynamicEntity(effectsEMD); if (fields.length >= 15) { effect.set(ID, idGenerator.generateId()); effect.set(VARIANT, sourceEntity); effect.set(ALT, fields[0]); effect.set(GENE_NAME, fields[4]); effect.set(ANNOTATION, fields[1]); effect.set(PUTATIVE_IMPACT, fields[2]); effect.set(GENE_NAME, fields[3]); effect.set(GENE_ID, fields[4]); effect.set(FEATURE_TYPE, fields[5]); effect.set(FEATURE_ID, fields[6]); effect.set(TRANSCRIPT_BIOTYPE, fields[7]); effect.set(RANK_TOTAL, fields[8]); effect.set(HGVS_C, fields[9]); effect.set(HGVS_P, fields[10]); effect.set(C_DNA_POSITION, fields[11]); effect.set(CDS_POSITION, fields[12]); effect.set(PROTEIN_POSITION, fields[13]); effect.set(DISTANCE_TO_FEATURE, fields[14]); effect.set(ERRORS, fields[15]); } else { LOG.info("No results for CHROM:{} POS:{} REF:{} ALT:{} ", effect.getString(VcfAttributes.CHROM), effect.getString(VcfAttributes.POS), effect.getString(VcfAttributes.REF), effect.getString(VcfAttributes.ALT)); } effects.add(effect); } return effects; } /** * Converts entities to a VCF file that can be passed to SnpEff. * * @param source the Entities to convert to VCF * @return a VCF file */ public File getInputVcfFile(Iterator<Entity> source) throws IOException { File vcf = createTempFile(NAME, ".vcf"); try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(vcf), CHARSET))) { bw.write(VcfAttributes.CHROM + "\t" + VcfAttributes.POS + "\t" + VcfAttributes.ID + "\t" + VcfAttributes.REF + "\t" + VcfAttributes.ALT + "\t" + VcfAttributes.QUAL + "\t" + VcfAttributes.FILTER + "\t" + VcfAttributes.INFO + "\n"); while (source.hasNext()) { Entity entity = source.next(); StringBuilder builder = new StringBuilder(); builder.append(entity.getString(VcfAttributes.CHROM)); builder.append("\t"); builder.append(entity.getInt(VcfAttributes.POS)); builder.append("\t.\t");//ID builder.append(entity.getString(VcfAttributes.REF)); builder.append("\t"); builder.append(entity.getString(VcfAttributes.ALT)); builder.append("\t.\t");//QUAL builder.append("\t.\t");//FILTER builder.append(".");//INFO if (source.hasNext()) { builder.append("\n"); } bw.write(builder.toString()); } } return vcf; } /** * Gets the path to the SnpEff JAR. Returns null when the path is not found or snpEffAnnotatorSettings is null. * * @return the path to the SnpEff JAR, or null */ public String getSnpEffPath() { if (snpEffAnnotatorSettings != null) { snpEffPath = RunAsSystemProxy.runAsSystem( () -> snpEffAnnotatorSettings.getString(SnpEffAnnotatorSettings.Meta.SNPEFF_JAR_LOCATION)); if (snpEffPath != null) { File snpEffFile = new File(snpEffPath); if (snpEffFile.exists() && snpEffFile.isFile()) { LOG.info("SnpEff found at: " + snpEffFile.getAbsolutePath()); } else { LOG.debug("SnpEff not found at: " + snpEffFile.getAbsolutePath()); snpEffPath = null; } } } return snpEffPath; } /** * @param sourceEntityType The entity type for the entity that is being annotated by snpEff * @return entityType Returns the EntityType for the effect entity */ public EntityType getTargetEntityType(EntityType sourceEntityType) { EntityType entityType = entityTypeFactory.create() .setSimpleName(sourceEntityType.getSimpleName() + ENTITY_NAME_SUFFIX) .setPackage(sourceEntityType.getPackage()); entityType.setBackend(sourceEntityType.getBackend()); Attribute id = attributeFactory.create().setName(EffectsMetaData.ID).setAuto(true).setVisible(false) .setIdAttribute(true); entityType.addAttribute(id); for (Attribute attr : effectsMetaData.getOrderedAttributes()) { entityType.addAttribute(attr); } entityType.addAttribute( attributeFactory.create().setName(EffectsMetaData.VARIANT).setNillable(false).setDataType(XREF) .setRefEntity(sourceEntityType)); return entityType; } }