/* * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/ */ package org.phenotips.variantstore.input.vcf; import org.phenotips.variantstore.input.AbstractVariantIterator; import org.phenotips.variantstore.input.VariantHeader; import org.phenotips.variantstore.shared.GACallInfoFields; import org.phenotips.variantstore.shared.GAVariantInfoFields; import org.phenotips.variantstore.shared.VariantUtils; import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import org.ga4gh.GACall; import org.ga4gh.GAVariant; import htsjdk.samtools.util.CloseableIterator; import htsjdk.variant.variantcontext.CommonInfo; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFFileReader; /** * Created by meatcar on 3/13/15. * * @version $Id: 7afd633ec1a02714d8f5fb90974d8fd896b40cb6 $ */ public class VCFIterator extends AbstractVariantIterator { private final VCFFileReader reader; private final CloseableIterator<VariantContext> iterator; private Map<String, List<String>> filter; private VariantContext nextRow; private int altIndex; /** * Create a new Variant Iterator for a VCF file. * * @param path the path to the vcf * @param header vcf meta info */ public VCFIterator(Path path, VariantHeader header) { this(path, null, header, null); } /** * Create a new Variant Iterator for a VCF file that has an index file. * * @param path the vcf file * @param index the index file * @param header the header */ public VCFIterator(Path path, Path index, VariantHeader header) { this(path, index, header, null); } /** * Create a new Variant Iterator for a VCF file with a filter to skip any info field that matches the filter. * * @param path the vcf file * @param header vcf meta info * @param filter map of info field names and the values to skip on */ public VCFIterator(Path path, VariantHeader header, Map<String, List<String>> filter) { this(path, null, header, filter); } /** * Set a filter for the Info fields. Any VCF row with info fields that match this filter will be skipped. * * @param path the vcf file * @param index the index file * @param header vcf meta info * @param filter A Map of Info field -> List of values to exclude */ public VCFIterator(Path path, Path index, VariantHeader header, Map<String, List<String>> filter) { super(path, header); this.filter = filter; if (index == null) { this.reader = new VCFFileReader(path.toFile(), false); } else { this.reader = new VCFFileReader(path.toFile(), index.toFile()); } this.iterator = this.reader.iterator(); this.nextRow = this.nextFiltered(); } @Override public boolean hasNext() { return nextRow != null; } @Override public GAVariant next() { if (!hasNext()) { throw new NoSuchElementException(); } GAVariant variant = new GAVariant(); VariantContext context = nextRow; Map<String, List<String>> info = new HashMap<>(); variant.setReferenceName(context.getChr()); // GA4GH uses 0-based indexing, unlike VCF's 1-based. variant.setStart((long) context.getStart() - 1); variant.setEnd((long) context.getEnd()); variant.setReferenceBases(context.getReference().getBaseString()); // ALT List<String> alts = Collections.singletonList(context.getAlleles().get(altIndex).getBaseString()); variant.setAlternateBases(alts); // INFO String alleleFrequency = (String) context.getAttribute("AF"); if (alleleFrequency != null) { // handling ExAC VCF file VariantUtils.addInfo(variant, GAVariantInfoFields.EXAC_AF, alleleFrequency); } variant.setInfo(info); // Calls List<GACall> calls = new ArrayList<>(); for (Genotype genotype : context.getGenotypes()) { GACall call = new GACall(); // genotype call.setGenotype(new ArrayList<Integer>()); int count = genotype.countAllele(context.getAlleles().get(altIndex)); // if 2: (1,1), if 1: (0, 1), if 0: (0, 0) call.setGenotype(Arrays.asList(count / 2, count % 2)); VariantUtils.addInfo(call, GACallInfoFields.QUALITY, String.valueOf(context.getPhredScaledQual())); VariantUtils.addInfo(call, GACallInfoFields.FILTER, context.getFilters()); calls.add(call); } variant.setCalls(calls); if (this.nextRow.getAlternateAlleles().size() > altIndex) { altIndex++; } else { this.nextRow = this.nextFiltered(); altIndex = 0; } if (!hasNext()) { iterator.close(); reader.close(); } return variant; } /** * Advance the iterator to the next filtered. */ private VariantContext nextFiltered() { // no next if (!iterator.hasNext()) { return null; } // no filter, don't do extra work. if (this.filter == null) { return iterator.next(); } while (iterator.hasNext()) { VariantContext ctx = iterator.next(); CommonInfo contextInfo = ctx.getCommonInfo(); // Skip any vcf row that matches the filter. boolean matched = false; for (Map.Entry<String, List<String>> filterEntry : filter.entrySet()) { String ctxInfoValue = String.valueOf(contextInfo.getAttribute(filterEntry.getKey())); if (filterEntry.getValue().contains(ctxInfoValue)) { // the INFO field matched the filter, skip to discarding this element. matched = true; break; } } if (!matched) { return ctx; } } // no items found! return null; } }