/* * The MIT License * * Copyright (c) 2014-2017 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * */ package picard.vcf.filter; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.*; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.filter.JavascriptVariantFilter; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder; import htsjdk.variant.vcf.*; import picard.PicardException; import picard.cmdline.CommandLineProgram; import picard.cmdline.CommandLineProgramProperties; import picard.cmdline.Option; import picard.cmdline.StandardOptionDefinitions; import picard.cmdline.programgroups.VcfOrBcf; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; /** * Applies a set of hard filters to Variants and to Genotypes within a VCF. * * @author Tim Fennell * @author Pierre Lindenbaum added the javascript filter in 2016 */ @CommandLineProgramProperties( usage = "Applies one or more hard filters to a VCF file to filter out genotypes and variants.", usageShort = "Hard filters a VCF.", programGroup = VcfOrBcf.class ) public class FilterVcf extends CommandLineProgram { @Option(shortName=StandardOptionDefinitions.INPUT_SHORT_NAME, doc="The INPUT VCF or BCF file.") public File INPUT; @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="The output VCF or BCF.") public File OUTPUT; @Option(doc="The minimum allele balance acceptable before filtering a site. Allele balance is calculated for heterozygotes as " + "the number of bases supporting the least-represented allele over the total number of base observations. Different heterozygote " + "genotypes at the same locus are measured independently. The locus is filtered if any allele balance is below the limit.") public double MIN_AB = 0.0d; @Option(doc="The minimum sequencing depth supporting a genotype before the genotype will be filtered out.") public int MIN_DP = 0; @Option(doc="The minimum genotype quality that must be achieved for a sample otherwise the genotype will be filtered out.") public int MIN_GQ = 0; @Option(doc="The maximum phred scaled fisher strand value before a site will be filtered out.") public double MAX_FS = Double.MAX_VALUE; @Option(doc="The minimum QD value to accept or otherwise filter out the variant.") public double MIN_QD = 0; @Option(shortName = "JS", doc = "Filters a VCF file with a javascript expression interpreted by the java javascript engine. " + " The script puts the following variables in the script context: " + " 'variant' a VariantContext ( https://samtools.github.io/htsjdk/javadoc/htsjdk/htsjdk/variant/variantcontext/VariantContext.html ) and " + " 'header' a VCFHeader ( https://samtools.github.io/htsjdk/javadoc/htsjdk/htsjdk/variant/vcf/VCFHeader.html )." + " Last value of the script should be a boolean to tell whether we should accept or reject the record.", optional = true) public File JAVASCRIPT_FILE = null; /** Constructor to default to having index creation on. */ public FilterVcf() { this.CREATE_INDEX = true; } // Stock main method public static void main(final String[] args) { new FilterVcf().instanceMainWithExit(args); } final private Log log = Log.getInstance(FilterVcf.class); final private ProgressLogger progress = new ProgressLogger(log, 100_000, "Processed", "Variants"); @Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); VCFFileReader in = null; VariantContextWriter out = null; try {// try/finally used to close 'in' and 'out' in = new VCFFileReader(INPUT, false); final List<VariantFilter> variantFilters = new ArrayList<>(4); variantFilters.add(new AlleleBalanceFilter(MIN_AB)); variantFilters.add(new FisherStrandFilter(MAX_FS)); variantFilters.add(new QdFilter(MIN_QD)); if (JAVASCRIPT_FILE != null) { try { variantFilters.add(new VariantContextJavascriptFilter(JAVASCRIPT_FILE, in.getFileHeader())); } catch (final IOException error) { throw new PicardException("javascript-related error", error); } } final List<GenotypeFilter> genotypeFilters = CollectionUtil.makeList(new GenotypeQualityFilter(MIN_GQ), new DepthFilter(MIN_DP)); final FilterApplyingVariantIterator iterator = new FilterApplyingVariantIterator(in.iterator(), variantFilters, genotypeFilters); final VCFHeader header = in.getFileHeader(); // If the user is writing to a .bcf or .vcf, VariantContextBuilderWriter requires a Sequence Dictionary. Make sure that the // Input VCF has one. final VariantContextWriterBuilder variantContextWriterBuilder = new VariantContextWriterBuilder(); if (isVcfOrBcf(OUTPUT)) { final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary(); if (sequenceDictionary == null) { throw new PicardException("The input vcf must have a sequence dictionary in order to create indexed vcf or bcfs."); } variantContextWriterBuilder.setReferenceDictionary(sequenceDictionary); } out = variantContextWriterBuilder.setOutputFile(OUTPUT).build(); header.addMetaDataLine(new VCFFilterHeaderLine("AllGtsFiltered", "Site filtered out because all genotypes are filtered out.")); header.addMetaDataLine(new VCFFormatHeaderLine("FT", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype filters.")); for (final VariantFilter filter : variantFilters) { filter.headerLines().forEach(header::addMetaDataLine); } out.writeHeader(in.getFileHeader()); while (iterator.hasNext()) { final VariantContext vc = iterator.next(); progress.record(vc.getContig(), vc.getStart()); out.add(vc); } return 0; } finally { CloserUtil.close(out); CloserUtil.close(in); } } private boolean isVcfOrBcf(final File file) { final String fileName = file.getName(); return fileName.endsWith(".vcf") || fileName.endsWith(".bcf"); } /** Javascript filter implementing VariantFilter */ private static class VariantContextJavascriptFilter extends JavascriptVariantFilter implements VariantFilter { /** filter name */ private final String filterName; /** script file */ private final File scriptFile; private VariantContextJavascriptFilter(final File scriptFile, final VCFHeader header) throws IOException { super(scriptFile, header); this.scriptFile = scriptFile; /* create filter name using file basename */ String fname = IOUtil.basename(scriptFile); if (fname.isEmpty()) fname = "JSFILTER"; this.filterName = fname; } /** * returns the filterName if the javascript doesn't accept the variant, * null otherwise */ @Override public String filter(final VariantContext ctx) { return (super.accept(ctx) ? null : this.filterName ); } @Override public List<VCFFilterHeaderLine> headerLines() { return CollectionUtil.makeList( new VCFFilterHeaderLine(this.filterName, "Variant Filtered by Javascript file " + this.scriptFile)); } } }