/*
* The MIT License
*
* Copyright (c) 2015 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.vcf.processor;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.FluentIterable;
import com.google.common.primitives.Ints;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.OverlapDetector;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;
import java.io.File;
import java.util.Iterator;
import java.util.List;
/**
* Describes a mechanism for producing {@link VcfFileSegment}s from a VCF file.
*
* @author mccowan
*/
public abstract class VcfFileSegmentGenerator {
final static Log LOG = Log.getInstance(VcfFileSegmentGenerator.class);
public abstract Iterable<VcfFileSegment> forVcf(final File vcf);
public static VcfFileSegmentGenerator byWholeContigSubdividingWithWidth(final long segmentWidth) {
return WidthLimitingDecorator.wrapping(ByWholeContig.getInstance(), segmentWidth);
}
/**
* Returns a decorated {@link VcfFileSegmentGenerator} that filters out {@link VcfFileSegment}s that have no overlap with the provided
* {@link OverlapDetector}.
*/
public static <T> VcfFileSegmentGenerator excludingNonOverlaps(final VcfFileSegmentGenerator strategy, final OverlapDetector<T> overlaps) {
return new VcfFileSegmentGenerator() {
@Override
public Iterable<VcfFileSegment> forVcf(final File vcf) {
return FluentIterable.from(strategy.forVcf(vcf)).filter(new Predicate<VcfFileSegment>() {
@Override
public boolean apply(final VcfFileSegment segment) {
final boolean keep = !overlaps.getOverlaps(new Interval(segment.contig(), segment.start(), segment.stop())).isEmpty();
if (!keep) {
LOG.debug(String.format("Ignoring segment because it does not overlap with detector, %s::%s:%s-%s",
segment.vcf().getName(), segment.contig(), segment.start(), segment.stop())
);
}
return keep;
}
});
}
};
}
/**
* A very simple {@link VcfFileSegmentGenerator} that breaks up the provided vcfs into contig-sized chunks.
*
* @author mccowan
*/
static class ByWholeContig extends VcfFileSegmentGenerator {
// Singleton!
ByWholeContig() {
}
private static final ByWholeContig singleton = new ByWholeContig();
public static ByWholeContig getInstance() {
return singleton;
}
@Override
public Iterable<VcfFileSegment> forVcf(final File vcf) {
final List<SAMSequenceRecord> samSequenceRecords = readSequences(vcf);
return FluentIterable.from(samSequenceRecords).transform(new Function<SAMSequenceRecord, VcfFileSegment>() {
@Override
public VcfFileSegment apply(final SAMSequenceRecord samSequenceRecord) {
return VcfFileSegment.ofWholeSequence(samSequenceRecord, vcf);
}
});
}
private static List<SAMSequenceRecord> readSequences(final File vcf) {
final VCFFileReader reader = new VCFFileReader(vcf);
final VCFHeader header = reader.getFileHeader();
final SAMSequenceDictionary dict = header.getSequenceDictionary();
reader.close();
return dict.getSequences();
}
}
/**
* Decorator to apply to other {@link VcfFileSegmentGenerator} to enforce that no segment is larger than the specified width.
*
* @author mccowan
*/
static final class WidthLimitingDecorator extends VcfFileSegmentGenerator {
final VcfFileSegmentGenerator underlyingStrategy;
final long width;
public static WidthLimitingDecorator wrapping(final VcfFileSegmentGenerator basis, final long maximumWidth) {
return new WidthLimitingDecorator(basis, maximumWidth);
}
private WidthLimitingDecorator(final VcfFileSegmentGenerator underlyingStrategy, final long maximumWidth) {
this.underlyingStrategy = underlyingStrategy;
this.width = maximumWidth - 1;
}
/**
* The thing that does the work; accepts a {@link VcfFileSegment} (produced by the parent {@link VcfFileSegmentGenerator}) and breaks
* it down into subsegments.
*/
private final class VcfFileSegmentSubdivider implements Iterable<VcfFileSegment> {
final VcfFileSegment basis;
private VcfFileSegmentSubdivider(final VcfFileSegment basis) {
this.basis = basis;
}
@Override
public Iterator<VcfFileSegment> iterator() {
return new Iterator<VcfFileSegment>() {
int nextStart = basis.start();
@Override
public boolean hasNext() {
return nextStart <= basis.stop();
}
@Override
public VcfFileSegment next() {
final int start = nextStart;
final VcfFileSegment ret = new VcfFileSegment() {
@Override
public int start() {
return start;
}
@Override
public int stop() {
return Ints.checkedCast(Math.min(start + width, basis.stop()));
}
@Override
public String contig() {
return basis.contig();
}
@Override
public File vcf() {
return basis.vcf();
}
};
nextStart += width + 1;
return ret;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
}
@Override
public Iterable<VcfFileSegment> forVcf(final File vcf) {
// Turn the VCF into segments, and then apply our
return FluentIterable.from(underlyingStrategy.forVcf(vcf)).transformAndConcat(new Function<VcfFileSegment, Iterable<? extends VcfFileSegment>>() {
@Override
public Iterable<? extends VcfFileSegment> apply(final VcfFileSegment vcfFileSegment) {
return new VcfFileSegmentSubdivider(vcfFileSegment);
}
});
}
}
}