package htsjdk.variant.variantcontext;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.variant.vcf.VCFContigHeaderLine;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * A Comparator that orders VariantContexts by the ordering of the contigs/chromosomes supplied at
 * construction time, then by start position within each contig/chromosome.
 */
public class VariantContextComparator implements Comparator<VariantContext> {

    /**
     * Extracts the sequence names from a sequence dictionary, in the order in which the dictionary
     * returns its sequences.
     *
     * @param dictionary the dictionary whose sequence names are wanted
     * @return a new list holding one sequence name per record in the dictionary
     */
    public static List<String> getSequenceNameList(final SAMSequenceDictionary dictionary) {
        final List<SAMSequenceRecord> records = dictionary.getSequences();
        final List<String> names = new ArrayList<String>(records.size());
        for (final SAMSequenceRecord record : records) {
            names.add(record.getSequenceName());
        }
        return names;
    }

    // For fast lookup of the contig's index in the contig list
    private final Map<String, Integer> contigIndexLookup;

    /**
     * Creates a VariantContextComparator in which each contig is ranked by its position in the
     * given list.
     *
     * @param contigs contig/chromosome names, in the desired sort order
     * @throws IllegalArgumentException if the list is empty or contains the same name more than once
     */
    public VariantContextComparator(final List<String> contigs) {
        if (contigs.isEmpty()) {
            throw new IllegalArgumentException("One or more contigs must be in the contig list.");
        }

        final Map<String, Integer> protoContigIndexLookup = new HashMap<String, Integer>();
        int index = 0;
        for (final String contig : contigs) {
            protoContigIndexLookup.put(contig, index++);
        }

        // A duplicate name overwrites its earlier entry, leaving the map smaller than the list.
        if (protoContigIndexLookup.size() != contigs.size()) {
            throw new IllegalArgumentException("There are duplicate contigs/chromosomes in the input contig list.");
        }

        this.contigIndexLookup = Collections.unmodifiableMap(protoContigIndexLookup);
    }

    /**
     * Creates a VariantContextComparator from the given VCF contig header lines. The header lines'
     * index values are used to order the contigs.
     *
     * @param headerLines the contig header lines supplying contig IDs and their index values
     * @throws IllegalArgumentException if the collection is empty, if two header lines share the
     *                                  same ID, or if two header lines share the same index value
     */
    public VariantContextComparator(final Collection<VCFContigHeaderLine> headerLines) {
        if (headerLines.isEmpty()) {
            throw new IllegalArgumentException("One or more header lines must be in the header line collection.");
        }

        final Map<String, Integer> protoContigIndexLookup = new HashMap<String, Integer>();
        for (final VCFContigHeaderLine headerLine : headerLines) {
            protoContigIndexLookup.put(headerLine.getID(), headerLine.getContigIndex());
        }

        // A duplicate ID overwrites its earlier entry, leaving the map smaller than the collection.
        if (protoContigIndexLookup.size() != headerLines.size()) {
            throw new IllegalArgumentException("There are duplicate contigs/chromosomes in the input header line collection.");
        }

        // Distinct IDs mapped to the same index collapse to one element in the set of values.
        final Set<Integer> protoIndexValues = new HashSet<Integer>(protoContigIndexLookup.values());
        if (protoIndexValues.size() != headerLines.size()) {
            throw new IllegalArgumentException("One or more contigs share the same index number.");
        }

        this.contigIndexLookup = Collections.unmodifiableMap(protoContigIndexLookup);
    }

    /**
     * Creates a VariantContextComparator that ranks contigs by the order of the sequence names
     * obtained from the given dictionary via {@link #getSequenceNameList(SAMSequenceDictionary)}.
     *
     * @param dictionary the dictionary supplying the contig order
     * @throws IllegalArgumentException if the dictionary yields no sequence names or yields the
     *                                  same name more than once
     */
    public VariantContextComparator(final SAMSequenceDictionary dictionary) {
        this(getSequenceNameList(dictionary));
    }

    /**
     * Compares two variant contexts, first by the index of their contig and then by start position.
     *
     * @param firstVariantContext  the first variant context
     * @param secondVariantContext the second variant context
     * @return a negative value, zero, or a positive value as the first argument sorts before,
     *         equal to, or after the second
     * @throws NullPointerException if the contig of either argument is not known to this comparator
     */
    @Override
    public int compare(final VariantContext firstVariantContext, final VariantContext secondVariantContext) {
        // Unboxing throws NullPointerException -- deliberately -- when a contig is missing from the
        // lookup. The constructors only validate the contig list itself, so this is the sole guard
        // against a variant whose contig was never registered.
        final int firstContigIndex = this.contigIndexLookup.get(firstVariantContext.getChr());
        final int secondContigIndex = this.contigIndexLookup.get(secondVariantContext.getChr());

        final int contigCompare = compareInts(firstContigIndex, secondContigIndex);
        if (contigCompare != 0) {
            return contigCompare;
        }
        return compareInts(firstVariantContext.getStart(), secondVariantContext.getStart());
    }

    /**
     * Three-way comparison of two ints. Used instead of subtraction, which can overflow and yield
     * a result with the wrong sign when the operands are far apart.
     */
    private static int compareInts(final int left, final int right) {
        return left < right ? -1 : (left > right ? 1 : 0);
    }

    /**
     * Returns true if the given header lines are from a file sorted according to this VariantContextComparator.
     * For sorting to work properly, the contig in each header line must have the same index as the
     * one this comparator holds for that contig.
     *
     * @param headerLines the contig header lines to check
     * @return false if any header line's ID is unknown to this comparator or carries a different
     *         index; true otherwise (including when the collection is empty)
     */
    public boolean isCompatible(final Collection<VCFContigHeaderLine> headerLines) {
        for (final VCFContigHeaderLine headerLine : headerLines) {
            final Integer existingIndex = this.contigIndexLookup.get(headerLine.getID());
            if (existingIndex == null) {
                return false;
            }
            if (headerLine.getContigIndex() != existingIndex.intValue()) {
                return false;
            }
        }
        return true;
    }
}