/*
* The MIT License
*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.fingerprint;
import picard.PicardException;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.FormatUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMTextHeaderCodec;
import htsjdk.samtools.util.StringLineReader;
import java.io.*;
import java.util.*;
/**
* A collection of metadata about Haplotype Blocks including multiple in memory "indices" of the data
* to make it easy to query the correct HaplotypeBlock or Snp by snp names, positions etc. Also has the
* ability to read and write itself to and from files.
*
* @author Tim Fennell / Kathleen Tibbetts
*/
public class HaplotypeMap {
private final List<HaplotypeBlock> haplotypeBlocks = new ArrayList<HaplotypeBlock>();
private final Map<Snp, HaplotypeBlock> haplotypesBySnp = new HashMap<Snp, HaplotypeBlock>();
private final Map<String, HaplotypeBlock> haplotypesBySnpName = new HashMap<String, HaplotypeBlock>();
private final Map<String, HaplotypeBlock> haplotypesBySnpLocus = new HashMap<String, HaplotypeBlock>();
private final Map<String,Snp> snpsByPosition = new HashMap<String,Snp>();
private IntervalList intervals;
private final SAMFileHeader header;
/**
* Constructs a HaplotypeMap from the provided file.
*/
public HaplotypeMap(final File file) {
BufferedReader in = null;
try {
in = new BufferedReader(new InputStreamReader(IOUtil.openFileForReading(file)));
// Setup a reader and parse the header
final StringBuilder builder = new StringBuilder(4096);
String line = null;
while ((line = in.readLine()) != null) {
if (line.startsWith("@")) {
builder.append(line).append('\n');
}
else {
break;
}
}
if (builder.length() == 0) {
throw new IllegalStateException("Haplotype map file must contain header: " + file.getAbsolutePath());
}
this.header = new SAMTextHeaderCodec().decode(new StringLineReader(builder.toString()), "BufferedReader");
this.intervals = new IntervalList(header);
// Then read in the file
final FormatUtil format = new FormatUtil();
final List<HaplotypeMapFileEntry> entries = new ArrayList<HaplotypeMapFileEntry>();
final Map<String, HaplotypeBlock> anchorToHaplotype = new HashMap<String, HaplotypeBlock>();
do {
if (line.trim().isEmpty()) continue; // skip over blank lines
if (line.startsWith("#")) continue; // ignore comments/headers
// Make sure we have the right number of fields
final String[] fields = line.split("\\t");
if (fields.length < 6 || fields.length > 8) {
throw new PicardException("Invalid haplotype map record contains " +
fields.length + " fields: " + line);
}
// Then parse them out
final String chrom = fields[0];
final int pos = format.parseInt(fields[1]);
final String name = fields[2];
final byte major = (byte)fields[3].charAt(0);
final byte minor = (byte)fields[4].charAt(0);
final double maf = format.parseDouble(fields[5]);
final String anchor = fields.length > 6 ? fields[6] : null;
final String fpPanels = fields.length > 7 ? fields[7] : null;
List<String> panels = null;
if (fpPanels != null) {
panels = new ArrayList<String>();
for (final String panel : fpPanels.split(",")) {
panels.add(panel);
}
}
// If it's the anchor snp, start the haplotype
if (anchor == null || anchor.trim().equals("") || name.equals(anchor)) {
final HaplotypeBlock type = new HaplotypeBlock(maf);
type.addSnp(new Snp(name, chrom, pos, major, minor, maf, panels));
anchorToHaplotype.put(name, type);
}
else { // Otherwise save it for later
final HaplotypeMapFileEntry entry = new HaplotypeMapFileEntry(
chrom, pos, name, major, minor, maf, anchor, panels);
entries.add(entry);
}
}
while ((line = in.readLine()) != null);
// Now, go through and add all the anchored snps
for (final HaplotypeMapFileEntry entry : entries) {
final HaplotypeBlock block = anchorToHaplotype.get(entry.anchorSnp);
if (block == null) {
throw new PicardException("No haplotype found for anchor snp " + entry.anchorSnp);
}
block.addSnp(new Snp(entry.snpName, entry.chromosome, entry.position,
entry.majorAllele, entry.minorAllele,
entry.minorAlleleFrequency, entry.panels));
}
// And add them all
for (final HaplotypeBlock block : anchorToHaplotype.values()) {
addHaplotype(block);
}
}
catch (IOException ioe) {
throw new PicardException("Error parsing haplotype map.", ioe);
}
finally {
if (in != null) {
try { in.close(); } catch (Exception e) { /* do nothing */ }
}
}
}
/** Constructs an empty HaplotypeMap using the provided SAMFileHeader's sequence dictionary. */
public HaplotypeMap(final SAMFileHeader header) {
this.header = header;
this.intervals = new IntervalList(header);
}
/**
* Adds a HaplotypeBlock to the map and updates all the relevant caches/indices.
*/
public void addHaplotype(final HaplotypeBlock haplotypeBlock) {
this.haplotypeBlocks.add(haplotypeBlock);
for (final Snp snp : haplotypeBlock.getSnps()) {
this.haplotypesBySnp.put(snp, haplotypeBlock);
this.haplotypesBySnpName.put(snp.getName(), haplotypeBlock);
this.haplotypesBySnpLocus.put(toKey(snp.getChrom(), snp.getPos()), haplotypeBlock);
this.snpsByPosition.put(toKey(snp.getChrom(), snp.getPos()), snp);
this.intervals.add(new Interval(snp.getChrom(), snp.getPos(), snp.getPos(), false, snp.getName()));
}
}
/** Queries a HaplotypeBlock by Snp object. Returns NULL if none found. */
public HaplotypeBlock getHaplotype(final Snp snp) {
return this.haplotypesBySnp.get(snp);
}
/** Queries a HaplotypeBlock by Snp name. Returns NULL if none found. */
public HaplotypeBlock getHaplotype(final String snpName) {
return this.haplotypesBySnpName.get(snpName);
}
/** Queries a HaplotypeBlock by Snp chromosome and position. Returns NULL if none found. */
public HaplotypeBlock getHaplotype(final String chrom, final int pos) {
return this.haplotypesBySnpLocus.get(toKey(chrom, pos));
}
/** Returns an unmodifiable collection of all the haplotype blocks in the map. */
public List<HaplotypeBlock> getHaplotypes() {
return Collections.unmodifiableList(this.haplotypeBlocks);
}
/** Queries a Snp by chromosome and position. Returns NULL if none found. */
public Snp getSnp(final String chrom, final int pos) {
return this.snpsByPosition.get(toKey(chrom, pos));
}
/** Returns an unmodifiable collection of all SNPs in all Haplotype blocks. */
public Set<Snp> getAllSnps() {
return Collections.unmodifiableSet(haplotypesBySnp.keySet());
}
/** Returns an IntervalList with an entry for every SNP in every Haplotype in the map. */
public IntervalList getIntervalList() {
this.intervals = this.intervals.sorted(); // TODO: should probably do this elsewhere
return this.intervals;
}
private String toKey(final String chrom, final int pos) {
return chrom + ":" + pos;
}
/**
* Returns a copy of this haplotype map that excludes haplotypes on the chromosomes provided.
* @param chroms a set of zero or more chromosome names
*/
public HaplotypeMap withoutChromosomes(final Set<String> chroms) {
final HaplotypeMap out = new HaplotypeMap(getHeader());
for (final HaplotypeBlock block : this.haplotypeBlocks) {
if (!chroms.contains(block.getFirstSnp().getChrom())) {
out.addHaplotype(block);
}
}
return out;
}
/** Writes out a HaplotypeMap file with the contents of this map. */
public void writeToFile(final File file) {
try {
final BufferedWriter out = new BufferedWriter(new OutputStreamWriter(IOUtil.openFileForWriting(file)));
final FormatUtil format = new FormatUtil();
// Write out the header
if (this.header != null) {
final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
codec.encode(out, this.header);
}
// Write the header for the entries.
out.write("#CHROMOSOME\tPOSITION\tNAME\tMAJOR_ALLELE\tMINOR_ALLELE\tMAF\tANCHOR_SNP\tPANELS");
out.newLine();
final List<HaplotypeMapFileEntry> entries = new ArrayList<HaplotypeMapFileEntry>();
for (final HaplotypeBlock block : this.getHaplotypes()) {
String anchor = null;
final SortedSet<Snp> snps = new TreeSet<Snp>(block.getSnps());
for (final Snp snp : snps) {
entries.add(new HaplotypeMapFileEntry(snp.getChrom(), snp.getPos(), snp.getName(),
snp.getAllele1(), snp.getAllele2(), snp.getMaf(), anchor, snp.getFingerprintPanels()));
if (anchor == null) {
anchor = snp.getName();
}
}
}
Collections.sort(entries);
for (final HaplotypeMapFileEntry entry : entries) {
out.write(entry.chromosome + "\t");
out.write(format.format(entry.position) + "\t");
out.write(entry.snpName + "\t");
out.write((char)entry.majorAllele + "\t");
out.write((char)entry.minorAllele + "\t");
out.write(format.format(entry.minorAlleleFrequency) + "\t");
if (entry.anchorSnp != null) {
out.write(entry.anchorSnp);
}
out.write("\t");
if (entry.getPanels() != null) {
out.write(entry.getPanels());
}
out.newLine();
}
out.flush();
out.close();
}
catch (IOException ioe) {
throw new PicardException("Error writing out maplotype map to file: " + file.getAbsolutePath(), ioe);
}
}
public SAMFileHeader getHeader() { return header; }
/** Class used to represent all the information for a row in a haplotype map file, used in reading and writing. */
private class HaplotypeMapFileEntry implements Comparable {
private final String chromosome;
private final int position;
private final String snpName;
private final byte majorAllele;
private final byte minorAllele;
private final double minorAlleleFrequency;
private final String anchorSnp;
private final List<String> panels;
public HaplotypeMapFileEntry(final String chrom, final int pos, final String name,
final byte major, final byte minor, final double maf,
final String anchorSnp, final List<String> fingerprintPanels) {
this.chromosome = chrom;
this.position = pos;
this.snpName = name;
this.majorAllele = major;
this.minorAllele = minor;
this.minorAlleleFrequency = maf;
this.anchorSnp = anchorSnp;
// Always sort the list of panels so they are in a predictable order
this.panels = new ArrayList<String>();
if (fingerprintPanels != null) {
this.panels.addAll(fingerprintPanels);
Collections.sort(this.panels);
}
}
public String getPanels() {
if (panels == null) return "";
final StringBuilder sb = new StringBuilder();
for (final String panel : panels) {
if (sb.length() > 0) sb.append(',');
sb.append(panel);
}
return sb.toString();
}
public int compareTo(final Object o) {
final HaplotypeMapFileEntry that = (HaplotypeMapFileEntry) o;
int diff = header.getSequenceIndex(this.chromosome) - header.getSequenceIndex(that.chromosome);
if (diff != 0) return diff;
diff = this.position - that.position;
if (diff != 0) return diff;
diff = this.snpName.compareTo(that.snpName);
if (diff != 0) return diff;
diff = this.majorAllele - that.majorAllele;
if (diff != 0) return diff;
diff = this.minorAllele - that.minorAllele;
if (diff != 0) return diff;
diff = Double.compare(this.minorAlleleFrequency, that.minorAlleleFrequency);
if (diff != 0) return diff;
if (this.anchorSnp != null) {
if (that.anchorSnp != null) {
diff = this.anchorSnp.compareTo(that.anchorSnp);
}
else {
diff = 1;
}
}
else {
if (that.anchorSnp != null) {
diff = -1;
}
else {
diff = 0;
}
}
if (diff != 0) return diff;
final String p1 = this.getPanels();
final String p2 = that.getPanels();
if (p1 != null) {
if (p2 != null) {
return p1.compareTo(p2);
}
return 1;
}
else if (p2 != null) {
return -1;
}
else {
return 0;
}
}
}
}