/*
* The MIT License
*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package picard.annotation;
import htsjdk.samtools.util.CoordMath;
import htsjdk.samtools.util.Interval;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
/**
 * Holds annotation of a gene for storage in an OverlapDetector. May hold multiple transcripts for the same gene.
 * The transcripts must all be relative to the same strand.
 *
 * Iterating over a Gene yields its transcripts. They are kept in a HashMap keyed by transcript name, so the
 * iteration order is unspecified.
 */
public class Gene extends Interval implements Iterable<Gene.Transcript> {
    /** Transcripts registered on this gene, keyed by transcript name. */
    private final Map<String, Transcript> transcripts = new HashMap<String, Transcript>();

    /**
     * Creates a gene with no transcripts. All arguments are forwarded unchanged to the Interval constructor.
     *
     * @param sequence forwarded to Interval
     * @param start forwarded to Interval
     * @param end forwarded to Interval
     * @param negative forwarded to Interval
     * @param name forwarded to Interval; also used in the duplicate-transcript error message via getName()
     */
    public Gene(final String sequence, final int start, final int end, final boolean negative, final String name) {
        super(sequence, start, end, negative, name);
    }

    /**
     * Creates a new transcript, registers it on this gene under the given name and returns it.
     * The returned transcript has room for numExons exons, none of which are set yet; see
     * {@link Transcript#addExon(int, int)}.
     *
     * @param name key under which the transcript is stored; must not already be registered on this gene
     * @param transcriptionStart stored on the transcript as-is
     * @param transcriptionEnd stored on the transcript as-is
     * @param codingStart stored on the transcript as-is
     * @param codingEnd stored on the transcript as-is
     * @param numExons size of the transcript's exon array
     * @return the newly created transcript
     * @throws AnnotationException if a transcript with this name was already added to this gene
     */
    public Transcript addTranscript(final String name, final int transcriptionStart, final int transcriptionEnd, final int codingStart, final int codingEnd, final int numExons) {
        if (transcripts.containsKey(name)) {
            throw new AnnotationException("Transcript " + name + " for gene " + this.getName() + " appears more than once");
        }
        final Transcript tx = new Transcript(name, transcriptionStart, transcriptionEnd, codingStart, codingEnd, numExons);
        transcripts.put(name, tx);
        return tx;
    }

    /** @return an iterator over the transcripts of this gene, in unspecified order. */
    @Override
    public Iterator<Transcript> iterator() {
        return transcripts.values().iterator();
    }

    /**
     * Compares using the Interval comparison first; when that reports a tie, the result of
     * isPositiveStrand() breaks it (false orders before true, per Boolean.compareTo).
     *
     * NOTE(review): the parameter type is Gene, so this is presumably an overload of Interval's
     * compareTo rather than an override -- confirm against Interval before adding @Override.
     */
    public int compareTo(final Gene that) {
        final int ret = super.compareTo(that);
        if (ret != 0) return ret;
        return Boolean.valueOf(this.isPositiveStrand()).compareTo(that.isPositiveStrand());
    }

    /**
     * Two genes are equal when {@link #compareTo(Gene)} returns 0. Transcripts are not compared.
     * Anything that is not a Gene (including null) is unequal.
     *
     * NOTE(review): hashCode() is inherited; confirm the inherited implementation is consistent
     * with this definition of equality.
     */
    @Override
    public boolean equals(final Object other) {
        if (!(other instanceof Gene)) return false;
        if (this == other) return true;
        return this.compareTo((Gene) other) == 0;
    }

    /**
     * A single transcript of a gene. Sequence name is stored in the enclosing object (class Gene).
     */
    public class Transcript {
        public final String name;
        public final int transcriptionStart;
        public final int transcriptionEnd;
        public final int codingStart;
        public final int codingEnd;
        /** Exon slots, sized at construction; a slot is null until filled by {@link #addExon(int, int)}. */
        public final Exon[] exons;
        private int length; // the number of bases in the transcript (sum of the lengths of the exons added so far)

        /**
         * 1-based, inclusive representation of an exon. The sequence name is stored in an enclosing object (class Gene).
         */
        public class Exon {
            public final int start;
            public final int end;

            public Exon(final int start, final int end) {
                this.start = start;
                this.end = end;
            }
        }

        /**
         * Stores the given values and allocates an exon array with numExons empty (null) slots.
         * The constructor does not register the transcript on the enclosing gene;
         * {@link Gene#addTranscript} does that.
         */
        public Transcript(final String name, final int transcriptionStart, final int transcriptionEnd, final int codingStart, final int codingEnd, final int numExons) {
            this.name = name;
            this.transcriptionStart = transcriptionStart;
            this.transcriptionEnd = transcriptionEnd;
            this.codingStart = codingStart;
            this.codingEnd = codingEnd;
            this.exons = new Exon[numExons];
        }

        /**
         * Places a new exon in the first empty slot of {@link #exons} and adds its length to the transcript length.
         * Other methods ({@link #start()}, {@link #end()}, the exon lookups) read the array in index order,
         * and the exon lookup used when assigning locus functions stops at the first exon that starts
         * after the queried locus, so exons should be added in ascending coordinate order.
         *
         * @param start first position of the exon
         * @param end last position of the exon
         * @return the exon that was created
         * @throws IllegalStateException if every slot is already filled
         */
        public Exon addExon(final int start, final int end) {
            // Scan for the first unfilled slot rather than keeping a counter: the array is public,
            // so slots may also have been populated directly.
            for (int i = 0; i < this.exons.length; ++i) {
                if (exons[i] == null) {
                    exons[i] = new Exon(start, end);
                    this.length += CoordMath.getLength(start, end);
                    return exons[i];
                }
            }
            throw new IllegalStateException("Attempting to add more exons than exist for transcript.");
        }

        /** @return the start of the first exon slot. Fails if the transcript has no exon slots or the first one is unset. */
        public int start() {
            return exons[0].start;
        }

        /** @return the end of the last exon slot. Fails if the transcript has no exon slots or the last one is unset. */
        public int end() {
            return exons[exons.length - 1].end;
        }

        /** @return the summed length of all exons added through {@link #addExon(int, int)}. */
        public int length() {
            return this.length;
        }

        /** @return true if the enclosing gene currently holds exactly one transcript. */
        public boolean isSoloTranscript() {
            return Gene.this.transcripts.size() == 1;
        }

        /** @return the enclosing gene. */
        public Gene getGene() {
            return Gene.this;
        }

        /**
         * Write into locusFunctions the function of each position from start to start + locusFunctions.length
         * relative to this transcript. Does not overwrite an existing value in locusFunctions that is stronger
         * than the function for that locus in this transcript.
         *
         * Only positions inside [transcriptionStart, transcriptionEnd] are touched. Strength is the enum
         * ordinal of LocusFunction. Every element of locusFunctions in the visited range must be non-null,
         * and all exon slots must be filled.
         *
         * @param start 1-based genomic coordinate of the first position in locusFunctions.
         * @param locusFunctions array updated in place; element k describes genomic position start + k
         */
        public void assignLocusFunctionForRange(final int start, final LocusFunction[] locusFunctions) {
            // Both bounds are loop-invariant: clip the array's genomic span to the transcribed region.
            final int firstLocus = Math.max(start, transcriptionStart);
            final int lastLocus = Math.min(transcriptionEnd, CoordMath.getEnd(start, locusFunctions.length));

            for (int i = firstLocus; i <= lastLocus; ++i) {
                final int idx = i - start;
                // Nothing this transcript can assign ranks above CODING, so leave such entries alone.
                if (locusFunctions[idx].ordinal() > LocusFunction.CODING.ordinal()) continue;

                final LocusFunction locusFunction;
                if (inExon(i)) {
                    if (utr(i)) locusFunction = LocusFunction.UTR;
                    else locusFunction = LocusFunction.CODING;
                } else locusFunction = LocusFunction.INTRONIC;

                // Only ever upgrade the recorded function, never downgrade it.
                if (locusFunction.ordinal() > locusFunctions[idx].ordinal()) {
                    locusFunctions[idx] = locusFunction;
                }
            }
        }

        /**
         * Increments the per-base coverage counter for every genomic position in the given range that
         * falls inside one of this transcript's exons. Positions outside all exons are ignored.
         *
         * The loop visits genomeStart up to but not including genomeEnd.
         * NOTE(review): confirm callers intend genomeEnd to be exclusive.
         *
         * @param genomeStart first genomic position to count
         * @param genomeEnd position at which counting stops (not itself counted)
         * @param coverage counters indexed by 0-based transcript position (transcript coordinate - 1);
         *                 must be large enough for every transcript coordinate that can be produced
         */
        public void addCoverageCounts(final int genomeStart, final int genomeEnd, final int[] coverage) {
            for (int i = genomeStart; i < genomeEnd; ++i) {
                final int txBase = getTranscriptCoordinate(i);
                if (txBase > 0) coverage[txBase - 1]++;
            }
        }

        /**
         * Given a coordinate on the genome (same chromosome) give the corresponding coordinate in the transcript.
         * The transcript coordinate is the 1-based position within the exons laid end to end in array order;
         * strand is not consulted.
         *
         * @return the 1-based transcript coordinate, or -1 if the position is not inside any exon
         */
        public int getTranscriptCoordinate(final int genomeCoordinate) {
            int exonOffset = 0; // total length of the exons that precede the one being examined
            for (final Exon e : exons) {
                if (genomeCoordinate >= e.start && genomeCoordinate <= e.end) {
                    return (genomeCoordinate - e.start + 1) + exonOffset;
                }
                exonOffset += CoordMath.getLength(e.start, e.end);
            }
            return -1;
        }

        /** @return true if locus lies outside [codingStart, codingEnd]. */
        private boolean utr(final int locus) {
            return locus < codingStart || locus > codingEnd;
        }

        /**
         * @return true if locus falls inside one of the exons. Stops at the first exon that starts after
         *         locus, which is only correct when the exons are sorted by ascending start.
         */
        private boolean inExon(final int locus) {
            for (final Exon exon : exons) {
                if (exon.start > locus) return false;
                if (inRange(exon.start, exon.end, locus)) return true;
            }
            return false;
        }

        /** @return true if start <= locus <= end. */
        private boolean inRange(final int start, final int end, final int locus) {
            return (locus >= start && locus <= end);
        }

        /**
         * Transcripts are equal when their name, transcription bounds and coding bounds all match.
         * Exons and the enclosing gene are not compared.
         */
        @Override
        public boolean equals(final Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;

            final Transcript that = (Transcript) o;

            if (codingEnd != that.codingEnd) return false;
            if (codingStart != that.codingStart) return false;
            if (transcriptionEnd != that.transcriptionEnd) return false;
            if (transcriptionStart != that.transcriptionStart) return false;
            if (!name.equals(that.name)) return false;

            return true;
        }

        /** Hash over the same fields that {@link #equals(Object)} compares. */
        @Override
        public int hashCode() {
            int result = name.hashCode();
            result = 31 * result + transcriptionStart;
            result = 31 * result + transcriptionEnd;
            result = 31 * result + codingStart;
            result = 31 * result + codingEnd;
            return result;
        }
    }
}