package org.nextprot.api.core.utils.exon;
import com.google.common.base.Preconditions;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nextprot.api.core.domain.AminoAcid;
import org.nextprot.api.core.domain.Exon;
import java.util.*;
/**
* Compute phases and categorize exons of isoform transcripts
*
* Created by fnikitin on 22/07/15.
*/
public class TranscriptExonsAnalyser {

    private static final Log LOGGER = LogFactory.getLog(TranscriptExonsAnalyser.class);

    /** Listener used by the no-arg constructor: it writes the analysis results directly into each exon. */
    private static final ExonsAnalysisListener DEFAULT_HANDLER = new ExonsAnalysisListenerImpl();

    /** Exons of the transcript being analysed, sorted by ascending first position on gene. */
    private final List<Exon> exons;

    /** Receives every analysis event (start, per-exon result, failure, termination). */
    private final ExonsAnalysisListener handler;

    /** Accession given to the last {@code extract} call; only stored, not otherwise read in this class. */
    private String accession;
    private ExonCategorizer categorizer;
    private String isoformSequence;

    /** Gene position substituted for the exon start on START and MONO exons. */
    private int startPositionIsoform;

    /** Gene position substituted for the exon end on STOP and MONO exons. */
    private int endPositionIsoform;

    /** Number of coding nucleotides accumulated over the coding exons handled so far. */
    private int currentTranscriptLen;

    /** 0-based index in the isoform sequence of the amino acid currently pointed at (-1 before the first one). */
    private int currentIsoformPos;

    /** {@code currentTranscriptLen % 3}: number of nucleotides of the current codon already consumed. */
    private int currentPhase;

    /**
     * Creates an analyser that updates the given exons on the fly
     * (first/last amino acid and coding status).
     */
    public TranscriptExonsAnalyser() {

        this(DEFAULT_HANDLER);
    }

    /**
     * Creates an analyser reporting its results to the given listener.
     *
     * @param handler the listener notified of analysis events (must not be null)
     * @throws NullPointerException if {@code handler} is null
     */
    public TranscriptExonsAnalyser(ExonsAnalysisListener handler) {

        Preconditions.checkNotNull(handler);

        this.handler = handler;
        exons = new ArrayList<>();
    }

    /**
     * Resets the whole analysis state for a new transcript and stores its exons
     * sorted by first position on gene.
     */
    private void init(String accession, String isoformSequence, int startPositionIsoform, int endPositionIsoform, Collection<Exon> exons) {

        this.accession = accession;
        this.isoformSequence = isoformSequence;
        this.startPositionIsoform = startPositionIsoform;
        this.endPositionIsoform = endPositionIsoform;
        this.currentTranscriptLen = 0;
        this.currentIsoformPos = -1;
        this.currentPhase = 0;

        // Drop exons left over from a previous extract() call on this instance
        this.exons.clear();
        this.exons.addAll(exons);

        Collections.sort(this.exons, new Comparator<Exon>() {
            @Override
            public int compare(Exon e1, Exon e2) {

                // Integer.compare avoids the overflow a plain subtraction could produce
                return Integer.compare(e1.getFirstPositionOnGene(), e2.getFirstPositionOnGene());
            }
        });

        categorizer = new ExonCategorizer(startPositionIsoform, endPositionIsoform);
    }

    /**
     * Same as {@link #extract(String, String, int, int, Collection)} with an empty accession.
     */
    public void extract(String isoformSequence, int startPositionIsoform, int endPositionIsoform, Collection<Exon> exons) {

        extract("", isoformSequence, startPositionIsoform, endPositionIsoform, exons);
    }

    /**
     * Analyses the given exons in ascending order of first position on gene and
     * notifies the listener for each of them.
     *
     * <p>The listener receives {@code started()}, then for each exon {@code startedExon(exon)},
     * one of {@code analysedCodingExon}, {@code analysedNonCodingExon} or
     * {@code analysedCodingExonFailed}, and {@code terminated(exon)}; finally {@code terminated()}.
     * When a coding exon fails, the analysis stops immediately: {@code terminated(exon)} is not
     * called for that exon, the remaining exons are skipped, and {@code terminated()} is still called.
     *
     * @param accession the accession associated with this analysis
     * @param isoformSequence the isoform sequence in which amino acids are looked up
     * @param startPositionIsoform the gene position used as start of START and MONO exons
     * @param endPositionIsoform the gene position used as end of STOP and MONO exons
     * @param exons the exons to analyse (in any order; the collection itself is not modified)
     */
    public void extract(String accession, String isoformSequence, int startPositionIsoform, int endPositionIsoform, Collection<Exon> exons) {

        init(accession, isoformSequence, startPositionIsoform, endPositionIsoform, exons);

        handler.started();

        // Iterate over the sorted copy: the phase and amino-acid positions are accumulated
        // from one exon to the next, so exons have to be visited in gene order.
        for (Exon exon : this.exons) {

            handler.startedExon(exon);

            ExonCategory exonCategory = categorizer.categorize(exon);

            if (exonCategory.isCoding()) {
                if (!handleCodingExon(isoformSequence, exon, exonCategory)) {
                    break;
                }
            }
            else {
                handler.analysedNonCodingExon(exon, exonCategory);
            }

            handler.terminated(exon);
        }

        handler.terminated();
    }

    /**
     * Moves to the first amino acid of the next coding exon: only advances when the
     * previous exon ended on a codon boundary (phase 0); otherwise the amino acid
     * straddles both exons and the position is kept.
     */
    private void moveToNextFirstPos() {

        if (currentPhase == 0) {
            currentIsoformPos++;
        }
    }

    /**
     * Recomputes position and phase from the accumulated transcript length so that
     * they point at the last amino acid of the exon just added.
     */
    private void moveToNextLastPos() {

        currentIsoformPos = currentTranscriptLen / 3;
        currentPhase = currentTranscriptLen % 3;

        // A complete last codon: step back onto the amino acid it codes for
        if (currentPhase == 0) {
            currentIsoformPos--;
        }
    }

    /**
     * Computes the first and last amino acids of a coding exon and reports them to the listener.
     *
     * @return true if the exon was analysed successfully, false if its first or last
     *         amino acid lies beyond the end of the isoform sequence (a failure is then reported)
     */
    private boolean handleCodingExon(String isoformSequence, Exon exon, ExonCategory cat) {

        int startPositionExon = exon.getFirstPositionOnGene();
        int endPositionExon = exon.getLastPositionOnGene();

        // Only the part of the exon between the isoform start and end is taken into account
        if (cat == ExonCategory.START || cat == ExonCategory.MONO) {
            startPositionExon = startPositionIsoform;
        }

        if (cat == ExonCategory.STOP || cat == ExonCategory.MONO) {
            endPositionExon = endPositionIsoform;
        }

        moveToNextFirstPos();

        AminoAcid first = newAminoAcid(isoformSequence, currentIsoformPos, currentPhase);

        // update transcript length
        currentTranscriptLen += endPositionExon - startPositionExon + 1;

        moveToNextLastPos();

        AminoAcid last = newAminoAcid(isoformSequence, currentIsoformPos, currentPhase);

        if (first.getPosition() > isoformSequence.length()) {

            handler.analysedCodingExonFailed(exon, new ExonOutOfBoundError(first, last,
                    ExonOutOfBoundError.AminoAcidOutOfBound.FIRST, isoformSequence.length()));
            return false;
        }
        else if (last.getPosition() > isoformSequence.length()) {

            handler.analysedCodingExonFailed(exon, new ExonOutOfBoundError(first, last,
                    ExonOutOfBoundError.AminoAcidOutOfBound.LAST, isoformSequence.length()));
            return false;
        }

        handler.analysedCodingExon(exon, first, last, cat);
        return true;
    }

    /**
     * Builds the amino acid found at the given 0-based index of the sequence; it is created
     * with {@code aaPosition + 1} as position, and with '?' as residue when the index is
     * beyond the end of the sequence.
     */
    private AminoAcid newAminoAcid(String isoformSequence, int aaPosition, int phase) {

        if (aaPosition >= isoformSequence.length()) {
            return new AminoAcid(aaPosition + 1, phase, '?');
        }

        return new AminoAcid(aaPosition + 1, phase, isoformSequence.charAt(aaPosition));
    }

    /**
     * Update exon on the fly in this default implementation
     */
    private static class ExonsAnalysisListenerImpl implements ExonsAnalysisListener {

        @Override
        public void started() {}

        @Override
        public void startedExon(Exon exon) {}

        /** Stores the first and last amino acids and the coding status in the exon. */
        @Override
        public void analysedCodingExon(Exon exon, AminoAcid first, AminoAcid last, ExonCategory category) {

            exon.setFirstAminoAcid(first);
            exon.setLastAminoAcid(last);
            exon.setCodingStatus(category.getTypeString());
        }

        /** Stores the coding status in the exon. */
        @Override
        public void analysedNonCodingExon(Exon exon, ExonCategory category) {

            exon.setCodingStatus(category.getTypeString());
        }

        @Override
        public void terminated(Exon exon) {}

        @Override
        public void terminated() {}

        /** Logs a warning describing the out-of-bound amino acid; the exon is left untouched. */
        @Override
        public void analysedCodingExonFailed(Exon exon, ExonOutOfBoundError exonOutOfBoundError) {

            StringBuilder sb = new StringBuilder("SequenceIndexOutOfBoundsException: index (");
            sb.append(exonOutOfBoundError.getOutOfBoundAminoAcid().getPosition()-1);
            sb.append(") must be less than size (").append(exonOutOfBoundError.getIsoformLength()).append(")");

            //TODO there is a bug in mapping transcript <-> isoform (should be fixed in the database)
            //Should be solved with: https://issues.isb-sib.ch/browse/NEXTPROT-1005
            LOGGER.warn(sb.toString());
        }
    }
}