package org.nextprot.api.core.utils.seqmap;
import org.junit.Assert;
import org.junit.Test;
import org.nextprot.api.commons.constants.AnnotationCategory;
import org.nextprot.api.commons.service.MasterIdentifierService;
import org.nextprot.api.commons.utils.NucleotidePositionRange;
import org.nextprot.api.core.domain.Entry;
import org.nextprot.api.core.domain.Isoform;
import org.nextprot.api.core.domain.annotation.Annotation;
import org.nextprot.api.core.domain.annotation.AnnotationIsoformSpecificity;
import org.nextprot.api.core.service.EntryBuilderService;
import org.nextprot.api.core.service.fluent.EntryConfig;
import org.nextprot.api.core.test.base.CoreUnitBaseTest;
import org.nextprot.api.core.utils.IsoformUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.test.context.ActiveProfiles;
import java.io.*;
import java.util.*;
/**
 * Integration tests for {@code IsoformSequencePositionMapper}.
 *
 * For a variant annotation, the tests take the amino-acid position declared on one isoform,
 * ask the mapper for the corresponding codon position on the gene master sequence, project
 * that codon onto every other isoform of the entry and compare the projected amino-acid
 * position with the position declared for that isoform in the annotation's targeting
 * isoforms map.
 */
@ActiveProfiles({ "dev" })
public class IsoformSequencePositionMapperIntegrationTest extends CoreUnitBaseTest {

    /** Switch to {@code true} to print detailed diagnostics on standard output. */
    private boolean sout = false;

    /** Progress log used by the all-entries run; {@code null} while no log file is open. */
    private BufferedWriter log = null;

    @Autowired
    private EntryBuilderService entryBuilderService;

    @Autowired
    private MasterIdentifierService masterIdentifierService;

    /**
     * Runs the propagation check on the entries returned by the master identifier service,
     * resuming just after the last entry already tested, and writes one progress line per
     * entry to a log file in the working directory.
     *
     * Disabled on purpose (the {@code @Test} annotation is commented out).
     *
     * @throws Exception if an entry cannot be built or the log file cannot be written
     */
    //@Test
    public void testPropagationForVariantsOfAllEntries() throws Exception {
        openLogger("testPropagationForVariantsOfAllEntries.log");
        int entriesWithErrors = 0;
        try {
            Set<String> acs = masterIdentifierService.findUniqueNames();
            int cnt = 0;
            // NOTE(review): resuming after a given accession presumably relies on the set
            // being iterated in the same order on every run - confirm.
            boolean working = false;
            for (String ac : acs) {
                cnt++;
                if (working) {
                    if (sout) {
                        System.out.println("--- START testing propagation for variants of entry no. " + cnt + ":" + ac);
                    }
                    int errorCnt = getErrorsDuringPropagationOnVariantsOfSingleEntry(ac);
                    if (errorCnt > 0) {
                        entriesWithErrors++;
                    }
                    log(new Date() + " - " + cnt + " - " + ac + (errorCnt == 0 ? " OK" : " has " + errorCnt + " ERROR(s)"));
                    if (sout) {
                        System.out.println("--- END testing propagation for variants of entry no. " + cnt + ":" + ac
                                + (errorCnt == 0 ? ": OK" : ": with " + errorCnt + " ERRORs"));
                    }
                }
                if (ac.equals("NX_Q86YS6")) {
                    working = true; // start just after last entry tested
                }
            }
        } finally {
            // always release the log file, even when an entry makes the run abort
            closeLogger();
        }
        Assert.assertEquals(0, entriesWithErrors);
    }

    /*
     *
     * last entry tested so far: NX_Q86YS6
     * Known errors so far:
     *
    NX_O00555 has 1 ERROR(s)
    NX_P46937 has 1 ERROR(s)
    NX_P34810 has 1 ERROR(s)
    NX_P33527 has 1 ERROR(s)
    NX_Q9HB55 has 1 ERROR(s)
    NX_P19544 has 1 ERROR(s)
    NX_Q96NU1 has 1 ERROR(s)
    NX_Q8N9B5 has 1 ERROR(s)
    NX_Q86UR1 has 1 ERROR(s)
    NX_Q9NUA8 has 1 ERROR(s)
    NX_Q9P275 has 1 ERROR(s)
    NX_Q02078 has 1 ERROR(s)
    NX_P78324 has 1 ERROR(s)
    NX_Q9NPQ8 has 1 ERROR(s)
    NX_Q96K49 has 1 ERROR(s)
    NX_Q00653 has 1 ERROR(s)
    NX_Q9UPQ7 has 1 ERROR(s)
    NX_O95825 has 1 ERROR(s)
    NX_Q86VQ3 has 1 ERROR(s)
    NX_Q96QH2 has 1 ERROR(s)
    NX_Q9UJW3 has 1 ERROR(s)
     */

    /**
     * Propagation check for an entry with a known error.
     *
     * Disabled on purpose (the {@code @Test} annotation is commented out).
     */
    //@Test
    public void testPropagationForVariantsOfNX_P78324() throws Exception {
        // 1. first known error
        // TODO see with Anne, fix it or ignore it !
        // known error: interpretation
        // variant 129 PD->D should not project on iso-4 because the P at 129 is at the end of an exon
        // the deletion is at the border of an exon ? and would impact on splicing ?
        int errorCnt = getErrorsDuringPropagationOnVariantsOfSingleEntry("NX_P78324");
        Assert.assertEquals(0, errorCnt);
    }

    @Test
    public void testPropagationForVariantsOfNX_P20591() throws Exception {
        int errorCnt = getErrorsDuringPropagationOnVariantsOfSingleEntry("NX_P20591");
        Assert.assertEquals(0, errorCnt);
    }

    @Test
    public void testPropagationForVariantsOfNX_O15503() throws Exception {
        int errorCnt = getErrorsDuringPropagationOnVariantsOfSingleEntry("NX_O15503");
        Assert.assertEquals(0, errorCnt);
    }

    @Test
    public void testPropagationForVariantsOfNX_P05019() throws Exception {
        int errorCnt = getErrorsDuringPropagationOnVariantsOfSingleEntry("NX_P05019");
        Assert.assertEquals(0, errorCnt);
    }

    /**
     * Checks that the codon of one given variant of NX_O00115-1 cannot be projected onto
     * any other isoform of the entry.
     */
    @Test
    public void testSingleVariantWithInvalidNucleotideIndice() throws Exception {
        String entry_ac = "NX_O00115";
        String iso_ac = "NX_O00115-1";
        String variant_ac = "AN_O00115_000472";

        Entry entry = entryBuilderService.build(EntryConfig.newConfig(entry_ac).withTargetIsoforms().withAnnotations());
        Annotation variant = findAnnotationByUniqueName(entry, variant_ac);
        Assert.assertNotNull("variant " + variant_ac + " not found in entry " + entry_ac, variant);

        int pos = variant.getTargetingIsoformsMap().get(iso_ac).getFirstPosition();
        Isoform iso = IsoformUtils.getIsoformByName(entry, iso_ac);
        GeneMasterCodonPosition nuPos = IsoformSequencePositionMapper.getCodonPositionsOnMaster(pos, iso);

        for (Isoform iso2 : entry.getIsoforms()) {
            if (iso2.equals(iso)) {
                continue;
            }
            // the codon cannot be projected to iso2
            CodonNucleotideIndices nuIdx = IsoformSequencePositionMapper.getCodonNucleotideIndices(nuPos, iso2);
            Assert.assertFalse("codon should not have 3 nucleotides on " + iso2.getUniqueName(), nuIdx.has3Nucleotides());
            Assert.assertFalse("nucleotides should not be consecutive on " + iso2.getUniqueName(), nuIdx.areConsecutive());
            Assert.assertFalse("nucleotides should not be in frame on " + iso2.getUniqueName(), nuIdx.areInFrame());
            Assert.assertNull("no amino-acid position expected on " + iso2.getUniqueName(), nuIdx.getAminoAcidPosition());
        }
    }

    /**
     * Documents the difference of 1 between the master position stored in the database
     * and the master position computed by the mapper for one variant of NX_P01308-1.
     */
    @Test
    public void testSingleVariantPositionOnMaster() throws Exception {
        // just to be aware of difference between db info and api info
        String entry_ac = "NX_P01308";
        String iso_ac = "NX_P01308-1";
        String variant_ac = "AN_P01308_001839";
        int expectedBeginPosOnMaster = 430;

        Entry entry = entryBuilderService.build(EntryConfig.newConfig(entry_ac).withTargetIsoforms().withAnnotations());
        Annotation variant = findAnnotationByUniqueName(entry, variant_ac);
        Assert.assertNotNull("variant " + variant_ac + " not found in entry " + entry_ac, variant);

        int pos = variant.getTargetingIsoformsMap().get(iso_ac).getFirstPosition();
        Isoform iso = IsoformUtils.getIsoformByName(entry, iso_ac);
        GeneMasterCodonPosition nuPos = IsoformSequencePositionMapper.getCodonPositionsOnMaster(pos, iso);
        if (sout) {
            System.out.println("isoform position : " + pos);
            System.out.println("nuPos is valid : " + nuPos.isValid());
            System.out.println("master position according to iso mapper service : " + nuPos.getNucleotidePosition(0));
            System.out.println("master position according to table identifier_feature_position : " + expectedBeginPosOnMaster);
            System.out.println("master first_position for Anne : " + (nuPos.getNucleotidePosition(0) - 1));
        }
        // we then have to give the last_position to Anne, there are 2 cases:
        // case 1: original AAs = single AA
        // => master last_position for Anne = first_position for Anne + 3
        // case 2: original AAs length has more than one AAs
        // compute position on master of last AA (same process as above), return nuPos(0)-1 found for iso pos)

        // we expect a difference of 1 between what we have in db and what we have from api
        Assert.assertEquals(Integer.valueOf(expectedBeginPosOnMaster + 1), Integer.valueOf(nuPos.getNucleotidePosition(0)));

        /*
         * SQL to get master position for this variant
         *
        select a.unique_name, a.cv_annotation_type_id, pfp.first_pos,pfp.last_pos,ifp.first_pos as master_frist_pos, ifp.last_pos as master_last_pos from sequence_identifiers si
        inner join annotations a on (a.identifier_id=si.identifier_id)
        inner join annotation_protein_assoc apa on (a.annotation_id=apa.annotation_id)
        inner join protein_feature_positions pfp on (apa.assoc_id=pfp.annotation_protein_id)
        inner join identifier_feature_positions ifp on (ifp.annotation_id=a.annotation_id)
        where si.unique_name='NX_P01308'
        and pfp.first_pos=20;
        SQL result:
        unique_name cv_annotation_type_id first_pos last_pos master_frist_pos master_last_pos
        AN_P01308_001839 1027 20 21 430 433
        (1 row)
         */
    }

    /**
     * Checks the variant annotations of one entry: starting from every isoform on which a
     * variant is declared, the variant codon is projected onto all other isoforms and the
     * projected position is compared with the declared one.
     *
     * The scan stops at the first variant showing an error, so the returned value is
     * either 0 or 1.
     *
     * @param entry_ac accession of the entry to check
     * @return the number of variants found with a propagation error (0 or 1)
     * @throws Exception if the entry cannot be built
     */
    public int getErrorsDuringPropagationOnVariantsOfSingleEntry(String entry_ac) throws Exception {
        Entry entry = entryBuilderService.build(EntryConfig.newConfig(entry_ac).withTargetIsoforms().withAnnotations());
        int delCount = 0;
        int subCount = 0;
        int insCount = 0;
        int otherCount = 0;
        int errorCount = 0;

        for (Annotation a : entry.getAnnotations()) {
            if (!a.getAPICategory().equals(AnnotationCategory.VARIANT)) {
                continue;
            }
            // for each variant annotation: classify it for the summary
            String ori = a.getVariant().getOriginal();
            String mut = a.getVariant().getVariant();
            if (ori.length() == 1 && mut.length() == 1) {
                subCount++;
            } else if (ori.length() == 1 && mut.length() == 0) {
                delCount++;
            } else if (ori.length() == 0 && mut.length() == 1) {
                insCount++;
            } else {
                // counted whatever the verbosity, only the trace depends on sout
                otherCount++;
                if (sout) {
                    System.out.println("Other variant:" + a.getUniqueName());
                }
            }

            Map<String, Integer> isoExpectedPos = getExpectedPosForEachIsoform(entry, a);
            printExpectedPosForEachIsoform(isoExpectedPos, a);

            boolean errorOnVariant = false;
            for (Map.Entry<String, Integer> expected : isoExpectedPos.entrySet()) {
                String iso1name = expected.getKey();
                Integer iso1ExpectedPos = expected.getValue();
                Isoform iso1 = IsoformUtils.getIsoformByName(entry, iso1name);
                if (iso1ExpectedPos == null) {
                    // variant not declared on this isoform: nothing to propagate from
                    continue;
                }
                GeneMasterCodonPosition nuPos = IsoformSequencePositionMapper.getCodonPositionsOnMaster(iso1ExpectedPos, iso1);
                if (!nuPos.isValid()) {
                    errorOnVariant = true;
                    if (sout) {
                        System.out.println("ERROR1: codon positions not found for " + iso1name + " for variant at position: " + iso1ExpectedPos);
                    }
                    continue;
                }
                printIsoLengthAndRangesNuCount(iso1.getUniqueName(), iso1.getSequence(), iso1.getMasterMapping());
                if (sout) {
                    System.out.println("Starting variant propagation from isoform " + iso1name + " at position " + iso1ExpectedPos);
                    System.out.println(getSequenceWithHighlighedPos(iso1.getSequence(), iso1ExpectedPos));
                }

                for (Isoform iso2 : entry.getIsoforms()) {
                    String iso2name = iso2.getUniqueName();
                    if (iso2name.equals(iso1name)) {
                        continue;
                    }
                    CodonNucleotideIndices nuIdx = IsoformSequencePositionMapper.getCodonNucleotideIndices(nuPos, iso2);
                    Integer iso2ActualPos = nuIdx.getAminoAcidPosition();
                    Integer iso2ExpectedPos = isoExpectedPos.get(iso2name);
                    if (sout) {
                        System.out.println("Variant " + a.getUniqueName() + " position on isoform " + iso2name + " is " + iso2ActualPos);
                    }
                    printIsoLengthAndRangesNuCount(iso2.getUniqueName(), iso2.getSequence(), iso2.getMasterMapping());
                    if (sout && iso2ExpectedPos != null) {
                        System.out.println("Expected:" + getSequenceWithHighlighedPos(iso2.getSequence(), iso2ExpectedPos));
                    }
                    if (sout && iso2ActualPos != null) {
                        System.out.println("Actual :" + getSequenceWithHighlighedPos(iso2.getSequence(), iso2ActualPos));
                    }

                    if (iso2ActualPos == null && iso2ExpectedPos == null) {
                        // OK: variant neither declared nor projected on this isoform
                        continue;
                    }
                    if (iso2ActualPos == null || iso2ExpectedPos == null) {
                        errorOnVariant = true;
                        if (sout) {
                            System.out.println("ERROR2: variant position on isoform " + iso2name + " is " + iso2ActualPos + ", expected " + iso2ExpectedPos);
                        }
                    } else if (!iso2ActualPos.equals(iso2ExpectedPos)) {
                        errorOnVariant = true;
                        if (sout) {
                            System.out.println("ERROR3: variant position on isoform " + iso2name + " is " + iso2ActualPos + ", expected " + iso2ExpectedPos);
                        }
                    }
                }
            }
            if (errorOnVariant) {
                errorCount++;
                // stop at the first variant in error
                break;
            }
        }

        if (sout) {
            System.out.println("Summary " + entry.getUniqueName());
            System.out.println("insCount:" + insCount);
            System.out.println("delCount:" + delCount);
            System.out.println("subCount:" + subCount);
            System.out.println("otherCount:" + otherCount);
            System.out.println("errorCount:" + errorCount);
        }
        return errorCount;
    }

    /** Checks the highlighting helper on the first, a middle and the last position. */
    @Test
    public void test1() {
        String first = getSequenceWithHighlighedPos("12345", 1);
        String middle = getSequenceWithHighlighedPos("12345", 3);
        String last = getSequenceWithHighlighedPos("12345", 5);
        if (sout) {
            System.out.println("pos 3:" + middle);
            System.out.println("pos 1:" + first);
            System.out.println("pos 5:" + last);
        }
        Assert.assertEquals("(1)2345", first);
        Assert.assertEquals("12(3)45", middle);
        Assert.assertEquals("1234(5)", last);
    }

    /**
     * Looks up an annotation of the entry by its unique name.
     *
     * @return the first matching annotation, or {@code null} when there is none
     */
    private static Annotation findAnnotationByUniqueName(Entry entry, String uniqueName) {
        for (Annotation a : entry.getAnnotations()) {
            if (uniqueName.equals(a.getUniqueName())) {
                return a;
            }
        }
        return null;
    }

    /**
     * Builds, for every isoform of the entry, the first position of the annotation on that
     * isoform; the value is {@code null} for isoforms the annotation does not target.
     */
    private Map<String, Integer> getExpectedPosForEachIsoform(Entry entry, Annotation a) {
        Map<String, Integer> isoExpectedPos = new HashMap<String, Integer>();
        Map<String, AnnotationIsoformSpecificity> targets = a.getTargetingIsoformsMap();
        for (Isoform isoform : entry.getIsoforms()) {
            String isoname = isoform.getUniqueName();
            AnnotationIsoformSpecificity spec = targets.get(isoname);
            // null marks an isoform on which the variant does not map
            isoExpectedPos.put(isoname, spec == null ? null : Integer.valueOf(spec.getFirstPosition()));
        }
        return isoExpectedPos;
    }

    /** Prints one line per isoform with the expected position and the variation (only when sout is on). */
    private void printExpectedPosForEachIsoform(Map<String, Integer> isoExpectedPos, Annotation a) {
        if (!sout) {
            return;
        }
        for (Map.Entry<String, Integer> expected : isoExpectedPos.entrySet()) {
            StringBuilder sb = new StringBuilder();
            sb.append(a.getUniqueName()).append(" ");
            sb.append(expected.getKey()).append(" ");
            sb.append(expected.getValue()).append(" ");
            sb.append(a.getVariant().getOriginal()).append("->").append(a.getVariant().getVariant());
            System.out.println(sb.toString());
        }
    }

    /**
     * Prints the master mapping ranges of an isoform and whether three times the sequence
     * length equals the number of nucleotides covered by the ranges (only when sout is on).
     */
    private void printIsoLengthAndRangesNuCount(String isoName, String isoSeq, List<NucleotidePositionRange> ranges) {
        if (!sout) {
            return;
        }
        int isoLng = isoSeq.length();
        int nuCount = getNucleotideCount(ranges);
        boolean ok = isoLng * 3 == nuCount;
        for (NucleotidePositionRange r : ranges) {
            System.out.println(isoName + " has masterMapping range " + r);
        }
        System.out.println((ok ? "OK - " : "ERROR4 - ") + isoName + " lng in nu:" + isoLng * 3 + " nuCount:" + nuCount);
    }

    /** Sums the sizes of the ranges, both bounds being counted. */
    private int getNucleotideCount(List<NucleotidePositionRange> ranges) {
        int nuCount = 0;
        for (NucleotidePositionRange r : ranges) {
            nuCount += r.getUpper() - r.getLower() + 1;
        }
        return nuCount;
    }

    /**
     * Returns the sequence with the character at the given position wrapped in parentheses.
     *
     * @param seq the sequence
     * @param pos 1-based position of the character to highlight; a position outside the
     *            sequence makes the underlying {@code StringBuilder.insert} throw
     */
    private String getSequenceWithHighlighedPos(String seq, int pos) {
        // first aa has position 1
        StringBuilder sb = new StringBuilder(seq);
        // the closing parenthesis offset accounts for the opening one just inserted
        sb.insert(pos - 1, '(').insert(pos + 1, ')');
        return sb.toString();
    }

    /** Opens (and truncates) the progress log file. */
    private void openLogger(String filename) throws IOException {
        this.log = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(filename))));
    }

    /** Writes one line to the progress log and flushes it; does nothing when no log is open. */
    private void log(String s) throws IOException {
        if (log != null) {
            log.write(s);
            log.write("\n");
            log.flush();
        }
    }

    /** Closes the progress log; does nothing when no log is open. */
    private void closeLogger() throws IOException {
        if (log != null) {
            try {
                log.close();
            } finally {
                log = null;
            }
        }
    }
}