package org.nextprot.api.core.utils.seqmap; import com.google.common.base.Preconditions; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.nextprot.api.commons.utils.NucleotidePositionRange; import org.nextprot.api.commons.utils.Pair; import java.util.ArrayList; import java.util.List; import java.util.Map.Entry; public class SequencePositionMapper { private final static Log logger = LogFactory.getLog(SequencePositionMapper.class); /* * * nuNum 0 1 2 3 4 5 6 7 8 * exons |---- exon1 ------| |-- exon 2 ---| * nuPos 100 101 102 103 104 201 202 203 204 * codons |--codon1--|-------codon2-------|--codon3--| * * */ static CodonNucleotideIndices getCodonNucleotideIndices(GeneMasterCodonPosition codonPos, List<NucleotidePositionRange> positionsOfIsoformOnDNA) { logger.debug("----------------------------------------------------------"); int lowNum = 0; CodonNucleotideIndices codonPosInTranscript = new CodonNucleotideIndices(); for (NucleotidePositionRange range: positionsOfIsoformOnDNA) { int nu1Pos = range.getLower(); int nu2Pos = range.getUpper(); int highNum = lowNum + nu2Pos - nu1Pos ; logger.debug("nu1Pos:"+ nu1Pos + " nu2Pos:" + nu2Pos + " lowNum:" + lowNum + " highNum:" + highNum); while (true) { int nuIndex = codonPosInTranscript.size(); int nuPos = codonPos.getNucleotidePosition(nuIndex); logger.debug("nuPos("+nuIndex+")=" + nuPos); if (nuPos < nu1Pos || nuPos > nu2Pos) break; int nuNum = lowNum + nuPos - nu1Pos; logger.debug("adding codon nucelotide number:" + nuNum); codonPosInTranscript.addNucleotideIndex(nuNum); if (codonPosInTranscript.size()==3) { logger.debug("returning codonPosInTranscript: " + codonPosInTranscript); return codonPosInTranscript; } } lowNum=highNum + 1; } logger.debug("codon not found in the gene mapping ranges, " + codonPosInTranscript.size() + " nucleotides found"); logger.debug("returning codonPosInTranscript: " + codonPosInTranscript); return codonPosInTranscript; } /** * Get the codon position on gene master that corresponds to the given amino-acid position on isoform * * @param isoformPos the aa position on isoform * @param isoformMasterMapping the list of isoform to gene master mapping * @return the codon position on gene master */ static GeneMasterCodonPosition getCodonPositionOnMaster(int isoformPos, List<NucleotidePositionRange> isoformMasterMapping) { int firstNucPos = isoformPos * 3 - 3; //if (debug) System.out.println("nu1Num:" + nu1Num); int lowNum = 0; GeneMasterCodonPosition result = new GeneMasterCodonPosition(); for (NucleotidePositionRange range: isoformMasterMapping) { int firstRangeNucPos = range.getLower(); int lastRangeNucPos = range.getUpper(); int highNum = lowNum + lastRangeNucPos - firstRangeNucPos ; //if (debug) System.out.println("nu1Pos:"+ nu1Pos + " nu2Pos:" + nu2Pos + " lowNum:" + lowNum + " highNum:" + highNum); while (true) { int nuIndex = result.size(); int nuNum = firstNucPos + nuIndex; //if (debug) System.out.println("nuNum:" + nuNum); if (nuNum < lowNum || nuNum > highNum) break; result.addNucleotidePosition(firstRangeNucPos + nuNum-lowNum); if (result.size()==3) return result; } lowNum=highNum + 1; } return result; } /** * Check that we have amino acid aa(s) in isoform sequence at position pos. * If aa is null or empty string we just check that position is < sequence length * * @param sequence the protein sequence * @param pos position according to bio standard (first pos = 1) * @param aas 1 or more amino acids (1 char / aa) (empty or null when it is an insertion) * @return true if aas are found at pos in sequence */ static boolean checkAminoAcidsFromPosition(String sequence, int pos, String aas) { boolean insertionMode = (aas == null) || aas.isEmpty(); if (insertionMode) return checkSequencePosition(sequence, pos, true); return checkSequencePosition(sequence, pos, false) && sequence.startsWith(aas, pos-1); } /** * Check that position exists in specified sequence at given mode. * * <h4>Insertion pos mode</h4> * In insertion mode, insertion will be applied before amino-acid(s) at given position. * <p> * They are 3 valid cases to consider (illustrated with sequence ABCDEF): * * <ol> * <li>Before 1st AA: ABPCDEF (pos 1)</li> * <li>Internal: PABCDEF (pos 3)</li> * <li>After last AA: ABCDEFP (pos 7)</li> * </ol> * </p> * * @param sequence the amino-acid sequence * @param pos position according to bio standard (first pos = 1) * @param insertionMode is true apply insertion rule else apply standard rule * @return true if position exists in the given sequence */ static boolean checkSequencePosition(String sequence, int pos, boolean insertionMode) { Preconditions.checkNotNull(sequence); Preconditions.checkArgument(!sequence.isEmpty()); Preconditions.checkArgument(pos>0, pos + ": invalid value (position should start at 1)"); // An insertion at position p means if (insertionMode) return pos <= sequence.length()+1; return pos <= sequence.length(); } public static List<NucleotidePositionRange> getPositionRangesFromEntries(List<Entry<Integer,Integer>> listEntries) { List<NucleotidePositionRange> result = new ArrayList<>(); for (Entry<Integer,Integer> e: listEntries) result.add(new NucleotidePositionRange(e.getKey(), e.getValue())); return result; } public static List<NucleotidePositionRange> getPositionRangesFromPairs(List<Pair<Integer,Integer>> listPair) { List<NucleotidePositionRange> result = new ArrayList<>(); for (Pair<Integer,Integer> e: listPair) result.add(new NucleotidePositionRange(e.getFirst(), e.getSecond())); return result; } }