package fr.orsay.lri.varna.models.templates;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import fr.orsay.lri.varna.exceptions.ExceptionInvalidRNATemplate;
import fr.orsay.lri.varna.models.rna.ModeleBaseNucleotide;
import fr.orsay.lri.varna.models.rna.ModeleBP;
import fr.orsay.lri.varna.models.rna.RNA;
import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateElement;
import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateHelix;
import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateUnpairedSequence;
import fr.orsay.lri.varna.models.treealign.AlignedNode;
import fr.orsay.lri.varna.models.treealign.RNANodeValue;
import fr.orsay.lri.varna.models.treealign.RNANodeValue2;
import fr.orsay.lri.varna.models.treealign.RNATree2;
import fr.orsay.lri.varna.models.treealign.RNATree2Exception;
import fr.orsay.lri.varna.models.treealign.Tree;
import fr.orsay.lri.varna.models.treealign.TreeAlign;
import fr.orsay.lri.varna.models.treealign.TreeAlignException;
import fr.orsay.lri.varna.models.treealign.TreeAlignResult;
/**
* This class is about the alignment between a tree of RNANodeValue2
* and a tree of RNANodeValueTemplate.
*
* @author Raphael Champeimont
*/
public class RNATemplateAlign {
// We check this node can be part of a non-broken helix.
private static boolean canBePartOfAnHelix(RNANodeValue2 leftNodeValue) {
return (leftNodeValue != null) && leftNodeValue.isSingleNode() && leftNodeValue.getNode().getRightBasePosition() > 0;
}
private static boolean canBePartOfASequence(RNANodeValue2 leftNodeValue) {
return (leftNodeValue != null) && !leftNodeValue.isSingleNode();
}
private static boolean canBePartOfABrokenHelix(RNANodeValue2 leftNodeValue) {
return (leftNodeValue != null) && leftNodeValue.isSingleNode() && leftNodeValue.getNode().getRightBasePosition() < 0;
}
/**
* This method takes an alignment between a tree of RNANodeValue2
* of RNANodeValue and a tree of RNANodeValue2 of RNANodeValueTemplate,
* and the original RNA object that was used to create the first tree
* in the alignment.
* It returns the corresponding RNATemplateMapping.
*/
public static RNATemplateMapping makeTemplateMapping(TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> alignResult, RNA rna) throws RNATemplateMappingException {
RNATemplateMapping mapping = new RNATemplateMapping();
Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> alignment = alignResult.getAlignment();
mapping.setDistance(alignResult.getDistance());
// Map sequences and helices together, without managing pseudoknots
{
// We will go through the tree using a DFS
// The reason why this algorithm is not trivial is that we may have
// a longer helix on the RNA side than on the template side, in which
// case some nodes on the RNA side are going to be alone while we
// would want them to be part of the helix.
RNATemplateHelix currentHelix = null;
LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> remainingNodes = new LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>();
List<RNANodeValue2> nodesInSameHelix = new LinkedList<RNANodeValue2>();
remainingNodes.add(alignment);
while (!remainingNodes.isEmpty()) {
Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> node = remainingNodes.getLast();
remainingNodes.removeLast();
Tree<RNANodeValue2> leftNode = node.getValue().getLeftNode();
Tree<RNANodeValueTemplate> rightNode = node.getValue().getRightNode();
// Do we have something on RNA side?
if (leftNode != null && leftNode.getValue() != null) {
RNANodeValue2 leftNodeValue = leftNode.getValue();
// Do we have something on template side?
if (rightNode != null && rightNode.getValue() != null) {
RNANodeValueTemplate rightNodeValue = rightNode.getValue();
if (rightNodeValue instanceof RNANodeValueTemplateBasePair
&& canBePartOfAnHelix(leftNodeValue)) {
RNATemplateHelix helix = ((RNANodeValueTemplateBasePair) rightNodeValue).getHelix();
currentHelix = helix;
int i = leftNodeValue.getNode().getLeftBasePosition();
int j = leftNodeValue.getNode().getRightBasePosition();
mapping.addCouple(i, helix);
mapping.addCouple(j, helix);
// Maybe we have marked nodes as part of the same helix
// when we didn't know yet which helix it was.
if (nodesInSameHelix.size() > 0) {
int k = Integer.MAX_VALUE;
int l = Integer.MIN_VALUE;
for (RNANodeValue2 v: nodesInSameHelix) {
k = Math.min(k, v.getNode().getLeftBasePosition());
l = Math.max(l, v.getNode().getRightBasePosition());
}
// We want to check nodesInSameHelix is a parent helix and not a sibling.
boolean validExtension = (k < i) && (j < l);
if (validExtension) {
for (RNANodeValue2 v: nodesInSameHelix) {
mapping.addCouple(v.getNode().getLeftBasePosition(), helix);
mapping.addCouple(v.getNode().getRightBasePosition(), helix);
}
}
}
nodesInSameHelix.clear();
} else if (rightNodeValue instanceof RNANodeValueTemplateSequence
&& canBePartOfASequence(leftNodeValue)) {
currentHelix = null;
nodesInSameHelix.clear();
RNATemplateUnpairedSequence sequence = ((RNANodeValueTemplateSequence) rightNodeValue).getSequence();
for (RNANodeValue nodeValue: leftNode.getValue().getNodes()) {
mapping.addCouple(nodeValue.getLeftBasePosition(), sequence);
}
} else {
// Pseudoknot in template
currentHelix = null;
nodesInSameHelix.clear();
}
} else {
// We have nothing on template side
if (canBePartOfAnHelix(leftNodeValue)) {
if (currentHelix != null) {
// We may be in this case when the RNA
// contains a longer helix than in the template
int i = leftNodeValue.getNode().getLeftBasePosition();
int j = leftNodeValue.getNode().getRightBasePosition();
int k = Integer.MAX_VALUE;
int l = Integer.MIN_VALUE;
for (int b: mapping.getAncestor(currentHelix)) {
k = Math.min(k, b);
l = Math.max(l, b);
}
// We want to check currentHelix is a parent helix and not a sibling.
boolean validExtension = (k < i) && (j < l);
if (validExtension) {
mapping.addCouple(i, currentHelix);
mapping.addCouple(j, currentHelix);
}
} else {
// Maybe this left node is part of an helix
// which is smaller in the template
nodesInSameHelix.add(leftNodeValue);
}
}
}
}
// If this node has children, add them in the stack
List<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> children = node.getChildren();
int n = children.size();
if (n > 0) {
int helixChildren = 0;
// For each subtree, we want the sequences (in RNA side) to be treated first
// and then the helix nodes, because finding an aligned sequence tells
// use we cannot grow a current helix
ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> addToStack1 = new ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>();
ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> addToStack2 = new ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>();
for (int i=0; i<n; i++) {
Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> child = children.get(i);
Tree<RNANodeValue2> RNAchild = child.getValue().getLeftNode();
if (RNAchild != null
&& RNAchild.getValue() != null
&& (canBePartOfAnHelix(RNAchild.getValue()) || canBePartOfABrokenHelix(RNAchild.getValue()) )) {
helixChildren++;
addToStack2.add(child);
} else {
addToStack1.add(child);
}
}
// We add the children in their reverse order so they
// are given in the original order by the iterator
for (int i=addToStack2.size()-1; i>=0; i--) {
remainingNodes.add(addToStack2.get(i));
}
for (int i=addToStack1.size()-1; i>=0; i--) {
remainingNodes.add(addToStack1.get(i));
}
if (helixChildren >= 2) {
// We cannot "grow" the current helix, because we have a multiloop
// in the RNA.
currentHelix = null;
nodesInSameHelix.clear();
}
}
}
}
// Now recover pseudoknots (broken helices)
{
// First create a temporary mapping with broken helices
LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> remainingNodes = new LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>();
remainingNodes.add(alignment);
RNATemplateMapping tempPKMapping = new RNATemplateMapping();
while (!remainingNodes.isEmpty()) {
Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> node = remainingNodes.getLast();
remainingNodes.removeLast();
List<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> children = node.getChildren();
int n = children.size();
if (n > 0) {
for (int i=n-1; i>=0; i--) {
// We add the children in their reverse order so they
// are given in the original order by the iterator
remainingNodes.add(children.get(i));
}
List<RNANodeValue2> nodesInSameHelix = new LinkedList<RNANodeValue2>();
RNATemplateHelix currentHelix = null;
for (Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> child: node.getChildren()) {
Tree<RNANodeValue2> leftNode = child.getValue().getLeftNode();
Tree<RNANodeValueTemplate> rightNode = child.getValue().getRightNode();
if (leftNode != null && leftNode.getValue() != null) {
RNANodeValue2 leftNodeValue = leftNode.getValue();
// We have a real left (RNA side) node
if (rightNode != null && rightNode.getValue() != null) {
// We have a real right (template side) node
RNANodeValueTemplate rightNodeValue = rightNode.getValue();
if (rightNodeValue instanceof RNANodeValueTemplateBrokenBasePair
&& canBePartOfABrokenHelix(leftNodeValue)) {
RNATemplateHelix helix = ((RNANodeValueTemplateBrokenBasePair) rightNodeValue).getHelix();
currentHelix = helix;
tempPKMapping.addCouple(leftNodeValue.getNode().getLeftBasePosition(), helix);
// Maybe we have marked nodes as part of the same helix
// when we didn't know yet which helix it was.
for (RNANodeValue2 v: nodesInSameHelix) {
tempPKMapping.addCouple(v.getNode().getLeftBasePosition(), helix);
}
nodesInSameHelix.clear();
} else {
currentHelix = null;
nodesInSameHelix.clear();
}
} else {
// We have no right (template side) node
if (canBePartOfABrokenHelix(leftNodeValue)) {
if (currentHelix != null) {
// We may be in this case if the RNA sequence
// contains a longer helix than in the template
tempPKMapping.addCouple(leftNodeValue.getNode().getLeftBasePosition(), currentHelix);
} else {
// Maybe this left node is part of an helix
// which is smaller in the template
nodesInSameHelix.add(leftNodeValue);
}
} else {
currentHelix = null;
nodesInSameHelix.clear();
}
}
} else {
currentHelix = null;
nodesInSameHelix.clear();
}
}
}
}
// As parts of broken helices were aligned independently,
// we need to check for consistency, ie. keep only bases for
// which the associated base is also aligned with the same helix.
for (RNATemplateElement element: tempPKMapping.getTargetElemsAsSet()) {
RNATemplateHelix helix = (RNATemplateHelix) element;
HashSet<Integer> basesInHelix = new HashSet<Integer>(tempPKMapping.getAncestor(helix));
for (int baseIndex: basesInHelix) {
System.out.println("PK: " + helix + " aligned with " + baseIndex);
boolean baseOK = false;
// Search for an associated base aligned with the same helix
ArrayList<ModeleBP> auxBasePairs = rna.getAuxBPs(baseIndex);
for (ModeleBP auxBasePair: auxBasePairs) {
int partner5 = ((ModeleBaseNucleotide) auxBasePair.getPartner5()).getIndex();
int partner3 = ((ModeleBaseNucleotide) auxBasePair.getPartner3()).getIndex();
if (baseIndex == partner5) {
if (basesInHelix.contains(partner3)) {
baseOK = true;
break;
}
} else if (baseIndex == partner3) {
if (basesInHelix.contains(partner5)) {
baseOK = true;
break;
}
}
}
if (baseOK) {
// Add it to the real mapping
mapping.addCouple(baseIndex, helix);
}
}
}
}
return mapping;
}
public static void printMapping(RNATemplateMapping mapping, RNATemplate template, String sequence) {
Iterator<RNATemplateElement> iter = template.rnaIterator();
while (iter.hasNext()) {
RNATemplateElement element = iter.next();
System.out.println(element.toString());
ArrayList<Integer> A = mapping.getAncestor(element);
if (A != null) {
RNATemplateAlign.printIntArrayList(A);
for (int n=A.size(), i=0; i<n; i++) {
System.out.print("\t" + sequence.charAt(A.get(i)));
}
System.out.println("");
} else {
System.out.println("\tno match");
}
}
}
/**
* Align the given RNA with the given RNA template.
*/
public static TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> alignRNAWithTemplate(RNA rna, RNATemplate template) throws RNATemplateDrawingAlgorithmException {
try {
Tree<RNANodeValue2> rnaAsTree = RNATree2.RNATree2FromRNA(rna);
Tree<RNANodeValueTemplate> templateAsTree = template.toTree();
TreeAlign<RNANodeValue2,RNANodeValueTemplate> treeAlign = new TreeAlign<RNANodeValue2,RNANodeValueTemplate>(new RNANodeValue2TemplateDistance());
TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> result = treeAlign.align(rnaAsTree, templateAsTree);
return result;
} catch (RNATree2Exception e) {
throw (new RNATemplateDrawingAlgorithmException("RNATree2Exception: " + e.getMessage()));
} catch (ExceptionInvalidRNATemplate e) {
throw (new RNATemplateDrawingAlgorithmException("ExceptionInvalidRNATemplate: " + e.getMessage()));
} catch (TreeAlignException e) {
throw (new RNATemplateDrawingAlgorithmException("TreeAlignException: " + e.getMessage()));
}
}
/**
* Map an RNA with an RNATemplate using tree alignment.
*/
public static RNATemplateMapping mapRNAWithTemplate(RNA rna, RNATemplate template) throws RNATemplateDrawingAlgorithmException {
try {
TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> alignResult = RNATemplateAlign.alignRNAWithTemplate(rna, template);
RNATemplateMapping mapping = RNATemplateAlign.makeTemplateMapping(alignResult, rna);
return mapping;
} catch (RNATemplateMappingException e) {
e.printStackTrace();
throw (new RNATemplateDrawingAlgorithmException("RNATemplateMappingException: " + e.getMessage()));
}
}
/**
* Print an integer array.
*/
public static void printIntArray(int[] A) {
for (int i=0; i<A.length; i++) {
System.out.print("\t" + A[i]);
}
System.out.println("");
}
/**
* Print an integer ArrayList.
*/
public static void printIntArrayList(ArrayList<Integer> A) {
for (int i=0; i<A.size(); i++) {
System.out.print("\t" + A.get(i));
}
System.out.println("");
}
/**
* Print an matrix of shorts.
*/
public static void printShortMatrix(short[][] M) {
System.out.println("Begin matrix");
for (int i=0; i<M.length; i++) {
for (int j=0; j<M[i].length; j++) {
System.out.print("\t" + M[i][j]);
}
System.out.println("");
}
System.out.println("End matrix");
}
/**
* Convert a list of integers into an array of integers.
* The returned arrays is freshly allocated.
* Returns null if given null.
*/
public static int[] intArrayFromList(List<Integer> l) {
if (l != null) {
int n = l.size();
int[] result = new int[n];
for (int i=0; i<n; i++) {
result[i] = l.get(i);
}
return result;
} else {
return null;
}
}
}