package fr.orsay.lri.varna.models.templates; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import fr.orsay.lri.varna.exceptions.ExceptionInvalidRNATemplate; import fr.orsay.lri.varna.models.rna.ModeleBaseNucleotide; import fr.orsay.lri.varna.models.rna.ModeleBP; import fr.orsay.lri.varna.models.rna.RNA; import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateElement; import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateHelix; import fr.orsay.lri.varna.models.templates.RNATemplate.RNATemplateUnpairedSequence; import fr.orsay.lri.varna.models.treealign.AlignedNode; import fr.orsay.lri.varna.models.treealign.RNANodeValue; import fr.orsay.lri.varna.models.treealign.RNANodeValue2; import fr.orsay.lri.varna.models.treealign.RNATree2; import fr.orsay.lri.varna.models.treealign.RNATree2Exception; import fr.orsay.lri.varna.models.treealign.Tree; import fr.orsay.lri.varna.models.treealign.TreeAlign; import fr.orsay.lri.varna.models.treealign.TreeAlignException; import fr.orsay.lri.varna.models.treealign.TreeAlignResult; /** * This class is about the alignment between a tree of RNANodeValue2 * and a tree of RNANodeValueTemplate. * * @author Raphael Champeimont */ public class RNATemplateAlign { // We check this node can be part of a non-broken helix. private static boolean canBePartOfAnHelix(RNANodeValue2 leftNodeValue) { return (leftNodeValue != null) && leftNodeValue.isSingleNode() && leftNodeValue.getNode().getRightBasePosition() > 0; } private static boolean canBePartOfASequence(RNANodeValue2 leftNodeValue) { return (leftNodeValue != null) && !leftNodeValue.isSingleNode(); } private static boolean canBePartOfABrokenHelix(RNANodeValue2 leftNodeValue) { return (leftNodeValue != null) && leftNodeValue.isSingleNode() && leftNodeValue.getNode().getRightBasePosition() < 0; } /** * This method takes an alignment between a tree of RNANodeValue2 * of RNANodeValue and a tree of RNANodeValue2 of RNANodeValueTemplate, * and the original RNA object that was used to create the first tree * in the alignment. * It returns the corresponding RNATemplateMapping. */ public static RNATemplateMapping makeTemplateMapping(TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> alignResult, RNA rna) throws RNATemplateMappingException { RNATemplateMapping mapping = new RNATemplateMapping(); Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> alignment = alignResult.getAlignment(); mapping.setDistance(alignResult.getDistance()); // Map sequences and helices together, without managing pseudoknots { // We will go through the tree using a DFS // The reason why this algorithm is not trivial is that we may have // a longer helix on the RNA side than on the template side, in which // case some nodes on the RNA side are going to be alone while we // would want them to be part of the helix. RNATemplateHelix currentHelix = null; LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> remainingNodes = new LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>(); List<RNANodeValue2> nodesInSameHelix = new LinkedList<RNANodeValue2>(); remainingNodes.add(alignment); while (!remainingNodes.isEmpty()) { Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> node = remainingNodes.getLast(); remainingNodes.removeLast(); Tree<RNANodeValue2> leftNode = node.getValue().getLeftNode(); Tree<RNANodeValueTemplate> rightNode = node.getValue().getRightNode(); // Do we have something on RNA side? if (leftNode != null && leftNode.getValue() != null) { RNANodeValue2 leftNodeValue = leftNode.getValue(); // Do we have something on template side? if (rightNode != null && rightNode.getValue() != null) { RNANodeValueTemplate rightNodeValue = rightNode.getValue(); if (rightNodeValue instanceof RNANodeValueTemplateBasePair && canBePartOfAnHelix(leftNodeValue)) { RNATemplateHelix helix = ((RNANodeValueTemplateBasePair) rightNodeValue).getHelix(); currentHelix = helix; int i = leftNodeValue.getNode().getLeftBasePosition(); int j = leftNodeValue.getNode().getRightBasePosition(); mapping.addCouple(i, helix); mapping.addCouple(j, helix); // Maybe we have marked nodes as part of the same helix // when we didn't know yet which helix it was. if (nodesInSameHelix.size() > 0) { int k = Integer.MAX_VALUE; int l = Integer.MIN_VALUE; for (RNANodeValue2 v: nodesInSameHelix) { k = Math.min(k, v.getNode().getLeftBasePosition()); l = Math.max(l, v.getNode().getRightBasePosition()); } // We want to check nodesInSameHelix is a parent helix and not a sibling. boolean validExtension = (k < i) && (j < l); if (validExtension) { for (RNANodeValue2 v: nodesInSameHelix) { mapping.addCouple(v.getNode().getLeftBasePosition(), helix); mapping.addCouple(v.getNode().getRightBasePosition(), helix); } } } nodesInSameHelix.clear(); } else if (rightNodeValue instanceof RNANodeValueTemplateSequence && canBePartOfASequence(leftNodeValue)) { currentHelix = null; nodesInSameHelix.clear(); RNATemplateUnpairedSequence sequence = ((RNANodeValueTemplateSequence) rightNodeValue).getSequence(); for (RNANodeValue nodeValue: leftNode.getValue().getNodes()) { mapping.addCouple(nodeValue.getLeftBasePosition(), sequence); } } else { // Pseudoknot in template currentHelix = null; nodesInSameHelix.clear(); } } else { // We have nothing on template side if (canBePartOfAnHelix(leftNodeValue)) { if (currentHelix != null) { // We may be in this case when the RNA // contains a longer helix than in the template int i = leftNodeValue.getNode().getLeftBasePosition(); int j = leftNodeValue.getNode().getRightBasePosition(); int k = Integer.MAX_VALUE; int l = Integer.MIN_VALUE; for (int b: mapping.getAncestor(currentHelix)) { k = Math.min(k, b); l = Math.max(l, b); } // We want to check currentHelix is a parent helix and not a sibling. boolean validExtension = (k < i) && (j < l); if (validExtension) { mapping.addCouple(i, currentHelix); mapping.addCouple(j, currentHelix); } } else { // Maybe this left node is part of an helix // which is smaller in the template nodesInSameHelix.add(leftNodeValue); } } } } // If this node has children, add them in the stack List<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> children = node.getChildren(); int n = children.size(); if (n > 0) { int helixChildren = 0; // For each subtree, we want the sequences (in RNA side) to be treated first // and then the helix nodes, because finding an aligned sequence tells // use we cannot grow a current helix ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> addToStack1 = new ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>(); ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> addToStack2 = new ArrayList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>(); for (int i=0; i<n; i++) { Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> child = children.get(i); Tree<RNANodeValue2> RNAchild = child.getValue().getLeftNode(); if (RNAchild != null && RNAchild.getValue() != null && (canBePartOfAnHelix(RNAchild.getValue()) || canBePartOfABrokenHelix(RNAchild.getValue()) )) { helixChildren++; addToStack2.add(child); } else { addToStack1.add(child); } } // We add the children in their reverse order so they // are given in the original order by the iterator for (int i=addToStack2.size()-1; i>=0; i--) { remainingNodes.add(addToStack2.get(i)); } for (int i=addToStack1.size()-1; i>=0; i--) { remainingNodes.add(addToStack1.get(i)); } if (helixChildren >= 2) { // We cannot "grow" the current helix, because we have a multiloop // in the RNA. currentHelix = null; nodesInSameHelix.clear(); } } } } // Now recover pseudoknots (broken helices) { // First create a temporary mapping with broken helices LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> remainingNodes = new LinkedList<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>>(); remainingNodes.add(alignment); RNATemplateMapping tempPKMapping = new RNATemplateMapping(); while (!remainingNodes.isEmpty()) { Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> node = remainingNodes.getLast(); remainingNodes.removeLast(); List<Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>>> children = node.getChildren(); int n = children.size(); if (n > 0) { for (int i=n-1; i>=0; i--) { // We add the children in their reverse order so they // are given in the original order by the iterator remainingNodes.add(children.get(i)); } List<RNANodeValue2> nodesInSameHelix = new LinkedList<RNANodeValue2>(); RNATemplateHelix currentHelix = null; for (Tree<AlignedNode<RNANodeValue2,RNANodeValueTemplate>> child: node.getChildren()) { Tree<RNANodeValue2> leftNode = child.getValue().getLeftNode(); Tree<RNANodeValueTemplate> rightNode = child.getValue().getRightNode(); if (leftNode != null && leftNode.getValue() != null) { RNANodeValue2 leftNodeValue = leftNode.getValue(); // We have a real left (RNA side) node if (rightNode != null && rightNode.getValue() != null) { // We have a real right (template side) node RNANodeValueTemplate rightNodeValue = rightNode.getValue(); if (rightNodeValue instanceof RNANodeValueTemplateBrokenBasePair && canBePartOfABrokenHelix(leftNodeValue)) { RNATemplateHelix helix = ((RNANodeValueTemplateBrokenBasePair) rightNodeValue).getHelix(); currentHelix = helix; tempPKMapping.addCouple(leftNodeValue.getNode().getLeftBasePosition(), helix); // Maybe we have marked nodes as part of the same helix // when we didn't know yet which helix it was. for (RNANodeValue2 v: nodesInSameHelix) { tempPKMapping.addCouple(v.getNode().getLeftBasePosition(), helix); } nodesInSameHelix.clear(); } else { currentHelix = null; nodesInSameHelix.clear(); } } else { // We have no right (template side) node if (canBePartOfABrokenHelix(leftNodeValue)) { if (currentHelix != null) { // We may be in this case if the RNA sequence // contains a longer helix than in the template tempPKMapping.addCouple(leftNodeValue.getNode().getLeftBasePosition(), currentHelix); } else { // Maybe this left node is part of an helix // which is smaller in the template nodesInSameHelix.add(leftNodeValue); } } else { currentHelix = null; nodesInSameHelix.clear(); } } } else { currentHelix = null; nodesInSameHelix.clear(); } } } } // As parts of broken helices were aligned independently, // we need to check for consistency, ie. keep only bases for // which the associated base is also aligned with the same helix. for (RNATemplateElement element: tempPKMapping.getTargetElemsAsSet()) { RNATemplateHelix helix = (RNATemplateHelix) element; HashSet<Integer> basesInHelix = new HashSet<Integer>(tempPKMapping.getAncestor(helix)); for (int baseIndex: basesInHelix) { System.out.println("PK: " + helix + " aligned with " + baseIndex); boolean baseOK = false; // Search for an associated base aligned with the same helix ArrayList<ModeleBP> auxBasePairs = rna.getAuxBPs(baseIndex); for (ModeleBP auxBasePair: auxBasePairs) { int partner5 = ((ModeleBaseNucleotide) auxBasePair.getPartner5()).getIndex(); int partner3 = ((ModeleBaseNucleotide) auxBasePair.getPartner3()).getIndex(); if (baseIndex == partner5) { if (basesInHelix.contains(partner3)) { baseOK = true; break; } } else if (baseIndex == partner3) { if (basesInHelix.contains(partner5)) { baseOK = true; break; } } } if (baseOK) { // Add it to the real mapping mapping.addCouple(baseIndex, helix); } } } } return mapping; } public static void printMapping(RNATemplateMapping mapping, RNATemplate template, String sequence) { Iterator<RNATemplateElement> iter = template.rnaIterator(); while (iter.hasNext()) { RNATemplateElement element = iter.next(); System.out.println(element.toString()); ArrayList<Integer> A = mapping.getAncestor(element); if (A != null) { RNATemplateAlign.printIntArrayList(A); for (int n=A.size(), i=0; i<n; i++) { System.out.print("\t" + sequence.charAt(A.get(i))); } System.out.println(""); } else { System.out.println("\tno match"); } } } /** * Align the given RNA with the given RNA template. */ public static TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> alignRNAWithTemplate(RNA rna, RNATemplate template) throws RNATemplateDrawingAlgorithmException { try { Tree<RNANodeValue2> rnaAsTree = RNATree2.RNATree2FromRNA(rna); Tree<RNANodeValueTemplate> templateAsTree = template.toTree(); TreeAlign<RNANodeValue2,RNANodeValueTemplate> treeAlign = new TreeAlign<RNANodeValue2,RNANodeValueTemplate>(new RNANodeValue2TemplateDistance()); TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> result = treeAlign.align(rnaAsTree, templateAsTree); return result; } catch (RNATree2Exception e) { throw (new RNATemplateDrawingAlgorithmException("RNATree2Exception: " + e.getMessage())); } catch (ExceptionInvalidRNATemplate e) { throw (new RNATemplateDrawingAlgorithmException("ExceptionInvalidRNATemplate: " + e.getMessage())); } catch (TreeAlignException e) { throw (new RNATemplateDrawingAlgorithmException("TreeAlignException: " + e.getMessage())); } } /** * Map an RNA with an RNATemplate using tree alignment. */ public static RNATemplateMapping mapRNAWithTemplate(RNA rna, RNATemplate template) throws RNATemplateDrawingAlgorithmException { try { TreeAlignResult<RNANodeValue2,RNANodeValueTemplate> alignResult = RNATemplateAlign.alignRNAWithTemplate(rna, template); RNATemplateMapping mapping = RNATemplateAlign.makeTemplateMapping(alignResult, rna); return mapping; } catch (RNATemplateMappingException e) { e.printStackTrace(); throw (new RNATemplateDrawingAlgorithmException("RNATemplateMappingException: " + e.getMessage())); } } /** * Print an integer array. */ public static void printIntArray(int[] A) { for (int i=0; i<A.length; i++) { System.out.print("\t" + A[i]); } System.out.println(""); } /** * Print an integer ArrayList. */ public static void printIntArrayList(ArrayList<Integer> A) { for (int i=0; i<A.size(); i++) { System.out.print("\t" + A.get(i)); } System.out.println(""); } /** * Print an matrix of shorts. */ public static void printShortMatrix(short[][] M) { System.out.println("Begin matrix"); for (int i=0; i<M.length; i++) { for (int j=0; j<M[i].length; j++) { System.out.print("\t" + M[i][j]); } System.out.println(""); } System.out.println("End matrix"); } /** * Convert a list of integers into an array of integers. * The returned arrays is freshly allocated. * Returns null if given null. */ public static int[] intArrayFromList(List<Integer> l) { if (l != null) { int n = l.size(); int[] result = new int[n]; for (int i=0; i<n; i++) { result[i] = l.get(i); } return result; } else { return null; } } }