package org.biojava.nbio.structure.align.quaternary; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import javax.vecmath.Matrix4d; import org.biojava.nbio.structure.Atom; import org.biojava.nbio.structure.Calc; import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.StructureTools; import org.biojava.nbio.structure.align.multiple.Block; import org.biojava.nbio.structure.align.multiple.BlockImpl; import org.biojava.nbio.structure.align.multiple.BlockSet; import org.biojava.nbio.structure.align.multiple.BlockSetImpl; import org.biojava.nbio.structure.align.multiple.MultipleAlignment; import org.biojava.nbio.structure.align.multiple.MultipleAlignmentEnsembleImpl; import org.biojava.nbio.structure.align.multiple.MultipleAlignmentImpl; import org.biojava.nbio.structure.align.multiple.util.MultipleAlignmentScorer; import org.biojava.nbio.structure.align.multiple.util.ReferenceSuperimposer; import org.biojava.nbio.structure.cluster.Subunit; import org.biojava.nbio.structure.cluster.SubunitCluster; import org.biojava.nbio.structure.cluster.SubunitClusterer; import org.biojava.nbio.structure.cluster.SubunitClustererParameters; import org.biojava.nbio.structure.cluster.SubunitExtractor; import org.biojava.nbio.structure.contact.Pair; import org.biojava.nbio.structure.geometry.SuperPositions; import org.biojava.nbio.structure.geometry.UnitQuaternions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Quaternary Structure Alignment (QS-Align). The algorithm takes as input two * protein structures at the quaternary structure level (multiple chains or * subunits) and calculates the equivalent subunit matching and a residue-based * alignment, together with usual alignment quality scores. * * @author Aleix Lafita * @since 5.0.0 * */ public class QsAlign { private static final Logger logger = LoggerFactory.getLogger(QsAlign.class); public static QsAlignResult align(Structure s1, Structure s2, SubunitClustererParameters cParams, QsAlignParameters aParams) throws StructureException { return align( SubunitExtractor.extractSubunits(s1, cParams.getAbsoluteMinimumSequenceLength(), cParams.getMinimumSequenceLengthFraction(), cParams.getMinimumSequenceLength()), SubunitExtractor.extractSubunits(s2, cParams.getAbsoluteMinimumSequenceLength(), cParams.getMinimumSequenceLengthFraction(), cParams.getMinimumSequenceLength()), cParams, aParams); } public static QsAlignResult align(List<Subunit> s1, List<Subunit> s2, SubunitClustererParameters cParams, QsAlignParameters aParams) throws StructureException { QsAlignResult result = new QsAlignResult(s1, s2); // SETP 1: cluster each group of subunits O(N^2*L^2) - intra List<SubunitCluster> c1 = SubunitClusterer.cluster(s1, cParams); List<SubunitCluster> c2 = SubunitClusterer.cluster(s2, cParams); // STEP 2: match each subunit cluster between groups O(N^2*L^2) - inter Map<Integer, Integer> clusterMap = new HashMap<Integer, Integer>(); for (int i = 0; i < c1.size(); i++) { for (int j = 0; j < c2.size(); j++) { if (clusterMap.keySet().contains(i)) break; if (clusterMap.values().contains(j)) continue; // Use structural alignment to match the subunit clusters if (c1.get(i).mergeStructure(c2.get(j), cParams.getRmsdThreshold(), cParams.getCoverageThreshold(), aParams.getAligner())) clusterMap.put(i, j); } } logger.info("Cluster Map: " + clusterMap.toString()); result.setClusters(c1); // STEP 3: Align the assemblies for each cluster match O(N^2*L) for (int globalKey : clusterMap.keySet()) { // Obtain the clusters SubunitCluster clust1 = c1.get(globalKey); SubunitCluster clust2 = c2.get(clusterMap.get(globalKey)); // Take a cluster match as reference int index1 = 0; int index2 = clust1.size() - clust2.size(); Map<Integer, Integer> subunitMap = new HashMap<Integer, Integer>(); subunitMap.put(index1, index2); // Map cluster id to their subunit matching Map<Integer, Map<Integer, Integer>> clustSubunitMap = new HashMap<Integer, Map<Integer, Integer>>(); clustSubunitMap.put(globalKey, subunitMap); // Change order of key set so that globalKey is first List<Integer> keySet = new ArrayList<Integer>(clusterMap.keySet()); keySet.remove((Integer) globalKey); keySet.add(0, globalKey); for (int key : clusterMap.keySet()) { // Recover subunitMap if it is the reference, new one otherwise if (key == globalKey) subunitMap = clustSubunitMap.get(key); else subunitMap = new HashMap<Integer, Integer>(); // Obtain the clusters of each subunit group clust1 = c1.get(key); clust2 = c2.get(clusterMap.get(key)); // Get the initial subunit indices of each group index1 = 0; index2 = clust1.size() - clust2.size(); for (int i = 0; i < index2; i++) { for (int j = index2; j < clust1.size(); j++) { if (subunitMap.keySet().contains(i)) break; if (subunitMap.values().contains(j)) continue; // Obtain cumulative transformation matrix Matrix4d transform = getTransformForClusterSubunitMap( c1, clustSubunitMap); // Obtain Atom arrays of the subunit pair to match Atom[] atoms1 = clust1.getAlignedAtomsSubunit(i); Atom[] atoms2 = clust1.getAlignedAtomsSubunit(j); // Obtain centroids and transform the second Atom centr1 = Calc.getCentroid(atoms1); Atom centr2 = Calc.getCentroid(atoms2); Calc.transform(centr2, transform); // 1- Check that the distance fulfills maximum double dCentroid = Calc.getDistance(centr1, centr2); if (dCentroid > aParams.getdCutoff()) { logger.debug(String.format("Subunit matching %d " + "vs %d of cluster %d could not be " + "matched, because centroid distance is " + "%.2f", index1, index2, key, dCentroid)); continue; } // Transform coordinates of second Atom[] atoms2c = StructureTools.cloneAtomArray(atoms2); Calc.transform(atoms2c, transform); // 2- Check the orientation metric condition double qOrient = UnitQuaternions.orientationAngle( Calc.atomsToPoints(atoms1), Calc.atomsToPoints(atoms2c), false); qOrient = Math.min(Math.abs(2*Math.PI - qOrient), qOrient); if (qOrient > aParams.getMaxOrientationAngle()) { logger.debug(String.format("Subunit matching %d " + "vs %d of cluster %d could not be " + "matched, because orientation metric is " + "%.2f", i, j, key, qOrient)); continue; } // 3- Check the RMSD condition double rmsd = Calc.rmsd(atoms1, atoms2c); if (rmsd > aParams.getMaxRmsd()) { logger.debug(String.format("Subunit matching %d " + "vs %d of cluster %d could not be " + "matched, because RMSD is %.2f", i, j, key, rmsd)); continue; } logger.info(String.format("Subunit matching %d vs %d" + " of cluster %d with centroid distance %.2f" + ", orientation metric %.2f and RMSD %.2f", i, j, key, dCentroid, qOrient, rmsd)); subunitMap.put(i, j); } } clustSubunitMap.put(key, subunitMap); } logger.info("Cluster Subunit Map: " + clustSubunitMap.toString()); // Unfold the nested map into subunit map and alignment subunitMap = new HashMap<Integer, Integer>(); List<Integer> alignRes1 = new ArrayList<Integer>(); List<Integer> alignRes2 = new ArrayList<Integer>(); List<Atom> atomArray1 = new ArrayList<Atom>(); List<Atom> atomArray2 = new ArrayList<Atom>(); for (int key : clustSubunitMap.keySet()) { // Obtain the cluster and the alignment in it SubunitCluster cluster = c1.get(key); List<List<Integer>> clusterEqrs = cluster .getMultipleAlignment().getBlock(0).getAlignRes(); for (Entry<Integer, Integer> pair : clustSubunitMap.get(key) .entrySet()) { int i = pair.getKey(); int j = pair.getValue(); // Obtain the indices of the original Subunit Lists int orig1 = s1.indexOf(cluster.getSubunits().get(i)); int orig2 = s2.indexOf(cluster.getSubunits().get(j)); // Append rescaled aligned residue indices for (Integer eqr : clusterEqrs.get(i)) alignRes1.add(eqr + atomArray1.size()); for (Integer eqr : clusterEqrs.get(j)) alignRes2.add(eqr + atomArray2.size()); // Apend atoms to the arrays atomArray1.addAll(Arrays.asList(s1.get(orig1) .getRepresentativeAtoms())); atomArray2.addAll(Arrays.asList(s2.get(orig2) .getRepresentativeAtoms())); subunitMap.put(orig1, orig2); } } // Evaluate the goodness of the match with an alignment object MultipleAlignment msa = new MultipleAlignmentImpl(); msa.setEnsemble(new MultipleAlignmentEnsembleImpl()); msa.getEnsemble().setAtomArrays( Arrays.asList(new Atom[][] { atomArray1.toArray(new Atom[atomArray1.size()]), atomArray2.toArray(new Atom[atomArray2.size()]) })); // Fill in the alignment information BlockSet bs = new BlockSetImpl(msa); Block b = new BlockImpl(bs); List<List<Integer>> alignRes = new ArrayList<List<Integer>>(2); alignRes.add(alignRes1); alignRes.add(alignRes2); b.setAlignRes(alignRes); // Fill in the transformation matrices new ReferenceSuperimposer().superimpose(msa); // Calculate some scores MultipleAlignmentScorer.calculateScores(msa); // If it is the best match found so far store it if (subunitMap.size() > result.getSubunitMap().size()) { result.setSubunitMap(subunitMap); result.setAlignment(msa); logger.info("Better result found: " + result.toString()); } else if (subunitMap.size() == result.getSubunitMap().size()) { if (result.getAlignment() == null) { result.setSubunitMap(subunitMap); result.setAlignment(msa); } else if (msa.getScore(MultipleAlignmentScorer.RMSD) < result .getRmsd()) { result.setSubunitMap(subunitMap); result.setAlignment(msa); logger.info("Better result found: " + result.toString()); } } } return result; } /** * Returns a pair of Atom arrays corresponding to the alignment of subunit * matchings, in order of appearance. Superposition of the two Atom sets * gives the transformation of the complex. * <p> * Utility method to cumulative calculate the alignment Atoms. * * @param clusters * List of SubunitClusters * @param clusterSubunitMap * map from cluster id to subunit matching * @return pair of atom arrays to be superposed */ private static Pair<Atom[]> getAlignedAtomsForClusterSubunitMap( List<SubunitCluster> clusters, Map<Integer, Map<Integer, Integer>> clusterSubunitMap) { List<Atom> atomArray1 = new ArrayList<Atom>(); List<Atom> atomArray2 = new ArrayList<Atom>(); // For each cluster of subunits for (int key : clusterSubunitMap.keySet()) { // Obtain the cluster and the alignment in it SubunitCluster cluster = clusters.get(key); // For each subunit matching in the cluster for (Entry<Integer, Integer> pair : clusterSubunitMap.get(key) .entrySet()) { int i = pair.getKey(); int j = pair.getValue(); // Apend atoms to the arrays atomArray1.addAll(Arrays.asList(cluster .getAlignedAtomsSubunit(i))); atomArray2.addAll(Arrays.asList(cluster .getAlignedAtomsSubunit(j))); } } return new Pair<Atom[]>( atomArray1.toArray(new Atom[atomArray1.size()]), atomArray2.toArray(new Atom[atomArray2.size()])); } /** * Returns the transformation matrix corresponding to the alignment of * subunit matchings. * <p> * Utility method to cumulative calculate the alignment transformation. * * @param clusters * List of SubunitClusters * @param clusterSubunitMap * map from cluster id to subunit matching * @return transformation matrix * @throws StructureException */ private static Matrix4d getTransformForClusterSubunitMap( List<SubunitCluster> clusters, Map<Integer, Map<Integer, Integer>> clusterSubunitMap) throws StructureException { Pair<Atom[]> pair = getAlignedAtomsForClusterSubunitMap(clusters, clusterSubunitMap); return SuperPositions.superpose(Calc.atomsToPoints(pair.getFirst()), Calc.atomsToPoints(pair.getSecond())); } }