/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * * Created on Mar 9, 2010 * Author: Spencer Bliven * */ package org.biojava.nbio.structure.align.ce; import org.biojava.nbio.structure.Atom; import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.StructureTools; import org.biojava.nbio.structure.align.model.AFPChain; import org.biojava.nbio.structure.align.util.AFPChainScorer; import org.biojava.nbio.structure.align.util.AtomCache; import org.biojava.nbio.structure.jama.Matrix; import java.lang.reflect.InvocationTargetException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Scanner; /** * A wrapper for {@link CeMain} which sets default parameters to be appropriate for finding * circular permutations. * <p> * A circular permutation consists of a single cleavage point and rearrangement * between two structures, for example: * <pre> * ABCDEFG * DEFGABC * </pre> * @author Spencer Bliven. * */ public class OptimalCECPMain extends CeMain { private static final boolean debug = true; public static final String algorithmName = "jCE Optimal Circular Permutation"; /** * version history: * 1.0 - Initial version */ public static final String version = "1.0"; protected OptimalCECPParameters params; /** * */ public OptimalCECPMain() { super(); params = new OptimalCECPParameters(); } @Override public String getAlgorithmName() { return OptimalCECPMain.algorithmName; } @Override public String getVersion() { return OptimalCECPMain.version; } /** * @return an {@link OptimalCECPParameters} object */ @Override public ConfigStrucAligParams getParameters() { return params; } /** * @param params Should be an {@link OptimalCECPParameters} object specifying alignment options */ @Override public void setParameters(ConfigStrucAligParams params){ if (! (params instanceof OptimalCECPParameters )){ throw new IllegalArgumentException("provided parameter object is not of type CeParameter"); } this.params = (OptimalCECPParameters) params; } /** * Circularly permutes arr in place. * * <p>Similar to {@link Collections#rotate(List, int)} but with reversed * direction. Perhaps it would be more efficient to use the Collections version? * @param <T> * @param arr The array to be permuted * @param cp The number of residues to shift leftward, or equivalently, the index of * the first element after the permutation point. */ private static <T> void permuteArray(T[] arr, int cp) { // Allow negative cp points for convenience. if(cp == 0) { return; } if(cp < 0) { cp = arr.length+cp; } if(cp < 0 || cp >= arr.length) { throw new ArrayIndexOutOfBoundsException( "Permutation point ("+cp+") must be between -ca2.length and ca2.length-1" ); } List<T> temp = new ArrayList<T>(cp); // shift residues left for(int i=0;i<cp;i++) { temp.add(arr[i]); } for(int j=cp;j<arr.length;j++) { arr[j-cp]=arr[j]; } for(int i=0;i<cp;i++) { arr[arr.length-cp+i] = temp.get(i); } } /** * Circularly permutes arr in place. * * <p>Similar to {@link Collections#rotate(List, int)} but with reversed * direction. Perhaps it would be more efficient to use the Collections version? * @param <T> * @param arr The array to be permuted * @param cp The number of residues to shift leftward, or equivalently, the index of * the first element after the permutation point. * private static void permuteArray(int[] arr, int cp) { // Allow negative cp points for convenience. if(cp == 0) { return; } if(cp < 0) { cp = arr.length+cp; } if(cp < 0 || cp >= arr.length) { throw new ArrayIndexOutOfBoundsException( "Permutation point ("+cp+") must be between -ca2.length and ca2.length-1" ); } List<Integer> temp = new ArrayList<Integer>(cp); // shift residues left for(int i=0;i<cp;i++) { temp.add(arr[i]); } for(int j=cp;j<arr.length;j++) { arr[j-cp]=arr[j]; } for(int i=0;i<cp;i++) { arr[arr.length-cp+i] = temp.get(i); } } */ /** * Aligns ca1 with ca2 permuted by <i>cp</i> residues. * <p><strong>WARNING:</strong> Modifies ca2 during the permutation. Be sure * to make a copy before calling this method. * * @param ca1 * @param ca2 * @param param * @param cp * @return * @throws StructureException */ public AFPChain alignPermuted(Atom[] ca1, Atom[] ca2, Object param, int cp) throws StructureException { // initial permutation permuteArray(ca2,cp); // perform alignment AFPChain afpChain = super.align(ca1, ca2, param); // un-permute alignment permuteAFPChain(afpChain, -cp); if(afpChain.getName2() != null) { afpChain.setName2(afpChain.getName2()+" CP="+cp); } // Specify the permuted return afpChain; } /** * Permute the second protein of afpChain by the specified number of residues. * @param afpChain Input alignment * @param cp Amount leftwards (or rightward, if negative) to shift the * @return A new alignment equivalent to afpChain after the permutations */ private static void permuteAFPChain(AFPChain afpChain, int cp) { int ca2len = afpChain.getCa2Length(); //fix up cp to be positive if(cp == 0) { return; } if(cp < 0) { cp = ca2len+cp; } if(cp < 0 || cp >= ca2len) { throw new ArrayIndexOutOfBoundsException( "Permutation point ("+cp+") must be between -ca2.length and ca2.length-1" ); } // Fix up optAln permuteOptAln(afpChain,cp); if(afpChain.getBlockNum() > 1) afpChain.setSequentialAlignment(false); // fix up matrices // ca1 corresponds to row indices, while ca2 corresponds to column indices. afpChain.setDistanceMatrix(permuteMatrix(afpChain.getDistanceMatrix(),0,-cp)); // this is square, so permute both afpChain.setDisTable2(permuteMatrix(afpChain.getDisTable2(),-cp,-cp)); //TODO fix up other AFP parameters? } /** * Permutes <i>mat</i> by moving the rows of the matrix upwards by <i>cp</i> * rows. * @param mat The original matrix * @param cpRows Number of rows upward to move entries * @param cpCols Number of columns leftward to move entries * @return The permuted matrix */ private static Matrix permuteMatrix(Matrix mat, int cpRows, int cpCols) { //fix up cp to be positive if(cpRows == 0 && cpCols == 0) { return mat.copy(); } if(cpRows < 0) { cpRows = mat.getRowDimension()+cpRows; } if(cpRows < 0 || cpRows >= mat.getRowDimension()) { throw new ArrayIndexOutOfBoundsException( String.format( "Can't permute rows by %d: only %d rows.", cpRows, mat.getRowDimension() ) ); } if(cpCols < 0) { cpCols = mat.getColumnDimension()+cpCols; } if(cpCols < 0 || cpCols >= mat.getColumnDimension()) { throw new ArrayIndexOutOfBoundsException( String.format( "Can't permute cols by %d: only %d rows.", cpCols, mat.getColumnDimension() ) ); } int[] rows = new int[mat.getRowDimension()]; for(int i=0;i<rows.length;i++) { rows[i] = (i+cpRows)%rows.length; } int[] cols = new int[mat.getColumnDimension()]; for(int i=0;i<cols.length;i++) { cols[i] = (i+cpCols)%cols.length; } Matrix newMat = mat.getMatrix(rows, cols); assert(newMat.getRowDimension() == mat.getRowDimension()); assert(newMat.getColumnDimension() == mat.getColumnDimension()); assert(newMat.get(0, 0) == mat.get(cpRows%mat.getRowDimension(), cpCols%mat.getColumnDimension())); return newMat; } /** * Modifies the {@link AFPChain#setOptAln(int[][][]) optAln} of an AFPChain * by permuting the second protein. * * Sets residue numbers in the second protein to <i>(i-cp)%len</i> * * @param afpChain * @param cp Amount leftwards (or rightward, if negative) to shift the */ private static void permuteOptAln(AFPChain afpChain, int cp) { int ca2len = afpChain.getCa2Length(); if( ca2len <= 0) { throw new IllegalArgumentException("No Ca2Length specified in "+afpChain); } // Allow negative cp points for convenience. if(cp == 0) { return; } if(cp <= -ca2len || cp >= ca2len) { // could just take cp%ca2len, but probably its a bug if abs(cp)>=ca2len throw new ArrayIndexOutOfBoundsException( String.format( "Permutation point %d must be between %d and %d for %s", cp, 1-ca2len,ca2len-1, afpChain.getName2() ) ); } if(cp < 0) { cp = cp + ca2len; } // the unprocessed alignment int[][][] optAln = afpChain.getOptAln(); int[] optLen = afpChain.getOptLen(); // the processed alignment List<List<List<Integer>>> blocks = new ArrayList<List<List<Integer>>>(afpChain.getBlockNum()*2); //Update residue indices // newi = (oldi-cp) % N for(int block = 0; block < afpChain.getBlockNum(); block++) { if(optLen[block]<1) continue; // set up storage for the current block List<List<Integer>> currBlock = new ArrayList<List<Integer>>(2); currBlock.add( new ArrayList<Integer>()); currBlock.add( new ArrayList<Integer>()); blocks.add(currBlock); // pos = 0 case currBlock.get(0).add( optAln[block][0][0] ); currBlock.get(1).add( (optAln[block][1][0]+cp ) % ca2len); for(int pos = 1; pos < optLen[block]; pos++) { //check if we need to start a new block //this happens when the new alignment crosses the protein terminus if( optAln[block][1][pos-1]+cp<ca2len && optAln[block][1][pos]+cp >= ca2len) { currBlock = new ArrayList<List<Integer>>(2); currBlock.add( new ArrayList<Integer>()); currBlock.add( new ArrayList<Integer>()); blocks.add(currBlock); } currBlock.get(0).add( optAln[block][0][pos] ); currBlock.get(1).add( (optAln[block][1][pos]+cp ) % ca2len); } } // save permuted blocks to afpChain assignOptAln(afpChain,blocks); } /** * Sometimes it's convenient to store an alignment using java collections, * where <tt>blocks.get(blockNum).get(0).get(pos)</tt> specifies the aligned * residue at position <i>pos</i> of block <i>blockNum</i> of the first * protein. * * This method takes such a collection and stores it into the afpChain's * {@link AFPChain#setOptAln(int[][][]) optAln}, setting the associated * length variables as well. * * @param afpChain * @param blocks */ private static void assignOptAln(AFPChain afpChain, List<List<List<Integer>>> blocks) { int[][][] optAln = new int[blocks.size()][][]; int[] optLen = new int[blocks.size()]; int optLength = 0; int numBlocks = blocks.size(); for(int block = 0; block < numBlocks; block++) { // block should be 2xN rectangular assert(blocks.get(block).size() == 2); assert( blocks.get(block).get(0).size() == blocks.get(block).get(1).size()); optLen[block] = blocks.get(block).get(0).size(); optLength+=optLen[block]; optAln[block] = new int[][] { new int[optLen[block]], new int[optLen[block]] }; for(int pos = 0; pos < optLen[block]; pos++) { optAln[block][0][pos] = blocks.get(block).get(0).get(pos); optAln[block][1][pos] = blocks.get(block).get(1).get(pos); } } afpChain.setBlockNum(numBlocks); afpChain.setOptAln(optAln); afpChain.setOptLen(optLen); afpChain.setOptLength(optLength); // TODO I don't know what these do. Should they be set? //afpChain.setBlockSize(blockSize); //afpChain.setBlockResList(blockResList); //afpChain.setChainLen(chainLen); } /** * Finds the optimal alignment between two proteins allowing for a circular * permutation (CP). * * The precise algorithm is controlled by the * {@link OptimalCECPParameters parameters}. If the parameter * {@link OptimalCECPParameters#isTryAllCPs() tryAllCPs} is true, all possible * CP sites are tried and the optimal site is returned. Otherwise, the * {@link OptimalCECPParameters#getCPPoint() cpPoint} parameter is used to * determine the CP point, greatly reducing the computation required. * * @param ca1 CA atoms of the first protein * @param ca2 CA atoms of the second protein * @param param {@link CeParameters} object * @return The best-scoring alignment * @throws StructureException * * @see #alignOptimal(Atom[], Atom[], Object, AFPChain[]) */ @Override public AFPChain align(Atom[] ca1, Atom[] ca2, Object param) throws StructureException { if(params.isTryAllCPs()) { return alignOptimal(ca1,ca2,param,null); } else { int cpPoint = params.getCPPoint(); return alignPermuted(ca1, ca2, param, cpPoint); } } /** * Finds the optimal alignment between two proteins allowing for a circular * permutation (CP). * * This algorithm performs a CE alignment for each possible CP site. This is * quite slow. Use {@link #alignHeuristic(Atom[], Atom[], Object)} for a * faster algorithm. * * @param ca1 CA atoms of the first protein * @param ca2 CA atoms of the second protein * @param param {@link CeParameters} object * @param alignments If not null, should be an empty array of the same length as * ca2. This will be filled with the alignments from permuting ca2 by * 0 to n-1 residues. * @return The best-scoring alignment * @throws StructureException */ public AFPChain alignOptimal(Atom[] ca1, Atom[] ca2, Object param, AFPChain[] alignments) throws StructureException { long startTime = System.currentTimeMillis(); if(alignments != null && alignments.length != ca2.length) { throw new IllegalArgumentException("scores param should have same length as ca2"); } AFPChain unaligned = super.align(ca1, ca2, param); AFPChain bestAlignment = unaligned; if(debug) { // print progress bar header System.out.print("|"); for(int cp=1;cp<ca2.length-1;cp++) { System.out.print("="); } System.out.println("|"); System.out.print("."); } if(alignments != null) { alignments[0] = unaligned; } for(int cp=1;cp<ca2.length;cp++) { // clone ca2 to prevent side effects from propegating Atom[] ca2p = StructureTools.cloneAtomArray(ca2); //permute one each time. Alters ca2p as a side effect AFPChain currentAlignment = alignPermuted(ca1,ca2p,param,cp); // increment progress bar if(debug) System.out.print("."); // fix up names, since cloning ca2 wipes it if (ca2.length!=0 && ca2[0].getGroup().getChain()!=null && ca2[0].getGroup().getChain().getStructure()!=null) { currentAlignment.setName2(ca2[0].getGroup().getChain().getStructure().getName()+" CP="+cp); } double currentScore = currentAlignment.getAlignScore(); if(alignments != null) { alignments[cp] = currentAlignment; } if(currentScore>bestAlignment.getAlignScore()) { bestAlignment = currentAlignment; } } if(debug) { long elapsedTime = System.currentTimeMillis()-startTime; System.out.println(); System.out.format("%d alignments took %.4f s (%.1f ms avg)\n", ca2.length, elapsedTime/1000., (double)elapsedTime/ca2.length); } return bestAlignment; } public static void main(String[] args){ try { String name1, name2; //int[] cps= new int[] {}; //Concanavalin //name1 = "2pel.A"; //name2 = "3cna"; //cps = new int[] {122,0,3}; //small case name1 = "d1qdmA1"; //name1 = "1QDM.A"; name2 = "d1nklA_"; /*cps = new int[] { //41, // CECP optimum 19,59, // unpermuted local minima in TM-score //39, // TM-score optimum 0, };*/ //1itb selfsymmetry //name1 = "1ITB.A"; //name2 = "1ITB.A"; //cps = new int[] {92}; name1=name2 = "2LSQ"; OptimalCECPMain ce = new OptimalCECPMain(); CeParameters params = (CeParameters) ce.getParameters(); ce.setParameters(params); AtomCache cache = new AtomCache(); Atom[] ca1 = cache.getAtoms(name1); Atom[] ca2 = cache.getAtoms(name2); AFPChain afpChain; // find optimal solution AFPChain[] alignments = new AFPChain[ca2.length]; afpChain = ce.alignOptimal(ca1, ca2, params, alignments); System.out.format("Optimal Score: %.2f\n", afpChain.getAlignScore()); System.out.println("Pos\tScore\tTMScore\tLen\tRMSD\tBlocks"); for(int i = 0; i< alignments.length; i++) { double tm = AFPChainScorer.getTMScore(alignments[i], ca1, ca2); System.out.format("%d\t%.2f\t%.2f\t%d\t%.2f\t%d\n", i, alignments[i].getAlignScore(), tm, alignments[i].getOptLength(), alignments[i].getTotalRmsdOpt(), alignments[i].getBlockNum() ); } //displayAlignment(afpChain,ca1,ca2); // permuted alignment //for(int cp : cps) { // new copy of ca2, since alignPermuted has side effects //Atom[] ca2clone = cache.getAtoms(name2); //afpChain = ce.alignPermuted(ca1, ca2clone, params, cp); //displayAlignment(afpChain, ca1, ca2); //displayAlignment(alignments[cp],ca1,ca2); //} // CECP alignment CeCPMain cecp = new CeCPMain(); afpChain = cecp.align(ca1, ca2); displayAlignment(afpChain,ca1,ca2); System.out.println("Inspect additional alignments?"); Scanner scanner = new Scanner(System.in); System.out.print("CP location [0,"+ca2.length+"): "); while(scanner.hasNext()) { if(scanner.hasNextInt()) { int cp = scanner.nextInt(); if(0<=cp && cp<ca2.length) { alignments[cp].setName1(name1); alignments[cp].setName2(name2+"@"+cp); displayAlignment(alignments[cp],ca1,ca2); Thread.sleep(1000); } } else { //String next = scanner.nextLine(); } System.out.print("CP location [0,"+ca2.length+"): "); } scanner.close(); } catch (Exception e) { e.printStackTrace(); } } /** * Try showing a the afpChain in a GUI. * * <p>requires additional dependencies biojava-structure-gui and JmolApplet * * @param afpChain * @param ca1 * @param ca2 * @throws ClassNotFoundException * @throws NoSuchMethodException * @throws InvocationTargetException * @throws IllegalAccessException * @throws StructureException */ private static void displayAlignment(AFPChain afpChain, Atom[] ca1, Atom[] ca2) throws ClassNotFoundException, NoSuchMethodException, InvocationTargetException, IllegalAccessException, StructureException { Atom[] ca1clone = StructureTools.cloneAtomArray(ca1); Atom[] ca2clone = StructureTools.cloneAtomArray(ca2); if (! GuiWrapper.isGuiModuleInstalled()) { System.err.println("The biojava-structure-gui and/or JmolApplet modules are not installed. Please install!"); // display alignment in console System.out.println(afpChain.toCE(ca1clone, ca2clone)); } else { Object jmol = GuiWrapper.display(afpChain,ca1clone,ca2clone); GuiWrapper.showAlignmentImage(afpChain, ca1clone,ca2clone,jmol); } } }