/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package org.biojava.nbio.structure.cluster; import java.util.ArrayList; import java.util.List; import org.biojava.nbio.core.exceptions.CompoundNotFoundException; import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.StructureException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * The SubunitClusterer takes as input a collection of {@link Subunit} and * returns a collection of {@link SubunitCluster}. * * @author Aleix Lafita * @since 5.0.0 * */ public class SubunitClusterer { private static final Logger logger = LoggerFactory .getLogger(SubunitClusterer.class); /** Prevent instantiation **/ private SubunitClusterer() { } public static List<SubunitCluster> cluster(Structure structure, SubunitClustererParameters params) { List<Subunit> subunits = SubunitExtractor.extractSubunits(structure, params.getAbsoluteMinimumSequenceLength(), params.getMinimumSequenceLengthFraction(), params.getMinimumSequenceLength()); return cluster(subunits, params); } public static List<SubunitCluster> cluster(List<Subunit> subunits, SubunitClustererParameters params) { // The collection of clusters to return List<SubunitCluster> clusters = new ArrayList<SubunitCluster>(); if (subunits.size() == 0) return clusters; // First generate a new cluster for each Subunit for (Subunit s : subunits) clusters.add(new SubunitCluster(s)); // Now merge clusters by IDENTITY for (int c1 = 0; c1 < clusters.size(); c1++) { for (int c2 = clusters.size() - 1; c2 > c1; c2--) { if (clusters.get(c1).mergeIdentical(clusters.get(c2))) clusters.remove(c2); } } if (params.getClustererMethod() == SubunitClustererMethod.IDENTITY) return clusters; // Now merge clusters by SEQUENCE similarity for (int c1 = 0; c1 < clusters.size(); c1++) { for (int c2 = clusters.size() - 1; c2 > c1; c2--) { try { if (clusters.get(c1).mergeSequence(clusters.get(c2), params.getSequenceIdentityThreshold(), params.getCoverageThreshold())) clusters.remove(c2); } catch (CompoundNotFoundException e) { logger.warn("Could not merge by Sequence. {}", e.getMessage()); } } } if (params.getClustererMethod() == SubunitClustererMethod.SEQUENCE) return clusters; // Now merge clusters by STRUCTURAL similarity for (int c1 = 0; c1 < clusters.size(); c1++) { for (int c2 = clusters.size() - 1; c2 > c1; c2--) { try { if (clusters.get(c1).mergeStructure(clusters.get(c2), params.getRmsdThreshold(), params.getCoverageThreshold())) clusters.remove(c2); } catch (StructureException e) { logger.warn("Could not merge by Structure. {}", e.getMessage()); } } } if (!params.isInternalSymmetry()) return clusters; // Now divide clusters by their INTERNAL SYMMETRY for (int c = 0; c < clusters.size(); c++) { try { clusters.get(c).divideInternally(params.getCoverageThreshold(), params.getRmsdThreshold(), params.getMinimumSequenceLength()); } catch (StructureException e) { logger.warn("Error analyzing internal symmetry. {}", e.getMessage()); } } // After internal symmetry merge again by structural similarity // Use case: C8 propeller with 3 chains with 3+3+2 repeats each for (int c1 = 0; c1 < clusters.size(); c1++) { for (int c2 = clusters.size() - 1; c2 > c1; c2--) { try { if (clusters.get(c1).mergeStructure(clusters.get(c2), params.getRmsdThreshold(), params.getCoverageThreshold())) clusters.remove(c2); } catch (StructureException e) { logger.warn("Could not merge by Structure. {}", e.getMessage()); } } } return clusters; } }