package org.biojava.nbio.structure.io.mmtf; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import javax.vecmath.Matrix4d; import org.biojava.nbio.structure.AminoAcid; import org.biojava.nbio.structure.AminoAcidImpl; import org.biojava.nbio.structure.Atom; import org.biojava.nbio.structure.Bond; import org.biojava.nbio.structure.Chain; import org.biojava.nbio.structure.ExperimentalTechnique; import org.biojava.nbio.structure.Group; import org.biojava.nbio.structure.GroupType; import org.biojava.nbio.structure.NucleotideImpl; import org.biojava.nbio.structure.PDBCrystallographicInfo; import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.StructureException; import org.biojava.nbio.structure.StructureIO; import org.biojava.nbio.structure.align.util.AtomCache; import org.biojava.nbio.structure.io.FileParsingParameters; import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory; import org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider; import org.biojava.nbio.structure.io.mmcif.model.ChemComp; import org.biojava.nbio.structure.quaternary.BioAssemblyInfo; import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation; import org.biojava.nbio.structure.secstruc.DSSPParser; import org.biojava.nbio.structure.secstruc.SecStrucCalc; import org.biojava.nbio.structure.secstruc.SecStrucState; import org.biojava.nbio.structure.secstruc.SecStrucType; import org.biojava.nbio.structure.xtal.CrystalCell; import org.biojava.nbio.structure.xtal.SpaceGroup; import org.rcsb.mmtf.dataholders.DsspType; import org.rcsb.mmtf.utils.CodecUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * A utils class of functions needed for Biojava to read and write to mmtf. * @author Anthony Bradley * */ public class MmtfUtils { private static final Logger LOGGER = LoggerFactory.getLogger(MmtfUtils.class); /** * Set up the configuration parameters for BioJava. */ public static AtomCache setUpBioJava() { // Set up the atom cache etc AtomCache cache = new AtomCache(); cache.setUseMmCif(true); FileParsingParameters params = cache.getFileParsingParams(); params.setCreateAtomBonds(true); params.setAlignSeqRes(true); params.setParseBioAssembly(true); DownloadChemCompProvider cc = new DownloadChemCompProvider(); ChemCompGroupFactory.setChemCompProvider(cc); cc.checkDoFirstInstall(); cache.setFileParsingParams(params); StructureIO.setAtomCache(cache); return cache; } /** * Set up the configuration parameters for BioJava. * @param extraUrl the string describing the URL (or file path) from which * to get missing CCD entries. */ public static AtomCache setUpBioJava(String extraUrl) { // Set up the atom cache etc AtomCache cache = new AtomCache(); cache.setUseMmCif(true); FileParsingParameters params = cache.getFileParsingParams(); params.setCreateAtomBonds(true); params.setAlignSeqRes(true); params.setParseBioAssembly(true); DownloadChemCompProvider.serverBaseUrl = extraUrl; DownloadChemCompProvider.useDefaultUrlLayout = false; DownloadChemCompProvider cc = new DownloadChemCompProvider(); ChemCompGroupFactory.setChemCompProvider(cc); cc.checkDoFirstInstall(); cache.setFileParsingParams(params); StructureIO.setAtomCache(cache); return cache; } /** * This sets all microheterogeneous groups * (previously alternate location groups) as separate groups. * This is required because mmtf groups cannot have multiple HET codes. * @param bioJavaStruct */ public static void fixMicroheterogenity(Structure bioJavaStruct) { // Loop through the models for (int i=0; i<bioJavaStruct.nrModels(); i++){ // Then the chains List<Chain> chains = bioJavaStruct.getModel(i); for (Chain c : chains) { // Build a new list of groups List<Group> outGroups = new ArrayList<>(); for (Group g : c.getAtomGroups()) { List<Group> removeList = new ArrayList<>(); for (Group altLoc : g.getAltLocs()) { // Check if they are not equal -> microheterogenity if(! altLoc.getPDBName().equals(g.getPDBName())) { // Now add this group to the main list removeList.add(altLoc); } } // Add this group outGroups.add(g); // Remove any microhet alt locs g.getAltLocs().removeAll(removeList); // Add these microhet alt locs outGroups.addAll(removeList); } c.setAtomGroups(outGroups); } } } /** * Generate the secondary structure for a Biojava structure object. * @param bioJavaStruct the Biojava structure for which it is to be calculate. */ public static void calculateDsspSecondaryStructure(Structure bioJavaStruct) { SecStrucCalc ssp = new SecStrucCalc(); try{ ssp.calculate(bioJavaStruct, true); } catch(StructureException e) { LOGGER.warn("Could not calculate secondary structure (error {}). Will try to get a DSSP file from the RCSB web server instead.", e.getMessage()); try { DSSPParser.fetch(bioJavaStruct.getPDBCode(), bioJavaStruct, true); //download from PDB the DSSP result } catch(Exception bige){ LOGGER.warn("Could not get a DSSP file from RCSB web server. There will not be secondary structure assignment for this structure ({}). Error: {}", bioJavaStruct.getPDBCode(), bige.getMessage()); } } } /** * Get the string representation of a space group. * @param spaceGroup the input SpaceGroup object * @return the space group as a string. */ public static String getSpaceGroupAsString(SpaceGroup spaceGroup) { if(spaceGroup==null){ return "NA"; } else{ return spaceGroup.getShortSymbol(); } } /** * Get the length six array of the unit cell information. * @param xtalInfo the input PDBCrystallographicInfo object * @return the length six float array */ public static float[] getUnitCellAsArray(PDBCrystallographicInfo xtalInfo) { CrystalCell xtalCell = xtalInfo.getCrystalCell(); if(xtalCell==null){ return null; }else{ float[] inputUnitCell = new float[6]; inputUnitCell[0] = (float) xtalCell.getA(); inputUnitCell[1] = (float) xtalCell.getB(); inputUnitCell[2] = (float) xtalCell.getC(); inputUnitCell[3] = (float) xtalCell.getAlpha(); inputUnitCell[4] = (float) xtalCell.getBeta(); inputUnitCell[5] = (float) xtalCell.getGamma(); return inputUnitCell; } } /** * Converts the set of experimental techniques to an array of strings. * @param experimentalTechniques the input set of experimental techniques * @return the array of strings describing the methods used. */ public static String[] techniquesToStringArray(Set<ExperimentalTechnique> experimentalTechniques) { if(experimentalTechniques==null){ return new String[0]; } String[] outArray = new String[experimentalTechniques.size()]; int index = 0; for (ExperimentalTechnique experimentalTechnique : experimentalTechniques) { outArray[index] = experimentalTechnique.getName(); index++; } return outArray; } /** * Covert a Date object to ISO time format. * @param inputDate The input date object * @return the time in ISO time format */ public static String dateToIsoString(Date inputDate) { DateFormat dateStringFormat = new SimpleDateFormat("yyyy-MM-dd"); return dateStringFormat.format(inputDate); } /** * Convert a bioassembly information into a map of transform, chainindices it relates to. * @param bioassemblyInfo the bioassembly info object for this structure * @param chainIdToIndexMap the map of chain ids to the index that chain corresponds to. * @return the bioassembly information (as primitive types). */ public static Map<double[], int[]> getTransformMap(BioAssemblyInfo bioassemblyInfo, Map<String, Integer> chainIdToIndexMap) { Map<Matrix4d, List<Integer>> matMap = new HashMap<>(); List<BiologicalAssemblyTransformation> transforms = bioassemblyInfo.getTransforms(); for (BiologicalAssemblyTransformation transformation : transforms) { Matrix4d transMatrix = transformation.getTransformationMatrix(); String transChainId = transformation.getChainId(); if (!chainIdToIndexMap.containsKey(transChainId)){ continue; } int chainIndex = chainIdToIndexMap.get(transformation.getChainId()); if(matMap.containsKey(transMatrix)){ matMap.get(transMatrix).add(chainIndex); } else{ List<Integer> chainIdList = new ArrayList<>(); chainIdList.add(chainIndex); matMap.put(transMatrix, chainIdList); } } Map<double[], int[]> outMap = new HashMap<>(); for (Entry<Matrix4d, List<Integer>> entry : matMap.entrySet()) { outMap.put(convertToDoubleArray(entry.getKey()), CodecUtils.convertToIntArray(entry.getValue())); } return outMap; } /** * Convert a four-d matrix to a double array. Row-packed. * @param transformationMatrix the input matrix4d object * @return the double array (16 long). */ public static double[] convertToDoubleArray(Matrix4d transformationMatrix) { // Initialise the output array double[] outArray = new double[16]; // Iterate over the matrix for(int i=0; i<4; i++){ for(int j=0; j<4; j++){ // Now set this element outArray[i*4+j] = transformationMatrix.getElement(i,j); } } return outArray; } /** * Count the total number of groups in the structure * @param structure the input structure * @return the total number of groups */ public static int getNumGroups(Structure structure) { int count = 0; for(int i=0; i<structure.nrModels(); i++) { for(Chain chain : structure.getChains(i)){ count+= chain.getAtomGroups().size(); } } return count; } /** * Get a list of atoms for a group. Only add each atom once. * @param inputGroup the Biojava Group to consider * @return the atoms for the input Biojava Group */ public static List<Atom> getAtomsForGroup(Group inputGroup) { Set<Atom> uniqueAtoms = new HashSet<Atom>(); List<Atom> theseAtoms = new ArrayList<Atom>(); for(Atom a: inputGroup.getAtoms()){ theseAtoms.add(a); uniqueAtoms.add(a); } List<Group> altLocs = inputGroup.getAltLocs(); for(Group thisG: altLocs){ for(Atom a: thisG.getAtoms()){ if(uniqueAtoms.contains(a)){ continue; } theseAtoms.add(a); } } return theseAtoms; } /** * Find the number of bonds in a group * @param atomsInGroup the list of atoms in the group * @return the number of atoms in the group */ public static int getNumBondsInGroup(List<Atom> atomsInGroup) { int bondCounter = 0; for(Atom atom : atomsInGroup) { if(atom.getBonds()==null){ continue; } for(Bond bond : atom.getBonds()) { // Now set the bonding information. Atom other = bond.getOther(atom); // If both atoms are in the group if (atomsInGroup.indexOf(other)!=-1){ Integer firstBondIndex = atomsInGroup.indexOf(atom); Integer secondBondIndex = atomsInGroup.indexOf(other); // Don't add the same bond twice if (firstBondIndex<secondBondIndex){ bondCounter++; } } } } return bondCounter; } /** * Get the secondary structure as defined by DSSP. * @param group the input group to be calculated * @return the integer index of the group type. */ public static int getSecStructType(Group group) { SecStrucState props = (SecStrucState) group.getProperty("secstruc"); if(props==null){ return DsspType.NULL_ENTRY.getDsspIndex(); } return DsspType.dsspTypeFromString(props.getType().name).getDsspIndex(); } /** * Get the secondary structure as defined by DSSP. * @param group the input group to be calculated * @param the integer index of the group type. */ public static void setSecStructType(Group group, int dsspIndex) { SecStrucType secStrucType = getSecStructTypeFromDsspIndex(dsspIndex); SecStrucState secStrucState = new SecStrucState(group, "MMTF_ASSIGNED", secStrucType); if(secStrucType!=null){ group.setProperty("secstruc", secStrucState); } else{ } } /** * Set the DSSP type based on a numerical index. * @param dsspIndex the integer index of the type to set * @return the instance of the SecStrucType object holding this secondary * structure type. */ public static SecStrucType getSecStructTypeFromDsspIndex(int dsspIndex) { String dsspType = DsspType.dsspTypeFromInt(dsspIndex).getDsspType(); for(SecStrucType secStrucType : SecStrucType.values()) { if(dsspType==secStrucType.name) { return secStrucType; } } // Return a null entry. return null; } /** * Get summary information for the structure. * @param structure the structure for which to get the information. */ public static MmtfSummaryDataBean getStructureInfo(Structure structure) { MmtfSummaryDataBean mmtfSummaryDataBean = new MmtfSummaryDataBean(); // Get all the atoms List<Atom> theseAtoms = new ArrayList<>(); List<Chain> allChains = new ArrayList<>(); Map<String, Integer> chainIdToIndexMap = new HashMap<>(); int chainCounter = 0; int bondCount = 0; mmtfSummaryDataBean.setAllAtoms(theseAtoms); mmtfSummaryDataBean.setAllChains(allChains); mmtfSummaryDataBean.setChainIdToIndexMap(chainIdToIndexMap); for (int i=0; i<structure.nrModels(); i++){ List<Chain> chains = structure.getModel(i); allChains.addAll(chains); for (Chain chain : chains) { String idOne = chain.getId(); if (!chainIdToIndexMap.containsKey(idOne)) { chainIdToIndexMap.put(idOne, chainCounter); } chainCounter++; for (Group g : chain.getAtomGroups()) { for(Atom atom: getAtomsForGroup(g)){ theseAtoms.add(atom); // If both atoms are in the group if (atom.getBonds()!=null){ bondCount+=atom.getBonds().size(); } } } } } // Assumes all bonds are referenced twice mmtfSummaryDataBean.setNumBonds(bondCount/2); return mmtfSummaryDataBean; } /** * Get a list of N 4*4 matrices from a single list of doubles of length 16*N. * @param ncsOperMatrixList the input list of doubles * @return the list of 4*4 matrics */ public static Matrix4d[] getNcsAsMatrix4d(double[][] ncsOperMatrixList) { if(ncsOperMatrixList==null){ return null; } int numMats = ncsOperMatrixList.length; if(numMats==0){ return null; } if(numMats==1 && ncsOperMatrixList[0].length==0){ return null; } Matrix4d[] outList = new Matrix4d[numMats]; for(int i=0; i<numMats; i++){ outList[i] = new Matrix4d(ncsOperMatrixList[i]); } return outList; } /** * Get a list of length N*16 of a list of Matrix4d*N. * @param ncsOperators the {@link Matrix4d} list * @return the list of length N*16 of the list of matrices */ public static double[][] getNcsAsArray(Matrix4d[] ncsOperators) { if(ncsOperators==null){ return new double[0][0]; } double[][] outList = new double[ncsOperators.length][16]; for(int i=0; i<ncsOperators.length;i++){ outList[i] = convertToDoubleArray(ncsOperators[i]); } return outList; } /** * Insert the group in the given position in the sequence. * @param chain the chain to add the seq res group to * @param group the group to add * @param sequenceIndexId the index to add it in */ public static void insertSeqResGroup(Chain chain, Group group, int sequenceIndexId) { List<Group> seqResGroups = chain.getSeqResGroups(); addGroupAtId(seqResGroups, group, sequenceIndexId); } /** * Add the missing groups to the SeqResGroups. * @param modelChain the chain to add the information for * @param sequence the sequence of the construct */ public static void addSeqRes(Chain modelChain, String sequence) { List<Group> seqResGroups = modelChain.getSeqResGroups(); GroupType chainType = getChainType(modelChain.getAtomGroups()); for(int i=0; i<sequence.length(); i++){ char singleLetterCode = sequence.charAt(i); Group group = null; if(seqResGroups.size()<=i){ } else{ group=seqResGroups.get(i); } if(group!=null){ continue; } group = getSeqResGroup(modelChain, singleLetterCode, chainType); addGroupAtId(seqResGroups, group, i); seqResGroups.set(i, group); } } private static GroupType getChainType(List<Group> groups) { for(Group group : groups) { if(group==null){ continue; } else if(group.getType()!=GroupType.HETATM){ return group.getType(); } } return GroupType.HETATM; } private static <T> void addGroupAtId(List<T> seqResGroups, T group, int sequenceIndexId) { while(seqResGroups.size()<=sequenceIndexId){ seqResGroups.add(null); } if(sequenceIndexId>=0){ seqResGroups.set(sequenceIndexId, group); } } private static Group getSeqResGroup(Chain modelChain, char singleLetterCode, GroupType type) { if(type==GroupType.AMINOACID){ AminoAcidImpl a = new AminoAcidImpl(); a.setRecordType(AminoAcid.SEQRESRECORD); a.setAminoType(singleLetterCode); ChemComp chemComp = new ChemComp(); chemComp.setOne_letter_code(""+singleLetterCode); a.setChemComp(chemComp); return a; } else if (type==GroupType.NUCLEOTIDE) { NucleotideImpl n = new NucleotideImpl(); ChemComp chemComp = new ChemComp(); chemComp.setOne_letter_code(""+singleLetterCode); n.setChemComp(chemComp); return n; } else{ return null; } } }