/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package org.biojava.nbio.structure.io.mmcif; import java.lang.reflect.Field; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.biojava.nbio.structure.Atom; import org.biojava.nbio.structure.Chain; import org.biojava.nbio.structure.Element; import org.biojava.nbio.structure.Group; import org.biojava.nbio.structure.GroupType; import org.biojava.nbio.structure.Structure; import org.biojava.nbio.structure.io.FileConvert; import org.biojava.nbio.structure.io.mmcif.model.AbstractBean; import org.biojava.nbio.structure.io.mmcif.model.AtomSite; import org.biojava.nbio.structure.io.mmcif.model.CIFLabel; import org.biojava.nbio.structure.io.mmcif.model.Cell; import org.biojava.nbio.structure.io.mmcif.model.IgnoreField; import org.biojava.nbio.structure.io.mmcif.model.Symmetry; import org.biojava.nbio.structure.xtal.CrystalCell; import org.biojava.nbio.structure.xtal.SpaceGroup; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Some tools for mmCIF file writing. * * See http://www.iucr.org/__data/assets/pdf_file/0019/22618/cifguide.pdf * * CIF categories are represented as a simple bean, typically extending {@link AbstractBean}. * By default, all fields from the bean are taken as the CIF labels. Fields * may be omitted by annotating them as {@link IgnoreField @IgnoreField}. * The CIF label for a field may be changed (for instance, for fields that * are not valid Java identifiers) by defining a function * <tt>static Map<String,String> getCIFLabelMap()</tt> * mapping from the field's name to the correct label. * * @author Jose Duarte * @author Spencer Bliven */ public class MMCIFFileTools { private static final Logger logger = LoggerFactory.getLogger(MMCIFFileTools.class); private static final String newline = System.getProperty("line.separator"); /** * The character to be printed out in cases where a value is not assigned in mmCIF files */ public static final String MMCIF_MISSING_VALUE = "?"; /** * The character to be printed out as a default value in mmCIF files, e.g. for the default alt_locs */ public static final String MMCIF_DEFAULT_VALUE = "."; /** * Produces a mmCIF loop header string for the given categoryName and className. * className must be one of the beans in the {@link org.biojava.nbio.structure.io.mmcif.model} package * @param categoryName * @param className * @return * @throws ClassNotFoundException if the given className can not be found */ public static String toLoopMmCifHeaderString(String categoryName, String className) throws ClassNotFoundException { StringBuilder str = new StringBuilder(); str.append(SimpleMMcifParser.LOOP_START+newline); Class<?> c = Class.forName(className); for (Field f : getFields(c)) { str.append(categoryName+"."+f.getName()+newline); } return str.toString(); } /** * Converts a mmCIF bean (see {@link org.biojava.nbio.structure.io.mmcif.model} to * a String representing it in mmCIF (single-record) format. * @param categoryName * @param o * @return */ public static String toMMCIF(String categoryName, Object o) { StringBuilder sb = new StringBuilder(); Class<?> c = o.getClass(); Field[] fields = getFields(c); String[] names = getFieldNames(fields); int maxFieldNameLength = getMaxStringLength(names); for (int i=0;i<fields.length;i++) { Field f = fields[i]; String name = names[i]; sb.append(categoryName).append(".").append(name); int spacing = maxFieldNameLength - name.length() + 3; try { Object obj = f.get(o); String val; if (obj==null) { logger.debug("Field {} is null, will write it out as {}",name,MMCIF_MISSING_VALUE); val = MMCIF_MISSING_VALUE; } else { val = (String) obj; } for (int j=0;j<spacing;j++) sb.append(' '); sb.append(addMmCifQuoting(val)); sb.append(newline); } catch (IllegalAccessException e) { logger.warn("Field {} is inaccessible", name); continue; } catch (ClassCastException e) { logger.warn("Could not cast value to String for field {}",name); continue; } } sb.append(SimpleMMcifParser.COMMENT_CHAR+newline); return sb.toString(); } /** * Gets all fields for a particular class, filtering fields annotated * with {@link IgnoreField @IgnoreField}. * * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)} * on all fields. * @param c * @return */ public static Field[] getFields(Class<?> c) { Field[] allFields = c.getDeclaredFields(); Field[] fields = new Field[allFields.length]; int n = 0; for(Field f : allFields) { f.setAccessible(true); IgnoreField anno = f.getAnnotation(IgnoreField.class); if(anno == null) { fields[n] = f; n++; } } return Arrays.copyOf(fields, n); } /** * Gets the mmCIF record name for each field. This is generally just * the name of the field or the value specified by the {@link CIFLabel @CIFLabel} annotation. * * As a side effect, calls {@link Field#setAccessible(boolean) setAccessible(true)} * on all fields. * @param fields * @return */ public static String[] getFieldNames(Field[] fields) { String[] names = new String[fields.length]; for(int i=0;i<fields.length;i++) { Field f = fields[i]; f.setAccessible(true); String rawName = fields[i].getName(); CIFLabel cifLabel = f.getAnnotation(CIFLabel.class); if(cifLabel != null) { names[i] = cifLabel.label(); } else { names[i] = rawName; } } return names; } /** * Converts a list of mmCIF beans (see {@link org.biojava.nbio.structure.io.mmcif.model} to * a String representing them in mmCIF loop format with one record per line. * @param list * @return */ public static <T> String toMMCIF(List<T> list, Class<T> klass) { if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!"); Field[] fields = getFields(klass); int[] sizes = getFieldSizes(list,fields); StringBuilder sb = new StringBuilder(); for (T o:list) { sb.append(toSingleLoopLineMmCifString(o, fields, sizes)); } sb.append(SimpleMMcifParser.COMMENT_CHAR+newline); return sb.toString(); } /** * Converts a list of mmCIF beans (see {@link org.biojava.nbio.structure.io.mmcif.model} to * a String representing them in mmCIF loop format with one record per line. * @param list * @return * @deprecated The {@link #toMMCIF(List, Class)} provides compile-time type safety * @throws ClassCastException if not all list elements have the same type */ @Deprecated @SuppressWarnings("unchecked") public static <T> String toMMCIF(List<T> list) { Class<T> klass = (Class<T>)list.get(0).getClass(); for(T t : list) { if( klass != t.getClass() ) { throw new ClassCastException("Not all loop elements have the same fields"); } } return toMMCIF(list,klass); } /** * Given a mmCIF bean produces a String representing it in mmCIF loop format as a single record line * @param record * @param fields Set of fields for the record. If null, will be calculated from the class of the record * @param sizes the size of each of the fields * @return */ private static String toSingleLoopLineMmCifString(Object record, Field[] fields, int[] sizes) { StringBuilder str = new StringBuilder(); Class<?> c = record.getClass(); if(fields == null) fields = getFields(c); if (sizes.length!=fields.length) throw new IllegalArgumentException("The given sizes of fields differ from the number of declared fields"); int i = -1; for (Field f : fields) { i++; f.setAccessible(true); try { Object obj = f.get(record); String val; if (obj==null) { logger.debug("Field {} is null, will write it out as {}",f.getName(),MMCIF_MISSING_VALUE); val = MMCIF_MISSING_VALUE; } else { val = (String) obj; } str.append(String.format("%-"+sizes[i]+"s ", addMmCifQuoting(val))); } catch (IllegalAccessException e) { logger.warn("Field {} is inaccessible", f.getName()); continue; } catch (ClassCastException e) { logger.warn("Could not cast value to String for field {}",f.getName()); continue; } } str.append(newline); return str.toString(); } /** * Adds quoting to a String according to the STAR format (mmCIF) rules * @param val * @return */ private static String addMmCifQuoting(String val) { String newval; if (val.contains("'")) { // double quoting for strings containing single quotes (not strictly necessary but it's what the PDB usually does) newval = "\""+val+"\""; } else if (val.contains(" ")) { // single quoting for stings containing spaces newval = "'"+val+"'"; } else { if (val.contains(" ") && val.contains("'")) { // TODO deal with this case logger.warn("Value contains both spaces and single quotes, won't format it: {}. CIF ouptut will likely be invalid.",val); } newval = val; } // TODO deal with all the other cases: e.g. multi-line quoting with ;; return newval; } /** * Converts a SpaceGroup object to a {@link Symmetry} object. * @param sg * @return */ public static Symmetry convertSpaceGroupToSymmetry(SpaceGroup sg) { Symmetry sym = new Symmetry(); sym.setSpace_group_name_H_M(sg.getShortSymbol()); // TODO do we need to fill any of the other values? return sym; } /** * Converts a CrystalCell object to a {@link Cell} object. * @param c * @return */ public static Cell convertCrystalCellToCell(CrystalCell c) { Cell cell = new Cell(); cell.setLength_a(String.format("%.3f",c.getA())); cell.setLength_b(String.format("%.3f",c.getB())); cell.setLength_c(String.format("%.3f",c.getC())); cell.setAngle_alpha(String.format("%.3f",c.getAlpha())); cell.setAngle_beta(String.format("%.3f",c.getBeta())); cell.setAngle_gamma(String.format("%.3f",c.getGamma())); return cell; } /** * Converts an Atom object to an {@link AtomSite} object. * @param a * @param model * @param chainId * @param internalChainId * @return */ public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainId, String internalChainId) { return convertAtomToAtomSite(a, model, chainId, internalChainId, a.getPDBserial()); } /** * Converts an Atom object to an {@link AtomSite} object. * @param a * @param model * @param chainId * @param internalChainId * @param atomId the atom id to be written to AtomSite * @return */ public static AtomSite convertAtomToAtomSite(Atom a, int model, String chainId, String internalChainId, int atomId) { /* ATOM 7 C CD . GLU A 1 24 ? -10.109 15.374 38.853 1.00 50.05 ? ? ? ? ? ? 24 GLU A CD 1 ATOM 8 O OE1 . GLU A 1 24 ? -9.659 14.764 37.849 1.00 49.80 ? ? ? ? ? ? 24 GLU A OE1 1 ATOM 9 O OE2 . GLU A 1 24 ? -11.259 15.171 39.310 1.00 50.51 ? ? ? ? ? ? 24 GLU A OE2 1 ATOM 10 N N . LEU A 1 25 ? -5.907 18.743 37.412 1.00 41.55 ? ? ? ? ? ? 25 LEU A N 1 ATOM 11 C CA . LEU A 1 25 ? -5.168 19.939 37.026 1.00 37.55 ? ? ? ? ? ? 25 LEU A CA 1 */ Group g = a.getGroup(); String record ; if ( g.getType().equals(GroupType.HETATM) ) { record = "HETATM"; } else { record = "ATOM"; } String entityId = "0"; String labelSeqId = Integer.toString(g.getResidueNumber().getSeqNum()); if (g.getChain()!=null && g.getChain().getEntityInfo()!=null) { entityId = Integer.toString(g.getChain().getEntityInfo().getMolId()); labelSeqId = Integer.toString(g.getChain().getEntityInfo().getAlignedResIndex(g, g.getChain())); } Character altLoc = a.getAltLoc() ; String altLocStr; if (altLoc==null || altLoc == ' ') { altLocStr = MMCIF_DEFAULT_VALUE; } else { altLocStr = altLoc.toString(); } Element e = a.getElement(); String eString = e.toString().toUpperCase(); if ( e.equals(Element.R)) { eString = "X"; } String insCode = MMCIF_MISSING_VALUE; if (g.getResidueNumber().getInsCode()!=null ) { insCode = Character.toString(g.getResidueNumber().getInsCode()); } AtomSite atomSite = new AtomSite(); atomSite.setGroup_PDB(record); atomSite.setId(Integer.toString(atomId)); atomSite.setType_symbol(eString); atomSite.setLabel_atom_id(a.getName()); atomSite.setLabel_alt_id(altLocStr); atomSite.setLabel_comp_id(g.getPDBName()); atomSite.setLabel_asym_id(internalChainId); atomSite.setLabel_entity_id(entityId); atomSite.setLabel_seq_id(labelSeqId); atomSite.setPdbx_PDB_ins_code(insCode); atomSite.setCartn_x(FileConvert.d3.format(a.getX())); atomSite.setCartn_y(FileConvert.d3.format(a.getY())); atomSite.setCartn_z(FileConvert.d3.format(a.getZ())); atomSite.setOccupancy(FileConvert.d2.format(a.getOccupancy())); atomSite.setB_iso_or_equiv(FileConvert.d2.format(a.getTempFactor())); atomSite.setAuth_seq_id(Integer.toString(g.getResidueNumber().getSeqNum())); atomSite.setAuth_comp_id(g.getPDBName()); atomSite.setAuth_asym_id(chainId); atomSite.setAuth_atom_id(a.getName()); atomSite.setPdbx_PDB_model_num(Integer.toString(model)); return atomSite; } /** * Converts a Group into a List of {@link AtomSite} objects * @param g * @param model * @param chainId * @param internalChainId * @return */ private static List<AtomSite> convertGroupToAtomSites(Group g, int model, String chainId, String internalChainId) { List<AtomSite> list = new ArrayList<AtomSite>(); int groupsize = g.size(); for ( int atompos = 0 ; atompos < groupsize; atompos++) { Atom a = null ; a = g.getAtom(atompos); if ( a == null) continue ; list.add(convertAtomToAtomSite(a, model, chainId, internalChainId)); } if ( g.hasAltLoc()){ for (Group alt : g.getAltLocs() ) { list.addAll(convertGroupToAtomSites(alt, model, chainId, internalChainId)); } } return list; } /** * Converts a Chain into a List of {@link AtomSite} objects * @param c * @param model * @param authorId * @param asymId * @return */ public static List<AtomSite> convertChainToAtomSites(Chain c, int model, String authorId, String asymId) { List<AtomSite> list = new ArrayList<AtomSite>(); if (c.getEntityInfo()==null) { logger.warn("No Compound (entity) found for chain {}: entity_id will be set to 0, label_seq_id will be the same as auth_seq_id", c.getName()); } for ( int h=0; h<c.getAtomLength();h++){ Group g= c.getAtomGroup(h); list.addAll(convertGroupToAtomSites(g, model, authorId, asymId)); } return list; } /** * Converts a Structure into a List of {@link AtomSite} objects * @param s * @return */ public static List<AtomSite> convertStructureToAtomSites(Structure s) { List<AtomSite> list = new ArrayList<AtomSite>(); for (int m=0;m<s.nrModels();m++) { for (Chain c:s.getChains(m)) { list.addAll(convertChainToAtomSites(c, m+1, c.getName(), c.getId())); } } return list; } /** * Finds the max length of each of the String values contained in each of the fields of the given list of beans. * Useful for producing mmCIF loop data that is aligned for all columns. * @param list list of objects. All objects should have the same class. * @param fields Set of fields for the record. If null, will be calculated from the class of the first record * @return * @see #toMMCIF(List, Class) */ private static <T> int[] getFieldSizes(List<T> list, Field[] fields) { if (list.isEmpty()) throw new IllegalArgumentException("List of beans is empty!"); if(fields == null) fields = getFields(list.get(0).getClass()); int[] sizes = new int [fields.length]; for (T a:list) { int i = -1; for (Field f : fields) { i++; f.setAccessible(true); try { Object obj = f.get(a); int length; if (obj==null) { length = MMCIF_MISSING_VALUE.length(); } else { String val = (String) obj; length = addMmCifQuoting(val).length(); } if (length>sizes[i]) sizes[i] = length; } catch (IllegalAccessException e) { logger.warn("Field {} is inaccessible", f.getName()); continue; } catch (ClassCastException e) { logger.warn("Could not cast value to String for field {}",f.getName()); continue; } } } return sizes; } /** * Finds the max length of a list of strings * Useful for producing mmCIF single-record data that is aligned for all values. * @param names * @return * @see #toMMCIF(String, Object) */ private static int getMaxStringLength(String[] names) { int size = 0; for(String s : names) { if(s.length()>size) { size = s.length(); } } return size; } }