StructureTools.java example

Explorer
biojava-master
/*
 *                  BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on Jan 4, 2006
 *
 */
package org.biojava.nbio.structure;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.vecmath.Matrix4d;
import javax.vecmath.Point3d;

import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.contact.AtomContactSet;
import org.biojava.nbio.structure.contact.Grid;
import org.biojava.nbio.structure.io.FileParsingParameters;
import org.biojava.nbio.structure.io.PDBFileParser;
import org.biojava.nbio.core.util.FileDownloadUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A class that provides some tool methods.
 *
 * @author Andreas Prlic, Jules Jacobsen
 * @since 1.0
 */
public class StructureTools {

	private static final Logger logger = LoggerFactory
			.getLogger(StructureTools.class);

	// Amino Acid backbone
	/**
	 * The atom name of the backbone C-alpha atom. Note that this can be
	 * ambiguous depending on the context since Calcium atoms use the same name
	 * in PDB.
	 */
	public static final String CA_ATOM_NAME = "CA";

	/**
	 * The atom name for the backbone amide nitrogen
	 */
	public static final String N_ATOM_NAME = "N";

	/**
	 * The atom name for the backbone carbonyl
	 */
	public static final String C_ATOM_NAME = "C";

	/**
	 * The atom name for the backbone carbonyl oxygen
	 */
	public static final String O_ATOM_NAME = "O";

	/**
	 * The atom name of the side-chain C-beta atom
	 */
	public static final String CB_ATOM_NAME = "CB";

	// Nucleotide backbone
	/**
	 * The atom name of the backbone C1' in RNA
	 */
	public static final String C1_ATOM_NAME = "C1'";
	/**
	 * The atom name of the backbone C2' in RNA
	 */
	public static final String C2_ATOM_NAME = "C2'";
	/**
	 * The atom name of the backbone C3' in RNA
	 */
	public static final String C3_ATOM_NAME = "C3'";
	/**
	 * The atom name of the backbone C4' in RNA
	 */
	public static final String C4_ATOM_NAME = "C4'";
	/**
	 * The atom name of the backbone O2' in RNA
	 */
	public static final String O2_ATOM_NAME = "O2'";
	/**
	 * The atom name of the backbone O3' in RNA
	 */
	public static final String O3_ATOM_NAME = "O3'";
	/**
	 * The atom name of the backbone O4' in RNA
	 */
	public static final String O4_ATOM_NAME = "O4'";
	/**
	 * The atom name of the backbone O5' in RNA
	 */
	public static final String O5_ATOM_NAME = "O5'";
	/**
	 * The atom name of the backbone OP1 in RNA
	 */
	public static final String OP1_ATOM_NAME = "OP1";
	/**
	 * The atom name of the backbone OP2 in RNA
	 */
	public static final String OP2_ATOM_NAME = "OP2";
	/**
	 * The atom name of the backbone phosphate in RNA
	 */
	public static final String P_ATOM_NAME = "P";

	/**
	 * The atom used as representative for nucleotides, equivalent to
	 * {@link #CA_ATOM_NAME} for proteins
	 */
	public static final String NUCLEOTIDE_REPRESENTATIVE = P_ATOM_NAME;

	/**
	 * The character to use for unknown compounds in sequence strings
	 */
	public static final char UNKNOWN_GROUP_LABEL = 'X';

	/**
	 * Below this ratio of aminoacid/nucleotide residues to the sequence total,
	 * we use simple majority of aminoacid/nucleotide residues to decide the
	 * character of the chain (protein/nucleotide)
	 */
	public static final double RATIO_RESIDUES_TO_TOTAL = 0.95;

	/**
	 * Threshold for plausible binding of a ligand to the selected substructure
	 */
	public static final double DEFAULT_LIGAND_PROXIMITY_CUTOFF = 5;

	// there is a file format change in PDB 3.0 and nucleotides are being
	// renamed
	private static final Map<String, Character> nucleotides30;
	private static final Map<String, Character> nucleotides23;

	// amino acid 3 and 1 letter code definitions
	private static final Map<String, Character> aminoAcids;

	private static final Set<Element> hBondDonorAcceptors;

	static {
		// PDB format 3.0 residue names. For every standard entry the
		// one-letter code is the last character of the residue name.
		nucleotides30 = new HashMap<String, Character>();
		String[] standardNames30 = { "DA", "DC", "DG", "DT", "DI", "A", "G",
				"C", "U", "I" };
		for (String residue : standardNames30) {
			nucleotides30.put(residue, residue.charAt(residue.length() - 1));
		}

		// the DNA linkers - the +C , +G, +A +T +U and +I have been replaced
		// with these; all of them map to the unknown-group label
		String[] modifiedNames30 = {
				"TAF", // Fluorinated Thymine
				"TC1", // Furanosyl
				"TFE", // Fluorinated Thymine
				"TFO", // Tenofovir (3' terminator)
				"TGP", // Guanine variant
				"THX", // 5' terminator
				"TLC", // Thymine with dicyclic sugar
				"TLN", // locked Thymine
				"LCG", // locked Guanine
				"TP1", // Thymine peptide nucleic acid, with added methyl
				"CP1", // Cytidine peptide nucleic acid, with added methyl
				"TPN", // Thymine peptide nucleic acid
				"CPN", // Cytidine peptide nucleic acid
				"GPN", // Guanine peptide nucleic acid
				"APN", // Adenosine peptide nucleic acid
				"TPC" // Thymine variant
		};
		for (String residue : modifiedNames30) {
			nucleotides30.put(residue, UNKNOWN_GROUP_LABEL);
		}

		// Pre-3.0 names: the nucleic acids (C, G, A, T, U, and I) followed
		// by their modified versions (+C, +G, +A, +T, +U, and +I). Both
		// forms map to the plain base letter.
		nucleotides23 = new HashMap<String, Character>();
		String[] bases23 = { "C", "G", "A", "T", "U", "I" };
		for (String base : bases23) {
			nucleotides23.put(base, base.charAt(0));
		}
		for (String base : bases23) {
			nucleotides23.put("+" + base, base.charAt(0));
		}

		// Three-letter code -> one-letter code pairs
		String[][] threeToOne = {
				{ "GLY", "G" }, { "ALA", "A" }, { "VAL", "V" },
				{ "LEU", "L" }, { "ILE", "I" }, { "PHE", "F" },
				{ "TYR", "Y" }, { "TRP", "W" }, { "PRO", "P" },
				{ "HIS", "H" }, { "LYS", "K" }, { "ARG", "R" },
				{ "SER", "S" }, { "THR", "T" }, { "GLU", "E" },
				{ "GLN", "Q" }, { "ASP", "D" }, { "ASN", "N" },
				{ "CYS", "C" }, { "MET", "M" },
				// MSE is only found as a molecular replacement for MET
				{ "MSE", "M" },
				// 'non-standard', genetically encoded
				// http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html
				// IUBMB recommended name is 'SEC' but the wwPDB currently
				// use 'CSE'; likewise 'PYL' (IUBMB) and 'PYH' (PDB)
				{ "CSE", "U" }, { "SEC", "U" },
				{ "PYH", "O" }, { "PYL", "O" }
		};
		aminoAcids = new HashMap<String, Character>();
		for (String[] pair : threeToOne) {
			aminoAcids.put(pair[0], pair[1].charAt(0));
		}

		// Elements stored in the hBondDonorAcceptors set
		hBondDonorAcceptors = new HashSet<Element>();
		Collections.addAll(hBondDonorAcceptors, Element.N, Element.O,
				Element.S);
	}

	/**
	 * Counts the atoms of a Structure by adding up the size of every group
	 * produced by a {@link GroupIterator} over it.
	 *
	 * @param s
	 *            the structure whose atoms are counted
	 * @return the summed size of all groups visited
	 */
	public static final int getNrAtoms(Structure s) {
		int total = 0;
		for (Iterator<Group> groups = new GroupIterator(s); groups.hasNext();) {
			total += groups.next().size();
		}
		return total;
	}

	/**
	 * Counts the groups of a structure by summing the atom length of each
	 * chain returned by {@code s.getChains(0)}.
	 *
	 * @param s
	 *            the structure object
	 * @return the summed atom length of those chains
	 */
	public static final int getNrGroups(Structure s) {
		int total = 0;
		for (Chain chain : s.getChains(0)) {
			total += chain.getAtomLength();
		}
		return total;
	}

	/**
	 * Collects the requested atoms from model 0 of a Structure. Every group
	 * of every chain in that model is inspected; a group contributes its
	 * atoms only if it holds all of the requested names.
	 *
	 * @param s
	 *            the structure to get the atoms from
	 * @param atomNames
	 *            the names of the atoms to collect from each group
	 * @return the collected atoms as an array
	 */
	public static final Atom[] getAtomArray(Structure s, String[] atomNames) {
		List<Atom> collected = new ArrayList<Atom>();
		extractAtoms(atomNames, s.getModel(0), collected);
		return collected.toArray(new Atom[collected.size()]);
	}

	/**
	 * Collects the requested atoms from every model of a Structure. In
	 * contrast to {@link #getAtomArray(Structure, String[])}, which looks at
	 * model 0 only, this method walks models 0 to {@code s.nrModels() - 1} in
	 * order. A group contributes its atoms only if it holds all of the
	 * requested names.
	 *
	 * @param s
	 *            the structure to get the atoms from
	 * @param atomNames
	 *            the names of the atoms to collect from each group
	 * @return the collected atoms of all models as an array
	 */
	public static final Atom[] getAtomArrayAllModels(Structure s,
			String[] atomNames) {
		List<Atom> collected = new ArrayList<Atom>();
		int modelNr = 0;
		while (modelNr < s.nrModels()) {
			extractAtoms(atomNames, s.getModel(modelNr), collected);
			modelNr++;
		}
		return collected.toArray(new Atom[collected.size()]);
	}

	/**
	 * Copies every atom visited by an {@link AtomIterator} over the structure
	 * into an array, in iteration order.
	 *
	 * @param s
	 *            input structure
	 * @return array with all atoms visited
	 */
	public static final Atom[] getAllAtomArray(Structure s) {
		List<Atom> collected = new ArrayList<Atom>();
		for (AtomIterator it = new AtomIterator(s); it.hasNext();) {
			collected.add(it.next());
		}
		return collected.toArray(new Atom[collected.size()]);
	}
	/**
	 * Copies every atom visited by an {@link AtomIterator} created for the
	 * given model of the structure into an array, in iteration order.
	 *
	 * @param s
	 *            input structure
	 * @param model
	 *            model number handed to the AtomIterator
	 * @return array with all atoms visited
	 */
	public static final Atom[] getAllAtomArray(Structure s, int model) {
		List<Atom> collected = new ArrayList<Atom>();
		for (AtomIterator it = new AtomIterator(s, model); it.hasNext();) {
			collected.add(it.next());
		}
		return collected.toArray(new Atom[collected.size()]);
	}

	/**
	 * Returns an array with all atoms of the chain's atom groups. Nothing is
	 * filtered by element or group type, except that groups reporting
	 * themselves as water are left out.
	 *
	 * @param c
	 *            input chain
	 * @return all atoms of the non-water groups
	 */
	public static final Atom[] getAllAtomArray(Chain c) {
		List<Atom> collected = new ArrayList<Atom>();
		for (Group group : c.getAtomGroups()) {
			if (!group.isWater()) {
				collected.addAll(group.getAtoms());
			}
		}
		return collected.toArray(new Atom[collected.size()]);
	}

	/**
	 * Lists the groups that are not parents of any atom in ca (e.g. ligands).
	 *
	 * The search covers the chains returned by {@code getChains(0)} of the
	 * structure reachable from ca[0] (if there is one), plus every chain
	 * reachable from any atom in ca.
	 *
	 * @param ca an array of atoms
	 * @return the atom groups of those chains that are not the parent group
	 *         of any atom in ca
	 */
	public static List<Group> getUnalignedGroups(Atom[] ca) {
		Set<Chain> searchChains = new HashSet<Chain>();
		Set<Group> alignedGroups = new HashSet<Group>();

		// Structure reachable from the first atom, if any
		Structure structure = null;
		if (ca.length > 0) {
			Group firstGroup = ca[0].getGroup();
			Chain firstChain = (firstGroup == null) ? null : firstGroup.getChain();
			if (firstChain != null) {
				structure = firstChain.getStructure();
			}
		}
		if (structure != null) {
			searchChains.addAll(structure.getChains(0));
		}

		// Parent groups and chains of the input atoms
		for (Atom atom : ca) {
			Group parent = atom.getGroup();
			if (parent == null) {
				continue;
			}
			alignedGroups.add(parent);

			Chain parentChain = parent.getChain();
			if (parentChain != null) {
				searchChains.add(parentChain);
			}
		}

		// Everything in the searched chains that is not a parent group
		List<Group> unaligned = new ArrayList<Group>();
		for (Chain chain : searchChains) {
			for (Group candidate : chain.getAtomGroups()) {
				if (!alignedGroups.contains(candidate)) {
					unaligned.add(candidate);
				}
			}
		}
		return unaligned;
	}

	/**
	 * Selects the ligand groups of the target that are in contact with the
	 * query atoms. Groups that are water or polymeric are never returned.
	 * Contact is decided by a {@link Grid} built with the given cutoff and
	 * filled with the query atoms.
	 *
	 * @param target Set of groups including the ligands
	 * @param query Atom selection
	 * @param cutoff Distance from query atoms to consider, in angstroms
	 * @return The non-water, non-polymeric target groups for which the grid
	 *  reports a contact, in target iteration order
	 * @see StructureTools#DEFAULT_LIGAND_PROXIMITY_CUTOFF
	 */
	public static List<Group> getLigandsByProximity(Collection<Group> target, Atom[] query, double cutoff) {
		// Spatial index over the query atoms
		Grid queryGrid = new Grid(cutoff);
		queryGrid.addAtoms(query);

		List<Group> nearby = new ArrayList<>();
		for (Group candidate : target) {
			// Waters and polymer residues are not ligands
			if (candidate.isWater() || candidate.isPolymeric()) {
				continue;
			}

			List<Atom> candidateAtoms = candidate.getAtoms();
			if (queryGrid.hasAnyContact(Calc.atomsToPoints(candidateAtoms))) {
				nearby.add(candidate);
			}
		}
		return nearby;
	}
	
	/**
	 * Adds a particular group to a structure. A new chain will be created if necessary.
	 *
	 * <p>When adding multiple groups, pass the return value of one call as the
	 * chainGuess parameter of the next call for efficiency.
	 * <pre>
	 * Chain guess = null;
	 * for(Group g : groups) {
	 *     guess = addGroupToStructure(s, g, model, guess, clone);
	 * }
	 * </pre>
	 *
	 * <p>When a chain has to be created, its name, entity info, SEQRES groups
	 * and sequence mismatches are taken from the group's current chain. The
	 * whole operation runs while holding the monitor of {@code s}.
	 *
	 * @param s structure to receive the group
	 * @param g group to add
	 * @param model index of the model of s in which the chain is looked up
	 *  or created
	 * @param chainGuess (optional) If not null, should be a chain from s. Used
	 *  to improve performance when adding many groups from the same chain
	 * @param clone Indicates whether the input group should be cloned before
	 *  being added to the new chain
	 * @return the chain g was added to
	 */
	public static Chain addGroupToStructure(Structure s, Group g, int model, Chain chainGuess, boolean clone ) {
		synchronized(s) {
			// Find or create the chain
			String chainId = g.getChainId();
			assert !chainId.isEmpty();
			Chain chain;
			// Compare ids by value: equal ids are not guaranteed to be the
			// same String instance, so == would defeat the guess.
			if(chainGuess != null && chainId.equals(chainGuess.getId())) {
				// previously guessed chain
				chain = chainGuess;
			} else {
				// Try to guess
				chain = s.getChain(chainId, model);
				if(chain == null) {
					// no chain found
					chain = new ChainImpl();
					chain.setId(chainId);

					Chain oldChain = g.getChain();
					chain.setName(oldChain.getName());

					EntityInfo oldEntityInfo = oldChain.getEntityInfo();

					// Reuse the entity with the same mol id if s already has
					// one, otherwise copy the old entity into s
					EntityInfo newEntityInfo = s.getEntityById(oldEntityInfo.getMolId());
					if( newEntityInfo == null ) {
						newEntityInfo = new EntityInfo(oldEntityInfo);
						s.addEntityInfo(newEntityInfo);
					}
					newEntityInfo.addChain(chain);
					chain.setEntityInfo(newEntityInfo);

					// TODO Do the seqres need to be cloned too? -SB 2016-10-7
					chain.setSeqResGroups(oldChain.getSeqResGroups());
					chain.setSeqMisMatches(oldChain.getSeqMisMatches());

					s.addChain(chain,model);
				}
			}

			// Add cloned group
			if(clone) {
				g = (Group)g.clone();
			}
			chain.addGroup(g);

			return chain;
		}
	}

	/**
	 * Adds each group of a collection to a structure by calling
	 * {@link #addGroupToStructure(Structure, Group, int, Chain, boolean)},
	 * handing the chain returned for one group to the next call as its
	 * chain guess.
	 *
	 * @param s structure to receive the groups
	 * @param groups groups to add
	 * @param model model index passed on for every group
	 * @param clone Indicates whether the input groups should be cloned before
	 *  being added to the new chain
	 */
	public static void addGroupsToStructure(Structure s, Collection<Group> groups, int model, boolean clone) {
		Chain lastChain = null;
		Iterator<Group> pending = groups.iterator();
		while (pending.hasNext()) {
			lastChain = addGroupToStructure(s, pending.next(), model, lastChain, clone);
		}
	}
	
	/**
	 * Expands a set of atoms into all groups of the chains they belong to,
	 * without restricting the group type. Delegates to
	 * {@link #getAllGroupsFromSubset(Atom[], GroupType)} with a null type.
	 *
	 * @param atoms Sample of atoms
	 * @return All groups from all chains accessible from the input atoms
	 */
	public static Set<Group> getAllGroupsFromSubset(Atom[] atoms) {
		// null means: do not filter by group type
		final GroupType anyType = null;
		return getAllGroupsFromSubset(atoms, anyType);
	}
	/**
	 * Expands a set of atoms into all groups of the chains they belong to.
	 *
	 * The chains searched are those of the structure reachable from the first
	 * atom (if any), plus the parent chain of every input atom. The latter
	 * covers atoms that are not linked to a structure.
	 *
	 * @param atoms Sample of atoms
	 * @param types Type of groups to return (useful for getting only ligands, for instance).
	 *  Null gets all groups.
	 * @return All matching groups from all chains accessible from the input
	 *  atoms; an empty set if no chain is reachable
	 */
	public static Set<Group> getAllGroupsFromSubset(Atom[] atoms,GroupType types) {
		// Structure reachable from the first atom, if any
		Structure structure = null;
		if (atoms.length > 0) {
			Group firstGroup = atoms[0].getGroup();
			Chain firstChain = (firstGroup == null) ? null : firstGroup.getChain();
			if (firstChain != null) {
				structure = firstChain.getStructure();
			}
		}

		Set<Chain> reachableChains = new HashSet<>();
		if (structure != null) {
			reachableChains.addAll(structure.getChains());
		}
		// The parent chains of the atoms themselves are always included
		for (Atom atom : atoms) {
			Group parent = atom.getGroup();
			if (parent == null) {
				continue;
			}
			Chain parentChain = parent.getChain();
			if (parentChain != null) {
				reachableChains.add(parentChain);
			}
		}

		if (reachableChains.isEmpty()) {
			return Collections.emptySet();
		}

		// Gather the groups, filtered by type when one was given
		Set<Group> result = new HashSet<>();
		for (Chain chain : reachableChains) {
			result.addAll(types == null
					? chain.getAtomGroups()
					: chain.getAtomGroups(types));
		}
		return result;
	}


	/**
	 * Returns an array of all non-Hydrogen atoms visited by an
	 * {@link AtomIterator} over the given Structure. Atoms of water groups
	 * are never included; atoms of HETATM groups only when requested.
	 *
	 * @param s
	 *            input structure
	 * @param hetAtoms
	 *            if true HET atoms are included in array, if false they are not
	 * @return the selected atoms
	 */
	public static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms) {
		return getAllNonHAtomArray(s, hetAtoms, new AtomIterator(s));
	}
	/**
	 * Returns an array of all non-Hydrogen atoms visited by an
	 * {@link AtomIterator} created for one model of the given Structure.
	 * Atoms of water groups are never included; atoms of HETATM groups only
	 * when requested.
	 *
	 * @param s
	 *            input structure
	 * @param hetAtoms
	 *            if true HET atoms are included in array, if false they are not
	 * @param modelNr Model number to draw atoms from
	 * @return the selected atoms
	 */
	public static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, int modelNr) {
		return getAllNonHAtomArray(s, hetAtoms, new AtomIterator(s, modelNr));
	}
	private static final Atom[] getAllNonHAtomArray(Structure s, boolean hetAtoms, AtomIterator iter) {
		// Shared filter for the public overloads: drains the iterator and
		// keeps atoms that are not hydrogen, not in a water group and, unless
		// hetAtoms is set, not in a HETATM group. The structure argument is
		// not read here.
		List<Atom> kept = new ArrayList<Atom>();

		while (iter.hasNext()) {
			Atom atom = iter.next();
			if (atom.getElement() != Element.H) {
				Group parent = atom.getGroup();
				boolean excluded = parent.isWater()
						|| (!hetAtoms && parent.getType().equals(GroupType.HETATM));
				if (!excluded) {
					kept.add(atom);
				}
			}
		}
		return kept.toArray(new Atom[kept.size()]);
	}

	/**
	 * Returns an array of all non-Hydrogen atoms in the atom groups of the
	 * given Chain. Atoms of water groups are never included; atoms of HETATM
	 * groups only when requested.
	 *
	 * @param c
	 *            input chain
	 * @param hetAtoms
	 *            if true HET atoms are included in array, if false they are not
	 * @return the selected atoms
	 */
	public static final Atom[] getAllNonHAtomArray(Chain c, boolean hetAtoms) {
		List<Atom> kept = new ArrayList<Atom>();

		for (Group group : c.getAtomGroups()) {
			if (!group.isWater()) {
				for (Atom atom : group.getAtoms()) {
					boolean heavy = atom.getElement() != Element.H;
					if (heavy
							&& (hetAtoms || !group.getType().equals(GroupType.HETATM))) {
						kept.add(atom);
					}
				}
			}
		}
		return kept.toArray(new Atom[kept.size()]);
	}
	
	/**
	 * Returns the coordinates of all non-Hydrogen atoms in the atom groups of
	 * the given Chain as Point3d objects. Atoms of water groups are never
	 * included; atoms of HETATM groups only when requested.
	 *
	 * @param c
	 *            input chain
	 * @param hetAtoms
	 *            if true HET atoms are included in array, if false they are not
	 * @return the coordinates of the selected atoms
	 */
	public static final Point3d[] getAllNonHCoordsArray(Chain c, boolean hetAtoms) {
		List<Point3d> coords = new ArrayList<Point3d>();

		for (Group group : c.getAtomGroups()) {
			if (!group.isWater()) {
				for (Atom atom : group.getAtoms()) {
					boolean heavy = atom.getElement() != Element.H;
					if (heavy
							&& (hetAtoms || !group.getType().equals(GroupType.HETATM))) {
						coords.add(atom.getCoordsAsPoint3d());
					}
				}
			}
		}
		return coords.toArray(new Point3d[coords.size()]);
	}

	/**
	 * Appends to the given list the requested atoms of every group that
	 * holds all of them. A group missing at least one of the requested atom
	 * names contributes nothing.
	 *
	 * @param atomNames
	 *            names of the atoms wanted from each group
	 * @param chains
	 *            chains whose atom groups are inspected
	 * @param atoms
	 *            list that receives the atoms found
	 */
	private static void extractAtoms(String[] atomNames, List<Chain> chains,
			List<Atom> atoms) {

		for (Chain chain : chains) {
			nextGroup:
			for (Group group : chain.getAtomGroups()) {
				// atoms found so far in this group
				List<Atom> found = new ArrayList<Atom>();
				for (String wanted : atomNames) {
					Atom atom = group.getAtom(wanted);
					if (atom == null) {
						// incomplete group: discard what was found
						continue nextGroup;
					}
					found.add(atom);
				}
				atoms.addAll(found);
			}
		}
	}

	/**
	 * Returns an array of the requested Atoms from the Chain object. Iterates
	 * over all atom groups of the chain, whatever their type, and checks if
	 * the requested atoms are in each group. If a group does not contain all
	 * requested atoms then no atoms are added for that group, and the first
	 * missing atom is reported at debug level.
	 *
	 * @param c
	 *            the Chain to get the atoms from
	 *
	 * @param atomNames
	 *            contains the atom names to be used.
	 * @return an Atom[] array
	 */
	public static final Atom[] getAtomArray(Chain c, String[] atomNames) {

		List<Atom> atoms = new ArrayList<Atom>();

		for (Group g : c.getAtomGroups()) {

			// a temp container for the atoms of this group
			List<Atom> thisGroupAtoms = new ArrayList<Atom>();
			// flag to check if this group contains all the requested atoms.
			boolean thisGroupAllAtoms = true;
			for (String atomName : atomNames) {
				Atom a = g.getAtom(atomName);
				if (a == null) {
					// Parameterized message: nothing is formatted unless
					// debug logging is actually enabled
					logger.debug(
							"Group {} ({}) does not have the required atom '{}'",
							g.getResidueNumber(), g.getPDBName(), atomName);
					// this group does not have a required atom, skip it...
					thisGroupAllAtoms = false;
					break;
				}
				thisGroupAtoms.add(a);
			}

			if (thisGroupAllAtoms) {
				// add the atoms of this group to the result
				atoms.addAll(thisGroupAtoms);
			}

		}
		return atoms.toArray(new Atom[atoms.size()]);

	}

	/**
	 * Returns the C-alpha atoms of a chain: for every atom group that has an
	 * atom named {@link #CA_ATOM_NAME}, that atom is returned if its element
	 * is carbon.
	 *
	 * @param c
	 *            the chain to take the atoms from
	 * @return an Atom[] array
	 * @see #getRepresentativeAtomArray(Chain)
	 */
	public static final Atom[] getAtomCAArray(Chain c) {
		List<Atom> alphaCarbons = new ArrayList<Atom>();

		for (Group group : c.getAtomGroups()) {
			if (!group.hasAtom(CA_ATOM_NAME)) {
				continue;
			}
			// the element check rejects non-carbon atoms that share the name
			Atom ca = group.getAtom(CA_ATOM_NAME);
			if (ca.getElement() == Element.C) {
				alphaCarbons.add(ca);
			}
		}

		return alphaCarbons.toArray(new Atom[alphaCarbons.size()]);
	}

	/**
	 * Gets a representative atom for each group that is part of the chain
	 * backbone. Note that modified aminoacids won't be returned as part of the
	 * backbone if the {@link org.biojava.nbio.structure.io.mmcif.ReducedChemCompProvider} was used to load the
	 * structure.
	 *
	 * For groups of type AMINOACID, the representative is the atom named CA
	 * if its element is carbon. For groups of type NUCLEOTIDE, the
	 * representative is the {@value #NUCLEOTIDE_REPRESENTATIVE} atom. Groups
	 * of any other type, or lacking the representative atom, are skipped.
	 *
	 * @param c
	 *            the chain to take the atoms from
	 * @return representative Atoms of the chain backbone
	 * @since Biojava 4.1.0
	 */
	public static final Atom[] getRepresentativeAtomArray(Chain c) {
		List<Atom> representatives = new ArrayList<Atom>();

		for (Group group : c.getAtomGroups()) {

			switch (group.getType()) {
			case AMINOACID:
				if (group.hasAtom(CA_ATOM_NAME)) {
					Atom ca = group.getAtom(CA_ATOM_NAME);
					if (ca.getElement() == Element.C) {
						representatives.add(ca);
					}
				}
				break;
			case NUCLEOTIDE:
				if (group.hasAtom(NUCLEOTIDE_REPRESENTATIVE)) {
					Atom rep = group.getAtom(NUCLEOTIDE_REPRESENTATIVE);
					representatives.add(rep);
				}
				break;
			default:
				// other group types have no representative
				break;
			}
		}

		return representatives.toArray(new Atom[representatives.size()]);

	}

	/**
	 * Legacy name for {@link #cloneAtomArray(Atom[])}; forwards the call
	 * unchanged and returns its result.
	 *
	 * @param ca
	 *            array of representative atoms, e.g. CA atoms
	 * @return Atom array
	 * @deprecated Use the better-named {@link #cloneAtomArray(Atom[])} instead
	 */
	@Deprecated
	public static final Atom[] cloneCAArray(Atom[] ca) {
		Atom[] cloned = cloneAtomArray(ca);
		return cloned;
	}

	/**
	 * Provides an equivalent copy of Atoms in a new array. The parent group
	 * of every input atom is cloned and the returned atom is looked up by
	 * name in that clone. Cloned groups are collected in newly created
	 * chains, one per distinct chain name. A clone is added to its chain
	 * only if looking up its residue number there raises a
	 * StructureException.
	 *
	 * @param ca
	 *            array of representative atoms, e.g. CA atoms
	 * @return Atom array
	 * @since Biojava 4.1.0
	 */
	public static final Atom[] cloneAtomArray(Atom[] ca) {
		Atom[] copies = new Atom[ca.length];

		// chains created so far for the clones
		List<Chain> clonedChains = new ArrayList<Chain>();
		for (int i = 0; i < ca.length; i++) {
			Atom original = ca[i];
			Group originalGroup = original.getGroup();
			Chain originalChain = originalGroup.getChain();

			// reuse the chain already created for this chain name, if any
			Chain target = null;
			Iterator<Chain> existing = clonedChains.iterator();
			while (target == null && existing.hasNext()) {
				Chain candidate = existing.next();
				if (candidate.getName().equals(originalChain.getName())) {
					target = candidate;
				}
			}
			if (target == null) {
				target = new ChainImpl();
				target.setId(originalChain.getId());
				target.setName(originalChain.getName());
				clonedChains.add(target);
			}

			Group groupCopy = (Group) originalGroup.clone();
			copies[i] = groupCopy.getAtom(original.getName());

			try {
				// if the group doesn't exist yet, this produces a StructureException
				target.getGroupByPDB(groupCopy.getResidueNumber());
			} catch (StructureException e) {
				// the group doesn't exist yet in the new chain, let's add it
				target.addGroup(groupCopy);
			}
		}
		return copies;
	}

	/**
	 * Clone a set of representative Atoms, but returns the parent groups.
	 * The parent group of every input atom is cloned and added to a newly
	 * created chain; one new chain is created per distinct chain name and
	 * receives both the id and the name of the first original chain seen
	 * with that name.
	 *
	 * @param ca
	 *            Atom array
	 * @return Group array holding, at each index, the clone of the parent
	 *         group of the atom at that index
	 */
	public static Group[] cloneGroups(Atom[] ca) {
		Group[] newGroup = new Group[ca.length];

		List<Chain> model = new ArrayList<Chain>();
		int apos = -1;
		for (Atom a : ca) {
			apos++;
			Group parentG = a.getGroup();
			Chain parentC = parentG.getChain();

			// reuse the chain already created for this chain name, if any
			Chain newChain = null;
			for (Chain c : model) {
				if (c.getName().equals(parentC.getName())) {
					newChain = c;
					break;
				}
			}
			if (newChain == null) {
				newChain = new ChainImpl();
				// copy the id as well as the name, as cloneAtomArray does,
				// so the new chain is not left without an id
				newChain.setId(parentC.getId());
				newChain.setName(parentC.getName());
				model.add(newChain);
			}

			Group ng = (Group) parentG.clone();
			newGroup[apos] = ng;
			newChain.addGroup(ng);
		}
		return newGroup;
	}

	/**
	 * Utility method for working with circular permutations. Creates a
	 * duplicated and cloned set of atoms from the input array: the result is
	 * twice as long as the input, and each half holds an independent set of
	 * clones of the input atoms in input order.
	 *
	 * Within each half, consecutive atoms whose original chains have the same
	 * id are placed in the same newly created chain.
	 *
	 * @param ca2
	 *            atom array
	 * @return cloned and duplicated set of input array
	 */
	public static Atom[] duplicateCA2(Atom[] ca2) {
		// we don't want to rotate input atoms, do we?
		Atom[] ca2clone = new Atom[ca2.length * 2];

		// first copy
		cloneAtomsGroupedByChain(ca2, ca2clone, 0);
		// Duplicate ca2!
		cloneAtomsGroupedByChain(ca2, ca2clone, ca2.length);

		return ca2clone;

	}

	/**
	 * Clones the parent group of each source atom into new chains and stores
	 * the cloned atoms in target, starting at offset.
	 */
	private static void cloneAtomsGroupedByChain(Atom[] source, Atom[] target,
			int offset) {
		Chain current = null;
		String currentChainId = "";
		for (int i = 0; i < source.length; i++) {
			Atom a = source[i];
			Group g = (Group) a.getGroup().clone();
			Chain orig = a.getGroup().getChain();

			// open a new chain for the first atom and whenever the original
			// chain id changes
			if (current == null || !orig.getId().equals(currentChainId)) {
				current = new ChainImpl();
				current.setId(orig.getId());
				current.setName(orig.getName());
				// remember the id so the next atom of the same chain reuses
				// this chain instead of getting a new one
				currentChainId = orig.getId();
			}

			current.addGroup(g);
			target[offset + i] = g.getAtom(a.getName());
		}
	}

	/**
	 * Returns the C-alpha atoms of a structure: for every atom group of every
	 * chain returned by {@code s.getChains()} that has an atom named
	 * {@link #CA_ATOM_NAME}, that atom is returned if its element is carbon.
	 *
	 * @param s
	 *            the structure object
	 * @return an Atom[] array
	 * @see #getRepresentativeAtomArray(Structure)
	 */
	public static Atom[] getAtomCAArray(Structure s) {

		List<Atom> alphaCarbons = new ArrayList<Atom>();

		for (Chain chain : s.getChains()) {
			for (Group group : chain.getAtomGroups()) {
				if (!group.hasAtom(CA_ATOM_NAME)) {
					continue;
				}
				// the element check rejects non-carbon atoms that share the name
				Atom ca = group.getAtom(CA_ATOM_NAME);
				if (ca.getElement() == Element.C) {
					alphaCarbons.add(ca);
				}
			}
		}

		return alphaCarbons.toArray(new Atom[alphaCarbons.size()]);
	}

	/**
	 * Gets a representative atom for each group that is part of the
	 * structure backbone by concatenating, chain by chain, the result of
	 * {@link #getRepresentativeAtomArray(Chain)} for every chain returned by
	 * {@code s.getChains()}. Note that modified aminoacids won't be returned
	 * as part of the backbone if the
	 * {@link org.biojava.nbio.structure.io.mmcif.ReducedChemCompProvider} was used to load the
	 * structure.
	 *
	 * @param s
	 *            Input structure
	 * @return representative Atoms of the structure backbone
	 * @since Biojava 4.1.0
	 */
	public static Atom[] getRepresentativeAtomArray(Structure s) {

		List<Atom> representatives = new ArrayList<Atom>();

		for (Chain chain : s.getChains()) {
			Collections.addAll(representatives, getRepresentativeAtomArray(chain));
		}

		return representatives.toArray(new Atom[representatives.size()]);
	}

	/**
	 * Return an Atom array of the main chain atoms: CA, C, N, O Any group that
	 * contains those atoms will be included, be it a standard aminoacid or not
	 *
	 * @param s
	 *            the structure object
	 * @return an Atom[] array
	 */
	public static Atom[] getBackboneAtomArray(Structure s) {

		// atom names collected for nucleotide groups
		final String[] nucleotideBackbone = { C1_ATOM_NAME, C2_ATOM_NAME,
				C3_ATOM_NAME, C4_ATOM_NAME, O2_ATOM_NAME, O3_ATOM_NAME,
				O4_ATOM_NAME, O5_ATOM_NAME, OP1_ATOM_NAME, OP2_ATOM_NAME,
				P_ATOM_NAME };
		// atom names collected for amino acids and every other group type
		final String[] aminoBackbone = { CA_ATOM_NAME, C_ATOM_NAME,
				N_ATOM_NAME, O_ATOM_NAME };

		List<Atom> backbone = new ArrayList<Atom>();

		for (Chain chain : s.getChains()) {
			for (Group group : chain.getAtomGroups()) {
				// groups for which hasAminoAtoms() is false are skipped
				// entirely; note that this gate also sits in front of the
				// NUCLEOTIDE branch below
				if (!group.hasAminoAtoms()) {
					continue;
				}

				// walk the atoms in their stored order instead of fetching
				// them by name, so the result keeps the original atom order
				for (Atom atom : group.getAtoms()) {
					String[] wanted;
					switch (group.getType()) {
					case NUCLEOTIDE:
						// TODO Allow C4* names as well as C4'? -SB 3/2015
						wanted = nucleotideBackbone;
						break;
					case AMINOACID:
					default:
						wanted = aminoBackbone;
						break;
					}

					String atomName = atom.getName();
					for (String backboneName : wanted) {
						if (atomName.equals(backboneName)) {
							backbone.add(atom);
						}
					}
				}
			}
		}

		return backbone.toArray(new Atom[backbone.size()]);
	}

	/**
	 * Convert three character amino acid codes into single character e.g.
	 * convert CYS to C. Valid 3-letter codes will be those of the standard 20
	 * amino acids plus MSE, CSE, SEC, PYH, PYL (see the {@link #aminoAcids}
	 * map)
	 *
	 * @return the 1 letter code, or null if the given 3 letter code does not
	 *         correspond to an amino acid code
	 * @param groupCode3
	 *            a three character amino acid representation String
	 * @see #get1LetterCode(String)
	 */
	public static final Character get1LetterCodeAmino(String groupCode3) {
		// plain lookup in the aminoAcids map, no trimming or case folding
		final Character oneLetterCode = aminoAcids.get(groupCode3);
		return oneLetterCode;
	}

	/**
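	 * Convert a three character amino acid code into its single character
	 * code.
	 *
	 * @param code3
	 *            a three character amino acid representation String
	 * @return the 1 letter code, as returned by
	 *         {@link #get1LetterCodeAmino(String)}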
	 * @deprecated Use {@link #get1LetterCodeAmino(String)} instead
	 */
	@Deprecated
	public static final Character convert_3code_1code(String code3) {
		// legacy name: simply forwards to get1LetterCodeAmino
		final Character code1 = get1LetterCodeAmino(code3);
		return code1;
	}

	/**
	 * Convert a three letter amino acid or nucleotide code into a single
	 * character code. If the code does not correspond to an amino acid or
	 * nucleotide, returns {@link #UNKNOWN_GROUP_LABEL}.
	 *
	 * Returned null for nucleotides prior to version 4.0.1.
	 *
	 * @param groupCode3
	 *            three letter representation
	 * @return The 1-letter abbreviation
	 */
	public static final Character get1LetterCode(String groupCode3) {

		// amino acid codes take precedence; this lookup uses the code as
		// given (untrimmed)
		Character aminoCode = get1LetterCodeAmino(groupCode3);
		if (aminoCode != null) {
			return aminoCode;
		}

		// not an amino acid: consult the two nucleotide tables using the
		// trimmed code, nucleotides30 first
		String trimmed = groupCode3.trim();
		Character nucleotideCode = nucleotides30.get(trimmed);
		if (nucleotideCode == null) {
			nucleotideCode = nucleotides23.get(trimmed);
		}
		if (nucleotideCode != null) {
			return nucleotideCode;
		}

		// neither an amino acid nor a nucleotide with a known letter:
		// label it as unknown
		return UNKNOWN_GROUP_LABEL;
	}

	/**
	 * Test if the three-letter code of an ATOM entry corresponds to a
	 * nucleotide (as opposed to, e.g., an aminoacid).
	 *
	 * @param groupCode3
	 *            3-character code for a group.
	 * @return true if the trimmed code is a known nucleotide code, false
	 *         otherwise
	 */
	public static final boolean isNucleotide(String groupCode3) {
		// both nucleotide tables are checked against the trimmed code
		final String trimmed = groupCode3.trim();
		if (nucleotides30.containsKey(trimmed)) {
			return true;
		}
		return nucleotides23.containsKey(trimmed);
	}

	/**
	 * Reduce a structure to provide a smaller representation . Only takes the
	 * first model of the structure. If chainName is provided only return a
	 * structure containing that Chain ID. Converts lower case chain IDs to
	 * upper case if structure does not contain a chain with that ID.
	 *
	 * @param s
	 * @param chainId
	 * @return Structure
	 * @since 3.0
	 * @deprecated Use {@link StructureIdentifier#reduce(Structure)} instead (v. 4.2.0)
	 */
	@Deprecated
	public static final Structure getReducedStructure(Structure s,
			String chainId) throws StructureException {
		// since we deal here with structure alignments,
		// only use Model 1...

		// Carry the structure-level metadata over. The values returned by
		// the getters of s are handed on as-is; no copies are made here.
		Structure newS = new StructureImpl();
		newS.setPDBCode(s.getPDBCode());
		newS.setPDBHeader(s.getPDBHeader());
		newS.setName(s.getName());
		newS.setSSBonds(s.getSSBonds());
		newS.setDBRefs(s.getDBRefs());
		newS.setSites(s.getSites());
		newS.setBiologicalAssembly(s.isBiologicalAssembly());
		newS.setEntityInfos(s.getEntityInfos());

		String wantedId = (chainId == null) ? null : chainId.trim();

		if (wantedId == null || wantedId.isEmpty()) {
			// no chain requested: take every chain of model 0
			for (Chain modelChain : s.getModel(0)) {
				newS.addChain(modelChain);
			}
			return newS;
		}

		Chain c;
		try {
			c = s.getChainByPDB(wantedId);
		} catch (StructureException e) {
			logger.warn("{}. Chain id {} did not match, trying upper case Chain id.",
					e.getMessage(), wantedId);
			// Chain ids are not natural-language text: upper-case them
			// independently of the default locale (e.g. under a Turkish
			// locale "i" would otherwise not become "I").
			c = s.getChainByPDB(wantedId.toUpperCase(java.util.Locale.ROOT));
		}

		if (c != null) {
			newS.addChain(c);
			for (EntityInfo comp : s.getEntityInfos()) {
				if (comp.getChainIds() != null
						&& comp.getChainIds().contains(c.getChainID())) {
					// found matching entity info. set description...
					// NOTE(review): the header was taken from s above; if
					// StructureImpl keeps it by reference this also changes
					// the description seen through s - confirm.
					newS.getPDBHeader().setDescription(
							"Chain " + c.getChainID() + " of " + s.getPDBCode()
							+ " " + comp.getDescription());
				}
			}
		}

		return newS;
	}

	/**
	 * Reduce a structure to provide a smaller representation. Only takes the
	 * first model of the structure. If chainNr >=0 only takes the chain at that
	 * position into account.
	 *
	 * @param s
	 * @param chainNr
	 *            can be -1 to request all chains of model 0, otherwise will
	 *            only add chain at this position
	 * @return Structure object
	 * @since 3.0
	 * @deprecated Use {@link StructureIdentifier#reduce(Structure)} instead (v. 4.2.0)
	 */
	@Deprecated
	public static final Structure getReducedStructure(Structure s, int chainNr) {
		// since we deal here with structure alignments,
		// only use Model 1...

		// Carry the structure-level metadata over (each setter is called
		// exactly once). The values returned by the getters of s are handed
		// on as-is; no copies are made here.
		Structure newStructure = new StructureImpl();
		newStructure.setPDBCode(s.getPDBCode());
		newStructure.setPDBHeader(s.getPDBHeader());
		newStructure.setName(s.getName());
		newStructure.setSSBonds(s.getSSBonds());
		newStructure.setDBRefs(s.getDBRefs());
		newStructure.setSites(s.getSites());
		newStructure.setBiologicalAssembly(s.isBiologicalAssembly());
		newStructure.setEntityInfos(s.getEntityInfos());
		newStructure.setCrystallographicInfo(s.getCrystallographicInfo());

		// NOTE(review): the header was taken from s above; if StructureImpl
		// keeps it by reference this also rewrites the description seen
		// through s - confirm.
		newStructure.getPDBHeader().setDescription(
				"subset of " + s.getPDBCode() + " "
						+ s.getPDBHeader().getDescription());

		if (chainNr < 0) {
			// negative index: take every chain of model 0
			for (Chain modelChain : s.getModel(0)) {
				newStructure.addChain(modelChain);
			}
			return newStructure;
		}

		// otherwise only the chain at position chainNr of model 0
		newStructure.addChain(s.getChainByIndex(0, chainNr));

		return newStructure;
	}

	/**
	 * In addition to the functionality provided by
	 * {@link #getReducedStructure(Structure, int)} and
	 * {@link #getReducedStructure(Structure, String)}, also provides a way to
	 * specify sub-regions of a structure with the following specification:
	 *
	 * <ul>
	 * <li>ranges can be surrounded by ( and ). (but will be removed).</li>
	 * <li>ranges are specified as PDBresnum1 : PDBresnum2</li>
	 *
	 * <li>a list of ranges is separated by ,</li>
	 * </ul>
	 * Example
	 *
	 * <pre>
	 *  4GCR (A:1-83)
	 *  1CDG (A:407-495,A:582-686)
	 *  1CDG (A_407-495,A_582-686)
	 * </pre>
	 *
	 * @param s
	 *            The full structure
	 * @param ranges
	 *            A comma-separated list of ranges, optionally surrounded by
	 *            parentheses
	 * @return Substructure of s specified by ranges
	 * @throws IllegalArgumentException for malformed range strings
	 * @throws StructureException for errors when reducing the Structure
	 * @deprecated Use {@link StructureIdentifier} instead (4.2.0)
	 */
	@Deprecated
	public static final Structure getSubRanges(Structure s, String ranges )
			throws StructureException
	{
		if (ranges == null || ranges.isEmpty()) {
			throw new IllegalArgumentException("ranges can't be null or empty");
		}

		// strip surrounding whitespace, then one leading "(" and one
		// trailing ")" if present
		String spec = ranges.trim();
		if (spec.startsWith("(")) {
			spec = spec.substring(1);
		}
		if (spec.endsWith(")")) {
			spec = spec.substring(0, spec.length() - 1);
		}

		// special case: '-' means 'everything', s itself is returned
		if ("-".equals(spec)) {
			return s;
		}

		// parse the remaining list and let a SubstructureIdentifier cut the
		// matching part out of s
		List<ResidueRange> parsedRanges = ResidueRange.parseMultiple(spec);
		SubstructureIdentifier identifier = new SubstructureIdentifier(null, parsedRanges);
		return identifier.reduce(s);
	}

	/**
	 * Builds a one-letter-code sequence from the groups that the given atoms
	 * belong to. Consecutive atoms of the same group contribute a single
	 * character. The letter is looked up with
	 * {@link #get1LetterCodeAmino(String)}; groups without an entry there
	 * are written as {@link #UNKNOWN_GROUP_LABEL}.
	 *
	 * @param atoms
	 *            the atoms whose groups make up the sequence
	 * @return the sequence, one character per run of atoms sharing a group
	 */
	public static final String convertAtomsToSeq(Atom[] atoms) {

		StringBuilder sequence = new StringBuilder();
		Group previous = null;

		for (Atom atom : atoms) {
			Group current = atom.getGroup();

			// we add each group only once: skip atoms whose group equals
			// the one that was appended last
			if (previous != null && previous.equals(current)) {
				continue;
			}

			Character letter = get1LetterCodeAmino(current.getPDBName());
			if (letter == null) {
				letter = UNKNOWN_GROUP_LABEL;
			}
			sequence.append(letter);

			previous = current;
		}

		return sequence.toString();
	}

	/**
	 * Get a group represented by a ResidueNumber.
	 *
	 * @param struc
	 *            a {@link Structure}
	 * @param pdbResNum
	 *            a {@link ResidueNumber}
	 * @return a group in the structure that is represented by the pdbResNum.
	 * @throws StructureException
	 *             if the group cannot be found.
	 */
	public static final Group getGroupByPDBResidueNumber(Structure struc,
			ResidueNumber pdbResNum) throws StructureException {
		if (struc == null || pdbResNum == null) {
			throw new IllegalArgumentException("Null argument(s).");
		}

		Chain chain = struc.getPolyChainByPDB(pdbResNum.getChainName());

		// A chain lookup that finds nothing must surface as the documented
		// StructureException rather than as a NullPointerException on the
		// next line.
		if (chain == null) {
			throw new StructureException("Could not find polymer chain "
					+ pdbResNum.getChainName() + " for residue number "
					+ pdbResNum);
		}

		return chain.getGroupByPDB(pdbResNum);
	}

	/**
	 * Returns the set of intra-chain contacts for the given chain for given
	 * atom names, i.e. the contact map. Uses a geometric hashing algorithm that
	 * speeds up the calculation without need of full distance matrix. The
	 * parsing mode {@link FileParsingParameters#setAlignSeqRes(boolean)} needs
	 * to be set to true for this to work.
	 *
	 * @param chain
	 * @param atomNames
	 *            the array with atom names to be used. Beware: CA will match
	 *            both C-alphas and Calciums. If null, all non-H atoms of
	 *            non-hetatoms will be used
	 * @param cutoff
	 * @return
	 */
	public static AtomContactSet getAtomsInContact(Chain chain,
			String[] atomNames, double cutoff) {
		Grid grid = new Grid(cutoff);

		// without explicit names every non-H atom is taken (the hetatom
		// flag is passed as false); otherwise only the named atoms
		Atom[] atoms = (atomNames == null)
				? getAllNonHAtomArray(chain, false)
				: getAtomArray(chain, atomNames);

		// nothing to put into the grid: warn and hand back an empty
		// contact set instead of filling the grid
		if (atoms.length == 0) {
			logger.warn("No atoms found for buidling grid!");
			return new AtomContactSet(cutoff);
		}

		grid.addAtoms(atoms);
		return grid.getAtomContacts();
	}

	/**
	 * Returns the set of intra-chain contacts for the given chain for all non-H
	 * atoms of non-hetatoms, i.e. the contact map. Uses a geometric hashing
	 * algorithm that speeds up the calculation without need of full distance
	 * matrix. The parsing mode
	 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
	 * true for this to work.
	 *
	 * @param chain
	 * @param cutoff
	 * @return
	 */
	public static AtomContactSet getAtomsInContact(Chain chain, double cutoff) {
		// a null name array makes the three-argument overload fall back to
		// all non-H atoms
		final String[] noAtomNames = null;
		return getAtomsInContact(chain, noAtomNames, cutoff);
	}

	/**
	 * Returns the set of intra-chain contacts for the given chain for C-alpha
	 * atoms (including non-standard aminoacids appearing as HETATM groups),
	 * i.e. the contact map. Uses a geometric hashing algorithm that speeds up
	 * the calculation without need of full distance matrix. The parsing mode
	 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
	 * true for this to work.
	 *
	 * @param chain
	 * @param cutoff
	 * @return
	 * @see #getRepresentativeAtomsInContact(Chain, double)
	 */
	public static AtomContactSet getAtomsCAInContact(Chain chain, double cutoff) {
		// the grid is filled with the chain's CA atoms only
		Grid caGrid = new Grid(cutoff);
		Atom[] caAtoms = getAtomCAArray(chain);
		caGrid.addAtoms(caAtoms);

		AtomContactSet contacts = caGrid.getAtomContacts();
		return contacts;
	}

	/**
	 * Returns the set of intra-chain contacts for the given chain for C-alpha
	 * or C3' atoms (including non-standard aminoacids appearing as HETATM
	 * groups), i.e. the contact map. Uses a geometric hashing algorithm that
	 * speeds up the calculation without need of full distance matrix.
	 *
	 * @param chain
	 * @param cutoff
	 * @return
	 * @since Biojava 4.1.0
	 */
	public static AtomContactSet getRepresentativeAtomsInContact(Chain chain,
			double cutoff) {
		// the grid is filled with the chain's representative atoms only
		Grid representativeGrid = new Grid(cutoff);
		Atom[] representatives = getRepresentativeAtomArray(chain);
		representativeGrid.addAtoms(representatives);

		AtomContactSet contacts = representativeGrid.getAtomContacts();
		return contacts;
	}

	/**
	 * Returns the set of inter-chain contacts between the two given chains for
	 * the given atom names. Uses a geometric hashing algorithm that speeds up
	 * the calculation without need of full distance matrix. The parsing mode
	 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
	 * true for this to work.
	 *
	 * @param chain1
	 * @param chain2
	 * @param atomNames
	 *            the array with atom names to be used. For Calphas use {"CA"},
	 *            if null all non-H atoms will be used. Note HET atoms are
	 *            ignored unless this parameter is null.
	 * @param cutoff
	 * @param hetAtoms
	 *            if true HET atoms are included, if false they are not
	 * @return
	 */
	public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2,
			String[] atomNames, double cutoff, boolean hetAtoms) {
		Grid grid = new Grid(cutoff);

		final Atom[] firstChainAtoms;
		final Atom[] secondChainAtoms;
		if (atomNames != null) {
			// explicit names: hetAtoms is not consulted on this path
			firstChainAtoms = getAtomArray(chain1, atomNames);
			secondChainAtoms = getAtomArray(chain2, atomNames);
		} else {
			// no names: all non-H atoms, hetAtoms is passed through
			firstChainAtoms = getAllNonHAtomArray(chain1, hetAtoms);
			secondChainAtoms = getAllNonHAtomArray(chain2, hetAtoms);
		}

		// the two atom sets are added together, in chain1/chain2 order
		grid.addAtoms(firstChainAtoms, secondChainAtoms);
		return grid.getAtomContacts();
	}

	/**
	 * Returns the set of inter-chain contacts between the two given chains for
	 * all non-H atoms. Uses a geometric hashing algorithm that speeds up the
	 * calculation without need of full distance matrix. The parsing mode
	 * {@link FileParsingParameters#setAlignSeqRes(boolean)} needs to be set to
	 * true for this to work.
	 *
	 * @param chain1
	 * @param chain2
	 * @param cutoff
	 * @param hetAtoms
	 *            if true HET atoms are included, if false they are not
	 * @return
	 */
	public static AtomContactSet getAtomsInContact(Chain chain1, Chain chain2,
			double cutoff, boolean hetAtoms) {
		// a null name array makes the five-argument overload use all non-H
		// atoms
		final String[] noAtomNames = null;
		return getAtomsInContact(chain1, chain2, noAtomNames, cutoff, hetAtoms);
	}

	/**
	 * Finds Groups in {@code structure} that contain at least one Atom that is
	 * within {@code radius} Angstroms of {@code centroid}.
	 *
	 * @param structure
	 *            The structure from which to find Groups
	 * @param centroid
	 *            The centroid of the shell
	 * @param excludeResidues
	 *            A list of ResidueNumbers to exclude
	 * @param radius
	 *            The radius from {@code centroid}, in Angstroms
	 * @param includeWater
	 *            Whether to include Groups whose <em>only</em> atoms are water
	 * @param useAverageDistance
	 *            When set to true, distances are the arithmetic mean (1-norm)
	 *            of the distances of atoms that belong to the group and that
	 *            are within the shell; otherwise, distances are the minimum of
	 *            these values
	 * @return A map of Groups within (or partially within) the shell, to their
	 *         distances in Angstroms
	 */
	public static Map<Group, Double> getGroupDistancesWithinShell(
			Structure structure, Atom centroid,
			Set<ResidueNumber> excludeResidues, double radius,
			boolean includeWater, boolean useAverageDistance) {

		// for speed the shell test compares squared distances, which avoids
		// a square root for every atom that lies outside the shell
		final double radiusSquared = radius * radius;

		// minimum mode: smallest squared distance seen so far per group
		// average mode: running sum of the true distances per group
		Map<Group, Double> distances = new HashMap<Group, Double>();

		// number of atoms per group found inside the shell; only needed when
		// averaging. group.getAtoms().size() cannot be used because some of
		// the group's atoms may be outside the shell
		Map<Group, Integer> atomCounts = new HashMap<Group, Integer>();

		for (Chain chain : structure.getChains()) {
			groupLoop: for (Group chainGroup : chain.getAtomGroups()) {

				// exclude water
				if (!includeWater && chainGroup.isWater())
					continue;

				// check blacklist of residue numbers
				for (ResidueNumber rn : excludeResidues) {
					if (rn.equals(chainGroup.getResidueNumber()))
						continue groupLoop;
				}

				for (Atom testAtom : chainGroup.getAtoms()) {

					// use getDistanceFast (squared distance) as we are doing
					// a lot of comparisons
					double distSquared = Calc.getDistanceFast(centroid, testAtom);

					// if we're in the shell
					if (distSquared <= radiusSquared) {
						if (useAverageDistance) {
							// Sum the true distances; the division by the
							// atom count happens after the loops. The sum
							// must start at 0: seeding it with infinity (as
							// needed for the minimum) would make every
							// average infinite.
							Double sumSoFar = distances.get(chainGroup);
							distances.put(chainGroup,
									(sumSoFar == null ? 0.0 : sumSoFar)
											+ Math.sqrt(distSquared));
							Integer countSoFar = atomCounts.get(chainGroup);
							atomCounts.put(chainGroup,
									countSoFar == null ? 1 : countSoFar + 1);
						} else {
							// take the minimum over all atoms of chainGroup;
							// we can't break here because a later atom might
							// be closer
							Double bestSoFar = distances.get(chainGroup);
							if (bestSoFar == null || distSquared < bestSoFar) {
								distances.put(chainGroup, distSquared);
							}
						}
					}

				}
			}
		}

		// Turn the accumulated values into the reported distances. Entries
		// are updated through setValue so the map is not written to with
		// put() while its entry set is being iterated.
		for (Map.Entry<Group, Double> entry : distances.entrySet()) {
			if (useAverageDistance) {
				// sum of distances -> arithmetic mean
				int count = atomCounts.get(entry.getKey());
				entry.setValue(entry.getValue() / count);
			} else {
				// squared minimum -> minimum distance
				entry.setValue(Math.sqrt(entry.getValue()));
			}
		}

		return distances;

	}

	/**
	 * Finds Groups in {@code structure} that have at least one Atom within
	 * {@code distance} of {@code atom}.
	 *
	 * @param structure
	 *            The structure whose chains are searched
	 * @param atom
	 *            The atom distances are measured from
	 * @param excludeResidues
	 *            Groups whose residue number equals one of these are skipped
	 * @param distance
	 *            The cutoff distance
	 * @param includeWater
	 *            If false, groups reporting themselves as water are skipped
	 * @return the matching Groups in the order in which they were encountered
	 */
	public static Set<Group> getGroupsWithinShell(Structure structure,
			Atom atom, Set<ResidueNumber> excludeResidues, double distance,
			boolean includeWater) {

		// getDistanceFast returns the square of a distance, so the cutoff is
		// squared once and compared against that
		final double cutoffSquared = distance * distance;

		Set<Group> withinShell = new LinkedHashSet<Group>();

		for (Chain chain : structure.getChains()) {
			for (Group candidate : chain.getAtomGroups()) {

				if (!includeWater && candidate.isWater()) {
					continue;
				}

				// skip groups whose residue number is on the exclusion list
				boolean excluded = false;
				for (ResidueNumber rn : excludeResidues) {
					if (rn.equals(candidate.getResidueNumber())) {
						excluded = true;
						break;
					}
				}
				if (excluded) {
					continue;
				}

				// one atom inside the cutoff is enough to accept the group
				for (Atom other : candidate.getAtoms()) {
					if (Calc.getDistanceFast(atom, other) <= cutoffSquared) {
						withinShell.add(candidate);
						break;
					}
				}
			}
		}

		return withinShell;
	}

	/**
	 * <p>
	 * Returns a Set of Groups in a structure within the distance specified of a
	 * given group.
	 * </p>
	 * <p>
	 * Updated 18-Sep-2015 sroughley to return a Set so only a unique set of
	 * Groups returned
	 *
	 * @param structure
	 *            The structure to work with
	 * @param group
	 *            The 'query' group
	 * @param distance
	 *            The cutoff distance
	 * @param includeWater
	 *            Should water residues be included in the output?
	 * @return {@link LinkedHashSet} of {@link Group}s within at least one atom
	 *         with {@code distance} of at least one atom in {@code group}
	 */
	public static Set<Group> getGroupsWithinShell(Structure structure,
			Group group, double distance, boolean includeWater) {

		// the query group itself is excluded via its residue number
		Set<ResidueNumber> queryResidue = new HashSet<ResidueNumber>();
		queryResidue.add(group.getResidueNumber());

		// union of the per-atom shells; the set keeps each group once and
		// in first-seen order
		Set<Group> neighbours = new LinkedHashSet<Group>();
		for (Atom queryAtom : group.getAtoms()) {
			neighbours.addAll(getGroupsWithinShell(structure, queryAtom,
					queryResidue, distance, includeWater));
		}

		return neighbours;
	}

	/**
	 * Remove all models from a Structure and keep only the first
	 *
	 * @param s
	 *            original Structure
	 * @return a structure that contains only the first model
	 * @since 3.0.5
	 */
	public static Structure removeModels(Structure s) {
		// a single-model structure is handed back unchanged (same instance)
		if (s.nrModels() == 1) {
			return s;
		}

		Structure firstModelOnly = new StructureImpl();

		// identifiers
		firstModelOnly.setPDBCode(s.getPDBCode());
		firstModelOnly.setName(s.getName());

		// TODO: do deep copying of data!
		// header, DB references and sites are passed on as returned by s
		firstModelOnly.setPDBHeader(s.getPDBHeader());
		firstModelOnly.setDBRefs(s.getDBRefs());
		firstModelOnly.setSites(s.getSites());

		// only the chains of model 0 are kept
		firstModelOnly.setChains(s.getModel(0));

		return firstModelOnly;
	}

	/**
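	 * Removes all polymeric and solvent groups from a list of groups
	 *
	 * @param allGroups
	 *            the groups to filter; the list itself is not modified
	 * @return a new list with the groups that are neither polymeric nor water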
	 */
	public static List<Group> filterLigands(List<Group> allGroups) {

		List<Group> ligands = new ArrayList<Group>();

		for (Group candidate : allGroups) {
			// keep what is neither part of a polymer nor water; the water
			// check only runs for non-polymeric groups
			if (!candidate.isPolymeric() && !candidate.isWater()) {
				ligands.add(candidate);
			}
		}

		return ligands;
	}

	/**
	 * Short version of {@link #getStructure(String, PDBFileParser, AtomCache)}
	 * which creates new parsers when needed
	 *
	 * @param name
	 * @return
	 * @throws IOException
	 * @throws StructureException
	 */
	public static Structure getStructure(String name) throws IOException,
	StructureException {
		// neither a parser nor a cache is supplied; the three-argument
		// overload instantiates what it needs
		final PDBFileParser noParser = null;
		final AtomCache noCache = null;
		return StructureTools.getStructure(name, noParser, noCache);
	}

	/**
	 * Flexibly get a structure from an input String. The intent of this method
	 * is to allow any reasonable string which could refer to a structure to be
	 * correctly parsed. The following are currently supported:
	 * <ol>
	 * <li>Filename (if name refers to an existing file)
	 * <li>PDB ID
	 * <li>SCOP domains
	 * <li>PDP domains
	 * <li>Residue ranges
	 * <li>Other formats supported by AtomCache
	 * </ol>
	 *
	 * @param name
	 *            Some reference to the protein structure
	 * @param parser
	 *            A clean PDBFileParser to use if it is a file. If null, a
	 *            PDBFileParser will be instantiated if needed.
	 * @param cache
	 *            An AtomCache to use if the structure can be fetched from the
	 *            PDB. If null, a AtomCache will be instantiated if needed.
	 * @return A Structure object
	 * @throws IOException
	 *             if name is an existing file, but doesn't parse correctly
	 * @throws StructureException
	 *             if the format is unknown, or if AtomCache throws an
	 *             exception.
	 */
	public static Structure getStructure(String name, PDBFileParser parser,
			AtomCache cache) throws IOException, StructureException {
		File f = new File(FileDownloadUtils.expandUserHome(name));

		if (f.exists()) {
			// name refers to an existing file: parse it as a PDB file
			if (parser == null) {
				parser = new PDBFileParser();
			}
			// try-with-resources makes sure the file handle is released
			// whether parsing succeeds or throws
			try (InputStream inStream = new FileInputStream(f)) {
				return parser.parsePDBFile(inStream);
			}
		}

		// not a file: let the AtomCache interpret the name
		if (cache == null) {
			cache = new AtomCache();
		}
		return cache.getStructure(name);
	}

	/**
	 * @deprecated  use {@link Chain#isProtein()} instead.
	 */
	@Deprecated
	public static boolean isProtein(Chain c) {
		// pure forwarder to the Chain method that replaces this one
		final boolean protein = c.isProtein();
		return protein;
	}

	/**
	 * @deprecated use {@link Chain#isNucleicAcid()} instead.
 	 */
	@Deprecated
	public static boolean isNucleicAcid(Chain c) {
		// pure forwarder to the Chain method that replaces this one
		final boolean nucleicAcid = c.isNucleicAcid();
		return nucleicAcid;
	}

	/**
	 * @deprecated use {@link Chain#getPredominantGroupType()} instead.
	 */
	@Deprecated
	public static GroupType getPredominantGroupType(Chain c) {
		// pure forwarder to the Chain method that replaces this one
		final GroupType predominant = c.getPredominantGroupType();
		return predominant;
	}

	/**
	 * @deprecated use {@link Chain#isWaterOnly()} instead.
	 */
	@Deprecated
	public static boolean isChainWaterOnly(Chain c) {
		// pure forwarder to the Chain method that replaces this one
		final boolean waterOnly = c.isWaterOnly();
		return waterOnly;
	}

	/** @deprecated  use {@link Chain#isPureNonPolymer()} instead.
	 */
	@Deprecated
	public static boolean isChainPureNonPolymer(Chain c) {
		// pure forwarder to the Chain method that replaces this one
		final boolean pureNonPolymer = c.isPureNonPolymer();
		return pureNonPolymer;
	}

	/**
	 * Cleans up the structure's alternate location groups. All alternate location groups should have all atoms (except in the case of microheterogeneity or when a deuterium exists).
	 * Ensures that all the alt loc groups have all the atoms in the main group.
	 * @param structure The Structure to be cleaned up
	 */
	public static void cleanUpAltLocs(Structure structure) {
		for (int model = 0; model < structure.nrModels(); model++) {
			for (Chain chain : structure.getModel(model)) {
				for (Group mainGroup : chain.getAtomGroups()) {
					for (Group altLoc : mainGroup.getAltLocs()) {
						for (Atom mainAtom : mainGroup.getAtoms()) {

							// the alt loc already has an atom of this name
							if (altLoc.hasAtom(mainAtom.getName())) {
								continue;
							}

							// microheterogeneity: an alt loc with a different
							// residue name is left as it is
							if (!altLoc.getPDBName().equals(mainGroup.getPDBName())) {
								continue;
							}

							// a hydrogen whose deuterated counterpart is
							// present in the alt loc is not copied over
							if (hasDeuteratedEquiv(mainAtom, altLoc)) {
								continue;
							}

							altLoc.addAtom(mainAtom);
						}
					}
				}
			}
		}
	}

	/**
	 * Expands the NCS operators in the given Structure adding new chains as needed.
	 * The new chains are assigned ids of the form: original_chain_id+ncs_operator_index+"n"
	 * @param structure
	 */
	public static void expandNcsOps(Structure structure) {
		PDBCrystallographicInfo xtalInfo = structure.getCrystallographicInfo();
		if (xtalInfo == null) {
			return;
		}

		// nothing to expand without NCS operators
		Matrix4d[] ncsOperators = xtalInfo.getNcsOperators();
		if (ncsOperators == null || ncsOperators.length == 0) {
			return;
		}

		// The copies are collected first and only attached once all
		// operators have been processed.
		List<Chain> ncsCopies = new ArrayList<>();

		// operator indices used in the new ids start at 1
		for (int opIndex = 1; opIndex <= ncsOperators.length; opIndex++) {
			Matrix4d operator = ncsOperators[opIndex - 1];
			String suffix = opIndex + "n";

			for (Chain original : structure.getChains()) {
				Chain copy = (Chain) original.clone();

				String copyId = original.getId() + suffix;
				String copyName = original.getName() + suffix;
				copy.setId(copyId);
				copy.setName(copyName);

				// residue numbers carry the chain name too and must follow
				setChainIdsInResidueNumbers(copy, copyName);

				Calc.transform(copy, operator);
				ncsCopies.add(copy);

				// the copy is registered with the entity info of its source
				original.getEntityInfo().addChain(copy);
			}
		}

		for (Chain copy : ncsCopies) {
			structure.addChain(copy);
		}
	}

	/**
	 * Auxiliary method to reset chain ids of residue numbers in a chain.
	 * Used when cloning chains and resetting their ids: one needs to take care of 
	 * resetting the ids within residue numbers too.
	 * @param c
	 * @param newChainName
	 */
	private static void setChainIdsInResidueNumbers(Chain c, String newChainName) {
		// atom groups: sequence number and insertion code are kept, only
		// the chain name is replaced
		for (Group atomGroup : c.getAtomGroups()) {
			ResidueNumber current = atomGroup.getResidueNumber();
			atomGroup.setResidueNumber(newChainName, current.getSeqNum(), current.getInsCode());
		}

		// seqres groups: same treatment, but groups without a residue
		// number are left alone
		for (Group seqResGroup : c.getSeqResGroups()) {
			ResidueNumber current = seqResGroup.getResidueNumber();
			if (current != null) {
				seqResGroup.setResidueNumber(newChainName, current.getSeqNum(), current.getInsCode());
			}
		}
	}

	/**
	 * Check to see if a Deuterated atom has a non deuterated brother in the group.
	 * @param atom the input atom that is putatively deuterium
	 * @param currentGroup the group the atom is in
	 * @return true if the atom is deuterated and its hydrogen equivalent exists.
	 */
	public static boolean hasNonDeuteratedEquiv(Atom atom, Group currentGroup) {
		// only deuterium atoms can have a non-deuterated counterpart
		if (atom.getElement() != Element.D) {
			return false;
		}
		// the counterpart carries the same name with a leading 'D' turned
		// into 'H'
		String hydrogenName = replaceFirstChar(atom.getName(), 'D', 'H');
		return currentGroup.hasAtom(hydrogenName);
	}
	
	/**
	 * Check to see if a Hydrogen has a Deuterated brother in the group.
	 * @param atom the input atom that is putatively hydrogen
	 * @param currentGroup the group the atom is in
	 * @return true if the atom is hydrogen and its Deuterium equivalent exists.
	 */
	public static boolean hasDeuteratedEquiv(Atom atom, Group currentGroup) {
		// only hydrogen atoms can have a deuterated counterpart
		if (atom.getElement() != Element.H) {
			return false;
		}
		// the counterpart carries the same name with a leading 'H' turned
		// into 'D'
		String deuteriumName = replaceFirstChar(atom.getName(), 'H', 'D');
		return currentGroup.hasAtom(deuteriumName);
	}

	/**
	 * Returns {@code name} with its first character changed from {@code c} to
	 * {@code d}. Names that are empty or do not start with {@code c} are
	 * returned unchanged.
	 *
	 * @param name the name to convert
	 * @param c the character expected at the first position
	 * @param d the character to put in its place
	 * @return the converted name, or {@code name} itself if nothing changed
	 */
	private static String replaceFirstChar(String name, char c, char d) {
		// guard against empty names, for which charAt(0) would throw
		if (name.isEmpty() || name.charAt(0) != c) {
			return name;
		}
		// plain character swap: String.replaceFirst would interpret c as a
		// regular expression and d as a regex replacement string
		return d + name.substring(1);
	}
}