/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * * Created on 25.04.2004 * @author Andreas Prlic * */ package org.biojava.nbio.structure; import org.biojava.nbio.core.sequence.template.Sequence; import org.biojava.nbio.structure.io.FileParsingParameters; import org.biojava.nbio.structure.io.mmcif.model.ChemComp; import java.util.List; /** * <p> * Defines the interface for a Chain. A Chain corresponds to a Chain in a PDB file. * A chain consists out of a list of {@link Group} objects. A Group can either be * an {@link AminoAcid}, {@link HetatomImpl Hetatom} or {@link NucleotideImpl Nucleotide}. * </p> * * <p> * The BioJava API provides access to both the ATOM and SEQRES records in a PDB file. * During parsing of a PDB file it aligns the ATOM and SEQRES groups and joins them. * The SEQRES sequence can be accessed via {@link #getSeqResGroups()} and the * ATOM groups via {@link #getAtomGroups()}. Groups that have been observed * (i.e. they are in the ATOM records) can be detected by {@link Group}.has3D() * </p> * * @author Andreas Prlic * @version %I% %G% * @since 1.4 */ public interface Chain { /** returns an identical copy of this Chain. * @return an identical copy of this Chain */ Object clone(); /** add a group to the list of ATOM record group of this chain. * To add SEQRES records a more complex alignment between ATOM and SEQRES residues * is required, please see SeqRes2AtomAligner for more details on that. * @param group a Group object */ void addGroup(Group group); /** Get the 'private' asymId (internal chain IDs in mmCif) for this chain. * * @return the asymId * @see #setId(String) * @see #getName() */ String getId() ; /** * Set the 'private' asymId (internal chain IDs in mmCif) for this chain. * * @param asymId the internal chain Id */ void setId(String asymId) ; /** * Set the 'public' authId (chain ID in PDB file) * * @param authId the 'public' authId (chain ID in PDB file) * @see #getId() */ void setName(String authId); /** * Get the 'public' authId (chain ID in PDB file) * * @return the authId for this chain. * @see #getId() */ String getName(); /** * Return the Group at given position, * from within Groups with observed density in the chain, i.e. * those with coordinates in ATOM and HETATMS (including waters) records. * @param position an int * @return a Group object * @see #getAtomLength() * @see #getAtomGroups() * @see #getSeqResGroup(int) */ Group getAtomGroup (int position); /** * Return the Group at given position, * from within groups in the SEQRES records of the chain, i.e. * the aminoacids/nucleotides in the construct. * @param position an int * @return a Group object * @see #getSeqResLength() * @see #getSeqResGroups() * @see #getAtomGroup(int) */ Group getSeqResGroup (int position); /** * Return all Groups with observed density in the chain, i.e. * those with coordinates in ATOM and HETATMS (including waters) records. * * @return a List object representing the Groups of this Chain. * @see #setAtomGroups(List) * @see #getAtomLength() * @see #getSeqResGroups() */ List<Group> getAtomGroups(); /** * Set all Groups with observed density in the chain, i.e. * those with coordinates in ATOM and HETATMs (including waters) records. * @param groups a List object representing the Groups of this Chain. * @see #getAtomGroups() */ void setAtomGroups(List<Group> groups); /** * Return a List of all (observed) Groups of a special type, one of: {@link GroupType#AMINOACID}, * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}. * Note that if a standard aminoacid appears as a HETATM (because it is part of a ligand) then * it is still considered as {@link GroupType#AMINOACID} and not as {@link GroupType#HETATM}. * @param type GroupType * @return a List object * @see #setAtomGroups(List) */ List<Group> getAtomGroups (GroupType type); /** * Get a group by its PDB residue numbering. If the PDB residue number is not known, * throws a StructureException. * * @param resNum the PDB residue number of the group * @return the matching group * @throws StructureException */ Group getGroupByPDB(ResidueNumber resNum) throws StructureException; /** * Get all groups that are located between two PDB residue numbers. * * @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start. * @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end. * @return Groups in between. or throws a StructureException if either start or end can not be found, * @throws StructureException */ Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd) throws StructureException; /** * Get all groups that are located between two PDB residue numbers. In contrast to getGroupsByPDB * this method call ignores if the exact outer groups are not found. This is useful e.g. when requesting the range * of groups as specified by the DBREF records - these frequently are rather inaccurate. * * * @param pdbresnumStart PDB residue number of start. If null, defaults to the chain start. * @param pdbresnumEnd PDB residue number of end. If null, defaults to the chain end. * @param ignoreMissing ignore missing groups in this range. * @return Groups in between. or throws a StructureException if either start or end can not be found, * @throws StructureException * */ Group[] getGroupsByPDB(ResidueNumber pdbresnumStart, ResidueNumber pdbresnumEnd,boolean ignoreMissing) throws StructureException; /** * Returns the number of Groups with observed density in the chain, i.e. * those with coordinates in ATOM and HETATMs (including waters) records * * @return the length * @see #getAtomGroup(int) * @see #getAtomGroups() * @see #getSeqResLength()) */ int getAtomLength(); /** * Returns the number of groups in the SEQRES records of the chain, i.e. * the number of aminoacids/nucleotides in the construct * * @return the length * @see #getSeqResGroup(int) * @see #getSeqResGroups() * @see #getAtomLength() */ int getSeqResLength(); /** * Sets the Entity information * @param entityInfo the EntityInfo * @see #getEntityInfo() */ void setEntityInfo(EntityInfo entityInfo); /** * Returns the EntityInfo for this chain. * * @return the EntityInfo object * @see #setEntityInfo(EntityInfo) */ EntityInfo getEntityInfo(); /** * Sets the 'private' asymId of this chain (Chain id in PDB file ). * @param asymId a String specifying the name value * @see #getChainID() * @deprecated use {@link #setId(String asymId)} instead */ @Deprecated void setChainID(String asymId); /** * Gets the 'private' asymId of this chain. * @return a String representing the name value * @see #setChainID(String) * @deprecated use getId() instead */ @Deprecated String getChainID(); /** * If available, returns the internal chain ID that is used in mmCIF files (asym_id), otherwise null * * @return String or null * @since 3.0.5 * @deprecated use {@link #getId()} instead */ String getInternalChainID(); /** * Sets the internal chain ID that is used in mmCif files * * @param internalChainID * @since 3.0.5 * @deprecated use {@link #setId()} instead */ void setInternalChainID(String internalChainID); @Override String toString(); /** * Converts the SEQRES groups of a Chain to a Biojava Sequence object. * * @return the SEQRES groups of the Chain as a Sequence object. */ Sequence<?> getBJSequence() ; /** * Returns the sequence of amino acids as it has been provided in the ATOM records. * Non-standard residues will be present in the string only if the property * {@value org.biojava.nbio.structure.io.PDBFileReader.LOAD_CHEM_COMP_PROPERTY} has been set. * @return amino acid sequence as string * @see #getSeqResSequence() */ String getAtomSequence(); /** * Returns the PDB SEQRES sequence as a one-letter sequence string. * Non-standard residues are represented by an "X". * @return one-letter PDB SEQRES sequence as string * @see #getAtomSequence() */ String getSeqResSequence(); /** * Sets the Swissprot id of this chain. * @param sp_id a String specifying the swissprot id value * @see #getSwissprotId() */ void setSwissprotId(String sp_id); /** * Gets the Swissprot id of this chain. * @return a String representing the swissprot id value * @see #setSwissprotId(String sp_id) */ String getSwissprotId() ; /** * Returns a List of all SEQRES groups of a special type, one of: {@link GroupType#AMINOACID}, * {@link GroupType#HETATM} or {@link GroupType#NUCLEOTIDE}. * @param type a GroupType * @return an List object * @see #setSeqResGroups(List) */ List<Group> getSeqResGroups (GroupType type); /** * Returns a list of all groups in SEQRES records of the chain, i.e. * the aminoacids/nucleotides in the construct. * @return a List of all Group objects of this chain * @see #setSeqResGroups(List) * @see #getSeqResLength() * @see #getAtomGroups() */ List<Group> getSeqResGroups (); /** * Sets the list of SeqResGroups for this chain. * * @param seqResGroups a List of Group objects that from the SEQRES groups of this chain. * @see #getSeqResGroups() */ void setSeqResGroups(List<Group> seqResGroups); /** * Sets the back-reference to its parent Structure. * @param parent the parent Structure object for this Chain * @see #getStructure() * @deprecated use setStructure instead * */ @Deprecated void setParent(Structure parent) ; /** * Sets the back-reference to its parent Structure. * * @param parent */ void setStructure(Structure parent) ; /** * Returns the parent Structure of this chain. * * @return the parent Structure object * @see #setStructure(Structure) * @deprecated use getStructure(Structure) instead. */ @Deprecated Structure getParent() ; /** * Returns the parent Structure of this chain. * * @return the parent Structure object * @see #setStructure(Structure) */ Structure getStructure() ; /** * Gets all groups that are not polymer groups and that are not solvent groups. * Will automatically fetch Chemical Component files from the PDB web site, even if * {@link FileParsingParameters#setLoadChemCompInfo(boolean)} has not been set to true. * Otherwise the Ligands could not correctly be identified. * @return list of Groups that are ligands * @deprecated since biojava 5.0 this does not apply anymore. Chains contain either * polymeric groups or non-polymeric groups */ @Deprecated List<Group> getAtomLigands(); /** * Convert this Chain to a String in PDB format * @return */ String toPDB(); /** * Convert this Chain to a String in mmCIF format * @return */ String toMMCIF(); /** * Sets annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category * * @param seqMisMatches */ void setSeqMisMatches(List<SeqMisMatch> seqMisMatches); /** * Gets annotated sequence mismatches for this chain. This is based on the STRUCT_REF_SEQ_DIF mmCif category * * @returns a list of sequence mismatches (or null if none found) */ List<SeqMisMatch> getSeqMisMatches(); /** * Returns the EntityType of this chain. Equivalent to getEntityInfo().getType() * @return * @see EntityType */ EntityType getEntityType(); /** Tests if a chain is consisting of water molecules only * * @return true if there are only solvent molecules in this chain. */ public boolean isWaterOnly(); /** Returns true if the given chain is composed of non-polymeric (including water) groups only. * * @return true if only non-polymeric groups in this chain. */ public boolean isPureNonPolymer(); /** * Get the predominant {@link GroupType} for a given Chain, following these * rules: <li>if the ratio of number of residues of a certain * {@link GroupType} to total non-water residues is above the threshold * {@value #org.biojava.nbio.structure.StructureTools.RATIO_RESIDUES_TO_TOTAL}, then that {@link GroupType} is * returned</li> <li>if there is no {@link GroupType} that is above the * threshold then the {@link GroupType} with most members is chosen, logging * it</li> * <p> * See also {@link ChemComp#getPolymerType()} and * {@link ChemComp#getResidueType()} which follow the PDB chemical component * dictionary and provide a much more accurate description of groups and * their linking. * </p> * * @return */ public GroupType getPredominantGroupType(); /** * Tell whether given chain is a protein chain * * @return true if protein, false if nucleotide or ligand * @see #getPredominantGroupType() */ public boolean isProtein(); /** * Tell whether given chain is DNA or RNA * * @return true if nucleic acid, false if protein or ligand * @see #getPredominantGroupType() */ public boolean isNucleicAcid(); }