/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.structure;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.io.MMCIFFileReader;
import org.biojava.nbio.structure.io.PDBFileReader;
//import org.slf4j.Logger;
//import org.slf4j.LoggerFactory;
/**
* A class that provides static access methods for easy lookup of protein structure related components
*
* @author Andreas Prlic
*
* @since 3.0.5
*/
public class StructureIO {
//private static final Logger logger = LoggerFactory.getLogger(StructureIO.class);
private static AtomCache cache ;
/**
* Loads a structure based on a name. Supported naming conventions are:
*
* <pre>
Formal specification for how to specify the <i>name</i>:
name := pdbID
| pdbID '.' chainID
| pdbID '.' range
| scopID
| biol
| pdp
range := '('? range (',' range)? ')'?
| chainID
| chainID '_' resNum '-' resNum
pdbID := [0-9][a-zA-Z0-9]{3}
chainID := [a-zA-Z0-9]
scopID := 'd' pdbID [a-z_][0-9_]
biol := 'BIOL:' pdbID [:]? [0-9]+
pdp := 'PDP:' pdbID[A-Za-z0-9_]+
resNum := [-+]?[0-9]+[A-Za-z]?
Example structures:
1TIM #whole structure - asym unit
4HHB.C #single chain
4GCR.A_1-83 #one domain, by residue number
3AA0.A,B #two chains treated as one structure
d2bq6a1 #scop domain
BIOL:1fah #biological assembly nr 1 for 1fah
BIOL:1fah:0 #asym unit for 1fah
BIOL:1fah:1 #biological assembly nr 1 for 1fah
BIOL:1fah:2 #biological assembly nr 2 for 1fah
* </pre>
*
* With the additional set of rules:
*
* <ul>
* <li>If only a PDB code is provided, the whole structure will be return including ligands, but the first model only (for NMR).
* <li>Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A </li>
* <li>To specify a SCOP domain write a scopId e.g. d2bq6a1. Some flexibility can be allowed in SCOP domain names, see {@link #setStrictSCOP(boolean)}</li>
* <li>URLs are accepted as well</li>
* </ul>
*
* @param name
* @return a Structure object, or null if name appears improperly formated (eg too short, etc)
* @throws IOException The PDB file cannot be cached due to IO errors
* @throws StructureException The name appeared valid but did not correspond to a structure.
* Also thrown by some submethods upon errors, eg for poorly formatted subranges.
*/
public static Structure getStructure(String name) throws IOException, StructureException{
checkInitAtomCache();
// delegate this functionality to AtomCache...
return cache.getStructure(name);
}
private static void checkInitAtomCache() {
if ( cache == null){
cache = new AtomCache();
}
}
public static void setAtomCache(AtomCache c){
cache = c;
}
/**
* Returns the first biological assembly that is available for the given PDB id.
* <p>
* The output Structure will be different depending on the multiModel parameter:
* <li>
* the symmetry-expanded chains are added as new models, one per transformId. All original models but
* the first one are discarded.
* </li>
* <li>
* as original with symmetry-expanded chains added with renamed chain ids and names (in the form
* originalAsymId_transformId and originalAuthId_transformId)
* </li>
* <p>
* For more documentation on quaternary structures see:
* {@link http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies}
*
*
* @param pdbId
* @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
* if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
* @return a Structure object or null if that assembly is not available
* @throws StructureException
* @throws IOException
*/
public static Structure getBiologicalAssembly(String pdbId, boolean multiModel) throws IOException, StructureException{
checkInitAtomCache();
pdbId = pdbId.toLowerCase();
Structure s = cache.getBiologicalAssembly(pdbId, multiModel);
return s;
}
/**
* Returns the first biological assembly that is available for the given PDB id,
* using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE}
* <p>
* For more documentation on quaternary structures see:
* {@link http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies}
*
*
* @param pdbId
* @return a Structure object or null if that assembly is not available
* @throws StructureException
* @throws IOException
*/
public static Structure getBiologicalAssembly(String pdbId) throws IOException, StructureException{
return getBiologicalAssembly(pdbId, AtomCache.DEFAULT_BIOASSEMBLY_STYLE);
}
/**
* Returns the biological assembly for the given PDB id and bioassembly identifier.
* <p>
* The output Structure will be different depending on the multiModel parameter:
* <li>
* the symmetry-expanded chains are added as new models, one per transformId. All original models but
* the first one are discarded.
* </li>
* <li>
* as original with symmetry-expanded chains added with renamed chain ids and names (in the form
* originalAsymId_transformId and originalAuthId_transformId)
* </li>
* @param pdbId
* @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit).
* @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
* if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
* @return a Structure object or null if that assembly is not available
* @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it
* @throws IOException
*/
public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr, boolean multiModel) throws IOException, StructureException {
checkInitAtomCache();
pdbId = pdbId.toLowerCase();
Structure s = cache.getBiologicalAssembly(pdbId, biolAssemblyNr, multiModel);
return s;
}
/**
* Returns the biological assembly for the given PDB id and bioassembly identifier,
* using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE}
* @param pdbId
* @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit).
* @return a Structure object or null if that assembly is not available
* @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it
* @throws IOException
*/
public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr) throws IOException, StructureException {
return getBiologicalAssembly(pdbId, biolAssemblyNr, AtomCache.DEFAULT_BIOASSEMBLY_STYLE);
}
/**
* Returns all biological assemblies for the given PDB id.
* <p>
* The output Structure will be different depending on the multiModel parameter:
* <li>
* the symmetry-expanded chains are added as new models, one per transformId. All original models but
* the first one are discarded.
* </li>
* <li>
* as original with symmetry-expanded chains added with renamed chain ids and names (in the form
* originalAsymId_transformId and originalAuthId_transformId)
* </li>
* If only one biological assembly is required use {@link #getBiologicalAssembly(String)} or {@link #getBiologicalAssembly(String, int)} instead.
* @param pdbId
* @param multiModel if true the output Structure will be a multi-model one with one transformId per model,
* if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId).
* @return
* @throws IOException
* @throws StructureException
* @since 5.0
*/
public static List<Structure> getBiologicalAssemblies(String pdbId, boolean multiModel) throws IOException, StructureException {
checkInitAtomCache();
pdbId = pdbId.toLowerCase();
List<Structure> s = cache.getBiologicalAssemblies(pdbId, multiModel);
return s;
}
/**
* Returns all biological assemblies for the given PDB id,
* using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE}
* <p>
* If only one biological assembly is required use {@link #getBiologicalAssembly(String)} or {@link #getBiologicalAssembly(String, int)} instead.
* @param pdbId
* @return
* @throws IOException
* @throws StructureException
* @since 5.0
*/
public static List<Structure> getBiologicalAssemblies(String pdbId) throws IOException, StructureException {
return getBiologicalAssemblies(pdbId, AtomCache.DEFAULT_BIOASSEMBLY_STYLE);
}
private static final String FILE_SEPARATOR = System.getProperty("file.separator");
/**
* Utility method to set the location where PDB files can be found
*
* @param pathToPDBFiles
*/
public static void setPdbPath(String pathToPDBFiles){
if ( ! pathToPDBFiles.endsWith(FILE_SEPARATOR))
pathToPDBFiles += FILE_SEPARATOR;
}
public static enum StructureFiletype {
PDB( (new PDBFileReader()).getExtensions()),
CIF( new MMCIFFileReader().getExtensions()),
UNKNOWN(Collections.<String>emptyList());
private List<String> extensions;
/**
* @param extensions List of supported extensions, including leading period
*/
private StructureFiletype(List<String> extensions) {
this.extensions = extensions;
}
/**
* @return a list of file extensions associated with this type
*/
public List<String> getExtensions() {
return extensions;
}
}
/**
* Attempts to guess the type of a structure file based on the extension
* @param filename
* @return
*/
public static StructureFiletype guessFiletype(String filename) {
String lower = filename.toLowerCase();
for(StructureFiletype type : StructureFiletype.values()) {
for(String ext : type.getExtensions()) {
if(lower.endsWith(ext.toLowerCase())) {
return type;
}
}
}
return StructureFiletype.UNKNOWN;
}
}