/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.io;

import static org.junit.Assert.*;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
import java.util.zip.GZIPInputStream;

import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.EntityInfo;
import org.biojava.nbio.structure.EntityType;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.io.mmcif.MMcifParser;
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifConsumer;
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.junit.Test;

/**
 * Tests for non-deposited PDB/mmCIF files, i.e. any kind of "raw" file
 * lacking significant parts of the headers.
 *
 * Some things tested:
 * - heuristics to guess isNMR, isCrystallographic
 * - chain and entity counts for files lacking entity information
 *
 * All input files are gzipped classpath resources. Every stream is opened
 * through {@link #openGzippedResource(String)} and closed by the test that
 * opened it.
 *
 * @author Jose Duarte
 *
 */
public class TestNonDepositedFiles {

	private static final String RESOURCE_DIR = "/org/biojava/nbio/structure/io/";

	@Test
	public void test1B8GnoSeqresPdb() throws IOException, StructureException {

		String resource = RESOURCE_DIR + "1b8g_raw.pdb.gz";

		// The params object is kept so that alignSeqRes can be flipped on the
		// same parser for the second pass below.
		FileParsingParameters params = new FileParsingParameters();
		params.setAlignSeqRes(true);
		PDBFileParser pdbpars = new PDBFileParser();
		pdbpars.setFileParsingParameters(params);

		Structure s = parsePdbResource(pdbpars, resource);

		assertNotNull(s);

		assertTrue(s.isCrystallographic());
		assertFalse(s.isNmr());
		assertEquals(1, s.nrModels());
		assertNull(s.getPDBHeader().getExperimentalTechniques());

		assertNotNull(s.getCrystallographicInfo().getCrystalCell());
		assertNotNull(s.getCrystallographicInfo().getSpaceGroup());

		assertEquals("P 1 21 1", s.getCrystallographicInfo().getSpaceGroup().getShortSymbol());

		CrystalCell cell = s.getCrystallographicInfo().getCrystalCell();
		assertTrue(cell.isCellReasonable());

		// TODO get the scale matrix from the PDB file and check it against the calculated one
		// (cell.checkScaleMatrixConsistency / cell.checkScaleMatrix)

		// 2 protein chains, 2 nonpoly PLP chains, 2 water chains
		assertEquals(6, s.getChains().size());

		// checking that heuristics in CompoundFinder work. We should have
		// 1 polymer entity (protein) + 1 nonpoly entity (PLP) + 1 water entity
		assertEquals(3, s.getEntityInfos().size());
		assertEquals(EntityType.POLYMER, s.getEntityById(1).getType());

		// trying again without aligning SEQRES
		params.setAlignSeqRes(false);
		s = parsePdbResource(pdbpars, resource);

		assertNotNull(s);

		assertEquals(6, s.getChains().size());
		assertEquals(3, s.getEntityInfos().size());
		assertEquals(EntityType.POLYMER, s.getEntityById(1).getType());
	}

	/**
	 * Prints chain information for the deposited 1B8G entry, for manual
	 * comparison with the raw file used in {@link #test1B8GnoSeqresPdb()}.
	 * Disabled: the {@code @Test} annotation is commented out, so JUnit does
	 * not run it. It loads the structure through {@link StructureIO} rather
	 * than from a bundled resource.
	 */
	//@Test
	public void test1B8G() throws IOException, StructureException {

		AtomCache cache = new AtomCache();

		StructureIO.setAtomCache(cache);

		cache.setUseMmCif(true);
		Structure s = StructureIO.getStructure("1B8G");

		System.out.println("Chains from full deposited file: ");
		checkChains(s);
	}

	@Test
	public void test3C5F() throws IOException, StructureException {

		Structure s = parsePdbResource(newAlignSeqResPdbParser(), RESOURCE_DIR + "3c5f_raw.pdb.gz");

		// multi-model X-ray diffraction entry, thus:
		assertFalse(s.isNmr());
		assertTrue(s.isCrystallographic());
		assertTrue(s.nrModels() > 1);
		assertNull(s.getPDBHeader().getExperimentalTechniques());
	}

	@Test
	public void test4B19() throws IOException, StructureException {

		Structure s = parsePdbResource(newAlignSeqResPdbParser(), RESOURCE_DIR + "4b19_raw.pdb.gz");

		// multi-model NMR entry, thus:
		assertTrue(s.isNmr());
		assertFalse(s.isCrystallographic());
		assertTrue(s.nrModels() > 1);
		assertNull(s.getPDBHeader().getExperimentalTechniques());
	}

	@Test
	public void test2M7Y() throws IOException {

		Structure s = parsePdbResource(newAlignSeqResPdbParser(), RESOURCE_DIR + "2m7y_raw.pdb.gz");

		// single-model NMR entry, thus:
		// isNmr() is not asserted: we can't detect it properly, because it's single model!
		assertFalse(s.isCrystallographic()); // at least this we can detect from the unreasonable crystal cell
		assertEquals(1, s.nrModels());
		assertNull(s.getPDBHeader().getExperimentalTechniques());
	}

	/**
	 * Prints, for every chain of the structure, its id, atom length, SEQRES
	 * length and number of SEQRES groups. Debugging aid only; it asserts
	 * nothing.
	 *
	 * @param s the structure whose chains are printed
	 */
	private void checkChains(Structure s) {
		for (Chain chain : s.getChains()) {
			int seqResLength = chain.getSeqResLength();
			int atomLength = chain.getAtomLength();
			System.out.println("chain " + chain.getId() + ", atomLength: " + atomLength + ", seqResLength: " + seqResLength);
			// The atomLength <= seqResLength assertion was disabled in the original test and stays disabled.
			System.out.println("seq res groups size: " + chain.getSeqResGroups().size());
		}
	}

	/**
	 * A test for reading a phenix-produced (ver 1.9_1692) mmCIF file.
	 * This is the file submitted to the PDB for deposition of entry 4lup
	 * See github issue #234
	 * @throws IOException if the resource cannot be read
	 */
	@Test
	public void testPhenixCifFile() throws IOException {

		MMcifParser parser = new SimpleMMcifParser();

		SimpleMMcifConsumer consumer = new SimpleMMcifConsumer();

		FileParsingParameters fileParsingParams = new FileParsingParameters();
		fileParsingParams.setAlignSeqRes(true);

		consumer.setFileParsingParameters(fileParsingParams);

		parser.addMMcifConsumer(consumer);

		try (InputStream inStream = openGzippedResource(RESOURCE_DIR + "4lup_phenix_output.cif.gz")) {
			// Explicit charset so decoding does not depend on the platform default.
			parser.parse(new BufferedReader(
					new InputStreamReader(inStream, java.nio.charset.StandardCharsets.UTF_8)));
		}

		Structure s = consumer.getStructure();

		assertNotNull(s);

		assertTrue(s.isCrystallographic());

		// all ligands are into their own chains, so we have 2 proteins, 2 nucleotide chains, 1 ligand chain and 1 purely water chain
		assertEquals(6, s.getChains().size());

		// 4 entities: 1 protein, 1 nucleotide, 1 water, 1 ligand (EDO)
		assertEquals(4, s.getEntityInfos().size());
		assertEntityTypeCounts(s, 2, 1, 1);
	}

	@Test
	public void testPhenixPdbFile() throws IOException {

		Structure s = parsePdbResource(newAlignSeqResPdbParser(), RESOURCE_DIR + "4lup_phenix_output.pdb.gz");

		assertNotNull(s);

		assertTrue(s.isCrystallographic());

		// all ligands are into their own chains, so we have 2 proteins, 2 nucleotide chains, 1 ligand chain and 1 purely water chain
		assertEquals(6, s.getChains().size());

		// 4 entities: 1 protein, 1 nucleotide, 1 water, 1 ligand (EDO)
		assertEquals(4, s.getEntityInfos().size());
		assertEntityTypeCounts(s, 2, 1, 1);
	}

	@Test
	public void testPhaserPdbFile() throws IOException {

		Structure s = parsePdbResource(newAlignSeqResPdbParser(), RESOURCE_DIR + "4lup_phaser_output.pdb.gz");

		assertNotNull(s);

		assertTrue(s.isCrystallographic());

		assertEquals(2, s.getChains().size());

		assertEquals(1, s.getEntityInfos().size());
	}

	@Test
	public void testRefmacPdbFile() throws IOException {

		Structure s = parsePdbResource(newAlignSeqResPdbParser(), RESOURCE_DIR + "rnase_refmac_output.pdb.gz");

		assertNotNull(s);

		assertTrue(s.isCrystallographic());

		// 2 polymer chains with 1 ligand per chain, 1 purely water chain = 5 chains
		assertEquals(5, s.getChains().size());

		// 1 polymer entity, 1 nonpoly entity, 1 water entity
		assertEquals(3, s.getEntityInfos().size());
		assertEntityTypeCounts(s, 1, 1, 1);
	}

	/**
	 * This test represents a common situation for a non-deposited structure.
	 * When building with common crystallography software, the user often adds new
	 * ligands (or solvent) molecules as new chains. Only prior to deposition
	 * then relabel them so that they belong to the same chain as the polymeric residues.
	 *
	 * In this case, the ligands represent valuable information and should not be discarded.
	 */
	@Test
	public void testNewLigandChain() throws IOException {

		int expectedNumLigands = 1;

		// The same default parameters are shared by the PDB parser and the mmCIF consumer.
		FileParsingParameters params = new FileParsingParameters();

		PDBFileParser pdbpars = new PDBFileParser();
		pdbpars.setFileParsingParameters(params);
		Structure s1 = parsePdbResource(pdbpars, "/ligandTest.pdb.gz");

		// The chain B should be present with 1 ligand HEM
		List<Chain> pdbLigandChains = s1.getNonPolyChainsByPDB("B");
		assertNotNull(pdbLigandChains);
		assertFalse("No non-polymer chain with author id B in PDB file", pdbLigandChains.isEmpty());
		Chain c1 = pdbLigandChains.get(0);
		assertNotNull(c1);
		assertEquals(expectedNumLigands, c1.getAtomGroups().size());

		MMcifParser mmcifpars = new SimpleMMcifParser();
		SimpleMMcifConsumer consumer = new SimpleMMcifConsumer();
		consumer.setFileParsingParameters(params);
		mmcifpars.addMMcifConsumer(consumer);
		try (InputStream cifStream = openGzippedResource("/ligandTest.cif.gz")) {
			mmcifpars.parse(cifStream);
		}
		Structure s2 = consumer.getStructure();

		// The chain B should be present with 1 ligand HEM
		List<Chain> cifLigandChains = s2.getNonPolyChainsByPDB("B");
		assertNotNull(cifLigandChains);
		assertFalse("No non-polymer chain with author id B in mmCIF file", cifLigandChains.isEmpty());
		Chain c2 = cifLigandChains.get(0);
		assertNotNull(c2);
		assertEquals(expectedNumLigands, c2.getAtomGroups().size());

		// pdb and mmcif should have same number of chains
		assertEquals(s1.getChains().size(), s2.getChains().size());
	}

	@Test
	public void testWaterOnlyChainPdb() throws IOException {

		// following file is cut-down version of 4a10; parsed with the parser's own default parameters
		Structure s1 = parsePdbResource(new PDBFileParser(), RESOURCE_DIR + "4a10_short.pdb.gz");

		assertEquals(2, s1.getChains().size());

		Chain c1 = s1.getWaterChainByPDB("F");

		assertNotNull("Got null when looking for water-only chain with author id F", c1);

		// checking that compounds are linked
		assertNotNull(c1.getEntityInfo());

		// checking that the water molecule was assigned an ad-hoc compound
		assertEquals(2, s1.getEntityInfos().size());
	}

	@Test
	public void testWaterOnlyChainCif() throws IOException {

		MMcifParser mmcifpars = new SimpleMMcifParser();
		SimpleMMcifConsumer consumer = new SimpleMMcifConsumer();
		mmcifpars.addMMcifConsumer(consumer);

		// following file is cut-down version of 4a10
		try (InputStream cifStream = openGzippedResource(RESOURCE_DIR + "4a10_short.cif.gz")) {
			mmcifpars.parse(cifStream);
		}
		Structure s2 = consumer.getStructure();

		assertEquals(2, s2.getChains().size());

		Chain c = s2.getWaterChainByPDB("F");

		assertNotNull("Got null when looking for water-only chain with author id F", c);

		// checking that compounds are linked
		assertNotNull(c.getEntityInfo());

		// checking that the water molecule was assigned an ad-hoc compound
		assertEquals(2, s2.getEntityInfos().size());

		// the same chain must be reachable through its asym id
		Chain cAsymId = s2.getWaterChain("E");
		assertNotNull("Got null when looking for water-only chain with asym id E", cAsymId);
		assertSame(c, cAsymId);
	}

	/**
	 * Opens a gzipped classpath resource for reading.
	 * Fails the test with the resource path if the resource is missing,
	 * instead of letting {@link GZIPInputStream} throw a bare
	 * NullPointerException.
	 *
	 * @param resourcePath absolute classpath location of the gzipped file
	 * @return a decompressing stream; the caller is responsible for closing it
	 * @throws IOException if the gzip header cannot be read
	 */
	private InputStream openGzippedResource(String resourcePath) throws IOException {
		InputStream raw = this.getClass().getResourceAsStream(resourcePath);
		assertNotNull("Test resource not found on classpath: " + resourcePath, raw);
		try {
			return new GZIPInputStream(raw);
		} catch (IOException e) {
			// The wrapper was never created, so nobody else will close the raw stream.
			raw.close();
			throw e;
		}
	}

	/**
	 * Creates a PDB parser whose parsing parameters have alignSeqRes switched on.
	 *
	 * @return a freshly configured parser
	 */
	private static PDBFileParser newAlignSeqResPdbParser() {
		FileParsingParameters params = new FileParsingParameters();
		params.setAlignSeqRes(true);
		PDBFileParser parser = new PDBFileParser();
		parser.setFileParsingParameters(params);
		return parser;
	}

	/**
	 * Parses a gzipped PDB-format classpath resource with the given parser
	 * and closes the underlying stream afterwards.
	 *
	 * @param parser the (already configured) parser to use
	 * @param resourcePath absolute classpath location of the gzipped PDB file
	 * @return whatever {@code parser.parsePDBFile} returns for the resource
	 * @throws IOException if the resource cannot be read
	 */
	private Structure parsePdbResource(PDBFileParser parser, String resourcePath) throws IOException {
		try (InputStream inStream = openGzippedResource(resourcePath)) {
			return parser.parsePDBFile(inStream);
		}
	}

	/**
	 * Asserts the number of polymer, non-polymer and water entities of a structure.
	 *
	 * @param s the structure to inspect
	 * @param expectedPoly expected number of {@link EntityType#POLYMER} entities
	 * @param expectedNonPoly expected number of {@link EntityType#NONPOLYMER} entities
	 * @param expectedWater expected number of {@link EntityType#WATER} entities
	 */
	private static void assertEntityTypeCounts(Structure s, int expectedPoly, int expectedNonPoly, int expectedWater) {
		int[] counts = countEntityTypes(s.getEntityInfos());
		assertEquals("Number of polymer entities", expectedPoly, counts[0]);
		assertEquals("Number of non-polymer entities", expectedNonPoly, counts[1]);
		assertEquals("Number of water entities", expectedWater, counts[2]);
	}

	/**
	 * Counts entities by type.
	 *
	 * @param entities the entities to classify
	 * @return a 3-element array: index 0 holds the polymer count, index 1 the
	 * non-polymer count and index 2 the water count; entities of any other
	 * type are not counted
	 */
	private static int[] countEntityTypes(List<EntityInfo> entities) {
		int countPoly = 0;
		int countNonPoly = 0;
		int countWater = 0;
		for (EntityInfo e : entities) {
			EntityType type = e.getType();
			// Reference comparison keeps this safe if the type is null.
			if (type == EntityType.POLYMER) {
				countPoly++;
			} else if (type == EntityType.NONPOLYMER) {
				countNonPoly++;
			} else if (type == EntityType.WATER) {
				countWater++;
			}
		}
		return new int[] {countPoly, countNonPoly, countWater};
	}

}