/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.io;

import static org.junit.Assert.assertNotNull;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import java.util.zip.GZIPInputStream;

import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureIO;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.io.LocalPDBDirectory.FetchBehavior;
import org.biojava.nbio.structure.io.mmcif.MMcifParser;
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifConsumer;
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests for the {@code headerOnly} file parsing parameter, for both the PDB
 * and the mmCIF parsers.
 */
public class TestHeaderOnly {

    private static final Logger logger = LoggerFactory.getLogger(TestHeaderOnly.class);

    /** Identifier of the structure fetched by the AtomCache-based tests. */
    private static final String PDB_ID = "1REP";

    /**
     * All groups are expected to be empty.
     *
     * @throws StructureException if the structure cannot be loaded
     * @throws IOException if the structure file cannot be read
     */
    @Test
    public void testHeaderOnly() throws StructureException, IOException {

        // Get either PDB or mmCIF with a headerOnly = true.

        // Test 1: with PDB
        AtomCache cache = new AtomCache();
        cache.setUseMmCif(false);

        FileParsingParameters params = new FileParsingParameters();
        params.setHeaderOnly(true);
        // params.setAlignSeqRes(true);  // Now this is default.

        cache.setFileParsingParams(params);
        StructureIO.setAtomCache(cache);

        Structure sPDB = StructureIO.getStructure(PDB_ID);
        Assert.assertFalse("PDB: SEQRES groups must not have atoms when headerOnly is set",
                doSeqResHaveAtoms(sPDB));

        // Test 2: with mmCIF
        cache.setUseMmCif(true);

        Structure sCIF = StructureIO.getStructure(PDB_ID);
        Assert.assertFalse("mmCIF: SEQRES groups must not have atoms when headerOnly is set",
                doSeqResHaveAtoms(sCIF));
    }

    /**
     * Test that with alignSeqRes, expected Group(s) have Atoms, while others
     * are present with correct sequence but empty.
     *
     * @throws StructureException if the structure cannot be loaded
     * @throws IOException if the structure file cannot be read
     */
    @Test
    public void testAlignSeqres() throws StructureException, IOException {

        // Get either PDB or mmCIF with a headerOnly = false.

        // Test 1: with PDB
        AtomCache cache = new AtomCache();
        cache.setUseMmCif(false);

        FileParsingParameters params = new FileParsingParameters();
        params.setHeaderOnly(false);
        // params.setAlignSeqRes(true);  // Now this is default.

        cache.setFileParsingParams(params);
        StructureIO.setAtomCache(cache);

        Structure sPDB = StructureIO.getStructure(PDB_ID);
        Assert.assertTrue("PDB: some SEQRES groups must have atoms when headerOnly is not set",
                doSeqResHaveAtoms(sPDB));
        check1REPChainC(sPDB); // Check particular residues to be aligned.

        // Test 2: with mmCIF
        cache.setUseMmCif(true);

        Structure sCIF = StructureIO.getStructure(PDB_ID);
        Assert.assertTrue("mmCIF: some SEQRES groups must have atoms when headerOnly is not set",
                doSeqResHaveAtoms(sCIF));
        check1REPChainC(sCIF); // Check particular residues to be aligned.
    }

    /**
     * Times the parsing of a structure obtained through an
     * {@link MMCIFFileReader}. Not annotated as a test; see
     * {@link #testSpeed2()} for the variant that uses local files.
     */
    // A better test follows that uses local files.
    // @Test
    public void testSpeed() {
        // Force using a file reader.
        MMCIFFileReader fr = new MMCIFFileReader();
        FileParsingParameters par = new FileParsingParameters();
        //par.setAlignSeqRes(true);
        // par.setHeaderOnly(true);
        par.setHeaderOnly(false);
        fr.setFileParsingParameters(par);
        fr.setFetchBehavior(FetchBehavior.FETCH_FILES);

        Structure s;
        long start = System.nanoTime();
        try {
            // Medium sized structure parsed in 0.549s (no header) vs .676s (header) ~ 20% faster
            s = fr.getStructureById("4WZ6");
            // A larger structure could be parsed ~ 4.991s (no header) vs 5.867s (header) ~ 16% faster
            // s = fr.getStructureById("4V60");
        } catch (IOException e) {
            // Fail this test only; never terminate the JVM running the whole suite.
            throw new AssertionError("Could not read structure 4WZ6", e);
        }
        long stop = System.nanoTime();

        logElapsed(s, start, stop);
    }

    /**
     * Test the file parsing speed when the files are already downloaded,
     * using the gzipped 4hhb PDB and mmCIF files from the test resources.
     *
     * @throws StructureException declared for compatibility with the parsing API
     * @throws IOException if a resource cannot be decompressed or parsed
     */
    // Test using local files.
    @Test
    public void testSpeed2() throws StructureException, IOException {

        FileParsingParameters params = new FileParsingParameters();
        params.setHeaderOnly(true);  // Flip this true/false to compare parsing speed.

        // Both streams are closed on exit, including when an assertion or the parser fails.
        try (InputStream cifStream = openGzippedResource("/4hhb.cif.gz");
                InputStream pdbStream = openGzippedResource("/4hhb.pdb.gz")) {

            logger.info("Testing PDB parsing speed");
            PDBFileParser pdbpars = new PDBFileParser();
            pdbpars.setFileParsingParameters(params);
            //pdbpars.setLoadChemCompInfo(true);
            long start = System.nanoTime();
            Structure s1 = pdbpars.parsePDBFile(pdbStream);
            long stop = System.nanoTime();
            logElapsed(s1, start, stop);

            MMcifParser mmcifpars = new SimpleMMcifParser();
            SimpleMMcifConsumer consumer = new SimpleMMcifConsumer();
            consumer.setFileParsingParameters(params);
            mmcifpars.addMMcifConsumer(consumer);

            logger.info("Testing mmCIF parsing speed");
            start = System.nanoTime();
            mmcifpars.parse(cifStream);
            Structure s2 = consumer.getStructure();
            stop = System.nanoTime();
            logElapsed(s2, start, stop);
        }

        /* Running from an SSD..
         * PDB .165s (all atom) -> 0.009s (only header) 95% faster.
         * mmCIF 0.323s (no header) -> 0.175s (only header) 45% faster.
         */
    }

    /**
     * Opens a gzip-compressed classpath resource for reading.
     * <p>
     * The raw resource stream is checked for {@code null} before it is
     * wrapped, so a missing resource is reported as an assertion failure
     * naming the resource rather than as a NullPointerException.
     *
     * @param resourcePath path passed to {@link Class#getResourceAsStream(String)}
     * @return a decompressing stream over the resource; the caller must close it
     * @throws IOException if the resource is not valid gzip data
     */
    private InputStream openGzippedResource(String resourcePath) throws IOException {
        InputStream raw = getClass().getResourceAsStream(resourcePath);
        assertNotNull("Test resource not found on classpath: " + resourcePath, raw);
        try {
            return new GZIPInputStream(raw);
        } catch (IOException e) {
            // The wrapper was never created, so the raw stream must be closed here.
            raw.close();
            throw e;
        }
    }

    /**
     * Logs the time elapsed between two {@link System#nanoTime()} readings,
     * in seconds, tagged with the identifier of the parsed structure.
     *
     * @param s the structure that was parsed
     * @param startNanos reading taken before parsing
     * @param stopNanos reading taken after parsing
     */
    private static void logElapsed(Structure s, long startNanos, long stopNanos) {
        double diff = (stopNanos - startNanos) / 1000000000.0;
        logger.info(String.format("[%s] Elapsed time: %.3f s", s.getIdentifier(), diff));
    }

    /**
     * Scan through SeqResGroups of every chain in every model, returns true
     * if any have Atoms.
     *
     * @param s the structure to scan
     * @return true if at least one SEQRES group has one or more atoms
     */
    public boolean doSeqResHaveAtoms(Structure s) {
        for (int i = 0; i < s.nrModels(); i++) {
            for (Chain c : s.getChains(i)) {
                for (Group g : c.getSeqResGroups()) {
                    if (hasAtoms(g)) {
                        return true; // Found some Atoms in a Seqres group.
                    }
                }
            }
        }
        return false;
    }

    /**
     * Does a group have any Atom(s)?
     *
     * @param g : a group
     * @return true if has any Atom(s)
     */
    public boolean hasAtoms(Group g) {
        return !g.getAtoms().isEmpty();
    }

    /**
     * Check that the gapped residues have no atoms, but that ungapped residues
     * have atoms.
     *
     * @param s: Structure to test.
     * @throws StructureException if chain "C" cannot be retrieved from the structure
     */
    public void check1REPChainC(Structure s) throws StructureException {
        String sequence =
                "MAETAVINHKKRKNSPRIVQSNDLTEAAYSLSRDQKRMLYLFVDQIRK" +
                "SDGTLQEHDGICEIHVAKYAEIFGLTSAEASKDIRQALKSFAGKEVVFYRPEEDAGDE" +
                "KGYESFPWFIKPAHSPSRGLYSVHINPYLIPFFIGLQNRFTQFRLSETKEITNPYAMR" +
                "LYESLCQYRKPDGSGIVSLKIDWIIERYQLPQSYQRMPDFRRRFLQVCVNEINSRTPM" +
                "RLSYIEKKKGRQTTHIVFSFRDITSMTTG";

        // shouldMatch[i] is true when the residue at 0-based index i is expected to
        // have atoms. Arrays.fill takes a half-open [from, to) range of 0-based
        // indices; the comments below give the equivalent 1-based residue ranges.
        boolean[] shouldMatch = new boolean[sequence.length()];
        Arrays.fill(shouldMatch, true);
        Arrays.fill(shouldMatch, 0, 14, false);    // 1-14 is gap
        Arrays.fill(shouldMatch, 49, 55, false);   // 50-55 is gap
        Arrays.fill(shouldMatch, 97, 109, false);  // 98-109 is gap
        Arrays.fill(shouldMatch, 246, 251, false); // 247-251 is gap

        Chain c = s.getPolyChainByPDB("C");

        List<Group> seqres = c.getSeqResGroups();

        // Check lengths
        Assert.assertEquals(sequence.length(), seqres.size());

        // Check sequences.
        Assert.assertEquals(sequence, c.getSeqResSequence());

        for (int i = 0; i < sequence.length(); i++) {
            Assert.assertEquals("Unexpected atom presence at SEQRES position " + (i + 1),
                    shouldMatch[i], hasAtoms(seqres.get(i)));
        }
    }

    /**
     * Builds a sequence string by concatenating the one-letter code of the
     * ChemComp of each group, in list order.
     *
     * @param seqres : a list of Group(s)
     * @return a String representing these Groups
     */
    public String getSequenceString(List<Group> seqres) {
        StringBuilder sb = new StringBuilder();
        for (Group g : seqres) {
            ChemComp c = g.getChemComp();
            sb.append(c.getOne_letter_code());
        }
        return sb.toString();
    }
}