/**
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
* Created on 2013-05-28
* Created by Douglas Myers-Turnbull
*
* @since 3.0.6
*/
package org.biojava.nbio.structure.io;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.align.model.AFPChain;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.align.xml.AFPChainXMLConverter;
import org.biojava.nbio.structure.scop.ScopFactory;
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.core.sequence.ProteinSequence;
import org.custommonkey.xmlunit.DetailedDiff;
import org.custommonkey.xmlunit.Diff;
import org.custommonkey.xmlunit.Difference;
import org.custommonkey.xmlunit.XMLUnit;
import org.custommonkey.xmlunit.examples.RecursiveElementNameAndTextQualifier;
import org.junit.Before;
import org.junit.Test;
import org.xml.sax.SAXException;
import java.io.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
/**
* A test for {@link FastaAFPChainConverter}.
* @author dmyersturnbull
*
*/
public class FastaAFPChainConverterTest {
// One-time XMLUnit configuration shared by every comparison in this class:
// attribute order, XML comments and whitespace are all ignored when diffing.
static {
    XMLUnit.setIgnoreAttributeOrder(true);
    XMLUnit.setIgnoreComments(true);
    XMLUnit.setIgnoreWhitespace(true);
}
/**
 * Writes every individual difference contained in {@code diff} to {@code ps},
 * one difference per line.
 *
 * @param diff the comparison whose differences should be listed
 * @param ps   the stream the differences are printed to
 */
public static void printDetailedDiff(Diff diff, PrintStream ps) {
    // getAllDifferences() is iterated as plain Objects, so each entry is cast explicitly.
    for (Object entry : new DetailedDiff(diff).getAllDifferences()) {
        ps.println((Difference) entry);
    }
}
/**
 * Compares two XML files and reports whether XMLUnit considers them "similar".
 * Elements are paired using a {@link RecursiveElementNameAndTextQualifier}, and the
 * static XMLUnit settings of this class (whitespace, comments and attribute order
 * are ignored) apply. When the files are not similar, every difference is printed
 * to {@link System#err}.
 *
 * @param expectedFile the reference XML file
 * @param actualFile   the XML file under test
 * @return {@code true} if the files are similar, {@code false} otherwise
 * @throws RuntimeException wrapping the {@link IOException} or {@link SAXException}
 *         raised while reading or parsing either file
 */
public static boolean compareXml(File expectedFile, File actualFile) {
    FileReader expectedFr = null;
    FileReader actualFr = null;
    try {
        expectedFr = new FileReader(expectedFile);
        actualFr = new FileReader(actualFile);
        Diff diff = new Diff(expectedFr, actualFr);
        // Pair elements through the recursive name-and-text qualifier rather than
        // XMLUnit's default qualifier.
        diff.overrideElementQualifier(new RecursiveElementNameAndTextQualifier());
        final boolean isSimilar = diff.similar();
        if (!isSimilar) {
            printDetailedDiff(diff, System.err);
        }
        return isSimilar;
    } catch (IOException e) {
        throw new RuntimeException("Could not read " + expectedFile + " or " + actualFile, e);
    } catch (SAXException e) {
        throw new RuntimeException("Could not parse " + expectedFile + " or " + actualFile, e);
    } finally {
        // Always release both readers, including when opening the second one,
        // parsing, or diffing fails part-way through.
        closeQuietly(expectedFr);
        closeQuietly(actualFr);
    }
}

/**
 * Closes {@code resource} if it is non-null, ignoring any failure to close.
 */
private static void closeQuietly(Closeable resource) {
    if (resource == null) {
        return;
    }
    try {
        resource.close();
    } catch (IOException ignored) {
        // The resource was only read from; a failed close cannot change the result
        // and must not mask an exception already propagating from the caller.
    }
}
/** Structure cache used by the tests; re-created before each test by {@link #setUp()}. */
private AtomCache cache;

/**
 * Creates a fresh {@link AtomCache} and then selects the
 * {@code ScopFactory.VERSION_1_75B} SCOP database before every test.
 */
@Before
public void setUp() {
    this.cache = new AtomCache();
    ScopFactory.setScopDatabase(ScopFactory.VERSION_1_75B);
}
/**
 * Converts a gapped two-row alignment of 1w0p against itself via
 * {@code cpFastaToAfpChain} and checks the resulting TM-score and RMSD.
 * The last argument (-393) is presumably the circular-permutation site; confirm
 * against {@link FastaAFPChainConverter}.
 */
@Test
public void testCpAsymmetric() throws IOException, StructureException, CompoundNotFoundException {
    final double tolerance = 0.001;
    final double expectedTmScore = 0.2949;
    final double expectedRmsd = 3.605;

    final Structure protein = cache.getStructure("1w0p");
    final String firstRow = "alfdynatgdtefdspakqgwmqdntnngsgvltnadgmpawlvqgiggraqwtyslstnqhaqassfgwrmttemkvlsggmitnyyangtqrvlpiisldssgnlvvefegqtgrtvlatgtaateyhkfelvflpgsnpsasfyfdgklirdniqptaskQNMIVWGNGSSntdgvaayrdikfei------------------------------------------------------------------------------------------------------------------QGDVIf------------RGPDRIPSIVASsvTPGVVTAFAEKRVGGgdpgalsntNDIITRTSRDGGITWDTELNLTEQinvsdeFDFSDPRPIYDPs---SNTVLVSYARWPtdaaqngdrikpwmpNGIFYSVYDVASgnWQAPIDVTdqvkersfqiagwggselyrrntslnsqqdwqsnakirivdgaanqiqvadgsrkyvvtlsidesgglvanlngvsapiilqsehakvhsfhdyelqysalnhtttlfvdgqqittwagevsqenniqfgnadaqidgrlhvqkivltqqghnlvefdafylaqqtpevekdleklgwtkiktgntmslygNASVNPGpgHGITLtrqqnisgsqNGRLIYPAIVLdrfFLNVMSIYSDDGgsnwq-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TGSTLpipfrwksssileTLEPSEADMVELQN--GDLLLTARLDFNQivngvny--SPRQQFLSKDGGITWSLLEANNANvfsnistgTVDASITRFEqsdgSHFLLFTNPQGnpagTNgr------------QNLGLWFSFDEG--VTWKGPIQ--LVNGasaysdiyqldsenaivivetdnsnmrilrmpitllkqklt";
    final String secondRow = "--------------------------------------------------------------------------------------------kirivdgaanqiqvadgsrkyvvtlsidesgglvanlngvsapiilqsehakvhsfhdyelqysalnhtttLFVDGQQITTWagevsqenniqfgnadaqidgrlhvqkivltqqghnlvefdafylaqqtpevekdleklgwtkiktgntmslygnasvnpgpghgitltrqqnisgsqngrliypaivldrfflnvmsiysddggsnwqTGSTLpipfrwksssileTLEPSEADMVEL--QNGDLLLTARLDFNQivngvny--SPRQQFLSKDGGITWSLLEANNANvfsnisTGTVDASITRFEqsdgSHFLLFTNPQGNpagtngr--------QNLGLWFSFDEG--VTWKGPIQlv---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NGASAYS--DIYQLd---------SENAIVIVETD---NSNMRILRMPITllkqkltalfdynatgdtefdspakqgwmqdntnngsgvltnadgmpawlvqgiggraqwtyslstnqhaqassfgwrmttemkvlsggmitnyyangtqrvlpiisldssgnlvvefegqtgrtvlatgtaateyhkfelvflpgsnpsasfyfdgklirdniqptaskqnmivwgngssntdgvaayrdikfeiQGDVIf------------RGPDRIPSIVASSVtpGVVTAFAEKRVGGgdpgalsntNDIITRTSRDGGITWDTELNLTEQinvsdefdFSDPRPIYDPs---SNTVLVSYARW----PTdaaqngdrikpwmpNGIFYSVYDVASgnWQAPIDVTdqVKERsfqiagwggselyrrntslnsqqdwqsna------------";

    final AFPChain alignment = FastaAFPChainConverter.cpFastaToAfpChain(firstRow, secondRow, protein, -393);

    assertEquals("Wrong TM-score", expectedTmScore, alignment.getTMScore(), tolerance);
    assertEquals("Wrong RMSD", expectedRmsd, alignment.getTotalRmsdOpt(), tolerance);
}
/**
 * Converts a gapped two-row alignment of 31BI against itself via
 * {@code cpFastaToAfpChain} and checks the resulting TM-score and RMSD.
 * The last argument (-101) is presumably the circular-permutation site; confirm
 * against {@link FastaAFPChainConverter}.
 */
@Test
public void testCpSymmetric2() throws IOException, StructureException, CompoundNotFoundException {
    final double tolerance = 0.001;
    final double expectedTmScore = 0.6284;
    final double expectedRmsd = 2.50569;

    final String firstRow = "--vRSLNCTLRDSQQ-KSLVMSG---PYELKALHLQgqdmeq-----QVVFSMSFVQGeesndkiPVALGLKEK-NLYLSSVLKdDKPTLQLESVdpknypkkkmekRFVFNKIEInn--KLEFESAQFpnWYISTSqAENmPVFLGGT----KGgqDITDFTMQFV---";
    final String secondRow = "esnDKIPVALGLKEKnLYLSSVLkddKPTLQLESVDpknypkkkmekRFVFNKIEINN-------KLEFESAQFpNWYISTSQA-ENMPVFLGGTkggqd-------ITDFTMQFVvrslNCTLRDSQQ--KSLVMS-GPY-ELKALHLqgqdME--QQVVFSMSFVqge";
    final Structure protein = StructureTools.getStructure("31BI");

    final AFPChain alignment = FastaAFPChainConverter.cpFastaToAfpChain(firstRow, secondRow, protein, -101);

    assertEquals("Wrong TM-score", expectedTmScore, alignment.getTMScore(), tolerance);
    assertEquals("Wrong RMSD", expectedRmsd, alignment.getTotalRmsdOpt(), tolerance);
}
/**
 * Regression test for the SCOP domain d3er9b_: converts a gapped two-row alignment
 * of the domain against itself via {@code cpFastaToAfpChain} and checks the
 * resulting RMSD and TM-score.
 */
@Test
public void testBug1() throws IOException, StructureException, CompoundNotFoundException {
    /*
     * From CriteriaDifference:
     * [d3er9b_: TM-score=0.6984812617301941, Tmpr=0.14740000665187836]
     * This is a HUGE difference
     * 0.6984813 appears to be correct.
     * The TM-score is so high for such an asymmetric domain simply because the alignment partially follows the main diagonal (trivial alignment).
     */
    final double tolerance = 0.001;
    final double expectedRmsd = 2.681;
    final double expectedTmScore = 0.69848;

    final String firstRow = "nitlkiietylgrvpsvneyhmlksqarniqkitvfnkdifvslvkknkkrffsdvntsaseikdrilsyfsKQTQty-------NIGKLFTIIELQSVLVTTYTDilgvLTINV----TSMEELARDMLnsmnVAVVSSLVKNVNKLMEEYLRRHNKSCICYGSYSLYLINPNIRYGDIDILQTNSRTFLIDLAFLIKFITGNNIILSKIPYLRNYMVIKDENDNHIIDSFNIRQDTMNVVPKIFIDNIYIVDP---TFQLLNMIKMfsqIDRLEDLSkdpeKFNARMATMLEYVRYT------HGIVFdgKRNNMPMKCIIDENNRIVTVTTKDYFSFKKCLVYLDENVLSSDILDLNADTSCDFESVTNSVYLIHDNIMYTYFSNTILLSDKGKVheiSARGLCAHILLYQml-----TSG--EYKQCLSDLLNsmMNRDKIPIysHTERDKKPGRHGFINIEKDIIVF-------------------------------------------------------------------";
    final String secondRow = "----------------------------------------------------------------------lsYFSKqtqtynigkLFTIIELQSVLVTTYTDILGV----LTINVtsmeELARDMLNSMN----VAVVSSLVKNVNKLMEEYLRRHNKSCICYGSYSLYLINPNIRYGDIDILQTNSRTFLIDLAFLIKFITGNNIILSKIPYLRNYMVIKDENDNHIIDSFNIRQDTMNVVPKIFIDNIYIVDPtfqLLNMIKMFSQ---IDRLEDLS----KDPEKFNARMATMLEYvrythgIVFDG--KRNNMPMKCIIDENNRIVTVTTKDYFSFKKCLVYLDENVLSSDILDLNADTSCDFESVTNSVYLIHDNIMYTYFSNTILLSDKGKV---HEISARGLCAHILlyqmltsGEYkqCLSDLLNSMMN--RDKIPIYS--HTERDKKPGRHGFINIEKDIIVFnitlkiietylgrvpsvneyhmlksqarniqkitvfnkdifvslvkknkkrffsdvntsaseikdri";
    // The two comment rows below repeat the rows above, keeping only the upper-case blocks and writing '=' everywhere else.
    // ========================================================================KQTQ=========NIGKLFTIIELQSVLVTTYTD====LTINV====TSMEELARDML====VAVVSSLVKNVNKLMEEYLRRHNKSCICYGSYSLYLINPNIRYGDIDILQTNSRTFLIDLAFLIKFITGNNIILSKIPYLRNYMVIKDENDNHIIDSFNIRQDTMNVVPKIFIDNIYIVDP===TFQLLNMIKM===IDRLEDLS====KFNARMATMLEYVRYT======HGIVF==KRNNMPMKCIIDENNRIVTVTTKDYFSFKKCLVYLDENVLSSDILDLNADTSCDFESVTNSVYLIHDNIMYTYFSNTILLSDKGKV===SARGLCAHILLYQ=======TSG==EYKQCLSDLLN==MNRDKIPI==HTERDKKPGRHGFINIEKDIIVF===================================================================
    // ========================================================================YFSK=========LFTIIELQSVLVTTYTDILGV====LTINV====ELARDMLNSMN====VAVVSSLVKNVNKLMEEYLRRHNKSCICYGSYSLYLINPNIRYGDIDILQTNSRTFLIDLAFLIKFITGNNIILSKIPYLRNYMVIKDENDNHIIDSFNIRQDTMNVVPKIFIDNIYIVDP===LLNMIKMFSQ===IDRLEDLS====KDPEKFNARMATMLEY======IVFDG==KRNNMPMKCIIDENNRIVTVTTKDYFSFKKCLVYLDENVLSSDILDLNADTSCDFESVTNSVYLIHDNIMYTYFSNTILLSDKGKV===HEISARGLCAHIL=======GEY==CLSDLLNSMMN==RDKIPIYS==HTERDKKPGRHGFINIEKDIIVF===================================================================
    final Structure domain = StructureTools.getStructure("d3er9b_");

    final AFPChain alignment = FastaAFPChainConverter.cpFastaToAfpChain(firstRow, secondRow, domain, 67);

    assertEquals("Wrong RMSD", expectedRmsd, alignment.getTotalRmsdOpt(), tolerance);
    assertEquals("Wrong TM-score", expectedTmScore, alignment.getTMScore(), tolerance);
}
/**
 * Converts a gapped two-row alignment of 2GG6 against itself via
 * {@code cpFastaToAfpChain} and checks the resulting TM-score and RMSD.
 * The last argument (-230) is presumably the circular-permutation site; confirm
 * against {@link FastaAFPChainConverter}.
 */
@Test
public void testCpSymmetric1() throws IOException, StructureException, CompoundNotFoundException {
    final double tolerance = 0.001;
    final double expectedTmScore = 0.7701;
    final double expectedRmsd = 3.035;

    // Rows were copied from the alignment file with: cat 2GG6-best.fasta |tr -d \\n|pbcopy
    final String firstRow = "-SSRPATAR-KSSGLSGTVRIPGDKSISHRSFMFGGLA-SGETRITGLLEG-EDvINTGKAMQAMGARIRKEGd---------TWIIDGVgngglLAPEAPLD---FGNAATGCRLTMGLVGvydFDSTFIGDASLtkrp---MGRVLNPLREMGVQVKSEDgdrLPVTLRGPK---TPT---PITYRVpMASAQVKSAVLLAGLNTPGITTVIEpi---MTRDHTEKMLQGFGANLTVEtdadGVRTIRLEgRGKLTGQVIDVPGDPSSTAFPLVAALLVpGSDVTILNVLMNpTR-TGLILTLQEMGADIEVINprlaggedvaDLRVRSS-----TLKGVTVPedrAPSMIDEYPILAVAAAFAEGATVMNGLEELrvkesdrLSAVANGLKLNGVDCDEGE---TSLVVRGRPdgkGLGNasgAAVAT-HLDHRIAMSFLVMGLVSENPVTVDDatmIATSFPEFMDLMAGLGAKIELS---";
    final String secondRow = "dGVRTIRLEgRGKLTGQVIDVPGDPSSTAFPLVAALLVpGSDVTILNVLMNpTR-TGLILTLQEMGADIEVINprlaggedvaDLRVRSS-----TLKGVTVPedrAPSMIDEYPILAVAAAfaeGATVMNGLEELrvkesdrLSAVANGLKLNGVDCDEGE---TSLVVRGRPdgkGLGnasGAAVAT-HLDHRIAMSFLVMGLVSENPVTVDDatmiaTSFPEFMDLMAGLGAKIELS----SSRPATAR-KSSGLSGTVRIPGDKSISHRSFMFGGLA-SGETRITGLLEG-EDvINTGKAMQAMGARIRKEGd---------TWIIDGVgngglLAPEAPLD---FGNAATGCRLTMGLVGVYDFDSTFIGDASLtkrp---MGRVLNPLREMGVQVKSEDgdrLPVTLRGPK---TPTP---ITYRVpMASAQVKSAVLLAGLNTPGITTVIE---PIMTRDHTEKMLQGFGANLTVEtda";
    final Structure protein = StructureTools.getStructure("2GG6");

    // The original author left the alternative value 215 next to this call.
    final AFPChain alignment = FastaAFPChainConverter.cpFastaToAfpChain(firstRow, secondRow, protein, -230); // 215

    assertEquals("Wrong TM-score", expectedTmScore, alignment.getTMScore(), tolerance);
    assertEquals("Wrong RMSD", expectedRmsd, alignment.getTotalRmsdOpt(), tolerance);
}
/**
 * Builds an {@link AFPChain} from two gapped protein sequences aligned between
 * 1w0p and 1qdm, checks the number of equivalent residues and the alignment
 * length, then serialises the chain to XML and compares it with the stored
 * reference file {@code src/test/resources/1w0p_1qdm.xml}.
 */
@Test
public void testFromFasta() throws IOException, StructureException, CompoundNotFoundException {
    Structure s1 = cache.getStructure("1w0p");
    Structure s2 = cache.getStructure("1qdm");
    ProteinSequence seq1 = new ProteinSequence("GWGG----SEL--YRRNTSLNS--QQDW-------QSNAKIRIVDGAA-----NQIQ");
    ProteinSequence seq2 = new ProteinSequence("WMQNQLAQNKT--QDLILDYVNQLCNRL---PSPMESAV----DCGSLGSMPDIEFT");
    AFPChain afpChain = FastaAFPChainConverter.fastaToAfpChain(seq1, seq2, s1, s2);
    assertEquals("Wrong number of EQRs", 33, afpChain.getNrEQR());
    assertEquals("Wrong number of alnLength", 53, afpChain.getAlnLength());

    String xml = AFPChainXMLConverter.toXML(afpChain);
    File expected = new File("src/test/resources/1w0p_1qdm.xml");
    File x = File.createTempFile("1w0p_1qdm_output", "xml.tmp");
    x.deleteOnExit();

    // Close the writer even when write() fails, so the temporary file's handle
    // is never leaked.
    BufferedWriter bw = new BufferedWriter(new FileWriter(x));
    try {
        bw.write(xml);
    } finally {
        bw.close();
    }

    boolean match = compareXml(expected, x);
    if (!match) {
        // Dump the generated XML so a failure can be diagnosed from the test log.
        System.err.println(xml);
        fail("AFPChain is wrong");
    }
}
}