/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.structure.align.xml;
import org.biojava.nbio.structure.*;
import org.biojava.nbio.structure.align.ce.CeCPMain;
import org.biojava.nbio.structure.align.model.AFP;
import org.biojava.nbio.structure.align.model.AFPChain;
import org.biojava.nbio.structure.align.util.AFPAlignmentDisplay;
import org.biojava.nbio.structure.jama.Matrix;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
//http://www.developerfusion.com/code/2064/a-simple-way-to-read-an-xml-file-in-java/
public class AFPChainXMLParser
{
private static final Logger logger = LoggerFactory.getLogger(AFPChainXMLParser.class);
public static final String DEFAULT_ALGORITHM_NAME = "jFatCat_rigid";
/** new utility method that checks that the order of the pair in the XML alignment is correct and flips the direction if needed
*
* @param xml
* @param name1
* @param name1
* @param ca1
* @param ca2
* @return
*/
public static AFPChain fromXML(String xml, String name1, String name2, Atom[] ca1, Atom[] ca2) throws IOException, StructureException{
AFPChain[] afps = parseMultiXML( xml);
if ( afps.length > 0 ) {
AFPChain afpChain = afps[0];
String n1 = afpChain.getName1();
String n2 = afpChain.getName2();
if ( n1 == null )
n1 = "";
if ( n2 == null)
n2 = "";
//System.out.println("from AFPCHAIN: " + n1 + " " + n2);
if ( n1.equals(name2) && n2.equals(name1)){
// flipped order
//System.out.println("AfpChain in wrong order, flipping...");
afpChain = AFPChainFlipper.flipChain(afpChain);
}
rebuildAFPChain(afpChain, ca1, ca2);
return afpChain;
}
return null;
}
public static AFPChain fromXML(String xml, Atom[] ca1, Atom[] ca2) throws IOException
{
AFPChain[] afps = parseMultiXML( xml);
if ( afps.length > 0 ) {
AFPChain afpChain = afps[0];
rebuildAFPChain(afpChain, ca1, ca2);
return afpChain;
}
return null;
}
/** returns true if the alignment XML contains an error message
*
* @param xml
* @return flag if there was an Error while processing the alignment.
*/
public static boolean isErrorXML(String xml){
if ( xml.contains("error=\""))
return true;
return false;
}
/** Takes an XML representation of the alignment and flips the positions of name1 and name2
*
* @param xml String representing the alignment
* @return XML representation of the flipped alignment
*/
public static String flipAlignment(String xml) throws IOException,StructureException{
AFPChain[] afps = parseMultiXML( xml);
if ( afps.length < 1 )
return null;
if ( afps.length == 1) {
AFPChain newChain = AFPChainFlipper.flipChain(afps[0]);
if ( newChain.getAlgorithmName() == null) {
newChain.setAlgorithmName(DEFAULT_ALGORITHM_NAME);
}
return AFPChainXMLConverter.toXML(newChain);
}
throw new StructureException("not Implemented yet!");
}
/** replace the PDB res nums with atom positions:
*
* @param afpChain
* @param ca1
* @param ca2
*/
public static void rebuildAFPChain(AFPChain afpChain, Atom[] ca1, Atom[] ca2){
if ( afpChain.getAlgorithmName() == null) {
afpChain.setAlgorithmName(DEFAULT_ALGORITHM_NAME);
}
if ( afpChain.getVersion() == null){
afpChain.setVersion("1.0");
}
int blockNum = afpChain.getBlockNum();
int ca1Length = afpChain.getCa1Length();
int ca2Length = afpChain.getCa2Length();
int minLength = Math.min(ca1Length, ca2Length);
int[][][] optAln = new int[blockNum][2][minLength];
int[][][] blockResList = afpChain.getBlockResList();
if ( blockResList == null){
blockResList = new int[blockNum][2][minLength];
}
int[] optLen = afpChain.getOptLen();
String[][][] pdbAln = afpChain.getPdbAln();
int[] verifiedOptLen = null;
if ( optLen != null)
verifiedOptLen = afpChain.getOptLen().clone();
else {
logger.warn("did not find optimal alignment, building up empty alignment.");
optLen = new int[1];
optLen[0] = 0;
}
for (int blockNr = 0 ; blockNr < blockNum ; blockNr++){
//System.out.println("got block " + blockNr + " size: " + optLen[blockNr]);
int verifiedEQR = -1;
for ( int eqrNr = 0 ; eqrNr < optLen[blockNr] ; eqrNr++ ){
String pdbResnum1 = pdbAln[blockNr][0][eqrNr];
String pdbResnum2 = pdbAln[blockNr][1][eqrNr];
//System.out.println(blockNr + " " + eqrNr + " got resnum: " + pdbResnum1 + " " + pdbResnum2);
String[] spl1 = pdbResnum1.split(":");
String[] spl2 = pdbResnum2.split(":");
String chain1 = spl1[0];
String pdbres1 = spl1[1];
String chain2 = spl2[0];
String pdbres2 = spl2[1];
int pos1 = getPositionForPDBresunm(pdbres1,chain1,ca1);
int pos2 = getPositionForPDBresunm(pdbres2,chain2,ca2);
if ( pos1 == -1 || pos2 == -1 ){
// this can happen when parsing old files that contained Calcium atoms...
logger.warn("pos1: {} (residue {}), pos2: {} (residue {}), should never be -1. Probably parsing an old file.",
pos1, pdbResnum1, pos2, pdbResnum2);
verifiedOptLen[blockNr]-- ;
continue;
}
verifiedEQR++;
//System.out.println(blockNr + " " + eqrNr + " " + pos1 + " " + pos2);
optAln[blockNr][0][verifiedEQR] = pos1;
optAln[blockNr][1][verifiedEQR] = pos2;
blockResList[blockNr][0][verifiedEQR] = pos1;
blockResList[blockNr][1][verifiedEQR] = pos2;
}
}
afpChain.setOptLen(verifiedOptLen);
afpChain.setOptAln(optAln);
afpChain.setBlockResList(blockResList);
// build up alignment image:
AFPAlignmentDisplay.getAlign(afpChain, ca1, ca2);
}
public static AFPChain[] parseMultiXML(String xml) throws IOException {
List<AFPChain> afpChains = new ArrayList<AFPChain>();
try
{
//Convert string to XML document
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder db = factory.newDocumentBuilder();
InputSource inStream = new InputSource();
inStream.setCharacterStream(new StringReader(xml));
Document doc = db.parse(inStream);
// normalize text representation
doc.getDocumentElement().normalize();
//Element rootElement = doc.getDocumentElement();
NodeList listOfAFPChains = doc.getElementsByTagName("AFPChain");
//int numArrays = listOfArrays.getLength();
// go over the blocks
for(int afpPos=0; afpPos<listOfAFPChains.getLength() ; afpPos++)
{
AFPChain a = new AFPChain(DEFAULT_ALGORITHM_NAME);
a.setVersion("1.0");
Node rootElement = listOfAFPChains.item(afpPos);
a.setName1(getAttribute(rootElement,"name1"));
a.setName2(getAttribute(rootElement,"name2"));
String algoname = getAttribute(rootElement,"method");
if ( algoname != null) {
a.setAlgorithmName(algoname);
}
String version = getAttribute(rootElement,"version");
if ( version != null)
a.setVersion(version);
a.setAlnLength( Integer.parseInt(getAttribute(rootElement,"alnLength")));
a.setBlockNum( Integer.parseInt(getAttribute(rootElement,"blockNum")));
a.setGapLen( Integer.parseInt(getAttribute(rootElement,"gapLen")));
a.setOptLength( Integer.parseInt(getAttribute(rootElement,"optLength")));
a.setTotalLenIni( Integer.parseInt(getAttribute(rootElement,"totalLenIni")));
a.setBlockNum( Integer.parseInt(getAttribute(rootElement,"blockNum")));
if ( a.getAlgorithmName().equals(CeCPMain.algorithmName)){
a.setSequentialAlignment(a.getBlockNum() == 1);
}
a.setAlignScore(Double.parseDouble(getAttribute(rootElement,"alignScore")));
a.setChainRmsd(Double.parseDouble(getAttribute(rootElement,"chainRmsd")));
Double identity = Double.parseDouble(getAttribute(rootElement,"identity"));
a.setIdentity(identity);
a.setNormAlignScore(Double.parseDouble(getAttribute(rootElement,"normAlignScore")));
a.setProbability(Double.parseDouble(getAttribute(rootElement,"probability")));
a.setSimilarity(Double.parseDouble(getAttribute(rootElement,"similarity")));
a.setTotalRmsdIni(Double.parseDouble(getAttribute(rootElement,"totalRmsdIni")));
a.setTotalRmsdOpt(Double.parseDouble(getAttribute(rootElement,"totalRmsdOpt")));
a.setAlignScoreUpdate(Double.parseDouble(getAttribute(rootElement,"alignScoreUpdate")));
int ca1Length = Integer.parseInt(getAttribute(rootElement,"ca1Length"));
a.setCa1Length(ca1Length);
int ca2Length = Integer.parseInt(getAttribute(rootElement,"ca2Length"));
a.setCa2Length(ca2Length);
String tmScoreS = getAttribute(rootElement,"tmScore");
if ( tmScoreS != null) {
Double tmScore = null;
try {
tmScore = Double.parseDouble(tmScoreS);
} catch (Exception e){
}
a.setTMScore(tmScore);
}
String calcTimeS = getAttribute(rootElement,"time");
Long calcTime = -1L;
if ( calcTimeS != null){
try {
calcTime = Long.parseLong(calcTimeS);
} catch (Exception e){
e.printStackTrace();
}
}
a.setCalculationTime(calcTime);
Matrix[] ms = new Matrix[a.getBlockNum()];
a.setBlockRotationMatrix(ms);
Atom[] blockShiftVector = new Atom[a.getBlockNum()];
a.setBlockShiftVector(blockShiftVector);
int afpNum = Integer.parseInt(getAttribute(rootElement,"afpNum"));
List<AFP> afpSet = new ArrayList<AFP>();
for (int afp=0;afp<afpNum;afp++){
afpSet.add( new AFP());
}
a.setAfpSet(afpSet);
int minLength = Math.min(ca1Length, ca2Length);
a.setFocusRes1(new int[minLength]);
a.setFocusRes2(new int[minLength]);
//NodeList listOfBlocks = doc.getElementsByTagName("block");
NodeList listOfBlocks = rootElement.getChildNodes();
//int numArrays = listOfArrays.getLength();
// go over the blocks
for(int i=0; i<listOfBlocks.getLength() ; i++)
{
Node block = listOfBlocks.item(i);
// we only look at blocks.
if (! block.getNodeName().equals("block"))
continue;
processBlock(block, a, minLength);
}
afpChains.add(a);
}
}
// TODO these 2 exceptions should be thrown forward, it's not a good idea to catch them so early
catch (SAXException e)
{
Exception x = e.getException ();
((x == null) ? e : x).printStackTrace ();
}
catch (ParserConfigurationException e) {
e.printStackTrace();
}
return afpChains.toArray(new AFPChain[afpChains.size()]);
}
private static void processBlock(Node block, AFPChain a, int minLength){
NodeList valList = block.getChildNodes();
int numChildren = valList.getLength();
NamedNodeMap map = block.getAttributes();
int blockNum = a.getBlockNum();
int[] optLen = a.getOptLen();
if ( optLen == null )
optLen = new int[blockNum];
String[][][] pdbAln = a.getPdbAln();
if ( pdbAln == null)
pdbAln = new String[blockNum][2][minLength];
//int[][][] optAln = new int[blockNum][2][minLength];
int[] blockGap = a.getBlockGap();
if ( blockGap == null )
blockGap = new int[blockNum];
int[] blockSize= a.getBlockSize();
if ( blockSize == null)
blockSize = new int[blockNum];
double[] blockScore = a.getBlockScore();
if ( blockScore == null)
blockScore = new double[blockNum];
double[] blockRmsd = a.getBlockRmsd();
if (blockRmsd == null )
blockRmsd = new double[blockNum];
Matrix[] ms = a.getBlockRotationMatrix();
Atom[] shifts = a.getBlockShiftVector();
int blockNr = Integer.parseInt( map.getNamedItem("blockNr").getTextContent());
int thisBlockGap = Integer.parseInt(map.getNamedItem("blockGap").getTextContent());
blockGap[blockNr] = thisBlockGap;
int thisBlockSize = Integer.parseInt(map.getNamedItem("blockSize").getTextContent());
blockSize[blockNr] = thisBlockSize;
double thisBlockScore = Double.parseDouble(map.getNamedItem("blockScore").getTextContent());
blockScore[blockNr] = thisBlockScore;
double thisBlockRmsd = Double.parseDouble(map.getNamedItem("blockRmsd").getTextContent());
blockRmsd[blockNr] = thisBlockRmsd;
// parse out the equivalent positions from the file
int nrEqr = 0;
for ( int e =0; e< numChildren ; e++){
Node eqr = valList.item(e);
if(!eqr.hasAttributes()) continue;
if ( eqr.getNodeName().equals("eqr")) {
nrEqr++;
NamedNodeMap atts = eqr.getAttributes();
int eqrNr = Integer.parseInt(atts.getNamedItem("eqrNr").getTextContent());
String pdbres1 = atts.getNamedItem("pdbres1").getTextContent();
String chain1 = atts.getNamedItem("chain1").getTextContent();
String pdbres2 = atts.getNamedItem("pdbres2").getTextContent();
String chain2 = atts.getNamedItem("chain2").getTextContent();
//System.out.println(blockNr + " " + eqrNr + " " + chain1+" " + pdbres1 + ":" + chain2 + " " + pdbres2);
pdbAln[blockNr][0][eqrNr] = chain1+":"+pdbres1;
pdbAln[blockNr][1][eqrNr] = chain2+":"+pdbres2;
// A WORK AROUND FOR THE PROBLEM THAT WE DON:T HAVE PDBs LOADED AT THIS TIME...
/* int pos1 = getPositionForPDBresunm(pdbres1,chain1,ca1);
int pos2 = getPositionForPDBresunm(pdbres2,chain2,ca2);
//System.out.println("settion optAln " + blockNr + " " + eqrNr + " " + pos1);
optAln[blockNr][0][eqrNr] = pos1;
optAln[blockNr][1][eqrNr] = pos2;
*/
} else if ( eqr.getNodeName().equals("matrix")){
// process Matrix
Matrix m = new Matrix(3,3);
for (int i =1 ; i <= 3 ; i++){
for (int j =1 ; j <= 3 ; j++){
String att = getAttribute(eqr, "mat" +i + j);
double val = Double.parseDouble(att);
m.set(i-1,j-1,val);
}
}
ms[blockNr] = m;
} else if ( eqr.getNodeName().equals("shift")){
Atom shift = new AtomImpl();
double x = Double.parseDouble(getAttribute(eqr, "x"));
double y = Double.parseDouble(getAttribute(eqr, "y"));
double z = Double.parseDouble(getAttribute(eqr, "z"));
shift.setX(x);
shift.setY(y);
shift.setZ(z);
shifts[blockNr] = shift;
}
}
//System.out.println("setting block " + blockNr + " eqr: " + nrEqr);
optLen[blockNr] = nrEqr;
a.setOptLen(optLen);
//a.setOptAln(optAln);
a.setPdbAln(pdbAln);
a.setBlockGap(blockGap);
a.setBlockSize(blockSize);
a.setBlockScore(blockScore);
a.setBlockRmsd(blockRmsd);
}
private static String getAttribute(Node node, String attr){
if( ! node.hasAttributes())
return null;
NamedNodeMap atts = node.getAttributes();
if ( atts == null)
return null;
Node att = atts.getNamedItem(attr);
if ( att == null)
return null;
String value = att.getTextContent();
return value;
}
/** get the position of PDB residue nr X in the ato marray
*
* @param pdbresnum pdbresidue number
* @param authId chain name
* @param atoms atom array
* @return
*/
private static int getPositionForPDBresunm(String pdbresnum, String authId , Atom[] atoms){
ResidueNumber residueNumber = ResidueNumber.fromString(pdbresnum);
residueNumber.setChainName(authId);
boolean blankChain = authId == null || authId.equalsIgnoreCase("null") || authId.equals("_");
for ( int i =0; i< atoms.length ;i++){
Group g = atoms[i].getGroup();
// match _ to any chain
if( blankChain ) {
residueNumber.setChainName(g.getChain().getName());
}
//System.out.println(g.getResidueNumber() + "< ? >" + residueNumber +"<");
if ( g.getResidueNumber().equals(residueNumber)){
//System.out.println(g + " == " + residueNumber );
Chain c = g.getChain();
if ( blankChain || c.getName().equals(authId)){
return i;
}
}
}
return -1;
}
}