/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* BIFParser.java
* Copyright (C) 2003 University of Waikato, Hamilton, New Zealand
*
*/
package weka.gui.graphvisualizer;
import java.io.InputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringReader;
import java.util.StringTokenizer;
import weka.core.FastVector;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Element;
/**
* This class parses an inputstream or a string in
* XMLBIF ver. 0.3 format, and builds the datastructures
* that are passed to it through the constructor.
*
* @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz)
* @version $Revision: 1.7 $ - 24 Apr 2003 - Initial version (Ashraf M. Kibriya)
*/
public class BIFParser implements GraphConstants {
/** These holds the nodes and edges of the graph */
protected FastVector m_nodes, m_edges;
/** This holds the name of the graph (i.e. the name of network tag in XMLBIF
* input)
*/
protected String graphName;
/** This holds the string to be parsed */
protected String inString;
/** This holds the InputStream to be parsed */
protected InputStream inStream;
/**
* Constructor (if our input is a String)
*
* @param input the string to be parsed (should not be null)
* @param nodes vector containing GraphNode objects (should be empty)
* @param edges vector containing GraphEdge objects (should be empty)
*/
public BIFParser(String input, FastVector nodes, FastVector edges) {
m_nodes = nodes; m_edges = edges; inString = input;
}
/**
* Constructor (if our input is an InputStream)
*
* @param instream the InputStream to be parsed (should not be null)
* @param nodes vector containing GraphNode objects (should be empty)
* @param edges vector containing GraphEdge objects (should be empty)
*/
public BIFParser(InputStream instream, FastVector nodes, FastVector edges) {
m_nodes = nodes; m_edges = edges; inStream = instream;
}
/**
* This method parses the string or the InputStream that we
* passed in through the constructor and builds up the
* m_nodes and m_edges vectors
* @exception Exception if both the inString and inStream are
* null, i.e. no input has been provided
* @exception BIFFormatException if there is format of the
* input is not correct. The format should conform to
* XMLBIF version 0.3
* @exception NumberFormatException if there is an invalid
* char in the probability table of a node.
* @return returns the name of the graph
*/
public String parse() throws Exception {
Document dc=null;
javax.xml.parsers.DocumentBuilderFactory dbf =
javax.xml.parsers.DocumentBuilderFactory.newInstance();
dbf.setIgnoringElementContentWhitespace(true);
javax.xml.parsers.DocumentBuilder db = dbf.newDocumentBuilder();
if(inStream!=null)
dc = db.parse(inStream);
else if(inString!=null)
dc = db.parse(new org.xml.sax.InputSource(new StringReader(inString)));
else
{ throw new Exception("No input given"); }
NodeList nl = dc.getElementsByTagName( "NETWORK" );
if(nl.getLength()==0) {
throw new BIFFormatException( "NETWORK tag not found" );
}
//take only the first network node
NodeList templist = ((Element)nl.item(0)).getElementsByTagName( "NAME" );
graphName = templist.item(0).getFirstChild().getNodeValue();
//System.out.println("The name of the network is "+
//templist.item(0).getFirstChild().getNodeValue());
//Get all the variables
nl = dc.getElementsByTagName("VARIABLE");
for(int i=0; i<nl.getLength(); i++) {
templist = ((Element)nl.item(i)).getElementsByTagName("NAME");
if(templist.getLength()>1)
throw new BIFFormatException("More than one name tags found for "+
"variable no. "+(i+1));
String nodename =
((org.w3c.dom.Node)templist.item(0)).getFirstChild().getNodeValue();
GraphNode n = new GraphNode( nodename, nodename, GraphNode.NORMAL );
m_nodes.addElement(n);
//getting nodes position
templist = ((Element)nl.item(i)).getElementsByTagName("PROPERTY");
for(int j=0; j<templist.getLength(); j++) {
if( ((org.w3c.dom.Node)templist.item(j)).getFirstChild()
.getNodeValue().startsWith("position") ) {
String xy = templist.item(j).getFirstChild().getNodeValue();
//System.out.println("x: "+
// xy.substring(xy.indexOf('(')+1, xy.indexOf(','))+
// " y: "+
// xy.substring(xy.indexOf(',')+1, xy.indexOf(')'))
// );
n.x = Integer.parseInt( xy.substring(xy.indexOf('(')+
1, xy.indexOf(',')).trim() );
n.y = Integer.parseInt( xy.substring(xy.indexOf(',')+
1, xy.indexOf(')')).trim() );
break;
}
}
//getting all the outcomes of the node
templist = ((Element)nl.item(i)).getElementsByTagName("OUTCOME");
n.outcomes = new String[templist.getLength()];
for(int j=0; j<templist.getLength(); j++) {
n.outcomes[j] = templist.item(j).getFirstChild().getNodeValue();
//System.out.println("Outcome["+j+"]: "+n.outcomes[j]);
}
} //end for (for variables)
//Get all the edges and probability tables by getting all the definitions
nl = dc.getElementsByTagName("DEFINITION");
for(int i=0; i<nl.getLength(); i++) {
templist = ((Element)nl.item(i)).getElementsByTagName("FOR");
//the Label of the node the edges are coming into
String nid = templist.item(0).getFirstChild().getNodeValue();
//getting the GraphNode object with the above label
GraphNode n = (GraphNode)m_nodes.elementAt(0);
for(int j=1; j<m_nodes.size() && !n.ID.equals(nid); j++)
n = (GraphNode)m_nodes.elementAt(j);
templist = ((Element)nl.item(i)).getElementsByTagName("GIVEN");
int parntOutcomes = 1; //for creating the probability table later on
//creating all the edges coming into the node
for(int j=0; j<templist.getLength(); j++) {
nid = templist.item(j).getFirstChild().getNodeValue();
GraphNode n2 = (GraphNode)m_nodes.elementAt(0);
for(int k=1; k<m_nodes.size() && !n2.ID.equals(nid); k++)
n2 = (GraphNode)m_nodes.elementAt(k);
m_edges.addElement( new GraphEdge(m_nodes.indexOf(n2),
m_nodes.indexOf(n), 1) );
parntOutcomes *= n2.outcomes.length;
}
//creating the probability table for the node
templist = ((Element)nl.item(i)).getElementsByTagName("TABLE");
if(templist.getLength()>1)
throw new BIFFormatException("More than one Probability Table for "+
n.ID);
String probs = templist.item(0).getFirstChild().getNodeValue();
StringTokenizer tk = new StringTokenizer(probs, " \n\t");
if(parntOutcomes*n.outcomes.length > tk.countTokens())
throw new BIFFormatException("Probability Table for "+n.ID+
" contains more values than it should");
else if(parntOutcomes*n.outcomes.length < tk.countTokens())
throw new BIFFormatException("Probability Table for "+n.ID+
" contains less values than it should");
else {
n.probs = new double[parntOutcomes][n.outcomes.length];
for(int r=0; r<parntOutcomes; r++) //row
for(int c=0; c<n.outcomes.length; c++) //column
try {
n.probs[r][c] = Double.parseDouble( tk.nextToken() );
}
catch(NumberFormatException ne) { throw ne; }
} // end of creating probability table
} //endfor (for edges)
//int tmpMatrix[][] = new int[m_nodes.size()][m_nodes.size()];
//for(int i=0; i<m_edges.size(); i++)
// tmpMatrix[((GraphEdge)m_edges.elementAt(i)).src]
// [((GraphEdge)m_edges.elementAt(i)).dest] =
// ((GraphEdge)m_edges.elementAt(i)).type;
//for(int i=0; i<m_nodes.size(); i++) {
// GraphNode n = (GraphNode)m_nodes.elementAt(i);
// n.edges = tmpMatrix[i];
//}
//Adding parents, and those edges to a node which are coming out from it
int tmpEdges[], noOfEdgesOfNode[]=new int[m_nodes.size()];
int noOfPrntsOfNode[]=new int[m_nodes.size()];
for(int i=0; i<m_edges.size(); i++) {
GraphEdge e = (GraphEdge)m_edges.elementAt(i);
noOfEdgesOfNode[e.src]++;
noOfPrntsOfNode[e.dest]++;
}
for(int i=0; i<m_edges.size(); i++) {
GraphEdge e = (GraphEdge)m_edges.elementAt(i);
GraphNode n = (GraphNode)m_nodes.elementAt(e.src);
GraphNode n2 = (GraphNode)m_nodes.elementAt(e.dest);
if(n.edges==null) {
n.edges = new int[noOfEdgesOfNode[e.src]][2];
for(int k=0; k<n.edges.length; k++)
n.edges[k][0]=-1;
}
if(n2.prnts==null) {
n2.prnts = new int[noOfPrntsOfNode[e.dest]];
for(int k=0; k<n2.prnts.length; k++)
n2.prnts[k]=-1;
}
int k=0;
while(n.edges[k][0]!=-1) k++;
n.edges[k][0] = e.dest;
n.edges[k][1] = e.type;
k=0;
while(n2.prnts[k]!=-1) k++;
n2.prnts[k] = e.src;
}
//processGraph();
//setAppropriateSize();
return graphName;
} //end readBIF
/**
* This method writes a graph in XMLBIF ver. 0.3 format to a file.
* However, if is reloaded in GraphVisualizer we would need to layout
* the graph again to display it correctly.
*
* @param filename The name of the file to write in. (will overwrite)
* @param graphName The name of the graph. (will be the name of network
* tag in XMLBIF)
* @param nodes Vector containing all the nodes
* @param edges Vector containing all the edges
*/
public static void writeXMLBIF03(String filename, String graphName,
FastVector nodes, FastVector edges) {
try {
FileWriter outfile = new FileWriter(filename);
StringBuffer text = new StringBuffer();
text.append("<?xml version=\"1.0\"?>\n");
text.append("<!-- DTD for the XMLBIF 0.3 format -->\n");
text.append("<!DOCTYPE BIF [\n");
text.append(" <!ELEMENT BIF ( NETWORK )*>\n");
text.append(" <!ATTLIST BIF VERSION CDATA #REQUIRED>\n");
text.append(" <!ELEMENT NETWORK ( NAME, ( PROPERTY | VARIABLE | DEFI"+
"NITION )* )>\n");
text.append(" <!ELEMENT NAME (#PCDATA)>\n");
text.append(" <!ELEMENT VARIABLE ( NAME, ( OUTCOME | PROPERTY )* )"+
" >\n");
text.append(" <!ATTLIST VARIABLE TYPE (nature|decision|utility"+
") \"nature\">\n");
text.append(" <!ELEMENT OUTCOME (#PCDATA)>\n");
text.append(" <!ELEMENT DEFINITION ( FOR | GIVEN | TABLE | PROPERTY"+
" )* >\n");
text.append(" <!ELEMENT FOR (#PCDATA)>\n");
text.append(" <!ELEMENT GIVEN (#PCDATA)>\n");
text.append(" <!ELEMENT TABLE (#PCDATA)>\n");
text.append(" <!ELEMENT PROPERTY (#PCDATA)>\n");
text.append("]>\n");
text.append("\n");
text.append("\n");
text.append("<BIF VERSION=\"0.3\">\n");
text.append("<NETWORK>\n");
text.append("<NAME>" + XMLNormalize(graphName) + "</NAME>\n");
//Writing all the node names and their outcomes
//If outcome is null(ie if the graph was loaded from DOT file) then
//simply write TRUE
for(int nodeidx=0; nodeidx<nodes.size(); nodeidx++) {
GraphNode n = (GraphNode)nodes.elementAt(nodeidx);
if(n.nodeType!=GraphNode.NORMAL)
continue;
text.append("<VARIABLE TYPE=\"nature\">\n");
text.append("\t<NAME>" + XMLNormalize(n.ID) + "</NAME>\n");
if(n.outcomes!=null)
for(int outidx=0; outidx<n.outcomes.length; outidx++)
text.append("\t<OUTCOME>" + XMLNormalize(n.outcomes[outidx])+
"</OUTCOME>\n");
else
text.append("\t<OUTCOME>true</OUTCOME>\n");
text.append("\t<PROPERTY>position = ("+n.x+","+n.y+")</PROPERTY>\n");
text.append("</VARIABLE>\n");
}
//Writing all the nodes definitions and their probability tables
//If probability table is null then simply write 1 for all
//the posible outcomes of the parents
for (int nodeidx=0; nodeidx<nodes.size(); nodeidx++) {
GraphNode n = (GraphNode) nodes.elementAt(nodeidx);
if(n.nodeType!=GraphNode.NORMAL)
continue;
text.append("<DEFINITION>\n");
text.append("<FOR>" + XMLNormalize(n.ID) + "</FOR>\n");
int parntOutcomes = 1;
if(n.prnts!=null)
for(int pidx=0; pidx<n.prnts.length; pidx++) {
GraphNode prnt = (GraphNode)nodes.elementAt(n.prnts[pidx]);
text.append("\t<GIVEN>" + XMLNormalize(prnt.ID) + "</GIVEN>\n");
if(prnt.outcomes!=null)
parntOutcomes *= prnt.outcomes.length;
}
text.append("<TABLE>\n");
for(int i=0; i<parntOutcomes; i++) {
if(n.outcomes!=null)
for(int outidx=0; outidx<n.outcomes.length; outidx++){
text.append(n.probs[i][outidx]+" ");
}
else
text.append("1");
text.append('\n');
}
text.append("</TABLE>\n");
text.append("</DEFINITION>\n");
}
text.append("</NETWORK>\n");
text.append("</BIF>\n");
outfile.write(text.toString());
outfile.close();
}
catch(IOException ex) { ex.printStackTrace(); }
} // writeXMLBIF
/** XMLNormalize converts the five standard XML entities in a string
* g.e. the string V&D's is returned as V&D's
* @author Remco Bouckaert (rrb@xm.co.nz)
* @param sStr string to normalize
* @return normalized string
*/
private static String XMLNormalize(String sStr) {
StringBuffer sStr2 = new StringBuffer();
for (int iStr = 0; iStr < sStr.length(); iStr++) {
char c = sStr.charAt(iStr);
switch (c) {
case '&': sStr2.append("&"); break;
case '\'': sStr2.append("'"); break;
case '\"': sStr2.append("""); break;
case '<': sStr2.append("<"); break;
case '>': sStr2.append(">"); break;
default:
sStr2.append(c);
}
}
return sStr2.toString();
} // XMLNormalize
} // BIFParser