package prefuse.data.io; import java.io.InputStream; import java.util.Date; import java.util.HashMap; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; import prefuse.data.Graph; import prefuse.data.Schema; import prefuse.data.Table; import prefuse.data.parser.DataParseException; import prefuse.data.parser.DataParser; import prefuse.data.parser.ParserFactory; import prefuse.util.collections.IntIterator; /** * GraphReader instance that reads in graph file formatted using the * GraphML file format. GraphML is an XML format supporting graph * structure and typed data schemas for both nodes and edges. For more * information about the format, please see the * <a href="http://graphml.graphdrawing.org/">GraphML home page</a>. * * @author <a href="http://jheer.org">jeffrey heer</a> */ public class GraphMLReader extends AbstractGraphReader implements GraphReader { /** * @see prefuse.data.io.GraphReader#readGraph(java.io.InputStream) */ public Graph readGraph(InputStream is) throws DataIOException { try { SAXParserFactory factory = SAXParserFactory.newInstance(); SAXParser saxParser = factory.newSAXParser(); GraphMLHandler handler = new GraphMLHandler(); saxParser.parse(is, handler); return handler.getGraph(); } catch ( Exception e ) { if ( e instanceof DataIOException ) { throw (DataIOException)e; } else { throw new DataIOException(e); } } } /** * String tokens used in the GraphML format. */ public interface Tokens { public static final String ID = "id"; public static final String GRAPH = "graph"; public static final String EDGEDEF = "edgedefault"; public static final String DIRECTED = "directed"; public static final String UNDIRECTED = "undirected"; public static final String KEY = "key"; public static final String FOR = "for"; public static final String ALL = "all"; public static final String ATTRNAME = "attr.name"; public static final String ATTRTYPE = "attr.type"; public static final String DEFAULT = "default"; public static final String NODE = "node"; public static final String EDGE = "edge"; public static final String SOURCE = "source"; public static final String TARGET = "target"; public static final String DATA = "data"; public static final String TYPE = "type"; public static final String INT = "int"; public static final String INTEGER = "integer"; public static final String LONG = "long"; public static final String FLOAT = "float"; public static final String DOUBLE = "double"; public static final String REAL = "real"; public static final String BOOLEAN = "boolean"; public static final String STRING = "string"; public static final String DATE = "date"; } /** * A SAX Parser for GraphML data files. */ public static class GraphMLHandler extends DefaultHandler implements Tokens { protected ParserFactory m_pf = ParserFactory.getDefaultFactory(); protected static final String SRC = Graph.DEFAULT_SOURCE_KEY; protected static final String TRG = Graph.DEFAULT_TARGET_KEY; protected static final String SRCID = SRC+'_'+ID; protected static final String TRGID = TRG+'_'+ID; protected Schema m_nsch = new Schema(); protected Schema m_esch = new Schema(); protected String m_graphid; protected Graph m_graph = null; protected Table m_nodes; protected Table m_edges; // schema parsing protected String m_id; protected String m_for; protected String m_name; protected String m_type; protected String m_dflt; protected StringBuffer m_sbuf = new StringBuffer(); // node,edge,data parsing private String m_key; private int m_row = -1; private Table m_table = null; protected HashMap m_nodeMap = new HashMap(); protected HashMap m_idMap = new HashMap(); private boolean m_directed = false; private boolean inSchema; public void startDocument() { m_nodeMap.clear(); inSchema = true; m_esch.addColumn(SRC, int.class); m_esch.addColumn(TRG, int.class); m_esch.addColumn(SRCID, String.class); m_esch.addColumn(TRGID, String.class); } public void endDocument() throws SAXException { // time to actually set up the edges IntIterator rows = m_edges.rows(); while (rows.hasNext()) { int r = rows.nextInt(); String src = m_edges.getString(r, SRCID); if (!m_nodeMap.containsKey(src)) { throw new SAXException( "Tried to create edge with source node id=" + src + " which does not exist."); } int s = ((Integer) m_nodeMap.get(src)).intValue(); m_edges.setInt(r, SRC, s); String trg = m_edges.getString(r, TRGID); if (!m_nodeMap.containsKey(trg)) { throw new SAXException( "Tried to create edge with target node id=" + trg + " which does not exist."); } int t = ((Integer) m_nodeMap.get(trg)).intValue(); m_edges.setInt(r, TRG, t); } m_edges.removeColumn(SRCID); m_edges.removeColumn(TRGID); // now create the graph m_graph = new Graph(m_nodes, m_edges, m_directed); if (m_graphid != null) m_graph.putClientProperty(ID, m_graphid); } public void startElement(String namespaceURI, String localName, String qName, Attributes atts) { // first clear the character buffer m_sbuf.delete(0, m_sbuf.length()); if ( qName.equals(GRAPH) ) { // parse directedness default String edef = atts.getValue(EDGEDEF); m_directed = DIRECTED.equalsIgnoreCase(edef); m_graphid = atts.getValue(ID); } else if ( qName.equals(KEY) ) { if ( !inSchema ) { error("\""+KEY+"\" elements can not" + " occur after the first node or edge declaration."); } m_for = atts.getValue(FOR); m_id = atts.getValue(ID); m_name = atts.getValue(ATTRNAME); m_type = atts.getValue(ATTRTYPE); } else if ( qName.equals(NODE) ) { schemaCheck(); m_row = m_nodes.addRow(); String id = atts.getValue(ID); m_nodeMap.put(id, new Integer(m_row)); m_table = m_nodes; } else if ( qName.equals(EDGE) ) { schemaCheck(); m_row = m_edges.addRow(); // do not use the id value // String id = atts.getValue(ID); // if ( id != null ) { // if ( !m_edges.canGetString(ID) ) // m_edges.addColumn(ID, String.class); // m_edges.setString(m_row, ID, id); // } m_edges.setString(m_row, SRCID, atts.getValue(SRC)); m_edges.setString(m_row, TRGID, atts.getValue(TRG)); // currently only global directedness is used // ignore directed edge value for now // String dir = atts.getValue(DIRECTED); // boolean d = m_directed; // if ( dir != null ) { // d = dir.equalsIgnoreCase("false"); // } // m_edges.setBoolean(m_row, DIRECTED, d); m_table = m_edges; } else if ( qName.equals(DATA) ) { m_key = atts.getValue(KEY); } } public void endElement(String namespaceURI, String localName, String qName) { if ( qName.equals(DEFAULT) ) { // value is in the buffer m_dflt = m_sbuf.toString(); } else if ( qName.equals(KEY) ) { // time to add to the proper schema(s) addToSchema(); } else if ( qName.equals(DATA) ) { // value is in the buffer String value = m_sbuf.toString(); String name = (String)m_idMap.get(m_key); Class type = m_table.getColumnType(name); try { Object val = parse(value, type); m_table.set(m_row, name, val); } catch ( DataParseException dpe ) { error(dpe); } } else if ( qName.equals(NODE) || qName.equals(EDGE) ) { m_row = -1; m_table = null; } } public void characters(char[] ch, int start, int length) throws SAXException { m_sbuf.append(ch, start, length); } // -------------------------------------------------------------------- protected void schemaCheck() { if ( inSchema ) { m_nsch.lockSchema(); m_esch.lockSchema(); m_nodes = m_nsch.instantiate(); m_edges = m_esch.instantiate(); inSchema = false; } } protected void addToSchema() { if ( m_name == null || m_name.length() == 0 ) error("Empty "+KEY+" name."); if ( m_type == null || m_type.length() == 0 ) error("Empty "+KEY+" type."); try { Class type = parseType(m_type); Object dflt = m_dflt==null ? null : parse(m_dflt, type); if ( m_for == null || m_for.equals(ALL) ) { m_nsch.addColumn(m_name, type, dflt); m_esch.addColumn(m_name, type, dflt); } else if ( m_for.equals(NODE) ) { m_nsch.addColumn(m_name, type, dflt); } else if ( m_for.equals(EDGE) ) { m_esch.addColumn(m_name, type, dflt); } else { error("Unrecognized \""+FOR+"\" value: "+ m_for); } m_idMap.put(m_id, m_name); m_dflt = null; } catch ( DataParseException dpe ) { error(dpe); } } protected Class parseType(String type) { type = type.toLowerCase(); if ( type.equals(INT) || type.equals(INTEGER) ) { return int.class; } else if ( type.equals(LONG) ) { return long.class; } else if ( type.equals(FLOAT) ) { return float.class; } else if ( type.equals(DOUBLE) || type.equals(REAL)) { return double.class; } else if ( type.equals(BOOLEAN) ) { return boolean.class; } else if ( type.equals(STRING) ) { return String.class; } else if ( type.equals(DATE) ) { return Date.class; } else { error("Unrecognized data type: "+type); return null; } } protected Object parse(String s, Class type) throws DataParseException { DataParser dp = m_pf.getParser(type); return dp.parse(s); } public Graph getGraph() { return m_graph; } protected void error(String s) { throw new RuntimeException(s); } protected void error(Exception e) { throw new RuntimeException(e); } } // end of inner class GraphMLHandler } // end of class XMLGraphReader