/*
* File: GraphFileIo.java
* Authors: Jeremy D. Wendt
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright 2016, Sandia Corporation. Under the terms of Contract
* DE-AC04-94AL85000, there is a non-exclusive license for use of this work by
* or on behalf of the U.S. Government. Export of this program may require a
* license from the United States Government. See CopyrightHistory.txt for
* complete details.
*/
package examples.io;
import gov.sandia.cognition.graph.DenseMemoryGraph;
import gov.sandia.cognition.graph.DirectedNodeEdgeGraph;
import gov.sandia.cognition.graph.DirectedWeightedNodeEdgeGraph;
import gov.sandia.cognition.graph.WeightedDenseMemoryGraph;
import gov.sandia.cognition.util.Pair;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
/**
 * Reads and writes simple graph file formats: whitespace-separated edge lists
 * (read only) and a line-oriented subset of the dot-file format (read and
 * write).
 *
 * @author jdwendt
 */
public class GraphFileIo
{

    /**
     * This is the attribute name used for node labels for dot-file graphs
     */
    private static final String NODE_LABEL_ATTR = "label";

    /**
     * This is the attribute name used for edge weights for dot-file graphs. If
     * the graph contains this attribute for _any_ edges, the resulting read-in
     * graph will be a WeightedDenseMemoryGraph with the double-value of the
     * input weights on the edges (1.0 where none are provided). The name is
     * matched case-insensitively.
     */
    private static final String EDGE_WEIGHT_ATTR = "weight";

    /**
     * Matches a dot-file line that ends in a semicolon (ignoring trailing
     * whitespace). Only such lines are accepted as statements.
     */
    private static final Pattern FULL_STATEMENT = Pattern.compile(".*;\\s*$");

    /**
     * Matches a dot-file line that contains a bracketed attribute list.
     */
    private static final Pattern HAS_ATTRIBUTES
        = Pattern.compile(".*\\[.*\\].*");

    /**
     * The most basic of graph formats: Each line defines an edge. If two values
     * are on the line, the line is considered a weight 1 edge between the nodes
     * defined by the two values (considered strings that define a name). If the
     * line contains three values, the third value must be a floating point or
     * integer value that defines the weight on the edge. Blank lines are
     * skipped.
     *
     * @param filename The file to parse
     * @return The graph the file contained. NOTE: If the file contained three
     * values on _any_ of the lines, this will return a
     * DirectedWeightedNodeEdgeGraph.
     * @throws RuntimeException If the file can't be read, or if a non-blank
     * line has a number of values other than 2 or 3
     * @throws NumberFormatException If a third value can't be parsed as a
     * double
     */
    public static DirectedNodeEdgeGraph<String> readEdgeListFile(String filename)
    {
        boolean addedWeights = false;
        // Always read into a weighted graph; it is converted to an unweighted
        // one at the end if no line supplied a weight.
        DirectedWeightedNodeEdgeGraph<String> ret
            = new WeightedDenseMemoryGraph<>();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
            new FileInputStream(filename))))
        {
            String line;
            while ((line = br.readLine()) != null)
            {
                String trimmed = line.trim();
                // Splitting an empty string yields one empty token, so blank
                // lines are skipped before splitting.
                if (trimmed.isEmpty())
                {
                    continue;
                }
                String[] vals = trimmed.split("\\s+");
                switch (vals.length)
                {
                    case 2:
                        ret.addEdge(vals[0], vals[1]);
                        break;
                    case 3:
                        ret.addEdge(vals[0], vals[1],
                            Double.parseDouble(vals[2]));
                        addedWeights = true;
                        break;
                    default:
                        throw new RuntimeException(
                            "Unable to read current line \"" + line + "\" with "
                            + vals.length
                            + " separate chunks (only 2 or 3 (with weights) are "
                            + "supported).");
                }
            }
            if (!addedWeights)
            {
                // No weights anywhere: copy the edges into an unweighted graph
                DirectedNodeEdgeGraph<String> noWeights
                    = new DenseMemoryGraph<>(ret.getNumNodes(),
                        ret.getNumEdges());
                for (int i = 0; i < ret.getNumEdges(); ++i)
                {
                    Pair<Integer, Integer> e = ret.getEdgeEndpointIds(i);
                    noWeights.addEdge(ret.getNode(e.getFirst()), ret.getNode(
                        e.getSecond()));
                }
                return noWeights;
            }
            // If weights were added to the edges, let's return those!
            return ret;
        }
        catch (IOException ioe)
        {
            throw new RuntimeException(ioe);
        }
    }

    /**
     * Simple helper that identifies single-line comments that fill the whole
     * line. Blank lines are also reported as comments so they get skipped.
     *
     * @param line The line to test for if it's a comment
     * @return True if the whole line is blank or a comment, else false
     */
    private static boolean isDotCommentLine(String line)
    {
        line = line.trim();
        return line.isEmpty() || line.startsWith("//") || line.startsWith("#");
        // TODO: I should figure out how to handle multi-line comments /* */
        // TODO: Also, this doesn't handle // comments that aren't at the beginning of the line
    }

    /**
     * Identifies if the input line is a close-line for dot files
     *
     * @param line The line to test
     * @return True if the whole line (ignoring surrounding whitespace) is a
     * single closing brace
     */
    private static boolean isDotCloseLine(String line)
    {
        return line.trim().equals("}");
    }

    /**
     * Cleans the input string to get it down to the "core" text (removes
     * edge-whitespace, surrounding quotes)
     *
     * @param s The string to clean
     * @return A cleaned version of the string
     */
    private static String clean(String s)
    {
        s = s.trim();
        // The length check matters: a lone quote character both starts and
        // ends with a quote, and stripping "both" would ask for the invalid
        // range substring(1, 0).
        if (s.length() >= 2 && s.startsWith("\"") && s.endsWith("\""))
        {
            s = s.substring(1, s.length() - 1);
        }
        return s;
    }

    /**
     * Tests whether the input attribute name is the edge-weight attribute.
     * The comparison ignores case and does not depend on the default locale.
     *
     * @param key The attribute name to test
     * @return True if the name denotes the edge weight
     */
    private static boolean isWeightAttr(String key)
    {
        return EDGE_WEIGHT_ATTR.equalsIgnoreCase(key);
    }

    /**
     * Reads the input node's attributes looking for the "label" attribute to
     * assign that to the nodes name. Otherwise, it's the identifier used by
     * the dot file for this node.
     *
     * @param node The node identifier supplied by the dot file
     * @param nodeAttributes The attributes from the dot file for this node
     * (can be null)
     * @return the appropriate name (see above)
     */
    private static String toNodeName(String node,
        Map<String, String> nodeAttributes)
    {
        if ((nodeAttributes != null) && nodeAttributes.containsKey(
            NODE_LABEL_ATTR))
        {
            return nodeAttributes.get(NODE_LABEL_ATTR);
        }
        return node;
    }

    /**
     * Returns the weight for the edge whose attributes are passed in. 1.0 if no
     * "weight" attribute is included.
     *
     * @param edgeAttrs The edge's attributes from the Dot file.
     * @return The appropriate weight (see above)
     * @throws NumberFormatException If the weight attribute's value can't be
     * parsed as a double
     */
    private static double getEdgeWeight(Map<String, String> edgeAttrs)
    {
        for (Map.Entry<String, String> e : edgeAttrs.entrySet())
        {
            if (isWeightAttr(e.getKey()))
            {
                return Double.parseDouble(e.getValue());
            }
        }
        return 1.0;
    }

    /**
     * Parses the text found between the brackets of a dot-file attribute list
     * and stores the resulting name/value pairs in the input map. Attributes
     * are separated by semicolons or commas (quoted values containing those
     * characters are not supported). Each attribute is split at its first
     * equals sign, so values may themselves contain equals signs.
     *
     * @param attrs The text between the brackets
     * @param out The map to add the parsed attributes to
     * @return True if one of the parsed attributes is the edge weight
     */
    private static boolean parseAttributes(String attrs,
        Map<String, String> out)
    {
        boolean weightSeen = false;
        for (String attr : attrs.split(";|,"))
        {
            if (attr.trim().isEmpty())
            {
                continue;
            }
            String[] vs = attr.split("=", 2);
            // No equals sign at all, or nothing whatsoever after it
            if (vs.length != 2 || vs[1].isEmpty())
            {
                System.out.println("Weird attribute: " + attr);
                continue;
            }
            String key = clean(vs[0]);
            if (isWeightAttr(key))
            {
                weightSeen = true;
            }
            out.put(key, clean(vs[1]));
        }
        return weightSeen;
    }

    /**
     * Returns the graph found by reading the input graph file. No attributes
     * will be returned.
     *
     * @param filename The file to read
     * @return the graph found in that file.
     */
    public static DirectedNodeEdgeGraph<String> readDotFile(String filename)
    {
        return readDotFile(filename, null, null);
    }

    /**
     * Returns the graph found by reading in the input graph file. Attributes
     * will be returned in the input maps (if not null; one or both can be null
     * if those attributes aren't wanted). The maps will be
     * Node/EdgeName-to-Map-of-AttributeName-to-AttributeValue. EdgeName is
     * SrcNodeName,DstNodeName.
     *
     * @param filename The filename to read
     * @param nodeAttrs The map to return the node attributes from the dot file
     * in
     * @param edgeAttrs The map to return the edge attributes from the dot file
     * in
     * @return The graph read at the filename
     * @throws RuntimeException If the file can't be opened or read
     */
    public static DirectedNodeEdgeGraph<String> readDotFile(String filename,
        Map<String, Map<String, String>> nodeAttrs,
        Map<String, Map<String, String>> edgeAttrs)
    {
        // Own the stream here so it is closed even if the delegate fails
        // before it gets around to wrapping (and closing) the reader itself.
        try (Reader reader = new InputStreamReader(new FileInputStream(
            filename)))
        {
            return readDotFile(reader, nodeAttrs, edgeAttrs);
        }
        catch (IOException ioe)
        {
            throw new RuntimeException(ioe);
        }
    }

    /**
     * Returns the graph found by reading the input graph. No attributes will be
     * returned.
     *
     * @param file The file to read
     * @return the graph found in that file.
     */
    public static DirectedNodeEdgeGraph<String> readDotFile(Reader file)
    {
        return readDotFile(file, null, null);
    }

    /**
     * Returns the graph found by reading in the input graph file. Attributes
     * will be returned in the input maps (if not null; one or both can be null
     * if those attributes aren't wanted). The maps will be
     * Node/EdgeName-to-Map-of-AttributeName-to-AttributeValue. EdgeName is
     * SrcNodeName,DstNodeName. Any previous contents of the maps are cleared.
     * The "label" node attribute and the "weight" edge attribute are consumed
     * by the graph itself and are not copied to the output maps.
     *
     * The parser is line-oriented: after the opening line (which must contain
     * "graph" and an open brace), every non-comment line up to the closing
     * brace must be a single statement ending in a semicolon. The input reader
     * is closed when this method returns.
     *
     * @param file The file to read
     * @param nodeAttrsOut The data-structure for outputting the nodes'
     * attributes
     * @param edgeAttrsOut The data-structure for outputting the edges'
     * attributes
     * @return The read-in graph
     * @throws RuntimeException If the input can't be read or doesn't follow
     * the supported format
     */
    public static DirectedNodeEdgeGraph<String> readDotFile(Reader file,
        Map<String, Map<String, String>> nodeAttrsOut,
        Map<String, Map<String, String>> edgeAttrsOut)
    {
        // Just make sure there's nothing already there polluting things
        if (nodeAttrsOut != null)
        {
            nodeAttrsOut.clear();
        }
        if (edgeAttrsOut != null)
        {
            edgeAttrsOut.clear();
        }

        // Initialize temporary locations for things.
        Set<String> nodes = new HashSet<>();
        Map<String, Map<String, String>> nodeAttributes = new HashMap<>();
        Map<String, Map<String, String>> edgeAttributes = new HashMap<>();
        Set<String> edges = new HashSet<>();
        boolean edgeWeightsFound = false;

        // Read the file and load into temporary locations
        try (BufferedReader br = new BufferedReader(file))
        {
            String line;
            boolean firstLine = true;
            while ((line = br.readLine()) != null)
            {
                if (isDotCommentLine(line))
                {
                    continue;
                }
                if (firstLine)
                {
                    line = line.toLowerCase();
                    // "digraph" contains "graph", so one test covers both
                    boolean isGraphFormat = line.contains("graph")
                        && line.contains("{");
                    if (!isGraphFormat)
                    {
                        throw new RuntimeException("Expected graph to start "
                            + "with graph or digraph and an open brace");
                    }
                    firstLine = false;
                    continue;
                }
                if (isDotCloseLine(line))
                {
                    break;
                }
                // We are now not a comment or the first line
                if (!FULL_STATEMENT.matcher(line).matches())
                {
                    throw new RuntimeException("Unable to handle input line: "
                        + line + ". Not a statement line.");
                }
                // If I don't strip the semicolon from the end, weird things happen when there are no attributes on this line
                line = line.substring(0, line.lastIndexOf(';'));

                // Handle attributes if they exist
                boolean attrWeight = false;
                Map<String, String> attributes = new HashMap<>();
                if (HAS_ATTRIBUTES.matcher(line).matches())
                {
                    int start = line.indexOf('[');
                    // Search from the opening bracket so that a stray ']'
                    // earlier on the line can't produce an inverted range.
                    int end = line.indexOf(']', start);
                    attrWeight = parseAttributes(
                        line.substring(start + 1, end), attributes);
                    line = line.substring(0, start);
                }

                boolean containsEdgeOp = line.contains("--") || line.contains(
                    "->");
                if (containsEdgeOp)
                {
                    // Then this is an edge (or a chain of edges, all of which
                    // share the statement's attributes)
                    String[] endpts = line.split("--|->");
                    for (int i = 0; i < endpts.length - 1; ++i)
                    {
                        String src = endpts[i].trim();
                        String dst = endpts[i + 1].trim();
                        String edge = toEdgeName(src, dst);
                        edges.add(edge);
                        edgeAttributes.put(edge, attributes);
                        if (attrWeight)
                        {
                            edgeWeightsFound = true;
                        }
                    }
                }
                else
                {
                    // Then this is a node
                    line = line.trim();
                    nodes.add(line);
                    nodeAttributes.put(line, attributes);
                }
            }
        }
        catch (IOException ioe)
        {
            throw new RuntimeException(ioe);
        }

        // Now, write to the output datastructures
        DirectedWeightedNodeEdgeGraph<String> weighted = null;
        DirectedNodeEdgeGraph<String> ret;
        if (edgeWeightsFound)
        {
            weighted = new WeightedDenseMemoryGraph<>(nodes.size(),
                edges.size());
            ret = weighted;
        }
        else
        {
            ret = new DenseMemoryGraph<>(nodes.size(), edges.size());
        }

        for (String node : nodes)
        {
            Map<String, String> attrs = nodeAttributes.get(node);
            String nodeName = toNodeName(node, attrs);
            ret.addNode(nodeName);
            if (nodeAttrsOut != null && !attrs.isEmpty())
            {
                Map<String, String> out = new HashMap<>();
                nodeAttrsOut.put(nodeName, out);
                for (Map.Entry<String, String> e : attrs.entrySet())
                {
                    // The label has already become the node's name
                    if (e.getKey().equals(NODE_LABEL_ATTR))
                    {
                        continue;
                    }
                    out.put(e.getKey(), e.getValue());
                }
            }
        }

        for (String edge : edges)
        {
            String[] endpts = edge.split(",");
            if (endpts.length != 2)
            {
                throw new RuntimeException("It seems a node name contains a "
                    + "comma or something.  Somehow, I created this edge w/o "
                    + "exactly two endpoints: " + edge);
            }
            // Endpoints that were never declared as nodes have no attributes;
            // toNodeName handles the resulting null.
            String src = toNodeName(endpts[0], nodeAttributes.get(endpts[0]));
            String dst = toNodeName(endpts[1], nodeAttributes.get(endpts[1]));
            Map<String, String> attrs = edgeAttributes.get(edge);
            if (weighted != null)
            {
                weighted.addEdge(src, dst, getEdgeWeight(attrs));
            }
            else
            {
                ret.addEdge(src, dst);
            }
            if (edgeAttrsOut != null)
            {
                Map<String, String> out = new HashMap<>();
                edgeAttrsOut.put(toEdgeName(src, dst), out);
                for (Map.Entry<String, String> e : attrs.entrySet())
                {
                    // The weight lives on the graph; use the same
                    // case-insensitive test that detected it.
                    if (isWeightAttr(e.getKey()))
                    {
                        continue;
                    }
                    out.put(e.getKey(), e.getValue());
                }
            }
        }

        return ret;
    }

    /**
     * Appends the attributes to the input string builder in the dot-file graph
     * format (name="value" pairs separated by "; "). Values are written
     * verbatim between double quotes.
     *
     * @param sb The StringBuilder to add to
     * @param attrs The attributes to process (can be null, in which case
     * nothing is appended)
     * @param predecessorExists True if any attributes have been added before
     * this
     */
    private static void appendAttrs(StringBuilder sb,
        Map<String, String> attrs,
        boolean predecessorExists)
    {
        if (attrs != null)
        {
            for (Map.Entry<String, String> e : attrs.entrySet())
            {
                if (predecessorExists)
                {
                    sb.append("; ");
                }
                predecessorExists = true;
                sb.append(e.getKey()).append("=\"").append(e.getValue()).append(
                    "\"");
            }
        }
    }

    /**
     * Helper that creates a node's dot-format line for the node id, name, and
     * attributes. The name is written as the node's "label" attribute.
     *
     * @param id The node's id
     * @param nodeName The node's name
     * @param attrs The node's parameters (can be null)
     * @return The string representation of the input node in dot-file format
     */
    private static String toNodeLine(int id,
        String nodeName,
        Map<String, String> attrs)
    {
        StringBuilder sb = new StringBuilder();
        sb.append(" ").append(id).append(" [ label=\"").append(nodeName).append(
            "\"");
        appendAttrs(sb, attrs, true);
        sb.append(" ] ;\n");
        return sb.toString();
    }

    /**
     * Helper that creates an edge's dot-format line for the input edge, weight
     * and attributes.
     *
     * @param edge The source and destination node's ids for the edge
     * @param weight The edge's weight (can be null)
     * @param attrs The edge's attributes (can be null)
     * @return The string representation of the input edge in dot-file format
     */
    private static String toEdgeLine(Pair<Integer, Integer> edge,
        Double weight,
        Map<String, String> attrs)
    {
        StringBuilder sb = new StringBuilder();
        sb.append(" ").append(edge.getFirst()).append(" -> ").append(
            edge.getSecond());
        boolean addAttributes = weight != null || attrs != null;
        if (addAttributes)
        {
            sb.append(" [ ");
        }
        boolean predecessorAttr = false;
        if (weight != null)
        {
            sb.append(EDGE_WEIGHT_ATTR).append("=\"").append(weight).append("\"");
            predecessorAttr = true;
        }
        appendAttrs(sb, attrs, predecessorAttr);
        if (addAttributes)
        {
            sb.append(" ]");
        }
        sb.append(" ;\n");
        return sb.toString();
    }

    /**
     * Writes the input graph to a dot-file-format with the given graph (which
     * may be weighted) and attributes for the nodes and edges. Nodes are
     * written with their integer ids as identifiers and their names as labels;
     * edge weights are written when the graph is a
     * DirectedWeightedNodeEdgeGraph.
     *
     * @param filename The file to write the output to
     * @param graph The graph to write out
     * @param nodeAttrs The nodes' attributes to write, keyed by node name (can
     * be null)
     * @param edgeAttrs The edges' attributes to write, keyed by the edge name
     * produced by toEdgeName (can be null)
     * @throws RuntimeException If the file can't be written
     */
    public static void writeDotFile(String filename,
        DirectedNodeEdgeGraph<String> graph,
        Map<String, Map<String, String>> nodeAttrs,
        Map<String, Map<String, String>> edgeAttrs)
    {
        // Whether the graph carries weights doesn't change per edge, so
        // resolve it once up front.
        DirectedWeightedNodeEdgeGraph<?> weighted = null;
        if (graph instanceof DirectedWeightedNodeEdgeGraph)
        {
            weighted = (DirectedWeightedNodeEdgeGraph<?>) graph;
        }

        try (BufferedWriter bw = new BufferedWriter(new FileWriter(filename)))
        {
            bw.write("digraph agraph {\n");
            for (int i = 0; i < graph.getNumNodes(); ++i)
            {
                String node = graph.getNode(i);
                bw.write(toNodeLine(i, node, nodeAttrs == null ? null
                    : nodeAttrs.get(node)));
            }
            bw.write("\n\n\n");
            for (int i = 0; i < graph.getNumEdges(); ++i)
            {
                Double weight = null;
                if (weighted != null)
                {
                    weight = weighted.getEdgeWeight(i);
                }
                Pair<Integer, Integer> edge = graph.getEdgeEndpointIds(i);
                Map<String, String> attrs = null;
                if (edgeAttrs != null)
                {
                    attrs = edgeAttrs.get(toEdgeName(
                        graph.getNode(edge.getFirst()),
                        graph.getNode(edge.getSecond())));
                }
                bw.write(toEdgeLine(edge, weight, attrs));
            }
            bw.write("}\n");
        }
        catch (IOException ioe)
        {
            throw new RuntimeException(ioe);
        }
    }

    /**
     * Simple helper that forms the edge name for the edge between src and dst
     * to be used in storing the edge attribute maps.
     *
     * @param src The source node's name
     * @param dst The destination node's name
     * @return A standard-format name for the edge src,dst
     */
    public static String toEdgeName(String src,
        String dst)
    {
        return src + "," + dst;
    }

}