/* * DumpToGraphViz.java - Dumps data to GraphViz .dot files. * * Copyright (c) 2005 Andrew Krizhanovsky /aka at mail.iias.spb.su/ * Distributed under GNU Public License. */ package wikipedia.kleinberg; import wikipedia.language.Encodings; import wikipedia.clustering.*; import wikipedia.util.*; import wikipedia.sql.*; import java.util.*; public class DumpToGraphViz { public FileWriter file; public FileWriter file_dot; /** User can run the Windows bat file to create .jpeg and .svg files from .dot */ public FileWriter file_bat; /** The *nix executable script does the same things as the file_bat but in *nix environment */ public FileWriter file_sh; public boolean enable_file; public boolean enable_file_dot; // if false then don't write text to .dot and .bat files public DumpToGraphViz () { file = new FileWriter(); file_dot = new FileWriter(); file_bat = new FileWriter(); file_sh = new FileWriter(); enable_file = true; enable_file_dot = true; } /** Write header "#!/bin/sh" to .sh file if it is empty */ public void WriteShellHeaderToEmptyFile() { if(enable_file_dot && 0 == file_sh.GetFileLength()) { file_sh. Print("#!/bin/sh"); file_sh. Flush(); } } public String Header() { return new String( // "strict" forbids the creation of self-arcs and multi-edges; they are ignored in the input file. "strict digraph G {\n" + //"digraph G {\n" + " compound=true;\n" + // to connect clusters //" size=\"8,6\"; ratio=fill;\n" + //" size=\"8,6\";\n" + //" node [fontname=\"ARIALUNI\",fontsize=10,color=black,fillcolor=white,fontcolor=black,shape=circle];\n" + // fontname ARIALUNI //" node [fontname=\"ARIALUNI\",fontsize=10,color=black,fillcolor=white,fontcolor=black,shape=circle,width=\"0.5\",height=\"0.5\"];\n" + //" edge [fontname=\"ARIALUNI\",fontsize=10,color=black,fontcolor=black];\n"); // wo fontname " node [fontsize=10,color=black,fillcolor=white,fontcolor=black,shape=circle,width=\"0.5\",height=\"0.5\"];\n" + " edge [fontsize=10,color=black,fontcolor=black];\n"); } public String Footer() { return new String("\n}"); } //graphVizCluster public <T> void DumpCluster (Map<Integer, Article> articles, Map<Integer, Category> categories, List<ClusterCategory> clusters, String nodes_type) { if (!enable_file_dot) return; for(int i=0; i<clusters.size(); i++) { ClusterCategory c = clusters.get(i); file_dot.PrintNL(c.graphVizCluster(articles, categories)); file_dot.PrintNL(c.graphVizClusterEdges()); } file_dot.Flush(); WriteShellHeaderToEmptyFile(); String s = nodes_type + ClusterCategory.getStatistics(clusters); file_bat.Print("\n:: " + s); file_sh .Print("\necho '" + s + "'"); file_bat.Flush(); file_sh .Flush(); } /** * @param nodes_type the type of dumped nodes will be stored in \.bat file */ public <T> void Dump (Map<Integer, T> hash_node, String nodes_type) { if (!enable_file_dot) return; Iterator<Integer> it = hash_node.keySet().iterator(); Integer i; while (it.hasNext()) { int id = it.next(); Node node = (Node)hash_node.get(id); file_dot.PrintNL(node.GraphVizNode()); file_dot.PrintNL(node.GraphVizLinksOut()); } file_dot.Flush(); WriteShellHeaderToEmptyFile(); String s = nodes_type + GetStatisticsHashMap(hash_node); file_bat.Print("\n:: " + s); file_sh .Print("\necho '" + s + "'"); file_bat.Flush(); file_sh. Flush(); } /** * Use the following command sequence to dump data to graphviz dot file: * if( DotOpen ) {Dump(); Dump(); ... BatEnd(); } */ public boolean DotOpen (String filename_new) { if (!enable_file_dot) return false; file_dot.SetFilename(filename_new); file_dot.Open(false, "UTF8"); file_dot.PrintNL(Header()); return true; } public <T> void BatEnd () { file_dot.Print(Footer()); file_dot.Flush(); file_bat.Print(GetDotCommand("svg", true)); file_sh. Print(GetDotCommand("svg", false)); file_bat.Flush(); file_sh. Flush(); } public <T> void DumpDotBat (Map<Integer, T> nodes, String filename_new) { if (!enable_file_dot) return; file_dot.SetFilename(filename_new); file_dot.Open(false, "UTF8"); file_dot.PrintNL(Header()); Dump(nodes, ""); file_dot.Print(Footer()); file_dot.Flush(); file_bat.Print(GetStatisticsHashMap(nodes) + GetDotCommand("jpeg", true)); file_sh. Print(GetStatisticsHashMap(nodes) + GetDotCommand("jpeg", false)); file_bat.Flush(); file_sh. Flush(); } public void DumpDotBat(Article[] nodes, String filename_new) { if (!enable_file_dot) return; file_dot.SetFilename(StringUtilRegular.encodeRussianToLatinitsa(filename_new, Encodings.enc_java_default, Encodings.enc_int_default)); file_dot.Open(false, "UTF8"); file_dot.PrintNL(Header()); for (Article a:nodes) { file_dot.PrintNL(a.GraphVizNode()); file_dot.PrintNL(a.GraphVizLinksOut()); } file_dot.Print(Footer()); file_dot.Flush(); WriteShellHeaderToEmptyFile(); file_bat.Print(GetDotCommand("svg", true)); file_sh. Print(GetDotCommand("svg", false)); file_bat.Flush(); file_sh. Flush(); } /** Return the string like: * :: Робот.dot vertices:9 edges:11 */ public <T> String GetStatisticsHashMap (Map<Integer, T> n) { if (null == n || !enable_file_dot) return ""; return " " + file_dot.GetFilename() + " vertices:" + n.values().size() + " edges:" + DCEL.CountLinksIn(n); } /** Return the string like: * <pre> * if (b_windows) * dot.exe -Tjpeg Робот.dot -v -o Робот.jpeg (when the output_format is jpeg) * else * fdp -Tjpeg Робот.dot -v -o Робот.jpeg * </pre> */ public String GetDotCommand(String output_format, boolean b_windows) { if (!enable_file_dot) return ""; String dot_name = b_windows ? "dot.exe" : "fdp"; return "\n" + dot_name + " -T" + output_format + " " + file_dot.GetFilename() + " -o " + file_dot.GetFilenameWoExt() + "." + output_format + "\n"; } public void PrintSynonyms(SessionHolder session,List<Article> nodes) { if (null == nodes || !enable_file) return; file.PrintNL( "synonyms (authority pages sorted by X):\n"); file.Flush(); String titles = " N:synonym:count_l_from:count_l_to\n"; file.PrintNL(titles); file.Flush(); int count_l_from, count_l_to; for(int i=0; i<nodes.size(); i++) { Article n = nodes.get(i); titles = String.format("%2d:%-20s:%2d:%2d", // :%s i, n.page_title, //n.count_l_from, n.count_l_to, n.GetLinksOutLength(), n.GetLinksInLength()); file.PrintNL(titles); file.Flush(); //if(null != session.category_black_list.getBlackList()) { String[] categories = session.category_black_list.getCategoryUpIteratively(n.page_id, null); titles = String.format(":%s", StringUtil.join("|", categories)); file.PrintNL(titles); //} file.Flush(); } } }