package maui.main;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.sql.SQLException;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import org.wikipedia.miner.model.Article;
import org.wikipedia.miner.model.Wikipedia;
import org.wikipedia.miner.util.text.CaseFolder;
import weka.core.Utils;
/**
 * Generates GraphViz (DOT) files that visualise how strongly the Wikipedia
 * articles behind a document's keyphrases are related to each other.
 *
 * <p>
 * For every <code>.key</code> file in the input directory each line is mapped
 * to a Wikipedia article, the pairwise relatedness values are printed to
 * standard output, and an undirected graph is written next to the key file
 * with the extension <code>.gv</code>.
 */
public class PrintGraphs {

    /** Extension of the keyphrase files that {@link #main(String[])} processes. */
    private static final String KEY_SUFFIX = ".key";

    /** Extension given to the generated GraphViz files. */
    private static final String GRAPH_SUFFIX = ".gv";

    /** Character set the keyphrase files are read with. */
    private static final String INPUT_CHARSET = "ISO-8859-1";

    /** Character set the graphs are written in; GraphViz reads UTF-8 by default. */
    private static final String OUTPUT_CHARSET = "UTF-8";

    /** Directory that is scanned when no directory is given on the command line. */
    private static final String DEFAULT_INPUT_DIR = "/Users/alyona/Documents/PHD/chapters_txt/";

    /** Edges whose relatedness is below this value are emitted invisibly. */
    private static final double MIN_VISIBLE_RELATEDNESS = 0.2;

    /**
     * Prints the relatedness of every ordered pair of distinct topics to
     * standard output, as a percentage with two decimals. Pairs whose
     * relatedness is not greater than zero are omitted.
     *
     * <p>
     * A failure to compute one pair is reported on standard error and does not
     * stop the remaining pairs from being processed.
     *
     * @param topics
     *            the articles to compare with each other
     */
    public static void computeRelatedness(Collection<Article> topics) {
        for (Article a : topics) {
            for (Article c : topics) {
                if (c.equals(a)) {
                    continue;
                }
                try {
                    double relatedness = a.getRelatednessTo(c);
                    if (relatedness > 0) {
                        System.out.println(a.getTitle() + " and " + c.getTitle() + "\t"
                                + Utils.doubleToString(relatedness * 100, 2));
                    }
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Writes an undirected GraphViz graph of the given topics to
     * <code>outputFile</code> (UTF-8).
     *
     * <p>
     * Each topic becomes a node whose font size depends on the integer it is
     * mapped to (smaller value, larger font). Each unordered pair of topics
     * becomes exactly one edge: invisible when the relatedness is below 0.2,
     * otherwise drawn with a pen width derived from the relatedness.
     *
     * <p>
     * This method is best effort and never throws: any failure is reported on
     * standard error, and the output file may then be incomplete.
     *
     * @param topics
     *            articles mapped to the value that selects their font size
     * @param root
     *            title of the node to use as the graph's root;
     *            <code>null</code> is written as an empty root
     * @param outputFile
     *            path of the GraphViz file to create or overwrite
     */
    public static void computeGraph(HashMap<Article, Integer> topics,
            String root, String outputFile) {
        PrintWriter printer = null;
        try {
            System.out.println("Printing into " + outputFile);
            printer = new PrintWriter(outputFile, OUTPUT_CHARSET);
            printer.print("graph G {\n");
            printer.print("graph [root=\"" + escapeDot(root)
                    + "\", outputorder=\"depthfirst\"];\n");

            // Edges that were already written, so that each unordered pair
            // of topics is emitted only once.
            HashSet<String> done = new HashSet<String>();
            for (Article a : topics.keySet()) {
                String source = "\"" + escapeDot(a.getTitle()) + "\"";
                int count = topics.get(a).intValue();
                printer.print(source + " " + fontSizeAttribute(count) + ";\n");

                for (Article c : topics.keySet()) {
                    if (c.equals(a)) {
                        continue;
                    }
                    try {
                        String target = "\"" + escapeDot(c.getTitle()) + "\"";
                        String relation = source + " -- " + target;
                        String reverse = target + " -- " + source;
                        // Check before asking for the relatedness: the lookup
                        // is the expensive part and is not needed for a pair
                        // that was already written in the other direction.
                        if (done.contains(relation) || done.contains(reverse)) {
                            continue;
                        }
                        double relatedness = a.getRelatednessTo(c);
                        done.add(relation);
                        if (relatedness < MIN_VISIBLE_RELATEDNESS) {
                            printer.print(relation + "[style=invis];\n");
                        } else {
                            printer.print(relation + " [penwidth = \""
                                    + (int) (relatedness * 10 - 0.2) + "\"];\n");
                        }
                    } catch (SQLException e) {
                        // Skip this edge only; it stays unmarked so the
                        // reverse direction can still be attempted.
                        e.printStackTrace();
                    }
                }
            }
            printer.print("}\n");
        } catch (Exception e) {
            // Deliberately broad: callers rely on this method not throwing.
            System.err.println("Could not write graph to " + outputFile);
            e.printStackTrace();
        } finally {
            if (printer != null) {
                // PrintWriter swallows I/O errors; checkError() flushes and
                // reveals them.
                if (printer.checkError()) {
                    System.err.println("I/O error while writing " + outputFile);
                }
                printer.close();
            }
        }
    }

    /**
     * Returns the GraphViz attribute list that sets a node's font size.
     *
     * @param count
     *            the value a topic is mapped to; smaller values get larger fonts
     * @return the attribute list, including the surrounding brackets
     */
    private static String fontSizeAttribute(int count) {
        if (count < 1) {
            return "[fontsize=22]";
        } else if (count < 3) {
            return "[fontsize = 18]";
        } else if (count < 6) {
            return "[fontsize = 14]";
        }
        return "[fontsize = 12]";
    }

    /**
     * Escapes backslashes and double quotes so that the text can be placed
     * inside a double-quoted DOT identifier without terminating it early.
     *
     * @param text
     *            the raw text, may be <code>null</code>
     * @return the escaped text, or an empty string for <code>null</code>
     */
    private static String escapeDot(String text) {
        if (text == null) {
            return "";
        }
        return text.replace("\\", "\\\\").replace("\"", "\\\"");
    }

    /**
     * Reads one keyphrase file, resolves its lines to Wikipedia articles,
     * prints their relatedness and writes the corresponding graph.
     *
     * <p>
     * Blank lines are skipped and do not count towards the rank of later
     * lines. The first line that resolves to an article supplies the root of
     * the graph. If several lines resolve to the same article, the rank of the
     * last one is kept.
     *
     * @param wikipedia
     *            the Wikipedia instance used to look up articles
     * @param keyFile
     *            the keyphrase file, one phrase per line
     * @param outputFile
     *            path of the GraphViz file to write
     * @throws Exception
     *             if the file cannot be read or an article lookup fails
     */
    private static void processKeyFile(Wikipedia wikipedia, File keyFile,
            String outputFile) throws Exception {
        HashMap<Article, Integer> topics = new HashMap<Article, Integer>();
        String root = "";
        BufferedReader input = new BufferedReader(new InputStreamReader(
                new FileInputStream(keyFile), INPUT_CHARSET));
        try {
            int rank = 0;
            String line;
            while ((line = input.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                Article article = wikipedia.getArticleByTitle(line);
                if (article == null) {
                    article = wikipedia.getMostLikelyArticle(line, new CaseFolder());
                }
                if (article != null) {
                    if (root.length() == 0) {
                        root = article.getTitle();
                    }
                    topics.put(article, Integer.valueOf(rank));
                } else {
                    System.out.println("Couldn't find article for " + line + " in " + keyFile);
                }
                rank++;
            }
        } finally {
            // Close the reader even when a lookup or read fails.
            input.close();
        }

        // Just to print out the relatedness information
        computeRelatedness(topics.keySet());
        // To generate the graph:
        computeGraph(topics, root, outputFile);
    }

    /**
     * Creates GraphViz files for all <code>.key</code> files in a directory.
     *
     * @param args
     *            optional; <code>args[0]</code> is the directory with the
     *            keyphrase files. Without arguments a built-in default
     *            directory is used.
     * @throws Exception
     *             if the Wikipedia database cannot be reached, the directory
     *             cannot be listed, or a key file cannot be processed
     */
    public static void main(String[] args) throws Exception {
        // location of the Wikipedia data
        Wikipedia wikipedia = new Wikipedia("localhost", "enwiki_20090306",
                "root", null);

        // location of the directory with the keyphrase files
        String inputDir = DEFAULT_INPUT_DIR;
        if (args != null && args.length > 0) {
            inputDir = args[0];
        }

        File directory = new File(inputDir);
        File[] files = directory.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a readable directory.
            throw new FileNotFoundException("Cannot list directory " + directory);
        }

        for (File file : files) {
            if (!file.isFile() || !file.getName().endsWith(KEY_SUFFIX)) {
                continue;
            }
            // Swap only the trailing extension, so the output path always
            // differs from the input path and directory names stay untouched.
            String path = file.getAbsolutePath();
            String out = path.substring(0, path.length() - KEY_SUFFIX.length())
                    + GRAPH_SUFFIX;
            processKeyFile(wikipedia, file, out);
        }
    }
}