/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package automenta.spacenet.plugin.xml;
import automenta.spacenet.plugin.comm.Contains;
import automenta.spacenet.var.graph.MemGraph;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.Remark;
import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
/**
*
* @author seh
*/
public class HTMLGrapher {
private final MemGraph graph;
public HTMLGrapher(MemGraph graph, String url) {
super();
this.graph = graph;
addURL(url);
}
public void addURL(String url) {
Parser p;
try {
p = new Parser(url);
p.setFeedback(null);
NodeList nodes = p.parse(null);
for (Node n : nodes.toNodeArray()) {
addNode(url, null, n);
}
} catch (ParserException ex) {
Logger.getLogger(HTMLGrapher.class.getName()).log(Level.SEVERE, null, ex);
}
}
public void addNode(String url, Object parent, Node n) {
Object node = n;
if (n instanceof Text) {
Text t = (Text)n;
String text = t.getText();
text = text.trim();
if (text.equals(""))
return;
node = text;
//...
}
else if (n instanceof Tag) {
Tag t = (Tag)n;
//TODO use HTMLTag class
node = "htmlTag{" + t.getTagName() + "}";
//TODO recurse tag attributes
}
else if (n instanceof Remark) {
Remark r = (Remark)n;
//TODO use HTMLComment class
node = "htmlComment{" + r.getText() + "}";
//...
}
System.out.println("ADDING NODE: \"" + node + "\"");
graph.addNode(node);
if (parent!=null) {
graph.addEdge(new Contains(), parent, node);
}
else {
try {
URL u = new URL(url);
graph.addNode(u);
graph.addEdge(new Contains(), u, node);
} catch (MalformedURLException ex) {
Logger.getLogger(HTMLGrapher.class.getName()).log(Level.SEVERE, null, ex);
}
}
if (n.getChildren()!=null) {
for (Node c : n.getChildren().toNodeArray()) {
addNode(url, node, c);
}
}
}
}