package net.i2p.router.news;
/******************************************************************
* Contains code modified from JaxpParser:
*
* CyberXML for Java
*
* Copyright (C) Satoshi Konno 2004
*
* Author: Markus Thurner (http://thoean.com)
*
* Contains code modified from Node:
*
* CyberXML for Java
*
* Copyright (C) Satoshi Konno 2002
******************************************************************/
import org.w3c.dom.NamedNodeMap;
import net.i2p.I2PAppContext;
import net.i2p.util.Log;
import org.cybergarage.xml.Attribute;
import org.cybergarage.xml.Node;
import org.cybergarage.xml.XML;
import org.cybergarage.xml.parser.JaxpParser;
/**
* Override so that XHTML is parsed correctly.
*
* This requires us to maintain mixed text and subnodes and output both.
*
* @since 0.9.17
*/
public class XMLParser extends JaxpParser {
private final Log _log;
public static final String TEXT_NAME = "#text";
public XMLParser(I2PAppContext ctx) {
super();
_log = ctx.logManager().getLog(XMLParser.class);
}
/**
* Modified from UPnP JaxpParser
*
* @param parentNode null if at top
* @param rank parse level, only for debug
* @return the parsed node, or the parent node, unused except at top level
*/
@Override
public org.cybergarage.xml.Node parse(Node parentNode, org.w3c.dom.Node domNode, int rank) {
int domNodeType = domNode.getNodeType();
String domNodeName = domNode.getNodeName();
String domNodeValue = domNode.getNodeValue();
NamedNodeMap attrs = domNode.getAttributes();
int arrrsLen = (attrs != null) ? attrs.getLength() : 0;
if (_log.shouldLog(Log.DEBUG)) {
String val = domNodeValue != null ?
" = \"" + domNodeValue.replace("\n", "\\n").replace("\r", "\\r") + '"' :
"";
_log.debug("[" + rank + "] ELEM : \"" + domNodeName + '"' + val +
" type = " + domNodeType + " with " + arrrsLen + " attrs");
}
// I2P -
// If it's only whitespace, skip it altogether.
// Only add it to the value if we don't have any other nodes.
// Otherwise, add it as a node.
if (domNodeType == org.w3c.dom.Node.TEXT_NODE) {
if (domNodeValue.replaceAll("[ \t\r\n]", "").length() == 0) {
return parentNode;
}
if (!parentNode.hasNodes()) {
parentNode.addValue(domNodeValue);
return parentNode;
}
// else we will add it as a node below
} else if (domNodeType != org.w3c.dom.Node.ELEMENT_NODE) {
return parentNode;
}
Node node = new Node();
node.setName(domNodeName);
node.setValue(domNodeValue);
if (parentNode != null) {
// I2P - take the value and convert it to a text node, if it's not just whitespace
String oldValue = parentNode.getValue();
if (oldValue != null && oldValue.length() > 0) {
parentNode.setValue("");
Node text = new Node();
text.setName(TEXT_NAME);
text.setValue(oldValue);
parentNode.addNode(text);
if (_log.shouldLog(Log.DEBUG))
_log.debug("Converted value to node");
}
parentNode.addNode(node);
}
if (domNodeType == org.w3c.dom.Node.TEXT_NODE)
return parentNode;
if (attrs != null) {
for (int n = 0; n < arrrsLen; n++) {
org.w3c.dom.Node attr = attrs.item(n);
String attrName = attr.getNodeName();
String attrValue = attr.getNodeValue();
node.setAttribute(attrName, attrValue);
}
}
org.w3c.dom.Node child = domNode.getFirstChild();
if (child == null) {
node.setValue("");
return node;
}
do{
parse(node, child, rank+1);
child = child.getNextSibling();
} while (child != null);
return node;
}
/**
* A replacement for Node.toString(), which does not recognize #text.
*/
public static void toString(StringBuilder buf, Node node) {
output(buf, node, 0);
}
/**
* A replacement for Node.output(), which does not recognize #text.
* Also, we use the empty entity, so <br /> does not turn into <br></br>.
*/
private static void output(StringBuilder buf, Node node, int indentLevel) {
String name = node.getName();
String value = XML.escapeXMLChars(node.getValue());
if (name.equals(TEXT_NAME)) {
buf.append(value);
return;
}
buf.append('<').append(name);
int nAttributes = node.getNAttributes();
for (int n = 0; n < nAttributes; n++) {
Attribute attr = node.getAttribute(n);
buf.append(' ').append(attr.getName()).append("=\"").append(XML.escapeXMLChars(attr.getValue())).append('"');
}
// As in Node, output either the nodes or the value.
// If mixed values and nodes, the values must be text nodes. See parser above.
if (node.hasNodes()) {
buf.append('>');
int nChildNodes = node.getNNodes();
for (int n = 0; n < nChildNodes; n++) {
Node cnode = node.getNode(n);
output(buf, cnode, indentLevel + 1);
}
buf.append("</").append(name).append('>');
} else {
if (value == null || value.length() == 0) {
// space for <br />
buf.append(" />");
} else {
buf.append('>').append(value).append("</").append(name).append('>');
}
}
}
}