/******************************************************************
*
* CyberXML for Java
*
* Copyright (C) Satoshi Konno 2004
*
* Author: Markus Thurner (http://thoean.com)
*
* File: JaxpParser.java
*
* Revision;
*
* 06/15/04
* - first revision.
* 01/08/08
* - Fixed parse() so that it no longer throws a NullPointerException when the NamedNodeMap is null on Android.
* 02/08/08
* - Changed parse() to use Node::addValue() instead of setValue().
*
******************************************************************/
package org.cybergarage.xml.parser;
import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.cybergarage.xml.Node;
import org.cybergarage.xml.Parser;
import org.cybergarage.xml.ParserException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
/**
 * {@link Parser} implementation that reads a document with the JAXP
 * {@link DocumentBuilder} and converts the resulting DOM tree into
 * {@code org.cybergarage.xml.Node} objects.
 */
public class JaxpParser extends Parser
{
	/** Feature names that are switched on/off before parsing to harden the factory against XXE. */
	private static final String FEATURE_EXTERNAL_GENERAL_ENTITIES = "http://xml.org/sax/features/external-general-entities";
	private static final String FEATURE_EXTERNAL_PARAMETER_ENTITIES = "http://xml.org/sax/features/external-parameter-entities";
	private static final String FEATURE_DISALLOW_DOCTYPE_DECL = "http://apache.org/xml/features/disallow-doctype-decl";
	private static final String FEATURE_LOAD_EXTERNAL_DTD = "http://apache.org/xml/features/nonvalidating/load-external-dtd";

	public JaxpParser()
	{
		super();
	}

	////////////////////////////////////////////////
	//	parse (Node)
	////////////////////////////////////////////////

	/**
	 * Converts one DOM node (and, for elements, its whole subtree) and
	 * attaches the result to {@code parentNode}.
	 * <ul>
	 * <li>Text and CDATA nodes: their content is handed to
	 *     {@code parentNode.addValue()}; no new node is created.</li>
	 * <li>Element nodes: a new node carrying the element name and all
	 *     attributes is created, added to {@code parentNode} when that is
	 *     not null, and every child is converted recursively.</li>
	 * <li>Any other node type (comments, processing instructions, ...)
	 *     is ignored.</li>
	 * </ul>
	 *
	 * @param parentNode node to attach to; may be null (used for the document element)
	 * @param domNode    DOM node to convert; must not be null
	 * @param rank       depth of {@code domNode} below the starting node; only passed
	 *                   on (incremented) to the recursive calls
	 * @return the newly created node when {@code domNode} is an element,
	 *         otherwise {@code parentNode}
	 */
	public org.cybergarage.xml.Node parse(org.cybergarage.xml.Node parentNode, org.w3c.dom.Node domNode, int rank)
	{
		int domNodeType = domNode.getNodeType();

		// CDATA sections are reported as their own node type unless the factory
		// is coalescing, so they have to be collected exactly like plain text.
		if (domNodeType == org.w3c.dom.Node.TEXT_NODE
				|| domNodeType == org.w3c.dom.Node.CDATA_SECTION_NODE) {
			// Uses Node::addValue() instead of setValue() (2008/02/07).
			if (parentNode != null)
				parentNode.addValue(domNode.getNodeValue());
			return parentNode;
		}

		if (domNodeType != org.w3c.dom.Node.ELEMENT_NODE)
			return parentNode;

		org.cybergarage.xml.Node node = new org.cybergarage.xml.Node();
		node.setName(domNode.getNodeName());
		node.setValue(domNode.getNodeValue());

		if (parentNode != null)
			parentNode.addNode(node);

		// The attribute map can be null (seen on Android), so guard before iterating.
		NamedNodeMap attrMap = domNode.getAttributes();
		if (attrMap != null) {
			int attrLen = attrMap.getLength();
			for (int n = 0; n < attrLen; n++) {
				org.w3c.dom.Node attr = attrMap.item(n);
				node.setAttribute(attr.getNodeName(), attr.getNodeValue());
			}
		}

		org.w3c.dom.Node child = domNode.getFirstChild();
		if (child == null) {
			// An element without any children gets an empty value.
			node.setValue("");
			return node;
		}
		for (; child != null; child = child.getNextSibling())
			parse(node, child, rank + 1);

		return node;
	}

	/**
	 * Converts {@code domNode} starting at rank 0.
	 *
	 * @see #parse(org.cybergarage.xml.Node, org.w3c.dom.Node, int)
	 */
	public org.cybergarage.xml.Node parse(org.cybergarage.xml.Node parentNode, org.w3c.dom.Node domNode)
	{
		return parse(parentNode, domNode, 0);
	}

	/**
	 * Parses the XML document read from {@code inStream}.
	 * NUL bytes in the stream are dropped before the data reaches the XML
	 * parser, and external entities / DTDs are disabled where the JAXP
	 * implementation supports it.
	 *
	 * @param inStream stream containing the document; it is not closed here
	 * @return the node for the document element, or null if the document has none
	 * @throws ParserException wrapping any exception raised while configuring
	 *                         the parser or parsing the stream
	 * @see org.cybergarage.xml.Parser#parse(java.io.InputStream)
	 */
	public Node parse(InputStream inStream) throws ParserException
	{
		org.cybergarage.xml.Node root = null;

		try {
			// https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Processing
			DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
			factory.setValidating(false);
			factory.setNamespaceAware(true);
			factory.setExpandEntityReferences(false);
			setFeatureQuietly(factory, FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
			setFeatureQuietly(factory, FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);
			setFeatureQuietly(factory, FEATURE_DISALLOW_DOCTYPE_DECL, true);
			setFeatureQuietly(factory, FEATURE_LOAD_EXTERNAL_DTD, false);

			DocumentBuilder builder = factory.newDocumentBuilder();
			// Anything that still asks for an external entity gets an empty document.
			builder.setEntityResolver(new BlankingResolver());

			InputSource inSrc = new InputSource(new NullFilterInputStream(inStream));
			Document doc = builder.parse(inSrc);

			org.w3c.dom.Element docElem = doc.getDocumentElement();
			if (docElem != null)
				root = parse(root, docElem);
		}
		catch (Exception e) {
			// Deliberately broad: callers only deal with ParserException.
			throw new ParserException(e);
		}

		return root;
	}

	/**
	 * Sets a factory feature on a best-effort basis: a JAXP implementation
	 * that does not know the feature, or does not implement
	 * {@code setFeature()} at all, must not prevent parsing.
	 */
	private static void setFeatureQuietly(DocumentBuilderFactory factory, String feature, boolean value)
	{
		try {
			factory.setFeature(feature, value);
		} catch (ParserConfigurationException ignored) {
			// feature not supported by this implementation
		} catch (AbstractMethodError ignored) {
			// factory implementation without setFeature() (noted for FreeBSD)
		}
	}

	/**
	 *  I2P -
	 *  Filter out nulls, hopefully to avoid
	 *  SAXParserException "Content not allowed in trailing section",
	 *  which is apparently caused by nulls.
	 *  Alternative is to remove all stuff between '>' and '<',
	 *  which isn't so hard if we assume no CDATA.
	 */
	private static class NullFilterInputStream extends FilterInputStream {

		public NullFilterInputStream(InputStream is) {
			super(is);
		}

		/** Returns the next byte that is not 0, or -1 at end of stream. */
		@Override
		public int read() throws IOException {
			int rv;
			do {
				rv = super.read();
			} while (rv == 0);
			return rv;
		}

		/** @since 0.9.22 */
		@Override
		public int read(byte[] b) throws IOException {
			return this.read(b, 0, b.length);
		}

		/**
		 *  Reads one chunk from the underlying stream and squeezes the
		 *  0 bytes out of it in place. Repeats while a chunk consisted of
		 *  0 bytes only, so a positive {@code len} never yields 0.
		 *  Buffer positions between the returned count and the size of the
		 *  raw chunk may have been overwritten.
		 *
		 *  @return number of non-0 bytes stored from {@code off} on, or -1 at end of stream
		 *  @since 0.9.22
		 */
		@Override
		public int read(byte[] b, int off, int len) throws IOException {
			if (b == null)
				throw new NullPointerException();
			if (off < 0 || len < 0 || len > b.length - off)
				throw new IndexOutOfBoundsException();
			if (len == 0)
				return 0;

			int kept = 0;
			while (kept == 0) {
				int got = super.read(b, off, len);
				if (got < 0)
					return -1;
				if (got == 0) {
					// Underlying stream broke the read() contract; fall back to
					// the blocking single-byte read so we do not spin.
					int c = this.read();
					if (c < 0)
						return -1;
					b[off] = (byte) c;
					return 1;
				}
				for (int i = 0; i < got; i++) {
					byte v = b[off + i];
					if (v != 0)
						b[off + kept++] = v;
				}
			}
			return kept;
		}
	}

	/**
	 *  I2P -
	 *  Resolves every entity to an empty input source.
	 *  http://stackoverflow.com/questions/5883542/disable-xml-validation-based-on-external-dtd-xsd
	 */
	private static class BlankingResolver implements EntityResolver {
		private static final byte[] DUMMY = new byte[0];

		@Override
		public InputSource resolveEntity(String publicId, String systemId) {
			return new InputSource(new ByteArrayInputStream(DUMMY));
		}
	}
}