package org.hl7.fhir.dstu2016may.metamodel;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.SAXSource;
import org.hl7.fhir.dstu2016may.formats.FormatUtilities;
import org.hl7.fhir.dstu2016may.formats.IParser.OutputStyle;
import org.hl7.fhir.dstu2016may.metamodel.Element.SpecialElement;
import org.hl7.fhir.dstu2016may.model.DateTimeType;
import org.hl7.fhir.dstu2016may.model.ElementDefinition.PropertyRepresentation;
import org.hl7.fhir.dstu2016may.model.Enumeration;
import org.hl7.fhir.dstu2016may.model.OperationOutcome.IssueSeverity;
import org.hl7.fhir.dstu2016may.model.OperationOutcome.IssueType;
import org.hl7.fhir.dstu2016may.model.StructureDefinition;
import org.hl7.fhir.dstu2016may.utils.IWorkerContext;
import org.hl7.fhir.dstu2016may.utils.ToolingExtensions;
import org.hl7.fhir.dstu2016may.utils.XmlLocationAnnotator;
import org.hl7.fhir.dstu2016may.utils.XmlLocationData;
import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.exceptions.FHIRFormatError;
import org.hl7.fhir.utilities.Utilities;
import org.hl7.fhir.utilities.xhtml.XhtmlComposer;
import org.hl7.fhir.utilities.xhtml.XhtmlNode;
import org.hl7.fhir.utilities.xhtml.XhtmlParser;
import org.hl7.fhir.utilities.xml.XMLUtil;
import org.hl7.fhir.utilities.xml.XMLWriter;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
public class XmlParser extends ParserBase {
public XmlParser(IWorkerContext context) {
super(context);
}
public Element parse(InputStream stream) throws Exception {
Document doc = null;
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
// xxe protection
factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
factory.setXIncludeAware(false);
factory.setExpandEntityReferences(false);
factory.setNamespaceAware(true);
if (policy == ValidationPolicy.EVERYTHING) {
// use a slower parser that keeps location data
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer nullTransformer = transformerFactory.newTransformer();
DocumentBuilder docBuilder = factory.newDocumentBuilder();
doc = docBuilder.newDocument();
DOMResult domResult = new DOMResult(doc);
SAXParserFactory spf = SAXParserFactory.newInstance();
spf.setNamespaceAware(true);
spf.setValidating(false);
SAXParser saxParser = spf.newSAXParser();
XMLReader xmlReader = saxParser.getXMLReader();
// xxe protection
spf.setFeature("http://xml.org/sax/features/external-general-entities", false);
spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false);
xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc);
InputSource inputSource = new InputSource(stream);
SAXSource saxSource = new SAXSource(locationAnnotator, inputSource);
nullTransformer.transform(saxSource, domResult);
} else {
DocumentBuilder builder = factory.newDocumentBuilder();
doc = builder.parse(stream);
}
} catch (Exception e) {
logError(0, 0, "(syntax)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL);
doc = null;
}
if (doc == null)
return null;
else
return parse(doc);
}
private void checkForProcessingInstruction(Document document) throws FHIRFormatError {
if (policy == ValidationPolicy.EVERYTHING) {
Node node = document.getFirstChild();
while (node != null) {
if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE)
logError(line(document), col(document), "(document)", IssueType.INVALID, "No processing instructions allowed in resources", IssueSeverity.ERROR);
node = node.getNextSibling();
}
}
}
private int line(Node node) {
XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
return loc == null ? 0 : loc.getStartLine();
}
private int col(Node node) {
XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
return loc == null ? 0 : loc.getStartColumn();
}
public Element parse(Document doc) throws Exception {
checkForProcessingInstruction(doc);
org.w3c.dom.Element element = doc.getDocumentElement();
return parse(element);
}
public Element parse(org.w3c.dom.Element element) throws Exception {
String ns = element.getNamespaceURI();
String name = element.getLocalName();
String path = "/"+pathPrefix(ns)+name;
StructureDefinition sd = getDefinition(line(element), col(element), ns, name);
if (sd == null)
return null;
Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
checkElement(element, path, result.getProperty());
result.markLocation(line(element), col(element));
result.setType(element.getLocalName());
parseChildren(path, element, result);
result.numberChildren();
return result;
}
private String pathPrefix(String ns) {
if (Utilities.noString(ns))
return "";
if (ns.equals(FormatUtilities.FHIR_NS))
return "f:";
if (ns.equals(FormatUtilities.XHTML_NS))
return "h:";
if (ns.equals("urn:hl7-org:v3"))
return "v3:";
return "?:";
}
private boolean empty(org.w3c.dom.Element element) {
for (int i = 0; i < element.getAttributes().getLength(); i++) {
String n = element.getAttributes().item(i).getNodeName();
if (!n.equals("xmlns") && !n.startsWith("xmlns:"))
return false;
}
if (!Utilities.noString(element.getTextContent().trim()))
return false;
Node n = element.getFirstChild();
while (n != null) {
if (n.getNodeType() == Node.ELEMENT_NODE)
return false;
n = n.getNextSibling();
}
return true;
}
private void checkElement(org.w3c.dom.Element element, String path, Property prop) throws FHIRFormatError {
if (policy == ValidationPolicy.EVERYTHING) {
if (empty(element))
logError(line(element), col(element), path, IssueType.INVALID, "Element must have some content", IssueSeverity.ERROR);
String ns = FormatUtilities.FHIR_NS;
if (ToolingExtensions.hasExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
ns = ToolingExtensions.readStringExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
else if (ToolingExtensions.hasExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
ns = ToolingExtensions.readStringExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
if (!element.getNamespaceURI().equals(ns))
logError(line(element), col(element), path, IssueType.INVALID, "Wrong namespace - expected '"+ns+"'", IssueSeverity.ERROR);
}
}
public Element parse(org.w3c.dom.Element base, String type) throws Exception {
StructureDefinition sd = getDefinition(0, 0, FormatUtilities.FHIR_NS, type);
Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName();
checkElement(base, path, result.getProperty());
result.setType(base.getLocalName());
parseChildren(path, base, result);
result.numberChildren();
return result;
}
private void parseChildren(String path, org.w3c.dom.Element node, Element context) throws Exception {
// this parsing routine retains the original order in a the XML file, to support validation
reapComments(node, context);
List<Property> properties = getChildProperties(context.getProperty(), context.getName(), XMLUtil.getXsiType(node));
String text = XMLUtil.getDirectText(node).trim();
if (!Utilities.noString(text)) {
Property property = getTextProp(properties);
if (property != null) {
context.getChildren().add(new Element(property.getName(), property, property.getType(), text).markLocation(line(node), col(node)));
} else {
logError(line(node), col(node), path, IssueType.STRUCTURE, "Text should not be present", IssueSeverity.ERROR);
}
}
for (int i = 0; i < node.getAttributes().getLength(); i++) {
Node attr = node.getAttributes().item(i);
if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) {
Property property = getAttrProp(properties, attr.getNodeName());
if (property != null) {
String av = attr.getNodeValue();
if (ToolingExtensions.hasExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"))
av = convertForDateFormat(ToolingExtensions.readStringExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"), av);
if (property.getName().equals("value") && context.isPrimitive())
context.setValue(av);
else
context.getChildren().add(new Element(property.getName(), property, property.getType(), av).markLocation(line(node), col(node)));
} else {
logError(line(node), col(node), path, IssueType.STRUCTURE, "Undefined attribute '@"+attr.getNodeName()+"'", IssueSeverity.ERROR);
}
}
}
Node child = node.getFirstChild();
while (child != null) {
if (child.getNodeType() == Node.ELEMENT_NODE) {
Property property = getElementProp(properties, child.getLocalName());
if (property != null) {
if (!property.isChoice() && "xhtml".equals(property.getType())) {
XhtmlNode xhtml = new XhtmlParser().setValidatorMode(true).parseHtmlNode((org.w3c.dom.Element) child);
context.getChildren().add(new Element("div", property, "xhtml", new XhtmlComposer().setXmlOnly(true).compose(xhtml)).setXhtml(xhtml).markLocation(line(child), col(child)));
} else {
String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
Element n = new Element(child.getLocalName(), property).markLocation(line(child), col(child));
checkElement((org.w3c.dom.Element) child, npath, n.getProperty());
boolean ok = true;
if (property.isChoice()) {
if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) {
String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type");
if (xsiType == null) {
logError(line(child), col(child), path, IssueType.STRUCTURE, "No type found on '"+child.getLocalName()+'"', IssueSeverity.ERROR);
ok = false;
} else {
if (xsiType.contains(":"))
xsiType = xsiType.substring(xsiType.indexOf(":")+1);
n.setType(xsiType);
}
} else
n.setType(n.getType());
}
context.getChildren().add(n);
if (ok) {
if (property.isResource())
parseResource(npath, (org.w3c.dom.Element) child, n);
else
parseChildren(npath, (org.w3c.dom.Element) child, n);
}
}
} else
logError(line(child), col(child), path, IssueType.STRUCTURE, "Undefined element '"+child.getLocalName()+'"', IssueSeverity.ERROR);
} else if (child.getNodeType() == Node.CDATA_SECTION_NODE){
logError(line(child), col(child), path, IssueType.STRUCTURE, "CDATA is not allowed", IssueSeverity.ERROR);
} else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) {
logError(line(child), col(child), path, IssueType.STRUCTURE, "Node type "+Integer.toString(child.getNodeType())+" is not allowed", IssueSeverity.ERROR);
}
child = child.getNextSibling();
}
}
private Property getElementProp(List<Property> properties, String nodeName) {
for (Property p : properties)
if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
if (p.getName().equals(nodeName))
return p;
if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3)))
return p;
}
return null;
}
private Property getAttrProp(List<Property> properties, String nodeName) {
for (Property p : properties)
if (p.getName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR))
return p;
return null;
}
private Property getTextProp(List<Property> properties) {
for (Property p : properties)
if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT))
return p;
return null;
}
private String convertForDateFormat(String fmt, String av) throws FHIRException {
if ("v3".equals(fmt)) {
DateTimeType d = DateTimeType.parseV3(av);
return d.asStringValue();
} else
throw new FHIRException("Unknown Data format '"+fmt+"'");
}
private void parseResource(String string, org.w3c.dom.Element container, Element parent) throws Exception {
org.w3c.dom.Element res = XMLUtil.getFirstChild(container);
String name = res.getLocalName();
StructureDefinition sd = context.fetchResource(StructureDefinition.class, "http://hl7.org/fhir/StructureDefinition/"+name);
if (sd == null)
throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (unknown name '"+res.getLocalName()+"')");
parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd), parent.getProperty().getName().equals("contained") ? SpecialElement.CONTAINED : SpecialElement.BUNDLE_ENTRY);
parent.setType(name);
parseChildren(res.getLocalName(), res, parent);
}
private void reapComments(org.w3c.dom.Element element, Element context) {
Node node = element.getPreviousSibling();
while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
if (node.getNodeType() == Node.COMMENT_NODE)
context.getComments().add(0, node.getTextContent());
node = node.getPreviousSibling();
}
node = element.getLastChild();
while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
node = node.getPreviousSibling();
}
while (node != null) {
if (node.getNodeType() == Node.COMMENT_NODE)
context.getComments().add(node.getTextContent());
node = node.getNextSibling();
}
}
private boolean isAttr(Property property) {
for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
if (r.getValue() == PropertyRepresentation.XMLATTR) {
return true;
}
}
return false;
}
private boolean isText(Property property) {
for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
if (r.getValue() == PropertyRepresentation.XMLTEXT) {
return true;
}
}
return false;
}
@Override
public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws Exception {
XMLWriter xml = new XMLWriter(stream, "UTF-8");
xml.setPretty(style == OutputStyle.PRETTY);
xml.start();
xml.setDefaultNamespace(e.getProperty().getNamespace());
composeElement(xml, e, e.getType());
xml.end();
}
private void composeElement(XMLWriter xml, Element element, String elementName) throws IOException {
for (String s : element.getComments()) {
xml.comment(s, true);
}
if (isText(element.getProperty())) {
xml.enter(elementName);
xml.text(element.getValue());
xml.exit(elementName);
} else if (element.isPrimitive() || (element.hasType() && ParserBase.isPrimitive(element.getType()))) {
if (element.getType().equals("xhtml")) {
xml.escapedText(element.getValue());
} else if (isText(element.getProperty())) {
xml.text(element.getValue());
} else {
if (element.hasValue())
xml.attribute("value", element.getValue());
if (element.hasChildren()) {
xml.enter(elementName);
for (Element child : element.getChildren())
composeElement(xml, child, child.getName());
xml.exit(elementName);
} else
xml.element(elementName);
}
} else {
for (Element child : element.getChildren()) {
if (isAttr(child.getProperty()))
xml.attribute(child.getName(), child.getValue());
}
xml.enter(elementName);
if (element.getSpecial() != null)
xml.enter(element.getType());
for (Element child : element.getChildren()) {
if (isText(child.getProperty()))
xml.text(child.getValue());
else if (!isAttr(child.getProperty()))
composeElement(xml, child, child.getName());
}
if (element.getSpecial() != null)
xml.exit(element.getType());
xml.exit(elementName);
}
}
}