/**
* Copyright (C) 2015 - present by OpenGamma Inc. and the OpenGamma group of companies
*
* Please see distribution for license.
*/
package com.opengamma.strata.collect.io;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.util.HashMap;
import java.util.Map;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.ByteSource;
import com.opengamma.strata.collect.ArgChecker;
import com.opengamma.strata.collect.Unchecked;
/**
* An XML file.
* <p>
* Represents an XML file together with the ability to parse it from a {@link ByteSource}.
* <p>
* This uses the standard StAX API to parse the file.
* Once parsed, the XML is represented as a DOM-like structure, see {@link XmlElement}.
* This approach is suitable for XML files where the size of the parsed XML file is
* known to be manageable in memory.
* <p>
* Note that the {@link XmlElement} representation does not express all XML features.
* No support is provided for processing instructions, comments or mixed content.
* In addition, it is not possible to determine the difference between empty content and no children.
* <p>
* There is no support for namespaces.
* All namespace prefixes are dropped.
* There are cases where this can be a problem, but most of the time lenient parsing is helpful.
*/
public final class XmlFile {
/**
* The root element.
*/
private final XmlElement root;
/**
* The map of references.
*/
private final ImmutableMap<String, XmlElement> refs;
//-----------------------------------------------------------------------
/**
* Parses the specified source as an XML file to an in-memory DOM-like structure.
* <p>
* This parses the specified byte source expecting an XML file format.
* The resulting instance can be queried for the root element.
*
* @param source the XML source data
* @return the parsed file
* @throws UncheckedIOException if an IO exception occurs
* @throws IllegalArgumentException if the file cannot be parsed
*/
public static XmlFile of(ByteSource source) {
return of(source, "");
}
/**
* Parses the specified source as an XML file to an in-memory DOM-like structure.
* <p>
* This parses the specified byte source expecting an XML file format.
* The resulting instance can be queried for the root element.
* <p>
* This supports capturing attribute references, such as an id/href pair.
* Wherever the parser finds an attribute with the specified name, the element is added
* to the internal map, accessible by calling {@link #getReferences()}.
* <p>
* For example, if one part of the XML has {@code <foo id="fooId">}, the references map will
* contain an entry mapping "fooId" to the parsed element {@code <foo>}.
*
* @param source the XML source data
* @param refAttrName the attribute name that should be parsed as a reference
* @return the parsed file
* @throws UncheckedIOException if an IO exception occurs
* @throws IllegalArgumentException if the file cannot be parsed
*/
public static XmlFile of(ByteSource source, String refAttrName) {
ArgChecker.notNull(source, "source");
return Unchecked.wrap(() -> {
try (InputStream in = source.openBufferedStream()) {
XMLStreamReader xmlReader = xmlInputFactory().createXMLStreamReader(in);
try {
HashMap<String, XmlElement> refs = new HashMap<>();
XmlElement root = parse(xmlReader, refAttrName, refs);
return new XmlFile(root, refs);
} finally {
xmlReader.close();
}
}
});
}
//-------------------------------------------------------------------------
/**
* Parses the tree from the StAX stream reader, capturing references.
* <p>
* The reader should be created using the factory returned from {@link #xmlInputFactory()}.
* <p>
* This method supports capturing attribute references, such as an id/href pair.
* Wherever the parser finds an attribute with the specified name, the element is added
* to the specified map. Note that the map is mutated.
*
* @param reader the StAX stream reader, positioned at or before the element to be parsed
* @param refAttr the attribute name that should be parsed as a reference, null if not applicable
* @param refs the mutable map of references to update, null if not applicable
* @return the parsed element
* @throws IllegalArgumentException if the input cannot be parsed
*/
private static XmlElement parse(XMLStreamReader reader, String refAttr, Map<String, XmlElement> refs) {
try {
// parse start element
String elementName = parseElementName(reader);
ImmutableMap<String, String> attrs = parseAttributes(reader);
// parse children or content
ImmutableList.Builder<XmlElement> childBuilder = ImmutableList.builder();
String content = "";
int event = reader.next();
while (event != XMLStreamConstants.END_ELEMENT) {
switch (event) {
// parse child when start element found
case XMLStreamConstants.START_ELEMENT:
childBuilder.add(parse(reader, refAttr, refs));
break;
// append content when characters found
// since XMLStreamReader has IS_COALESCING=true means there should only be one content call
case XMLStreamConstants.CHARACTERS:
case XMLStreamConstants.CDATA:
content += reader.getText();
break;
default:
break;
}
event = reader.next();
}
ImmutableList<XmlElement> children = childBuilder.build();
XmlElement parsed = children.isEmpty() ?
XmlElement.ofContent(elementName, attrs, content) :
XmlElement.ofChildren(elementName, attrs, children);
String ref = attrs.get(refAttr);
if (ref != null) {
refs.put(ref, parsed);
}
return parsed;
} catch (XMLStreamException ex) {
throw new IllegalArgumentException(ex);
}
}
// find the start element and parses the name
private static String parseElementName(XMLStreamReader reader) throws XMLStreamException {
int event = reader.getEventType();
while (event != XMLStreamConstants.START_ELEMENT) {
event = reader.next();
}
return reader.getLocalName();
}
// parses attributes into a map
private static ImmutableMap<String, String> parseAttributes(XMLStreamReader reader) {
ImmutableMap<String, String> attrs;
int attributeCount = reader.getAttributeCount() + reader.getNamespaceCount();
if (attributeCount == 0) {
attrs = ImmutableMap.of();
} else {
ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
for (int i = 0; i < reader.getAttributeCount(); i++) {
builder.put(reader.getAttributeLocalName(i), reader.getAttributeValue(i));
}
attrs = builder.build();
}
return attrs;
}
//-------------------------------------------------------------------------
// creates the XML input factory, recreated each time to avoid JDK-8028111
// this also provides some protection against hackers attacking XML
private static XMLInputFactory xmlInputFactory() {
XMLInputFactory factory = XMLInputFactory.newFactory();
factory.setProperty(XMLInputFactory.IS_COALESCING, true);
factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, true);
factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
factory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
return factory;
}
//-------------------------------------------------------------------------
/**
* Restricted constructor.
*/
private XmlFile(XmlElement root, Map<String, XmlElement> refs) {
this.root = ArgChecker.notNull(root, "root");
this.refs = ImmutableMap.copyOf(refs);
}
//-------------------------------------------------------------------------
/**
* Gets the root element of this file.
*
* @return the root element
*/
public XmlElement getRoot() {
return root;
}
/**
* Gets the reference map of id to element.
* <p>
* This is used to decode references, such as an id/href pair.
* <p>
* For example, if one part of the XML has {@code <foo id="fooId">}, the map will
* contain an entry mapping "fooId" to the parsed element {@code <foo>}.
*
* @return the map of id to element
*/
public ImmutableMap<String, XmlElement> getReferences() {
return refs;
}
//-------------------------------------------------------------------------
/**
* Checks if this file equals another.
* <p>
* The comparison checks the content and reference map.
*
* @param obj the other section, null returns false
* @return true if equal
*/
@Override
public boolean equals(Object obj) {
if (obj == this) {
return true;
}
if (obj instanceof XmlFile) {
XmlFile other = (XmlFile) obj;
return root.equals(other.root) && refs.equals(other.refs);
}
return false;
}
/**
* Returns a suitable hash code for the file.
*
* @return the hash code
*/
@Override
public int hashCode() {
return root.hashCode() ^ refs.hashCode();
}
/**
* Returns a string describing the file.
*
* @return the descriptive string
*/
@Override
public String toString() {
return root.toString();
}
}