package com.bagri.core.server.api.df.xml;
import static javax.xml.stream.XMLInputFactory.*;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.Comment;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.Namespace;
import javax.xml.stream.events.ProcessingInstruction;
import javax.xml.stream.events.StartDocument;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import javax.xml.transform.Source;
import javax.xml.xquery.XQItemType;
import com.bagri.core.api.BagriException;
import com.bagri.core.model.Data;
import com.bagri.core.model.Element;
import com.bagri.core.model.NodeKind;
import com.bagri.core.model.Null;
import com.bagri.core.model.Occurrence;
import com.bagri.core.model.Path;
import com.bagri.core.server.api.ContentParser;
import com.bagri.core.server.api.ModelManagement;
import com.bagri.core.server.api.impl.ContentParserBase;
/**
* XDM Parser implementation for XML data format. Uses reference implementation (Xerces) of XML streaming parser.
*
* @author Denis Sukhoroslov
*
*/
public class XmlStaxParser extends ContentParserBase implements ContentParser<String> {
private static XMLInputFactory factory = XMLInputFactory.newInstance();
/**
*
* @param model the model management component
* @param xml the document content in XML format
* @return the list of parsed XDM data elements
* @throws XMLStreamException in case of content read exception
* @throws BagriException in case of content parse exception
*/
public static List<Data> parseDocument(ModelManagement model, String xml) throws XMLStreamException, BagriException {
XmlStaxParser parser = new XmlStaxParser(model);
parser.init(new Properties());
return parser.parse(xml);
}
/**
*
* @param model the model management component
*/
XmlStaxParser(ModelManagement model) {
super(model);
}
/**
* {@inheritDoc}
*/
public void init(Properties properties) {
logger.trace("init; got context: {}", properties);
for (Map.Entry prop: properties.entrySet()) {
String name = (String) prop.getKey();
if (factory.isPropertySupported(name)) {
String value = (String) prop.getValue();
if (value != null && value.length() > 0) {
if (name.equals(ALLOCATOR) || name.equals(REPORTER) || name.equals(RESOLVER)) {
factory.setProperty(name, value);
} else {
factory.setProperty(name, Boolean.valueOf(value));
}
}
}
}
}
/**
* {@inheritDoc}
*/
@Override
public List<Data> parse(String xml) throws BagriException {
try (Reader reader = new StringReader(xml)) {
return parse(reader);
} catch (IOException ex) {
throw new BagriException(ex, BagriException.ecInOut);
}
}
/**
* {@inheritDoc}
*/
@Override
public List<Data> parse(File file) throws BagriException {
try (Reader reader = new FileReader(file)) {
return parse(reader);
} catch (IOException ex) {
throw new BagriException(ex, BagriException.ecInOut);
}
}
/**
* {@inheritDoc}
*/
@Override
public List<Data> parse(InputStream stream) throws BagriException {
XMLEventReader eventReader = null;
try {
try {
eventReader = factory.createXMLEventReader(stream);
return parse(eventReader);
} finally {
if (eventReader != null) {
eventReader.close();
}
}
} catch (XMLStreamException ex) {
throw new BagriException(ex, BagriException.ecInOut);
}
}
/**
* {@inheritDoc}
*/
@Override
public List<Data> parse(Reader reader) throws BagriException {
XMLEventReader eventReader = null;
try {
try {
eventReader = factory.createXMLEventReader(reader);
return parse(eventReader);
} finally {
if (eventReader != null) {
eventReader.close();
}
}
} catch (XMLStreamException ex) {
throw new BagriException(ex, BagriException.ecInOut);
}
}
/**
*
* @param source the XML source
* @return the list of parsed XDM data elements
* @throws BagriException in case of content parse exception
*/
public List<Data> parse(Source source) throws BagriException {
XMLEventReader eventReader = null;
try {
try {
eventReader = factory.createXMLEventReader(source);
return parse(eventReader);
} finally {
if (eventReader != null) {
eventReader.close();
}
}
} catch (XMLStreamException ex) {
throw new BagriException(ex, BagriException.ecInOut);
}
}
/**
*
* @param eventReader the XML streaming parser
* @return the list of parsed XDM data elements
* @throws BagriException in case of content parse exception
*/
public List<Data> parse(XMLEventReader eventReader) throws BagriException {
XmlParserContext ctx = initContext();
while (eventReader.hasNext()) {
try {
processEvent(ctx, eventReader.nextEvent());
} catch (XMLStreamException ex) {
throw new BagriException(ex, BagriException.ecInOut);
}
}
return ctx.getDataList();
}
private void processEvent(XmlParserContext ctx, XMLEvent xmlEvent) throws BagriException {
if (ctx.getDocRoot() == null) {
ctx.firstEvents.add(xmlEvent);
if (xmlEvent.getEventType() == XMLStreamConstants.START_ELEMENT) {
String root = "/" + xmlEvent.asStartElement().getName();
ctx.addDocument(root);
for (XMLEvent event: ctx.firstEvents) {
processEvent(ctx, event);
}
}
} else {
switch (xmlEvent.getEventType()) {
case XMLStreamConstants.START_ELEMENT:
startElement(ctx, xmlEvent.asStartElement());
break;
case XMLStreamConstants.CHARACTERS:
if (!xmlEvent.asCharacters().isWhiteSpace()) {
ctx.addCharacters(xmlEvent.asCharacters().getData());
}
break;
case XMLStreamConstants.END_ELEMENT:
ctx.endElement();
break;
case XMLStreamConstants.ATTRIBUTE:
ctx.addAttribute(((Attribute) xmlEvent).getName(), ((Attribute) xmlEvent).getValue());
break;
case XMLStreamConstants.COMMENT:
ctx.addComment(((Comment) xmlEvent).getText());
break;
case XMLStreamConstants.PROCESSING_INSTRUCTION:
ctx.addProcessingInstruction(((ProcessingInstruction) xmlEvent).getTarget(), ((ProcessingInstruction) xmlEvent).getData());
break;
default:
break;
}
}
}
/**
* {@inheritDoc}
*/
@Override
protected XmlParserContext initContext() {
return new XmlParserContext();
}
@SuppressWarnings("unchecked")
private void startElement(XmlParserContext ctx, StartElement element) throws BagriException {
ctx.appendCharacters();
ctx.addData(element.getName().toString()); // getLocalPart());
ctx.addElement();
for (Iterator<Namespace> itr = element.getNamespaces(); itr.hasNext();) {
Namespace ns = itr.next();
// TODO: process default namespace properly
String namespace = ns.getValue();
if (namespace != null) {
String prefix = ns.getName().getLocalPart();
ctx.addNamespace(prefix, namespace);
}
}
for (Iterator<Attribute> itr = element.getAttributes(); itr.hasNext();) {
Attribute a = itr.next();
//if (!a.getName().getPrefix().isEmpty()) {
// String prefix = model.translateNamespace(a.getName().getNamespaceURI(), a.getName().getPrefix());
// ctx.addNamespace(prefix, a.getName().getNamespaceURI());
//}
ctx.addAttribute(a.getName(), a.getValue());
}
}
private class XmlParserContext extends ParserContext {
private StringBuilder chars;
private Set<String> nspaces;
private List<XMLEvent> firstEvents;
XmlParserContext() {
super();
firstEvents = new ArrayList<XMLEvent>();
nspaces = new HashSet<>();
chars = new StringBuilder();
}
private void addAttribute(QName name, String value) throws BagriException {
logger.trace("attribute: {}:{}", name, value);
addData("@" + name, NodeKind.attribute, value, XQItemType.XQBASETYPE_ANYATOMICTYPE, Occurrence.onlyOne);
}
public void addCharacters(String data) {
chars.append(data);
}
boolean appendCharacters() throws BagriException {
if (chars.length() > 0) {
String content = chars.toString();
// normalize xml content.. what if it is already normalized??
content = content.replaceAll("&", "&");
// trim left/right ? this is schema-dependent. trim if schema-type
// is xs:token, for instance..
addData("text()", NodeKind.text, content, XQItemType.XQBASETYPE_ANYATOMICTYPE, Occurrence.zeroOrOne);
chars.delete(0, chars.length());
return true;
}
return false;
}
public void addComment(String comment) throws BagriException {
addData("comment()", NodeKind.comment, comment, XQItemType.XQBASETYPE_ANYTYPE, Occurrence.zeroOrOne);
}
public void addNamespace(String prefix, String namespace) throws BagriException {
// "xml" namespace is assumed, no need to add it
if (!"xml".equals(prefix) && nspaces.add(prefix)) {
addData("#" + prefix, NodeKind.namespace, namespace, XQItemType.XQBASETYPE_QNAME, Occurrence.onlyOne);
}
}
public void addProcessingInstruction(String target, String data) throws BagriException {
//For a processing-instruction node: processing-instruction(local)[position] where local is the name
//of the processing instruction node and position is an integer representing the position of the selected
//node among its like-named processing-instruction node siblings
addData("/?" + target, NodeKind.pi, data, XQItemType.XQBASETYPE_ANYTYPE, Occurrence.zeroOrOne);
}
@Override
public void endElement() throws BagriException {
if (!appendCharacters()) {
if (isTopEmpty()) {
addData("text()", NodeKind.text, Null._null, XQItemType.XQBASETYPE_ANYATOMICTYPE, Occurrence.zeroOrOne);
}
}
super.endElement();
}
}
}