/* * Copyright 2013, 2014 Pascal Christoph (hbz) * * Licensed under the Apache License, Version 2.0 the "License"; * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.culturegraph.mf.xml; import java.util.HashSet; import org.culturegraph.mf.commons.XmlUtil; import org.culturegraph.mf.framework.FluxCommand; import org.culturegraph.mf.framework.StreamReceiver; import org.culturegraph.mf.framework.XmlReceiver; import org.culturegraph.mf.framework.annotations.Description; import org.culturegraph.mf.framework.annotations.In; import org.culturegraph.mf.framework.annotations.Out; import org.culturegraph.mf.framework.helpers.DefaultXmlPipe; import org.xml.sax.Attributes; import org.xml.sax.SAXException; /** * An XML Element splitter. * * @author Pascal Christoph (dr0i) * */ @Description("Splits elements (e.g. defining single records) residing in one XML document into multiple single XML documents.") @In(XmlReceiver.class) @Out(StreamReceiver.class) @FluxCommand("split-xml-elements") public final class XmlElementSplitter extends DefaultXmlPipe<StreamReceiver> { private String Element; private StringBuilder builder = new StringBuilder(); private HashSet<String> namespaces = new HashSet<>(); private boolean inElement = false; private int recordCnt = 0; private String root; private String rootStart = ""; private String rootEnd = ""; private String xmlDeclaration = "<?xml version = \"1.0\" encoding = \"UTF-8\"?>"; private int ElementDepth = 0; /** * default constructor */ public XmlElementSplitter() { } /** * enriched constructor setting the top level element and the Element name * * @param aTopLevelElement * the name of the top level XML tag * @param aElementName * the name of the tag defining a new Element to be split */ public XmlElementSplitter(String aTopLevelElement, String aElementName) { setTopLevelElement(aTopLevelElement); setElementName(aElementName); } /** * Sets the name of the Element. All these elements in the XML stream will * be XML documents on their own. * * @param name * Identifies the elements */ public void setElementName(final String name) { this.Element = name; } /** * Sets the top-level XML document element. * * @param root * the top level element. Don't set it to omit setting top level * element. */ public void setTopLevelElement(final String root) { this.root = root; this.rootStart = "<" + root; this.rootEnd = "</" + root + ">"; } /** * Sets the XML declaration. * * @param xmlDeclaration * the xml declaration. Default is '<?xml version = "1.0" * encoding = "UTF-8"?>'. If empty value is given, the xml * declaration is skipped. */ public void setXmlDeclaration(final String xmlDeclaration) { this.xmlDeclaration = xmlDeclaration; } @Override public void startPrefixMapping(String prefix, String uri) throws SAXException { super.startPrefixMapping(prefix, uri); if (root != null & !prefix.isEmpty() && uri != null) { namespaces.add(" xmlns:" + prefix + "=\"" + uri + "\""); } } @Override public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException { if (!inElement) { if (Element.equals(localName)) { builder = new StringBuilder(); getReceiver().startRecord(String.valueOf(this.recordCnt++)); inElement = true; appendValuesToElement(qName, attributes); ElementDepth++; } } else { if (Element.equals(localName)) { ElementDepth++; } appendValuesToElement(qName, attributes); } } private void appendValuesToElement(final String qName, final Attributes attributes) { this.builder.append("<" + qName); if (attributes.getLength() > 0) { for (int i = 0; i < attributes.getLength(); i++) { builder.append(" " + attributes.getQName(i) + "=\"" + XmlUtil.escape(attributes.getValue(i)) + "\""); } } builder.append(">"); } @Override public void endElement(final String uri, final String localName, final String qName) throws SAXException { if (inElement) { builder.append("</" + qName + ">"); if (Element.equals(localName)) { if (ElementDepth <= 1) { StringBuilder sb = new StringBuilder(xmlDeclaration + rootStart); if (this.root != null && namespaces.size() > 0) { for (String ns : namespaces) { sb.append(ns); } sb.append(">"); } builder.insert(0, sb.toString()).append(rootEnd); getReceiver().literal("Element", builder.toString()); getReceiver().endRecord(); reset(); return; } ElementDepth--; } } } @Override public void characters(final char[] chars, final int start, final int length) throws SAXException { try { builder.append(XmlUtil.escape(new String(chars, start, length))); } catch (Exception e) { reset(); } } private void reset() { inElement = false; builder = new StringBuilder(); ElementDepth = 0; } /** * Get the XML declaration. * * @return the XML decalration */ public String getXmlDeclaration() { return xmlDeclaration; } @Override public void onResetStream() { reset(); } }