/*
* Copyright 2013, 2014 Deutsche Nationalbibliothek
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.culturegraph.mf.xml;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;

import org.culturegraph.mf.commons.ResourceUtil;
import org.culturegraph.mf.framework.FluxCommand;
import org.culturegraph.mf.framework.MetafactureException;
import org.culturegraph.mf.framework.ObjectReceiver;
import org.culturegraph.mf.framework.StreamReceiver;
import org.culturegraph.mf.framework.annotations.Description;
import org.culturegraph.mf.framework.annotations.In;
import org.culturegraph.mf.framework.annotations.Out;
import org.culturegraph.mf.framework.helpers.DefaultStreamPipe;
/**
 * Encodes a stream as XML.
 *
 * <p>Every record becomes one element named after the record tag. Entities
 * become nested elements and literals become child elements which hold the
 * literal value as text. Two kinds of literal names are treated specially:
 * an empty name sets the text of the enclosing element, and a name starting
 * with {@link #ATTRIBUTE_MARKER} adds an attribute (named by the remainder
 * of the literal name) to the enclosing element.
 *
 * <p>The output is passed to the receiver in pieces. By default the header
 * (XML declaration and opening root tag, both optional) is sent when the
 * first record starts, every record is sent when it ends, and the closing
 * root tag is sent when the stream is reset or closed. If separate roots
 * are enabled, each record is sent as a single string which contains its
 * own header and footer.
 *
 * @author Markus Michael Geipel
 * @author Christoph Böhme
 *
 */
@Description("Encodes a stream as xml")
@In(StreamReceiver.class)
@Out(String.class)
@FluxCommand("stream-to-xml")
public final class SimpleXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {

    /** Prefix which marks a literal as an attribute of the enclosing element. */
    public static final String ATTRIBUTE_MARKER = "~";

    public static final String DEFAULT_ROOT_TAG = "records";
    public static final String DEFAULT_RECORD_TAG = "record";

    private static final String NEW_LINE = "\n";
    private static final String INDENT = "\t";

    private static final String BEGIN_ATTRIBUTE = "=\"";
    private static final String END_ATTRIBUTE = "\"";
    private static final String BEGIN_OPEN_ELEMENT = "<";
    private static final String END_OPEN_ELEMENT = ">";
    private static final String END_EMPTY_ELEMENT = " />";
    private static final String BEGIN_CLOSE_ELEMENT = "</";
    private static final String END_CLOSE_ELEMENT = ">";

    private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
    private static final String XMLNS_MARKER = " xmlns";

    /** Collects output until it is handed to the receiver. */
    private final StringBuilder builder = new StringBuilder();

    private String rootTag = DEFAULT_ROOT_TAG;
    private String recordTag = DEFAULT_RECORD_TAG;
    /** Maps namespace prefixes (empty string = default namespace) to URIs. */
    private Map<String, String> namespaces = new HashMap<String, String>();
    private boolean writeRootTag = true;
    private boolean writeXmlHeader = true;
    private boolean separateRoots;

    /** The element which currently receives literals and child entities. */
    private Element element;
    /** True until the first record after construction or a reset starts. */
    private boolean atStreamStart = true;

    /**
     * Sets the name of the root element.
     *
     * @param rootTag name of the root element
     */
    public void setRootTag(final String rootTag) {
        this.rootTag = rootTag;
    }

    /**
     * Sets the name of the element which is written for each record.
     *
     * @param tag name of the record element
     */
    public void setRecordTag(final String tag) {
        recordTag = tag;
    }

    /**
     * Adds the namespaces listed in a properties resource. Property keys
     * are used as prefixes and property values as namespace URIs.
     *
     * @param file name of the properties resource
     * @throws MetafactureException if the resource cannot be loaded
     */
    public void setNamespaceFile(final String file) {
        final Properties properties;
        try {
            properties = ResourceUtil.loadProperties(file);
        } catch (final IOException e) {
            throw new MetafactureException("Failed to load namespaces list", e);
        }
        addNamespaces(properties);
    }

    /**
     * Adds the namespaces listed in a properties resource. Property keys
     * are used as prefixes and property values as namespace URIs.
     *
     * @param url location of the properties resource
     * @throws MetafactureException if the resource cannot be loaded
     */
    public void setNamespaceFile(final URL url) {
        final Properties properties;
        try {
            properties = ResourceUtil.loadProperties(url);
        } catch (final IOException e) {
            throw new MetafactureException("Failed to load namespaces list", e);
        }
        addNamespaces(properties);
    }

    /** Copies all properties into the namespace map (key = prefix, value = URI). */
    private void addNamespaces(final Properties properties) {
        for (final Entry<Object, Object> entry : properties.entrySet()) {
            namespaces.put(entry.getKey().toString(), entry.getValue().toString());
        }
    }

    /**
     * Controls whether the XML declaration is written as part of the header.
     *
     * @param writeXmlHeader true to write the XML declaration
     */
    public void setWriteXmlHeader(final boolean writeXmlHeader) {
        this.writeXmlHeader = writeXmlHeader;
    }

    /**
     * Controls whether records are wrapped in a root element. If no root
     * element is written, the namespace declarations are added to each
     * record element instead.
     *
     * @param writeRootTag true to write the root element
     */
    public void setWriteRootTag(final boolean writeRootTag) {
        this.writeRootTag = writeRootTag;
    }

    /**
     * Controls whether each record gets its own header and footer.
     *
     * @param separateRoots true to write header and footer for every record
     */
    public void setSeparateRoots(final boolean separateRoots) {
        this.separateRoots = separateRoots;
    }

    /**
     * Replaces the namespaces with the entries of the given map.
     *
     * <p>The entries are copied, so the encoder never modifies the map of
     * the caller (for instance when {@code setNamespaceFile} is called
     * afterwards) and works with unmodifiable maps. The copy keeps the
     * iteration order of the given map.
     *
     * @param namespaces map from prefixes to namespace URIs; the empty
     *                   prefix denotes the default namespace. {@code null}
     *                   is treated as an empty map.
     */
    public void setNamespaces(final Map<String, String> namespaces) {
        final Map<String, String> copy = new LinkedHashMap<String, String>();
        if (namespaces != null) {
            copy.putAll(namespaces);
        }
        this.namespaces = copy;
    }

    @Override
    public void startRecord(final String identifier) {
        if (separateRoots) {
            // The header stays in the buffer and is sent together with the
            // record and the footer in endRecord().
            writeHeader();
        } else if (atStreamStart) {
            writeHeader();
            sendAndClearData();
        }
        atStreamStart = false;

        element = new Element(recordTag);
        if (!writeRootTag) {
            // Without a root element the record element has to carry the
            // namespace declarations.
            addNamespacesToElement();
        }
    }

    /** Adds one xmlns attribute per namespace to the current element. */
    private void addNamespacesToElement() {
        for (final Entry<String, String> namespace : namespaces.entrySet()) {
            final String key = namespace.getKey();
            // NOTE(review): XMLNS_MARKER starts with a space and
            // Element.addAttribute() prepends another one, so these
            // attributes are preceded by two spaces. Kept as is to leave
            // the produced output unchanged.
            final String name = XMLNS_MARKER + (key.isEmpty() ? "" : ":") + key;
            element.addAttribute(name, namespace.getValue());
        }
    }

    @Override
    public void endRecord() {
        element.writeElement(builder, 1);
        if (separateRoots) {
            writeFooter();
        }
        sendAndClearData();
    }

    @Override
    public void startEntity(final String name) {
        element = element.createChild(name);
    }

    @Override
    public void endEntity() {
        element = element.getParent();
    }

    @Override
    public void literal(final String name, final String value) {
        if (name.isEmpty()) {
            element.setText(value);
        } else if (name.startsWith(ATTRIBUTE_MARKER)) {
            element.addAttribute(name.substring(ATTRIBUTE_MARKER.length()), value);
        } else {
            element.createChild(name).setText(value);
        }
    }

    @Override
    protected void onResetStream() {
        if (separateRoots) {
            // Every finished record was already sent as a complete document
            // (header, record and footer) by endRecord(), so there is no
            // root element left to close. Writing a footer here would emit
            // a stray closing tag. Only drop what an unfinished record may
            // have left in the buffer.
            builder.setLength(0);
        } else {
            if (!atStreamStart) {
                writeFooter();
            }
            sendAndClearData();
        }
        atStreamStart = true;
    }

    @Override
    protected void onCloseStream() {
        if (!separateRoots) {
            if (!atStreamStart) {
                writeFooter();
            }
            sendAndClearData();
        }
    }

    /** Passes the buffered output to the receiver and empties the buffer. */
    private void sendAndClearData() {
        getReceiver().process(builder.toString());
        builder.setLength(0);
    }

    /**
     * Appends the XML declaration and the opening root tag including the
     * namespace declarations to the buffer, as far as they are enabled.
     */
    private void writeHeader() {
        if (writeXmlHeader) {
            builder.append(XML_HEADER);
        }
        if (writeRootTag) {
            builder.append(BEGIN_OPEN_ELEMENT);
            builder.append(rootTag);
            for (final Entry<String, String> entry : namespaces.entrySet()) {
                builder.append(XMLNS_MARKER);
                if (!entry.getKey().isEmpty()) {
                    builder.append(':');
                    builder.append(entry.getKey());
                }
                builder.append(BEGIN_ATTRIBUTE);
                writeEscaped(builder, entry.getValue());
                builder.append(END_ATTRIBUTE);
            }
            builder.append(END_OPEN_ELEMENT);
        }
    }

    /** Appends the closing root tag to the buffer if root tags are enabled. */
    private void writeFooter() {
        if (writeRootTag) {
            builder.append(NEW_LINE);
            builder.append(BEGIN_CLOSE_ELEMENT);
            builder.append(rootTag);
            builder.append(END_CLOSE_ELEMENT);
        }
    }

    /**
     * Appends a string to a builder and replaces the characters
     * {@code & < > ' "} with the corresponding predefined XML entities.
     *
     * @param builder the builder to append to
     * @param str the text to escape; {@code null} appends nothing
     */
    protected static void writeEscaped(final StringBuilder builder, final String str) {
        if (str == null) {
            return;
        }
        final int len = str.length();
        for (int i = 0; i < len; ++i) {
            final char c = str.charAt(i);
            switch (c) {
            case '&':
                builder.append("&amp;");
                break;
            case '<':
                builder.append("&lt;");
                break;
            case '>':
                builder.append("&gt;");
                break;
            case '\'':
                builder.append("&apos;");
                break;
            case '"':
                builder.append("&quot;");
                break;
            default:
                builder.append(c);
                break;
            }
        }
    }

    /**
     * An XML element. Attributes are kept as already serialised text;
     * text content and child elements are kept until the element is
     * written.
     *
     */
    private static final class Element {

        /** Shared placeholder which avoids allocating a list for leaves. */
        private static final List<Element> NO_CHILDREN = Collections.emptyList();

        private final StringBuilder attributes = new StringBuilder();
        private final Element parent;
        private final String name;

        private String text = "";
        private List<Element> children = NO_CHILDREN;

        /**
         * Creates an element without a parent.
         *
         * @param name name of the element
         */
        public Element(final String name) {
            this(name, null);
        }

        private Element(final String name, final Element parent) {
            this.name = name;
            this.parent = parent;
        }

        /**
         * Appends an attribute to this element. The value is escaped, the
         * name is written as given.
         *
         * @param name name of the attribute
         * @param value value of the attribute; {@code null} results in an
         *              empty value
         */
        public void addAttribute(final String name, final String value) {
            attributes.append(" ");
            attributes.append(name);
            attributes.append(BEGIN_ATTRIBUTE);
            writeEscaped(attributes, value);
            attributes.append(END_ATTRIBUTE);
        }

        /**
         * Sets the text content of this element.
         *
         * @param text the unescaped text; {@code null} is treated as empty
         *             text so that writing the element cannot fail later
         */
        public void setText(final String text) {
            this.text = text == null ? "" : text;
        }

        /**
         * Creates a new element and appends it to the children of this
         * element.
         *
         * @param name name of the new element
         * @return the new child element
         */
        public Element createChild(final String name) {
            final Element child = new Element(name, this);
            if (children == NO_CHILDREN) {
                // Replace the shared immutable placeholder on first use.
                children = new ArrayList<Element>();
            }
            children.add(child);
            return child;
        }

        /**
         * @return the parent element or null if this element was created
         *         without a parent
         */
        public Element getParent() {
            return parent;
        }

        /**
         * Serialises this element and its children. Each named element
         * starts on a new line. An element with an empty name writes no
         * tags, only its text and its children.
         *
         * @param builder the builder to append to
         * @param indent number of indentation steps for this element
         */
        public void writeElement(final StringBuilder builder, final int indent) {
            final boolean named = !name.isEmpty();
            if (named) {
                builder.append(NEW_LINE);
                writeIndent(builder, indent);
                builder.append(BEGIN_OPEN_ELEMENT);
                builder.append(name);
                builder.append(attributes);
                if (text.isEmpty() && children.isEmpty()) {
                    builder.append(END_EMPTY_ELEMENT);
                    return;
                }
                builder.append(END_OPEN_ELEMENT);
            }

            writeEscaped(builder, text);
            for (final Element child : children) {
                child.writeElement(builder, indent + 1);
            }

            // Put the closing tag on its own line only if the element
            // contains nothing but child elements.
            if (text.isEmpty() && !children.isEmpty()) {
                builder.append(NEW_LINE);
                writeIndent(builder, indent);
            }

            if (named) {
                builder.append(BEGIN_CLOSE_ELEMENT);
                builder.append(name);
                builder.append(END_CLOSE_ELEMENT);
            }
        }

        private static void writeIndent(final StringBuilder builder, final int indent) {
            for (int i = 0; i < indent; ++i) {
                builder.append(INDENT);
            }
        }

    }

}