/*
* Copyright 2016 Christoph Böhme
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.culturegraph.mf.xml;
import org.culturegraph.mf.framework.FluxCommand;
import org.culturegraph.mf.framework.FormatException;
import org.culturegraph.mf.framework.StreamReceiver;
import org.culturegraph.mf.framework.XmlReceiver;
import org.culturegraph.mf.framework.annotations.Description;
import org.culturegraph.mf.framework.annotations.In;
import org.culturegraph.mf.framework.annotations.Out;
import org.culturegraph.mf.framework.helpers.DefaultXmlPipe;
import org.xml.sax.Attributes;
/**
 * Decodes a CG-XML document into an event stream.
 * <p>
 * The CG-XML format is a simple XML schema for writing event streams into XML
 * documents. The following example shows a complete CG-XML document:
 * <pre>{@literal
 * <?xml version="1.0" encoding="UTF-8"?>
 * <cgxml xmlns="http://www.culturegraph.org/cgxml" version="1.0">
 *   <records>
 *     <record id="1">
 *       <literal name="Name" value="Thomas Mann" />
 *       <entity name="Address">
 *         <entity name="Street">
 *           <literal name="Street" value="Alte Landstrasse" />
 *           <literal name="Number" value="39" />
 *         </entity>
 *         <literal name="City" value="Kilchberg" />
 *         <literal name="Postcode" /> <!-- the value attribute is optional -->
 *       </entity>
 *     </record>
 *     <record> <!-- the id attribute is optional -->
 *     </record>
 *   </records>
 * </cgxml>
 * }</pre>
 * <p>
 * Elements whose namespace URI differs from {@link #CGXML_NAMESPACE} are
 * skipped silently, both when they open and when they close. An element inside
 * the namespace that is not one of the tags shown above causes a
 * {@code FormatException}.
 *
 * @author Christoph Böhme
 *
 */
@Description("Reads CG-XML files")
@In(XmlReceiver.class)
@Out(StreamReceiver.class)
@FluxCommand("handle-cg-xml")
public final class CGXmlHandler extends DefaultXmlPipe<StreamReceiver> {

    /** Namespace URI that an element must carry to be handled by this class. */
    public static final String CGXML_NAMESPACE =
            "http://www.culturegraph.org/cgxml";

    // Local names of the elements that make up a CG-XML document.
    private static final String ROOT_TAG = "cgxml";
    private static final String RECORDS_TAG = "records";
    private static final String RECORD_TAG = "record";
    private static final String ENTITY_TAG = "entity";
    private static final String LITERAL_TAG = "literal";

    // Local names of the (non-namespaced) attributes read from those elements.
    private static final String VERSION_ATTR = "version";
    private static final String ID_ATTR = "id";
    private static final String NAME_ATTR = "name";
    private static final String VALUE_ATTR = "value";

    /** The only value of the {@code version} attribute that is accepted. */
    private static final String VERSION = "1.0";

    /**
     * Handles an opening tag. Tags outside of {@link #CGXML_NAMESPACE} are
     * ignored; all others are dispatched on their local name.
     *
     * @param uri        namespace URI of the element
     * @param localName  local name of the element
     * @param qName      qualified name of the element (not used)
     * @param attributes attributes attached to the element
     * @throws FormatException if the element is not part of the CG-XML
     *                         vocabulary, if the root element does not declare
     *                         the supported version, or if an entity or
     *                         literal has no name
     */
    @Override
    public void startElement(final String uri, final String localName,
            final String qName, final Attributes attributes) {
        if (CGXML_NAMESPACE.equals(uri)) {
            dispatchStartTag(localName, attributes);
        }
    }

    /**
     * Handles a closing tag. Only the record and entity tags of the CG-XML
     * namespace produce an event; every other closing tag is ignored.
     *
     * @param uri       namespace URI of the element
     * @param localName local name of the element
     * @param qName     qualified name of the element (not used)
     */
    @Override
    public void endElement(final String uri, final String localName,
            final String qName) {
        if (CGXML_NAMESPACE.equals(uri)) {
            if (RECORD_TAG.equals(localName)) {
                getReceiver().endRecord();
            } else if (ENTITY_TAG.equals(localName)) {
                getReceiver().endEntity();
            }
        }
    }

    /**
     * Routes an opening tag of the CG-XML namespace to its handler.
     *
     * @throws FormatException for a local name that is not a known tag
     */
    private void dispatchStartTag(final String localName,
            final Attributes attributes) {
        switch (localName) {
        case ROOT_TAG:
            requireSupportedVersion(attributes);
            return;
        case RECORDS_TAG:
            // Pure container element; it produces no event.
            return;
        case RECORD_TAG:
            openRecord(attributes);
            return;
        case ENTITY_TAG:
            openEntity(attributes);
            return;
        case LITERAL_TAG:
            forwardLiteral(attributes);
            return;
        default:
            throw new FormatException("Unexpected element: " + localName);
        }
    }

    /**
     * Rejects documents whose root element does not declare {@link #VERSION}.
     * A missing version attribute is rejected as well.
     */
    private void requireSupportedVersion(final Attributes attributes) {
        final String declared = attributeValue(attributes, VERSION_ATTR);
        if (VERSION.equals(declared)) {
            return;
        }
        throw new FormatException("Invalid cg-xml version: " + declared);
    }

    /**
     * Starts a record on the receiver. A record without an id attribute is
     * started with the empty string as its identifier.
     */
    private void openRecord(final Attributes attributes) {
        final String id = attributeValue(attributes, ID_ATTR);
        getReceiver().startRecord(id == null ? "" : id);
    }

    /**
     * Starts an entity on the receiver.
     *
     * @throws FormatException if the name attribute is missing
     */
    private void openEntity(final Attributes attributes) {
        final String entityName = attributeValue(attributes, NAME_ATTR);
        if (entityName == null) {
            throw new FormatException("Entity name must not be null");
        }
        getReceiver().startEntity(entityName);
    }

    /**
     * Sends a literal to the receiver. The value is passed on exactly as the
     * attribute lookup returned it, so an absent value attribute is not
     * replaced by a default.
     *
     * @throws FormatException if the name attribute is missing
     */
    private void forwardLiteral(final Attributes attributes) {
        final String literalName = attributeValue(attributes, NAME_ATTR);
        final String literalValue = attributeValue(attributes, VALUE_ATTR);
        if (literalName == null) {
            throw new FormatException("Literal name must not be null");
        }
        getReceiver().literal(literalName, literalValue);
    }

    /**
     * Looks up an attribute that is not bound to any namespace.
     *
     * @param attributes    attribute list of the current element
     * @param attributeName local name of the attribute
     * @return the result of the lookup with an empty namespace URI
     */
    private static String attributeValue(final Attributes attributes,
            final String attributeName) {
        return attributes.getValue("", attributeName);
    }

}