/*
* Copyright 2009 Revelytix.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.mulgara.query;
import java.io.BufferedOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.jrdf.graph.BlankNode;
import org.jrdf.graph.Literal;
import org.jrdf.graph.Node;
import org.jrdf.graph.ObjectNode;
import org.jrdf.graph.PredicateNode;
import org.jrdf.graph.SubjectNode;
import org.jrdf.graph.Triple;
import org.jrdf.graph.URIReference;
import org.mulgara.query.rdf.TripleImpl;
import org.mulgara.util.StringUtil;
import org.openrdf.model.URI;
/**
* <p>
* Static utility class for emitting RDF/XML that represents the results of a
* SPARQL CONSTRUCT or DESCRIBE query.
* </p>
* <p>
* <b>Note:</b> This utility builds up the entire set of statements in-memory to sort
* them, so is not suitable for very large graphs.
* </p>
* <p>
* This class borrows heavily from the org.mulgara.content.rdfxml.writer.RDFXMLWriter class.
* </p>
*
* @created Jul 10, 2008
* @author Alex Hall
* @copyright © 2008 <a href="http://www.revelytix.com">Revelytix, Inc.</a>
* @licence <a href="{@docRoot}/../../LICENCE.txt">Open Software License v3.0</a>
*/
public class RdfXmlEmitter {

  private static final Logger logger = Logger.getLogger(RdfXmlEmitter.class);

  /** Comparator for sorting statements in the graph. */
  private static final TripleComparator TRIPLE_COMPARATOR = new TripleComparator();

  /** Index used to access the subject column of the GraphAnswer */
  private static final int SUBJECT_INDEX = 0;

  /** Index used to access the predicate column of the GraphAnswer */
  private static final int PREDICATE_INDEX = 1;

  /** Index used to access the object column of the GraphAnswer */
  private static final int OBJECT_INDEX = 2;

  /** Prefix to use for the RDF namespace. */
  private static final String RDF_PREFIX = "rdf";

  /** The RDF namespace. */
  private static final String RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

  /** Convenience reference to the new line character(s) */
  private static final String NEWLINE = System.getProperty("line.separator");

  /**
   * Character encoding of the emitted document. The XML declaration written by this class
   * does not name an encoding, so parsers will assume UTF-8; the bytes must agree with that.
   */
  private static final Charset OUTPUT_CHARSET = Charset.forName("UTF-8");

  /**
   * Compares RDF triples in subject, predicate, object order. The order for individual
   * nodes is determined as follows:
   * <ol>
   *   <li>URI References (ordered based on the natural ordering of the component URI's)</li>
   *   <li>Blank nodes (ordered based on the natural ordering of the blank node labels)</li>
   *   <li>
   *     Literals, ordered as follows:
   *     <ul>
   *       <li>Datatype URI (untyped literals precede typed literals)</li>
   *       <li>Language tag (literals without a language tag precede those with a tag)</li>
   *       <li>Lexical form</li>
   *     </ul>
   *   </li>
   * </ol>
   */
  private static class TripleComparator implements Comparator<Triple> {

    /**
     * Compares two triples by subject, then predicate, then object.
     * @param triple1 The first triple.
     * @param triple2 The second triple.
     * @return A negative, zero or positive value per the {@link Comparator} contract.
     */
    public int compare(Triple triple1, Triple triple2) {
      int c = compare(triple1.getSubject(), triple2.getSubject());
      if (c != 0) return c;
      c = compare(triple1.getPredicate(), triple2.getPredicate());
      if (c != 0) return c;
      return compare(triple1.getObject(), triple2.getObject());
    }

    /**
     * Compares two nodes by dispatching on the type of the first one. A first node that is
     * not a URI reference, blank node or literal compares as equal to anything.
     */
    private int compare(Node n1, Node n2) {
      if (n1 instanceof URIReference) {
        return compareUri((URIReference)n1, n2);
      } else if (n1 instanceof BlankNode) {
        return compareBlankNode((BlankNode)n1, n2);
      } else if (n1 instanceof Literal) {
        return compareLiteral((Literal)n1, n2);
      }
      return 0;
    }

    /** URI references sort before every other node type, and by URI among themselves. */
    private int compareUri(URIReference n1, Node n2) {
      return (n2 instanceof URIReference) ? n1.getURI().compareTo(((URIReference)n2).getURI()) : -1;
    }

    /** Blank nodes sort after URI references, before everything else, and by ID among themselves. */
    private int compareBlankNode(BlankNode n1, Node n2) {
      if (n2 instanceof URIReference) return 1;
      if (n2 instanceof BlankNode) return n1.getID().compareTo(((BlankNode)n2).getID());
      return -1;
    }

    /** Literals sort after every non-literal; among themselves by datatype, language, lexical form. */
    private int compareLiteral(Literal n1, Node n2) {
      if (!(n2 instanceof Literal)) return 1;
      Literal n2lit = (Literal)n2;
      int c = compareComponent(n1.getDatatypeURI(), n2lit.getDatatypeURI());
      if (c != 0) return c;
      c = compareComponent(n1.getLanguage(), n2lit.getLanguage());
      if (c != 0) return c;
      return n1.getLexicalForm().compareTo(n2lit.getLexicalForm());
    }

    /** Null-safe comparison in which <code>null</code> precedes any non-null value. */
    private <T extends Comparable<T>> int compareComponent(T t1, T t2) {
      if (t1 == null) {
        return (t2 == null) ? 0 : -1;
      } else {
        return (t2 == null) ? 1 : t1.compareTo(t2);
      }
    }
  }

  /**
   * Writes the RDF graph described by the specified GraphAnswer to an output stream.
   * This method consumes and closes the supplied Answer, so calling code should pass in
   * a clone if it needs to access the Answer after this method finishes. This method will
   * include the XML document headers and is equivalent to calling:
   * <pre>
   * RdfXmlEmitter.writeRdfXml(answer, output, true, true);
   * </pre>
   * @see #writeRdfXml(GraphAnswer, OutputStream, boolean, boolean)
   * @param answer The answer object that contains the RDF graph statements.
   * @param output The output stream to write to.
   * @throws QueryException If there was an error accessing statements from the answer.
   */
  public static void writeRdfXml(GraphAnswer answer, OutputStream output) throws QueryException {
    writeRdfXml(answer, output, true, true);
  }

  /**
   * Writes the RDF graph described by the specified GraphAnswer to an output stream,
   * encoded as UTF-8. The answer is always consumed; it is closed only if
   * <code>closeAns</code> is <code>true</code>. The output stream is flushed but not closed.
   * @param answer The answer object that contains the RDF graph statements.
   * @param output The output stream to write to.
   * @param includeDocHeader If <code>true</code>, the RDF/XML output will contain an XML
   *        header with entity definitions, and the body will use the entities to abbreviate
   *        RDF resource URI's.
   * @param closeAns Close the Answer when it has been finished with.
   * @throws QueryException If there was an error accessing statements from the answer.
   */
  public static void writeRdfXml(GraphAnswer answer, OutputStream output, boolean includeDocHeader, boolean closeAns) throws QueryException {
    // Encode explicitly: relying on the platform default charset would produce bytes that
    // disagree with the UTF-8 encoding an XML parser assumes for this document.
    PrintWriter writer = new PrintWriter(new OutputStreamWriter(new BufferedOutputStream(output), OUTPUT_CHARSET));

    Map<String,String> nsMap = createInitialNsMap();
    List<Triple> statements = getStatementList(answer, nsMap, closeAns);
    Collections.sort(statements, TRIPLE_COMPARATOR);

    if (includeDocHeader) {
      writeDocHeader(nsMap, writer);
    }
    writeRdfHeader(nsMap, writer, includeDocHeader);
    writeRdfBody(statements, nsMap, writer, includeDocHeader);
    writeRdfFooter(nsMap, writer);
    writer.flush();

    // PrintWriter never throws on I/O failure, so surface a truncated document in the log.
    if (writer.checkError()) {
      logger.warn("Error writing RDF/XML to output stream; output may be incomplete");
    }
  }

  /**
   * Extracts a list of RDF statements from the GraphAnswer, consuming the answer and
   * optionally closing it. This method also builds up a mapping of namespace URI's to
   * prefixes for all URIReferences and literal datatypes that it encounters.
   * @param answer The graph to extract statements from.
   * @param nsMap The mapping of namespace URI to prefix; updated in place.
   * @param closeAns If <code>true</code>, the answer is closed before returning, whether or
   *        not extraction succeeded. A failure to close is logged and otherwise ignored.
   * @return An unsorted list of RDF statements.
   * @throws QueryException if there was an error accessing the statements, or a row holds a
   *         value that is not legal in its subject, predicate or object position.
   */
  private static List<Triple> getStatementList(GraphAnswer answer, Map<String,String> nsMap, boolean closeAns) throws QueryException {
    assert answer != null;
    List<Triple> statements = new ArrayList<Triple>();
    int nsCounter = 0;

    try {
      answer.beforeFirst();
      while (answer.next()) {
        Object subject = answer.getObject(SUBJECT_INDEX);
        if (!(subject instanceof SubjectNode)) {
          throw new QueryException("Illegal value in subject position: " + subject);
        }
        Object predicate = answer.getObject(PREDICATE_INDEX);
        if (!(predicate instanceof PredicateNode)) {
          throw new QueryException("Illegal value in predicate position: " + predicate);
        }
        Object object = answer.getObject(OBJECT_INDEX);
        if (!(object instanceof ObjectNode)) {
          throw new QueryException("Illegal value in object position: " + object);
        }

        nsCounter = addNamespaceToMap(subject, nsMap, nsCounter);
        nsCounter = addNamespaceToMap(predicate, nsMap, nsCounter);
        nsCounter = addNamespaceToMap(object, nsMap, nsCounter);

        statements.add(new TripleImpl((SubjectNode)subject, (PredicateNode)predicate, (ObjectNode)object));
      }
    } catch (TuplesException te) {
      throw new QueryException("Error accessing statements from GraphAnswer", te);
    } finally {
      try {
        if (closeAns) answer.close();
      } catch (TuplesException te) {
        logger.warn("Error closing GraphAnswer", te);
      }
    }

    return statements;
  }

  /**
   * Writes the XML declaration and a DOCTYPE whose internal subset declares one general
   * entity per namespace prefix mapping.
   * @param nsMap The namespace prefix mappings.
   * @param out The writer.
   */
  private static void writeDocHeader(Map<String,String> nsMap, PrintWriter out) {
    assert out != null;
    String rdf = getRdfPrefix(nsMap);

    out.println("<?xml version=\"1.0\"?>");

    //print opening DOCTYPE DECLARATION tag
    out.print(NEWLINE + "<!DOCTYPE " + rdf + ":RDF [");

    for (Map.Entry<String,String> entry : nsMap.entrySet()) {
      String ns = entry.getKey();
      String prefix = entry.getValue();
      if (ns != null && prefix != null) {
        //write as: <!ENTITY ns 'http://example.org/abc#'>
        out.print(NEWLINE + " <!ENTITY " + prefix + " '" + escapeEntityValue(ns) + "'>");
      }
    }

    //close the opening tag (add a space for readability)
    out.println("]>" + NEWLINE);
  }

  /**
   * Writes the opening tag for the root element of the RDF/XML document. The root element
   * will define XML namespaces for entries in the namespace prefix mapping.
   * @param nsMap The namespace prefix mapping.
   * @param out The writer.
   * @param useEntities Determines whether to abbreviate namespaces using XML entities.
   */
  private static void writeRdfHeader(Map<String,String> nsMap, PrintWriter out, boolean useEntities) {
    //print opening RDF tag (including namespaces)
    out.print("<" + getRdfPrefix(nsMap) + ":RDF");

    //print namespaces
    for (Map.Entry<String,String> entry : nsMap.entrySet()) {
      String prefix = entry.getValue();
      String nsUri = entry.getKey();
      if (prefix == null || nsUri == null) continue;
      // A literal namespace URI must be escaped for use inside a double-quoted attribute.
      String ns = useEntities ? "&" + prefix + ";" : StringUtil.quoteAV(nsUri);
      out.print(NEWLINE + " xmlns:" + prefix + "=\"" + ns + "\"");
    }

    //close the opening tag (add a space for readability)
    out.println(">" + NEWLINE);
  }

  /**
   * Writes the closing tag for the root element of the RDF/XML document.
   * @param nsMap The namespace prefix mapping.
   * @param out The writer.
   */
  private static void writeRdfFooter(Map<String,String> nsMap, PrintWriter out) {
    out.println("</" + getRdfPrefix(nsMap) + ":RDF>");
  }

  /**
   * Writes the given statements as RDF/XML, using the supplied namespace prefix mappings.
   * Consecutive statements with an equal subject are grouped under a single
   * <code>rdf:Description</code> element, so the list is expected to be sorted by subject.
   * Statements whose object is not a URIReference, BlankNode or Literal are skipped.
   * @param statements The statements to write.
   * @param nsMap The namespace prefix mappings.
   * @param out The writer.
   * @param useEntities Determines whether to abbreviate URIReferences using XML entities.
   */
  private static void writeRdfBody(List<Triple> statements, Map<String,String> nsMap, PrintWriter out, boolean useEntities) {
    SubjectNode subject = null;

    for (Triple statement : statements) {
      SubjectNode newSubject = statement.getSubject();
      assert newSubject != null;

      // A change of subject closes the current Description (if any) and opens a new one.
      if (!newSubject.equals(subject)) {
        if (subject != null) {
          writeClosingSubjectTag(nsMap, out);
        }
        subject = newSubject;
        writeOpeningSubjectTag(subject, nsMap, out, useEntities);
      }

      ObjectNode obj = statement.getObject();
      if (obj instanceof URIReference) {
        writeUriStatement(statement, nsMap, out, useEntities);
      } else if (obj instanceof BlankNode) {
        writeBlankNodeStatement(statement, nsMap, out);
      } else if (obj instanceof Literal) {
        writeLiteralStatement(statement, nsMap, out, useEntities);
      }
    }

    if (subject != null) {
      writeClosingSubjectTag(nsMap, out);
    }
  }

  /**
   * Writes an opening <code>rdf:Description</code> tag for the given resource.
   * @param subject The subject of an RDF statement.
   * @param nsMap The namespace prefix mappings for the document.
   * @param out The writer.
   * @param useEntities Determines whether to abbreviate URIReferences using XML entities.
   * @throws IllegalArgumentException if the subject is neither a URIReference nor a BlankNode.
   */
  private static void writeOpeningSubjectTag(SubjectNode subject, Map<String,String> nsMap, PrintWriter out, boolean useEntities) {
    String idAttr = null;
    String rdf = getRdfPrefix(nsMap);

    if (subject instanceof URIReference) {
      URIReference subjUri = (URIReference)subject;
      String about = useEntities ? abbreviateUriWithEntity(subjUri, nsMap)
                                 : StringUtil.quoteAV(subjUri.getURI().toString());
      idAttr = rdf + ":about=\"" + about + "\"";
    } else if (subject instanceof BlankNode) {
      idAttr = rdf + ":nodeID=\"" + StringUtil.quoteAV(((BlankNode)subject).getID()) + "\"";
    } else {
      throw new IllegalArgumentException("Unrecognized SubjectNode type: " + subject.getClass());
    }

    out.println(" <" + rdf + ":Description " + idAttr + ">");
  }

  /**
   * Writes the closing <code>rdf:Description</code> tag for a resource.
   * @param nsMap The namespace prefix mappings for the document.
   * @param out The writer.
   */
  private static void writeClosingSubjectTag(Map<String,String> nsMap, PrintWriter out) {
    out.println(" </" + getRdfPrefix(nsMap) + ":Description>" + NEWLINE);
  }

  /**
   * Writes the predicate/object for an RDF statement whose object is a URIReference.
   * Does not write the subject, as it is written in the containing <code>rdf:Description</code> element.
   * @param statement An RDF statement.
   * @param nsMap The namespace prefix mappings for the document.
   * @param out The writer.
   * @param useEntities Determines whether to abbreviate URIReferences using XML entities.
   */
  private static void writeUriStatement(Triple statement, Map<String,String> nsMap, PrintWriter out, boolean useEntities) {
    assert statement.getObject() instanceof URIReference;
    URIReference objUri = (URIReference)statement.getObject();
    String resource = useEntities ? abbreviateUriWithEntity(objUri, nsMap)
                                  : StringUtil.quoteAV(objUri.getURI().toString());
    String predicate = prefixPredicateUri(statement.getPredicate(), nsMap);
    out.println(" <" + predicate + " " + getRdfPrefix(nsMap) + ":resource=\"" + resource + "\"/>");
  }

  /**
   * Writes the predicate/object for an RDF statement whose object is a BlankNode.
   * Does not write the subject, as it is written in the containing <code>rdf:Description</code> element.
   * @param statement An RDF statement.
   * @param nsMap The namespace prefix mappings for the document.
   * @param out The writer.
   */
  private static void writeBlankNodeStatement(Triple statement, Map<String,String> nsMap, PrintWriter out) {
    assert statement.getObject() instanceof BlankNode;
    String nodeId = StringUtil.quoteAV(((BlankNode)statement.getObject()).getID());
    String predicate = prefixPredicateUri(statement.getPredicate(), nsMap);
    out.println(" <" + predicate + " " + getRdfPrefix(nsMap) + ":nodeID=\"" + nodeId + "\"/>");
  }

  /**
   * Writes the predicate/object for an RDF statement whose object is a Literal.
   * Does not write the subject, as it is written in the containing <code>rdf:Description</code> element.
   * @param statement An RDF statement.
   * @param nsMap The namespace prefix mappings for the document.
   * @param out The writer.
   * @param useEntities Determines whether to abbreviate URIReferences using XML entities.
   */
  private static void writeLiteralStatement(Triple statement, Map<String,String> nsMap, PrintWriter out, boolean useEntities) {
    assert statement.getObject() instanceof Literal;
    Literal objLiteral = (Literal)statement.getObject();
    String predicate = prefixPredicateUri(statement.getPredicate(), nsMap);

    out.print(" <" + predicate);

    URI datatype = objLiteral.getDatatype();
    if (datatype != null) {
      String datatypeValue = useEntities ? abbreviateUriWithEntity(datatype, nsMap)
                                         : StringUtil.quoteAV(datatype.toString());
      out.print(" " + getRdfPrefix(nsMap) + ":datatype=\"" + datatypeValue + "\"");
    }

    String lang = objLiteral.getLanguage();
    if (lang != null) {
      out.print(" xml:lang=\"" + StringUtil.quoteAV(lang) + "\"");
    }

    out.println(">" + StringUtil.quoteAV(objLiteral.getLexicalForm()) + "</" + predicate + ">");
  }

  /**
   * Gets a string representation of an RDF predicate suitable for use in RDF/XML, substituting
   * a namespace prefix where one is mapped for the predicate's namespace.
   * <p>
   * Container membership properties (<code>rdf:_1</code>, <code>rdf:_2</code>, ...) are written
   * with their explicit index rather than as <code>rdf:li</code>. A parser numbers
   * <code>rdf:li</code> elements by their position in the document, and statements are emitted
   * in URI order (where <code>_10</code> sorts before <code>_2</code>), so substituting
   * <code>rdf:li</code> would change the indices of the members.
   * </p>
   * @param predicate The RDF predicate node.
   * @param nsMap The namespace prefix mappings for the document.
   * @return The prefixed predicate URI, or the full URI if no prefix is mapped.
   * @throws IllegalArgumentException if the predicate is not a URIReference.
   */
  private static String prefixPredicateUri(PredicateNode predicate, Map<String,String> nsMap) {
    if (!(predicate instanceof URIReference)) {
      throw new IllegalArgumentException("Invalid predicate type: " + predicate.getClass());
    }

    URIReference predUri = (URIReference)predicate;
    String value = predUri.getURI().toString();
    assert value != null;
    String ns = predUri.getNamespace();
    String prefix = nsMap.get(ns);

    // Substitute the namespace prefix. The namespace is matched as a literal string prefix;
    // it must not be used as a regular expression since URIs contain regex metacharacters.
    if (prefix != null && value.startsWith(ns)) {
      value = prefix + ":" + value.substring(ns.length());
    }

    return value;
  }

  /**
   * Abbreviate an RDF resource URI using an XML entity taken from the specified prefix mappings,
   * for use in an <code>rdf:about</code> or <code>rdf:resource</code> RDF/XML attribute.
   * The part of the URI that is written literally is passed through
   * {@link StringUtil#quoteAV(String)} so it is safe inside a quoted attribute value.
   * @param uriRef The resource URI.
   * @param nsMap The namespace prefix mappings for the document.
   * @return The abbreviated URI reference, or the escaped full URI if its namespace is unmapped.
   */
  private static String abbreviateUriWithEntity(URI uriRef, Map<String,String> nsMap) {
    String value = uriRef.toString();
    assert value != null;
    String ns = uriRef.getNamespace();
    String prefix = nsMap.get(ns);

    if (prefix == null || !value.startsWith(ns)) {
      return StringUtil.quoteAV(value);
    }
    // Literal prefix replacement; the namespace must not be interpreted as a regex.
    return "&" + prefix + ";" + StringUtil.quoteAV(value.substring(ns.length()));
  }

  /**
   * Escapes a namespace URI for use as the quoted value of an <code>&lt;!ENTITY&gt;</code>
   * declaration. In addition to the characters handled by {@link StringUtil#quoteAV(String)},
   * a percent sign must be written as a character reference because it would otherwise start
   * a parameter-entity reference inside the DTD.
   * @param value The raw namespace URI.
   * @return The escaped entity value.
   */
  private static String escapeEntityValue(String value) {
    // NOTE(review): relies on quoteAV escaping '&' and the quote characters, as its existing
    // use for attribute values in this class implies -- confirm against StringUtil.
    return StringUtil.quoteAV(value).replace("%", "&#37;");
  }

  /**
   * If the given node is a URIReference or a literal with a datatype URI, then get the namespace URI
   * and check if it is in the supplied namespace mappings. If not, allocate a new namespace prefix
   * of the form <code>ns<i>N</i></code> and increment the counter.
   * @param node The RDF node to check.
   * @param nsMap The mapping from namespace URI to namespace prefix.
   * @param nsCounter The counter, used to allocate new namespaces.
   * @return The supplied counter value, incremented if a new entry was added to the mapping.
   */
  private static int addNamespaceToMap(Object node, Map<String,String> nsMap, int nsCounter) {
    URI uriToAdd = null;
    if (node instanceof URIReference) {
      uriToAdd = (URIReference)node;
    } else if (node instanceof Literal) {
      uriToAdd = ((Literal)node).getDatatype();
    }

    if (uriToAdd != null) {
      String ns = uriToAdd.getNamespace();
      if (!nsMap.containsKey(ns)) {
        String nsPrefix = "ns" + nsCounter++;
        nsMap.put(ns, nsPrefix);
      }
    }

    return nsCounter;
  }

  /**
   * Gets the RDF namespace prefix from the mappings. Does basic error checking to verify
   * that the prefix was not reassigned.
   * @param nsMap The prefix mappings.
   * @return The RDF namespace prefix.
   */
  private static String getRdfPrefix(Map<String,String> nsMap) {
    String prefix = nsMap.get(RDF_NAMESPACE);
    assert prefix != null && prefix.equals(RDF_PREFIX) : "RDF prefix was reassigned";
    return prefix;
  }

  /**
   * Creates a new mapping of namespace URI to prefix string, pre-populated with mappings for the
   * RDF, RDFS, OWL, DC, and XSD namespaces.
   * @return An initial namespace prefix mapping.
   */
  private static Map<String,String> createInitialNsMap() {
    Map<String,String> nsMap = new HashMap<String,String>();
    nsMap.put(RDF_NAMESPACE, RDF_PREFIX);
    nsMap.put("http://www.w3.org/2000/01/rdf-schema#", "rdfs");
    nsMap.put("http://www.w3.org/2002/07/owl#", "owl");
    nsMap.put("http://purl.org/dc/elements/1.1/", "dc");
    nsMap.put("http://www.w3.org/2001/XMLSchema#", "xsd");
    return nsMap;
  }
}