/*
* #!
* Ontopia Engine
* #-
* Copyright (C) 2001 - 2013 The Ontopia Project
* #-
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* !#
*/
package net.ontopia.topicmaps.utils.rdf;
import com.hp.hpl.jena.rdfxml.xmlinput.ALiteral;
import com.hp.hpl.jena.rdfxml.xmlinput.AResource;
import com.hp.hpl.jena.rdfxml.xmlinput.StatementHandler;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.NodeIterator;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.shared.JenaException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import java.util.HashMap;
import java.util.Iterator;
import java.util.ArrayList;
import java.util.Collection;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import net.ontopia.utils.OntopiaRuntimeException;
import net.ontopia.topicmaps.utils.PSI;
import net.ontopia.topicmaps.utils.MergeUtils;
import net.ontopia.topicmaps.utils.AssociationBuilder;
import net.ontopia.infoset.core.LocatorIF;
import net.ontopia.infoset.impl.basic.URILocator;
import net.ontopia.topicmaps.query.core.QueryProcessorIF;
import net.ontopia.topicmaps.query.core.QueryResultIF;
import net.ontopia.topicmaps.query.core.InvalidQueryException;
import net.ontopia.topicmaps.query.utils.QueryUtils;
import net.ontopia.topicmaps.core.AssociationIF;
import net.ontopia.topicmaps.core.OccurrenceIF;
import net.ontopia.topicmaps.core.ScopedIF;
import net.ontopia.topicmaps.core.TMObjectIF;
import net.ontopia.topicmaps.core.TopicIF;
import net.ontopia.topicmaps.core.TopicMapBuilderIF;
import net.ontopia.topicmaps.core.TopicMapIF;
import net.ontopia.topicmaps.core.TopicNameIF;
import net.ontopia.utils.StreamUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* EXPERIMENTAL: Converts an RDF model to a topic map using a
* schema-specific mapping defined using RDF.
*/
public class RDFToTopicMapConverter {
// The topic map that converted statements are added to.
private TopicMapIF topicmap;
// Maps the URI (String) of each mapped RDF property to the StatementHandler
// that converts statements using that property. Remains null until
// buildMappings has been called.
private Map mappings;
// Builder obtained from the target topic map; used to create new objects.
private TopicMapBuilderIF builder;
// Leniency flag consulted by the mappers: when false, statements that
// cannot be mapped raise an RDFMappingException instead of only being logged.
private boolean lenient;
// Logger shared by the converter and its inner mapper classes.
static Logger logger = LoggerFactory.getLogger(RDFToTopicMapConverter.class.getName());
// Namespace prefix of the RTM (RDF-to-topic-map) mapping vocabulary.
static final String RTM_PREFIX = "http://psi.ontopia.net/rdf2tm/#";
// Property whose value selects how an RDF property is mapped (see getMapper).
public static final String RTM_MAPSTO = RTM_PREFIX + "maps-to";
// Values of rtm:maps-to recognised by getMapper, plus the properties that
// refine a mapping (roles, scope, type).
public static final String RTM_BASENAME = RTM_PREFIX + "basename";
public static final String RTM_INSTANCE_OF = RTM_PREFIX + "instance-of";
public static final String RTM_OCCURRENCE = RTM_PREFIX + "occurrence";
public static final String RTM_ASSOCIATION = RTM_PREFIX + "association";
// Role type played by the statement subject in a mapped association.
public static final String RTM_SUBJECT_ROLE = RTM_PREFIX + "subject-role";
// Role type played by the statement object in a mapped association.
public static final String RTM_OBJECT_ROLE = RTM_PREFIX + "object-role";
// Scoping topics applied to the constructs created by a mapping.
public static final String RTM_IN_SCOPE = RTM_PREFIX + "in-scope";
// NOTE(review): RTM_SUBJECT_URI and RTM_OBJECT_URI are not referenced in
// this class; presumably used elsewhere in the package.
public static final String RTM_SUBJECT_URI = RTM_PREFIX + "subject-uri";
public static final String RTM_OBJECT_URI = RTM_PREFIX + "object-uri";
public static final String RTM_SOURCE_LOCATOR = RTM_PREFIX + "source-locator";
public static final String RTM_SUBJECT_IDENTIFIER = RTM_PREFIX + "subject-identifier";
public static final String RTM_SUBJECT_LOCATOR = RTM_PREFIX + "subject-locator";
// Overrides the type topic of occurrences/associations created by a mapping.
public static final String RTM_TYPE = RTM_PREFIX + "type";
// Subject identifier of the topic used to scope names made by generateNames.
public static final String RTM_GENERATED_NAME = RTM_PREFIX + "generated-name";
/**
 * EXPERIMENTAL: Reads RDF data from a URL and adds it to the topic
 * map, translating statements according to the given mapping.
 * @param infileurl the URL to read the input data from
 * @param syntax the syntax of the input data. Values are "RDF/XML", "N3",
 *               and "N-TRIPLE". Defaults to "RDF/XML" if null.
 * @param mappingurl the URL to read the mapping from. If null the mapping
 *                   is taken from the input data.
 * @param mappingsyntax the syntax of the mapping. Values are "RDF/XML", "N3",
 *                      and "N-TRIPLE". Defaults to "RDF/XML" if null.
 * @param topicmap The topic map to add the converted data to.
 * @param lenient When false, errors are thrown if the RDF data cannot be
 *                correctly mapped (for example, a statement type is mapped
 *                to a topic name, but has a URI value).
 */
public static void convert(String infileurl, String syntax,
                           String mappingurl, String mappingsyntax,
                           TopicMapIF topicmap, boolean lenient)
  throws JenaException, IOException {
  // the mapping (if any) is loaded by the constructor
  final RDFToTopicMapConverter rdf2tm =
    new RDFToTopicMapConverter(mappingurl, mappingsyntax, topicmap);
  rdf2tm.setLenient(lenient);

  // then the data itself is read and converted
  rdf2tm.doConversion(infileurl, syntax);
}
/**
 * EXPERIMENTAL: Adds the statements of an in-memory RDF model to the
 * topic map, using the rtm:maps-to statements found in that same
 * model as the mapping. The converter's lenient flag is not set here,
 * so it keeps its default value (false).
 * @param model the RDF model holding both the data and the mapping
 * @param topicmap the topic map to add the converted data to
 */
public static void convert(Model model, TopicMapIF topicmap)
  throws JenaException, IOException {
  new RDFToTopicMapConverter(model, topicmap).doConversion(model);
}
/**
 * EXPERIMENTAL: Automatically generates names for nameless topics
 * based on their subject identifiers. Every generated name is scoped
 * by the topic whose subject identifier is rtm:generated-name; that
 * topic is reused if it already exists and created otherwise.
 * @param topicmap the topic map whose nameless topics are to be named
 * @since 2.0.4
 */
public static void generateNames(TopicMapIF topicmap) {
  TopicMapBuilderIF builder = topicmap.getBuilder();
  QueryProcessorIF processor = QueryUtils.getQueryProcessor(topicmap);
  QueryResultIF result, generatedNameQuery;
  try {
    result = processor.execute("select $TOPIC, $SI from " +
                               " subject-identifier($TOPIC, $SI), " +
                               " not(topic-name($TOPIC, $TN))?");
    // Check if a topic has already been created for scoping generated names.
    generatedNameQuery = processor.execute("select $TOPIC from " +
                                           " subject-identifier($TOPIC, \""
                                           + RTM_GENERATED_NAME + "\")?");
  } catch (InvalidQueryException e) {
    throw new OntopiaRuntimeException(e); // impossible error
  }

  try {
    // If it exists, use old rtm:generated-name topic. Otherwise, create one.
    TopicIF generatedNameTopic;
    try {
      generatedNameTopic = generatedNameQuery.next()
        ? (TopicIF) generatedNameQuery.getValue("TOPIC")
        : builder.makeTopic();
    } finally {
      generatedNameQuery.close(); // was never closed before
    }

    try {
      generatedNameTopic.addSubjectIdentifier(new URILocator(RTM_GENERATED_NAME));
    } catch (java.net.MalformedURLException e) {
      throw new OntopiaRuntimeException(e); // impossible error
    }
    // Only label the scoping topic when it has no name yet; otherwise every
    // call would pile up another identical "Generated Name" name on it.
    if (generatedNameTopic.getTopicNames().isEmpty())
      builder.makeTopicName(generatedNameTopic, "Generated Name");

    int tix = result.getIndex("TOPIC");
    int six = result.getIndex("SI");
    while (result.next()) {
      TopicIF topic = (TopicIF) result.getValue(tix);
      String name = nameFromIndicator((String) result.getValue(six));
      if (name.length() > 0) {
        TopicNameIF bn = builder.makeTopicName(topic, name);
        bn.addTheme(generatedNameTopic);
      }
    }
  } finally {
    result.close(); // also released when name generation fails half-way
  }
}

/**
 * INTERNAL: Derives a name from a subject identifier. With a fragment
 * the result is "last-path-segment:fragment", otherwise it is whatever
 * follows the last slash. The slash is searched for before the '#', so
 * a fragment that itself contains '/' no longer makes substring() fail.
 */
private static String nameFromIndicator(String indicator) {
  int hash = indicator.indexOf('#');
  if (hash == -1)
    return indicator.substring(indicator.lastIndexOf('/') + 1);

  int slash = indicator.lastIndexOf('/', hash); // -1 if no slash before '#'
  return indicator.substring(slash + 1, hash) + ":" +
    indicator.substring(hash + 1);
}
// --- Internal
private RDFToTopicMapConverter(String mappingurl, String syntax, TopicMapIF topicmap)
throws JenaException, MalformedURLException {
this.topicmap = topicmap;
this.builder = topicmap.getBuilder();
if (mappingurl != null) {
Model model = ModelFactory.createDefaultModel();
model.read(mappingurl, syntax);
buildMappings(model);
}
}
/**
 * INTERNAL: Creates a converter for the given topic map, taking the
 * mapping from the supplied model.
 */
private RDFToTopicMapConverter(Model model, TopicMapIF topicmap)
  throws JenaException, MalformedURLException {
  this.builder = topicmap.getBuilder();
  this.topicmap = topicmap;
  // the mapping statements live in the model itself
  buildMappings(model);
}
/**
 * INTERNAL: Sets the leniency flag that the mappers consult when they
 * meet a statement they cannot map.
 */
private void setLenient(final boolean lenient) {
  this.lenient = lenient;
}
/**
 * INTERNAL: Converts the RDF data found at the given URL. If the
 * mapping is already known and the data is RDF/XML, the statements are
 * streamed straight into the mappers; otherwise the data is loaded
 * into a Jena model first (which is also used as the mapping source
 * when no mapping has been built yet).
 * @param url the URL of the RDF data
 * @param syntax the RDF syntax name, or null for RDF/XML
 * @throws IOException if the URL is malformed or cannot be read
 */
private void doConversion(String url, String syntax)
  throws JenaException, IOException {
  if (mappings != null && (syntax == null || syntax.equals("RDF/XML")))
    RDFUtils.parseRDFXML(url, new ToTMStatementHandler());
  else {
    URL uri;
    try {
      uri = new URL(url);
    } catch (MalformedURLException mufe) {
      throw new IOException(mufe);
    }
    Model model = ModelFactory.createDefaultModel();
    // The stream used to be handed to Jena and never closed; make sure it
    // is released even when parsing fails.
    InputStream in = uri.openStream();
    try {
      model.read(in, url, syntax);
    } finally {
      in.close();
    }
    if (mappings == null)
      buildMappings(model);
    doConversion(model);
  }
}
/**
 * INTERNAL: Feeds every statement of the model, subject by subject,
 * to a ToTMStatementHandler. The Jena nodes are exposed to the handler
 * through wrapper objects that are reused for all statements.
 */
private void doConversion(Model model) throws JenaException {
  final StatementHandler handler = new ToTMStatementHandler();
  final AResourceWrapper subjectWrapper = new AResourceWrapper();
  final AResourceWrapper predicateWrapper = new AResourceWrapper();
  final AResourceWrapper objectWrapper = new AResourceWrapper();
  final ALiteralWrapper literalWrapper = new ALiteralWrapper();

  for (ResIterator subjects = model.listSubjects(); subjects.hasNext(); ) {
    final Resource subject = subjects.nextResource();

    // walk through all statements about this subject
    for (StmtIterator statements = subject.listProperties(); statements.hasNext(); ) {
      final Statement statement = statements.nextStatement();
      subjectWrapper.resource = statement.getSubject();
      predicateWrapper.resource = statement.getPredicate();

      final RDFNode value = statement.getObject();
      if (!(value instanceof Resource)) {
        literalWrapper.literal = (Literal) value;
        handler.statement(subjectWrapper, predicateWrapper, literalWrapper);
        continue;
      }
      objectWrapper.resource = (Resource) value;
      handler.statement(subjectWrapper, predicateWrapper, objectWrapper);
    }
  }
}
/**
 * INTERNAL: Rebuilds the mappings table from the rtm:maps-to
 * statements in the model. Each mapped property URI is associated
 * with the StatementHandler produced by getMapper.
 * @throws RDFMappingException if a mapping statement is not valid
 */
private void buildMappings(Model model) throws MalformedURLException {
  mappings = new HashMap();
  Property mapsTo = model.createProperty(RTM_MAPSTO);
  StmtIterator it = model.listStatements(null, mapsTo, (RDFNode) null);
  try {
    while (it.hasNext()) {
      Statement stmt = it.nextStatement();
      Resource mapped = stmt.getSubject();
      StatementHandler mapper = getMapper(mapped, stmt.getObject(), model);
      mappings.put(mapped.getURI(), mapper);
    }
  } finally {
    // getMapper throws on invalid mappings; release the iterator anyway
    it.close();
  }
}
// --- Configuration interface
// --- Internal
/**
 * INTERNAL: Creates the mapper for one rtm:maps-to statement.
 * @param subject the RDF property being mapped
 * @param node the value of rtm:maps-to for that property
 * @param model the model holding the mapping
 * @throws RDFMappingException if the rtm:maps-to value is unknown, or
 *         an association mapping lacks its subject or object role
 */
private StatementHandler getMapper(Resource subject, RDFNode node, Model model)
  throws JenaException, MalformedURLException {
  final String construct = node.toString();

  if (RTM_BASENAME.equals(construct))
    return new TopicNameMapper(getScope(subject, model));

  if (RTM_INSTANCE_OF.equals(construct)) {
    // unscoped instance-of becomes a plain type; scoped needs an association
    final Collection themes = getScope(subject, model);
    if (!themes.isEmpty())
      return new ScopedInstanceMapper(themes);
    return new InstanceMapper();
  }

  if (RTM_SUBJECT_IDENTIFIER.equals(construct))
    return new SubjectIdentifierMapper();

  if (RTM_SOURCE_LOCATOR.equals(construct))
    return new SourceLocatorMapper();

  if (RTM_SUBJECT_LOCATOR.equals(construct))
    return new SubjectLocatorMapper();

  if (RTM_OCCURRENCE.equals(construct)) {
    final TopicIF occurrenceType = getType(subject, model);
    final Collection themes = getScope(subject, model);
    return new OccurrenceMapper(occurrenceType, themes);
  }

  if (RTM_ASSOCIATION.equals(construct)) {
    final LocatorIF subjectRole = getTopicIndicator(subject, RTM_SUBJECT_ROLE, model);
    if (subjectRole == null)
      throw new RDFMappingException("No rtm:subject-role for " + subject);

    final LocatorIF objectRole = getTopicIndicator(subject, RTM_OBJECT_ROLE, model);
    if (objectRole == null)
      throw new RDFMappingException("No rtm:object-role for " + subject);

    final TopicIF associationType = getType(subject, model);
    final Collection themes = getScope(subject, model);
    return new AssociationMapper(subjectRole, objectRole, associationType, themes);
  }

  throw new RDFMappingException("Unknown value for rtm:maps-to: " + construct);
}
/**
 * Finds all RTM_IN_SCOPE properties for this property and returns a
 * collection containing the RDF URIs of the values as URILocators.
 * The locators are not resolved to topics here.
 * @param rdfprop the mapped RDF property; must be a Resource
 * @param model the model holding the mapping
 * @return a possibly empty collection of LocatorIF objects
 * @throws RDFMappingException if a scope value is a literal or a
 *         blank node, since neither has a URI to identify the topic by
 */
private Collection getScope(RDFNode rdfprop, Model model)
  throws JenaException, MalformedURLException {
  Resource subject = (Resource) rdfprop;
  Property prop = model.getProperty(RTM_IN_SCOPE);
  NodeIterator it = model.listObjectsOfProperty(subject, prop);
  ArrayList scope = new ArrayList();
  try {
    while (it.hasNext()) {
      Object o = it.next();
      if (!(o instanceof Resource))
        throw new RDFMappingException("Scoping topic must be specified by a resource, not by " + o);
      Resource obj = (Resource) o;
      // a blank node has no URI, so it used to reach URILocator as null
      if (obj.isAnon())
        throw new RDFMappingException("Scoping topic must be specified by a URI, not by blank node " + obj);
      scope.add(new URILocator(obj.getURI()));
    }
  } finally {
    it.close(); // also on the exception paths above
  }
  return scope;
}
/**
 * INTERNAL: Returns the topic identified by the first rtm:type value
 * of the mapped property, creating the topic if it does not exist.
 * @param rdfprop the mapped RDF property; must be a Resource
 * @param model the model holding the mapping
 * @return the type topic, or null if the property has no rtm:type
 * @throws RDFMappingException if the rtm:type value is a literal or a
 *         blank node (previously a ClassCastException or a null URI)
 */
private TopicIF getType(RDFNode rdfprop, Model model)
  throws JenaException, MalformedURLException {
  Resource subject = (Resource) rdfprop;
  Property prop = model.getProperty(RTM_TYPE);
  NodeIterator it = model.listObjectsOfProperty(subject, prop);
  try {
    if (!it.hasNext())
      return null;

    // only the first value is used, as before
    RDFNode value = it.nextNode();
    if (!(value instanceof Resource) || ((Resource) value).isAnon())
      throw new RDFMappingException("Type topic must be specified by a URI, not by " + value);

    return getTopic(new URILocator(((Resource) value).getURI()));
  } finally {
    it.close(); // the iterator is abandoned after the first value
  }
}
/**
 * INTERNAL: Returns the topic identified by the URI value of the given
 * property on the subject, creating it if necessary; null if
 * getTopicIndicator finds no usable value.
 */
private TopicIF getTopic(Resource subject, String property, Model model)
  throws JenaException, MalformedURLException {
  final LocatorIF indicator = getTopicIndicator(subject, property, model);
  return (indicator == null) ? null : getTopic(indicator);
}
/**
 * INTERNAL: Looks up the topic with the given subject identifier,
 * creating a new topic carrying that identifier if none exists.
 */
private TopicIF getTopic(LocatorIF loc) {
  final TopicIF existing = topicmap.getTopicBySubjectIdentifier(loc);
  if (existing != null)
    return existing;

  final TopicIF created = builder.makeTopic();
  created.addSubjectIdentifier(loc);
  return created;
}
/**
 * INTERNAL: Returns a locator for the first URI value of the given
 * property on the subject. Blank node values are skipped.
 * @param subject the resource whose property is read
 * @param property the URI of the property to read
 * @param model the model holding the mapping
 * @return the locator, or null if there is no URI value
 * @throws RDFMappingException if a value is a literal (previously this
 *         surfaced as a ClassCastException)
 */
private LocatorIF getTopicIndicator(Resource subject, String property,
                                    Model model)
  throws JenaException, MalformedURLException {
  Property prop = model.getProperty(property);
  NodeIterator it = model.listObjectsOfProperty(subject, prop);
  try {
    while (it.hasNext()) {
      RDFNode value = it.nextNode();
      if (!(value instanceof Resource))
        throw new RDFMappingException("Value of " + property + " must be a resource, not " + value);
      Resource obj = (Resource) value;
      if (obj.isAnon())
        continue; // FIXME: is this really ok?
      return new URILocator(obj.getURI());
    }
    return null;
  } finally {
    it.close(); // the iterator is usually abandoned before exhaustion
  }
}
// --- Individual mappers
/**
 * INTERNAL: Common base class for the mappers. It resolves RDF nodes
 * to topics, applies the configured scope, and provides default
 * statement() implementations which reject the statement; subclasses
 * override the variant(s) they are able to map.
 */
abstract class AbstractMapper implements StatementHandler {
  /**
   * INTERNAL: The scope to apply; never null. Holds LocatorIF objects
   * until resolved and TopicIF objects afterwards (see translated).
   */
  protected Collection scope;
  /** INTERNAL: Name of the mapped construct, used in error messages. */
  protected String construct;
  /**
   * INTERNAL: If false, the scope collection contains unresolved
   * URILocator objects. If true, the scope collection contains the
   * topics identified by the URILocator objects. This is to avoid
   * bug #1317, ie: to avoid creating topics for scope topics
   * attached to unused properties when using an external mapping
   * file.
   */
  protected boolean translated;

  /** INTERNAL: Creates a mapper with an empty scope. */
  public AbstractMapper(String construct) {
    this(construct, null);
  }

  /**
   * INTERNAL: Creates a mapper with the given scope.
   * @param construct name of the construct, for error messages
   * @param scope collection of LocatorIF; null is treated as empty so
   *        that addScope cannot fail on a missing collection
   */
  public AbstractMapper(String construct, Collection scope) {
    this.scope = (scope == null) ? new ArrayList(0) : scope;
    this.construct = construct;
  }

  /**
   * INTERNAL: Returns the topic for an RDF node, creating it if needed.
   * URI nodes are matched on subject identifier; anonymous nodes are
   * matched on an "x-anon:" item identifier instead.
   */
  public TopicIF getSubject(AResource sub) {
    try {
      if (!sub.isAnonymous())
        return getTopic(new URILocator(sub.getURI()));

      // the next lines solve bug #1339 by working around
      // http://sourceforge.net/tracker/index.php?func=detail&aid=1082269&group_id=40417&atid=430288
      String id;
      if (sub.hasNodeID())
        id = sub.getAnonymousID();
      else
        id = sub.toString();

      // we don't want the pseudo-URIs of anonymous nodes as
      // subject identifiers
      LocatorIF loc = new URILocator("x-anon:" + id);
      TopicIF topic = (TopicIF) topicmap.getObjectByItemIdentifier(loc);
      if (topic == null) {
        topic = builder.makeTopic();
        topic.addItemIdentifier(loc);
      }
      return topic;
    } catch (java.net.MalformedURLException e) {
      throw new OntopiaRuntimeException(e);
    }
  }

  /** INTERNAL: Returns the topic for a predicate; same as getSubject. */
  public TopicIF getPredicate(AResource obj) {
    return getSubject(obj);
  }

  /** INTERNAL: Returns the topic for an object node; same as getSubject. */
  public TopicIF getObject(AResource obj) {
    return getSubject(obj);
  }

  /**
   * INTERNAL: Adds all scoping topics to the given object, resolving
   * the scope locators to topics the first time it is called.
   */
  public void addScope(ScopedIF scoped) {
    if (!translated)
      resolveScope();

    Iterator it = scope.iterator();
    while (it.hasNext())
      scoped.addTheme((TopicIF) it.next());
  }

  /**
   * INTERNAL: Default handling of statements with literal values: log
   * a warning and, unless lenient, throw RDFMappingException.
   */
  public void statement(AResource sub, AResource pred, ALiteral lit) {
    String msg = "Statements mapped to " + construct + " cannot have literal " +
      "objects. Found (" + sub + ", " + pred + ", " + lit + ")";
    logger.warn(msg);
    if (!lenient)
      throw new RDFMappingException(msg);
  }

  /**
   * INTERNAL: Default handling of statements with resource values: log
   * a warning and, unless lenient, throw RDFMappingException.
   */
  public void statement(AResource sub, AResource pred, AResource obj) {
    String msg = "Statements mapped to " + construct + " cannot have URI " +
      "reference objects. Found (" + sub + ", " + pred + ", " + obj + ")";
    logger.warn(msg);
    if (!lenient)
      throw new RDFMappingException(msg);
  }

  // Internal methods

  /**
   * INTERNAL: Replaces the locators in the scope collection with the
   * topics they identify, creating topics where necessary.
   */
  private void resolveScope() {
    translated = true; // avoid multiple calls here

    Collection scope2 = new ArrayList(scope.size());
    Iterator it = scope.iterator();
    while (it.hasNext())
      scope2.add(getTopic((LocatorIF) it.next()));

    scope = scope2; // now we've translated the scope
  }
}
/**
 * INTERNAL: Maps a statement to an unscoped instance-of relationship:
 * the object topic becomes a type of the subject topic.
 */
class InstanceMapper extends AbstractMapper {

  public InstanceMapper() {
    super("rtm:instance-of");
  }

  public void statement(AResource sub, AResource pred, AResource obj) {
    final TopicIF instance = getSubject(sub);
    instance.addType(getObject(obj));
  }
}
/**
 * INTERNAL: Maps a statement to a scoped instance-of relationship,
 * represented as a class-instance association (XTM PSIs) carrying the
 * configured scope.
 */
class ScopedInstanceMapper extends AbstractMapper {
  /**
   * Created on the first statement rather than in the constructor, so
   * that a mapping which is never used does not add the class-instance
   * PSI topics to the topic map (same reasoning as the lazily resolved
   * scope and association roles).
   */
  private AssociationBuilder abuilder;

  public ScopedInstanceMapper(Collection scope) {
    super("rtm:instance-of", scope);
  }

  public void statement(AResource sub, AResource pred, AResource obj) {
    TopicIF topic = getSubject(sub);
    TopicIF type = getObject(obj);

    if (abuilder == null) {
      TopicIF assoc = getTopic(PSI.getXTMClassInstance());
      TopicIF role1 = getTopic(PSI.getXTMClass());
      TopicIF role2 = getTopic(PSI.getXTMInstance());
      abuilder = new AssociationBuilder(assoc, role1, role2);
    }

    AssociationIF assoc = abuilder.makeAssociation(type, topic); // (class, instance)
    addScope(assoc);
  }
}
/**
 * INTERNAL: Maps a statement to a subject identifier: the URI of the
 * object becomes a subject identifier of the subject topic. If another
 * topic already has that identifier the two topics are merged.
 */
class SubjectIdentifierMapper extends AbstractMapper {

  public SubjectIdentifierMapper() {
    super("rtm:subject-identifier");
  }

  public void statement(AResource sub, AResource pred, AResource obj) {
    if (obj.isAnonymous()) {
      logger.warn("Blank nodes cannot be subject identifiers; " +
                  "subject: " + sub.getURI() + "; " +
                  "property: " + pred.getURI());
      // honour the lenient flag like the other mappers do
      if (!lenient)
        throw new RDFMappingException("Blank nodes cannot be subject identifiers",
                                      sub.getURI(), pred.getURI());
      return; // lenient: nothing usable in this statement
    }

    TopicIF topic = getSubject(sub);
    LocatorIF loc = null;
    try {
      loc = new URILocator(obj.getURI());
    } catch (MalformedURLException e) {
      throw new OntopiaRuntimeException("INTERNAL ERROR", e);
    }

    TopicIF other = topicmap.getTopicBySubjectIdentifier(loc);
    if (other != null && other != topic)
      MergeUtils.mergeInto(other, topic);
    else
      topic.addSubjectIdentifier(loc);
  }
}
/**
 * INTERNAL: Maps a statement to a source locator: the URI of the
 * object becomes an item identifier of the subject topic. If another
 * topic already has that item identifier the two topics are merged.
 */
class SourceLocatorMapper extends AbstractMapper {

  public SourceLocatorMapper() {
    super("rtm:source-locator");
  }

  public void statement(AResource sub, AResource pred, AResource obj) {
    if (obj.isAnonymous()) {
      logger.warn("Blank nodes cannot be source locators; " +
                  "subject: " + sub.getURI() + "; " +
                  "property: " + pred.getURI());
      if (!lenient)
        throw new RDFMappingException("Blank nodes cannot be source locators",
                                      sub.getURI(), pred.getURI());
      return; // lenient: a blank node has no URI to build a locator from
    }

    TopicIF topic = getSubject(sub);
    LocatorIF loc = null;
    try {
      loc = new URILocator(obj.getURI());
    } catch (MalformedURLException e) {
      throw new OntopiaRuntimeException("INTERNAL ERROR", e);
    }

    TMObjectIF other = topicmap.getObjectByItemIdentifier(loc);
    if (other == null)
      // Nobody has this identifier yet. This case used to fall through the
      // instanceof test, so the identifier was silently never added.
      topic.addItemIdentifier(loc);
    else if (other instanceof TopicIF) {
      if (other != topic)
        MergeUtils.mergeInto((TopicIF) other, topic);
      // else: the topic already carries this identifier
    } else
      // FIXME: what to do? The identifier belongs to a non-topic object;
      // as before nothing is changed, but the conflict is now reported.
      logger.warn("Source locator " + loc + " already identifies a " +
                  "non-topic object; statement ignored");
  }
}
/**
 * INTERNAL: Maps a statement to a subject locator: the URI of the
 * object becomes a subject locator of the subject topic. If another
 * topic already has that subject locator the two topics are merged.
 */
class SubjectLocatorMapper extends AbstractMapper {

  public SubjectLocatorMapper() {
    super("rtm:subject-locator");
  }

  public void statement(AResource sub, AResource pred, AResource obj) {
    if (obj.isAnonymous()) {
      logger.warn("Blank nodes cannot be subject locators; " +
                  "subject: " + sub.getURI() + "; " +
                  "predicate: " + pred.getURI());
      // honour the lenient flag like the other mappers do
      if (!lenient)
        throw new RDFMappingException("Blank nodes cannot be subject locators",
                                      sub.getURI(), pred.getURI());
      return; // lenient: nothing usable in this statement
    }

    TopicIF topic = getSubject(sub);
    LocatorIF loc = null;
    try {
      loc = new URILocator(obj.getURI());
    } catch (MalformedURLException e) {
      throw new OntopiaRuntimeException("INTERNAL ERROR", e);
    }

    TopicIF other = topicmap.getTopicBySubjectLocator(loc);
    if (other != null && other != topic)
      MergeUtils.mergeInto(other, topic);
    else
      topic.addSubjectLocator(loc);
  }
}
/**
 * INTERNAL: Maps a statement with a literal value to a topic name on
 * the subject topic, scoped by the configured scope. Statements with
 * resource values get the default rejection from AbstractMapper.
 */
class TopicNameMapper extends AbstractMapper {

  public TopicNameMapper(Collection scope) {
    super("rtm:basename", scope);
  }

  public void statement(AResource sub, AResource pred, ALiteral lit) {
    // FIXME: support xml:lang here?
    final TopicIF owner = getSubject(sub);
    final TopicNameIF created = builder.makeTopicName(owner, lit.toString());
    addScope(created);
  }
}
/**
 * INTERNAL: Maps statements to occurrences on the subject topic.
 * Resource values become external (locator) occurrences and literal
 * values become internal (string) occurrences. The occurrence type is
 * the configured type, or the predicate's topic if none is configured.
 */
class OccurrenceMapper extends AbstractMapper {
  private TopicIF type;

  public OccurrenceMapper(TopicIF type, Collection scope) {
    super("rtm:occurrence", scope);
    this.type = type;
  }

  public void statement(AResource sub, AResource pred, AResource obj) {
    if (obj.isAnonymous()) {
      logger.warn("Blank node cannot be occurrence value; " +
                  "subject: " + sub.getURI() + "; " +
                  "predicate: " + pred.getURI());
      if (!lenient)
        throw new RDFMappingException("Blank node cannot be occurrence value",
                                      sub.getURI(), pred.getURI());
    }

    final String address = obj.getURI();
    if (address == null)
      return; // this happens; not sure why, but it does, so we work around it

    try {
      final TopicIF owner = getSubject(sub);
      final TopicIF occurrenceType = typeFor(pred);
      final LocatorIF value = new URILocator(address);
      addScope(builder.makeOccurrence(owner, occurrenceType, value));
    } catch (MalformedURLException e) {
      throw new OntopiaRuntimeException(e);
    }
  }

  public void statement(AResource sub, AResource pred, ALiteral lit) {
    final TopicIF owner = getSubject(sub);
    final TopicIF occurrenceType = typeFor(pred);
    addScope(builder.makeOccurrence(owner, occurrenceType, lit.toString()));
  }

  /** Picks the configured type, falling back to the predicate's topic. */
  private TopicIF typeFor(AResource pred) {
    return (type != null) ? type : getPredicate(pred);
  }
}
/**
 * INTERNAL: Maps a statement to a binary association between the
 * subject and object topics. The association type is the configured
 * type, or the predicate's topic if none is configured.
 */
class AssociationMapper extends AbstractMapper {
  // the role types are kept as locators until the first statement
  // arrives, which avoids creating role topics that are never needed
  private LocatorIF sroleloc;
  private LocatorIF oroleloc;
  private TopicIF srole;
  private TopicIF orole;
  private TopicIF type;

  public AssociationMapper(LocatorIF sroleloc, LocatorIF oroleloc,
                           TopicIF type, Collection scope) {
    super("rtm:association", scope);
    this.sroleloc = sroleloc;
    this.oroleloc = oroleloc;
    this.type = type;
  }

  public void statement(AResource sub, AResource pred, AResource obj) {
    final TopicIF subjectPlayer = getSubject(sub);
    final TopicIF objectPlayer = getObject(obj);
    final TopicIF associationType = (type != null) ? type : getPredicate(pred);

    // resolve both role types on first use
    if (srole == null) {
      srole = getTopic(sroleloc);
      orole = getTopic(oroleloc);
    }

    final AssociationIF created = builder.makeAssociation(associationType);
    builder.makeAssociationRole(created, srole, subjectPlayer);
    builder.makeAssociationRole(created, orole, objectPlayer);
    addScope(created);
  }
}
// --- Mapping statement handler
/**
 * INTERNAL: Dispatches each statement to the mapper registered for
 * its predicate URI. Statements whose predicate has no mapping are
 * silently ignored.
 */
class ToTMStatementHandler implements StatementHandler {

  public void statement(AResource sub, AResource pred, ALiteral lit) {
    final StatementHandler mapper = mapperFor(pred);
    if (mapper == null)
      return; // unmapped property
    mapper.statement(sub, pred, lit);
  }

  public void statement(AResource sub, AResource pred, AResource obj) {
    final StatementHandler mapper = mapperFor(pred);
    if (mapper == null)
      return; // unmapped property
    mapper.statement(sub, pred, obj);
  }

  /** Looks up the mapper for a predicate; null if it is not mapped. */
  private StatementHandler mapperFor(AResource pred) {
    return (StatementHandler) mappings.get(pred.getURI());
  }
}
}