/*
* Copyright (c) 2013, University of Toronto.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package edu.toronto.cs.xcurator.rdf;
import edu.toronto.cs.xcurator.common.RdfUriConfig;
import edu.toronto.cs.xcurator.common.ElementIdGenerator;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.tdb.TDBFactory;
import com.hp.hpl.jena.vocabulary.RDF;
import edu.toronto.cs.xcurator.common.DataDocument;
import edu.toronto.cs.xcurator.mapping.Mapping;
import edu.toronto.cs.xcurator.mapping.Attribute;
import edu.toronto.cs.xcurator.mapping.Schema;
import edu.toronto.cs.xcurator.mapping.Reference;
import edu.toronto.cs.xcurator.mapping.Relation;
import edu.toronto.cs.xcurator.common.NsContext;
import edu.toronto.cs.xcurator.common.XPathFinder;
import java.io.IOException;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
*
* @author zhuerkan
*/
public class RdfGeneration implements RdfGenerationStep {
private final String tdbDirPath;
private final RdfUriConfig config;
private final XPathFinder xpath;
private final ElementIdGenerator elementIdGenerator;
public RdfGeneration(String tdbDirPath, RdfUriConfig config) {
this.tdbDirPath = tdbDirPath;
this.config = config;
this.xpath = new XPathFinder();
this.elementIdGenerator = new ElementIdGenerator(config.getResourceUriBase());
}
@Override
public void process(List<DataDocument> xmlDocuments, Mapping mapping) {
// Open a connection to the TDB
Model model = TDBFactory.createModel(tdbDirPath);
for (DataDocument dataDoc : xmlDocuments) {
try {
// Check if the mapping passed in is initialized
if (!mapping.isInitialized()) {
throw new Exception("Mapping was not initialized, missing preprocessing or deserializing?");
}
Iterator<Schema> it = mapping.getEntityIterator();
while (it.hasNext()) {
Schema entity = it.next();
NodeList nl = xpath.getNodesByPath(entity.getPath(), dataDoc.Data,
entity.getNamespaceContext());
for (int i = 0; i < nl.getLength(); i++) {
// Create RDFs
// The URI of the subject should be the XBRL link + UUID
// But a resolvable link should be used in the future
Element dataElement = (Element) nl.item(i);
generateRdfs(entity, mapping, dataElement, dataDoc, model);
}
}
// Finish writing to the TDB for this document
model.commit();
} catch (SAXException ex) {
Logger.getLogger(RdfGeneration.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
Logger.getLogger(RdfGeneration.class.getName()).log(Level.SEVERE, null, ex);
} catch (ParserConfigurationException ex) {
Logger.getLogger(RdfGeneration.class.getName()).log(Level.SEVERE, null, ex);
} catch (XPathExpressionException ex) {
Logger.getLogger(RdfGeneration.class.getName()).log(Level.SEVERE, null, ex);
} catch (NoSuchAlgorithmException ex) {
Logger.getLogger(RdfGeneration.class.getName()).log(Level.SEVERE, null, ex);
} catch (Exception ex) {
Logger.getLogger(RdfGeneration.class.getName()).log(Level.SEVERE, null, ex);
}
}
// Close the connection
model.close();
}
private Resource generateRdfs(Schema entity, Mapping mapping, Element dataElement,
DataDocument dataDoc, Model model)
throws XPathExpressionException, IOException, NoSuchAlgorithmException {
// Generate a unique ID for this instance
String instanceUri = elementIdGenerator.generateUri(dataDoc.resourceUriPattern,
entity.getNamespaceContext(), dataElement, dataDoc.Data, xpath);
// Create RDF resources
Resource typeResource = model.createResource(entity.getRdfTypeUri());
Resource instanceResource = model.createResource(instanceUri);
// Return the resource if it has already been created
// Preventing the relation instance resources to be recreated
if (model.contains(instanceResource, RDF.type, typeResource)) {
return instanceResource;
}
// Add type to instance
instanceResource.addProperty(RDF.type, typeResource);
// Add XML type to instance
if (entity.getXmlTypeUri() != null) {
String uriBase = config.getPropertyResourceUriBase();
Property xmlTypeProperty = model.createProperty(
(uriBase.endsWith("/") ? uriBase : uriBase + "/") + "extractedFromXMLType");
instanceResource.addLiteral(xmlTypeProperty, entity.getXmlTypeUri());
}
// Add attribute properties of this instance
Iterator<Attribute> attrIterator = entity.getAttributeIterator();
while (attrIterator.hasNext()) {
Attribute attr = attrIterator.next();
Property attrProperty = model.createProperty(attr.getRdfUri());
NodeList nl = xpath.getNodesByPath(attr.getPath(), dataElement,
entity.getNamespaceContext());
for (int i = 0; i < nl.getLength(); i++) {
String value = nl.item(i).getTextContent().trim();
instanceResource.addProperty(attrProperty, value);
}
}
// Add relation properties of this instance
Iterator<Relation> relIterator = entity.getRelationIterator();
while (relIterator.hasNext()) {
Relation rel = relIterator.next();
// Get potential instances of target entity of this relation
NodeList nl = xpath.getNodesByPath(rel.getPath(), dataElement, dataDoc.Data,
entity.getNamespaceContext());
// Create a cache map for saving values of the references
Map<String, String> cache = new HashMap<>();
for (int i = 0; i < nl.getLength(); i++) {
Element targetElement = (Element) nl.item(i);
Schema targetEntity = mapping.getEntity(rel.getObjectXmlTypeUri());
Iterator<Reference> refIterator = rel.getReferenceIterator();
// Filter the ones that do not meet the reference
// Match is automatically true when there is no reference
boolean match = true;
while (refIterator.hasNext()) {
if (!referenceMatch(dataElement, targetElement, refIterator.next(),
entity.getNamespaceContext(), targetEntity.getNamespaceContext(), cache)) {
// Stop checking when seeing on mis-match
match = false;
break;
}
}
if (!match) {
continue;
}
// Recursively create the target resources
Resource targetResource = generateRdfs(targetEntity, mapping, targetElement, dataDoc, model);
// Build the relation
Property relProperty = model.createProperty(rel.getRdfUri());
instanceResource.addProperty(relProperty, targetResource);
}
}
return instanceResource;
}
private boolean referenceMatch(Element subjecElement, Element objectElement,
Reference reference, NsContext subjectNsContext, NsContext objectNsContext,
Map<String, String> cache) throws XPathExpressionException {
String subjectRefValue;
String path = reference.getPath();
if (cache.containsKey(path)) {
subjectRefValue = cache.get(path);
} else {
subjectRefValue = xpath.getStringByPath(reference.getPath(), subjecElement, subjectNsContext);
cache.put(path, subjectRefValue);
}
String objectRefValue = xpath.getStringByPath(reference.getTargetPath(), objectElement, objectNsContext);
return subjectRefValue.equals(objectRefValue);
}
}