/*
* Copyright (c) 2013, University of Toronto.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package edu.toronto.cs.xml2rdf.mapping;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.hp.hpl.jena.rdf.model.Bag;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.NodeIterator;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.vocabulary.RDFS;
import edu.toronto.cs.xml2rdf.jena.JenaUtils;
import edu.toronto.cs.xml2rdf.string.StringMetric;
import edu.toronto.cs.xml2rdf.utils.LogUtils;
import edu.toronto.cs.xml2rdf.xml.XMLUtils;
public class Mapping {
private Document doc;
private List<Entity> entities;
private Set<String> blackList;
public Mapping(String path, Set<String> blackList) throws ParserConfigurationException, SAXException, IOException {
entities = new ArrayList<Entity>();
this.doc = XMLUtils.parse(path, -1);
this.blackList = blackList;
loadEntities(doc);
}
public Mapping(InputStream is, Set<String> blackList) throws SAXException, IOException, ParserConfigurationException {
entities = new ArrayList<Entity>();
this.doc = XMLUtils.parse(is, -1);
this.blackList = blackList;
loadEntities(doc);
}
public Mapping(Reader reader, Set<String> blackList) throws SAXException, IOException, ParserConfigurationException {
entities = new ArrayList<Entity>();
this.doc = XMLUtils.parse(reader, -1);
this.blackList = blackList;
loadEntities(doc);
}
public Mapping(Document doc, Set<String> blackList) throws SAXException, IOException, ParserConfigurationException {
entities = new ArrayList<Entity>();
this.doc = doc;
this.blackList = blackList;
loadEntities(this.doc);
}
private void loadEntities(Document doc) {
NodeList nl = doc.getElementsByTagName("entity");
for (int i = 0; i < nl.getLength(); i++) {
Element entityElement = (Element) nl.item(i);
Entity entity = getEntityFromElement(entityElement);
this.entities.add(entity);
}
}
private Entity getEntityFromElement(Element entityElement) {
String type = entityElement.getAttribute("type");
String path = entityElement.getAttribute("path");
Entity entity = new Entity(type, path, entityElement, doc, blackList);
entity.reloadId();
entity.reloadAttributes();
entity.reloadRelations();
entity.reloadLookupKeys();
entity.reloadOntologyLinks();
return entity;
}
public void generateRDFs(String tdbPath, Document dataDoc, String typePrefix, PrintStream out,
String format, StringMetric stringMetric, double threshold) throws XPathExpressionException {
Model model = JenaUtils.getTDBModel(tdbPath);
for (Entity entity : entities) {
String entityPath = entity.getPath();
LogUtils.debug(this.getClass(), "Creating instances of " + entity);
NodeList nodeList = XMLUtils.getNodesByPath(entityPath, null, dataDoc);
for (int i = 0; i < nodeList.getLength(); i++) {
entity.generateRDF((Element) nodeList.item(i), dataDoc, model, typePrefix, stringMetric, threshold);
}
LogUtils.debug(this.getClass(), "Instances of " + entity + " are created.");
}
if (out != null) {
model.write(out, format);
}
model.commit();
model.close();
}
public void generateRDFSchema(String tdbPath, Document dataDoc,
String typePrefix, PrintStream out,
String format, StringMetric stringMetric, double threshold) throws XPathExpressionException {
Model model = JenaUtils.getTDBModel(tdbPath);
Map<String, Set<String>> ranges = new HashMap<String, Set<String>>();
Map<String, Set<String>> domains = new HashMap<String, Set<String>>();
Resource rootResource = model.createResource(typePrefix);
com.hp.hpl.jena.rdf.model.Property classProperty = model.createProperty("http://dblab.cs.toronto.edu/project/xcurator/0.1#classes");
Bag classBag = model.createBag(typePrefix + "classBag");
model.add(rootResource, classProperty, classBag);
com.hp.hpl.jena.rdf.model.Property propertyProperty = model.createProperty("http://dblab.cs.toronto.edu/project/xcurator/0.1#properties");
Bag propertyBag = model.createBag(typePrefix + "propertyBag");
model.add(rootResource, propertyProperty, propertyBag);
for (Entity entity : entities) {
Resource type = model.createResource(entity.getType());
model.add(type, RDF.type, RDFS.Class);
model.add(type, RDFS.subClassOf, RDFS.Resource);
if (!classBag.contains(type)) {
classBag.add(type);
}
for (Property prop : entity.getProperties()) {
Set<String> propRange = ranges.get(prop.getName());
if (propRange == null) {
propRange = new HashSet<String>();
ranges.put(prop.getName(), propRange);
}
propRange.add(RDFS.Literal.getURI());
Set<String> propDomain = domains.get(prop.getName());
if (propDomain == null) {
propDomain = new HashSet<String>();
domains.put(prop.getName(), propDomain);
}
propDomain.add(entity.getType());
}
for (Relation relation : entity.getRelations()) {
Set<String> relRange = ranges.get(relation.getName());
if (relRange == null) {
relRange = new HashSet<String>();
ranges.put(relation.getName(), relRange);
}
relRange.add(relation.getTargetEntity());
Set<String> relDomain = domains.get(relation.getName());
if (relDomain == null) {
relDomain = new HashSet<String>();
domains.put(relation.getName(), relDomain);
}
relDomain.add(entity.getType());
}
}
for (String name : ranges.keySet()) {
Resource nameResouce = model.createResource(name);
model.add(nameResouce, RDF.type, RDF.Property);
if (!propertyBag.contains(nameResouce)) {
propertyBag.add(nameResouce);
}
if (ranges.get(name).size() == 0) {
model.add(nameResouce, RDFS.range, RDFS.Resource);
} else {
String range = ranges.get(name).iterator().next();
Resource rangeResouce = model.createResource(range);
model.add(nameResouce, RDFS.range, rangeResouce);
}
if (domains.get(name).size() == 0) {
model.add(nameResouce, RDFS.domain, RDFS.Resource);
} else {
String domain = domains.get(name).iterator().next();
Resource domainResouce = model.createResource(domain);
model.add(nameResouce, RDFS.domain, domainResouce);
}
}
// RDF.Bag
// RDFS.member
if (out != null) {
model.write(out, format);
}
model.commit();
model.close();
}
public static Set<String> getAllTypes(Model model, String typePrefix) {
Resource rootResource = model.createResource(typePrefix);
com.hp.hpl.jena.rdf.model.Property classProperty = model.createProperty("http://dblab.cs.toronto.edu/project/xcurator/0.1#classes");
Bag classBag = model.createBag("http://dblab.cs.toronto.edu/project/xcurator/0.1#classBag");
model.add(rootResource, classProperty, classBag);
Set<String> ret = new HashSet<String>();
NodeIterator iterator = classBag.iterator();
while (iterator.hasNext()) {
Resource resource = (Resource) iterator.next();
ret.add(resource.getURI());
}
return ret;
}
}