/*
* Copyright (c) 2013, University of Toronto.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package edu.toronto.cs.xml2rdf.mapping;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.xml.xpath.XPathExpressionException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Bag;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ResIterator;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.vocabulary.OWL;
import com.hp.hpl.jena.vocabulary.RDF;
import edu.toronto.cs.xml2rdf.freebase.FreeBaseLinker;
import edu.toronto.cs.xml2rdf.jena.SKOS;
import edu.toronto.cs.xml2rdf.opencyc.OpenCycOntology;
import edu.toronto.cs.xml2rdf.string.StringMetric;
import edu.toronto.cs.xml2rdf.utils.LogUtils;
import edu.toronto.cs.xml2rdf.xml.XMLUtils;
public class Entity {
static boolean debug = true;
public static final String AUTO_GENERATED = "UUID";
private Element element;
private String path;
private String type;
private Document doc;
private List<Property> properties;
private List<Relation> relations;
private List<LookupKey> lookupKeys;
private Set<String> ontologyTypes;
private String id;
private Set<String> blackList;
public Entity(String type, String path, Element entityElement, Document doc, Set<String> blackList) {
this.type = type;
this.path = path;
this.element = entityElement;
this.doc = doc;
this.blackList = blackList;
}
public void addProperty(Property attr) {
properties.add(attr);
}
public void addRelation(Relation relation) {
relations.add(relation);
}
public void addLookupKey(LookupKey key) {
lookupKeys.add(key);
}
public void addOntologyLink(String conceptURI) {
ontologyTypes.add(conceptURI);
}
public void reloadId() {
id = element.getElementsByTagName("id").item(0).getTextContent();
}
public void reloadAttributes() {
properties = new ArrayList<Property>();
NodeList nl = element.getElementsByTagName("property");
for (int i = 0; i < nl.getLength(); i++) {
Element attributeElement = (Element) nl.item(i);
if (attributeElement.getParentNode() != element) {
continue;
}
String name = attributeElement.getAttribute("name");
String type = attributeElement.getAttribute("type");
String path = attributeElement.getAttribute("path");
Set<String> typeSet = new HashSet<String>();
NodeList ontologyNodeList = attributeElement.getElementsByTagName("ontology-link");
for (int j = 0; j < ontologyNodeList.getLength(); j++) {
Node linkNode = ontologyNodeList.item(j);
if (linkNode instanceof Element) {
String typeURI = ((Element) linkNode).getAttribute("uri");
if (!blackList.contains(typeURI)) {
typeSet.add(typeURI);
}
}
}
addProperty(
new Property(name, type, this, path,
attributeElement, typeSet,
"true".equals(attributeElement.getAttribute("key"))));
}
}
public void reloadRelations() {
relations = new ArrayList<Relation>();
NodeList nl = element.getElementsByTagName("relation");
for (int i = 0; i < nl.getLength(); i++) {
Element relationElement = (Element) nl.item(i);
if (relationElement.getParentNode() != element) {
continue;
}
String name = relationElement.getAttribute("name");
String targetEntity = relationElement.getAttribute("targetEntity");
String path = relationElement.getAttribute("path");
/*
* Loading the foreign lookup key
*/
Element foreignLookupKeyElement = (Element) relationElement
.getElementsByTagName("lookupkey").item(0);
List<Property> properties = new ArrayList<Property>();
NodeList targetProperties = foreignLookupKeyElement
.getElementsByTagName("target-property");
for (int j = 0; j < targetProperties.getLength(); j++) {
Element propertyElement = (Element) targetProperties.item(j);
String propertyName = propertyElement.getAttribute("name");
String propertyPath = propertyElement.getAttribute("path");
properties.add(new Property(propertyName, propertyPath, this,
propertyPath, propertyElement, new HashSet<String>(), true));
}
ForeignLookupKey foreignLookupKey = new ForeignLookupKey(name,
this, targetEntity, properties, foreignLookupKeyElement);
Relation relation = new Relation(name, this, targetEntity,
foreignLookupKey, path, relationElement);
addRelation(relation);
}
}
public void reloadLookupKeys() {
lookupKeys = new ArrayList<LookupKey>();
NodeList nl = element.getElementsByTagName("lookupkey");
for (int i = 0; i < nl.getLength(); i++) {
Element lookupKeyElement = (Element) nl.item(i);
if (lookupKeyElement.getParentNode() != element) {
continue;
}
List<String> attributes = new ArrayList<String>();
NodeList pkAttribute = lookupKeyElement
.getElementsByTagName("attribute");
for (int j = 0; j < pkAttribute.getLength(); j++) {
Element attribute = (Element) pkAttribute.item(j);
attributes.add(attribute.getAttribute("name"));
}
LookupKey lookupKey = new LookupKey(type, this, attributes,
lookupKeyElement);
addLookupKey(lookupKey);
}
}
public void reloadOntologyLinks() {
ontologyTypes = new HashSet<String>();
NodeList nl = element.getElementsByTagName("ontology-link");
for (int i = 0; i < nl.getLength(); i++) {
Element ontologyLinkElement = (Element) nl.item(i);
if (ontologyLinkElement.getParentNode() != element) {
continue;
}
String uri = ontologyLinkElement.getAttribute("uri");
if (!blackList.contains(uri)) {
addOntologyLink(uri);
}
}
}
public Element getElement() {
return element;
}
public void setElement(Element element) {
this.element = element;
}
public String getPath() {
return path;
}
public void setPath(String path) {
this.path = path;
}
public String getType() {
return type;
}
public void setType(String type) {
this.type = type;
}
public Document getDoc() {
return doc;
}
public void setDoc(Document doc) {
this.doc = doc;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public List<Property> getProperties() {
return properties;
}
public List<Relation> getRelations() {
return relations;
}
public List<LookupKey> getLookupKeys() {
return lookupKeys;
}
public String generateId(Element element, Document doc)
throws XPathExpressionException {
int lastEndIndex = 0;
String generatedId = "";
do {
int startIndex = id.indexOf("${", lastEndIndex);
if (startIndex == -1) {
break;
}
int endIndex = id.indexOf("}", startIndex);
if (endIndex == -1) {
break;
}
String literal = id.substring(lastEndIndex, startIndex);
String path = id.substring(startIndex + 2, endIndex);
String pathValue = null;
if (AUTO_GENERATED.equals(path)) {
try {
MessageDigest digest = MessageDigest.getInstance("MD5");
pathValue = "";
byte[] md5 = digest.digest(XMLUtils.asByteArray(element));
for (byte b : md5) {
pathValue += Integer.toString((b & 0xff) + 0x100, 16).substring(1);
}
} catch (NoSuchAlgorithmException e) {
// TODO Auto-generated catch block
if (debug) {
e.printStackTrace();
}
} catch (IOException e) {
// TODO Auto-generated catch block
if (debug) {
e.printStackTrace();
}
}
} else {
pathValue = XMLUtils.getStringByPath(path, element, doc);
}
generatedId += literal + pathValue;
lastEndIndex = endIndex + 1;
} while (true);
if (generatedId.trim().length() == 0) {
LogUtils.error(this.getClass(), "Error in generating id: " + XMLUtils.getAllLeaves(element));
}
return generatedId;
}
public void generateRDF(Element item, Document dataDoc, Model model, String typePrefix,
StringMetric stringMetric, double threshold)
throws XPathExpressionException {
if (getSameResource(model, typePrefix, item, dataDoc) != null) {
LogUtils.debug(this.getClass(), "Duplicate detected!");
return;
}
Resource rootResource = model.createResource(typePrefix);
com.hp.hpl.jena.rdf.model.Property classProperty = model.createProperty("http://dblab.cs.toronto.edu/project/xcurator/0.1#instances");
Bag instanceBag = model.createBag(typePrefix + "instanceBag");
model.add(rootResource, classProperty, instanceBag);
Resource typeResource = model.createResource(getType());
Resource instanceResource = model.createResource(generateId(item, dataDoc));
instanceResource.addProperty(RDF.type, typeResource);
if (!instanceBag.contains(instanceResource)) {
instanceBag.add(instanceResource);
}
String name = null;
for (Property property : getProperties()) {
property.createRDFProperty(model, instanceResource, item, dataDoc);
}
for (Relation relation : getRelations()) {
relation.createRDFRelation(model, instanceResource, item, dataDoc, typePrefix);
}
if (ontologyTypes != null && ontologyTypes.size() > 0) {
name = item.getTextContent();
Set<String> sameAs = OpenCycOntology.getInstance().findSameAsForResource(name, stringMetric, threshold, ontologyTypes);
for (String uri : sameAs) {
instanceResource.addProperty(sameAs.size() == 1 ? SKOS.exactMatch : SKOS.closeMatch, uri);
}
sameAs = new FreeBaseLinker().findSameAsForResource(name, stringMetric, threshold, ontologyTypes);
for (String uri : sameAs) {
instanceResource.addProperty(sameAs.size() == 1 ? SKOS.exactMatch : SKOS.closeMatch, uri);
}
for (String uri : ontologyTypes) {
typeResource.addProperty(OWL.equivalentClass, model.createResource(uri));
}
}
}
private Object getSameResource(Model model, String typePrefix,
Element item, Document dataDoc) throws XPathExpressionException {
QueryExecution qExec = null;
try {
String query = getEqualsQuery(model, typePrefix, item, dataDoc);
LogUtils.debug(this.getClass(), query);
qExec = QueryExecutionFactory.create(query, model);
ResultSet rs = qExec.execSelect();
while (rs.hasNext()) {
QuerySolution solution = rs.next();
return solution.get("?x0");
}
} catch (Exception e) {
if (debug) {
e.printStackTrace();
}
} finally {
if (qExec != null) {
qExec.close();
}
}
return null;
}
public String getEqualsQuery(Model model, String typePrefix, Element item, Document dataDoc) throws XPathExpressionException {
String whereClause = "WHERE {\n";
whereClause += "?x0 rdf:type <" + type + "> . \n";
boolean hasKey = false;
for (Property property : getProperties()) {
if (property.isKey()) {
hasKey = true;
}
}
for (Property property : getProperties()) {
if (property.isKey() || !hasKey) {
whereClause += property.getSPARQLEqualPhrase("?x0", item, dataDoc);
}
}
if (!hasKey) {
for (Relation rel : getRelations()) {
whereClause += rel.getSPARQLEqualPhrase("?x0", typePrefix, model, item, dataDoc);
}
}
whereClause += "}\n";
String prefixes = "PREFIX t: <" + typePrefix + "> \n"
+ "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> \n";
String select = "select ?x0 ";
return prefixes + select + whereClause;
}
public static Set<Resource> getAllEntitiesOfType(Model model, String type) {
Set<Resource> ret = new HashSet<Resource>();
ResIterator list = model.listSubjectsWithProperty(RDF.type, model.createResource(type));
while (list.hasNext()) {
Resource entity = list.next();
ret.add(entity);
}
return ret;
}
}