package com.vistatec.ocelot.segment.model.enrichment;
import java.awt.Image;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.imageio.ImageIO;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.NodeIterator;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
/**
* This class provides constants and static methods for managing the link
* enrichments.
*/
public abstract class ELinkEnrichmentsConstants {
static final Logger LOG = LoggerFactory.getLogger(ELinkEnrichmentsConstants.class);
/** The long description property. */
public static final String LONG_DESCR_PROP = "http://dbpedia.org/ontology/abstract";
/** The short description property. */
public static final String SHORT_DESCR_PROP = "http://www.w3.org/2000/01/rdf-schema#comment";
/** The image property. */
public static final String IMAGE_PROP = "http://xmlns.com/foaf/0.1/depiction";
/** The small image property. */
public static final String SMALL_IMAGE_PROP = "http://dbpedia.org/ontology/thumbnail";
/** The wikipedia link property. */
public static final String WIKI_LINK_PROP = "http://xmlns.com/foaf/0.1/isPrimaryTopicOf";
/** The homepage link property. */
public static final String HOMEPAGE_LINK_PROP = "http://xmlns.com/foaf/0.1/homepage";
/** The birthdate property. */
public static final String BIRTHDATE_PROP = "http://dbpedia.org/ontology/birthDate";
/** The deathdate property. */
public static final String DEATHDATE_PROP = "http://dbpedia.org/ontology/deathDate";
/** The hometown property. */
public static final String HOMETOWN_PROP = "http://dbpedia.org/ontology/hometown";
/** The birth place property. */
public static final String BIRTH_PLACE_PROP = "http://dbpedia.org/ontology/birthPlace";
/** The death place property. */
public static final String DEATH_PLACE_PROP = "http://dbpedia.org/ontology/deathPlace";
/** The description property. */
public static final String DESCRIPTION_PROP = "http://purl.org/dc/elements/1.1/description";
/** The area total property. */
public static final String AREA_TOTAL_PROP = "http://dbpedia.org/ontology/PopulatedPlace/areaTotal";
/** The population total property. */
public static final String POPULATION_TOTAL = "http://dbpedia.org/ontology/populationTotal";
/** The latitude property. */
public static final String LATITUDE_PROP = "http://www.w3.org/2003/01/geo/wgs84_pos#lat";
/** The longitude property. */
public static final String LONGITUDE_PROP = "http://www.w3.org/2003/01/geo/wgs84_pos#long";
/** The type property. */
public static final String TYPE_PROP = "http://dbpedia.org/property/type";
/** The location property. */
public static final String LOCATION_PROP = "http://dbpedia.org/property/location";
/** The entity name property. */
public static final String ENTITY_NAME_PROP = "http://www.w3.org/2000/01/rdf-schema#label";
/**
* Gets the info properties.
*
* @return the info properties.
*/
public static List<LinkInfoData> getInfoProperties() {
List<LinkInfoData> properties = new ArrayList<LinkInfoData>();
properties.add(new LinkInfoData(DESCRIPTION_PROP, "Description",
String.class));
properties.add(new LinkInfoData(BIRTHDATE_PROP, "Birth Date",
Date.class));
properties.add(new LinkInfoData(DEATHDATE_PROP, "Death Date",
Date.class));
properties
.add(new LinkInfoData(HOMETOWN_PROP, "Hometown", String.class));
properties.add(new LinkInfoData(BIRTH_PLACE_PROP, "Birth Place",
String.class));
properties.add(new LinkInfoData(BIRTH_PLACE_PROP, "Death Place",
String.class));
properties.add(new LinkInfoData(AREA_TOTAL_PROP, "Area Total",
String.class, "Km2"));
properties.add(new LinkInfoData(POPULATION_TOTAL, "Population Total",
Integer.class));
properties
.add(new LinkInfoData(LATITUDE_PROP, "Latitude", Float.class));
properties.add(new LinkInfoData(LONGITUDE_PROP, "Longitude",
Float.class));
properties.add(new LinkInfoData(TYPE_PROP, "Type", String.class));
properties
.add(new LinkInfoData(LOCATION_PROP, "Location", String.class));
return properties;
}
/**
* Gets the triples context.
*
* @return the triples context.
*/
public static Map<String, String> getContext() {
Map<String, String> context = new HashMap<String, String>();
context.put("dbo", "http://dbpedia.org/ontology/");
context.put("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
context.put("foaf", "http://xmlns.com/foaf/0.1/");
context.put("geo", "http://www.w3.org/2003/01/geo/wgs84_pos#");
context.put("dbp", "http://dbpedia.org/property/");
context.put("dc", "http://purl.org/dc/elements/1.1/");
context.put("dbpedia", "http://dbpedia.org/resource/");
return context;
}
/**
* Fills a link enrichment with properties retrieved by the triples model.
*
* @param linkEnrichment
* the link enrichment to be filled.
* @param linkModel
* the link triples model.
* @param entityURL
* the entity URL.
*/
public static void fillLinkEnrichment(LinkEnrichment linkEnrichment,
Model linkModel, String entityURL) {
Resource entityRes = linkModel.createResource(entityURL);
NodeIterator nameNodeIt = linkModel
.listObjectsOfProperty(
entityRes,
linkModel
.createProperty(ELinkEnrichmentsConstants.ENTITY_NAME_PROP));
linkEnrichment.setReferenceEntity(entityURL);
RDFNode currNode = null;
while(nameNodeIt.hasNext() && linkEnrichment.getEntityName() == null){
currNode = nameNodeIt.next();
if(checkLanguage(currNode, linkEnrichment.getLanguage())){
linkEnrichment.setEntityName(currNode.asLiteral().getString(), ENTITY_NAME_PROP);
}
}
if(linkEnrichment.getEntityName() == null){
int index = entityURL.lastIndexOf("/");
linkEnrichment.setEntityName(entityURL.substring(index + 1)
.replaceAll("_", " "),
ELinkEnrichmentsConstants.ENTITY_NAME_PROP);
}
// if (nameNodeIt.hasNext()) {
// linkEnrichment.setEntityName(nameNodeIt.next().asLiteral()
// .getString(), ELinkEnrichmentsConstants.ENTITY_NAME_PROP);
// } else {
// // String entityUri = entityStmt.getObject().asResource().getURI();
// int index = entityURL.lastIndexOf("/");
// linkEnrichment.setEntityName(entityURL.substring(index + 1)
// .replaceAll("_", " "),
// ELinkEnrichmentsConstants.ENTITY_NAME_PROP);
// }
NodeIterator shortDescrNodeIt = linkModel
.listObjectsOfProperty(
entityRes,
linkModel
.createProperty(ELinkEnrichmentsConstants.SHORT_DESCR_PROP));
while(shortDescrNodeIt.hasNext() && linkEnrichment.getShortDescription() == null){
currNode = shortDescrNodeIt.next();
if(checkLanguage(currNode, linkEnrichment.getLanguage())){
linkEnrichment.setShortDescription(currNode.asLiteral().getString(), SHORT_DESCR_PROP);
}
}
// if (shortDescrNodeIt.hasNext()) {
// linkEnrichment.setShortDescription(shortDescrNodeIt.next()
// .asLiteral().getString(),
// ELinkEnrichmentsConstants.SHORT_DESCR_PROP);
// }
NodeIterator longDescrNodeIt = linkModel
.listObjectsOfProperty(
entityRes,
linkModel
.createProperty(ELinkEnrichmentsConstants.LONG_DESCR_PROP));
while(longDescrNodeIt.hasNext() && linkEnrichment.getLongDescription() == null){
currNode = longDescrNodeIt.next();
if(checkLanguage(currNode, linkEnrichment.getLanguage())){
linkEnrichment.setLongDescription(currNode.asLiteral().getString(), LONG_DESCR_PROP);
}
}
// if (longDescrNodeIt.hasNext()) {
// linkEnrichment.setLongDescription(longDescrNodeIt.next()
// .asLiteral().getString(),
// ELinkEnrichmentsConstants.LONG_DESCR_PROP);
// }
NodeIterator imageNodeIt = linkModel
.listObjectsOfProperty(
entityRes,
linkModel
.createProperty(ELinkEnrichmentsConstants.SMALL_IMAGE_PROP));
if (imageNodeIt.hasNext()) {
RDFNode imageNode = imageNodeIt.next();
String imageURL = null;
if (imageNode.isResource()) {
imageURL = imageNode.asResource().getURI();
} else {
imageURL = imageNode.asLiteral().getString();
}
Image image = downloadImage(imageURL);
if (image != null) {
linkEnrichment.setImage(image);
linkEnrichment.setImageURL(imageURL,
ELinkEnrichmentsConstants.SMALL_IMAGE_PROP);
}
}
if (linkEnrichment.getImage() == null) {
imageNodeIt = linkModel.listObjectsOfProperty(entityRes, linkModel
.createProperty(ELinkEnrichmentsConstants.IMAGE_PROP));
if (imageNodeIt.hasNext()) {
RDFNode imageNode = imageNodeIt.next();
String imageURL = null;
if (imageNode.isResource()) {
imageURL = imageNode.asResource().getURI();
} else {
imageURL = imageNode.asLiteral().getString();
}
Image image = downloadImage(imageURL);
if (image != null) {
linkEnrichment.setImage(image);
linkEnrichment.setImageURL(imageURL,
ELinkEnrichmentsConstants.IMAGE_PROP);
}
}
}
NodeIterator wikiNodeIt = linkModel
.listObjectsOfProperty(
entityRes,
linkModel
.createProperty(ELinkEnrichmentsConstants.WIKI_LINK_PROP));
while(wikiNodeIt.hasNext() && linkEnrichment.getWikiPage() == null){
RDFNode wikiNode = wikiNodeIt.next();
if(wikiNode.isResource()){
linkEnrichment.setWikiPage(wikiNode.asResource().getURI(), WIKI_LINK_PROP);
} else if (checkLanguage(wikiNode, linkEnrichment.getLanguage())){
linkEnrichment.setWikiPage(wikiNode.asLiteral().getString(), WIKI_LINK_PROP);
}
}
// if (wikiNodeIt.hasNext()) {
// RDFNode wikiNode = wikiNodeIt.next();
// if (wikiNode.isResource()) {
// linkEnrichment.setWikiPage(wikiNode.asResource().getURI(),
// ELinkEnrichmentsConstants.WIKI_LINK_PROP);
// } else {
// linkEnrichment.setWikiPage(wikiNode.asLiteral().getString(),
// ELinkEnrichmentsConstants.WIKI_LINK_PROP);
// }
// }
NodeIterator homePageNodeIt = linkModel
.listObjectsOfProperty(
entityRes,
linkModel
.createProperty(ELinkEnrichmentsConstants.HOMEPAGE_LINK_PROP));
while(homePageNodeIt.hasNext() && linkEnrichment.getHomePage() == null){
RDFNode homePageNode = homePageNodeIt.next();
if(homePageNode.isResource()){
linkEnrichment.setHomePage(homePageNode.asResource().getURI(), HOMEPAGE_LINK_PROP);
} else if(checkLanguage(homePageNode, linkEnrichment.getLanguage())){
linkEnrichment.setHomePage(homePageNode.asLiteral().getString(), HOMEPAGE_LINK_PROP);
}
}
// if (homePageNodeIt.hasNext()) {
// RDFNode homePageNode = homePageNodeIt.next();
// if (homePageNode.isResource()) {
// linkEnrichment.setHomePage(homePageNode.asResource().getURI(),
// ELinkEnrichmentsConstants.HOMEPAGE_LINK_PROP);
// } else {
// linkEnrichment.setHomePage(
// homePageNode.asLiteral().getString(),
// ELinkEnrichmentsConstants.HOMEPAGE_LINK_PROP);
// }
// }
NodeIterator infoNodeIt = null;
List<LinkInfoData> enrichmentInfo = new ArrayList<LinkInfoData>();
for (LinkInfoData infoProp : ELinkEnrichmentsConstants
.getInfoProperties()) {
infoNodeIt = linkModel.listObjectsOfProperty(entityRes,
linkModel.createProperty(infoProp.getPropName()));
if (infoNodeIt.hasNext()) {
RDFNode node = infoNodeIt.next();
System.out.println("Property name: " + infoProp.getPropName());
if (node.isLiteral()) {
if(checkLanguage(node, linkEnrichment.getLanguage())){
infoProp.setValue(node.asLiteral().getString());
}
} else {
infoProp.setValue(node.asResource().getURI());
}
enrichmentInfo.add(infoProp);
}
}
linkEnrichment.setInfoList(enrichmentInfo);
}
private static boolean checkLanguage(RDFNode node, String language ){
return node.asLiteral().getLanguage().equals(language) || node.asLiteral().getLanguage().equals("");
}
/**
* Downloads an image from a specific URL. If the URL is redirecting to
* another, it follows the URL chain till the image is found.
*
* @param imagUrl
* the image URL
* @return the image
*/
public static Image downloadImage(String imagUrl) {
Image image = null;
try {
URL url = new URL(imagUrl);
while (image == null && url != null) {
image = ImageIO.read(url);
if (image == null) {
HttpURLConnection conn = (HttpURLConnection) url
.openConnection();
HttpURLConnection.setFollowRedirects(false);
String urlStr = conn.getHeaderField("Location");
if (urlStr != null) {
url = new URL(urlStr);
} else {
url = null;
}
}
}
} catch (MalformedURLException e) {
LOG.error("Error in the image URL: " + imagUrl, e);
} catch (IOException e) {
LOG.error("Error while downloading the image with URL " + imagUrl, e);
}
return image;
}
}