package edu.kit.aifb.summa;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.LinkedList;
import org.openrdf.model.Literal;
import org.openrdf.query.Binding;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.sparql.SPARQLRepository;
import edu.kit.aifb.summa.model.Property;
import edu.kit.aifb.summa.model.TripleMeta;
import edu.kit.aifb.summa.model.URI;
import edu.kit.aifb.summa.model.TripleMeta.TripleFocus;
/**
* This is an example summarization approach that generates summaries with
* the DBpedia SPARQL endpoint.
*
*/
public class SimpleSummarizer implements Summarizer {
private static final String REPOSITORY = "http://dbpedia.org/sparql";
private static final String QUERY_0 = "SELECT DISTINCT ?l FROM <http://dbpedia.org> WHERE { "
+ "OPTIONAL {<ENTITY> <http://www.w3.org/2000/01/rdf-schema#label> ?l ."
+ "FILTER regex(lang(?l), \"LANG\", \"i\") . }}";
private static final String QUERY_1 = "PREFIX vrank:<http://purl.org/voc/vrank#>"
+ "SELECT DISTINCT ?o ?l "
+ "FROM <http://people.aifb.kit.edu/ath/#DBpedia_PageRank> "
+ "FROM <http://dbpedia.org> WHERE"
+ "{<ENTITY> ?p ?o . ?o vrank:hasRank/vrank:rankValue ?pageRank."
+ "PREDICATES"
+ "OPTIONAL {?o <http://www.w3.org/2000/01/rdf-schema#label> ?l . "
+ "FILTER regex(lang(?l), \"LANG\", \"i\") .}}"
+ "ORDER BY DESC (?pageRank) LIMIT TOPK";
private static final String QUERY_2 = "PREFIX vrank:<http://purl.org/voc/vrank#>"
+ "SELECT ?p ?l ?rank "
+ "FROM <http://people.aifb.kit.edu/ath/#DBpedia_PageRank> "
+ "FROM <http://dbpedia.org> WHERE {"
+ "<ENTITY> ?p <OBJECT> ."
+ "<OBJECT> vrank:hasRank/vrank:rankValue ?rank ."
+ "OPTIONAL {?p <http://www.w3.org/2000/01/rdf-schema#label> ?l."
+ "FILTER regex(lang(?l), \"LANG\", \"i\")} } ORDER BY asc(?p)";
/**
* main method to test the summarizer
*/
public static void main(String[] args) throws URISyntaxException {
Summarizer summ = new SimpleSummarizer();
LinkedList<TripleMeta> meta = summ.summarize(new java.net.URI("http://dbpedia.org/resource/Barack_Obama"), null, 5, 1, null);
for (TripleMeta tripleMeta : meta) {
System.out.println(tripleMeta.toString());
}
}
public LinkedList<TripleMeta> summarize(java.net.URI uri, String[] fixedProperties,
Integer topK, Integer maxHops, String language) {
SPARQLRepository rep = new SPARQLRepository(REPOSITORY);
if (language == null) {
language = "en";
}
if (fixedProperties == null) {
fixedProperties = new String [0];
}
RepositoryConnection con = null;
LinkedList<TripleMeta> result = new LinkedList<TripleMeta>();
try {
rep.initialize();
con = rep.getConnection();
TupleQuery q1 = con.prepareTupleQuery(QueryLanguage.SPARQL,
QUERY_0.replace("ENTITY", uri.toString()).replace("LANG", language));
TupleQueryResult r1 = q1.evaluate();
URI subject = null;
if (r1.hasNext()) {
BindingSet set = r1.next();
Binding l = set.getBinding("l");
if (l == null) {
subject = new URI(uri);
} else {
subject = new URI(uri, l.getValue().stringValue(), ((Literal) l.getValue()).getLanguage());
}
}
r1.close();
String query1 = QUERY_1.replace("ENTITY", uri.toString()).
replace("LANG", language).
replace("TOPK", Integer.toString(topK));
if (fixedProperties.length > 0) {
String replacement = "FILTER (";
for (String string : fixedProperties) {
replacement += "?p = <" + string + "> || ";
}
replacement = replacement.substring(0, replacement.length() - 3);
replacement += ") .";
query1 = query1.replaceAll("PREDICATES", replacement);
} else {
query1 = query1.replaceAll("PREDICATES", "");
}
TupleQuery q2 = con.prepareTupleQuery(QueryLanguage.SPARQL, query1);
TupleQueryResult r2 = q2.evaluate();
ArrayList<URI> objects = new ArrayList<URI>();
while (r2.hasNext()) {
BindingSet set = r2.next();
Binding o = set.getBinding("o");
Binding l = set.getBinding("l");
URI object = null;
if (l == null) {
object = new URI(new java.net.URI(o.getValue().toString()));
} else {
object = new URI(new java.net.URI(o.getValue().toString()), l.getValue().stringValue(), ((Literal) l.getValue()).getLanguage());
}
objects.add(object);
}
r2.close();
for (URI object : objects) {
TupleQuery q3 = con.prepareTupleQuery(QueryLanguage.SPARQL,
QUERY_2.replace("ENTITY", uri.toString()).
replace("LANG", language).
replace("OBJECT", object.getURI().toString()));
TupleQueryResult r3 = q3.evaluate();
if (r3.hasNext()) {
BindingSet set = r3.next();
Binding p = set.getBinding("p");
Binding l = set.getBinding("l");
Binding rank = set.getBinding("rank");
Property predicate = null;
if (l == null) {
predicate = new Property(new java.net.URI(p.getValue().stringValue()));
} else {
predicate = new Property(new java.net.URI(p.getValue().stringValue()),
l.getValue().stringValue());
}
TripleMeta meta = new TripleMeta(subject, predicate, object, TripleFocus.subject);
meta.setRank(new Double(rank.getValue().stringValue()));
result.add(meta);
}
r3.close();
}
} catch (Exception e) {
e.printStackTrace();
}
return result;
}
}