package org.deri.grefine.rdf.vocab;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.any23.Any23;
import org.apache.any23.http.HTTPClient;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.HTTPDocumentSource;
import org.apache.any23.writer.ReportingTripleHandler;
import org.apache.any23.writer.RepositoryWriter;
import org.openrdf.model.Value;
import org.openrdf.query.BindingSet;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.TupleQueryResult;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.sail.inferencer.fc.ForwardChainingRDFSInferencer;
import org.openrdf.sail.memory.MemoryStore;

/**
 * Loads an RDF vocabulary into a repository and extracts the classes and
 * properties whose URI starts with the vocabulary namespace.
 */
public class VocabularyImporter {

    /** Namespace whose server is known to mishandle the Accept header. */
    private static final String SKOS_NAMESPACE = "http://www.w3.org/2004/02/skos/core#";

    private static final String PREFIXES =
            "PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#> "
            + "PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
            + "PREFIX skos:<http://www.w3.org/2004/02/skos/core#> ";

    /** Part of both term queries that precedes the rdf:type triple pattern. */
    private static final String TERM_QUERY_HEAD = PREFIXES
            + "SELECT ?resource ?label ?en_label ?description ?en_description ?definition ?en_definition "
            + "WHERE { ";

    /**
     * Part of both term queries that follows the rdf:type triple pattern. It
     * ends inside the regex string literal, right after the '^' anchor, so the
     * (escaped) namespace URI has to be appended next.
     */
    private static final String TERM_QUERY_TAIL =
            "OPTIONAL {?resource rdfs:label ?label.} "
            + "OPTIONAL {?resource rdfs:label ?en_label. FILTER langMatches( lang(?en_label), \"EN\" ) } "
            + "OPTIONAL {?resource rdfs:comment ?description.} "
            + "OPTIONAL {?resource rdfs:comment ?en_description. FILTER langMatches( lang(?en_description), \"EN\" ) } "
            + "OPTIONAL {?resource skos:definition ?definition.} "
            + "OPTIONAL {?resource skos:definition ?en_definition. FILTER langMatches( lang(?en_definition), \"EN\" ) } "
            + "FILTER regex(str(?resource), \"^";

    private static final String CLASSES_QUERY_P1 =
            TERM_QUERY_HEAD + "?resource rdf:type rdfs:Class. " + TERM_QUERY_TAIL;
    private static final String CLASSES_QUERY_P2 = "\")}";

    private static final String PROPERTIES_QUERY_P1 =
            TERM_QUERY_HEAD + "?resource rdf:type rdf:Property. " + TERM_QUERY_TAIL;
    private static final String PROPERTIES_QUERY_P2 = "\")}";

    /**
     * Fetches the document at {@code fetchUrl}, loads it into a fresh
     * in-memory repository and appends the terms found to the given lists.
     *
     * @param name       vocabulary name, handed to every created term
     * @param uri        vocabulary namespace; only resources whose URI starts
     *                   with it are reported
     * @param fetchUrl   URL the vocabulary document is downloaded from
     * @param classes    output list receiving the classes found
     * @param properties output list receiving the properties found
     * @throws VocabularyImportException if fetching, parsing or querying fails
     */
    public void importVocabulary(String name, String uri, String fetchUrl,
            List<RDFSClass> classes, List<RDFSProperty> properties)
            throws VocabularyImportException {
        boolean strictlyRdf = faultyContentNegotiation(uri);
        Repository repos = getModel(fetchUrl, strictlyRdf);
        getTerms(repos, name, uri, classes, properties);
    }

    /**
     * Extracts the terms of a vocabulary that is already loaded in
     * {@code repository} and appends them to the given lists.
     *
     * @param name       vocabulary name, handed to every created term
     * @param uri        vocabulary namespace; only resources whose URI starts
     *                   with it are reported
     * @param repository repository holding the vocabulary statements
     * @param classes    output list receiving the classes found
     * @param properties output list receiving the properties found
     * @throws VocabularyImportException if querying the repository fails
     */
    public void importVocabulary(String name, String uri, Repository repository,
            List<RDFSClass> classes, List<RDFSProperty> properties)
            throws VocabularyImportException {
        getTerms(repository, name, uri, classes, properties);
    }

    /**
     * Downloads {@code url} with Any23 and stores the extracted triples in a
     * new memory store wrapped in a forward-chaining RDFS inferencer.
     *
     * @param url         location of the vocabulary document
     * @param strictlyRdf when true, Any23 is created with only the
     *                    {@code "rdf-xml"} extractor name instead of its
     *                    default configuration
     * @return the initialized repository containing the extracted statements
     * @throws VocabularyImportException wrapping any failure during setup,
     *                                   download or extraction
     */
    private Repository getModel(String url, boolean strictlyRdf)
            throws VocabularyImportException {
        try {
            Any23 runner = strictlyRdf ? new Any23("rdf-xml") : new Any23();
            runner.setHTTPUserAgent("google-refine-rdf-extension");
            HTTPClient client = runner.getHTTPClient();
            DocumentSource source = new HTTPDocumentSource(client, url);

            Repository repository = new SailRepository(
                    new ForwardChainingRDFSInferencer(new MemoryStore()));
            repository.initialize();

            RepositoryConnection con = repository.getConnection();
            try {
                RepositoryWriter writer = new RepositoryWriter(con);
                ReportingTripleHandler reporter = new ReportingTripleHandler(writer);
                runner.extract(source, reporter);
            } finally {
                // The connection is only needed for loading; release it so it
                // does not stay open for the lifetime of the repository.
                con.close();
            }
            return repository;
        } catch (Exception e) {
            throw new VocabularyImportException(
                    "Unable to import vocabulary from " + url, e);
        }
    }

    /**
     * Queries {@code repos} for rdfs:Class and rdf:Property resources whose
     * URI starts with {@code uri} and appends one entry per distinct resource
     * to {@code classes} and {@code properties} respectively.
     *
     * @param repos      repository to query
     * @param name       vocabulary name, handed to every created term
     * @param uri        vocabulary namespace used as URI prefix filter
     * @param classes    output list receiving the classes found
     * @param properties output list receiving the properties found
     * @throws VocabularyImportException if the connection cannot be obtained
     *                                   or closed, or if a query fails
     */
    protected void getTerms(Repository repos, String name, String uri,
            List<RDFSClass> classes, List<RDFSProperty> properties)
            throws VocabularyImportException {
        try {
            RepositoryConnection con = repos.getConnection();
            try {
                // The URI is matched literally: without escaping, characters
                // such as '.' or '?' would act as regex operators and a quote
                // would terminate the SPARQL string literal.
                String uriPattern = escapeForSparqlRegex(uri);

                TupleQuery query = con.prepareTupleQuery(QueryLanguage.SPARQL,
                        CLASSES_QUERY_P1 + uriPattern + CLASSES_QUERY_P2);
                TupleQueryResult res = query.evaluate();
                try {
                    // A resource yields one row per label/comment combination;
                    // only its first row is used.
                    Set<String> seen = new HashSet<String>();
                    while (res.hasNext()) {
                        BindingSet solution = res.next();
                        String clazzURI = solution.getValue("resource").stringValue();
                        if (!seen.add(clazzURI)) {
                            continue;
                        }
                        classes.add(new RDFSClass(clazzURI, pickLabel(solution),
                                pickDescription(solution), name, uri));
                    }
                } finally {
                    res.close();
                }

                query = con.prepareTupleQuery(QueryLanguage.SPARQL,
                        PROPERTIES_QUERY_P1 + uriPattern + PROPERTIES_QUERY_P2);
                res = query.evaluate();
                try {
                    Set<String> seen = new HashSet<String>();
                    while (res.hasNext()) {
                        BindingSet solution = res.next();
                        String propertyUri = solution.getValue("resource").stringValue();
                        if (!seen.add(propertyUri)) {
                            continue;
                        }
                        properties.add(new RDFSProperty(propertyUri, pickLabel(solution),
                                pickDescription(solution), name, uri));
                    }
                } finally {
                    res.close();
                }
            } catch (Exception ex) {
                throw new VocabularyImportException(
                        "Error while processing vocabulary retrieved from " + uri, ex);
            } finally {
                con.close();
            }
        } catch (RepositoryException ex) {
            throw new VocabularyImportException(
                    "Error while processing vocabulary retrieved from " + uri, ex);
        }
    }

    /**
     * Returns the label of a result row, preferring the English-tagged one.
     *
     * @param solution a row of one of the term queries
     * @return the label text, or null when the row binds no label
     */
    private String pickLabel(BindingSet solution) {
        return getFirstNotNull(
                solution.getValue("en_label"),
                solution.getValue("label"));
    }

    /**
     * Returns the description of a result row. skos:definition values are
     * preferred over rdfs:comment values, and within each the English-tagged
     * value is preferred.
     *
     * @param solution a row of one of the term queries
     * @return the description text, or null when the row binds none
     */
    private String pickDescription(BindingSet solution) {
        return getFirstNotNull(
                solution.getValue("en_definition"),
                solution.getValue("definition"),
                solution.getValue("en_description"),
                solution.getValue("description"));
    }

    /**
     * Escapes {@code uri} so that, once placed inside a double-quoted SPARQL
     * string literal used as a regex pattern, it matches itself literally.
     * Regex metacharacters get a regex backslash, and every backslash is
     * doubled again because the SPARQL literal consumes one level of escaping.
     *
     * @param uri text to escape; null is rendered as the text "null", which is
     *            what plain string concatenation would produce
     * @return the escaped text
     */
    private static String escapeForSparqlRegex(String uri) {
        String text = String.valueOf(uri);
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
            case '"':
                // No regex meaning; only the SPARQL literal needs the escape.
                escaped.append("\\\"");
                break;
            case '\\':
                // Regex needs "\\" for a literal backslash, and each of those
                // two backslashes must itself be escaped in the literal.
                escaped.append("\\\\\\\\");
                break;
            case '.':
            case '^':
            case '$':
            case '*':
            case '+':
            case '?':
            case '(':
            case ')':
            case '[':
            case ']':
            case '{':
            case '}':
            case '|':
                escaped.append("\\\\").append(c);
                break;
            default:
                escaped.append(c);
                break;
            }
        }
        return escaped.toString();
    }

    /**
     * Returns the string form of the first non-null value.
     *
     * @param values candidates in order of preference
     * @return the string value of the first non-null candidate, or null when
     *         all candidates are null
     */
    private String getFirstNotNull(Value... values) {
        for (int i = 0; i < values.length; i++) {
            String s = getString(values[i]);
            if (s != null) {
                return s;
            }
        }
        return null;
    }

    /**
     * Null-safe variant of {@link Value#stringValue()}.
     *
     * @param v value to convert, may be null
     * @return the string value, or null when {@code v} is null
     */
    private String getString(Value v) {
        return v == null ? null : v.stringValue();
    }

    /**
     * Tells whether the vocabulary at {@code uri} must be fetched as RDF/XML
     * only.
     *
     * @param uri vocabulary namespace
     * @return true only for the SKOS core namespace
     */
    private boolean faultyContentNegotiation(String uri) {
        // SKOS gets exceptional treatment because its deployment does not
        // handle the Accept header properly: it always returns HTML if the
        // Accept header contains HTML, regardless of other, more preferred
        // options.
        return uri.equals(SKOS_NAMESPACE);
    }
}