/*
* SPARQLSemanticAnnotationHelper.java
*
* Copyright (c) 2007-2011, The University of Sheffield.
*
* This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html),
* and is free software, licenced under the GNU Lesser General Public License,
* Version 3, June 2007 (also included with this distribution as file
* LICENCE-LGPL3.html).
*
* Valentin Tablan, 19 Apr 2011
*
* $Id$
*/
package gate.mimir.sparql;
import gate.mimir.Constraint;
import gate.mimir.ConstraintType;
import gate.mimir.SemanticAnnotationHelper;
import gate.mimir.index.AtomicAnnotationIndex;
import gate.mimir.index.Mention;
import gate.mimir.search.QueryEngine;
import gate.mimir.util.DelegatingSemanticAnnotationHelper;
import gate.util.GateRuntimeException;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectStreamException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.xml.bind.DatatypeConverter;
import javax.xml.stream.XMLStreamException;
import org.apache.log4j.Logger;
/**
* A Semantic annotation helper that, at query time, connects to a SPARQL
* endpoint to obtain a list of candidate URIs that are then passed to the
* underlying delegate annotation helper.
*
* This {@link DelegatingSemanticAnnotationHelper} wraps another actual
* {@link SemanticAnnotationHelper} (the delegate). At search time, this helper
* provides an extra synthetic feature (its name is configurable, see
* {@link SPARQLSemanticAnnotationHelper#setSparqlFeatureName(String)}, and
* defaults to
* {@link SPARQLSemanticAnnotationHelper#DEFAULT_SPARQL_QUERY_FEATURE_NAME}). The
* content of an {@link ConstraintType#EQ} constraint is interpreted as a SPARQL
* query, which is executed against the SPARQL endpoint (see
* {@link #getSparqlEndpoint()}). Each row in the result set contains a set of
* variable assignments, which are used to generate standard Mímir
* queries that are passed-on to the delegate.
*/
public class SPARQLSemanticAnnotationHelper extends
    DelegatingSemanticAnnotationHelper {

  /**
   * Serialisation version identifier.
   */
  private static final long serialVersionUID = 3855212427922484546L;

  private static final Logger logger = Logger
      .getLogger(SPARQLSemanticAnnotationHelper.class);

  /**
   * Name of the result-set column that, when present, is treated by
   * {@link #getMentions(String, List, QueryEngine)} as a sign that the query
   * failed; its value in the first row (if any) is used as the error message.
   */
  private static final String ERROR_COLUMN_NAME = "error-message";

  /**
   * A query fragment that, if set, gets prepended to all SPARQL queries sent
   * to the end point. This could be used, for example, for setting up a list of
   * prefixes.
   */
  private String queryPrefix;

  /**
   * A query fragment that, if set, gets appended to all SPARQL queries sent
   * to the end point. This could be used, for example, for setting up a
   * LIMIT constraint.
   */
  private String querySuffix;

  /**
   * The name used for the synthetic feature used at query time to supply the
   * SPARQL query ({@value}).
   */
  public static final String DEFAULT_SPARQL_QUERY_FEATURE_NAME = "sparql";

  /**
   * The name of the virtual feature used to encode the SPARQL query passed-on
   * to this helper.
   */
  private String sparqlFeatureName = DEFAULT_SPARQL_QUERY_FEATURE_NAME;

  /**
   * The service endpoint where SPARQL queries are forwarded to.
   */
  private String sparqlEndpoint;

  /**
   * How the query is sent to the endpoint: as a URL parameter (GET), as a
   * form-encoded POST body (POST_ENCODED) or as a raw POST body (POST_PLAIN).
   * Defaults to GET.
   */
  private RequestMethod sparqlRequestMethod = RequestMethod.GET;

  /**
   * User name for the endpoint. Transient, so it is not serialised with the
   * helper; it is only used by {@link #init(AtomicAnnotationIndex)} to compute
   * {@link #authHeader}.
   */
  private transient String sparqlEndpointUser;

  /**
   * Password for the endpoint. Transient, so it is not serialised with the
   * helper; it is only used by {@link #init(AtomicAnnotationIndex)} to compute
   * {@link #authHeader}.
   */
  private transient String sparqlEndpointPassword;

  /**
   * HTTP Header used to authenticate with the remote endpoint. If set to
   * <code>null</code>, then no authentication is done. Unlike the user name
   * and password fields, this field is not transient.
   */
  private String authHeader;

  /**
   * See {@link #setQueryPrefix(String)}.
   *
   * @return the query prefix, or <code>null</code> if none was set.
   */
  public String getQueryPrefix() {
    return queryPrefix;
  }

  /**
   * Sets the query prefix: a query fragment that, if set, gets prepended to
   * all SPARQL queries sent to the end point. This could be used, for example,
   * for setting up a list of PREFIXes.
   *
   * @param queryPrefix the new prefix; <code>null</code> means no prefix.
   */
  public void setQueryPrefix(String queryPrefix) {
    this.queryPrefix = queryPrefix;
  }

  /**
   * See {@link #setQuerySuffix(String)}.
   *
   * @return the query suffix, or <code>null</code> if none was set.
   */
  public String getQuerySuffix() {
    return querySuffix;
  }

  /**
   * Sets the query suffix: a query fragment that, if set, gets appended to
   * all SPARQL queries sent to the end point. This could be used, for example,
   * for setting up a LIMIT constraint.
   *
   * @param querySuffix the new suffix; <code>null</code> means no suffix.
   */
  public void setQuerySuffix(String querySuffix) {
    this.querySuffix = querySuffix;
  }

  /**
   * Gets the URL of the SPARQL endpoint that queries are sent to.
   *
   * @return the endpoint URL, as set by {@link #setSparqlEndpoint(String)}.
   */
  public String getSparqlEndpoint() {
    return sparqlEndpoint;
  }

  /**
   * Sets the URL of the SPARQL endpoint that queries are sent to.
   *
   * @param sparqlEndpoint the endpoint URL.
   */
  public void setSparqlEndpoint(String sparqlEndpoint) {
    this.sparqlEndpoint = sparqlEndpoint;
  }

  /**
   * Gets the user name used to build the HTTP Basic authentication header.
   *
   * @return the user name, or <code>null</code> if none was set.
   */
  public String getSparqlEndpointUser() {
    return sparqlEndpointUser;
  }

  /**
   * Sets the user name used to build the HTTP Basic authentication header.
   * The header is only (re)computed by {@link #init(AtomicAnnotationIndex)}.
   *
   * @param sparqlEndpointUser the user name; <code>null</code> or empty means
   *          no header is computed.
   */
  public void setSparqlEndpointUser(String sparqlEndpointUser) {
    this.sparqlEndpointUser = sparqlEndpointUser;
  }

  /**
   * Gets the password used to build the HTTP Basic authentication header.
   *
   * @return the password, or <code>null</code> if none was set.
   */
  public String getSparqlEndpointPassword() {
    return sparqlEndpointPassword;
  }

  /**
   * Sets the password used to build the HTTP Basic authentication header.
   * The header is only (re)computed by {@link #init(AtomicAnnotationIndex)}.
   *
   * @param sparqlEndpointPassword the password.
   */
  public void setSparqlEndpointPassword(String sparqlEndpointPassword) {
    this.sparqlEndpointPassword = sparqlEndpointPassword;
  }

  /**
   * Gets the method used for sending queries to the endpoint.
   *
   * @return the request method.
   */
  public RequestMethod getSparqlRequestMethod() {
    return sparqlRequestMethod;
  }

  /**
   * Sets the method used for sending queries to the endpoint.
   *
   * @param sparqlRequestMethod the request method; if <code>null</code>,
   *          queries are sent using GET.
   */
  public void setSparqlRequestMethod(RequestMethod sparqlRequestMethod) {
    this.sparqlRequestMethod = sparqlRequestMethod;
  }

  /**
   * Gets the name of the virtual features used to encode the SPARQL query.
   *
   * @return the sparqlFeatureName
   */
  public String getSparqlFeatureName() {
    return sparqlFeatureName;
  }

  /**
   * Sets the name for the virtual feature used to encode the SPARQL query
   * that should be executed on the SPARQL end-point. Defaults to
   * {@link #DEFAULT_SPARQL_QUERY_FEATURE_NAME} if not set to any other value.
   *
   * @param sparqlFeatureName the new name for the virtual feature. A
   *          <code>null</code> value restores the default name, mirroring what
   *          de-serialisation does for a missing value.
   */
  public void setSparqlFeatureName(String sparqlFeatureName) {
    this.sparqlFeatureName = sparqlFeatureName != null
        ? sparqlFeatureName : DEFAULT_SPARQL_QUERY_FEATURE_NAME;
  }

  /**
   * Returns the nominal features of the superclass, with the virtual SPARQL
   * feature added if it is not already listed.
   *
   * @return the nominal feature names, including the virtual SPARQL feature.
   * @see gate.mimir.util.DelegatingSemanticAnnotationHelper#getNominalFeatures()
   */
  @Override
  public String[] getNominalFeatures() {
    String[] oldNomFeats = super.getNominalFeatures();
    if(oldNomFeats == null) {
      oldNomFeats = new String[0];
    }
    // nothing to do if the virtual "sparql" feature is already present
    for(String aFeat : oldNomFeats) {
      if(aFeat != null && aFeat.equals(sparqlFeatureName)) {
        return oldNomFeats;
      }
    }
    // add the virtual sparql feature (on the first position, to reduce the
    // cost of future calls to this method).
    String[] newNomFeats = new String[oldNomFeats.length + 1];
    newNomFeats[0] = sparqlFeatureName;
    System.arraycopy(oldNomFeats, 0, newNomFeats, 1, oldNomFeats.length);
    nominalFeatureNames = newNomFeats;
    return newNomFeats;
  }

  /**
   * Custom de-serialisation method to ensure fields that did not exist in
   * previous versions are initialised to the correct default values.
   */
  private Object readResolve() throws ObjectStreamException {
    if(sparqlFeatureName == null) {
      sparqlFeatureName = DEFAULT_SPARQL_QUERY_FEATURE_NAME;
    }
    // runQuery() switches on this value, so it must not stay null even if
    // init() is never called on the de-serialised instance.
    if(sparqlRequestMethod == null) {
      sparqlRequestMethod = RequestMethod.GET;
    }
    return this;
  }

  /**
   * Initialises the superclass, then computes the HTTP Basic authentication
   * header if a non-empty user name was supplied, and makes sure a request
   * method is set.
   *
   * @param indexer passed through to the superclass implementation.
   * @throws UnsupportedCharsetException if the JVM does not support UTF-8.
   */
  @Override
  public void init(AtomicAnnotationIndex indexer) {
    super.init(indexer);
    // calculate authHeader value
    if(sparqlEndpointUser != null && sparqlEndpointUser.length() > 0) {
      if(sparqlEndpointPassword == null) {
        sparqlEndpointPassword = "";
      }
      String userPass = sparqlEndpointUser + ":" + sparqlEndpointPassword;
      try {
        authHeader = "Basic " + DatatypeConverter.printBase64Binary(
            userPass.getBytes("UTF-8"));
      } catch(UnsupportedEncodingException e) {
        // UnsupportedCharsetException has no cause-taking constructor
        UnsupportedCharsetException uce =
            new UnsupportedCharsetException("UTF-8");
        uce.initCause(e);
        throw uce;
      }
    }
    // ensure we have a sparqlRequestMethod set on deserialization
    if(sparqlRequestMethod == null) {
      sparqlRequestMethod = RequestMethod.GET;
    }
  }

  /**
   * Finds mentions for the given annotation type and constraints. If one of
   * the constraints is on the virtual SPARQL feature, its value is run as a
   * query (wrapped in the configured prefix and suffix) and every result row
   * is turned into a set of {@link ConstraintType#EQ} constraints (one per
   * column) that is passed to the delegate together with the remaining
   * constraints. If there is no SPARQL constraint, the call is simply
   * forwarded to the delegate.
   *
   * @param annotationType the annotation type, passed on to the delegate.
   * @param constraints the query constraints.
   * @param engine the query engine, passed on to the delegate.
   * @return the mentions produced by the delegate, without duplicates when a
   *         SPARQL query was involved.
   * @throws IllegalArgumentException if the endpoint reported an error for
   *           the query.
   * @throws GateRuntimeException if communicating with the endpoint or parsing
   *           its response failed.
   */
  @Override
  public List<Mention> getMentions(String annotationType,
      List<Constraint> constraints, QueryEngine engine) {
    List<Constraint> passThroughConstraints = new ArrayList<Constraint>();
    String query = null;
    String originalQuery = null;
    for(Constraint aConstraint : constraints) {
      if(sparqlFeatureName.equals(aConstraint.getFeatureName())) {
        originalQuery = (String)aConstraint.getValue();
        query = (queryPrefix != null ? queryPrefix : "") +
            originalQuery + (querySuffix != null ? querySuffix : "");
      } else {
        passThroughConstraints.add(aConstraint);
      }
    }
    if(query == null) {
      // no SPARQL constraints in this query
      return delegate.getMentions(annotationType, constraints, engine);
    }
    // Accumulate the mentions in a set, so that we remove duplicates.
    Set<Mention> mentions = new HashSet<Mention>();
    // run the query on the SPARQL endpoint
    try {
      SPARQLResultSet srs = runQuery(query);
      String[] columnNames = srs.getColumnNames();
      if(columnNames == null) {
        columnNames = new String[0];
      }
      String[][] rows = srs.getRows();
      failOnErrorResult(columnNames, rows, originalQuery);
      // convert each result row into a query for the delegate
      if(rows != null) {
        for(String[] aRow : rows) {
          List<Constraint> delegateConstraints =
              new ArrayList<Constraint>(passThroughConstraints);
          for(int i = 0; i < columnNames.length; i++) {
            delegateConstraints.add(new Constraint(ConstraintType.EQ,
                columnNames[i], aRow[i]));
          }
          mentions.addAll(delegate.getMentions(annotationType,
              delegateConstraints, engine));
        }
      }
    } catch(IOException e) {
      logger.error(
          "I/O error while communicating with " + "SPARQL endpoint.", e);
      throw new GateRuntimeException("I/O error while communicating with "
          + "SPARQL endpoint.", e);
    } catch(XMLStreamException e) {
      logger.error("Error parsing results from SPARQL endpoint.", e);
      throw new GateRuntimeException("Error parsing results from SPARQL "
          + "endpoint.", e);
    }
    return new ArrayList<Mention>(mentions);
  }

  /**
   * Throws an exception if the result set contains the error column.
   *
   * @param columnNames the result column names (not <code>null</code>).
   * @param rows the result rows; may be <code>null</code> or empty, in which
   *          case the exception carries no detail message from the endpoint.
   * @param originalQuery the query as supplied by the user (without prefix
   *          and suffix), used in the exception message.
   * @throws IllegalArgumentException if the error column is present.
   */
  private static void failOnErrorResult(String[] columnNames, String[][] rows,
      String originalQuery) {
    for(int i = 0; i < columnNames.length; i++) {
      if(ERROR_COLUMN_NAME.equals(columnNames[i])) {
        // we have an error message
        String errorMessage = null;
        if(rows != null && rows.length > 0 && rows[0] != null
            && rows[0].length > i) {
          errorMessage = rows[0][i];
        }
        throw new IllegalArgumentException("Query \"" + originalQuery +
            "\" resulted in an error" +
            (errorMessage != null ? (":\n" + errorMessage) : "."));
      }
    }
  }

  /**
   * Runs a query against the SPARQL endpoint and returns the results.
   *
   * The query is sent according to the configured request method: URL-encoded
   * as a <code>query</code> parameter for GET, as a form-encoded body for
   * POST_ENCODED, or as the raw body for POST_PLAIN. A missing request method
   * is treated as GET.
   *
   * @param query the full SPARQL query to execute.
   * @return the parsed result set.
   * @throws IOException if communicating with the endpoint fails.
   * @throws XMLStreamException if the response cannot be parsed.
   * @throws RuntimeException if the endpoint URL is invalid, the request
   *           method is unknown, or UTF-8 is not supported by the JVM.
   */
  protected SPARQLResultSet runQuery(String query) throws IOException,
      XMLStreamException {
    try {
      RequestMethod method =
          sparqlRequestMethod != null ? sparqlRequestMethod : RequestMethod.GET;
      String urlStr = sparqlEndpoint;
      String requestBody = null;
      String contentType = null;
      switch(method) {
        case GET:
          urlStr = buildGetUrl(query);
          break;
        case POST_ENCODED:
          requestBody = "query=" + URLEncoder.encode(query, "UTF-8");
          contentType = "application/x-www-form-urlencoded";
          break;
        case POST_PLAIN:
          requestBody = query;
          contentType = "application/sparql-query";
          break;
        default:
          throw new RuntimeException("Unknown request method " + method);
      }
      URL url = new URL(urlStr);
      HttpURLConnection urlConn = (HttpURLConnection)url.openConnection();
      urlConn.setRequestProperty("Accept", "application/sparql-results+xml");
      if(authHeader != null) {
        urlConn.setRequestProperty("Authorization", authHeader);
      }
      // for POST requests, write the request content to the connection
      if(requestBody != null) {
        urlConn.setDoOutput(true);
        urlConn.setRequestMethod("POST");
        urlConn.setRequestProperty("Content-Type", contentType);
        byte[] requestBytes = requestBody.getBytes("UTF-8");
        urlConn.setFixedLengthStreamingMode(requestBytes.length);
        OutputStream urlOut = urlConn.getOutputStream();
        try {
          urlOut.write(requestBytes);
        } finally {
          urlOut.close();
        }
      }
      InputStream responseStream = urlConn.getInputStream();
      try {
        // NOTE(review): assumes SPARQLResultSet reads everything it needs
        // from the stream inside its constructor (the only call here that can
        // throw XMLStreamException), so the stream can be closed afterwards
        // -- confirm against SPARQLResultSet.
        return new SPARQLResultSet(responseStream);
      } finally {
        // always release the HTTP response stream; a failure to close must
        // not hide the parsed results or the original exception
        try {
          responseStream.close();
        } catch(IOException ignored) {
          logger.debug("Could not close SPARQL endpoint response stream.",
              ignored);
        }
      }
    } catch(UnsupportedEncodingException e) {
      // like that's gonna happen...
      throw new RuntimeException("UTF-8 encoding not supported by this JVM",
          e);
    } catch(MalformedURLException e) {
      // this may actually happen
      throw new RuntimeException("Invalid URL - have you set the correct "
          + "SPARQL endpoint?", e);
    }
  }

  /**
   * Builds the URL for a GET request by adding the URL-encoded query as a
   * <code>query</code> parameter to the endpoint URL. If the endpoint URL
   * already contains a query string, the parameter is appended with
   * <code>&amp;</code> rather than <code>?</code>.
   *
   * @param query the SPARQL query.
   * @return the request URL, as a string.
   * @throws UnsupportedEncodingException if UTF-8 is not supported.
   */
  private String buildGetUrl(String query)
      throws UnsupportedEncodingException {
    boolean hasQueryString =
        sparqlEndpoint != null && sparqlEndpoint.indexOf('?') >= 0;
    return sparqlEndpoint + (hasQueryString ? "&" : "?") + "query="
        + URLEncoder.encode(query, "UTF-8");
  }
}