package net.fortytwo.sesametools;
import org.openrdf.model.IRI;
import org.openrdf.model.Resource;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.QueryLanguage;
import org.openrdf.query.Update;
import org.openrdf.query.UpdateExecutionException;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
/**
* Translates between two IRI prefixes for a given set of triples.
*
* @author Peter Ansell p_ansell@yahoo.com
*/
public class URITranslator {
private final static Logger logger = LoggerFactory.getLogger(URITranslator.class);
private URITranslator() {
}
/**
* Maps IRIs for all triples in the given contexts in the given repository, between the input
* IRI prefix and the output IRI prefix.
*
* @param repository The repository containing the input triples, and which will contain the output
* triples
* @param inputUriPrefix The string defining the start of any IRIs to look for.
* @param outputUriPrefix The string defining the start of the IRIs which matched the inputUriPrefix, after
* the translation is complete.
* @param contexts The contexts in the repository that are relevant to the mapping
* @throws RepositoryException If the repository threw an exception during the course of the method.
* @throws MalformedQueryException If any of the translation queries could not be executed due to an error in the
* queries or a lack of understanding of the query by the repository.
* @throws UpdateExecutionException If the SPARQL Update queries used by this method were not able
* to be successfully executed on the given repository for some reason.
*/
public static void doTranslation(Repository repository, final String inputUriPrefix, final String outputUriPrefix,
Resource... contexts)
throws RepositoryException, MalformedQueryException, UpdateExecutionException {
Collection<IRI> subjectMappingPredicates = Collections.emptyList();
Collection<IRI> predicateMappingPredicates = Collections.emptyList();
Collection<IRI> objectMappingPredicates = Collections.emptyList();
doTranslation(repository, inputUriPrefix, outputUriPrefix, subjectMappingPredicates,
predicateMappingPredicates, objectMappingPredicates, true, contexts);
}
/**
* Maps IRIs for all triples in the given contexts in the given repository, between the input
* IRI prefix and the output IRI prefix.
* <p>
* The mapping predicates are used to define extra triples to link the input and output IRIs.
* <p>
* NOTE: The results for queries with deleteTranslatedTriples set to false may not be consistent
* with what you expect.
*
* @param repository The repository containing the input triples,
* and which will contain the output triples
* @param inputUriPrefix The string defining the start of any IRIs to look for.
* @param outputUriPrefix The string defining the start of the IRIs which matched the inputUriPrefix,
* after the translation is complete.
* @param nextSubjectMappingPredicates The predicates used to map subject IRIs that are translated.
* @param nextPredicateMappingPredicates The predicates used to map predicate IRIs that are translated.
* @param nextObjectMappingPredicates The predicates used to map object IRIs that are translated.
* @param deleteTranslatedTriples If this is true, then any triples which contained translated IRIs
* will be deleted. Mapping triples will still exist if any mapping
* predicates were utilised.
* @param contexts The contexts in the repository that are relevant to the mapping
* @throws RepositoryException If the repository threw an exception during the course of the method.
* @throws MalformedQueryException If any of the translation queries could not be executed due to an error in the
* queries or a lack of understanding of the query by the repository.
* @throws UpdateExecutionException If the SPARQL Update queries used by this method were not able
* to be successfully executed on the given repository for some reason.
*/
public static void doTranslation(Repository repository, final String inputUriPrefix, final String outputUriPrefix,
final Collection<IRI> nextSubjectMappingPredicates,
final Collection<IRI> nextPredicateMappingPredicates,
final Collection<IRI> nextObjectMappingPredicates,
boolean deleteTranslatedTriples, Resource... contexts)
throws RepositoryException, MalformedQueryException, UpdateExecutionException {
doTranslation(repository, inputUriPrefix, outputUriPrefix, nextSubjectMappingPredicates, true, false,
nextPredicateMappingPredicates, true, false, nextObjectMappingPredicates, true, false,
deleteTranslatedTriples, contexts);
}
/**
* Maps IRIs for all triples in the given contexts in the given repository, between the input
* IRI prefix and the output IRI prefix.
* <p>
* The mapping predicates are used to define extra triples to link the input and output IRIs.
* <p>
* NOTE: The results for queries with deleteTranslatedTriples set to false may not be consistent
* with what you expect.
*
* @param repository The repository containing the input triples, and which will contain
* the output triples
* @param inputUriPrefix The string defining the start of any IRIs to look for.
* @param outputUriPrefix The string defining the start of the IRIs which matched the inputUriPrefix,
* after the translation is complete.
* @param nextSubjectMappingPredicates The predicates used to map subject IRIs that are translated.
* @param translateSubjectUris True to translate subject IRIs and false otherwise.
* @param exactSubjectMatchRequired True to indicate that translation should only occur if the subject IRI
* matched the inputUriPrefix exactly.
* @param nextPredicateMappingPredicates The predicates used to map predicate IRIs that are translated.
* @param translatePredicateUris True to translate predicate IRIs and false otherwise.
* @param exactPredicateMatchRequired True to indicate that translation should only occur if the predicate IRI
* matched the inputUriPrefix exactly.
* @param nextObjectMappingPredicates The predicates used to map object IRIs that are translated.
* @param translateObjectUris True to translate object IRIs and false otherwise.
* @param exactObjectMatchRequired True to indicate that translation should only occur if the object IRI
* matched the inputUriPrefix exactly.
* @param deleteTranslatedTriples If this is true, then any triples which contained translated IRIs
* will be deleted. Mapping triples will still exist if any mapping
* predicates were utilised.
* @param contexts The contexts in the repository that are relevant to the mapping
* @throws RepositoryException If the repository threw an exception during the course of the method.
* @throws MalformedQueryException If any of the translation queries could not be executed due to an error in the
* queries or a lack of understanding of the query by the repository.
* @throws UpdateExecutionException If the SPARQL Update queries used by this method were not able
* to be successfully executed on the given repository for some reason.
*/
public static void doTranslation(Repository repository, final String inputUriPrefix, final String outputUriPrefix,
final Collection<IRI> nextSubjectMappingPredicates, boolean translateSubjectUris,
boolean exactSubjectMatchRequired,
final Collection<IRI> nextPredicateMappingPredicates,
boolean translatePredicateUris,
boolean exactPredicateMatchRequired,
final Collection<IRI> nextObjectMappingPredicates,
boolean translateObjectUris,
boolean exactObjectMatchRequired, boolean deleteTranslatedTriples,
Resource... contexts)
throws RepositoryException, MalformedQueryException, UpdateExecutionException {
RepositoryConnection repositoryConnection = null;
try {
repositoryConnection = repository.getConnection();
final List<String> withClauses = new ArrayList<>();
if (contexts != null) {
for (Resource nextResource : contexts) {
if (nextResource != null && nextResource instanceof IRI) {
withClauses.add(" WITH <" + nextResource.stringValue() + "> ");
} else {
logger.error("Did not recognise (and ignoring) the context: " + nextResource);
}
}
}
// add a single empty with clause if they didn't include any IRI resources as contexts
// to make the rest of the code simpler
if (withClauses.isEmpty()) {
withClauses.add("");
}
if (translateObjectUris) {
for (String nextWithClause : withClauses) {
final StringBuilder objectConstructBuilder =
new StringBuilder(nextObjectMappingPredicates.size() * 120);
for (final IRI nextMappingPredicate : nextObjectMappingPredicates) {
objectConstructBuilder.append(" ?normalisedObjectUri <")
.append(nextMappingPredicate.stringValue()).append("> ?objectUri . ");
}
final StringBuilder objectTemplateWhereBuilder = new StringBuilder();
objectTemplateWhereBuilder.append(" ?subjectUri ?predicateUri ?objectUri . ");
if (!exactObjectMatchRequired) {
objectTemplateWhereBuilder.append("filter(isIRI(?objectUri) && strStarts(str(?objectUri), \"")
.append(inputUriPrefix).append("\")");
objectTemplateWhereBuilder.append(") . ");
objectTemplateWhereBuilder.append("bind(iri(concat(\"");
objectTemplateWhereBuilder.append(outputUriPrefix);
objectTemplateWhereBuilder.append("\", encode_for_uri(substr(str(?objectUri), ");
objectTemplateWhereBuilder.append(inputUriPrefix.length() + 1);
objectTemplateWhereBuilder.append(")))) AS ?normalisedObjectUri) ");
} else {
// the following should be more efficient on large queries for exact matching,
// as it contains constants that can be compiled down to IRIs
// In addition, the branch above will work with exact matching,
// but is prone to collisions if the IRI is used as the base of a longer IRI
objectTemplateWhereBuilder.append("filter(isIRI(?objectUri) && sameTerm(?objectUri, IRI(\"")
.append(inputUriPrefix).append("\"))). bind(iri(\"").append(outputUriPrefix)
.append("\") AS ?normalisedObjectUri) . ");
}
String deleteObjectTemplate;
if (deleteTranslatedTriples) {
deleteObjectTemplate = " DELETE { ?subjectUri ?predicateUri ?objectUri . } ";
} else {
deleteObjectTemplate = "";
}
final String objectTemplate =
nextWithClause + " " + deleteObjectTemplate
+ " INSERT { ?subjectUri ?predicateUri ?normalisedObjectUri . "
+ objectConstructBuilder.toString() + " } " + " WHERE { "
+ objectTemplateWhereBuilder.toString()
+ " } ; ";
logger.debug("objectTemplate=" + objectTemplate);
// allQueries.add(objectTemplate);
executeSparqlUpdateQueries(repositoryConnection, objectTemplate);
}
// FIXME: Sesame seems to need this, or the following queries do not work correctly
repositoryConnection.commit();
}
if (translateSubjectUris) {
for (String nextWithClause : withClauses) {
final StringBuilder subjectConstructBuilder =
new StringBuilder(nextSubjectMappingPredicates.size() * 120);
for (final IRI nextMappingPredicate : nextSubjectMappingPredicates) {
subjectConstructBuilder.append(" ?normalisedSubjectUri <")
.append(nextMappingPredicate.stringValue()).append("> ?subjectUri . ");
}
final StringBuilder subjectTemplateWhereBuilder = new StringBuilder();
subjectTemplateWhereBuilder.append(" ?subjectUri ?predicateUri ?objectUri . ");
if (!exactObjectMatchRequired) {
subjectTemplateWhereBuilder
.append("filter(isIRI(?subjectUri) && strStarts(str(?subjectUri), \"")
.append(inputUriPrefix).append("\")");
subjectTemplateWhereBuilder.append(") . ");
subjectTemplateWhereBuilder.append("bind(iri(concat(\"");
subjectTemplateWhereBuilder.append(outputUriPrefix);
subjectTemplateWhereBuilder.append("\", encode_for_uri(substr(str(?subjectUri), ");
subjectTemplateWhereBuilder.append(inputUriPrefix.length() + 1);
subjectTemplateWhereBuilder.append(")))) AS ?normalisedSubjectUri) ");
} else {
// the following should be more efficient on large queries for exact matching,
// as it contains constants that can be compiled down to IRIs
// In addition, the branch above will work with exact matching,
// but is prone to collisions if the IRI is used as the base of a longer IRI
subjectTemplateWhereBuilder
.append("filter(isIRI(?subjectUri) && sameTerm(?subjectUri, IRI(\"")
.append(inputUriPrefix).append("\"))). bind(iri(\"").append(outputUriPrefix)
.append("\") AS ?normalisedSubjectUri) . ");
}
String deleteSubjectTemplate;
if (deleteTranslatedTriples) {
deleteSubjectTemplate = " DELETE { ?subjectUri ?predicateUri ?objectUri . } ";
} else {
deleteSubjectTemplate = "";
}
final String subjectTemplate =
nextWithClause + " " + deleteSubjectTemplate
+ " INSERT { ?normalisedSubjectUri ?predicateUri ?objectUri . "
+ subjectConstructBuilder.toString() + " } " + " WHERE { "
+ subjectTemplateWhereBuilder.toString()
+ " } ; ";
// allQueries.add(subjectTemplate);
executeSparqlUpdateQueries(repositoryConnection, subjectTemplate);
}
// FIXME: Sesame seems to need this, or the following queries do not work correctly
repositoryConnection.commit();
}
if (translatePredicateUris) {
for (String nextWithClause : withClauses) {
final StringBuilder predicateConstructBuilder =
new StringBuilder(nextPredicateMappingPredicates.size() * 120);
for (final IRI nextMappingPredicate : nextPredicateMappingPredicates) {
predicateConstructBuilder.append(" ?normalisedPredicateUri <")
.append(nextMappingPredicate.stringValue()).append("> ?predicateUri . ");
}
final StringBuilder predicateTemplateWhereBuilder = new StringBuilder();
predicateTemplateWhereBuilder.append(" ?subjectUri ?predicateUri ?objectUri . ");
if (!exactObjectMatchRequired) {
predicateTemplateWhereBuilder
.append("filter(isIRI(?predicateUri) && strStarts(str(?predicateUri), \"")
.append(inputUriPrefix).append("\")");
predicateTemplateWhereBuilder.append(") . ");
predicateTemplateWhereBuilder.append("bind(iri(concat(\"");
predicateTemplateWhereBuilder.append(outputUriPrefix);
predicateTemplateWhereBuilder.append("\", encode_for_uri(substr(str(?predicateUri), ");
predicateTemplateWhereBuilder.append(inputUriPrefix.length() + 1);
predicateTemplateWhereBuilder.append(")))) AS ?normalisedPredicateUri) ");
} else {
// the following should be more efficient on large queries for exact matching,
// as it contains constants that can be compiled down to IRIs
// In addition, the branch above will work with exact matching,
// but is prone to collisions if the IRI is used as the base of a longer IRI
predicateTemplateWhereBuilder
.append("filter(isIRI(?predicateUri) && sameTerm(?predicateUri, IRI(\"")
.append(inputUriPrefix).append("\"))). bind(iri(\"").append(outputUriPrefix)
.append("\") AS ?normalisedPredicateUri) . ");
}
String deletePredicateTemplate;
if (deleteTranslatedTriples) {
deletePredicateTemplate = " DELETE { ?subjectUri ?predicateUri ?objectUri . } ";
} else {
deletePredicateTemplate = "";
}
final String predicateTemplate =
nextWithClause + deletePredicateTemplate
+ " INSERT { ?subjectUri ?normalisedPredicateUri ?objectUri . "
+ predicateConstructBuilder.toString() + " } " + " WHERE { "
+ predicateTemplateWhereBuilder.toString() + " } ; ";
// allQueries.add(predicateTemplate);
executeSparqlUpdateQueries(repositoryConnection, predicateTemplate);
}
// executeSparqlUpdateQueries(repositoryConnection, allQueries);
repositoryConnection.commit();
}
} catch (RepositoryException | MalformedQueryException | UpdateExecutionException rex) {
// rollback the connection and then throw the resulting exception
// TODO: Will this get called before the repositoryConnection.close() in the finally
// block?
if (repositoryConnection != null){
repositoryConnection.rollback();
}
throw rex;
} finally {
if (repositoryConnection != null) {
try {
repositoryConnection.close();
} catch (RepositoryException rex) {
logger.error("Found repository exception while trying to close repository connection", rex);
}
}
}
}
/**
* Executes the given SPARQL Update query against the given repository.
*/
private static void executeSparqlUpdateQueries(RepositoryConnection repositoryConnection, String nextQuery)
throws RepositoryException, MalformedQueryException, UpdateExecutionException {
executeSparqlUpdateQueries(repositoryConnection, Collections.singletonList(nextQuery));
}
/**
* Executes the given SPARQL Update queries against the given repository.
*/
private static void executeSparqlUpdateQueries(RepositoryConnection repositoryConnection, List<String> nextQueries)
throws RepositoryException, MalformedQueryException, UpdateExecutionException {
for (String nextQuery : nextQueries) {
logger.info("nextQuery=" + nextQuery);
Update preparedUpdate = repositoryConnection.prepareUpdate(QueryLanguage.SPARQL, nextQuery);
preparedUpdate.execute();
}
}
}