/* * TypeQueries.java * * Created on Apr 25, 2008, 5:14:29 PM * * Description: Provides type inference queries into the the knowledge base specified by a given RDF entity manager. * * Copyright (C) Apr 25, 2008 Stephen L. Reed. * * This program is free software; you can redistribute it and/or modify it under the terms * of the GNU General Public License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with this program; * if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ package org.texai.subsumptionReasoner; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import net.jcip.annotations.NotThreadSafe; import org.apache.log4j.Logger; import org.openrdf.OpenRDFException; import org.openrdf.model.URI; import org.openrdf.model.impl.URIImpl; import org.openrdf.model.vocabulary.OWL; import org.openrdf.model.vocabulary.RDF; import org.openrdf.query.BindingSet; import org.openrdf.query.MalformedQueryException; import org.openrdf.query.QueryLanguage; import org.openrdf.query.TupleQuery; import org.openrdf.query.TupleQueryResult; import org.openrdf.repository.RepositoryConnection; import org.texai.kb.Constants; import org.texai.kb.persistence.RDFEntityManager; import org.texai.kb.persistence.RDFUtility; import org.texai.util.LRUMap; import org.texai.util.TexaiException; /** Provides type inference queries into the the knowledge base specified by a given RDF entity manager. * * @author Stephen L. Reed */ @NotThreadSafe public class TypeQueries { /** the log4j logger */ private static final Logger LOGGER = Logger.getLogger(TypeQueries.class); //TODO the OpenCyc KB has many relationships without context // /** the subclassof query string */ // private static final String TYPE_QUERY_STRING = "SELECT s, o FROM context <" + Constants.TERM_UNIVERSAL_VOCABULARY_MT + // "> {s} <" + Constants.RDF_NAMESPACE + "type> {o}"; // /** the subclassof query string */ // private static final String SUBCLASSOF_QUERY_STRING = "SELECT s, o FROM context <" + Constants.TERM_UNIVERSAL_VOCABULARY_MT + // "> {s} <" + Constants.RDFS_NAMESPACE + "subClassOf> {o}"; /** the rdf:type query string */ private static final String TYPE_QUERY_STRING = "SELECT s, o FROM {s} <" + Constants.RDF_NAMESPACE + "type> {o}"; /** the rdfs:subclassof query string */ private static final String SUBCLASSOF_QUERY_STRING = "SELECT s, o FROM {s} <" + Constants.RDFS_NAMESPACE + "subClassOf> {o}"; /** the cyc:Individual URI */ private static final URI INDIVIDUAL_TERM = new URIImpl(Constants.CYC_NAMESPACE + "Individual"); /** the cyc:Microtheory URI */ private static final URI MICROTHEORY_TERM = new URIImpl(Constants.CYC_NAMESPACE + "Microtheory"); /** the RDF entity manager */ private final RDFEntityManager rdfEntityManager; /** the isType cache */ private final IsTypeCache isTypeCache = new IsTypeCache(); /** the type hierarchy cache */ private final TypeHierarchyCache typeHierarchyCache = new TypeHierarchyCache(); /** the types cache */ private final TypesCache typesCache = new TypesCache(); /** Constructs a new TypeQueries instance. * * @param rdfEntityManager the RDF entity manager */ public TypeQueries(final RDFEntityManager rdfEntityManager) { //Preconditions assert rdfEntityManager != null : "rdfEntityManager must not be null"; this.rdfEntityManager = rdfEntityManager; } /** Returns whether the given term is directly an instance of the given type term. * * @param repositoryName the repository name * @param term the given term * @param typeTerm the given type term * @return whether the given term is directly an instance of the given type term */ public boolean isDirectType( final String repositoryName, final URI term, final URI typeTerm) { //Preconditions assert repositoryName != null : "repositoryName must not be null"; assert !repositoryName.isEmpty() : "repositoryName must not be empty"; assert term != null : "term must not be null"; assert typeTerm != null : "typeTerm must not be null"; final RepositoryConnection repositoryConnection = rdfEntityManager.getConnectionToNamedRepository(repositoryName); boolean isDirectType; try { final TupleQuery typeTupleQuery = repositoryConnection.prepareTupleQuery( QueryLanguage.SERQL, TYPE_QUERY_STRING); typeTupleQuery.setBinding("s", term); typeTupleQuery.setBinding("o", typeTerm); final TupleQueryResult tupleQueryResult = typeTupleQuery.evaluate(); isDirectType = tupleQueryResult.hasNext(); tupleQueryResult.close(); } catch (final MalformedQueryException ex) { throw new TexaiException(ex); } catch (final OpenRDFException ex) { throw new TexaiException(ex); } return isDirectType; } /** Returns the type hierarchy between the given term's direct type and the given type term. * * @param repositoryName the repository name * @param term the given term * @param typeTerm the given type term * @return the type hierarchy between the given term's direct type and the given type term */ public List<URI> typeHierarchy( final String repositoryName, final URI term, final URI typeTerm) { //Preconditions assert repositoryName != null : "repositoryName must not be null"; assert !repositoryName.isEmpty() : "repositoryName must not be empty"; assert term != null : "term must not be null"; assert typeTerm != null : "typeTerm must not be null"; List<URI> typeHierarchy; final List<URI> key = new ArrayList<>(2); if (repositoryName.equals("OpenCyc")) { key.add(term); key.add(typeTerm); typeHierarchy = typeHierarchyCache.get().get(key); if (typeHierarchy != null) { return typeHierarchy; } } typeHierarchy = new ArrayList<>(); final RepositoryConnection repositoryConnection = rdfEntityManager.getConnectionToNamedRepository(repositoryName); try { final List<URI> directTypeTerms = new ArrayList<>(); final TupleQuery typeTupleQuery = repositoryConnection.prepareTupleQuery( QueryLanguage.SERQL, TYPE_QUERY_STRING); typeTupleQuery.setBinding("s", term); final TupleQueryResult tupleQueryResult = typeTupleQuery.evaluate(); while (tupleQueryResult.hasNext()) { final BindingSet bindingSet = tupleQueryResult.next(); final URI directTypeTerm = (URI) bindingSet.getBinding("o").getValue(); if (directTypeTerm.equals(typeTerm)) { // is a direct type typeHierarchy.add(directTypeTerm); } else { directTypeTerms.add(directTypeTerm); } } tupleQueryResult.close(); if (typeHierarchy.isEmpty()) { final Set<URI> visitedTypeTerms = new HashSet<>(directTypeTerms); for (final URI directTypeTerm : directTypeTerms) { final List<URI> typeHierarchy1 = subClassOfHierarchy( repositoryName, directTypeTerm, typeTerm, visitedTypeTerms, repositoryConnection); if (!typeHierarchy1.isEmpty()) { typeHierarchy.addAll(typeHierarchy1); break; } } } } catch (final MalformedQueryException ex) { throw new TexaiException(ex); } catch (final OpenRDFException ex) { throw new TexaiException(ex); } if (LOGGER.isDebugEnabled()) { LOGGER.debug(RDFUtility.formatURIAsTurtle(term) + " type hierarchy to " + RDFUtility.formatURIAsTurtle(typeTerm) + " --> " + RDFUtility.formatResources(typeHierarchy)); } if (!typeHierarchy.isEmpty() && repositoryName.equals("OpenCyc")) { typeHierarchyCache.get().put(key, typeHierarchy); } return typeHierarchy; } /** Returns whether the given term is directly or indirectly an instance of the given type term. * * @param repositoryName the repository name * @param term the given term * @param typeTerm the given type term * @return whether the given term is directly or indirectly an instance of the given type term */ public boolean isType( final String repositoryName, final URI term, final URI typeTerm) { //Preconditions assert repositoryName != null : "repositoryName must not be null"; assert !repositoryName.isEmpty() : "repositoryName must not be empty"; assert term != null : "term must not be null"; assert typeTerm != null : "typeTerm must not be null"; String key = null; if (repositoryName.equals(Constants.OPEN_CYC)) { key = RDFUtility.formatResource(term) + "/" + RDFUtility.formatResource(typeTerm); final Boolean result = isTypeCache.get().get(key); if (result != null) { return result; } } // cache miss, so get the type from the repository final boolean isType = isType_NoCache( repositoryName, term, typeTerm); if (repositoryName.equals(Constants.OPEN_CYC)) { isTypeCache.get().put(key, isType); } return isType; } /** Returns whether the given term is directly or indirectly an instance of the given type term. * * @param repositoryName the repository name * @param term the given term * @param typeTerm the given type term * @return whether the given term is directly or indirectly an instance of the given type term */ private boolean isType_NoCache( final String repositoryName, final URI term, final URI typeTerm) { //Preconditions assert repositoryName != null : "repositoryName must not be null"; assert !repositoryName.isEmpty() : "repositoryName must not be empty"; assert term != null : "term must not be null"; assert typeTerm != null : "typeTerm must not be null"; final RepositoryConnection repositoryConnection = rdfEntityManager.getConnectionToNamedRepository(repositoryName); boolean isType = false; try { final List<URI> directTypeTerms = new ArrayList<>(); final TupleQuery typeTupleQuery = repositoryConnection.prepareTupleQuery( QueryLanguage.SERQL, TYPE_QUERY_STRING); typeTupleQuery.setBinding("s", term); final TupleQueryResult tupleQueryResult = typeTupleQuery.evaluate(); while (tupleQueryResult.hasNext()) { final BindingSet bindingSet = tupleQueryResult.next(); final URI directTypeTerm = (URI) bindingSet.getBinding("o").getValue(); if (directTypeTerm.equals(typeTerm)) { // is a direct type isType = true; } else { directTypeTerms.add(directTypeTerm); } } tupleQueryResult.close(); if (!isType) { final Set<URI> visitedTypeTerms = new HashSet<>(directTypeTerms); for (final URI directTypeTerm : directTypeTerms) { if (isSubClassOf( repositoryName, directTypeTerm, typeTerm, visitedTypeTerms)) { isType = true; break; } } } } catch (final MalformedQueryException ex) { throw new TexaiException(ex); } catch (final OpenRDFException ex) { throw new TexaiException(ex); } if (LOGGER.isDebugEnabled()) { if (isType) { LOGGER.debug(RDFUtility.formatURIAsTurtle(term) + " is type of " + RDFUtility.formatURIAsTurtle(typeTerm)); } else { LOGGER.debug(RDFUtility.formatURIAsTurtle(term) + " is not a type of " + RDFUtility.formatURIAsTurtle(typeTerm)); } } return isType; } /** Returns whether the given term is directly or indirectly a subclass of of the given type term. * * @param repositoryName the repository name * @param typeTerm1 the candidate subclass type term * @param typeTerm2 the given type term * @param visitedTypeTerms the visited type terms * @return whether the given term is directly or indirectly a subclass of of the given type term */ private boolean isSubClassOf( final String repositoryName, final URI typeTerm1, final URI typeTerm2, final Set<URI> visitedTypeTerms) { //Preconditions assert repositoryName != null : "repositoryName must not be null"; assert !repositoryName.isEmpty() : "repositoryName must not be empty"; assert typeTerm1 != null : "typeTerm1 must not be null"; assert typeTerm2 != null : "typeTerm2 must not be null"; final RepositoryConnection repositoryConnection = rdfEntityManager.getConnectionToNamedRepository(repositoryName); try { final TupleQuery subClassOfTupleQuery = repositoryConnection.prepareTupleQuery( QueryLanguage.SERQL, SUBCLASSOF_QUERY_STRING); subClassOfTupleQuery.setBinding("s", typeTerm1); final TupleQueryResult tupleQueryResult = subClassOfTupleQuery.evaluate(); final List<URI> superClassTerms = new ArrayList<>(); boolean isSubClassOf = false; while (tupleQueryResult.hasNext()) { final BindingSet bindingSet = tupleQueryResult.next(); final URI superClassTerm = (URI) bindingSet.getBinding("o").getValue(); if (typeTerm2.equals(superClassTerm)) { isSubClassOf = true; break; } else if (!visitedTypeTerms.contains(superClassTerm)) { visitedTypeTerms.add(superClassTerm); superClassTerms.add(superClassTerm); } } tupleQueryResult.close(); if (isSubClassOf) { if (LOGGER.isDebugEnabled()) { LOGGER.debug(RDFUtility.formatURIAsTurtle(typeTerm1) + " is subclass of " + RDFUtility.formatURIAsTurtle(typeTerm2)); } return true; } for (final URI superClassTerm : superClassTerms) { if (isSubClassOf( repositoryName, superClassTerm, typeTerm2, visitedTypeTerms)) { if (LOGGER.isDebugEnabled()) { LOGGER.debug(RDFUtility.formatURIAsTurtle(typeTerm1) + " is subclass of " + RDFUtility.formatURIAsTurtle(typeTerm2)); } return true; } } } catch (final MalformedQueryException ex) { throw new TexaiException(ex); } catch (final OpenRDFException ex) { throw new TexaiException(ex); } if (LOGGER.isDebugEnabled()) { LOGGER.debug(RDFUtility.formatURIAsTurtle(typeTerm1) + " is not a subclass of " + RDFUtility.formatURIAsTurtle(typeTerm2)); } return false; } /** Returns the first discovered subclass hierarchy terms between the two given type terms. * * @param repositoryName the repository name * @param typeTerm1 the subclass term * @param typeTerm2 the superclass term * @param visitedTypeTerms the visited type terms * @param repositoryConnection the repository connection * @return the subclass hierarchy terms between the two given type terms */ protected List<URI> subClassOfHierarchy( final String repositoryName, final URI typeTerm1, final URI typeTerm2, final Set<URI> visitedTypeTerms, final RepositoryConnection repositoryConnection) { //Preconditions assert repositoryName != null : "repositoryName must not be null"; assert !repositoryName.isEmpty() : "repositoryName must not be empty"; assert typeTerm1 != null : "term must not be null"; assert typeTerm2 != null : "typeTerm must not be null"; if (LOGGER.isDebugEnabled()) { LOGGER.debug("considering class " + RDFUtility.formatURIAsTurtle(typeTerm1) + " as a member of the super class hierarchy to " + RDFUtility.formatURIAsTurtle(typeTerm2)); } final List<URI> subClassOfHierarchy = new ArrayList<>(); subClassOfHierarchy.add(typeTerm1); if (typeTerm1.equals(typeTerm2)) { return subClassOfHierarchy; } try { final TupleQuery subClassOfTupleQuery = repositoryConnection.prepareTupleQuery( QueryLanguage.SERQL, SUBCLASSOF_QUERY_STRING); subClassOfTupleQuery.setBinding("s", typeTerm1); final TupleQueryResult tupleQueryResult = subClassOfTupleQuery.evaluate(); final List<URI> superClassTerms = new ArrayList<>(); boolean isDone = false; while (tupleQueryResult.hasNext()) { final BindingSet bindingSet = tupleQueryResult.next(); final URI superClassTerm = (URI) bindingSet.getBinding("o").getValue(); if (visitedTypeTerms.contains(superClassTerm)) { continue; } visitedTypeTerms.add(superClassTerm); superClassTerms.add(superClassTerm); if (typeTerm2.equals(superClassTerm)) { isDone = true; subClassOfHierarchy.add(superClassTerm); break; } } tupleQueryResult.close(); if (isDone) { if (LOGGER.isDebugEnabled()) { LOGGER.debug(RDFUtility.formatURIAsTurtle(typeTerm1) + " is subclass of " + RDFUtility.formatURIAsTurtle(typeTerm2)); } return subClassOfHierarchy; } for (final URI superClassTerm : superClassTerms) { final List<URI> subClassOfHierarchy1 = subClassOfHierarchy( repositoryName, superClassTerm, typeTerm2, visitedTypeTerms, repositoryConnection); if (!subClassOfHierarchy1.isEmpty()) { if (LOGGER.isDebugEnabled()) { LOGGER.debug(RDFUtility.formatURIAsTurtle(typeTerm1) + " is subclass of " + RDFUtility.formatURIAsTurtle(typeTerm2)); } subClassOfHierarchy.addAll(subClassOfHierarchy1); return subClassOfHierarchy; } } } catch (final MalformedQueryException ex) { throw new TexaiException(ex); } catch (final OpenRDFException ex) { throw new TexaiException(ex); } if (LOGGER.isDebugEnabled()) { LOGGER.debug(RDFUtility.formatURIAsTurtle(typeTerm1) + " is not a subclass of " + RDFUtility.formatURIAsTurtle(typeTerm2)); } subClassOfHierarchy.clear(); return subClassOfHierarchy; } /** Returns the direct types of the given term. * * @param repositoryName the repository name * @param term the given term * @return the direct types of the given term */ public Set<URI> getDirectTypes( final String repositoryName, final URI term) { //Preconditions assert repositoryName != null : "repositoryName must not be null"; assert !repositoryName.isEmpty() : "repositoryName must not be empty"; assert term != null : "term must not be null"; Set<URI> directTypeTerms; if (repositoryName.equals("OpenCyc")) { directTypeTerms = typesCache.get().get(term); if (directTypeTerms != null && !directTypeTerms.isEmpty()) { return directTypeTerms; } } final RepositoryConnection repositoryConnection = rdfEntityManager.getConnectionToNamedRepository(repositoryName); directTypeTerms = new HashSet<>(); try { final TupleQuery typeTupleQuery = repositoryConnection.prepareTupleQuery( QueryLanguage.SERQL, TYPE_QUERY_STRING); typeTupleQuery.setBinding("s", term); final TupleQueryResult tupleQueryResult = typeTupleQuery.evaluate(); while (tupleQueryResult.hasNext()) { final BindingSet bindingSet = tupleQueryResult.next(); directTypeTerms.add((URI) bindingSet.getBinding("o").getValue()); } tupleQueryResult.close(); } catch (final MalformedQueryException ex) { throw new TexaiException(ex); } catch (final OpenRDFException ex) { throw new TexaiException(ex); } if (repositoryName.equals("OpenCyc")) { typesCache.get().put(term, directTypeTerms); } return directTypeTerms; } /** Returns the direct instances of the given type term. * * @param repositoryName the repository name * @param typeTerm the given term * @return the direct instances of the given type term */ public Set<URI> getDirectInstances( final String repositoryName, final URI typeTerm) { //Preconditions assert repositoryName != null : "repositoryName must not be null"; assert !repositoryName.isEmpty() : "repositoryName must not be empty"; assert typeTerm != null : "typeTerm must not be null"; final RepositoryConnection repositoryConnection = rdfEntityManager.getConnectionToNamedRepository(repositoryName); final Set<URI> directInstances = new HashSet<>(); try { final TupleQuery typeTupleQuery = repositoryConnection.prepareTupleQuery( QueryLanguage.SERQL, TYPE_QUERY_STRING); typeTupleQuery.setBinding("o", typeTerm); final TupleQueryResult tupleQueryResult = typeTupleQuery.evaluate(); while (tupleQueryResult.hasNext()) { final BindingSet bindingSet = tupleQueryResult.next(); directInstances.add((URI) bindingSet.getBinding("s").getValue()); } tupleQueryResult.close(); } catch (final MalformedQueryException ex) { throw new TexaiException(ex); } catch (final OpenRDFException ex) { throw new TexaiException(ex); } return directInstances; } /** Returns whether the given term is an individual. * * @param repositoryName the repository name * @param term the given term * @return whether the given term is an individual */ public boolean isIndividualTerm( final String repositoryName, final URI term) { //Preconditions assert repositoryName != null : "repositoryName must not be null"; assert !repositoryName.isEmpty() : "repositoryName must not be empty"; assert term != null : "term must not be null"; return isType( repositoryName, term, INDIVIDUAL_TERM); } /** Returns whether the given term is a class. * * @param repositoryName the repository name * @param term the given term * @return whether the given term is a class */ public boolean isClassTerm( final String repositoryName, final URI term) { //Preconditions assert repositoryName != null : "repositoryName must not be null"; assert !repositoryName.isEmpty() : "repositoryName must not be empty"; assert term != null : "term must not be null"; return term.equals(OWL.CLASS) || isType(repositoryName, term, OWL.CLASS); } /** Returns whether the given term is a context. * * @param repositoryName the repository name * @param term the given term * @return whether the given term is a context */ public boolean isContextTerm( final String repositoryName, final URI term) { //Preconditions assert repositoryName != null : "repositoryName must not be null"; assert !repositoryName.isEmpty() : "repositoryName must not be empty"; assert term != null : "term must not be null"; return isType( repositoryName, term, MICROTHEORY_TERM); } /** Returns whether the given term is a property. * * @param repositoryName the repository name * @param term the given term * @return whether the given term is a property */ public boolean isPropertyTerm( final String repositoryName, final URI term) { //Preconditions assert repositoryName != null : "repositoryName must not be null"; assert !repositoryName.isEmpty() : "repositoryName must not be empty"; assert term != null : "term must not be null"; return isType( repositoryName, term, RDF.PROPERTY); } /** Clears the caches. */ public void clearCaches() { isTypeCache.get().clear(); typesCache.get().clear(); } /** Provides the isType cache. */ class IsTypeCache extends ThreadLocal<Map<String, Boolean>> { /** Returns the current thread's "initial value" for this thread-local variable. * * @return the current thread's "initial value */ @Override protected Map<String, Boolean> initialValue() { return new LRUMap<>( 10, // initialCapacity 10000); // maxCapacity } } /** Provides the type hierarchy cache. */ class TypeHierarchyCache extends ThreadLocal<Map<List<URI>, List<URI>>> { /** Returns the current thread's "initial value" for this thread-local variable. * * @return the current thread's "initial value */ @Override protected Map<List<URI>, List<URI>> initialValue() { return new LRUMap<>( 10, // initialCapacity 50000); // maxCapacity } } /** Provides the types cache. */ class TypesCache extends ThreadLocal<Map<URI, Set<URI>>> { /** Returns the current thread's "initial value" for this thread-local variable. * * @return the current thread's "initial value */ @Override protected Map<URI, Set<URI>> initialValue() { return new LRUMap<>( 10, // initialCapacity 50000); // maxCapacity } } }