/* * CachedSubsumptionGraph.java * * Created on May 2, 2011, 11:20:41 AM * * Description: Provides a cached subsumption graph of the OpenCyc repository. * * Copyright (C) May 2, 2011, Stephen L. Reed, Texai.org. * */ package org.texai.subsumptionGraph; import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import net.jcip.annotations.NotThreadSafe; import net.sf.ehcache.CacheManager; import org.apache.log4j.Logger; import org.openrdf.model.Statement; import org.openrdf.model.URI; import org.openrdf.model.vocabulary.OWL; import org.openrdf.model.vocabulary.RDF; import org.openrdf.model.vocabulary.RDFS; import org.openrdf.repository.RepositoryConnection; import org.openrdf.repository.RepositoryException; import org.openrdf.repository.RepositoryResult; import org.texai.kb.CacheInitializer; import org.texai.kb.persistence.DistributedRepositoryManager; import org.texai.kb.persistence.RDFEntityManager; import org.texai.kb.persistence.RDFUtility; import org.texai.util.TexaiException; /** Provides a cached subsumption graph of the OpenCyc repository. * * @author reed */ @NotThreadSafe public class CachedSubsumptionGraph implements Serializable { /** the logger */ private static final Logger LOGGER = Logger.getLogger(CachedSubsumptionGraph.class); /** the serial version UID */ private static final long serialVersionUID = 1L; /** the URI to id dictionary, URI --> id */ private final Map<String, Integer> uriToIdDictionary = new HashMap<>(); /** the URIs, indexed by id */ private final List<String> uris = new ArrayList<>(); /** the superclass dictionary, class id --> super class ids */ private final Map<Integer, List<Integer>> superClassDictionary = new HashMap<>(); /** the subClassOf dictionary, super class id --> sub class ids */ private final Map<Integer, List<Integer>> subClassOfDictionary = new HashMap<>(); /** the type dictionary, instance id --> class id */ private final Map<Integer, List<Integer>> typeDictionary = new HashMap<>(); /** the instance dictionary, class id --> instance ids */ private final Map<Integer, List<Integer>> instanceDictionary = new HashMap<>(); /** the disjoint with dictionary, class id --> disjoint class ids */ private final Map<Integer, List<Integer>> disjointWithDictionary = new HashMap<>(); /** the next available uri id */ private int nextId = 0; /** the singleton instance */ private static CachedSubsumptionGraph cachedSubsumptionGraph; /** Constructs a new CachedSubsumptionGraph instance. */ public CachedSubsumptionGraph() { } /** Returns the direct superclasses of the given term. * * @param repositoryName the repository name * @param term the given term * @return the direct superclasses of the given term */ public Collection<URI> getDirectSuperClasses( final String repositoryName, final URI term) { //Preconditions assert "OpenCyc".equals(repositoryName) : "repositoryName must be OpenCyc"; assert term != null : "term must not be null"; final Collection<URI> superClassTerms = new ArrayList<>(); final Integer id = uriToIdDictionary.get(RDFUtility.formatResource(term)); if (id != null) { final List<Integer> superClasses = superClassDictionary.get(id); if (superClasses != null) { for (final Integer superClass : superClasses) { superClassTerms.add(RDFUtility.makeURIFromAlias(uris.get(superClass))); } } } return superClassTerms; } /** Returns the class terms that are directly asserted to disjoint with the given class term. * * @param term the given class term * @return the class terms that are directly asserted to disjoint with the given class term */ public Collection<URI> getDirectDisjointWiths(final URI term) { //preconditions assert term != null : "term must not be null"; final Collection<URI> disjointWithTerms = new ArrayList<>(); final Integer id = uriToIdDictionary.get(RDFUtility.formatResource(term)); if (id != null) { final List<Integer> disjointWiths = disjointWithDictionary.get(id); if (disjointWiths != null) { for (final Integer disjointWith : disjointWiths) { disjointWithTerms.add(RDFUtility.makeURIFromAlias(uris.get(disjointWith))); } } } return disjointWithTerms; } /** Loads the dictionaries from the OpenCyc repository. * * @param rdfEntityManager the RDF entity manager */ public void loadDictionariesFromOpenCycRepository(final RDFEntityManager rdfEntityManager) { //Preconditions assert rdfEntityManager != null : "rdfEntityManager must not be null"; uriToIdDictionary.clear(); uris.clear(); superClassDictionary.clear(); subClassOfDictionary.clear(); typeDictionary.clear(); instanceDictionary.clear(); disjointWithDictionary.clear(); nextId = 0; final RepositoryConnection repositoryConnection = rdfEntityManager.getConnectionToNamedRepository("OpenCyc"); try { final RepositoryResult<Statement> repositoryResult = repositoryConnection.getStatements(null, null, null, false); int statementCnt = 0; LOGGER.info("gathering OpenCyc statements..."); while (repositoryResult.hasNext()) { statementCnt++; final Statement statement = repositoryResult.next(); if (statementCnt % 100000 == 0) { LOGGER.info(statementCnt + " " + RDFUtility.formatStatementAsTurtle(statement)); } final URI predicate = statement.getPredicate(); if (predicate.equals(RDF.TYPE)) { // load typeDictionary entry final Integer subjectIndex = getId((URI) statement.getSubject()); final Integer objectIndex = getId((URI) statement.getObject()); List<Integer> types = typeDictionary.get(subjectIndex); if (types == null) { types = new ArrayList<>(); typeDictionary.put(subjectIndex, types); } types.add(objectIndex); // load instanceDictionary entry List<Integer> instances = instanceDictionary.get(subjectIndex); if (instances == null) { instances = new ArrayList<>(); instanceDictionary.put(subjectIndex, instances); } instances.add(objectIndex); } else if (predicate.equals(RDFS.SUBCLASSOF)) { // load superClassDictionary entry final Integer subjectIndex = getId((URI) statement.getSubject()); final Integer objectIndex = getId((URI) statement.getObject()); List<Integer> superClasses = superClassDictionary.get(subjectIndex); if (superClasses == null) { superClasses = new ArrayList<>(); superClassDictionary.put(subjectIndex, superClasses); } superClasses.add(objectIndex); // load subClassOfDictionary entry List<Integer> subClasses = subClassOfDictionary.get(objectIndex); if (subClasses == null) { subClasses = new ArrayList<>(); subClassOfDictionary.put(objectIndex, subClasses); } subClasses.add(subjectIndex); } else if (predicate.equals(OWL.DISJOINTWITH)) { // load disjointWithDictionary entry with two symmetric entries final Integer subjectIndex = getId((URI) statement.getSubject()); final Integer objectIndex = getId((URI) statement.getObject()); List<Integer> disjointClasses = disjointWithDictionary.get(subjectIndex); if (disjointClasses == null) { disjointClasses = new ArrayList<>(); disjointWithDictionary.put(subjectIndex, disjointClasses); } disjointClasses.add(objectIndex); disjointClasses = disjointWithDictionary.get(objectIndex); if (disjointClasses == null) { disjointClasses = new ArrayList<>(); disjointWithDictionary.put(objectIndex, disjointClasses); } disjointClasses.add(subjectIndex); } } LOGGER.info(statementCnt + " OpenCyc statements"); } catch (RepositoryException ex) { throw new TexaiException(ex); } } /** Gets the id for the given URI. * * @param uri the given URI * @return the id */ private Integer getId(final URI uri) { //Preconditions assert uri != null : "uri must not be null"; final String uriString = RDFUtility.formatResource(uri); Integer id = uriToIdDictionary.get(uriString); if (id == null) { id = nextId++; uriToIdDictionary.put(uriString, id); uris.add(uriString); assert uris.get(id).equals(uriString); } return id; } /** Initializes the singleton instance. * * @param rdfEntityManager the RDF entity manager */ public static void initializeSingletonInstance(final RDFEntityManager rdfEntityManager) { //Preconditions assert rdfEntityManager != null : "rdfEntityManager must not be null"; cachedSubsumptionGraph = new CachedSubsumptionGraph(); cachedSubsumptionGraph.loadDictionariesFromOpenCycRepository(rdfEntityManager); } /** Gets the singleton instance. * * @return the singleton instance */ public static CachedSubsumptionGraph getInstance() { return cachedSubsumptionGraph; } /** Logs the dictionary statistics. */ public void logDictionaryStatistics() { LOGGER.info("uriToIdDictionary: " + uriToIdDictionary.size()); LOGGER.info("uris: " + uris.size()); LOGGER.info("superClassDictionary: " + superClassDictionary.size()); LOGGER.info("subClassOfDictionary: " + subClassOfDictionary.size()); LOGGER.info("typeDictionary: " + typeDictionary.size()); LOGGER.info("instanceDictionary: " + instanceDictionary.size()); LOGGER.info("disjointWithDictionary: " + disjointWithDictionary.size()); } /** Executes this application. * * @param args the command line args - unused */ public static void main(final String[] args) { CacheInitializer.initializeCaches(); final RDFEntityManager rdfEntityManager = new RDFEntityManager(); CachedSubsumptionGraph.initializeSingletonInstance(rdfEntityManager); rdfEntityManager.close(); final CachedSubsumptionGraph cachedSubsumptionGraph1 = CachedSubsumptionGraph.getInstance(); cachedSubsumptionGraph1.logDictionaryStatistics(); DistributedRepositoryManager.shutDown(); CacheManager.getInstance().shutdown(); LOGGER.info("CachedSubsumptionGraph completed"); } }