// OntTools.java example
//
// Explorer
// jena-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Package
///////////////
package org.apache.jena.ontology;


// Imports
///////////////
import java.util.*;
import java.util.function.Predicate;

import org.apache.jena.rdf.model.* ;
import org.apache.jena.shared.JenaException ;


/**
 * <p>
 * Some general utilities and algorithms to support developers working with the
 * general classes in the Jena ontology API. <strong>Warning</strong> these
 * utilities are <strong>experimental</strong>. Extensive testing has not yet
 * occurred (see {@link org.apache.jena.ontology.impl.TestOntTools} in the
 * test area for basic unit tests), 
 * and in particular performance testing has not been carried out yet. 
 * Users are advised to exercise caution before relying on these utilities in 
 * production code. Please send any comments or suggestions to the
 * <a href="http://tech.groups.yahoo.com/group/jena-dev">Jena support email list</a>.
 * </p>
 */
public class OntTools
{
    // Constants
    //////////////////////////////////

    // Static variables
    //////////////////////////////////

//    static private Logger log = LoggerFactory.getLogger( OntTools.class );

    // Instance variables
    //////////////////////////////////

    // Constructors
    //////////////////////////////////

    // External signature methods
    //////////////////////////////////

    /**
     * <p>Answer the lowest common ancestor (LCA) of two classes in a given ontology.
     * This is the class that is farthest from the root concept and which is a
     * super-class of both <code>u</code> and <code>v</code>. The root concept is
     * the top class defined by the language profile of the model (for example
     * <code>owl:Thing</code>). The algorithm is based on
     * <a href="http://en.wikipedia.org/wiki/Tarjan's_off-line_least_common_ancestors_algorithm">Tarjan's
     * off-line LCA</a>. The current implementation expects that the given model:
     * </p>
     * <ul>
     * <li>is transitively closed over the <code>subClassOf</code> relation</li>
     * <li>can cheaply determine <em>direct sub-class</em> relations</li>
     * </ul>
     * <p>Both of these conditions are true of the built-in Jena OWL reasoners,
     * such as {@link OntModelSpec#OWL_MEM_MICRO_RULE_INF}, and external DL
     * reasoners such as Pellet.</p>
     * <p>This method only resolves the root concept and then delegates to
     * {@link #getLCA(OntModel, OntClass, OntClass, OntClass)}.</p>
     *
     * @param m The ontology model being queried to find the LCA, which should conform
     * to the reasoner capabilities described above
     * @param u An ontology class
     * @param v An ontology class
     * @return The LCA of <code>u</code> and <code>v</code>
     * @exception JenaException if the language profile of the given model does not
     * define a top concept (e.g. <code>owl:Thing</code>)
     */
    public static OntClass getLCA( OntModel m, OntClass u, OntClass v ) {
        Resource top = m.getProfile().THING();

        if (top != null) {
            // re-attach the profile's top concept to this model so it can be viewed as an OntClass
            OntClass rootClass = top.inModel( m ).as( OntClass.class );
            return getLCA( m, rootClass, u, v );
        }

        throw new JenaException( "The given OntModel has a language profile that does not define a generic root class (such as owl:Thing)" );
    }

    /**
     * Answer the lowest common ancestor of two classes, assuming that the given
     * class is the root concept to start searching from. See {@link #getLCA(OntModel, OntClass, OntClass)}
     * for details of the algorithm and of the capabilities expected of the model.
     * <p>Three cheap cases are answered without a traversal: if either class equals
     * <code>root</code> the answer is <code>root</code>; if one class reports the
     * other as a sub-class, the answer is that super-class. Otherwise the class
     * hierarchy is walked downwards from <code>root</code>.</p>
     *
     * @param m The ontology model being queried to find the LCA (not consulted
     * directly by this method; the hierarchy is read through the class arguments)
     * @param root The root concept, which will be the starting point for the algorithm
     * @param u An ontology class
     * @param v An ontology class
     * @return The LCA of <code>u</code> and <code>v</code>, or null if the traversal
     * from <code>root</code> did not record a common ancestor for the pair
     */
    public static OntClass getLCA( OntModel m, OntClass root, OntClass u, OntClass v ) {
        // shortcut 1: the root is trivially the LCA of itself and anything else
        boolean rootIsArgument = u.equals( root ) || v.equals( root );
        if (rootIsArgument) {
            return root;
        }

        // shortcut 2: one argument is already an ancestor of the other
        if (u.hasSubClass( v )) {
            return u;
        }
        else if (v.hasSubClass( u )) {
            return v;
        }

        // general case: run the Tarjan-style traversal and read the answer from the index
        LCAIndex solutions = new LCAIndex();
        lca( root, u, v, solutions );

        Resource answer = solutions.getLCA( u, v );
        return (OntClass) answer;
    }


    /**
     * <p>Answer the shortest path from the <code>start</code> resource to the <code>end</code> RDF node,
     * such that every step on the path is accepted by the given filter. A path is a {@link List}
     * of RDF {@link Statement}s. The subject of the first statement in the list is <code>start</code>,
     * and the object of the last statement in the list is <code>end</code>.</p>
     * <p>The <code>onPath</code> argument is a {@link Predicate}, which accepts a statement and returns
     * true if the statement should be considered to be on the path. To search for an unconstrained
     * path, pass <code>s -&gt; true</code> as an argument. To search for a path whose predicates match a
     * fixed restricted set of property names, pass an instance of {@link PredicatesFilter}.</p>
     * <p>If there is more than one path of minimal length from <code>start</code> to <code>end</code>,
     * this method returns an arbitrary one. The algorithm is blind breadth-first search,
     * with loop detection. Each node is placed on the search queue at most once: the
     * first path to reach a node is never longer than any later one, so later arrivals
     * at the same node are discarded rather than expanded again.</p>
     *
     * @param m The model in which we are seeking a path
     * @param start The starting resource
     * @param end The end, or goal, node
     * @param onPath A filter which determines whether a given statement can be considered part
     * of the path
     * @return A path, consisting of a list of statements whose first subject is <code>start</code>,
     * and whose last object is <code>end</code>, or null if no such path exists.
     */
    public static Path findShortestPath( Model m, Resource start, RDFNode end, Predicate<Statement> onPath ) {
        Deque<Path> bfs = new ArrayDeque<>();

        // Nodes that already terminate some queued (or processed) path. Note that start is
        // deliberately not pre-loaded, so that a cycle leading back to start can still be
        // discovered when end is the same node as start.
        Set<RDFNode> seen = new HashSet<>();

        // initialise the paths
        for (Iterator<Statement> i = m.listStatements( start, null, (RDFNode) null ).filterKeep( onPath ); i.hasNext(); ) {
            Statement link = i.next();
            if (seen.add( link.getObject() )) {
                bfs.add( new Path().append( link ) );
            }
        }

        // search
        while (!bfs.isEmpty()) {
            Path candidate = bfs.remove();

            if (candidate.hasTerminus( end )) {
                return candidate;
            }

            // only resources can have outgoing statements; a path ending in a literal is a dead end
            Resource terminus = candidate.getTerminalResource();
            if (terminus == null) {
                continue;
            }

            // breadth-first expansion; the iterator is always run to exhaustion
            for (Iterator<Statement> i = terminus.listProperties().filterKeep( onPath ); i.hasNext(); ) {
                Statement link = i.next();

                // no looping allowed, and no duplicate work: we skip this link if it takes us
                // to a node that some earlier (hence no longer) path has already reached
                if (seen.add( link.getObject() )) {
                    bfs.add( candidate.append( link ) );
                }
            }
        }

        return null;
    }


    /**
     * Answer a list of the named hierarchy roots of a given {@link OntModel}. This
     * will be similar to the results of {@link OntModel#listHierarchyRootClasses()},
     * with the added constraint that every member of the returned list will be a
     * named class, not an anonymous class expression. The named root classes are
     * calculated from the root classes, by recursively replacing every anonymous class
     * with its direct sub-classes. Thus the values in the list form the shallowest
     * fringe of named classes in the hierarchy.
     * @param m An ontology model
     * @return A list of classes whose members are the named root classes of the
     * class hierarchy in <code>m</code>
     */
    public static List<OntClass> namedHierarchyRoots( OntModel m ) {
        List<OntClass> namedRoots = new ArrayList<>();
        List<OntClass> anonFringe = new ArrayList<>();

        // seed both partitions from the model's own notion of root classes
        partitionByNamed( m.listHierarchyRootClasses(), namedRoots, anonFringe );

        // Walk the anonymous fringe in arrival order. Expanding an anonymous class may
        // append further anonymous classes to the end of the list, and those are picked
        // up by later iterations of this same loop.
        for (int next = 0; next < anonFringe.size(); next++) {
            OntClass anonymous = anonFringe.get( next );
            partitionByNamed( anonymous.listSubClasses( true ), namedRoots, anonFringe );
        }

        return namedRoots;
    }


    // Internal implementation methods
    //////////////////////////////////

    /**
     * Compute the LCA disjoint set at <code>cls</code>, noting that we are
     * searching for the LCA of <code>uCls</code> and <code>vCls</code>. The
     * direct sub-classes of <code>cls</code> are visited recursively, and the
     * set of each one is merged into the set of <code>cls</code>.
     * @param cls The class we are testing (this is 'u' in the Wiki article)
     * @param uCls One of the two classes we are searching for the LCA of. We
     * have simplified the set P of pairs to the unity set {uCls,vCls}
     * @param vCls One of the two classes we are searching for the LCA of. We
     * have simplified the set P of pairs to the unity set {uCls,vCls}
     * @param index A data structure mapping resources to disjoint sets (since
     * we can't side-effect Jena resources), and which is used to record the
     * LCA pairs
     * @return The disjoint set that <code>index</code> holds for <code>cls</code>
     */
    protected static DisjointSet lca( OntClass cls, OntClass uCls, OntClass vCls, LCAIndex index ) {
        DisjointSet current = index.getSet( cls );

        // a black set has been fully processed on an earlier visit: nothing more to do
        if (!current.isBlack()) {
            // first visit: the set starts out as its own distinguished ancestor
            current.setAncestor( current );

            Iterator<OntClass> children = cls.listSubClasses( true );
            while (children.hasNext()) {
                OntClass sub = children.next();

                // the reflexive case and the bottom concept are not descended into
                if (!sub.equals( cls ) && !sub.equals( cls.getProfile().NOTHING() )) {
                    // solve the sub-tree first ...
                    DisjointSet subTree = lca( sub, uCls, vCls, index );

                    // ... then merge it into this node's set ...
                    current.union( subTree );

                    // ... and make this node the ancestor recorded on the merged set's representative
                    current.find().setAncestor( current );
                }
            }

            // all children handled, so this node is finished
            current.setBlack();

            // if this node is one of the pair, see whether the other one is finished too
            if (cls.equals( uCls )) {
                checkSolution( uCls, vCls, index );
            }
            else if (cls.equals( vCls )) {
                checkSolution( vCls, uCls, index );
            }
        }

        return current;
    }

    /**
     * Check to see if we have found a solution to the problem: that is, whether
     * the sets for both classes have been completely processed (are black) and
     * neither has yet been used to report an answer. If so, both sets are marked
     * as used and the LCA is recorded in the index against the pair.
     * TODO: we could throw an exception to simulate a non-local exit
     * here, since we've assumed that P is the unity set.
     * @param uCls The class whose processing has just finished
     * @param vCls The other class of the pair; the LCA is read from the ancestor
     * recorded on the representative of this class's set
     * @param index The index holding the disjoint sets, and receiving the result
     */
    protected static void checkSolution( OntClass uCls, OntClass vCls, LCAIndex index ) {
        DisjointSet vSet = index.getSet( vCls );
        DisjointSet uSet = index.getSet( uCls );

        // both sides must be finished, and not previously consumed
        boolean vReady = vSet != null && vSet.isBlack() && !vSet.used();
        if (!vReady) {
            return;
        }

        boolean uReady = uSet != null && uSet.isBlack() && !uSet.used();
        if (!uReady) {
            return;
        }

        vSet.setUsed();
        uSet.setUsed();

        // the answer is the ancestor noted on the representative of v's set
        DisjointSet representative = vSet.find();
        OntClass answer = (OntClass) representative.getAncestor().getNode();
        index.setLCA( uCls, vCls, answer );
    }

    /**
     * Partition the members of an iterator into two lists, according to whether
     * they are named or anonymous classes. A class is dropped, rather than placed
     * in either list, if it is already in <code>named</code>, or if it has a
     * super-class that is neither anonymous, nor the profile's top concept, nor
     * the class itself.
     * @param i An iterator to partition
     * @param named A list of named classes, to which accepted named classes are added
     * @param anon A list of anonymous classes, to which accepted anonymous classes are added
     */
    protected static void partitionByNamed( Iterator<? extends OntClass> i, List<OntClass> named, List<OntClass> anon ) {
        while (i.hasNext()) {
            OntClass candidate = i.next();

            // duplicate check: a class we already hold as a named root is not added again
            boolean keep = !named.contains( candidate );

            // subsumption check: to still qualify as a root, the candidate may only have
            // anonymous classes, Thing, or itself as super-classes
            Resource top = candidate.getProfile().THING();
            Iterator<OntClass> supers = candidate.listSuperClasses();
            while (keep && supers.hasNext()) {
                OntClass parent = supers.next();
                keep = (top != null && parent.equals( top ))
                       || parent.isAnon()
                       || parent.equals( candidate );
            }

            if (keep) {
                // place the class in the appropriate partition
                if (candidate.isAnon()) {
                    anon.add( candidate );
                }
                else {
                    named.add( candidate );
                }
            }
        }
    }


    //==============================================================================
    // Inner class definitions
    //==============================================================================

    /**
     * A simple representation of disjoint sets. Each instance stands for one
     * resource, and is linked through a parent pointer to the representative of
     * the union it currently belongs to. It also carries the bookkeeping used by
     * the LCA computation: an ancestor link, a processed ("black") flag and a
     * "used" flag.
     */
    public static class DisjointSet
    {
        /** The resource this set represents */
        private Resource m_node;

        /** The parent set in a union; a set that is its own parent is a representative */
        private DisjointSet m_parent;

        /** Heuristic used to build balanced unions */
        private int m_rank;

        /** The link to the distinguished member set */
        private DisjointSet m_ancestor;

        /** Set to true when the node has been processed */
        private boolean m_black = false;

        /** Set to true when we've inspected a black set, since the result is only
         * correct just after both of the sets for u and v have been marked black */
        private boolean m_used = false;

        /**
         * Create a singleton set for the given resource: rank zero, and its own parent.
         * @param node The resource this set represents
         */
        public DisjointSet( Resource node ) {
            m_node = node;
            m_rank = 0;
            m_parent = this;
        }

        /** @return The resource this set represents */
        public Resource getNode() {
            return m_node;
        }

        /** @return The parent of this set, which is this set itself for a representative */
        public DisjointSet getParent() {
            return m_parent;
        }

        /** @param parent The new parent of this set */
        public void setParent( DisjointSet parent ) {
            m_parent = parent;
        }

        /** @return The current rank of this set */
        public int getRank() {
            return m_rank;
        }

        /** Add one to the rank of this set */
        public void incrementRank() {
            m_rank++;
        }

        /** @return The distinguished ancestor set, or null if none has been assigned */
        public DisjointSet getAncestor() {
            return m_ancestor;
        }

        /** @param anc The new distinguished ancestor set */
        public void setAncestor( DisjointSet anc ) {
            m_ancestor = anc;
        }

        /** Mark this set as fully processed */
        public void setBlack() {
            m_black = true;
        }

        /** @return True if this set has been marked as fully processed */
        public boolean isBlack() {
            return m_black;
        }

        /** @return True if this set has already been used to report a result */
        public boolean used() {
            return m_used;
        }

        /** Mark this set as having been used to report a result */
        public void setUsed() {
            m_used = true;
        }

        /**
         * The find operation collapses the pointer to the root parent, which is
         * one of Tarjan's standard optimisations.
         * @return The representative of the union containing this set
         */
        public DisjointSet find() {
            // first pass: climb the parent links to the representative
            DisjointSet representative = this;
            while (representative.getParent() != representative) {
                representative = representative.getParent();
            }

            // second pass: point every set on the climbed chain straight at the representative
            DisjointSet cursor = this;
            while (cursor != representative) {
                DisjointSet above = cursor.getParent();
                cursor.setParent( representative );
                cursor = above;
            }

            return representative;
        }

        /**
         * The union of two sets. The representative of lower rank is attached
         * beneath the one of higher rank; on a tie, the representative of
         * <code>y</code> is attached beneath the representative of this set,
         * whose rank then grows by one. Nothing happens if both sets already
         * share a representative.
         * @param y The set to merge with this one
         */
        public void union( DisjointSet y ) {
            DisjointSet xRoot = find();
            DisjointSet yRoot = y.find();

            if (xRoot == yRoot) {
                // already in the same union
                return;
            }

            int order = Integer.compare( xRoot.getRank(), yRoot.getRank() );
            if (order < 0) {
                xRoot.setParent( yRoot );
            }
            else {
                yRoot.setParent( xRoot );
                if (order == 0) {
                    xRoot.incrementRank();
                }
            }
        }

        /**
         * @see java.lang.Object#toString()
         * @return A string representation of this set for debugging
         */
        @Override
        public String toString() {
            return "DisjointSet{node=" + m_node
                   + ",anc=" + describeLink( getAncestor() )
                   + ",parent=" + describeLink( getParent() )
                   + ",rank=" + getRank()
                   + (m_black ? ",black" : ",white")
                   + "}";
        }

        /**
         * @return An abbreviated string representation showing only the node and the
         * chain of parents
         */
        public String toShortString() {
            return "DisjointSet{node=" + m_node
                   + ",parent=" + describeLink( getParent() )
                   + "...}";
        }

        /** Render a linked set for debugging: "self", "null", or the short form of the other set */
        private String describeLink( DisjointSet other ) {
            if (other == this) {
                return "self";
            }
            return (other == null) ? "null" : other.toShortString();
        }
    }

    /**
     * Simple data structure mapping RDF nodes to disjoint sets, and
     * pairs of resources to their LCA.
     */
    public static class LCAIndex
    {
        /** The disjoint set created for each resource seen so far */
        private Map<Resource, DisjointSet> m_setIndex = new HashMap<>();

        /** Recorded answers: first resource of a pair, then second resource, then their LCA */
        private Map<Resource, Map<Resource, Resource>> m_lcaIndex = new HashMap<>();

        /**
         * Answer the LCA recorded for a pair of resources. The pair is looked up
         * in the order given first, and then in the reverse order.
         * @param u One resource of the pair
         * @param v The other resource of the pair
         * @return The recorded LCA, or null if none has been recorded for the pair
         */
        public Resource getLCA( Resource u, Resource v ) {
            Resource forwards = lookup( u, v );
            return (forwards != null) ? forwards : lookup( v, u );
        }

        /**
         * Record the LCA of a pair of resources, filed under <code>u</code> and then <code>v</code>.
         * @param u The first resource of the pair
         * @param v The second resource of the pair
         * @param lca The LCA to record
         */
        public void setLCA( Resource u, Resource v, Resource lca ) {
            m_lcaIndex.computeIfAbsent( u, key -> new HashMap<>() ).put( v, lca );
        }

        /**
         * Answer the disjoint set for a resource, creating and remembering a new
         * singleton set on first request.
         * @param r A resource
         * @return The disjoint set associated with <code>r</code>; never null
         */
        public DisjointSet getSet( Resource r ) {
            return m_setIndex.computeIfAbsent( r, DisjointSet::new );
        }

        /** One-directional lookup in the answer table; null if there is no entry */
        private Resource lookup( Resource first, Resource second ) {
            Map<Resource, Resource> row = m_lcaIndex.get( first );
            return (row == null) ? null : row.get( second );
        }
    }

    /**
     * A path is an application of {@link java.util.List} containing only {@link Statement}
     * objects, and in which for all adjacent elements <code>S<sub>i-1</sub></code>
     * and <code>S<sub>i</sub></code>, where {@code i > 0}, it is intended that:
     * <pre>S<sub>i-1</sub>.getObject().equals( S<sub>i</sub>.getSubject() )</pre>
     * The class itself does not check this property when statements are added.
     */
    public static class Path extends ArrayList<Statement>
    {
        /** Create an empty path */
        public Path() {
            super();
        }

        /**
         * Create a path holding the same statements, in the same order, as another path.
         * @param basePath The path to copy
         */
        public Path( Path basePath ) {
            super( basePath );
        }

        /**
         * @param i A zero-based position on the path
         * @return The statement at position <code>i</code>
         */
        public Statement getStatement( int i ) {
            return get( i );
        }

        /**
         * Answer a new Path whose elements are this Path with <code>s</code> added at the end.
         * This path is left unchanged.
         */
        public Path append( Statement s ) {
            Path extended = new Path( this );
            extended.add( s );
            return extended;
        }

        /** Answer true if the last link on the path has object equal to <code>n</code>  */
        public boolean hasTerminus( RDFNode n ) {
            if (n == null) {
                return false;
            }
            return n.equals( getTerminal() );
        }

        /** Answer the RDF node at the end of the path, if defined, or null */
        public RDFNode getTerminal() {
            if (isEmpty()) {
                return null;
            }
            Statement last = get( size() - 1 );
            return last.getObject();
        }

        /** Answer the resource at the end of the path, if defined, or null */
        public Resource getTerminalResource() {
            RDFNode last = getTerminal();
            if (last == null || !last.isResource()) {
                // an empty path, or a path that ends in something other than a resource
                return null;
            }
            return (Resource) last;
        }
    }

    /**
     * A filter which accepts statements whose predicate matches one of a collection
     * of predicates held by the filter object.
     */
    public static class PredicatesFilter implements Predicate<Statement>
    {
        /** The predicates accepted by this filter */
        public Collection<Property> m_preds;

        /**
         * Accept statements with any predicate from <code>preds</code>. The
         * collection is held by reference, not copied.
         */
        public PredicatesFilter( Collection<Property> preds ) {
            m_preds = preds;
        }

        /** Accept statements with any predicate from <code>preds</code> */
        public PredicatesFilter( Property[] preds ) {
            m_preds = new HashSet<>();
            Collections.addAll( m_preds, preds );
        }

        /** Accept statements with predicate <code>pred</code> */
        public PredicatesFilter( Property pred ) {
            this( new Property[] { pred } );
        }

        /** Answer true if the predicate of <code>s</code> is one of the accepted predicates */
        @Override public boolean test( Statement s ) {
            Property predicate = s.getPredicate();
            return m_preds.contains( predicate );
        }
    }
}