// RdfUtility.java example
//
// Explorer: podd-redesign-master
/**
 * PODD is an OWL ontology database used for scientific project management
 *
 * Copyright (C) 2009-2013 The University Of Queensland
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see <http://www.gnu.org/licenses/>.
 */
package com.github.podd.utils;

import info.aduna.iteration.Iterations;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Set;

import org.openrdf.OpenRDFException;
import org.openrdf.model.Model;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.impl.LinkedHashModel;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.openrdf.model.vocabulary.OWL;
import org.openrdf.query.GraphQuery;
import org.openrdf.query.QueryResults;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.impl.DatasetImpl;
import org.openrdf.query.resultio.helpers.QueryResultCollector;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.Rio;
import org.openrdf.rio.helpers.StatementCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helper methods for executing SPARQL queries, checking the connectivity of an RDF graph
 * and parsing RDF from an input stream.
 *
 * @author kutila
 *
 */
public class RdfUtility
{
    
    private static final Logger log = LoggerFactory.getLogger(RdfUtility.class);
    
    /**
     * Queries that take longer than this many milliseconds have their call site logged when DEBUG
     * logging is enabled.
     */
    private static final long SLOW_QUERY_DEBUG_MILLIS = 50;
    
    /**
     * Queries that take longer than this many milliseconds have their call site logged when TRACE
     * logging is enabled.
     */
    private static final long SLOW_QUERY_TRACE_MILLIS = 30;
    
    /** Number of nanoseconds in one millisecond, used to convert {@link System#nanoTime()} deltas. */
    private static final long NANOS_PER_MILLI = 1000000L;
    
    /** The owl:NamedIndividual type, created here because it is not referenced through {@link OWL}. */
    private static final URI OWL_NAMED_INDIVIDUAL = ValueFactoryImpl.getInstance().createURI(
            "http://www.w3.org/2002/07/owl#NamedIndividual");
    
    /**
     * Helper method to execute a given SPARQL Graph query.
     *
     * Each of the given contexts is added as a default graph of the dataset that is set on the
     * query before it is evaluated. The time taken to evaluate the query is logged at DEBUG level.
     *
     * @param graphQuery
     *            The query to evaluate. Its dataset is replaced by this method.
     * @param contexts
     *            The contexts to use as the default graphs of the query.
     * @return A {@link Model} containing the statements produced by the query, in the order they
     *         were reported.
     * @throws OpenRDFException
     *             If evaluating the query fails.
     */
    public static Model executeGraphQuery(final GraphQuery graphQuery, final URI... contexts) throws OpenRDFException
    {
        graphQuery.setDataset(RdfUtility.buildDataset(contexts));
        
        final Model results = new LinkedHashModel();
        final long startNanos = System.nanoTime();
        graphQuery.evaluate(new StatementCollector(results));
        RdfUtility.logQueryDuration("graph", startNanos);
        
        return results;
    }
    
    /**
     * Helper method to execute a given SPARQL Tuple query, which may have had bindings attached.
     *
     * Each of the given contexts is added as a default graph of the dataset that is set on the
     * query before it is evaluated. The time taken to evaluate the query is logged at DEBUG level.
     *
     * @param tupleQuery
     *            The query to evaluate. Its dataset is replaced by this method.
     * @param contexts
     *            The contexts to use as the default graphs of the query.
     * @return A {@link QueryResultCollector} to which the results of the query were reported.
     * @throws OpenRDFException
     *             If evaluating the query fails.
     */
    public static QueryResultCollector executeTupleQuery(final TupleQuery tupleQuery, final URI... contexts)
        throws OpenRDFException
    {
        tupleQuery.setDataset(RdfUtility.buildDataset(contexts));
        
        final QueryResultCollector results = new QueryResultCollector();
        final long startNanos = System.nanoTime();
        QueryResults.report(tupleQuery.evaluate(), results);
        RdfUtility.logQueryDuration("tuple", startNanos);
        
        return results;
    }
    
    /**
     * Given a Root node and the statements found in the given contexts, this method finds any
     * nodes that cannot be reached from the Root node.
     *
     * A <b>Node</b> is a Value that is of type URI (i.e. Literals and blank nodes are ignored).
     *
     * The nodes to check are the URI subjects and objects of all non-inferred statements, except
     * for statements whose predicate is owl:imports or owl:versionIRI, and except for the Root
     * itself and the owl:Thing, owl:Ontology, owl:Individual and owl:NamedIndividual types.
     *
     * Reachability is directed: starting at the Root, statements are only followed from their
     * Subject to their Object. The traversal follows statements with any predicate, including the
     * two predicates that are excluded when collecting the nodes to check.
     *
     * @param root
     *            The Root of the Graph, from which connectedness is calculated.
     * @param connection
     *            A RepositoryConnection
     * @param context
     *            The Graph(s) containing statements.
     * @return A <code>Set</code> containing any URIs that are not connected to the Root. The set is
     *         empty if all nodes are connected.
     * @throws RepositoryException
     *             If statements cannot be retrieved from the connection.
     */
    public static Set<URI> findDisconnectedNodes(final URI root, final RepositoryConnection connection,
            final URI... context) throws RepositoryException
    {
        final Set<URI> nodesToCheck = RdfUtility.collectNodesToCheck(root, connection, context);
        
        // Breadth-first traversal from the root, ticking off every node that is reached
        final Queue<URI> queue = new LinkedList<URI>();
        final Set<URI> visitedNodes = new HashSet<URI>(); // to handle cycles
        queue.add(root);
        visitedNodes.add(root);
        
        while(!queue.isEmpty())
        {
            final URI currentNode = queue.remove();
            
            for(final URI child : RdfUtility.getImmediateChildren(currentNode, connection, context))
            {
                // remove() reports whether the child was still outstanding
                if(nodesToCheck.remove(child) && nodesToCheck.isEmpty())
                {
                    // all identified nodes are connected, no need to traverse any further
                    return nodesToCheck;
                }
                // add() reports whether this is the first time the child has been seen
                if(visitedNodes.add(child))
                {
                    queue.add(child);
                }
            }
        }
        RdfUtility.log.debug("{} unconnected node(s). {}", nodesToCheck.size(), nodesToCheck);
        return nodesToCheck;
    }
    
    /**
     * Helper method to load an {@link InputStream} into an {@link Model}.
     *
     * The stream is parsed using the empty string as the base URI. This method does not explicitly
     * close the stream.
     *
     * @param resourceStream
     *            The input stream with RDF statements
     * @param format
     *            Format found in the input RDF data
     * @return an {@link Model} populated with the statements from the input stream.
     *
     * @throws OpenRDFException
     *             If the RDF data cannot be parsed.
     * @throws IOException
     *             If the input stream is null or cannot be read.
     */
    public static Model inputStreamToModel(final InputStream resourceStream, final RDFFormat format)
        throws OpenRDFException, IOException
    {
        if(resourceStream == null)
        {
            throw new IOException("Inputstream was null");
        }
        return Rio.parse(resourceStream, "", format);
    }
    
    /**
     * Internal helper method to create a dataset which has each of the given contexts as a default
     * graph.
     *
     * @param contexts
     *            The contexts to add as default graphs.
     * @return A new dataset containing the given default graphs.
     */
    private static DatasetImpl buildDataset(final URI... contexts)
    {
        final DatasetImpl dataset = new DatasetImpl();
        for(final URI uri : contexts)
        {
            dataset.addDefaultGraph(uri);
        }
        return dataset;
    }
    
    /**
     * Internal helper method to identify the nodes which are expected to be connected to the root.
     *
     * @param root
     *            The Root of the Graph, which is never included in the result.
     * @param connection
     *            A RepositoryConnection
     * @param context
     *            The Graph(s) containing statements.
     * @return A modifiable <code>Set</code> of the URIs that need to be checked for connectivity.
     * @throws RepositoryException
     *             If statements cannot be retrieved from the connection.
     */
    private static Set<URI> collectNodesToCheck(final URI root, final RepositoryConnection connection,
            final URI... context) throws RepositoryException
    {
        final Set<URI> exclusions =
                new HashSet<URI>(Arrays.asList(root, OWL.THING, OWL.ONTOLOGY, OWL.INDIVIDUAL,
                        RdfUtility.OWL_NAMED_INDIVIDUAL));
        final Set<URI> propertyExclusions = new HashSet<URI>(Arrays.asList(OWL.IMPORTS, OWL.VERSIONIRI));
        
        final Set<URI> nodesToCheck = new HashSet<URI>();
        
        final List<Statement> allStatements =
                Iterations.asList(connection.getStatements(null, null, null, false, context));
        for(final Statement s : allStatements)
        {
            if(propertyExclusions.contains(s.getPredicate()))
            {
                continue;
            }
            
            final Value objectValue = s.getObject();
            if(objectValue instanceof URI && !exclusions.contains(objectValue))
            {
                nodesToCheck.add((URI)objectValue);
            }
            
            final Resource subjectValue = s.getSubject();
            if(subjectValue instanceof URI && !exclusions.contains(subjectValue))
            {
                nodesToCheck.add((URI)subjectValue);
            }
        }
        return nodesToCheck;
    }
    
    /**
     * Internal helper method to retrieve the direct child objects of a given object, i.e. the URI
     * objects of all non-inferred statements which have the given node as their subject.
     *
     * @param node
     *            The node whose children are required.
     * @param connection
     *            A RepositoryConnection
     * @param context
     *            The Graph(s) containing statements.
     * @return A list of child URIs, which may contain duplicates.
     * @throws RepositoryException
     *             If statements cannot be retrieved from the connection.
     */
    private static List<URI> getImmediateChildren(final URI node, final RepositoryConnection connection,
            final URI... context) throws RepositoryException
    {
        final List<URI> children = new ArrayList<URI>();
        final List<Statement> childStatements =
                Iterations.asList(connection.getStatements(node, null, null, false, context));
        for(final Statement s : childStatements)
        {
            final Value objectValue = s.getObject();
            if(objectValue instanceof URI)
            {
                children.add((URI)objectValue);
            }
        }
        return children;
    }
    
    /**
     * Internal helper method to log how long a query took and, for slow queries, the call site
     * that issued it.
     *
     * The call site is reported through the logger, as a stack trace attached to the log message,
     * so that it ends up in the same place as the rest of the log output.
     *
     * @param queryType
     *            A short label for the kind of query, used in the log messages.
     * @param startNanos
     *            The value of {@link System#nanoTime()} taken immediately before the query was
     *            evaluated.
     */
    private static void logQueryDuration(final String queryType, final long startNanos)
    {
        final long total = (System.nanoTime() - startNanos) / RdfUtility.NANOS_PER_MILLI;
        RdfUtility.log.debug("{} query took {}", queryType, total);
        
        if(total > RdfUtility.SLOW_QUERY_DEBUG_MILLIS && RdfUtility.log.isDebugEnabled())
        {
            RdfUtility.log.debug("slow " + queryType + " query (" + total + " ms) was issued from:", new Throwable(
                    "call site of slow query"));
        }
        else if(total > RdfUtility.SLOW_QUERY_TRACE_MILLIS && RdfUtility.log.isTraceEnabled())
        {
            RdfUtility.log.trace("slow " + queryType + " query (" + total + " ms) was issued from:", new Throwable(
                    "call site of slow query"));
        }
    }
    
}