/**
* PODD is an OWL ontology database used for scientific project management
*
* Copyright (C) 2009-2013 The University Of Queensland
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see <http://www.gnu.org/licenses/>.
*/
package com.github.podd.utils;
import info.aduna.iteration.Iterations;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Set;
import org.openrdf.OpenRDFException;
import org.openrdf.model.Model;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.impl.LinkedHashModel;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.openrdf.model.vocabulary.OWL;
import org.openrdf.query.GraphQuery;
import org.openrdf.query.QueryResults;
import org.openrdf.query.TupleQuery;
import org.openrdf.query.impl.DatasetImpl;
import org.openrdf.query.resultio.helpers.QueryResultCollector;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.Rio;
import org.openrdf.rio.helpers.StatementCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Static helper methods for executing SPARQL queries against a repository, checking whether the
 * URI nodes of an RDF graph are reachable from a root node, and parsing RDF from an
 * {@link InputStream}.
 *
 * @author kutila
 *
 */
public class RdfUtility
{
    private static final Logger log = LoggerFactory.getLogger(RdfUtility.class);

    /** Queries taking longer than this many milliseconds have their call site logged at DEBUG. */
    private static final long SLOW_QUERY_DEBUG_THRESHOLD_MS = 50;

    /** Queries taking longer than this many milliseconds have their call site logged at TRACE. */
    private static final long SLOW_QUERY_TRACE_THRESHOLD_MS = 30;

    /** Number of nanoseconds in one millisecond, used to convert {@link System#nanoTime()} deltas. */
    private static final long NANOS_PER_MILLI = 1000000L;

    /** The owl:NamedIndividual class URI, for which the OWL vocabulary constants are not used here. */
    private static final URI OWL_NAMED_INDIVIDUAL = ValueFactoryImpl.getInstance().createURI(
            "http://www.w3.org/2002/07/owl#NamedIndividual");

    /**
     * Helper method to execute a given SPARQL Graph query.
     *
     * The given contexts are installed on the query as the default graphs of a new dataset, which
     * replaces any dataset previously set on the query.
     *
     * @param graphQuery
     *            The prepared graph query to evaluate.
     * @param contexts
     *            The contexts to use as the default graphs for the query.
     * @return A {@link Model} containing the statements produced by the query.
     * @throws OpenRDFException
     *             If the query could not be evaluated.
     */
    public static Model executeGraphQuery(final GraphQuery graphQuery, final URI... contexts) throws OpenRDFException
    {
        graphQuery.setDataset(RdfUtility.buildDataset(contexts));

        final Model results = new LinkedHashModel();
        final long before = System.nanoTime();
        graphQuery.evaluate(new StatementCollector(results));
        RdfUtility.logQueryDuration("graph", (System.nanoTime() - before) / RdfUtility.NANOS_PER_MILLI);

        return results;
    }

    /**
     * Helper method to execute a given SPARQL Tuple query, which may have had bindings attached.
     *
     * The given contexts are installed on the query as the default graphs of a new dataset, which
     * replaces any dataset previously set on the query.
     *
     * @param tupleQuery
     *            The prepared tuple query to evaluate.
     * @param contexts
     *            The contexts to use as the default graphs for the query.
     * @return A {@link QueryResultCollector} to which the query results have been reported.
     * @throws OpenRDFException
     *             If the query could not be evaluated.
     */
    public static QueryResultCollector executeTupleQuery(final TupleQuery tupleQuery, final URI... contexts)
        throws OpenRDFException
    {
        tupleQuery.setDataset(RdfUtility.buildDataset(contexts));

        final QueryResultCollector results = new QueryResultCollector();
        final long before = System.nanoTime();
        QueryResults.report(tupleQuery.evaluate(), results);
        RdfUtility.logQueryDuration("tuple", (System.nanoTime() - before) / RdfUtility.NANOS_PER_MILLI);

        return results;
    }

    /**
     * Given a set of RDF Statements, and a Root node, this method finds any nodes that are not
     * connected to the Root node.
     *
     * A <b>Node</b> is a Value that is of type URI (i.e. Literals are ignored).
     *
     * A direct connection between two nodes exist if there is a Statement with the two nodes as the
     * Subject and the Object. Connections are followed from Subject to Object only, starting at the
     * Root.
     *
     * The Root itself, a small set of OWL vocabulary terms, and nodes that only occur in
     * owl:imports or owl:versionIRI statements are not reported as disconnected.
     *
     * @param root
     *            The Root of the Graph, from which connectedness is calculated.
     * @param connection
     *            A RepositoryConnection
     * @param context
     *            The Graph containing statements.
     * @return A <code>Set</code> containing any URIs that are not connected to the Root.
     * @throws RepositoryException
     *             If the statements could not be read from the repository.
     */
    public static Set<URI> findDisconnectedNodes(final URI root, final RepositoryConnection connection,
            final URI... context) throws RepositoryException
    {
        final List<URI> exclusions =
                Arrays.asList(root, OWL.THING, OWL.ONTOLOGY, OWL.INDIVIDUAL, RdfUtility.OWL_NAMED_INDIVIDUAL);
        final List<URI> propertyExclusions = Arrays.asList(OWL.IMPORTS, OWL.VERSIONIRI);

        // - identify nodes that should be connected to the root
        final Set<URI> nodesToCheck = new HashSet<URI>();
        final List<Statement> allStatements =
                Iterations.asList(connection.getStatements(null, null, null, false, context));
        for(final Statement s : allStatements)
        {
            if(propertyExclusions.contains(s.getPredicate()))
            {
                continue;
            }

            final Value objectValue = s.getObject();
            if(objectValue instanceof URI && !exclusions.contains(objectValue))
            {
                nodesToCheck.add((URI)objectValue);
            }

            final Resource subjectValue = s.getSubject();
            if(subjectValue instanceof URI && !exclusions.contains(subjectValue))
            {
                nodesToCheck.add((URI)subjectValue);
            }
        }

        if(nodesToCheck.isEmpty())
        {
            // nothing needs to be reachable, so the traversal (one repository query per visited
            // node) can be skipped entirely
            return nodesToCheck;
        }

        // - check for connectivity using a breadth-first traversal from the root
        // LinkedList is used deliberately, as it accepts a null root where ArrayDeque would not
        final Queue<URI> queue = new LinkedList<URI>();
        final Set<URI> visitedNodes = new HashSet<URI>(); // to handle cycles
        queue.add(root);
        visitedNodes.add(root);

        while(!queue.isEmpty())
        {
            final URI currentNode = queue.remove();
            for(final URI child : RdfUtility.getImmediateChildren(currentNode, connection, context))
            {
                // reaching the child proves it is connected; stop as soon as nothing is left
                if(nodesToCheck.remove(child) && nodesToCheck.isEmpty())
                {
                    // all identified nodes are connected.
                    return nodesToCheck;
                }

                // Set.add returns false for nodes that were already queued
                if(visitedNodes.add(child))
                {
                    queue.add(child);
                }
            }
        }

        RdfUtility.log.debug("{} unconnected node(s). {}", nodesToCheck.size(), nodesToCheck);
        return nodesToCheck;
    }

    /**
     * Helper method to load an {@link InputStream} into an {@link Model}.
     *
     * The data is parsed with an empty base URI. This method does not explicitly close the given
     * stream.
     *
     * @param resourceStream
     *            The input stream with RDF statements
     * @param format
     *            Format found in the input RDF data
     * @return an {@link Model} populated with the statements from the input stream.
     *
     * @throws OpenRDFException
     *             If the RDF data could not be parsed.
     * @throws IOException
     *             If the input stream was null, or could not be read.
     */
    public static Model inputStreamToModel(final InputStream resourceStream, final RDFFormat format)
        throws OpenRDFException, IOException
    {
        if(resourceStream == null)
        {
            throw new IOException("Inputstream was null");
        }

        return Rio.parse(resourceStream, "", format);
    }

    /**
     * Internal helper method to build a dataset which has each of the given contexts as a default
     * graph.
     *
     * @param contexts
     *            The contexts to add as default graphs.
     * @return A new dataset containing the given default graphs.
     */
    private static DatasetImpl buildDataset(final URI... contexts)
    {
        final DatasetImpl dataset = new DatasetImpl();
        for(final URI uri : contexts)
        {
            dataset.addDefaultGraph(uri);
        }
        return dataset;
    }

    /**
     * Internal helper method to retrieve the direct child objects of a given object.
     *
     * The children are the URI objects of all statements which have the given node as their
     * subject. Objects that are not URIs are ignored.
     *
     * @param node
     *            The node whose children are required.
     * @param connection
     *            A RepositoryConnection
     * @param context
     *            The Graph containing statements.
     * @return A list of the URI objects found, one entry per matching statement.
     * @throws RepositoryException
     *             If the statements could not be read from the repository.
     */
    private static List<URI> getImmediateChildren(final URI node, final RepositoryConnection connection,
            final URI... context) throws RepositoryException
    {
        final List<URI> children = new ArrayList<URI>();
        final List<Statement> childStatements =
                Iterations.asList(connection.getStatements(node, null, null, false, context));
        for(final Statement s : childStatements)
        {
            final Value objectValue = s.getObject();
            if(objectValue instanceof URI)
            {
                children.add((URI)objectValue);
            }
        }
        return children;
    }

    /**
     * Internal helper method to log the time taken by a query and, for slow queries, the stack of
     * the caller so that the origin of the query can be identified.
     *
     * The call site is reported through the logger rather than being printed to standard error.
     *
     * @param queryType
     *            A short label for the kind of query, used in the log messages.
     * @param elapsedMillis
     *            The time taken to evaluate the query, in milliseconds.
     */
    private static void logQueryDuration(final String queryType, final long elapsedMillis)
    {
        RdfUtility.log.debug("{} query took {}", queryType, elapsedMillis);

        if(elapsedMillis > RdfUtility.SLOW_QUERY_DEBUG_THRESHOLD_MS && RdfUtility.log.isDebugEnabled())
        {
            RdfUtility.log.debug("Slow " + queryType + " query (" + elapsedMillis + " ms) was issued from:",
                    new Throwable("Slow query call site"));
        }
        else if(elapsedMillis > RdfUtility.SLOW_QUERY_TRACE_THRESHOLD_MS && RdfUtility.log.isTraceEnabled())
        {
            RdfUtility.log.trace("Slow " + queryType + " query (" + elapsedMillis + " ms) was issued from:",
                    new Throwable("Slow query call site"));
        }
    }
}