/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.reasoner.transitiveReasoner;
import java.util.*;
import org.apache.jena.graph.* ;
import org.apache.jena.reasoner.* ;
import org.apache.jena.util.iterator.* ;
/**
* Datastructure used to represent a closed transitive reflexive relation.
* It (mostly) incrementally maintains a transitive reduction and transitive
* closure of the relationship and so queries should be faster than dynamically
* computing the closed or reduced relations.
* <p>
* The implementation stores the reduced and closed relations as real graph
* (objects linked together by pointers). For each graph node we store its direct
* predecessors and successors and its closed successors. A cost penalty
* is the storage turnover involved in turning the graph representation back into
* triples to answer queries. We could avoid this by optionally also storing the
* manifested triples for the links.
* </p><p>
* Cycles are currently handled by collapsing strongly connected components.
* Incremental deletes would be possible but at the price of substantially
* more storage and code complexity. We compromise by doing the easy cases
* incrementally but some deletes (those that break strongly connected components)
* will trigger a fresh rebuild.
* </p><p>
* TODO Combine this with interval indexes (Agrawal, Borgida and Jagadish 1989)
* for storing the closure of the predecessor relationship. Typical graphs
* will be nearly tree shaped so the successor closure is modest (L^2 where
* L is the depth of the tree branch) but the predecessor closure would be
* expensive. The interval index would handle predecessor closure nicely.
* </p>
*/
// Note to maintainers. The GraphNode object is treated as a record structure
// rather than an abstract datatype by the rest of the GraphCache code - which
// directly access its structure. I justify this on the flimsy grounds that it is a
// private inner class.
public class TransitiveGraphCache implements Finder {
/** Flag controlling whether the triples
* representing the closed relation should also be cached. */
protected boolean cacheTriples = false;
/** Map from RDF Node to the corresponding Graph node. */
protected HashMap<Node, GraphNode> nodeMap = new HashMap<>();
/** The RDF predicate representing the direct relation */
protected Node directPredicate;
/** The RDF predicate representing the closed relation */
protected Node closedPredicate;
/** The set of pending deletes which break the cycle-free normal form; null until the first delete is queued */
protected Set<Triple> deletesPending;
/** The original triples, needed for processing delete operations
* because some information is lost in the SCC process */
protected Set<Triple> originalTriples = new HashSet<>();
/**
* Inner interface used to represent visitors that can be applied to each
* node in a graph walk.
* <p>
* The two type parameters are the types of the two extra arguments that the
* walk hands to every call of {@link #visit}.
* </p>
*/
static interface Visitor<Alpha, Beta> {
// Visit a single node during the walk.
// The visitor must not delete any pred entries itself, to avoid a
// ConcurrentModificationException (CME) in the walk that is calling it.
// If deletion is needed, return a non-null result which is a list of pred nodes to kill;
// return null when nothing needs to be removed.
// NOTE(review): 'processing' is supplied by the walker (GraphNode, not shown here) -
// presumably the node whose predecessors are currently being expanded; confirm there.
List<GraphNode> visit(GraphNode node, GraphNode processing, Alpha arg1, Beta arg2);
}
/**
 * Iterator that walks the backward (predecessor) links of the graph from a
 * start node and reports every node it reaches as a triple
 * {@code (reached, predicate, root)}.
 * <p>
 * The first triple returned is always the implicit reflexive one
 * {@code (root, predicate, root)}, followed by the aliases (siblings) of the
 * start node and then its predecessors. In a deep walk the predecessors of each
 * newly visited node are explored depth-first as well.
 * </p><p>
 * The triples are dynamically allocated which is costly.
 * </p>
 */
static class GraphWalker extends NiceIterator<Triple> implements ExtendedIterator<Triple> {

    /** Indicate if this is a shallow or deep walk */
    boolean isDeep;

    /** The current node being visited */
    GraphNode node;

    /** The root node for reconstructing triples */
    Node root;

    /** The predicate for reconstructing triples */
    Node predicate;

    /** Iterator over the predecessors to the current node being walked */
    Iterator<GraphNode> iterator = null;

    /** Iterator over the aliases of the current predecessor being output */
    Iterator<GraphNode> aliasIterator = null;

    /** stack of graph nodes being walked */
    ArrayList<GraphNode> nodeStack = new ArrayList<>();

    /** stack of iterators for the higher nodes in the walk */
    ArrayList<Iterator<GraphNode>> iteratorStack = new ArrayList<>();

    /** The next value to be returned, or null once the walk is finished */
    Triple next;

    /** The set of junction nodes already visited */
    HashSet<GraphNode> visited = new HashSet<>();

    /**
     * Constructor. Creates an iterator which will walk
     * the graph, returning triples.
     * @param node the starting node for the walk
     * @param rdfNode the rdfNode we are trying to find predecessors for
     * @param closed set to true if walking the whole transitive closure
     * @param predicate the predicate to be walked
     */
    GraphWalker(GraphNode node, Node rdfNode, boolean closed, Node predicate) {
        isDeep = closed;
        this.node = node;
        this.root = rdfNode;
        this.predicate = predicate;
        this.iterator = node.pred.iterator();
        aliasIterator = node.siblingIterator();
        next = new Triple(root, predicate, root);  // implicit reflexive case
    }

    /** Iterator interface - test if more values available */
    @Override public boolean hasNext() {
        return next != null;
    }

    /**
     * Iterator interface - get next value.
     * @throws NoSuchElementException if the walk is already exhausted
     */
    @Override public Triple next() {
        if (next == null) {
            // Honour the java.util.Iterator contract instead of handing back null
            throw new NoSuchElementException();
        }
        Triple toReturn = next;
        walkOne();
        return toReturn;
    }

    /**
     * Walk one step: advance {@link #next} to the following triple, or set it
     * to null when the walk is complete.
     */
    protected void walkOne() {
        // First drain the aliases of the node that was reported last
        if (aliasIterator != null) {
            if (aliasIterator.hasNext()) {
                GraphNode alias = aliasIterator.next();
                next = new Triple(alias.rdfNode, predicate, root);
                return;
            }
            aliasIterator = null;
        }
        // Iterative rather than recursive so that a long run of already-visited
        // junctions or finished stack levels cannot exhaust the call stack.
        while (true) {
            if (iterator.hasNext()) {
                GraphNode candidate = iterator.next();
                if (visited.add(candidate)) {
                    // Set up for depth-first visit next
                    if (isDeep) {
                        pushStack(candidate);
                    }
                    next = new Triple(candidate.rdfNode, predicate, root);
                    aliasIterator = candidate.siblingIterator();
                    return;
                }
                // Already visited this junction, skip it and keep looking
            } else if (nodeStack.isEmpty()) {
                // Finished the start node as well: end of walk
                next = null;
                return;
            } else {
                // Finished this node, resume its parent
                popStack();
            }
        }
    }

    /**
     * Push the current state onto the stack and start walking the
     * predecessors of the given node.
     */
    protected void pushStack(GraphNode next) {
        nodeStack.add(node);
        iteratorStack.add(iterator);
        iterator = next.pred.iterator();
        node = next;
    }

    /**
     * Pop the prior state off the stack and make it current again.
     */
    protected void popStack() {
        int i = nodeStack.size() - 1;
        iterator = iteratorStack.remove(i);
        node = nodeStack.remove(i);
    }
} // End of GraphWalker inner class
/**
 * Iterator that does a complete walk over the graph: for every node in the
 * node map it reports the implicit reflexive triple and then one triple for
 * each successor (direct or closed) together with that successor's aliases.
 */
private static class FullGraphWalker extends NiceIterator<Triple> implements ExtendedIterator<Triple> {

    /** Flag whether we are walking over the closed or direct relations */
    final boolean closed;

    /** Iterator over the start nodes in the node map */
    final Iterator<GraphNode> baseNodeIt;

    /** The current node being visited */
    GraphNode node;

    /** The root node for reconstructing triples */
    Node nodeN;

    /** The predicate for reconstructing triples */
    final Node predicate;

    /** Iterator over the successor nodes for the baseNode */
    Iterator<GraphNode> succIt = null;

    /** The current successor being processed */
    GraphNode succ;

    /** Iterator over the aliases for the current successor */
    Iterator<GraphNode> aliasesIt = null;

    /** The next value to be returned, or null once the walk is finished */
    Triple next;

    /**
     * Construct a walker for the full closed or direct graph.
     * @param closed true to walk the closed successors, false for the direct ones
     * @param predicate the predicate placed in every generated triple
     * @param nodes the map whose values are the graph nodes to walk
     */
    FullGraphWalker(boolean closed, Node predicate, HashMap<Node, GraphNode> nodes) {
        this.predicate = predicate;
        this.closed = closed;
        baseNodeIt = nodes.values().iterator();
        walkOne();  // prime the first result
    }

    /** Iterator interface - test if more values available */
    @Override public boolean hasNext() {
        return next != null;
    }

    /**
     * Iterator interface - get next value.
     * @throws NoSuchElementException if the walk is already exhausted
     */
    @Override public Triple next() {
        if (next == null) {
            // Honour the java.util.Iterator contract instead of handing back null
            throw new NoSuchElementException();
        }
        Triple toReturn = next;
        walkOne();
        return toReturn;
    }

    /**
     * Walk one step: advance {@link #next} to the following triple, or set it
     * to null when every base node has been processed.
     */
    protected void walkOne() {
        // 1. Remaining aliases of the successor reported last
        if (aliasesIt != null) {
            while (aliasesIt.hasNext()) {
                GraphNode alias = aliasesIt.next();
                if (alias != succ && alias != node) {
                    next = new Triple(nodeN, predicate, alias.rdfNode);
                    return;
                }
            }
            aliasesIt = null;  // End of aliases
        }
        // 2. Remaining successors of the current base node
        if (succIt != null) {
            while (succIt.hasNext()) {
                succ = succIt.next();
                if (succ == node) {
                    continue;  // Skip accidental reflexive cases, already done
                }
                aliasesIt = succ.siblingIterator();
                next = new Triple(nodeN, predicate, succ.rdfNode);
                return;
            }
            succIt = null;  // End of the successors
        }
        // 3. Move on to the next base node, if any
        if (baseNodeIt.hasNext()) {
            node = baseNodeIt.next();
            nodeN = node.rdfNode;
            GraphNode lead = node.leadNode();
            succIt = (closed ? lead.succClosed : lead.succ).iterator();
            succIt = lead.concatenateSiblings( succIt );
            next = new Triple(nodeN, predicate, nodeN);  // Implicit reflexive case
        } else {
            next = null;  // End of walk
        }
    }
} // End of FullGraphWalker inner class
/**
 * Create an empty cache for one transitive relation.
 * @param directPredicate The RDF predicate representing the direct relation
 * @param closedPredicate The RDF predicate representing the closed relation
 */
public TransitiveGraphCache(Node directPredicate, Node closedPredicate) {
    this.closedPredicate = closedPredicate;
    this.directPredicate = directPredicate;
}
/**
 * Accessor for the predicate that names the closed relation.
 * @return the closed predicate this cache was constructed with
 */
public Node getClosedPredicate() {
    return this.closedPredicate;
}
/**
 * Accessor for the predicate that names the direct relation.
 * @return the direct predicate this cache was constructed with
 */
public Node getDirectPredicate() {
    return this.directPredicate;
}
/**
 * Register a new relation instance in the cache.
 * The triple itself is remembered in the set of original triples and its
 * subject/object pair is linked into the graph.
 * @param t the triple whose subject and object are to be related
 */
public synchronized void addRelation(Triple t) {
    // Record the assertion first, then link its end points
    originalTriples.add(t);
    final Node subject = t.getSubject();
    final Node object = t.getObject();
    addRelation(subject, object);
}
/**
 * Register a new relation instance in the cache by linking the graph nodes
 * for the two RDF nodes.
 * <p>
 * Nothing is done for the reflexive case or when a path from start to end
 * already exists. If a path already runs from end back to start the new link
 * closes a cycle and the nodes on it are merged into one strongly connected
 * component; otherwise the link is simply added.
 * </p>
 * @param start the subject of the relation
 * @param end the object of the relation
 */
private void addRelation(Node start, Node end) {
    if (start.equals(end)) {
        return;  // Reflexive case is built in
    }
    GraphNode startN = getLead(start);
    GraphNode endN = getLead(end);

    // Check if this link is already known about
    if (startN.pathTo(endN)) {
        return;  // yes, so no work to do
    }

    if (!endN.pathTo(startN)) {
        // No cycle is formed.
        // Walk all predecessors of start retracting redundant direct links
        // and adding missing closed links
        startN.propagateAdd(endN);
        startN.assertLinkTo(endN);
        return;
    }

    // The new link closes a cycle: reduce graph to DAG by factoring out SCCs.
    // First find all the members of the new component
    Set<GraphNode> members = new HashSet<>();
    members.add(endN);
    startN.visitPredecessors(new Visitor<Set<GraphNode>, GraphNode>() {
        @Override
        public List<GraphNode> visit(GraphNode node, GraphNode processing, Set<GraphNode> members, GraphNode endN) {
            // A predecessor of start that end can reach lies on the cycle
            if (endN.pathTo(node)) {
                members.add( node );
            }
            return null;  // never asks the walk to remove anything
        }
    }, members, endN);
    // Then create the SCC
    startN.makeLeadNodeFor(members);
    // Now propagate the closure in the normalized graph
    startN.propagateSCC();
}
/**
 * Remove an instance of a relation from the cache.
 * <p>
 * Only a direct link, or a link inside a strongly connected component,
 * changes the graph; such removals are queued and applied in a batch the
 * next time the cache is queried. Any other triple (reflexive, unknown nodes,
 * or a link that is only implied indirectly) leaves the graph untouched and
 * is merely forgotten as an original assertion.
 * </p>
 * @param t the triple whose subject/object relation is to be removed
 */
public void removeRelation(Triple t) {
    Node start = t.getSubject();
    Node end = t.getObject();

    boolean affectsGraph = false;
    // Compare by value, as addRelation does; the reflexive case is built in
    if (!start.equals(end)) {
        // Plain lookups: a remove must not register new nodes in the map
        GraphNode startN = nodeMap.get(start);
        GraphNode endN = nodeMap.get(end);
        if (startN != null && endN != null) {
            startN = startN.leadNode();
            endN = endN.leadNode();
            // Same lead node means both ends sit in one SCC
            affectsGraph = (startN == endN) || startN.directPathTo(endN);
        }
    }

    if (!affectsGraph) {
        // Indirect link can't be removed by itself, but it must no longer
        // count as asserted, otherwise a later rebuild in processDeletes()
        // would re-insert it.
        originalTriples.remove(t);
        return;
    }

    // This is a remove of a direct link possibly within an SCC
    // Delay as long as possible and do deletes in a batch
    if (deletesPending == null) {
        deletesPending = new HashSet<>();
    }
    deletesPending.add(t);
}
/**
 * Process outstanding delete actions.
 * <p>
 * Every node that can reach the subject of a deleted link has its links
 * cleared, the deleted triples are dropped from the original triples, and the
 * remaining original triples whose subject was cleared are added again.
 * </p>
 */
private void processDeletes() {
    // The kernel is the set of start nodes of deleted links
    Set<GraphNode> kernel = new HashSet<>();
    for ( Triple t : deletesPending ) {
        GraphNode start = nodeMap.get( t.getSubject() );
        // The subject may no longer be registered (e.g. the cache was cleared
        // after the delete was queued); a null entry would fail further down.
        if ( start != null ) {
            kernel.add( start );
        }
    }

    // The predecessor set of kernel
    Set<GraphNode> pKernel = new HashSet<>( kernel );
    for ( GraphNode n : nodeMap.values() ) {
        for ( GraphNode target : kernel ) {
            if ( n.pathTo( target ) ) {
                pKernel.add( n );
                break;
            }
        }
    }

    // Cut the pKernel away from the fringe of nodes that it connects to
    for ( GraphNode n : pKernel ) {
        for ( Iterator<GraphNode> j = n.succ.iterator(); j.hasNext(); ) {
            GraphNode fringe = j.next();
            if ( !pKernel.contains( fringe ) ) {
                fringe.pred.remove( n );
            }
        }
        n.succ.clear();
        n.succClosed.clear();
        n.pred.clear();
    }

    // Delete the triples
    originalTriples.removeAll(deletesPending);
    deletesPending.clear();

    // Reinsert the remaining links
    for ( Triple t : originalTriples ) {
        GraphNode n = nodeMap.get( t.getSubject() );
        if ( pKernel.contains( n ) ) {
            // t is already in originalTriples, so the add performed inside
            // addRelation(Triple) leaves the set being iterated unchanged
            addRelation( t );
        }
    }
}
/**
 * Extended find interface used in situations where the implementor
 * may or may not be able to answer the complete query.
 * <p>
 * A query on the direct or closed predicate is answered from the cache alone,
 * a query with a variable predicate is answered from the cache followed by the
 * continuation, and any other query is passed straight to the continuation.</p>
 * @param pattern a TriplePattern to be matched against the data
 * @param continuation either a Finder or a normal Graph which
 * will be asked for additional match results if the implementor
 * may not have completely satisfied the query.
 * @return an iterator over the matching triples
 */
@Override
public ExtendedIterator<Triple> findWithContinuation(TriplePattern pattern, Finder continuation) {
    Node predicate = pattern.getPredicate();
    if (predicate.isVariable()) {
        // wildcard predicate so return merge of cache and continuation
        return find(pattern).andThen(continuation.find(pattern));
    }
    boolean cachedPredicate = predicate.equals(directPredicate) || predicate.equals(closedPredicate);
    // Either satisfy the entire query from the cache, or (no matching triples
    // in this cache) just search the continuation
    return cachedPredicate ? find(pattern) : continuation.find(pattern);
}
/**
 * Return true if the given pattern occurs somewhere in the find sequence.
 * @param pattern the pattern to look for
 * @return true if {@link #find} yields at least one triple for the pattern
 */
@Override
public boolean contains(TriplePattern pattern) {
    ClosableIterator<Triple> it = find(pattern);
    try {
        return it.hasNext();
    } finally {
        // Release the iterator even if hasNext() fails
        it.close();
    }
}
/**
 * Return an iterator over all registered subject nodes, i.e. over the keys
 * of the node map.
 */
public ExtendedIterator<Node> listAllSubjects() {
    Iterator<Node> registered = nodeMap.keySet().iterator();
    return WrappedIterator.create(registered);
}
/**
 * Return true if the given Node is registered as a subject node, i.e. it is
 * a key of the node map.
 */
public boolean isSubject(Node node) {
    return nodeMap.containsKey(node);
}
/**
 * Cache all instances of the given predicate which are
 * present in the given Graph.
 * @param graph the searchable set of triples to cache
 * @param predicate the predicate to cache, need not be the registered
 * predicate due to subProperty declarations
 * @return returns true if at least one matching triple was found
 */
public boolean cacheAll(Finder graph, Node predicate) {
    ExtendedIterator<Triple> it = graph.find(new TriplePattern(null, predicate, null));
    try {
        boolean foundsome = it.hasNext();
        while (it.hasNext()) {
            addRelation(it.next());
        }
        return foundsome;
    } finally {
        // Release the source iterator even if adding a relation fails
        it.close();
    }
}
/**
 * Basic pattern lookup interface.
 * <p>
 * Any queued deletes are applied first. Only patterns whose predicate is a
 * variable, the direct predicate or the closed predicate can match; the
 * direct predicate selects the direct relation, everything else the closed one.
 * </p>
 * @param pattern a TriplePattern to be matched against the data
 * @return a ExtendedIterator over all Triples in the data set
 * that match the pattern
 */
@Override
public ExtendedIterator<Triple> find(TriplePattern pattern) {
    if (deletesPending != null && !deletesPending.isEmpty()) {
        processDeletes();
    }

    Node s = pattern.getSubject();
    Node p = pattern.getPredicate();
    Node o = pattern.getObject();

    boolean answerable = p.isVariable() || p.equals(directPredicate) || p.equals(closedPredicate);
    if (!answerable) {
        // No matching triples in this cache
        return NullIterator.instance();
    }

    boolean closed = !p.equals(directPredicate);

    if (s.isVariable()) {
        if (o.isVariable()) {
            // list all the graph contents
            return new FullGraphWalker(closed, closedPredicate, nodeMap);
        }
        // list all backwards from o
        GraphNode target = nodeMap.get(o);
        if (target == null) {
            return NullIterator.instance();
        }
        return target.listPredecessorTriples(closed, this);
    }

    GraphNode source = nodeMap.get(s);
    if (source == null) {
        return NullIterator.instance();
    }
    if (o.isVariable()) {
        // list forward from s
        return source.listTriples(closed, this);
    }

    // Singleton test: both ends are given, compare their lead nodes
    GraphNode sink = nodeMap.get(o);
    source = source.leadNode();
    if (sink == null) {
        return NullIterator.instance();
    }
    sink = sink.leadNode();
    boolean linked = closed ? source.pathTo(sink) : source.directPathTo(sink);
    if (linked) {
        // The reported triple always carries the closed predicate
        return new SingletonIterator<>(new Triple(s, closedPredicate, o));
    }
    return NullIterator.instance();
}
/**
 * Create a deep copy of the cache contents.
 * Works by creating a completely new cache and just adding in the
 * direct links. The set of original triples is carried over as well, because
 * processing a delete rebuilds part of the graph from that set.
 * @return a new, independent cache holding the same relation
 */
public TransitiveGraphCache deepCopy() {
    TransitiveGraphCache copy = new TransitiveGraphCache(directPredicate, closedPredicate);
    // find() applies any pending deletes before answering, so both the links
    // listed here and originalTriples below reflect the current state.
    ExtendedIterator<Triple> links = find(new TriplePattern(null, directPredicate, null));
    try {
        while (links.hasNext()) {
            Triple t = links.next();
            copy.addRelation(t.getSubject(), t.getObject());
        }
    } finally {
        links.close();
    }
    // Without these a later delete on the copy would rebuild from an empty
    // set and drop every link it had cut.
    copy.originalTriples.addAll(originalTriples);
    return copy;
}
/**
 * Clear the entire cache contents: the graph nodes, the remembered original
 * triples and any deletes that are still waiting to be processed.
 */
public void clear() {
    nodeMap.clear();
    // Stale originals would be re-inserted by a later delete-triggered rebuild
    originalTriples.clear();
    // Stale deletes would refer to nodes that are no longer registered
    if (deletesPending != null) {
        deletesPending.clear();
    }
}
/**
 * Enable/disable caching of the Triples representing the relationships. If this is
 * enabled then a number of triples quadratic in the graph depth will be stored. If it
 * is disabled then all queries will turn over storage dynamically creating the result triples.
 * @param enable the new value of the caching flag
 */
public void setCaching(boolean enable) {
    if (cacheTriples && !enable) {
        // Switching off so clear the existing cache on every node
        Iterator<GraphNode> nodes = nodeMap.values().iterator();
        while (nodes.hasNext()) {
            nodes.next().clearTripleCache();
        }
    }
    cacheTriples = enable;
}
/**
 * Dump a description of the cache to a string for debug: the dump of each
 * graph node, one per line.
 */
public String dump() {
    StringBuilder text = new StringBuilder();
    for ( GraphNode graphNode : nodeMap.values() ) {
        text.append( graphNode.dump() ).append( "\n" );
    }
    return text.toString();
}
// ----------------------------------------------------------------------
// Internal utility methods
// ----------------------------------------------------------------------
/**
 * Return the lead node of the strongly connected component corresponding
 * to the given RDF node. A node that has not been seen before is registered
 * in the node map and returned as is.
 */
private GraphNode getLead(Node n) {
    GraphNode known = nodeMap.get(n);
    if (known != null) {
        return known.leadNode();
    }
    GraphNode fresh = new GraphNode(n);
    nodeMap.put(n, fresh);
    return fresh;
}
}