/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.graph.impl;
import static org.apache.jena.util.iterator.WrappedIterator.create;
import java.util.*;
import org.apache.jena.graph.* ;
import org.apache.jena.shared.* ;
import org.apache.jena.util.CollectionFactory ;
import org.apache.jena.util.iterator.* ;
// Purely syntactic: Uses .equals, not .sameVAlueAs (see the one note at "PURE SYNTAX" below and "containsSameTerm")
/**
* An implemantation of graph isomorphism for Graph equality.
* The underlying algorithm is exponential but will only enter
* a non-deterministic polynomial part when there are a lot of difficult to
* distinguish anonymous nodes
* connected to each other by statements with the same property(s).
* Non-pathological examples, where most nodes have some properties that
* help distinguish them from other nodes, will experience nearly linear
* performance.
*/
public class GraphMatcher extends java.lang.Object {
static private Random random = new Random(0);
/**
* Are the two models isomorphic?
* The isomorphism is defined as a bijection between the anonymous
* variables such that the statements are identical.
* This is
* described in
* <a href="http://www.w3.org/TR/rdf-concepts#section-Graph-syntax">
* http://www.w3.org/TR/rdf-concepts#section-Graph-syntax
* </a>
*/
static public boolean equals(Graph m1,Graph m2) {
if ( m1 == m2 )
return true;
return match(m1,m2) != null;
}
static public int hashCode(Graph g) {
ClosableIterator<Triple> ci = GraphUtil.findAll( g );
int hash = 0;
GraphMatcher gm = new GraphMatcher(g);
while ( ci.hasNext() ) {
Triple t = ci.next();
hash += gm.new AnonStatement(t).myHashCode(null);
}
return hash;
}
/**
* Return an isomorphism between the two models.
* This function is nondeterministic in that it may return a
* different bijection on each call, in cases where there are
* multiple isomorphisms between the models.
* @return <code>null</code> on failure or an array of related pairs
(arrays of length 2) of anonymous nodes.
<code>match(m1,m2)[i][0]</code> is from <code>m1</code>,
and <code>match(m1,m2)[i][1]</code> is the corresponding node in
<code>m2</code>.
*/
static public Node[][] match(Graph m1,Graph m2) {
return new GraphMatcher(m1).match(new GraphMatcher(m2));
}
/* NOTE: inner classes
* We use a number of non-static inner classes, these all
* refer to the GraphMatcher for context.
*
* NOTE: the built-in hashCode() is not modified, so that Set's etc
* still work.
* This algorithm depends on a hash function, which I call myHashCode()
* This has the somewhat perplexing property of changing as we do
* the binding.
* obj.myHashCode() depends on:
* - obj and it's non anonymous subcomponents
* - ModelMatcher.myHashLevel (in the enclosing ModelMatcher)
* - for a bound AnonResource b in obj, it depends on a
* random value generated at the time that b got bound
* - for an unbound AnonResource, if myHashLevel>0, then
* it depends on the value of myHashCode() at myHashLevel-1
*
*
*
*/
static final private boolean TRACE = false;
private Graph m;
private GraphMatcher other;
private int myHashLevel = 0; // This is usually 0, but can be any value
// less than MAX_HASH_DEPTH
static final private int MAX_HASH_DEPTH = 3;
// I don't think there's much
// mileage in a huge number here
// A large number is likely to be unhelpful in typical
// cases, but helps in pathological cases.
// The pathological cases are the slowest, so perhaps it
// is best to optimise for them.!
// The rehashable - hash table
// A Map from Integer => Bucket
// Most of the time the table is a mess,
// this is reflected in state=BAD
private Map<Integer, Bucket> table;
// This variable is mainly for sanity checking and
// documentation. It has one logical impact in
// AnonResource.myHashCodeFromStatement() and
// AnonResource.wrapStatements()
// AnonResource.myHashCodeFromStatement() requires
// either state != HASH_BAD or myHashLevel = 0,
// we ensure that one or other is the case in
// AnonResource.wrapStatements().
private int state;
static final private int REHASHING = 1;
static final private int HASH_OK = 2;
static final private int HASH_BAD = 4;
// As the algorithm proceeds we move resources
// from one to the other.
// At completion unBoundAnonResources is empty.
private Set<AnonResource> unboundAnonResources = CollectionFactory.createHashedSet();
private Set<AnonResource> boundAnonResources = CollectionFactory.createHashedSet();
private GraphMatcher(Graph m1x) {
m = m1x;
}
private Node[][] match(GraphMatcher oth) {
other = oth;
oth.other = this;
in(HASH_BAD);
// check that the size's are the same.
// If the size is not accurate then it is a lower bound
if (m.getCapabilities().sizeAccurate()
&& m.size() < other.m.size() )
return null;
if (other.m.getCapabilities().sizeAccurate()
&& m.size() > other.m.size() )
return null;
int myPrep = prepare(other.m);
if ( myPrep == -1 || myPrep != other.prepare(m) ) {
return null;
}
if ( bind() ) {
if ( !unboundAnonResources.isEmpty() )
impossible();
Node rslt[][] = new Node[boundAnonResources.size()][];
int ix = 0;
for ( AnonResource r : boundAnonResources )
{
rslt[ix++] = new Node[]{ r.r, r.bound.r };
}
return rslt;
}
else {
return null;
}
}
// bind returns true if we have a binding,
// false if not, in either case table is screwed.
private boolean bind() {
Set<AnonResource> locallyBound = obligBindings();
if (locallyBound==null) // Contradiction reached - fail.
return false;
check(HASH_OK);
Bucket bkt = smallestBucket();
if ( bkt == null )
return true; // No smallest bucket - we are finished.
Bucket otherBkt = other.matchBucket(bkt);
if ( otherBkt != null ) {
AnonResource v = bkt.aMember();
Iterator<AnonResource> candidates = otherBkt.members();
// System.out.println("Guessing");
while ( candidates.hasNext() ) {
check(HASH_OK|HASH_BAD);
AnonResource otherV = candidates.next();
trace(true,"Guess: ");
if (!bkt.bind(v,otherBkt,otherV))
continue;
if (bind())
return true;
v.unbind();
}
}
unbindAll(locallyBound);
return false;
}
/*
* Called with hashing incorrect.
* Returns null if situation is unworkable.
* Returns non-null with no outstanding obvious bindings,
* and with the hashing correct.
* The set of obligatorily bound resources is returned.
*
*/
private Set<AnonResource> obligBindings() {
int hashLevel = 0;
boolean newBinding;
Set<AnonResource> rslt = CollectionFactory.createHashedSet();
check(HASH_OK|HASH_BAD);
do {
if ( rehash(hashLevel) != other.rehash(hashLevel) ){
unbindAll(rslt);
return null;
}
refinableHash = false;
newBinding = false;
Iterator<Bucket> singles = scanBuckets();
while ( singles.hasNext() ) {
newBinding = true;
Bucket bkt = singles.next();
Bucket otherBkt = other.matchBucket(bkt);
if ( otherBkt == null ) {
unbindAll(rslt);
return null;
}
AnonResource bindMe = bkt.aMember();
if (!bkt.bind(otherBkt)) {
unbindAll(rslt);
return null;
}
rslt.add(bindMe);
}
if ( newBinding )
hashLevel = 0;
else
hashLevel++;
} while (hashLevel<MAX_HASH_DEPTH && (refinableHash||newBinding));
return rslt;
}
// Communication between obligBindings and scanBuckets
private boolean refinableHash;
private Iterator<Bucket> scanBuckets() {
// Looks through buckets,
// if has single member then return in iterator.
// Otherwise if some member of the bucket has friends
// we can refine the hash, and we set refinableHash.
check(HASH_OK);
return create(table.values().iterator()).filterKeep(b -> {
if (b.size() == 1)
return true;
if (!refinableHash) {
Iterator<AnonResource> it = b.members();
while (it.hasNext())
if (!it.next().friends.isEmpty()) {
refinableHash = true;
break;
}
}
return false;
});
}
private void unbindAll(Set<AnonResource> s) {
for ( AnonResource value : s )
{
value.unbind();
}
in(HASH_BAD);
}
private int prepare(Graph otherm) {
ClosableIterator<Triple> ss = GraphUtil.findAll( m );
myHashLevel = 0;
int hash = 0;
try {
while ( ss.hasNext() ) {
Triple s = ss.next();
AnonStatement ass = new AnonStatement(s);
if ( ass.pattern == NOVARS ) {
if ( !containsSameTerm( otherm, s ) ) return -1;
} else {
hash += ass.myHashCode(ass.vars[0]);
for (int i=0;i<ass.vars.length;i++) {
ass.vars[i].occursIn.add(ass);
for (int j=i+1;j<ass.vars.length;j++) {
ass.vars[i].friends.add(ass.vars[j]);
ass.vars[j].friends.add(ass.vars[i]);
}
}
}
}
return hash==-1?1:hash;
}
finally {
ss.close();
}
}
/** Special "contains" test that always provide "same term",
* not "sameValueAs" semantics on the containment.
* @param otherm
* @param triple
* @return
*/
private static boolean containsSameTerm(Graph otherm, Triple triple) {
boolean b = otherm.contains(triple) ;
Node o = triple.getObject() ;
if ( !o.isConcrete() || !o.isLiteral() )
return b ;
if ( ! b )
return false ;
// Force to same term when o is a ground literal.
ExtendedIterator<Triple> iter = otherm.find(triple) ;
while (iter.hasNext()) {
Triple t = iter.next() ;
if ( t.getObject().equals(o) )
return true ;
}
return false ;
}
private Bucket smallestBucket() {
check(HASH_OK);
Iterator<Bucket> bit = table.values().iterator();
Bucket smallB = null;
int smallest = Integer.MAX_VALUE;
while ( bit.hasNext() ) {
Bucket b = bit.next();
int sz = b.size();
if ( sz < smallest ) {
smallB = b;
smallest = sz;
}
}
return smallB;
}
private Bucket matchBucket(Bucket key) {
check(HASH_OK);
Integer hash = new Integer(key.aMember().myHash);
Bucket rslt = table.get(hash);
if ( rslt != null ) {
if ( key.size() != rslt.size() )
return null;
}
return rslt;
}
/* rehash performance notes:
*Uncommenting below gives an easy way of measuring
*rehash performance.
*On a 480ms job the rehash appeared to take over 200ms.
*(Since with the code below uncommented the same
*problem took about 1300ms).
*
*/
private int rehash(int lvl) {
/*
rehash0(lvl);
rehash0(lvl);
rehash0(lvl);
rehash0(lvl);
**/
return rehash0(lvl);
}
private int rehash0( int level ) {
in(REHASHING);
this.table = CollectionFactory.createHashedMap();
// Set a global to define the hash of an AnonResource
// level = 0 ==> AnonResource.myHashCode() = 0
// level = n+1 ==> AnonResource.myHashCode() = hash[n]
myHashLevel = level;
// Now compute all hashes and stick things in the
// right buckets.
for ( AnonResource a : unboundAnonResources )
{
Integer hash = new Integer( a.myHashCode() );
Bucket bkt = table.get( hash );
if ( bkt == null )
{
bkt = new Bucket();
table.put( hash, bkt );
}
bkt.add( a );
}
// Produce a checksum for the table.
int rslt = 0;
for ( Map.Entry<Integer, Bucket> pair : table.entrySet() )
{
int hash = pair.getKey().intValue();
Bucket bkt = pair.getValue();
int sz = bkt.size();
rslt += sz * 0x10001 ^ hash;
}
in(HASH_OK);
return rslt;
}
/* subjects identified by bits 0 and 1,
* predicate by bits 2 and 3,
* object by 4 and 5
* If neither bit set then role is bound.
* If lower bit is set then role is unbound to
* singleton variable in triple.
* If higher bit is set then role is unbound
* with anonymous variable that is also
* unbound to a different role.
* It is an error if both bits are set.
*
*
* These funny things are read like this: e.g.
*
* SXPYOX - the subject is a variable X,
* the predicate is another var Y
* the object is the same var X
*
*/
static final private int NOVARS = 0;
static final private int SX = 1;
static final private int PX = 4;
static final private int OX = 16;
// SD, PD and OD are illegal values
// by themselves, should only
// be found in combination with
// each other.
// D for duplicate.
static final private int SD = 2;
static final private int PD = 8;
static final private int OD = 32;
static final private int SXPY = SX|PX;
static final private int SXOY = SX|OX;
static final private int PXOY = PX|OX;
static final private int SXPYOZ = SX|PX|OX;
static final private int SXPX = SD|PD;
static final private int SXOX = SD|OD;
static final private int PXOX = PD|OD;
static final private int SXPXOY = SD|PD|OX;
static final private int SXPYOX = SD|OD|PX;
static final private int SXPYOY = SX|PD|OD;
static final private int SXPXOX = SD|PD|OD;
static final private int S = SX|SD;
static final private int P = PX|PD;
static final private int O = OX|OD;
static private boolean legalPattern(int mask) {
switch (mask) {
case NOVARS:
case SX:
case OX:
case PX:
case SXPY:
case SXOY:
case PXOY:
case SXPYOZ:
case SXPX:
case SXOX:
case PXOX:
case SXPXOY:
case SXPYOX:
case SXPYOY:
case SXPXOX:
return true;
default:
return false;
}
}
// if i = 0 return the X component of pattern
// if i = 1 return the Y component of pattern
// if i = 2 return the Z component of pattern
static private int varPosInPattern(int i,int pattern) {
switch (pattern) {
case NOVARS:
break;
case SX:
if (i==0) return SX;
break;
case OX:
if (i==0) return OX;
break;
case PX:
if (i==0) return PX;
break;
case SXPY:
switch (i) {
case 0:
return SX;
case 1:
return PX;
}
break;
case SXOY:
switch (i) {
case 0:
return SX;
case 1:
return OX;
}
break;
case PXOY:
switch (i) {
case 0:
return PX;
case 1:
return OX;
}
break;
case SXPYOZ:
switch (i) {
case 0:
return SX;
case 1:
return PX;
case 2:
return OX;
}
break;
case SXPX:
if (i==0) return SXPX;
break;
case SXOX:
if (i==0) return SXOX;
break;
case PXOX:
if (i==0) return PXOX;
break;
case SXPXOY:
switch (i) {
case 0:
return SXPX;
case 1:
return OX;
}
break;
case SXPYOX:
switch (i) {
case 0:
return SXOX;
case 1:
return PX;
}
break;
case SXPYOY:
switch (i) {
case 0:
return SX;
case 1:
return PXOX;
}
break;
case SXPXOX:
if (i==0) return SXPXOX;
break;
}
System.out.println("Bad: " + i + " " + pattern);
impossible();
return 0;
}
static private interface SomeResource {
int myHashCodeFromStatement();
boolean mightBeEqual(SomeResource r);
}
static private class FixedResource implements SomeResource {
int hash;
Node node;
@Override
public String toString() {
return "f" + hash;
}
@Override
public int myHashCodeFromStatement() {
return hash;
}
FixedResource(Node n) {
hash = n.hashCode();
node = n;
}
@Override
public boolean mightBeEqual(SomeResource r) {
if (r!=null && (r instanceof FixedResource)) {
FixedResource f = (FixedResource)r;
return hash == f.hash && node.equals(f.node); // PURE SYNTAX
} else {
return false;
}
}
}
// Record the occurence of variable r in bag.
static void count(Map<SomeResource, int[]> bag, SomeResource r,int pos) {
if ( r instanceof AnonResource ) {
int v[] = bag.get(r);
if (v==null) {
v=new int[]{-1,-1,-1};
bag.put(r,v);
}
for (int i=0;i<3;i++)
if ( v[i] == -1 ) {
v[i] = pos;
return;
}
}
}
private class AnonStatement {
int varCount;
AnonResource vars[];
SomeResource subj;
SomeResource pred;
SomeResource obj;
int pattern;
AnonStatement(Triple s) {
Map<SomeResource, int[]> bag = CollectionFactory.createHashedMap();
pattern = NOVARS;
subj = convert(s.getSubject());
pred = convert(s.getPredicate());
obj = convert(s.getObject());
count(bag,subj,0);
count(bag,pred,2);
count(bag,obj,4);
varCount = bag.size();
vars = new AnonResource[varCount];
add(subj);
add(pred);
add(obj);
for ( int[] v : bag.values() )
{
int last = 2;
int p;
while ( v[last] == -1 )
{
last--;
}
if ( last == 0 )
{
p = SX;
}
else
{
p = SD;
}
for ( int i = 0; i <= last; i++ )
{
pattern |= p << v[i];
}
}
if (!legalPattern(pattern)) {
System.out.println("s: " + subj + " p: " + pred + " o: " + obj + " pattern: " + pattern);
impossible();
}
}
private void add(SomeResource r) {
if ( r instanceof AnonResource ) {
for (int i=0;i<vars.length; i++)
if (vars[i]==null || vars[i]==r ) {
vars[i] = (AnonResource)r;
return;
}
impossible();
}
}
// returns the location of v in this statement.
// e.g. PXOX to say that v is both the predicate and object.
int varPos(AnonResource v) {
if ( v == null)
return 0;
for (int i=0;i<vars.length;i++)
if ( vars[i] == v )
return varPosInPattern(i,pattern);
impossible();
return 0;
}
int myHashCode(AnonResource v) {
int vX = varPos(v);
int hash = vX;
// The multipliers are chosen to be 2 bit numbers.
// These muddle up the bits; should be quick in an optimised
// compilation or JIT (a shift and an add); and ensure
// that positional information (SPO) is encoded in the hash.
if ( (vX & S) == 0) {
hash ^= subj.myHashCodeFromStatement() * 0x101;
}
if ( (vX & P )== 0 ) {
hash ^= pred.myHashCodeFromStatement() * 0x3f;
}
if ( (vX & O )== 0 ) {
hash ^= obj.myHashCodeFromStatement() * 0x41;
}
return hash;
}
boolean contextualEquals(AnonResource v,AnonStatement sB,AnonResource vB) {
int vX = varPos(v);
if ( vX != sB.varPos(vB) )
return false;
return
((vX & S) != 0 || subj.mightBeEqual(sB.subj))
&& ((vX & P) != 0 || pred.mightBeEqual(sB.pred))
&& ((vX & O) != 0 || obj.mightBeEqual(sB.obj));
}
}
// Bucket's live longer than the table that they sit in.
// If a bucket is matched before the main bind() loop then
// we are iterating over it's members while the rest of the
// algorithm is proceeding.
private class Bucket {
Set<AnonResource> anonRes = CollectionFactory.createHashedSet();
int hash[] = new int[MAX_HASH_DEPTH];
boolean bind(Bucket singleton) {
return bind(aMember(),singleton,singleton.aMember());
}
boolean bind(AnonResource mine,Bucket other,AnonResource binding) {
if ( mine.checkBinding(binding) ) {
mine.bind(binding);
return true;
} else {
return false;
}
}
void add(AnonResource r) {
anonRes.add(r);
}
AnonResource aMember() {
return anonRes.iterator().next();
}
Iterator<AnonResource> members() {
return anonRes.iterator();
}
int size() {
return anonRes.size();
}
}
private class AnonResource implements SomeResource {
AnonResource bound;
Node r;
Set<AnonStatement> occursIn = CollectionFactory.createHashedSet(); // The AnonStatements containing me.
int hash[] = new int[MAX_HASH_DEPTH];
int boundHash;
Set<AnonResource> friends = CollectionFactory.createHashedSet(); // Other vars in AnonStatements containing me.
int myHash;
@Override
public String toString() {
String rslt = r.toString();
if ( bound!=null )
rslt += "[" + bound.r.toString() + "]";
return rslt;
}
AnonResource(Node r) {
unboundAnonResources.add(this);
this.r = r;
}
@Override
public int myHashCodeFromStatement() {
if ( bound != null )
return boundHash;
if (myHashLevel==0) {
return 0xcafebabe;
}
check(REHASHING|HASH_OK);
return hash[myHashLevel-1];
}
// MUST NOT BE CALLED FROM WITHIN THE LOOP
// OF OBLIG BINDINGS, use myHash
// ONLY INTENDED TO BE CALLED FROM WITHIN rehash
int myHashCode() {
check(REHASHING);
if ( bound!=null )
impossible();
myHash = 0;
for ( AnonStatement ass : occursIn )
{
myHash += ass.myHashCode( this );
}
hash[myHashLevel] = myHash;
return myHash;
}
void bind(AnonResource pair) {
bound = pair;
if (!unboundAnonResources.remove(this))
impossible();
boundAnonResources.add(this);
if ( pair.bound == null ) {
trace( true, r.getBlankNodeId()+ "=" + pair.r.getBlankNodeId() + ", " );
pair.bind(this);
// choice any arbitary number here
// helps spread the bits.
bound.boundHash= boundHash =random.nextInt();
// if ( myHash != bound.myHash )
// impossible();
// Sometimes they are different, after we have
// guessed badly, changed bound.myHash and then
// backtracked.
}
if ( bound.bound != this )
impossible();
}
void unbind() {
AnonResource pair = bound;
bound = null;
if (!boundAnonResources.remove(this))
impossible();
unboundAnonResources.add(this);
if ( pair.bound != null ) {
trace( false, r.getBlankNodeId() + "!=" + pair.r.getBlankNodeId() + ", " );
if ( pair.bound != this )
impossible();
pair.unbind();
}
in(HASH_BAD);
}
boolean checkBinding( AnonResource pair ) {
if ( occursIn.size() != pair.occursIn.size() )
return false;
Set<StatementWrapper> ourStatements = wrapStatements();
Set<StatementWrapper> otherStatements = pair.wrapStatements();
return ourStatements.removeAll(otherStatements)
&& ourStatements.isEmpty();
}
private Set<StatementWrapper> wrapStatements() {
if ( state == HASH_BAD ) {
// We are already in(HASH_BAD).
// We need to use AnonResource.myHashCodeFromStatement().
// That is OK as long as myHashLevel is 0
myHashLevel = 0;
}
Set<StatementWrapper> statements = CollectionFactory.createHashedSet();
// Add all our statements to the set.
for ( AnonStatement anOccursIn : occursIn )
{
statements.add( wrapStatement( anOccursIn ) );
}
return statements;
}
@Override
public boolean mightBeEqual(SomeResource r) {
if (r!=null && (r instanceof AnonResource)) {
AnonResource a = (AnonResource)r;
return a==this
|| bound == a
|| (bound == null && a.bound == null);
} else {
return false;
}
}
StatementWrapper wrapStatement(AnonStatement s) {
return new StatementWrapper(s);
}
// inner inner class -- ouch!
private class StatementWrapper {
int wrapHash;
AnonStatement statement;
@Override public boolean equals(Object o) {
if (o == null || (!(o instanceof StatementWrapper)))
return false;
StatementWrapper w = (StatementWrapper)o;
return wrapHash == w.wrapHash &&
statement.contextualEquals(AnonResource.this,w.statement,w.asAnonR());
}
@Override public int hashCode() {
return wrapHash;
}
StatementWrapper( AnonStatement s ) {
wrapHash = s.myHashCode(AnonResource.this);
statement = s;
}
AnonResource asAnonR() {
return AnonResource.this;
}
}
}
private Map<Node, SomeResource> anonLookup = CollectionFactory.createHashedMap();
private SomeResource convert(Node n) {
if ( n.isBlank() ) {
SomeResource anon = anonLookup.get(n);
if ( anon == null ) {
anon = new AnonResource( n );
anonLookup.put(n,anon);
}
return anon;
} else {
return new FixedResource(n);
}
}
private void check(int s) {
if (( state & s) == 0 )
impossible();
}
private void in(int s) {
state = s;
other.state = s;
}
static private void impossible() {
throw new JenaException( "Cannot happen!" );
}
static private int col = 0;
static private boolean lastDir = false;
static private void trace(boolean dir, String s) {
if (TRACE) {
if ( dir != lastDir ) {
traceNL();
lastDir = dir;
}
int nCol = col + s.length();
if ( col != 0 && nCol > 70 ) {
traceNL();
nCol = s.length();
}
System.out.print(s);
System.out.flush();
col = nCol;
}
}
static private void traceNL() {
if ( TRACE ) {
System.out.println();
col = 0;
}
}
}