package net.fortytwo.sesametools.deduplication;
import info.aduna.iteration.CloseableIteration;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.SimpleValueFactory;
import org.openrdf.sail.SailConnection;
import org.openrdf.sail.SailException;
import java.util.HashSet;
import java.util.Set;
/**
 * Static utility which searches a Sail for triples (subject, predicate, object)
 * that are matched by more than one statement.
 *
 * @author Joshua Shinavier (http://fortytwo.net)
 */
public class DuplicateStatementFinder {
    private static final ValueFactory valueFactory = SimpleValueFactory.getInstance();

    // Not instantiable: this class only exposes static helpers.
    private DuplicateStatementFinder() {
    }

    /**
     * Finds every triple which is matched by at least two statements in the store.
     * <p>
     * Each non-null context reported by {@link SailConnection#getContextIDs()} is scanned
     * (inferred statements excluded); for every statement found there, the connection is
     * queried again for the same subject, predicate and object without passing any context
     * argument. A triple is reported as soon as that query yields a second match.
     *
     * @param sc an open connection to the Sail to inspect; must not be null
     * @return a set containing one statement per duplicated triple, each created from
     *         subject, predicate and object only (no context); empty if none are found
     * @throws SailException if the underlying Sail fails while iterating contexts or statements
     */
    public static Set<Statement> findDuplicateStatements(final SailConnection sc) throws SailException {
        boolean includeInferred = false;

        // Every statement placed in this set is created from subject, predicate and
        // object only, i.e. without a context, so a plain HashSet is sufficient to
        // collapse repeated reports of the same triple.
        Set<Statement> results = new HashSet<>();

        try (CloseableIteration<? extends Resource, SailException> contexts = sc.getContextIDs()) {
            while (contexts.hasNext()) {
                Resource ctx = contexts.next();
                if (null != ctx) {
                    collectDuplicates(sc, ctx, includeInferred, results);
                }
            }
        }

        return results;
    }

    /**
     * Scans a single context and adds the context-free form of each duplicated triple to
     * {@code results}.
     */
    private static void collectDuplicates(final SailConnection sc,
                                          final Resource ctx,
                                          final boolean includeInferred,
                                          final Set<Statement> results) throws SailException {
        try (CloseableIteration<? extends Statement, SailException> stmts
                     = sc.getStatements(null, null, null, includeInferred, ctx)) {
            while (stmts.hasNext()) {
                Statement st = stmts.next();
                Statement triple = valueFactory.createStatement(
                        st.getSubject(), st.getPredicate(), st.getObject());

                // A duplicated triple is encountered once per context that holds it;
                // skip the extra lookup when it has already been recorded.
                if (results.contains(triple)) {
                    continue;
                }

                if (occursMoreThanOnce(sc, st, includeInferred)) {
                    results.add(triple);
                }
            }
        }
    }

    /**
     * Tells whether a lookup of the statement's subject, predicate and object, issued
     * without any context argument, yields at least two matches.
     */
    private static boolean occursMoreThanOnce(final SailConnection sc,
                                              final Statement st,
                                              final boolean includeInferred) throws SailException {
        try (CloseableIteration<? extends Statement, SailException> matches = sc.getStatements(
                st.getSubject(), st.getPredicate(), st.getObject(), includeInferred)) {
            if (!matches.hasNext()) {
                return false;
            }
            matches.next();
            // Stop at the second match; the exact number of occurrences is irrelevant.
            return matches.hasNext();
        }
    }
}