/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Nov 14, 2008
*/
package com.bigdata.rdf.store;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Properties;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.FutureTask;
import org.apache.log4j.Logger;
import org.openrdf.model.Statement;
import com.bigdata.journal.Journal;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.rdf.axioms.Axioms;
import com.bigdata.rdf.axioms.NoAxioms;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.model.BigdataStatement;
import com.bigdata.rdf.rio.AbstractStatementBuffer.StatementBuffer2;
import com.bigdata.rdf.rio.StatementBuffer;
import com.bigdata.rdf.rules.BackchainAccessPath;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.SPO;
import com.bigdata.rdf.store.AbstractTripleStore.Options;
import com.bigdata.relation.accesspath.BlockingBuffer;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.striterator.IChunkedOrderedIterator;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
 * Utility class for comparing graphs for equality, bulk export, etc.
 * <p>
 * All operations are exposed as static methods. The class also provides a
 * small command line entry point ({@link #main(String[])}) which compares the
 * same namespace in two journal files.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 */
public class TripleStoreUtility {

    protected static final Logger log = Logger.getLogger(TripleStoreUtility.class);

    /**
     * Capacity (in statements) of the buffers used when scanning a graph.
     */
    private static final int BUFFER_CAPACITY = 100000;

    /**
     * Compares two RDF graphs for equality (same statements).
     * <p>
     * Note: This does NOT handle bnodes, which must be treated as variables
     * for RDF semantics.
     * <p>
     * Note: Comparison is performed in terms of the externalized RDF
     * {@link Statement}s rather than {@link SPO}s since different graphs use
     * different lexicons.
     * <p>
     * Note: If the graphs differ in which entailments they are storing in their
     * data and which entailments are backchained then you MUST make them
     * consistent in this regard. You can do this by exporting one or both using
     * {@link #bulkExport(AbstractTripleStore)}, which will cause all
     * entailments to be materialized in the returned {@link TempTripleStore}.
     * <p>
     * Every statement found in only one of the graphs is reported through
     * {@link #log(String)} (INFO level), followed by summary counts.
     *
     * @param expected
     *            One graph.
     *
     * @param actual
     *            Another graph <strong>with a consistent policy for forward and
     *            backchained entailments</strong>.
     *
     * @return true if all statements in the expected graph are in the actual
     *         graph and if the actual graph does not contain any statements
     *         that are not also in the expected graph.
     *
     * @throws Exception
     *             if either scan fails (see
     *             {@link #notFoundInTarget(AbstractTripleStore, AbstractTripleStore)}).
     */
    public static boolean modelsEqual(final AbstractTripleStore expected,
            final AbstractTripleStore actual) throws Exception {

        // Statements present in [actual] which are missing from [expected].
        final long notExpecting = logAndCountNotFound(actual, expected,
                "Not expecting: ");

        final boolean sameStatements1 = notExpecting == 0L;

        log("all the statements in actual in expected? " + sameStatements1);

        // Statements present in [expected] which are missing from [actual].
        final long expecting = logAndCountNotFound(expected, actual,
                " Expecting: ");

        final boolean sameStatements2 = expecting == 0L;

        log("all the statements in expected in actual? " + sameStatements2);

        log("# expected but not found: " + expecting);

        log("# not expected but found: " + notExpecting);

        return sameStatements1 && sameStatements2;

    }

    /**
     * Logs (via {@link #log(String)}) each statement of <i>source</i> which is
     * not found in <i>target</i> and returns how many there were.
     *
     * @param source
     *            The graph whose statements are scanned.
     * @param target
     *            The graph which is probed for those statements.
     * @param prefix
     *            Text prepended to each reported statement.
     *
     * @return The #of statements in <i>source</i> not found in <i>target</i>.
     *         A <code>long</code> is used so that the count can not wrap back
     *         to zero and be mistaken for "no differences".
     */
    private static long logAndCountNotFound(final AbstractTripleStore source,
            final AbstractTripleStore target, final String prefix)
            throws InterruptedException, ExecutionException {

        long n = 0L;

        final ICloseableIterator<BigdataStatement> it = notFoundInTarget(
                source, target);

        try {

            while (it.hasNext()) {

                log(prefix + it.next());

                n++;

            }

        } finally {

            // Always release the iterator, even if the scan fails.
            it.close();

        }

        return n;

    }

    /**
     * Writes the message on the class {@link #log logger} at INFO level. This
     * is a NOP unless INFO is enabled for that logger.
     *
     * @param s
     *            The message.
     */
    public static void log(final String s) {

        if (log.isInfoEnabled()) {

            log.info(s);

        }

    }

    /**
     * Visits <i>expected</i> {@link BigdataStatement}s not found in
     * <i>actual</i>.
     * <p>
     * The scan of <i>expected</i> runs asynchronously as a task submitted to
     * the executor service of <i>actual</i>. Statements which are not found
     * are written onto a {@link BlockingBuffer} and the returned iterator
     * drains that buffer. The caller MUST close the returned iterator.
     *
     * @param expected
     *            The graph whose statements are visited (full scan).
     * @param actual
     *            The graph which is tested for the presence of each visited
     *            statement.
     *
     * @return An iterator visiting {@link BigdataStatement}s present in
     *         <i>expected</i> but not found in <i>actual</i>.
     *
     * @throws ExecutionException
     * @throws InterruptedException
     */
    public static ICloseableIterator<BigdataStatement> notFoundInTarget(//
            final AbstractTripleStore expected,//
            final AbstractTripleStore actual //
            ) throws InterruptedException, ExecutionException {

        /*
         * The source access path is a full scan (all positions unbound). The
         * casts select the IV based getAccessPath() signature.
         */
        final IAccessPath<ISPO> expectedAccessPath = expected.getAccessPath(
                (IV) null, (IV) null, (IV) null);

        /*
         * Efficiently convert SPOs to BigdataStatements (externalizes
         * statements).
         */
        final BigdataStatementIterator itr2 = expected
                .asStatementIterator(expectedAccessPath.iterator());

        final int capacity = BUFFER_CAPACITY;

        // Receives the statements which were not found in [actual].
        final BlockingBuffer<BigdataStatement> buffer = new BlockingBuffer<BigdataStatement>(
                capacity);

        final StatementBuffer2<Statement, BigdataStatement> sb = new StatementBuffer2<Statement, BigdataStatement>(
                actual, true/* readOnly */, capacity) {

            /**
             * Statements not found in [actual] are written on the
             * BlockingBuffer.
             *
             * @return The #of statements that were not found.
             */
            @Override
            protected int handleProcessedStatements(final BigdataStatement[] a) {

                if (log.isInfoEnabled())
                    log.info("Given " + a.length + " statements");

                // bulk filter for statements not present in [actual].
                final IChunkedOrderedIterator<ISPO> notFoundItr = actual
                        .bulkFilterStatements(a, a.length, false/* present */);

                int nnotFound = 0;

                try {

                    while (notFoundItr.hasNext()) {

                        final ISPO notFoundStmt = notFoundItr.next();

                        if (log.isInfoEnabled())
                            log.info("Not found: " + notFoundStmt);

                        /*
                         * NOTE(review): the cast presumes that the filter hands
                         * back the BigdataStatement instances it was given --
                         * confirm against bulkFilterStatements().
                         */
                        buffer.add((BigdataStatement) notFoundStmt);

                        nnotFound++;

                    }

                } finally {

                    notFoundItr.close();

                }

                if (log.isInfoEnabled())
                    log.info("Given " + a.length + " statements, " + nnotFound
                            + " of them were not found");

                return nnotFound;

            }

        };

        /**
         * Run task. The task consumes externalized statements from [expected]
         * and writes statements not found in [actual] onto the blocking buffer.
         */
        final Callable<Void> myTask = new Callable<Void>() {

            public Void call() throws Exception {

                try {

                    while (itr2.hasNext()) {

                        // a statement from the source db, added to the buffer.
                        sb.add(itr2.next());

                    }

                } finally {

                    itr2.close();

                }

                /*
                 * Flush everything in the StatementBuffer so that it
                 * shows up in the BlockingBuffer's iterator().
                 */
                final long nnotFound = sb.flush();

                if (log.isInfoEnabled())
                    log.info("Flushed: #notFound=" + nnotFound);

                return null;

            }

        };

        /**
         * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/707">
         *      BlockingBuffer.close() does not unblock threads </a>
         */

        // Wrap computation as FutureTask.
        final FutureTask<Void> ft = new FutureTask<Void>(myTask);

        // Set Future on BlockingBuffer.
        buffer.setFuture(ft);

        /*
         * Submit computation for evaluation.
         *
         * The task is what closes the source iterator. If the task can not be
         * handed off (e.g., the executor rejects it) then it will never run,
         * so the source iterator is closed here rather than leaked.
         */
        boolean submitted = false;

        try {

            actual.getExecutorService().submit(ft);

            submitted = true;

        } finally {

            if (!submitted) {

                itr2.close();

            }

        }

        /*
         * Return iterator reading "not found" statements from the blocking
         * buffer.
         */
        return buffer.iterator();

    }

    /**
     * Exports all statements found in the data and all backchained entailments
     * for the <i>db</i> into a {@link TempTripleStore}. This may be used to
     * compare graphs purely in their data by pre-generation of all backchained
     * entailments.
     * <p>
     * Note: This is not a general purpose bulk export as it uses only a single
     * access path, does not store justifications, and does NOT retain the
     * {@link Axioms} model of the source graph (the temporary store is
     * configured with {@link NoAxioms}). This method is specifically designed
     * to export "just the triples", e.g., for purposes of comparison.
     * <p>
     * The caller is responsible for closing the returned store. If the export
     * fails, the temporary store is closed before the exception is thrown.
     *
     * @param db
     *            The source database.
     *
     * @return The {@link TempTripleStore}.
     *
     * @throws RuntimeException
     *             wrapping anything thrown while copying the statements.
     */
    static public TempTripleStore bulkExport(final AbstractTripleStore db) {

        final Properties properties = new Properties();

        properties.setProperty(Options.ONE_ACCESS_PATH, "true");

        properties.setProperty(Options.JUSTIFY, "false");

        properties.setProperty(Options.AXIOMS_CLASS,
                NoAxioms.class.getName());

        // Mirror the statement identifiers mode of the source database.
        properties.setProperty(Options.STATEMENT_IDENTIFIERS,
                String.valueOf(db.isStatementIdentifiers()));

        final TempTripleStore tmp = new TempTripleStore(properties);

        try {

            final StatementBuffer<Statement> sb = new StatementBuffer<Statement>(
                    tmp, BUFFER_CAPACITY/* capacity */, 10/* queueCapacity */);

            final IV NULL = null;

            // Full scan of [db], including the backchained entailments.
            final IChunkedOrderedIterator<ISPO> itr1 = new BackchainAccessPath(
                    db, db.getAccessPath(NULL, NULL, NULL)).iterator();

            // Externalize the statements.
            final BigdataStatementIterator itr2 = db.asStatementIterator(itr1);

            try {

                while (itr2.hasNext()) {

                    sb.add(itr2.next());

                }

            } finally {

                itr2.close();

            }

            // Write any buffered statements onto the temporary store.
            sb.flush();

        } catch (Throwable t) {

            // Do not leak the temporary store if the export fails.
            tmp.close();

            throw new RuntimeException(t);

        }

        return tmp;

    }

    /**
     * Compares two {@link LocalTripleStore}s. Both journals are opened
     * read-only and the comparison is made against the last commit point of
     * each journal. The outcome is written on stdout; the individual
     * differences are logged at INFO level.
     *
     * @param args
     *            filename filename (namespace) -- the namespace defaults to
     *            <code>kb</code>.
     *
     * @throws Exception
     *
     * @todo namespace for each, could be the same file, and timestamp for each.
     *
     * @todo handle other database modes.
     */
    public static void main(String[] args) throws Exception {

        if (args.length < 2 || args.length > 3) {

            usage(); // does not return (exits the JVM).

            return;

        }

        final File file1 = new File(args[0]);

        final File file2 = new File(args[1]);

        final String namespace = args.length == 3 ? args[2] : "kb";

        if (!file1.exists())
            throw new FileNotFoundException(file1.toString());

        if (!file2.exists())
            throw new FileNotFoundException(file2.toString());

        Journal j1 = null, j2 = null;

        try {

            // Defaults shared by both journals.
            final Properties p = new Properties();

            p.setProperty(com.bigdata.journal.Options.READ_ONLY, "true");

            j1 = openJournal(file1, p);

            final AbstractTripleStore ts1 = locateTripleStore(j1, namespace,
                    file1);

            j2 = openJournal(file2, p);

            final AbstractTripleStore ts2 = locateTripleStore(j2, namespace,
                    file2);

            final boolean same = modelsEqual(ts1, ts2);

            /*
             * Report the outcome. The details are only logged @ INFO, so
             * without this the tool could run to completion without telling
             * the user anything.
             */
            System.out.println("modelsEqual=" + same);

        } finally {

            // Close both journals, even if closing the first one fails.
            try {

                if (j1 != null) {

                    j1.close();

                }

            } finally {

                if (j2 != null) {

                    j2.close();

                }

            }

        }

    }

    /**
     * Opens the journal backed by the given file.
     *
     * @param file
     *            The backing file.
     * @param defaults
     *            Default properties; the file name is layered over them.
     *
     * @return The open {@link Journal}.
     */
    private static Journal openJournal(final File file,
            final Properties defaults) {

        final Properties properties = new Properties(defaults);

        properties.setProperty(com.bigdata.journal.Options.FILE, file
                .toString());

        return new Journal(properties);

    }

    /**
     * Resolves a read-only view of the triple store for the namespace as of
     * the last commit time of the journal.
     *
     * @param journal
     *            The journal.
     * @param namespace
     *            The namespace of the triple store.
     * @param file
     *            The file backing the journal (used for error reporting only).
     *
     * @return The triple store (never <code>null</code>).
     *
     * @throws IllegalArgumentException
     *             if nothing is resolved for that namespace.
     */
    private static AbstractTripleStore locateTripleStore(final Journal journal,
            final String namespace, final File file) {

        final AbstractTripleStore store = (AbstractTripleStore) journal
                .getResourceLocator().locate(
                        namespace,
                        TimestampUtility.asHistoricalRead(journal
                                .getLastCommitTime()));

        /*
         * NOTE(review): presumably locate() answers null when the namespace
         * does not exist -- confirm. Failing here is far easier to diagnose
         * than a NullPointerException raised later by the comparison.
         */
        if (store == null) {

            throw new IllegalArgumentException("No such namespace: "
                    + namespace + ", file=" + file);

        }

        return store;

    }

    /**
     * Prints the command line synopsis on stderr and exits the JVM with a
     * non-zero status code.
     */
    private static void usage() {

        System.err.println("usage: filename filename (namespace)");

        System.exit(1);

    }

}