/* Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * Created on Nov 14, 2008 */ package com.bigdata.rdf.store; import java.io.File; import java.io.FileNotFoundException; import java.util.Properties; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.FutureTask; import org.apache.log4j.Logger; import org.openrdf.model.Statement; import com.bigdata.journal.Journal; import com.bigdata.journal.TimestampUtility; import com.bigdata.rdf.axioms.Axioms; import com.bigdata.rdf.axioms.NoAxioms; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.model.BigdataStatement; import com.bigdata.rdf.rio.AbstractStatementBuffer.StatementBuffer2; import com.bigdata.rdf.rio.StatementBuffer; import com.bigdata.rdf.rules.BackchainAccessPath; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPO; import com.bigdata.rdf.store.AbstractTripleStore.Options; import com.bigdata.relation.accesspath.BlockingBuffer; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.striterator.IChunkedOrderedIterator; import cutthecrap.utils.striterators.ICloseableIterator; /** * Utility class for comparing graphs for equality, bulk export, etc. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ */ public class TripleStoreUtility { protected static final Logger log = Logger.getLogger(TripleStoreUtility.class); /** * Compares two RDF graphs for equality (same statements). * <p> * Note: This does NOT handle bnodes, which much be treated as variables for * RDF semantics. * <p> * Note: Comparison is performed in terms of the externalized RDF * {@link Statement}s rather than {@link SPO}s since different graphs use * different lexicons. * <p> * Note: If the graphs differ in which entailments they are storing in their * data and which entailments are backchained then you MUST make them * consistent in this regard. You can do this by exporting one or both using * {@link #bulkExport(AbstractTripleStore)}, which will cause all * entailments to be materialized in the returned {@link TempTripleStore}. * * @param expected * One graph. * * @param actual * Another graph <strong>with a consistent policy for forward and * backchained entailments</strong>. * * @return true if all statements in the expected graph are in the actual * graph and if the actual graph does not contain any statements * that are not also in the expected graph. */ public static boolean modelsEqual(AbstractTripleStore expected, AbstractTripleStore actual) throws Exception { // int actualSize = 0; int notExpecting = 0; int expecting = 0; boolean sameStatements1 = true; { final ICloseableIterator<BigdataStatement> it = notFoundInTarget(actual, expected); try { while (it.hasNext()) { final BigdataStatement stmt = it.next(); sameStatements1 = false; log("Not expecting: " + stmt); notExpecting++; // actualSize++; // count #of statements actually visited. } } finally { it.close(); } log("all the statements in actual in expected? " + sameStatements1); } // int expectedSize = 0; boolean sameStatements2 = true; { final ICloseableIterator<BigdataStatement> it = notFoundInTarget(expected, actual); try { while (it.hasNext()) { final BigdataStatement stmt = it.next(); sameStatements2 = false; log(" Expecting: " + stmt); expecting++; // expectedSize++; // counts statements actually visited. } } finally { it.close(); } // BigdataStatementIterator it = expected.asStatementIterator(expected // .getInferenceEngine().backchainIterator( // expected.getAccessPath(NULL, NULL, NULL))); // // try { // // while(it.hasNext()) { // // BigdataStatement stmt = it.next(); // // if (!hasStatement(actual,// // (Resource)actual.getValueFactory().asValue(stmt.getSubject()),// // (URI)actual.getValueFactory().asValue(stmt.getPredicate()),// // (Value)actual.getValueFactory().asValue(stmt.getObject()))// // ) { // // sameStatements2 = false; // // log(" Expecting: " + stmt); // // expecting++; // // } // // expectedSize++; // counts statements actually visited. // // } // // } finally { // // it.close(); // // } log("all the statements in expected in actual? " + sameStatements2); } // final boolean sameSize = expectedSize == actualSize; // // log("size of 'expected' repository: " + expectedSize); // // log("size of 'actual' repository: " + actualSize); log("# expected but not found: " + expecting); log("# not expected but found: " + notExpecting); return /*sameSize &&*/sameStatements1 && sameStatements2; } public static void log(final String s) { if(log.isInfoEnabled()) log.info(s); } /** * Visits <i>expected</i> {@link BigdataStatement}s not found in <i>actual</i>. * * @param expected * @param actual * * @return An iterator visiting {@link BigdataStatement}s present in * <i>expected</i> but not found in <i>actual</i>. * * @throws ExecutionException * @throws InterruptedException */ public static ICloseableIterator<BigdataStatement> notFoundInTarget(// final AbstractTripleStore expected,// final AbstractTripleStore actual // ) throws InterruptedException, ExecutionException { /* * The source access path is a full scan of the SPO index. */ final IAccessPath<ISPO> expectedAccessPath = expected.getAccessPath( (IV) null, (IV) null, (IV) null); /* * Efficiently convert SPOs to BigdataStatements (externalizes * statements). */ final BigdataStatementIterator itr2 = expected .asStatementIterator(expectedAccessPath.iterator()); final int capacity = 100000; final BlockingBuffer<BigdataStatement> buffer = new BlockingBuffer<BigdataStatement>( capacity); final StatementBuffer2<Statement, BigdataStatement> sb = new StatementBuffer2<Statement, BigdataStatement>( actual, true/* readOnly */, capacity) { /** * Statements not found in [actual] are written on the * BlockingBuffer. * * @return The #of statements that were not found. */ @Override protected int handleProcessedStatements(final BigdataStatement[] a) { if (log.isInfoEnabled()) log.info("Given " + a.length + " statements"); // bulk filter for statements not present in [actual]. final IChunkedOrderedIterator<ISPO> notFoundItr = actual .bulkFilterStatements(a, a.length, false/* present */); int nnotFound = 0; try { while (notFoundItr.hasNext()) { final ISPO notFoundStmt = notFoundItr.next(); if (log.isInfoEnabled()) log.info("Not found: " + notFoundStmt); buffer.add((BigdataStatement) notFoundStmt); nnotFound++; } } finally { notFoundItr.close(); } if (log.isInfoEnabled()) log.info("Given " + a.length + " statements, " + nnotFound + " of them were not found"); return nnotFound; } }; /** * Run task. The task consumes externalized statements from [expected] * and writes statements not found in [actual] onto the blocking buffer. */ final Callable<Void> myTask = new Callable<Void>() { public Void call() throws Exception { try { while (itr2.hasNext()) { // a statement from the source db. final BigdataStatement stmt = itr2.next(); // if (log.isInfoEnabled()) log.info("Source: " // + stmt); // add to the buffer. sb.add(stmt); } } finally { itr2.close(); } /* * Flush everything in the StatementBuffer so that it * shows up in the BlockingBuffer's iterator(). */ final long nnotFound = sb.flush(); if (log.isInfoEnabled()) log.info("Flushed: #notFound=" + nnotFound); return null; } }; /** * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/707"> * BlockingBuffer.close() does not unblock threads </a> */ // Wrap computation as FutureTask. final FutureTask<Void> ft = new FutureTask<Void>(myTask); // Set Future on BlockingBuffer. buffer.setFuture(ft); // Submit computation for evaluation. actual.getExecutorService().submit(ft); /* * Return iterator reading "not found" statements from the blocking * buffer. */ return buffer.iterator(); } /** * Exports all statements found in the data and all backchained entailments * for the <i>db</i> into a {@link TempTripleStore}. This may be used to * compare graphs purely in their data by pre-generation of all backchained * entailments. * <p> * Note: This is not a general purpose bulk export as it uses only a single * access path, does not store justifications, and does retain the * {@link Axioms} model of the source graph. This method is specifically * designed to export "just the triples", e.g., for purposes of comparison. * * @param db * The source database. * * @return The {@link TempTripleStore}. */ static public TempTripleStore bulkExport(final AbstractTripleStore db) { final Properties properties = new Properties(); properties.setProperty(Options.ONE_ACCESS_PATH, "true"); properties.setProperty(Options.JUSTIFY, "false"); properties.setProperty(Options.AXIOMS_CLASS, NoAxioms.class.getName()); properties.setProperty(Options.STATEMENT_IDENTIFIERS, "" + db.isStatementIdentifiers()); final TempTripleStore tmp = new TempTripleStore(properties); try { final StatementBuffer<Statement> sb = new StatementBuffer<Statement>(tmp, 100000/* capacity */, 10/* queueCapacity */); final IV NULL = null; final IChunkedOrderedIterator<ISPO> itr1 = new BackchainAccessPath( db, db.getAccessPath(NULL, NULL, NULL)).iterator(); final BigdataStatementIterator itr2 = db.asStatementIterator(itr1); try { while (itr2.hasNext()) { final BigdataStatement stmt = itr2.next(); sb.add(stmt); } } finally { itr2.close(); } sb.flush(); } catch (Throwable t) { tmp.close(); throw new RuntimeException(t); } return tmp; } /** * Compares two {@link LocalTripleStore}s * * @param args * filename filename (namespace) * * @throws Exception * * @todo namespace for each, could be the same file, and timestamp for each. * * @todo handle other database modes. */ public static void main(String[] args) throws Exception { if (args.length < 2 || args.length > 3) { usage(); } final File file1 = new File(args[0]); final File file2 = new File(args[1]); final String namespace = args.length == 3 ? args[2] : "kb"; if (!file1.exists()) throw new FileNotFoundException(file1.toString()); if (!file2.exists()) throw new FileNotFoundException(file2.toString()); Journal j1 = null, j2 = null; try { final Properties p = new Properties(); p.setProperty(com.bigdata.journal.Options.READ_ONLY, "true"); final AbstractTripleStore ts1; { Properties properties = new Properties(p); properties.setProperty(com.bigdata.journal.Options.FILE, file1 .toString()); j1 = new Journal(properties); ts1 = (AbstractTripleStore) j1.getResourceLocator().locate( namespace, TimestampUtility.asHistoricalRead(j1 .getLastCommitTime())); } final AbstractTripleStore ts2; { Properties properties = new Properties(p); properties.setProperty(com.bigdata.journal.Options.FILE, file2 .toString()); j2 = new Journal(properties); ts2 = (AbstractTripleStore) j2.getResourceLocator().locate( namespace, TimestampUtility.asHistoricalRead(j2 .getLastCommitTime())); } modelsEqual(ts1, ts2); } finally { if (j1 != null) j1.close(); if (j2 != null) j2.close(); } } private static void usage() { System.err.println("usage: filename filename (namespace)"); System.exit(1); } }