/* Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved. Contact: SYSTAP, LLC DBA Blazegraph 2501 Calvert ST NW #106 Washington, DC 20008 licenses@blazegraph.com This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package com.bigdata.rdf.rules; import java.util.Arrays; import java.util.HashSet; import java.util.Set; import java.util.concurrent.Callable; import org.apache.log4j.Logger; import com.bigdata.bop.Constant; import com.bigdata.bop.IBindingSet; import com.bigdata.bop.IConstant; import com.bigdata.bop.Var; import com.bigdata.rdf.internal.IV; import com.bigdata.rdf.internal.IVUtility; import com.bigdata.rdf.spo.ISPO; import com.bigdata.rdf.spo.SPOKeyOrder; import com.bigdata.rdf.spo.SPOPredicate; import com.bigdata.rdf.spo.SPORelation; import com.bigdata.rdf.store.IRawTripleStore; import com.bigdata.rdf.vocab.RDFSVocabulary; import com.bigdata.relation.IRelation; import com.bigdata.relation.RelationFusedView; import com.bigdata.relation.accesspath.IAccessPath; import com.bigdata.relation.accesspath.IBuffer; import com.bigdata.relation.locator.IResourceLocator; import com.bigdata.relation.rule.IRule; import com.bigdata.relation.rule.QueryOptions; import com.bigdata.relation.rule.Rule; import com.bigdata.relation.rule.eval.IJoinNexus; import com.bigdata.relation.rule.eval.IRuleTaskFactory; import com.bigdata.relation.rule.eval.ISolution; import com.bigdata.relation.rule.eval.IStepTask; import com.bigdata.relation.rule.eval.RuleStats; import com.bigdata.striterator.IChunkedOrderedIterator; /** * Rule used in steps 3, 5, 6, 7, and 9 of the fast forward closure program. * * <pre> * (?x, {P}, ?y) -> (?x, propertyId, ?y) * </pre> * * where <code>{P}</code> is the closure of the subproperties of one of the FIVE * (5) reserved keywords: * <ul> * <li><code>rdfs:subPropertyOf</code></li> * <li><code>rdfs:subClassOf</code></li> * <li><code>rdfs:domain</code></li> * <li><code>rdfs:range</code></li> * <li><code>rdf:type</code></li> * </ul> * * The caller MUST define an {@link IRuleTaskFactory} that provides a concrete * implementation of {@link FastClosureRuleTask} which knows how to compute * "{P}" when they instantiate this rule. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ */ public abstract class AbstractRuleFastClosure_3_5_6_7_9 extends Rule { // private final Set<IV> P; // private final IConstant<IV> rdfsSubPropertyOf; // private final IConstant<IV> propertyId; // private final Var x, y, SetP; /** * @param propertyId * @param taskFactory * An implementation returning a concrete instance of * {@link FastClosureRuleTask}. */ public AbstractRuleFastClosure_3_5_6_7_9(// final String name, final String relationName, final IConstant<IV> rdfsSubPropertyOf, final IConstant<IV> propertyId, final IRuleTaskFactory taskFactory // , Set<IV> P ) { super(name, new SPOPredicate(relationName, var("x"), propertyId, var("y")), // new SPOPredicate[] {// new SPOPredicate(relationName, var("x"), var("{P}"), var("y")) // },// QueryOptions.NONE,// null, // constraints null, // constants taskFactory ); if (rdfsSubPropertyOf == null) throw new IllegalArgumentException(); if (propertyId == null) throw new IllegalArgumentException(); // this.P = P; // this.rdfsSubPropertyOf = rdfsSubPropertyOf; // // this.propertyId = propertyId; // this.x = var("x"); // this.y = var("y"); // this.SetP = var("{P}"); } /** * Custom rule execution task. You must implement {@link #getSet()}. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ */ abstract protected static class FastClosureRuleTask implements IStepTask { private final String database; private final String focusStore; private final IRule rule; private final IJoinNexus joinNexus; // Note: Not serializable. private final IBuffer<ISolution[]> buffer; // Note: Not serializable. // private final Set<IV> P; protected final IConstant<IV> rdfsSubPropertyOf; protected final IConstant<IV> propertyId; /** * @see #getView() */ private transient IRelation<ISPO> view = null; /** * <code>(?x, {P}, ?y) -> (?x, propertyId, ?y)</code> * * Note: Both the database and the (optional) focusStore relation names * MUST be declared for these rules. While the rule only declares a * single tail predicate, there is a "hidden" query based on the * [database + focusStore] fused view that populates the P,D,C,R, or T * Set which is an input to the custom evaluation of the rule. * * @param database * Name of the database relation (required). * @param focusStore * Optional name of the focusStore relation (may be null). * When non-<code>null</code>, this is used to query the * fused view of the [database + focusStore] in * {@link FastClosureRuleTask#getView()}. * @param rule * The rule. * @param joinNexus * @param buffer * A buffer used to accumulate chunks of entailments. * @param rdfsSubPropertyOf * The {@link Constant} corresponding to the term identifier * for <code>rdfs:subPropertyOf</code>. * @param propertyId * The propertyId to be used in the assertions. */ public FastClosureRuleTask(// String database, String focusStore, IRule rule, IJoinNexus joinNexus, IBuffer<ISolution[]> buffer, // Set<IV> P, IConstant<IV> rdfsSubPropertyOf, IConstant<IV> propertyId) { if (database == null) throw new IllegalArgumentException(); if (rule == null) throw new IllegalArgumentException(); if (joinNexus == null) throw new IllegalArgumentException(); if (buffer == null) throw new IllegalArgumentException(); // if (P == null) // throw new IllegalArgumentException(); if (rdfsSubPropertyOf== null) throw new IllegalArgumentException(); if (propertyId == null) throw new IllegalArgumentException(); this.database = database; this.focusStore = focusStore; // MAY be null. this.rule = rule; this.joinNexus = joinNexus; this.buffer = buffer; // this.P = P; this.rdfsSubPropertyOf = rdfsSubPropertyOf; this.propertyId = propertyId; } public RuleStats call() { if (INFO) log.info("running: rule=" + rule.getName() + ", propertyId=" + propertyId); final RuleStats stats = joinNexus.getRuleStatisticsFactory().newInstance(rule); final long begin = System.currentTimeMillis(); /* * Note: Since this task is always applied to a single tail rule, * the {@link TMUtility} rewrite of the rule will always read from * the focusStore alone. This makes the choice of the relation on * which to read easy - just read on whichever relation is specified * for tail[0]. */ // final String relationName = rule.getHead().getOnlyRelationName(); // // final long timestamp = joinNexus.getReadTimestamp(relationName); // // final SPORelation relation = (SPORelation) joinNexus // .getIndexManager().getResourceLocator().locate( // relationName, timestamp); final SPORelation relation = (SPORelation) joinNexus .getTailRelationView(rule.getTail(0)); /* * Query for the set {P} rather than requiring it as an input. * * Note: This is really aligning relations with different * arity/shape (long[1] vs SPO) * * @todo Make {P} a chunked iterator, proceed by chunk, and put each * chunk into ascending Long[] order. However the methods that * compute {P} are a custom closure operator and they fix point {P} * in memory. To generalize with a chunked iterator there would need * to be a backing BigdataLongSet and that will be less efficient * unless the property hierarchy scale is very large. */ final IV[] a = getSortedArray(getSet()); /* * For each p in the chunk. * * @todo execute subqueries in parallel against shared thread pool. */ for (IV p : a) { if (IVUtility.equals(p, propertyId.get())) { /* * The rule refuses to consider triple patterns where the * predicate for the subquery is the predicate for the * generated entailments since the support would then entail * itself. */ continue; } stats.subqueryCount[0]++; final IAccessPath<ISPO> accessPath = relation.getAccessPath( null, p, null); final IChunkedOrderedIterator<ISPO> itr2 = accessPath.iterator(); // ISPOIterator itr2 = (state.focusStore == null ? // state.database // .getAccessPath(NULL, p, NULL).iterator() // : state.focusStore.getAccessPath(NULL, p, NULL) // .iterator()); final IBindingSet bindingSet = joinNexus.newBindingSet(rule); try { while (itr2.hasNext()) { final ISPO[] chunk = itr2.nextChunk(SPOKeyOrder.POS); stats.chunkCount[0] ++; stats.elementCount[0] += chunk.length; if (DEBUG) { log.debug("stmts1: chunk=" + chunk.length + "\n" + Arrays.toString(chunk)); } final IBuffer<ISolution> tmp = joinNexus .newUnsynchronizedBuffer(buffer, chunk.length); for (ISPO spo : chunk) { /* * Note: since P includes rdfs:subPropertyOf (as * well as all of the sub-properties of * rdfs:subPropertyOf) there are going to be some * axioms in here that we really do not need to * reassert and generally some explicit statements * as well. * * @todo so, filter out explicit and axioms? * * @todo clone the bindingSet first? */ assert spo.p().equals(p) : "spo.p="+spo.p()+", p="+p; if(joinNexus.bind(rule, 0, spo, bindingSet)) { // joinNexus.copyValues(spo, rule.getTail(0), // bindingSet); // // if (rule.isConsistent(bindingSet)) { tmp.add(joinNexus.newSolution(rule, bindingSet)); stats.solutionCount.incrementAndGet(); } } // next spo in chunk. // flush onto the chunked solution buffer. tmp.flush(); } // while(itr2) } finally { itr2.close(); } } // next p in {P} stats.elapsed += System.currentTimeMillis() - begin; return stats; } /** * Convert a {@link Set} of term identifiers into a sorted array of term * identifiers. * <P> * Note: When issuing multiple queries against the database, it is * generally faster to issue those queries in key order. * * @return The sorted term identifiers. */ protected IV[] getSortedArray(Set<IV> ivs) { final int n = ivs.size(); final IV[] a = new IV[n]; int i = 0; for (IV iv : ivs) { a[i++] = iv; } Arrays.sort(a); return a; } /** * Return the {@link IRelation} (or {@link RelationFusedView}) used by * the {@link #getSet()} impls for their {@link IAccessPath}s. */ synchronized protected IRelation<ISPO> getView() { if (view == null) { /* * Setup the [database] or [database + focusStore] view used to * compute the closure. */ final IResourceLocator resourceLocator = joinNexus .getIndexManager().getResourceLocator(); if (focusStore == null) { final long timestamp = joinNexus.getReadTimestamp(/*database*/); return (IRelation<ISPO>)resourceLocator.locate(database, timestamp); } else { final long timestamp0 = joinNexus.getReadTimestamp(/*database*/); final long timestamp1 = joinNexus.getReadTimestamp(/*focusStore*/); return new RelationFusedView<ISPO>( // (IRelation<ISPO>)resourceLocator.locate(database, timestamp0), (IRelation<ISPO>)resourceLocator.locate(focusStore, timestamp1)) .init(); } // final IAccessPath accessPath = (focusStore == null // // ? database.getAccessPath(NULL, p, NULL)// // : new AccessPathFusedView(focusStore // .getAccessPath(NULL, p, NULL), // // database.getAccessPath(NULL, p, NULL)// // )); } return view; } /** * Return the set of term identifiers that will be processed by the * rule. When the closure is being computed for truth maintenance the * implementation MUST read from the [database+focusStore] fused view. * Otherwise it reads from the database. * <p> * Note: The subclass need only invoke {@link #getSubProperties()} or * {@link #getSubPropertiesOf(IConstant)} as appropriate for the rule. * * @return The set. */ abstract protected Set<IV> getSet(); /** * Delegates to {@link SubPropertyClosureTask} * * @return The closure. */ protected Set<IV> getSubProperties() { return new SubPropertyClosureTask(getView(), rdfsSubPropertyOf).call(); } /** * Delegates to {@link SubPropertiesOfClosureTask} * * @param propertyId * The property of interest. * * @return The closure. */ protected Set<IV> getSubPropertiesOf(IConstant<IV> propertyId) { return new SubPropertiesOfClosureTask(getView(), rdfsSubPropertyOf, propertyId).call(); } } // FastClosureRuleTask /** * Computes the set of possible sub properties of rdfs:subPropertyOf (<code>P</code>). * This is used by step <code>2</code> in * {@link RDFSVocabulary#fastForwardClosure()}. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ */ public static class SubPropertyClosureTask implements Callable<Set<IV>> { final static protected Logger log = Logger.getLogger(SubPropertyClosureTask.class); private final IRelation<ISPO> view; // Note: Not serializable. private final IConstant<IV> rdfsSubPropertyOf; public SubPropertyClosureTask(IRelation<ISPO> view, IConstant<IV> rdfsSubPropertyOf) { if (view == null) throw new IllegalArgumentException(); if (rdfsSubPropertyOf == null) throw new IllegalArgumentException(); this.view = view; this.rdfsSubPropertyOf = rdfsSubPropertyOf; } public Set<IV> call() { return getSubProperties(); } /** * Compute the closure. * * @return A set containing the term identifiers for the members of P. */ public Set<IV> getSubProperties() { final Set<IV> P = new HashSet<IV>(); P.add(rdfsSubPropertyOf.get()); /* * query := (?x, P, P), adding new members to P until P reaches fix * point. */ { int nbefore; int nafter = 0; int nrounds = 0; final Set<IV> tmp = new HashSet<IV>(); do { nbefore = P.size(); tmp.clear(); /* * query := (?x, p, ?y ) for each p in P, filter ?y element * of P. */ for (IV p : P) { final SPOPredicate pred = new SPOPredicate(// "view",// @todo the label here is ignored, but should be the ordered names of the relations in the view. Var.var("x"), new Constant<IV>(p), Var.var("y")// ); final IAccessPath<ISPO> accessPath = view .getAccessPath(pred); // final IAccessPath accessPath = (focusStore == null // // ? database.getAccessPath(NULL, p, NULL)// // : new AccessPathFusedView(focusStore // .getAccessPath(NULL, p, NULL), // // database.getAccessPath(NULL, p, NULL)// // )); final IChunkedOrderedIterator<ISPO> itr = accessPath.iterator(); try { while (itr.hasNext()) { final ISPO[] stmts = itr.nextChunk(); for (ISPO stmt : stmts) { if (P.contains(stmt.o())) { tmp.add(stmt.s()); } } } } finally { itr.close(); } } P.addAll(tmp); nafter = P.size(); nrounds++; } while (nafter > nbefore); } // if (log.isDebugEnabled()) { // // Set<String> terms = new HashSet<String>(); // // for (Long id : P) { // // terms.add(database.toString(id)); // // } // // log.debug("P: " + terms); // // } return P; } } /** * Query the <i>database</i> for the sub properties of a given * property. * <p> * Pre-condition: The closure of <code>rdfs:subPropertyOf</code> has * been asserted on the database. * * @param p * The constant corresponding to the term identifier for the * property whose sub-properties will be obtain. * * @return A set containing the term identifiers for the sub properties * of <i>p</i>. * * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a> * @version $Id$ */ public static class SubPropertiesOfClosureTask implements Callable<Set<IV>> { final static protected Logger log = Logger.getLogger(SubPropertyClosureTask.class); private final IRelation<ISPO> view; // Note: Not serializable. private final IConstant<IV> rdfsSubPropertyOf; private final IConstant<IV> p; public SubPropertiesOfClosureTask(IRelation<ISPO> view, IConstant<IV> rdfsSubPropertyOf, IConstant<IV> p) { if (view == null) throw new IllegalArgumentException(); if (rdfsSubPropertyOf == null) throw new IllegalArgumentException(); if (p == null) throw new IllegalArgumentException(); this.view = view; this.rdfsSubPropertyOf = rdfsSubPropertyOf; this.p = p; } public Set<IV> call() { return getSubPropertiesOf(p); } /** * Compute the closure. * * @param p * The property of interest. * * @return The closure. */ public Set<IV> getSubPropertiesOf(IConstant<IV> p) { final SPOPredicate pred = new SPOPredicate(// "view", // Var.var("x"), rdfsSubPropertyOf, p// ); final IAccessPath<ISPO> accessPath = view.getAccessPath(pred); // final IAccessPath accessPath = // // (focusStore == null // // ? database.getAccessPath(NULL/* x */, rdfsSubPropertyOf.get(), p)// // : new AccessPathFusedView(// // focusStore.getAccessPath(NULL/* x */, // rdfsSubPropertyOf.get(), p), // // database.getAccessPath(NULL/* x */, // rdfsSubPropertyOf.get(), p)// // )); // if (log.isDebugEnabled()) { // // log.debug("p=" + database.toString(p)); // // } final Set<IV> tmp = new HashSet<IV>(); /* * query := (?x, rdfs:subPropertyOf, p). * * Distinct ?x are gathered in [tmp]. * * Note: This query is two-bound on the POS index. */ final IChunkedOrderedIterator<ISPO> itr = accessPath.iterator(); try { while (itr.hasNext()) { final ISPO[] stmts = itr.nextChunk(); for (ISPO spo : stmts) { boolean added = tmp.add(spo.s()); if (DEBUG) log.debug(spo.toString(/* database */) + ", added subject=" + added); } } } finally { itr.close(); } // if (log.isDebugEnabled()) { // // Set<String> terms = new HashSet<String>(); // // for (Long id : tmp) { // // terms.add(database.toString(id)); // // } // // log.debug("sub properties: " + terms); // // } return tmp; } } }