/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Oct 30, 2007
 */

package com.bigdata.rdf.inf;

import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;

import org.apache.log4j.Logger;

import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IVariableOrConstant;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.lexicon.ITermIVFilter;
import com.bigdata.rdf.model.StatementEnum;
import com.bigdata.rdf.rules.InferenceEngine;
import com.bigdata.rdf.spo.ExplicitSPOFilter;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.SPO;
import com.bigdata.rdf.spo.SPOKeyOrder;
import com.bigdata.rdf.spo.SPORelation;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.striterator.ChunkedArrayIterator;
import com.bigdata.striterator.IChunkedIterator;
import com.bigdata.striterator.IChunkedOrderedIterator;
import com.bigdata.striterator.IKeyOrder;

import cutthecrap.utils.striterators.Filter;
import cutthecrap.utils.striterators.FilterBase;
import cutthecrap.utils.striterators.ICloseable;
import cutthecrap.utils.striterators.ICloseableIterator;
import cutthecrap.utils.striterators.Resolver;
import cutthecrap.utils.striterators.Striterator;

/**
 * Provides backward chaining for (x rdf:type rdfs:Resource).
 * <p>
 * Note: You only need to do this on read from a high level query language
 * since the rest of the RDFS rules will run correctly without the (x rdf:type
 * rdfs:Resource) entailments being present. Further, you only need to do this
 * when the {@link InferenceEngine} was instructed to NOT store the (x rdf:type
 * rdfs:Resource) entailments.
 * <p>
 * Note: This iterator will NOT generate an inferred (x rdf:type rdfs:Resource)
 * entailment iff there is an explicit statement (x rdf:type rdfs:Resource) in
 * the database.
 * 
 * @see InferenceEngine
 * @see InferenceEngine.Options
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id: BackchainTypeResourceIterator.java 3687 2010-09-29 22:50:32Z
 *          mrpersonick $
 */
public class BackchainTypeResourceIterator implements
        IChunkedOrderedIterator<ISPO> {

    protected static final Logger log = Logger
            .getLogger(BackchainTypeResourceIterator.class);

    private final IChunkedOrderedIterator<ISPO> _src;

    private final Iterator<ISPO> src;

//    private final long s;

//    private final AbstractTripleStore db;

    private final IV rdfType, rdfsResource;

    private final IKeyOrder<ISPO> keyOrder;

    private final int chunkSize = 100;//10000;

    /**
     * The subject(s) whose (s rdf:type rdfs:Resource) entailments will be
     * visited.
     */
    private PushbackIterator<IV> resourceIds;

    /**
     * An iterator reading on the {@link SPOKeyOrder#POS} index.
     * The predicate is bound to <code>rdf:type</code> and the object is bound
     * to <code>rdfs:Resource</code>. If the subject was given to the ctor,
     * then it will also be bound. The iterator visits the term identifier for
     * the <em>subject</em> position.
     */
    private PushbackIterator<IV> posItr;

    private boolean sourceExhausted = false;

    private boolean open = true;

    /**
     * This is set each time by {@link #nextChunk()} and inspected by
     * {@link #nextChunk(IKeyOrder)} in order to decide whether the chunk
     * needs to be sorted.
     */
    private IKeyOrder<ISPO> chunkKeyOrder = null;

    /**
     * The last {@link ISPO} visited by {@link #next()}.
     */
    private ISPO current = null;

    /**
     * Returns a suitably configured {@link BackchainTypeResourceIterator}
     * -or- <i>src</i> iff the <i>accessPath</i> does not require the
     * materialization of <code>(x rdf:type rdfs:Resource)</code> entailments.
     * 
     * @param _src
     *            The source iterator. {@link #nextChunk()} will sort
     *            statements into the {@link IKeyOrder} reported by this
     *            iterator (as long as the {@link IKeyOrder} is
     *            non-<code>null</code>).
     * @param accessPath
     *            The {@link IAccessPath} from which the <i>src</i> iterator
     *            was derived. Note that <i>src</i> is NOT necessarily
     *            equivalent to {@link IAccessPath#iterator()} since it MAY
     *            have been layered already to backchain other entailments,
     *            e.g., <code>owl:sameAs</code>.
     * @param db
     *            The database from which we will read the distinct subject
     *            identifiers from its {@link SPORelation}. This parameter is
     *            used iff this is an all unbound triple pattern.
     * @param rdfType
     *            The term identifier that corresponds to rdf:type for the
     *            database.
     * @param rdfsResource
     *            The term identifier that corresponds to rdfs:Resource for
     *            the database.
     * 
     * @return The backchain iterator -or- the <i>src</i> iterator iff the
     *         <i>accessPath</i> does not require the materialization of
     *         <code>(x rdf:type rdfs:Resource)</code> entailments.
     */
    @SuppressWarnings("unchecked")
    static public IChunkedOrderedIterator<ISPO> newInstance(
            final IChunkedOrderedIterator<ISPO> _src,
            final IAccessPath<ISPO> accessPath, final AbstractTripleStore db,
            final IV rdfType, final IV rdfsResource) {

        if (accessPath == null)
            throw new IllegalArgumentException();

//        final SPO spo = new SPO(accessPath.getPredicate());

        final IPredicate<ISPO> pred = accessPath.getPredicate();

        final IV s = getTerm(pred, 0);
        final IV p = getTerm(pred, 1);
        final IV o = getTerm(pred, 2);

        if (((o == null || o.equals(rdfsResource)) && (p == null || p
                .equals(rdfType))) == false) {

            /*
             * Backchain will not generate any statements.
             */
            return _src;

        }

        if (_src == null)
            throw new IllegalArgumentException();

        if (db == null)
            throw new IllegalArgumentException();

        /*
         * The subject(s) whose (s rdf:type rdfs:Resource) entailments will be
         * visited.
         */
        final PushbackIterator<IV> resourceIds;

        /*
         * An iterator reading on the {@link SPOKeyOrder#POS} index. The
         * predicate is bound to <code>rdf:type</code> and the object is bound
         * to <code>rdfs:Resource</code>. If the subject was given to the
         * ctor, then it will also be bound. The iterator visits the term
         * identifier for the <em>subject</em> position.
         */
        final PushbackIterator<IV> posItr;

        if (s == null) {

            /*
             * Backchain will generate one statement for each distinct subject
             * or object in the store.
             * 
             * @todo This is Ok as long as you are forward chaining all of the
             * rules that put a predicate or an object into the subject
             * position since it will then have all resources.
             * If you backward chain some of those rules, e.g., rdf1, then you
             * MUST change this to read on the ids index and skip anything
             * that is marked as a literal using the low bit of the term
             * identifier, but you will overgenerate for resources that are no
             * longer in use by the KB (you could filter for that).
             */

//            resourceIds =
//                db.getSPORelation().distinctTermScan(SPOKeyOrder.SPO);

            resourceIds = new PushbackIterator<IV>(new MergedOrderedIterator(//
                    db.getSPORelation().distinctTermScan(SPOKeyOrder.SPO), //
                    db.getSPORelation().distinctTermScan(SPOKeyOrder.OSP,
                            new ITermIVFilter() {
                                private static final long serialVersionUID = 1L;
                                public boolean isValid(IV iv) {
                                    // filter out literals from the OSP scan.
                                    return !iv.isLiteral();
                                }
                            })));

            /*
             * Reading (? rdf:type rdfs:Resource) using the POS index.
             */
            posItr = new PushbackIterator<IV>(new Striterator(db
                    .getAccessPath(null, rdfType, rdfsResource,
                            ExplicitSPOFilter.INSTANCE).iterator())
                    .addFilter(new Resolver() {

                        private static final long serialVersionUID = 1L;

                        @Override
                        protected Object resolve(Object obj) {

                            return ((SPO) obj).s;

                        }

                    }));

        } else {

            /*
             * Backchain will generate exactly one statement: (s rdf:type
             * rdfs:Resource).
             */

            /*
             * resourceIds = new PushbackIterator<Long>( new
             * ClosableSingleItemIterator<Long>(spo.s));
             */

            /*
             * Reading a single point (s type resource), so this will actually
             * use the SPO index.
             */

            /*
             * posItr = new PushbackIterator<Long>(new Striterator(db
             * .getAccessPath(spo.s, rdfType, rdfsResource,
             * ExplicitSPOFilter.INSTANCE).iterator()) .addFilter(new
             * Resolver() { private static final long serialVersionUID = 1L;
             * 
             * @Override protected Object resolve(Object obj) { return
             * Long.valueOf(((SPO) obj).s); } }));
             */

            return new BackchainSTypeResourceIterator(_src, accessPath, db,
                    rdfType, rdfsResource);

        }

        /*
         * Filters out (x rdf:type rdfs:Resource) in case it is explicit in
         * the db so that we do not generate duplicates for explicit type
         * resource statements.
         */
        final Iterator<ISPO> src = new Striterator(_src)
                .addFilter(new Filter() {

                    private static final long serialVersionUID = 1L;

                    @Override
                    public boolean isValid(final Object arg0) {

                        final SPO o = (SPO) arg0;

                        if (o.p.equals(rdfType) && o.o.equals(rdfsResource)) {

                            return false;

                        }

                        return true;

                    }

                });

        return new BackchainTypeResourceIterator(_src, src, resourceIds,
                posItr, rdfType, rdfsResource);

    }

    private static IV getTerm(final IPredicate<ISPO> pred, final int pos) {

        final IVariableOrConstant<IV> term = pred.get(pos);

        return term == null || term.isVar() ? null : term.get();

    }
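    /*
     * Usage sketch (illustrative only): one way to layer this backchainer
     * over an access path. The triple store [db] and the vocabulary IVs
     * [rdfType] and [rdfsResource] are assumed to have been resolved
     * elsewhere, e.g., by the InferenceEngine.
     *
     *   IAccessPath<ISPO> ap = ...; // some triple pattern
     *   IChunkedOrderedIterator<ISPO> itr =
     *       BackchainTypeResourceIterator.newInstance(
     *           ap.iterator(), ap, db, rdfType, rdfsResource);
     *
     * When the triple pattern cannot produce (x rdf:type rdfs:Resource)
     * entailments, newInstance() simply returns the source iterator.
     */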
    /**
     * Create an iterator that will visit all statements in the source
     * iterator and also backchain any entailments of the form (x rdf:type
     * rdfs:Resource) which are valid for the given triple pattern.
     * 
     * @param src
     *            The source iterator. {@link #nextChunk()} will sort
     *            statements into the {@link IKeyOrder} reported by this
     *            iterator (as long as the {@link IKeyOrder} is
     *            non-<code>null</code>).
     * @param rdfType
     *            The term identifier that corresponds to rdf:type for the
     *            database.
     * @param rdfsResource
     *            The term identifier that corresponds to rdfs:Resource for
     *            the database.
     * 
     * @see #newInstance(IChunkedOrderedIterator, IAccessPath,
     *      AbstractTripleStore, IV, IV)
     */
    @SuppressWarnings("rawtypes")
    private BackchainTypeResourceIterator(IChunkedOrderedIterator<ISPO> _src,//
            Iterator<ISPO> src,//
            PushbackIterator<IV> resourceIds,//
            PushbackIterator<IV> posItr,//
            final IV rdfType,//
            final IV rdfsResource//
    ) {

        // the raw source - we pass close() through to this.
        this._src = _src;

        this.keyOrder = _src.getKeyOrder(); // MAY be null.

        // the source with (x type resource) filtered out.
        this.src = src;

        this.resourceIds = resourceIds;

        this.posItr = posItr;

        this.rdfType = rdfType;

        this.rdfsResource = rdfsResource;

    }

    @Override
    public IKeyOrder<ISPO> getKeyOrder() {

        return keyOrder;

    }

    @Override
    public void close() {

        if (!open)
            return;

        // release any resources here.
        open = false;

        _src.close();

        resourceIds.close();

        resourceIds = null;

        if (posItr != null) {

            posItr.close();

        }

    }

    @Override
    public boolean hasNext() {

        if (!open) {

            // the iterator has been closed.
            return false;

        }

        if (!sourceExhausted) {

            if (src.hasNext()) {

                // still consuming the source iterator.
                return true;

            }

            // the source iterator is now exhausted.
            sourceExhausted = true;

            _src.close();

        }

        if (resourceIds.hasNext()) {

            // still consuming the subjects iterator.
            return true;

        }

        // the subjects iterator is also exhausted so we are done.
        return false;

    }

    /**
     * Visits all {@link SPO}s visited by the source iterator and then begins
     * to backchain ( x rdf:type rdfs:Resource ) statements.
     * <p>
     * The "backchain" scans two iterators: an {@link IChunkedOrderedIterator}
     * on <code>( ? rdf:type rdfs:Resource )</code> that reads on the database
     * (this tells us whether we have an explicit
     * <code>(x rdf:type rdfs:Resource)</code> in the database for a given
     * subject) and an iterator that reads on the term identifiers for the
     * distinct resources in the database (this bounds the #of backchained
     * statements that we will emit).
     * <p>
     * For each value visited by the {@link #resourceIds} iterator we examine
     * the statement iterator. If the next value that would be visited by the
     * statement iterator is an explicit statement for the current subject,
     * then we emit the explicit statement. Otherwise we emit an inferred
     * statement.
     */
    @Override
    public ISPO next() {

        if (!hasNext()) {

            throw new NoSuchElementException();

        }

        if (src.hasNext()) {

            return current = src.next();

        } else if (resourceIds.hasNext()) {

            /*
             * Examine resourceIds and posItr.
             */

            // resourceIds is the source for _inferences_
            final IV s1 = resourceIds.next();

            if (posItr.hasNext()) {

                // posItr is the source for _explicit_ statements.
                final IV s2 = posItr.next();

                final int cmp = s1.compareTo(s2);

                if (cmp < 0) {

                    /*
                     * Consuming from [resourceIds] (the term identifier
                     * ordered LT the next term identifier from [posItr]).
                     * 
                     * There is NOT an explicit statement from [posItr], so
                     * emit as an inference and pushback on [posItr].
                     */
                    current = new SPO(s1, rdfType, rdfsResource,
                            StatementEnum.Inferred);

                    posItr.pushback();

                } else {

                    /*
                     * Consuming from [posItr].
                     * 
                     * There is an explicit statement for the current term
                     * identifier from [resourceIds].
                     */

                    if (cmp != 0) {

                        /*
                         * Since [resourceIds] and [posItr] are NOT visiting
                         * the same term identifier, we pushback on
                         * [resourceIds].
                         * 
                         * Note: When they DO visit the same term identifier
                         * then we only emit the explicit statement and we
                         * consume (rather than pushback) from [resourceIds].
                         */
                        resourceIds.pushback();

                    }

                    current = new SPO(s2, rdfType, rdfsResource,
                            StatementEnum.Explicit);

                }

            } else {

                /*
                 * [posItr] is exhausted so just emit inferences based on
                 * [resourceIds].
                 */
                current = new SPO(s1, rdfType, rdfsResource,
                        StatementEnum.Inferred);

            }

            return current;

        } else {

            /*
             * Finish off the [posItr]. Anything from this source is an
             * explicit (? type resource) statement.
             */

            assert posItr.hasNext();

            return new SPO(posItr.next(), rdfType, rdfsResource,
                    StatementEnum.Explicit);

        }

    }
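    /*
     * Illustrative trace of the backchain phase of next(), using made-up
     * term identifiers: suppose [resourceIds] visits {s1, s2, s3} (in their
     * natural order) and [posItr] visits {s2}, i.e., only
     * (s2 rdf:type rdfs:Resource) is explicit in the database. The backchain
     * phase then emits:
     *
     *   (s1 rdf:type rdfs:Resource)  Inferred  (s1 LT s2, pushback on posItr)
     *   (s2 rdf:type rdfs:Resource)  Explicit  (equal identifiers, both consumed)
     *   (s3 rdf:type rdfs:Resource)  Inferred  (posItr is exhausted)
     *
     * Both iterators visit term identifiers in the same natural order, which
     * is what makes this single-pass merge work.
     */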
    /**
     * Note: This method preserves the {@link IKeyOrder} of the source
     * iterator iff it is reported by {@link #getKeyOrder()}. Otherwise chunks
     * read from the source iterator will be in whatever order that iterator
     * is using while chunks containing backchained entailments will be in
     * {@link SPOKeyOrder#POS} order.
     * <p>
     * Note: In order to ensure that a consistent ordering is always used
     * within a chunk the backchained entailments will always begin on a
     * chunk boundary.
     */
    @Override
    public ISPO[] nextChunk() {

        if (!hasNext())
            throw new NoSuchElementException();

        if (!sourceExhausted) {

            /*
             * Return a chunk from the source iterator.
             * 
             * Note: The chunk will be in the order used by the source
             * iterator. If the source iterator does not report that order
             * then [chunkKeyOrder] will be null.
             */

            chunkKeyOrder = keyOrder;

            ISPO[] s = new ISPO[chunkSize];

            int n = 0;

            while (src.hasNext() && n < chunkSize) {

                s[n++] = src.next();

            }

            ISPO[] stmts = new ISPO[n];

            // copy so that stmts[] is dense.
            System.arraycopy(s, 0, stmts, 0, n);

            return stmts;

        }

        /*
         * Create a "chunk" of entailments.
         * 
         * Note: This chunk will be in natural POS order since that is the
         * index that we scan to decide whether or not there was an explicit
         * ( x rdf:type rdfs:Resource ) while we consume the [subjects] in
         * termId order.
         */

        IV[] s = new IV[chunkSize];

        int n = 0;

        while (resourceIds.hasNext() && n < chunkSize) {

            s[n++] = resourceIds.next();

        }

        SPO[] stmts = new SPO[n];

        for (int i = 0; i < n; i++) {

            stmts[i] = new SPO(s[i], rdfType, rdfsResource,
                    StatementEnum.Inferred);

        }

        if (keyOrder != null && keyOrder != SPOKeyOrder.POS) {

            /*
             * Sort into the same order as the source iterator.
             * 
             * Note: We have to sort explicitly since we are scanning the POS
             * index.
             */
            Arrays.sort(stmts, 0, stmts.length, keyOrder.getComparator());

            // the chunk is now in the order reported by the source iterator.
            chunkKeyOrder = keyOrder;

        } else {

            /*
             * The chunk is in POS order since that is how we are scanning the
             * indices.
             */
            chunkKeyOrder = SPOKeyOrder.POS;

        }

        return stmts;

    }

    @Override
    public ISPO[] nextChunk(final IKeyOrder<ISPO> keyOrder) {

        if (keyOrder == null)
            throw new IllegalArgumentException();

        final ISPO[] stmts = nextChunk();

        if (chunkKeyOrder != keyOrder) {

            // sort into the required order.
            Arrays.sort(stmts, 0, stmts.length, keyOrder.getComparator());

        }

        return stmts;

    }

    /**
     * Note: You can not "remove" the backchained entailments. If the last
     * statement visited by {@link #next()} is "explicit" then the request is
     * delegated to the source iterator.
     */
    @Override
    public void remove() {

        if (!open)
            throw new IllegalStateException();

        if (current == null)
            throw new IllegalStateException();

        if (current.isExplicit()) {

            /*
             * Delegate the request to the source iterator.
             */
            src.remove();

        }

        current = null;

    }
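    /*
     * Illustrative example of the order preserving merge performed by the
     * MergedOrderedIterator below: if src1 visits [2, 5, 9] and src2 visits
     * [5, 7] (both in their natural order), the merge visits [2, 5, 7, 9].
     * The element 5 is reported only once even though both sources visit it.
     */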
    /**
     * Reads on two iterators visiting elements in some natural order and
     * visits their order preserving merge (no duplicates).
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     * @version $Id: BackchainTypeResourceIterator.java 3687 2010-09-29
     *          22:50:32Z mrpersonick $
     * @param <T>
     */
    private static class MergedOrderedIterator<T extends Comparable<T>>
            implements IChunkedIterator<T> {

        private final IChunkedIterator<T> src1;

        private final IChunkedIterator<T> src2;

        public MergedOrderedIterator(IChunkedIterator<T> src1,
                IChunkedIterator<T> src2) {

            this.src1 = src1;

            this.src2 = src2;

        }

        @Override
        public void close() {

            src1.close();

            src2.close();

        }

        /**
         * Note: Not implemented since not used above and this class is
         * private.
         */
        @Override
        public T[] nextChunk() {

            throw new UnsupportedOperationException();

        }

        @Override
        public boolean hasNext() {

            return tmp1 != null || tmp2 != null || src1.hasNext()
                    || src2.hasNext();

        }

        private T tmp1;

        private T tmp2;

        @Override
        public T next() {

            if (!hasNext())
                throw new NoSuchElementException();

            if (tmp1 == null && src1.hasNext()) {

                tmp1 = src1.next();

            }

            if (tmp2 == null && src2.hasNext()) {

                tmp2 = src2.next();

            }

            if (tmp1 == null) {

                // src1 is exhausted so deliver from src2.
                final T tmp = tmp2;

                tmp2 = null;

                return tmp;

            }

            if (tmp2 == null) {

                // src2 is exhausted so deliver from src1.
                final T tmp = tmp1;

                tmp1 = null;

                return tmp;

            }

            final int cmp = tmp1.compareTo(tmp2);

            if (cmp == 0) {

                final T tmp = tmp1;

                tmp1 = tmp2 = null;

                return tmp;

            } else if (cmp < 0) {

                final T tmp = tmp1;

                tmp1 = null;

                return tmp;

            } else {

                final T tmp = tmp2;

                tmp2 = null;

                return tmp;

            }

        }

        @Override
        public void remove() {

            throw new UnsupportedOperationException();

        }

    }

    /**
     * Filterator style construct that allows push back of a single visited
     * element.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     * @version $Id: BackchainTypeResourceIterator.java 3687 2010-09-29
     *          22:50:32Z mrpersonick $
     * @param <E>
     */
    public static class PushbackFilter<E> extends FilterBase {

        /**
         * 
         */
        private static final long serialVersionUID = -8010263934867149205L;

        @SuppressWarnings("unchecked")
        public PushbackIterator<E> filterOnce(Iterator src, Object context) {

            return new PushbackIterator<E>((Iterator<E>) src);

        }

    }

    /**
     * Implementation class for {@link PushbackFilter}.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     * @version $Id: BackchainTypeResourceIterator.java 3687 2010-09-29
     *          22:50:32Z mrpersonick $
     * @param <E>
     */
    public static class PushbackIterator<E> implements Iterator<E>,
            ICloseableIterator<E> {

        private final Iterator<E> src;

        /**
         * The most recent element visited by the iterator.
         */
        private E current;

        /**
         * When non-<code>null</code>, this element was pushed back and is the
         * next element to be visited.
         */
        private E buffer;

        public PushbackIterator(final Iterator<E> src) {

            if (src == null)
                throw new IllegalArgumentException();

            this.src = src;

        }

        @Override
        public boolean hasNext() {

            return buffer != null || src.hasNext();

        }

        @Override
        public E next() {

            if (!hasNext())
                throw new NoSuchElementException();

            final E tmp;

            if (buffer != null) {

                tmp = buffer;

                buffer = null;

            } else {

                tmp = src.next();

            }

            current = tmp;

            return tmp;

        }

        /**
         * Push the most recently visited value back onto the internal buffer.
         * It will be returned by the next call to {@link #next()}.
         * 
         * @throws IllegalStateException
         *             if there is already a value pushed back.
         */
        public void pushback() {

            if (buffer != null)
                throw new IllegalStateException();

            // pushback the last visited element.
            buffer = current;

        }
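        /*
         * Illustrative behavior of pushback(): if the underlying iterator
         * visits [A, B], then
         *
         *   next()      returns A
         *   pushback()  buffers A
         *   next()      returns A again (from the buffer)
         *   next()      returns B
         *
         * Calling pushback() twice without an intervening next() throws an
         * IllegalStateException.
         */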
        @Override
        public void remove() {

            throw new UnsupportedOperationException();

        }

        @Override
        public void close() {

            if (src instanceof ICloseable) {

                ((ICloseable) src).close();

            }

        }

    }

    private static class BackchainSTypeResourceIterator implements
            IChunkedOrderedIterator<ISPO> {

        private final IChunkedOrderedIterator<ISPO> _src;

        private final IAccessPath<ISPO> accessPath;

        private final AbstractTripleStore db;

        private final IV rdfType;

        private final IV rdfsResource;

        private final IV s;

        private IChunkedOrderedIterator<ISPO> appender;

        private boolean canRemove;

        public BackchainSTypeResourceIterator(
                final IChunkedOrderedIterator<ISPO> _src,
                final IAccessPath<ISPO> accessPath,
                final AbstractTripleStore db, final IV rdfType,
                final IV rdfsResource) {

            this._src = _src;
            this.accessPath = accessPath;
            this.db = db;
            this.rdfType = rdfType;
            this.rdfsResource = rdfsResource;
            this.s = (IV) accessPath.getPredicate().get(0).get();

            SPO spo = new SPO(s, rdfType, rdfsResource,
                    StatementEnum.Inferred);

            this.appender = new ChunkedArrayIterator<ISPO>(1,
                    new SPO[] { spo }, SPOKeyOrder.SPO);

        }

        private void testSPO(ISPO spo) {

            // do not need to append if we see it in the data
            if (spo.s().equals(s) && spo.p().equals(rdfType)
                    && spo.o().equals(rdfsResource)) {

                appender = null;

            }

        }

        @Override
        public boolean hasNext() {

            return _src.hasNext() || (appender != null && appender.hasNext());

        }

        @Override
        public IKeyOrder<ISPO> getKeyOrder() {

            return _src.getKeyOrder();

        }

        @Override
        public ISPO[] nextChunk(final IKeyOrder<ISPO> keyOrder) {

            if (_src.hasNext()) {

                final ISPO[] chunk = _src.nextChunk(keyOrder);

                for (ISPO spo : chunk) {

                    testSPO(spo);

                }

                canRemove = true;

                return chunk;

            } else if (appender != null) {

                canRemove = false;

                return appender.nextChunk(keyOrder);

            }

            return null;

        }

        @Override
        public ISPO next() {

            if (_src.hasNext()) {

                final ISPO spo = _src.next();

                testSPO(spo);

                canRemove = true;

                return spo;

            } else if (appender != null) {

                canRemove = false;

                return appender.next();

            }

            return null;

        }

        @Override
        public ISPO[] nextChunk() {

            if (_src.hasNext()) {

                final ISPO[] chunk = _src.nextChunk();

                for (ISPO spo : chunk) {

                    testSPO(spo);

                }

                canRemove = true;

                return chunk;

            } else if (appender != null) {

                canRemove = false;

                return appender.nextChunk();

            }

            return null;

        }

        @Override
        public void remove() {

            if (canRemove) {

                _src.remove();

            }

        }

        @Override
        public void close() {

            _src.close();

        }

    }

}