/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Sep 9, 2011
*/
package com.bigdata.rdf.sparql.ast.eval;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
import org.openrdf.model.URI;
import org.openrdf.model.impl.URIImpl;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.bindingSet.EmptyBindingSet;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.btree.BTree;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.btree.keys.SuccessorUtil;
import com.bigdata.cache.ConcurrentWeakValueCacheWithTimeout;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.constraints.RangeBOp;
import com.bigdata.rdf.internal.impl.literal.XSDNumericIV;
import com.bigdata.rdf.sparql.ast.FilterNode;
import com.bigdata.rdf.sparql.ast.GroupNodeBase;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.TermNode;
import com.bigdata.rdf.sparql.ast.service.BigdataNativeServiceOptions;
import com.bigdata.rdf.sparql.ast.service.BigdataServiceCall;
import com.bigdata.rdf.sparql.ast.service.IServiceOptions;
import com.bigdata.rdf.sparql.ast.service.ServiceCallCreateParams;
import com.bigdata.rdf.sparql.ast.service.ServiceNode;
import com.bigdata.rdf.spo.DistinctMultiTermAdvancer;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.SPO;
import com.bigdata.rdf.spo.SPOKeyOrder;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.BD;
import com.bigdata.relation.accesspath.EmptyCloseableIterator;
import com.bigdata.relation.accesspath.ThickCloseableIterator;
import com.bigdata.util.BytesUtil;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* A factory for a statement pattern slicing service.
* It accepts a group with a single triple pattern in it:
* <pre>
* service bd:slice {
* ?s rdf:type ex:Foo .
*
* # service params for the slice
* # either offset+limit
* bd:serviceParam bd:slice.offset 0 .
* bd:serviceParam bd:slice.limit 2000 .
* # or range
* bd:serviceParam bd:slice.range ?range
* }
* </pre>
* The service params set the slicing parameters (when not given, the offset
* defaults to 0 and the limit defaults to 1000). You can either request a
* slice or request a range count depending on the params. The range count
* is useful when dealing with a "rangeSafe" predicate with a range filter.
*
* @see RangeBOp
*/
public class SliceServiceFactory extends AbstractServiceFactory {
/**
* Logger shared by this factory and its nested service call.
*/
private static final Logger log = Logger
.getLogger(SliceServiceFactory.class);
/**
* The URI service key: the {@link BD#NAMESPACE} followed by
* <code>slice</code>. The {@link SliceParams} URIs are derived from it.
*/
public static final URI SERVICE_KEY = new URIImpl(BD.NAMESPACE+"slice");
/**
 * The parameters understood by the slice service. Every parameter URI is the
 * string form of {@link SliceServiceFactory#SERVICE_KEY} followed by a
 * dot-separated suffix.
 */
public static interface SliceParams {

    /* ---- parameter URIs ---- */

    /**
     * How far into the range the slice starts.
     */
    URI OFFSET = new URIImpl(SERVICE_KEY.stringValue() + ".offset");

    /**
     * The maximum size of the slice.
     */
    URI LIMIT = new URIImpl(SERVICE_KEY.stringValue() + ".limit");

    /**
     * Requests a range count rather than a slice. The object of the
     * parameter is the variable which will be bound to the range count.
     */
    URI RANGE = new URIImpl(SERVICE_KEY.stringValue() + ".range");

    /* ---- defaults ---- */

    /**
     * The offset used when {@link #OFFSET} is not given (0).
     */
    long DEFAULT_OFFSET = 0;

    /**
     * The limit used when {@link #LIMIT} is not given (1000).
     */
    int DEFAULT_LIMIT = 1000;

}
/**
* Keep a timeout cache of start and end indices for a give predicate.
* Typically these slice calls happen multiple times in a row in a very
* short time period, so it's best to not have to go back to the index
* every time for this information.
*/
private static final ConcurrentWeakValueCacheWithTimeout<IPredicate<ISPO>, CacheHit> cache;
private static final class CacheHit {
final long startIndex, endIndex;
public CacheHit(final long startIndex, final long endIndex) {
this.startIndex = startIndex;
this.endIndex = endIndex;
}
}
static {
cache = new ConcurrentWeakValueCacheWithTimeout<IPredicate<ISPO>, CacheHit>(
100, TimeUnit.MINUTES.toMillis(1));
}
/*
 * Note: A subclass of the options class could be used here if the service
 * ever needs its own configuration options.
 */
private final BigdataNativeServiceOptions serviceOptions;

/**
 * Creates the factory with a fresh set of native service options.
 */
public SliceServiceFactory() {

    this.serviceOptions = new BigdataNativeServiceOptions();

//  serviceOptions.setRunFirst(true);

}
/**
 * @return The service options instance created by the constructor; the
 *         same instance is handed to every call built by this factory.
 */
@Override
public BigdataNativeServiceOptions getServiceOptions() {

    return this.serviceOptions;

}
/**
 * Validates the service group and wraps the statement pattern found in it
 * into the call object which will execute the slice.
 *
 * @param params
 *            Supplies the triple store and the service node.
 * @param serviceParams
 *            The service parameters declared for this invocation.
 *
 * @return The service call.
 *
 * @throws RuntimeException
 *             if the group or the service parameters fail verification.
 */
@Override
public BigdataServiceCall create(final ServiceCallCreateParams params,
        final ServiceParams serviceParams) {

    final AbstractTripleStore tripleStore = params.getTripleStore();

    // Checks the service params and picks out the statement pattern.
    final StatementPatternNode pattern = verifyGraphPattern(tripleStore,
            params.getServiceNode().getGraphPattern(), serviceParams);

    return new SliceCall(tripleStore, pattern, serviceOptions, serviceParams);

}
/**
 * Verify that the service parameters are valid and that the group contains
 * exactly one statement pattern (filters and <code>bd:serviceParam</code>
 * statements are ignored when looking for that pattern).
 *
 * @param database
 *            The triple store (not consulted by the verification).
 * @param group
 *            The graph pattern of the service node.
 * @param params
 *            The service parameters extracted from the group.
 *
 * @return The one statement pattern to slice; never <code>null</code>.
 *
 * @throws IllegalArgumentException
 *             if the group does not contain a statement pattern.
 * @throws RuntimeException
 *             if a parameter is unrecognized or lacks a usable value, if
 *             the group contains anything other than filters and
 *             statement patterns, or if it contains more than one
 *             statement pattern.
 */
private StatementPatternNode verifyGraphPattern(
        final AbstractTripleStore database,
        final GroupNodeBase<IGroupMemberNode> group,
        final ServiceParams params) {

    /*
     * Each declared parameter must be one we know about and must carry a
     * value of the expected kind.
     */
    final Iterator<Map.Entry<URI, List<TermNode>>> it = params.iterator();

    while (it.hasNext()) {

        final URI param = it.next().getKey();

        if (SliceParams.OFFSET.equals(param)) {

            if (params.getAsLong(param, null) == null) {
                throw new RuntimeException("must provide a value for: " + param);
            }

        } else if (SliceParams.LIMIT.equals(param)) {

            if (params.getAsInt(param, null) == null) {
                throw new RuntimeException("must provide a value for: " + param);
            }

        } else if (SliceParams.RANGE.equals(param)) {

            if (params.getAsVar(param, null) == null) {
                throw new RuntimeException("must provide a variable for: " + param);
            }

        } else {

            throw new RuntimeException("unrecognized param: " + param);

        }

    }

    StatementPatternNode sp = null;

    for (IGroupMemberNode node : group) {

        if (node instanceof FilterNode) {
            // ok to have filters with ranges
            continue;
        }

        if (!(node instanceof StatementPatternNode)) {
            throw new RuntimeException("only statement patterns allowed");
        }

        final StatementPatternNode tmp = (StatementPatternNode) node;

        // Statements whose subject is bd:serviceParam only carry parameters.
        if (tmp.s().isConstant() && BD.SERVICE_PARAM.equals(tmp.s().getValue())) {
            continue;
        }

        if (sp != null) {
            throw new RuntimeException("group must contain a single statement pattern");
        }

        sp = tmp;

    }

    /*
     * Fail here, with a message, when there is nothing to slice. Without
     * this check the null would only be rejected later by the SliceCall
     * constructor, as an IllegalArgumentException without any message; the
     * same exception type is used so that callers observe no difference
     * other than the message.
     */
    if (sp == null) {
        throw new IllegalArgumentException(
                "group must contain a single statement pattern");
    }

    return sp;

}
/**
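* The service call which evaluates the slice, or the range count, over the
* access path for the statement pattern.
* <p>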
* Note: This has the {@link AbstractTripleStore} reference attached. This
* is not a {@link Serializable} object. It MUST run on the query
* controller.
*/
private static class SliceCall implements BigdataServiceCall {
/** The triple store whose statement indices are read. */
private final AbstractTripleStore db;

/** The statement pattern being sliced. */
private final StatementPatternNode sp;

/** The options reported by {@link #getServiceOptions()}. */
private final IServiceOptions serviceOptions;

/** The offset / limit / range parameters of this invocation. */
private final ServiceParams serviceParams;

/**
 * @param db
 *            The triple store.
 * @param sp
 *            The statement pattern to slice.
 * @param serviceOptions
 *            The service options.
 * @param serviceParams
 *            The service parameters.
 *
 * @throws IllegalArgumentException
 *             if any argument is <code>null</code>.
 */
public SliceCall(
        final AbstractTripleStore db,
        final StatementPatternNode sp,
        final IServiceOptions serviceOptions,
        final ServiceParams serviceParams) {

    // All four collaborators are mandatory.
    final boolean missingArgument = db == null || sp == null
            || serviceOptions == null || serviceParams == null;

    if (missingArgument) {
        throw new IllegalArgumentException();
    }

    this.db = db;
    this.sp = sp;
    this.serviceOptions = serviceOptions;
    this.serviceParams = serviceParams;

}
/**
 * Run a slice over an access path. Currently only implemented to work with
 * zero or one incoming bindings, and all variables in the incoming binding
 * must be in use in the statement pattern.
 * <p>
 * When the {@link SliceParams#RANGE} parameter is present a single
 * solution is returned in which the parameter's variable is bound to the
 * size of the index range covered by the access path. Otherwise at most
 * {@link SliceParams#LIMIT} solutions are returned, read from the index
 * starting {@link SliceParams#OFFSET} tuples into that range.
 *
 * @param bc
 *            The incoming binding sets. A <code>null</code> or empty
 *            array, or a single empty binding set, all mean "no incoming
 *            bindings".
 *
 * @return The solutions. The iterator is empty when no predicate can be
 *         formed for the pattern, or when the requested slice contains no
 *         tuples (empty range, offset equal to the range size, or a limit
 *         of zero).
 *
 * @throws RuntimeException
 *             if there is more than one incoming binding set, if the
 *             incoming binding set uses a variable which does not appear
 *             in the statement pattern, if the offset is negative or
 *             greater than the range size, or if the limit is negative.
 */
// The bop API (IVariable, IV, ITupleIterator) is used with raw types here.
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public ICloseableIterator<IBindingSet> call(
        final IBindingSet[] bc) {

    if (log.isInfoEnabled()) {
        // bc may be null: report that as zero incoming binding sets.
        log.info(bc == null ? 0 : bc.length);
        log.info(Arrays.toString(bc));
    }

    if (bc != null && bc.length > 1) {
        throw new RuntimeException("cannot run with multiple incoming bindings");
    }

    /*
     * Keep a map of variables in the statement pattern to the position
     * in which they appear in the statement pattern.
     */
    final Map<IVariable, Integer> vars = new LinkedHashMap<IVariable, Integer>();

    for (int i = 0; i < sp.arity(); i++) {

        final TermNode term = sp.get(i);

        if (term == null || !term.isVariable()) {
            continue;
        }

        final IVariable v = (IVariable) term.getValueExpression();

        if (log.isTraceEnabled()) {
            log.trace("variable: " + v + " at position: " + i);
        }

        vars.put(v, i);

    }

    /*
     * The single incoming binding set, if there is a non-empty one. The
     * null test keeps this consistent with the guard above, which already
     * tolerates a null array.
     */
    final IBindingSet bs;
    if (bc != null && bc.length == 1
            && !bc[0].equals(EmptyBindingSet.INSTANCE)) {
        bs = bc[0];
    } else {
        bs = null;
    }

    if (bs != null) {

        final Iterator<IVariable> it = bs.vars();

        while (it.hasNext()) {

            final IVariable v = it.next();

            if (!vars.containsKey(v)) {
                throw new RuntimeException("unrecognized variable in incoming binding");
            }

            if (bs.isBound(v)) {
                // no longer a variable
                vars.remove(v);
            }

        }

    }

    // Handle a range.
    final RangeBOp rangeBOp = sp.getRange() != null ? sp.getRange().getRangeBOp() : null;

    if (log.isTraceEnabled()) {
        log.trace("range: " + rangeBOp);
    }

    // Create the predicate.
    final IPredicate<ISPO> pred = (IPredicate<ISPO>)
            db.getSPORelation().getPredicate(
                    getTerm(sp, bs, 0),
                    getTerm(sp, bs, 1),
                    getTerm(sp, bs, 2),
                    getTerm(sp, bs, 3),
                    null,
                    rangeBOp
                    );

    if (pred == null) {
        return new EmptyCloseableIterator<IBindingSet>();
    }

    // Get the right key order for the predicate.
    final SPOKeyOrder keyOrder = db.getSPORelation().getKeyOrder(pred);

    // Grab the corresponding index.
    final BTree ndx = (BTree) db.getSPORelation().getIndex(keyOrder);

    /*
     * Inspect the cache and/or the index for the starting and ending
     * tuple index for this access path.
     */
    final CacheHit bounds = lookupIndexRange(pred, keyOrder, ndx);

    final long startIndex = bounds.startIndex;

    final long endIndex = bounds.endIndex;

    final long range = endIndex - startIndex + 1;

    if (log.isTraceEnabled()) {
        log.trace("range: " + range);
    }

    /*
     * Caller is asking for a range count only.
     */
    if (serviceParams.contains(SliceParams.RANGE)) {

        final IVariable<IV> v = serviceParams.getAsVar(SliceParams.RANGE);

        final IBindingSet[] bSets = new IBindingSet[1];

        bSets[0] = bs != null ? bs.clone() : new ListBindingSet();

        bSets[0].set(v, new Constant<IV>(new XSDNumericIV(range)));

        return new ThickCloseableIterator<IBindingSet>(bSets, 1);

    }

    final long offset = serviceParams.getAsLong(
            SliceParams.OFFSET, SliceParams.DEFAULT_OFFSET);

    if (offset < 0) {
        throw new RuntimeException("illegal negative offset");
    }

    if (offset > range) {
        throw new RuntimeException("offset is out of range");
    }

    final int limit = serviceParams.getAsInt(
            SliceParams.LIMIT, SliceParams.DEFAULT_LIMIT);

    if (limit < 0) {
        // Was previously reported as "fromIndex > toIndex".
        throw new RuntimeException("illegal negative limit");
    }

    if (log.isTraceEnabled()) {
        log.trace("offset: " + offset);
        log.trace("limit: " + limit);
    }

    /*
     * Reading from the startIndex plus the offset (the offset has been
     * verified to be non-negative).
     */
    final long fromIndex = startIndex + offset;

    /*
     * Reading to the offset plus the limit (minus 1), or the end
     * index, whichever is smaller.
     */
    final long toIndex = Math.min(startIndex + offset + limit - 1,
            endIndex);

    if (fromIndex > toIndex) {

        /*
         * Nothing to read: the access path is empty, the offset is equal
         * to the size of the range, or the limit is zero. This is an
         * empty slice rather than an error (and the index must not be
         * probed with keyAt() for such positions).
         */
        if (log.isTraceEnabled()) {
            log.trace("empty slice: fromIndex=" + fromIndex + ", toIndex="
                    + toIndex);
        }

        return new EmptyCloseableIterator<IBindingSet>();

    }

    final byte[] fromKey = ndx.keyAt(fromIndex);

    // The upper bound handed to the iterator is the successor of the last key.
    final byte[] toKey = SuccessorUtil.successor(ndx.keyAt(toIndex));

    if (log.isTraceEnabled()) {
        log.trace("fromIndex: " + fromIndex);
        log.trace("toIndex: " + toIndex);
        log.trace("fromKey: " + BytesUtil.toString(fromKey));
        log.trace("toKey: " + BytesUtil.toString(toKey));
        log.trace("arity: " + pred.arity());
        log.trace("#boundEntries: " + (pred.arity() - vars.size()));
        log.trace(keyOrder);
    }

    /*
     * No advancer is used. A multi-term advancer that skips the bound
     * entries was considered, but it is not a good idea: each tuple needs
     * to be visited individually.
     */
    final DistinctMultiTermAdvancer advancer = null;

    final ITupleIterator it = ndx.rangeIterator(fromKey, toKey,
            0/* capacity */, IRangeQuery.KEYS | IRangeQuery.CURSOR, advancer);

    /*
     * Size the result by the number of index positions in the slice. That
     * is never more than the limit (so it fits an int), and it avoids
     * allocating a limit-sized array when the range is much smaller than
     * the limit.
     */
    final int capacity = (int) (toIndex - fromIndex + 1);

    final IBindingSet[] bSets = new IBindingSet[capacity];

    int n = 0;

    // Bounded by the capacity so the array can never be overrun.
    while (n < capacity && it.hasNext()) {

        final byte[] key = it.next().getKey();

        final SPO spo = keyOrder.decodeKey(key);

        final IBindingSet solution = bs != null ? bs.clone() : new ListBindingSet();

        // Bind each remaining variable to the value at its position.
        for (Map.Entry<IVariable, Integer> e : vars.entrySet()) {

            final int pos = e.getValue();

            solution.set(e.getKey(), new Constant<IV>(spo.get(pos)));

        }

        bSets[n++] = solution;

    }

    if (log.isTraceEnabled()) {
        log.trace("done iterating " + n + " results.");
    }

    return new ThickCloseableIterator<IBindingSet>(bSets, n);

}

/**
 * Returns the start and end tuple index of the access path for the
 * predicate, from the cache when possible and otherwise from the index (in
 * which case the result is added to the cache).
 */
private CacheHit lookupIndexRange(final IPredicate<ISPO> pred,
        final SPOKeyOrder keyOrder, final BTree ndx) {

    /*
     * Avoid an index read if possible.
     */
    final CacheHit cached = cache.get(pred);

    if (cached != null) {

        if (log.isTraceEnabled()) {
            log.trace("cache hit");
        }

        return cached;

    }

    if (log.isTraceEnabled()) {
        log.trace("going to index for range");
    }

    final byte[] startKey = keyOrder.getFromKey(KeyBuilder.newInstance(), pred);

    final long startIndex = indexOf(ndx, startKey);

    final byte[] endKey = keyOrder.getToKey(KeyBuilder.newInstance(), pred);

    // The to-key is an exclusive bound, hence the minus one.
    final long endIndex = indexOf(ndx, endKey) - 1;

    final CacheHit computed = new CacheHit(startIndex, endIndex);

    cache.put(pred, computed);

    return computed;

}
/**
 * Resolve the term at the given position of the statement pattern to an
 * IV: the constant itself when the term is a constant, otherwise the value
 * the incoming binding set holds for the variable.
 *
 * @param sp
 *            The statement pattern.
 * @param bs
 *            The incoming binding set (may be <code>null</code>).
 * @param pos
 *            The position of the term in the statement pattern.
 *
 * @return The IV, or <code>null</code> when there is no term at that
 *         position or the variable there is not bound.
 */
private IV getTerm(final StatementPatternNode sp, final IBindingSet bs, final int pos) {

    final TermNode term = sp.get(pos);

    if (term == null) {
        return null;
    }

    if (term.isConstant()) {
        final IConstant<IV> constant = (IConstant<IV>) term.getValueExpression();
        return constant.get();
    }

    // Not a constant, so the term is handled as a variable.
    final IVariable<IV> var = (IVariable<IV>) term.getValueExpression();

    if (bs == null || !bs.isBound(var)) {
        return null;
    }

    final IConstant<IV> bound = (IConstant<IV>) bs.get(var);

    return bound.get();

}
/**
 * Ask the index for the position of the tuple having the specified key.
 * When the index answers with a negative value (the key is not a real
 * key), that value is converted with <code>-(value + 1)</code>, giving the
 * position of the next real tuple after the specified key.
 *
 * @param ndx
 *            The index.
 * @param key
 *            The key to look up.
 *
 * @return The non-negative tuple position.
 */
private long indexOf(final BTree ndx, final byte[] key) {

    if (log.isTraceEnabled()) {
        log.trace(BytesUtil.toString(key));
    }

    final long probe = ndx.indexOf(key);

    if (log.isTraceEnabled()) {
        log.trace("result of indexOf(key): " + probe);
    }

    // Negative: not a real key. Non-negative: it's a real key.
    final long position = probe < 0 ? -(probe + 1) : probe;

    if (log.isTraceEnabled()) {
        log.trace("index: " + position);
    }

    return position;

}
/**
 * @return The service options given to the constructor.
 */
@Override
public IServiceOptions getServiceOptions() {

    return this.serviceOptions;

}
}
}