/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.rdf.graph.impl.bd;
import java.lang.reflect.Constructor;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;
import org.openrdf.model.Literal;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.URIImpl;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.journal.IIndexManager;
import com.bigdata.rdf.graph.IBinder;
import com.bigdata.rdf.graph.IGASContext;
import com.bigdata.rdf.graph.IGASEngine;
import com.bigdata.rdf.graph.IGASProgram;
import com.bigdata.rdf.graph.IGASScheduler;
import com.bigdata.rdf.graph.IGASSchedulerImpl;
import com.bigdata.rdf.graph.IGASState;
import com.bigdata.rdf.graph.IGASStats;
import com.bigdata.rdf.graph.IGraphAccessor;
import com.bigdata.rdf.graph.IPredecessor;
import com.bigdata.rdf.graph.IReducer;
import com.bigdata.rdf.graph.TraversalDirectionEnum;
import com.bigdata.rdf.graph.analytics.CC;
import com.bigdata.rdf.graph.analytics.PR;
import com.bigdata.rdf.graph.impl.GASEngine;
import com.bigdata.rdf.graph.impl.GASState;
import com.bigdata.rdf.graph.impl.bd.BigdataGASEngine.BigdataGraphAccessor;
import com.bigdata.rdf.graph.impl.scheduler.CHMScheduler;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.lexicon.LexiconRelation;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.BigdataValueImpl;
import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection;
import com.bigdata.rdf.sparql.ast.DummyConstantNode;
import com.bigdata.rdf.sparql.ast.GraphPatternGroup;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.sparql.ast.eval.CustomServiceFactoryBase;
import com.bigdata.rdf.sparql.ast.service.BigdataNativeServiceOptions;
import com.bigdata.rdf.sparql.ast.service.BigdataServiceCall;
import com.bigdata.rdf.sparql.ast.service.IServiceOptions;
import com.bigdata.rdf.sparql.ast.service.ServiceCall;
import com.bigdata.rdf.sparql.ast.service.ServiceCallCreateParams;
import com.bigdata.rdf.sparql.ast.service.ServiceNode;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.striterator.ChunkedArrayIterator;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* A SERVICE that exposes {@link IGASProgram}s for SPARQL execution.
* <p>
* For example, the following would run a depth-limited BFS traversal:
*
* <pre>
* PREFIX gas: <http://www.bigdata.com/rdf/gas#>
* #...
* SERVICE <gas#service> {
* gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.BFS" .
* gas:program gas:in <IRI> . # one or more times, specifies the initial frontier.
* gas:program gas:out ?out . # exactly once - will be bound to the visited vertices.
* gas:program gas:maxIterations 4 . # optional limit on breadth first expansion.
* gas:program gas:maxVisited 2000 . # optional limit on the #of visited vertices.
* gas:program gas:nthreads 4 . # specify the #of threads to use (optional)
* }
* </pre>
*
* Or the following would run the FuzzySSSP algorithm.
*
* <pre>
* PREFIX gas: <http://www.bigdata.com/rdf/gas#>
* #...
* SERVICE <gas:service> {
* gas:program gas:gasClass "com.bigdata.rdf.graph.analytics.FuzzySSSP" .
* gas:program gas:in <IRI> . # one or more times, specifies the initial frontier.
* gas:program gas:target <IRI> . # one or more times, identifies the target vertices and hence the paths of interest.
* gas:program gas:out ?out . # exactly once - will be bound to the visited vertices laying within N-hops of the shortest paths.
* gas:program gas:maxIterations 4 . # optional limit on breadth first expansion.
* gas:program gas:maxVisited 2000 . # optional limit on the #of visited vertices.
* }
* </pre>
*
* FIXME Also allow the execution of gas workflows, such as FuzzySSSP. A
* workflow would be more along the lines of a Callable, but one where the
* initial source and/or target vertices could be identified. Or have an
* interface that wraps the analytics (including things like FuzzySSSP) so they
* can declare their own arguments for invocation as a SERVICE.
*
* TODO The input frontier could be a variable, in which case we would pull out
* the column for that variable rather than running the algorithm once per
* source binding set, right? Or maybe not.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
*
* @see <a href="http://wiki.blazegraph.com/wiki/index.php/RDF_GAS_API">RDF GAS
* API</a>
*/
public class GASService extends CustomServiceFactoryBase {
public interface Options {
/**
* The namespace used for bigdata GAS API.
*/
String NAMESPACE = "http://www.bigdata.com/rdf/gas#";
/**
* The URL at which the {@link GASService} will respond.
*/
URI SERVICE_KEY = new URIImpl(NAMESPACE + "service");
/**
* Used as the subject in the GAS SERVICE invocation pattern.
*/
URI PROGRAM = new URIImpl(NAMESPACE + "program");
/**
* Magic predicate identifies the fully qualified class name of the
* {@link IGASProgram} to be executed.
*/
URI GAS_CLASS = new URIImpl(NAMESPACE + "gasClass");
/**
* The #of threads that will be used to expand the frontier in each
* iteration of the algorithm (optional, default
* {@value #DEFAULT_NTHREADS}).
*
* @see #DEFAULT_NTHREADS
*/
URI NTHREADS = new URIImpl(NAMESPACE + "nthreads");
int DEFAULT_NTHREADS = 4;
/**
* This option determines the traversal direction semantics for the
* {@link IGASProgram} against the graph, including whether the the
* edges of the graph will be interpreted as directed (
* {@link TraversalDirectionEnum#Forward} (which is the default),
* {@link TraversalDirectionEnum#Reverse}), or
* {@link TraversalDirectionEnum#Undirected}.
*
* @see TraversalDirectionEnum
* @see IGASContext#setTraversalDirection(TraversalDirectionEnum)
*/
URI TRAVERSAL_DIRECTION = new URIImpl(NAMESPACE + "traversalDirection");
TraversalDirectionEnum DEFAULT_DIRECTED_TRAVERSAL = TraversalDirectionEnum.Forward;
/**
* The maximum #of iterations for the GAS program (optional, default
* {@value #DEFAULT_MAX_ITERATIONS}).
*
* @see #DEFAULT_MAX_ITERATIONS
* @see IGASContext#setMaxIterations(int)
*/
URI MAX_ITERATIONS = new URIImpl(NAMESPACE + "maxIterations");
int DEFAULT_MAX_ITERATIONS = Integer.MAX_VALUE;
/**
* The maximum #of iterations for the GAS program after the targets
* have been reached (optional, default
* {@value #DEFAULT_MAX_ITERATIONS_AFTER_TARGETS}). Default behavior
* is to not stop once the targets are reached.
*
* @see #DEFAULT_MAX_ITERATIONS_AFTER_TARGETS
* @see IGASContext#setMaxIterationsAfterTargets(int)
*/
URI MAX_ITERATIONS_AFTER_TARGETS = new URIImpl(NAMESPACE + "maxIterationsAfterTargets");
int DEFAULT_MAX_ITERATIONS_AFTER_TARGETS = Integer.MAX_VALUE;
/**
* The maximum #of vertices in the visited set for the GAS program
* (optional, default {@value #DEFAULT_MAX_VISITED}).
*
* @see #DEFAULT_MAX_VISITED
* @see IGASContext#setMaxVisited(int)
*/
URI MAX_VISITED = new URIImpl(NAMESPACE + "maxVisited");
int DEFAULT_MAX_VISITED = Integer.MAX_VALUE;
/**
* An optional constraint on the types of links that will be visited by
* the algorithm.
* <p>
* Note: When this option is used, the scatter and gather will not visit
* the property set for the vertex. Instead, the graph is treated as if
* it were an unattributed graph and only mined for the connectivity
* data (which may include a link weight).
*
* @see IGASContext#setLinkType(URI)
*/
URI LINK_TYPE = new URIImpl(NAMESPACE + "linkType");
/**
* An optional constraint on the types of the link attributes that will
* be visited by the algorithm - the use of this option is required if
* you want to process some specific link weight rather than the simple
* topology of the graph.
*
* @see IGASContext#setLinkAttributeType(URI)
*/
URI LINK_ATTR_TYPE = new URIImpl(NAMESPACE + "linkAttrType");
/**
* The {@link IGASScheduler} (default is {@link #DEFAULT_SCHEDULER}).
* Class must implement {@link IGASSchedulerImpl}.
*/
URI SCHEDULER_CLASS = new URIImpl(NAMESPACE + "schedulerClass");
Class<? extends IGASSchedulerImpl> DEFAULT_SCHEDULER = CHMScheduler.class;
/**
* Magic predicate used to specify one (or more) vertices in the initial
* frontier.
* <p>
* Note: Algorithms such as {@link CC} and {@link PR} automatically
* place all vertices into the initial frontier. For such algorithms,
* you do not need to specify {@link #IN}.
*/
URI IN = new URIImpl(NAMESPACE + "in");
/**
* Magic predicate used to specify one (or more) target vertices. This
* may be used in combination with algorithms that compute paths in a
* graph to filter the visited vertices after the traversal in order to
* remove any vertex that is not part of a path to one or more of the
* specified target vertices.
* <p>
* In order to support this, the algorithm has to have a concept of a
* <code>predecessor</code>. For each <code>target</code>, the set of
* visited vertices is checked to see if the target was reachable. If it
* was reachable, then the predecessors are walked backwards until a
* starting vertex is reached (predecessor:=null). Each such predecessor
* is added to a list of vertices to be retained. This is repeated for
* each target. Once we have identified the combined list of vertices to
* be reained, all vertices NOT in that list are removed from the
* visited vertex state. This causes the algorithm to only report on
* those paths that lead to at least one of the specified target
* vertices.
* <p>
* Note: If you do not care about the distance between two vertices, but
* only whether they are reachable from one another, you can put both
* vertices into the initial frontier. The algorithm will then work from
* both points which can accelerate convergence.
*/
URI TARGET = new URIImpl(NAMESPACE + "target");
/**
* Magic predicate used to specify a variable that will become bound to
* each vertex in the visited set for the analytic. {@link #OUT} is
* always bound to the visited vertices. The other "out" variables are
* bound to state associated with the visited vertices in an algorithm
* dependent manner.
*
* @see IGASProgram#getBinderList()
*/
URI OUT = new URIImpl(NAMESPACE + "out");
URI OUT1 = new URIImpl(NAMESPACE + "out1");
URI OUT2 = new URIImpl(NAMESPACE + "out2");
URI OUT3 = new URIImpl(NAMESPACE + "out3");
URI OUT4 = new URIImpl(NAMESPACE + "out4");
URI OUT5 = new URIImpl(NAMESPACE + "out5");
URI OUT6 = new URIImpl(NAMESPACE + "out6");
URI OUT7 = new URIImpl(NAMESPACE + "out7");
URI OUT8 = new URIImpl(NAMESPACE + "out8");
URI OUT9 = new URIImpl(NAMESPACE + "out9");
}
static private transient final Logger log = Logger
.getLogger(GASService.class);
/**
* The list of all out variables.
*/
static private List<URI> OUT_VARS = Collections.unmodifiableList(Arrays
.asList(new URI[] { Options.OUT, Options.OUT1, Options.OUT2,
Options.OUT3, Options.OUT4, Options.OUT5, Options.OUT6,
Options.OUT7, Options.OUT8, Options.OUT9 }));
private final BigdataNativeServiceOptions serviceOptions;
public GASService() {
serviceOptions = new BigdataNativeServiceOptions();
/*
* TODO Review decision to make this a runFirst service. The rational is
* that this service can only apply a very limited set of restrictions
* during query, therefore it will often make sense to run it first.
* However, the fromTime and toTime could be bound by the query and the
* service can filter some things more efficiently internally than if we
* generated a bunch of intermediate solutions for those things.
*/
serviceOptions.setRunFirst(true);
}
/**
* The known URIs.
* <p>
* Note: We can recognize anything in {@link Options#NAMESPACE}, but the
* predicate still has to be something that we know how to interpret.
*/
static final Set<URI> gasUris;
static {
final Set<URI> set = new LinkedHashSet<URI>();
set.add(Options.PROGRAM);
gasUris = Collections.unmodifiableSet(set);
}
@Override
public IServiceOptions getServiceOptions() {
return serviceOptions;
}
/**
* NOP
* <p>
* {@inheritDoc}
*/
@Override
public void startConnection(BigdataSailConnection conn) {
// NOP
}
@Override
public ServiceCall<?> create(final ServiceCallCreateParams params) {
if (params == null)
throw new IllegalArgumentException();
final AbstractTripleStore store = params.getTripleStore();
if (store == null)
throw new IllegalArgumentException();
/*
* Create and return the ServiceCall object which will execute this
* query.
*/
return new GASServiceCall(store, params.getServiceNode(),
getServiceOptions());
}
/**
* Execute the service call (run the GAS program).
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
* Thompson</a>
*
* TODO Validate the service call parameters, including whether they
* are understood by the specific algorithm.
*/
private static class GASServiceCall<VS, ES, ST> implements BigdataServiceCall {
private final AbstractTripleStore store;
private final GraphPatternGroup<IGroupMemberNode> graphPattern;
private final IServiceOptions serviceOptions;
// options extracted from the SERVICE's graph pattern.
private final int nthreads;
private final TraversalDirectionEnum traversalDirection;
private final int maxIterations;
private final int maxIterationsAfterTargets;
private final int maxVisited;
private final URI linkType, linkAttrType;
private final Class<IGASProgram<VS, ES, ST>> gasClass;
private final Class<IGASSchedulerImpl> schedulerClass;
private final Value[] initialFrontier;
private final Value[] targetVertices;
private final IVariable<?>[] outVars;
public GASServiceCall(final AbstractTripleStore store,
final ServiceNode serviceNode,
final IServiceOptions serviceOptions) {
if (store == null)
throw new IllegalArgumentException();
if (serviceNode == null)
throw new IllegalArgumentException();
if (serviceOptions == null)
throw new IllegalArgumentException();
this.store = store;
this.graphPattern = serviceNode.getGraphPattern();
this.serviceOptions = serviceOptions;
this.nthreads = ((Literal) getOnlyArg(
Options.PROGRAM,
Options.NTHREADS,
store.getValueFactory().createLiteral(
Options.DEFAULT_NTHREADS))).intValue();
this.traversalDirection = TraversalDirectionEnum
.valueOf(((Literal) getOnlyArg(
Options.PROGRAM,
Options.TRAVERSAL_DIRECTION,
store.getValueFactory().createLiteral(
Options.DEFAULT_DIRECTED_TRAVERSAL.name())))
.stringValue());
this.maxIterations = ((Literal) getOnlyArg(Options.PROGRAM,
Options.MAX_ITERATIONS, store.getValueFactory()
.createLiteral(Options.DEFAULT_MAX_ITERATIONS)))
.intValue();
this.maxIterationsAfterTargets = ((Literal) getOnlyArg(Options.PROGRAM,
Options.MAX_ITERATIONS_AFTER_TARGETS, store.getValueFactory()
.createLiteral(Options.DEFAULT_MAX_ITERATIONS_AFTER_TARGETS)))
.intValue();
this.maxVisited = ((Literal) getOnlyArg(
Options.PROGRAM,
Options.MAX_VISITED,
store.getValueFactory().createLiteral(
Options.DEFAULT_MAX_VISITED))).intValue();
this.linkType = (URI) getOnlyArg(Options.PROGRAM,
Options.LINK_TYPE, null/* default */);
this.linkAttrType = (URI) getOnlyArg(Options.PROGRAM,
Options.LINK_ATTR_TYPE, null/* default */);
// GASProgram (required)
{
final Literal tmp = (Literal) getOnlyArg(Options.PROGRAM,
Options.GAS_CLASS);
if (tmp == null)
throw new IllegalArgumentException(
"Required predicate not specified: "
+ Options.GAS_CLASS);
final String className = tmp.stringValue();
final Class<?> cls;
try {
cls = Class.forName(className);
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("No such class: "
+ className);
}
if (!IGASProgram.class.isAssignableFrom(cls))
throw new IllegalArgumentException(Options.GAS_CLASS
+ " must extend " + IGASProgram.class.getName());
this.gasClass = (Class<IGASProgram<VS, ES, ST>>) cls;
}
// Scheduler (optional).
{
final Literal tmp = (Literal) getOnlyArg(Options.PROGRAM,
Options.SCHEDULER_CLASS);
if (tmp == null) {
this.schedulerClass = null;
} else {
final String className = tmp.stringValue();
final Class<?> cls;
try {
cls = Class.forName(className);
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("No such class: "
+ className);
}
if (!IGASSchedulerImpl.class.isAssignableFrom(cls))
throw new IllegalArgumentException(
Options.SCHEDULER_CLASS + " must extend "
+ IGASSchedulerImpl.class.getName());
this.schedulerClass = (Class<IGASSchedulerImpl>) cls;
}
}
// Initial frontier.
this.initialFrontier = getArg(Options.PROGRAM, Options.IN);
// Target vertices
this.targetVertices = getArg(Options.PROGRAM, Options.TARGET);
/*
* The output variable (bound to the visited set).
*
* TODO This does too much work. It searches the group graph pattern
* 10 times, when we could do just one pass and find everything.
*/
{
this.outVars = new IVariable[10];
int i = 0;
for (URI uri : OUT_VARS) {
this.outVars[i++] = getVar(Options.PROGRAM, uri);
}
}
}
/**
* Return the variable associated with the first instandce of the
* specified subject and predicate in the service's graph pattern. Only
* the simple {@link StatementPatternNode}s are visited.
*
* @param s
* The subject.
* @param p
* The predicate.
*
* @return The variable -or- <code>null</code> if the specified subject
* and predicate do not appear.
*/
private IVariable<?> getVar(final URI s, final URI p) {
if (s == null)
throw new IllegalArgumentException();
if (p == null)
throw new IllegalArgumentException();
List<Value> tmp = null;
final Iterator<IGroupMemberNode> itr = graphPattern.getChildren()
.iterator();
while (itr.hasNext()) {
final IGroupMemberNode child = itr.next();
if (!(child instanceof StatementPatternNode))
continue;
final StatementPatternNode sp = (StatementPatternNode) child;
// s and p are constants.
if (!sp.s().isConstant())
continue;
if (!sp.p().isConstant())
continue;
// constants match.
if (!s.equals(sp.s().getValue()))
continue;
if (!p.equals(sp.p().getValue()))
continue;
if (tmp == null)
tmp = new LinkedList<Value>();
// found an o.
return ((VarNode)sp.o()).getValueExpression();
}
return null; // not found.
}
/**
* Return the object bindings from the service's graph pattern for the
* specified subject and predicate. Only the simple
* {@link StatementPatternNode}s are visited.
*
* @param s
* The subject.
* @param p
* The predicate.
*
* @return An array containing one or more bindings -or-
* <code>null</code> if the specified subject and predicate do
* not appear.
*/
private Value[] getArg(final URI s, final URI p) {
if (s == null)
throw new IllegalArgumentException();
if (p == null)
throw new IllegalArgumentException();
List<Value> tmp = null;
final Iterator<IGroupMemberNode> itr = graphPattern.getChildren()
.iterator();
while (itr.hasNext()) {
final IGroupMemberNode child = itr.next();
if (!(child instanceof StatementPatternNode))
continue;
final StatementPatternNode sp = (StatementPatternNode) child;
// s and p are constants.
if (!sp.s().isConstant())
continue;
if (!sp.p().isConstant())
continue;
// constants match.
if (!s.equals(sp.s().getValue()))
continue;
if (!p.equals(sp.p().getValue()))
continue;
if (tmp == null)
tmp = new LinkedList<Value>();
// found an o.
tmp.add(sp.o().getValue());
}
if (tmp == null)
return null;
return tmp.toArray(new Value[tmp.size()]);
}
/**
* Return the sole {@link Value} for the given s and p.
*
* @param s
* The subject.
* @param p
* The predicate.
*
* @return The sole {@link Value} for that s and p -or-
* <code>null</code> if no value was given.
*
* @throws RuntimeException
* if there are multiple values.
*/
private Value getOnlyArg(final URI s, final URI p) {
final Value[] tmp = getArg(s, p);
if (tmp == null)
return null;
if (tmp.length > 1)
throw new IllegalArgumentException("Multiple values: s=" + s
+ ", p=" + p);
return tmp[0];
}
/**
* Return the sole {@link Value} for the given s and p and the default
* value if no value was explicitly provided.
*
* @param s
* The subject.
* @param p
* The predicate.
* @param def
* The default value.
*
* @return The sole {@link Value} for that s and p -or- the default
* value if no value was given.
*
* @throws RuntimeException
* if there are multiple values.
*/
private Value getOnlyArg(final URI s, final URI p, final Value def) {
final Value tmp = getOnlyArg(s, p);
if (tmp == null)
return def;
return tmp;
}
@Override
public IServiceOptions getServiceOptions() {
return serviceOptions;
}
/**
* Execute the GAS program.
* <p>
* {@inheritDoc}
*/
@Override
public ICloseableIterator<IBindingSet> call(
final IBindingSet[] bindingSets) throws Exception {
/*
* Try/finally pattern to setup the BigdataGASEngine, execute the
* algorithm, and return the results.
*/
IGASEngine gasEngine = null;
try {
gasEngine = newGasEngine(store.getIndexManager(), nthreads);
if (schedulerClass != null) {
((GASEngine) gasEngine).setSchedulerClass(schedulerClass);
}
final IGraphAccessor graphAccessor = newGraphAccessor(store);
final IGASProgram<VS, ES, ST> gasProgram = newGASProgram(gasClass);
final IGASContext<VS, ES, ST> gasContext = gasEngine.newGASContext(
graphAccessor, gasProgram);
gasContext.setTraversalDirection(traversalDirection);
gasContext.setMaxIterations(maxIterations);
gasContext.setMaxIterationsAfterTargets(maxIterationsAfterTargets);
gasContext.setMaxVisited(maxVisited);
if (targetVertices != null) {
gasContext.setTargetVertices(toIV(targetVertices));
}
// Optional link type constraint.
if (linkType != null)
gasContext.setLinkType(linkType);
// Optional link attribute constraint.
if (linkAttrType != null)
gasContext.setLinkAttributeType(linkAttrType);
final IGASState<VS, ES, ST> gasState = gasContext.getGASState();
if (initialFrontier != null) {
/*
* FIXME Why can't we pass in the Value (with a defined IV)
* and not the IV? This should work. Passing in the IV is
* against the grain of the API and the generalized
* abstraction as Values. Of course, having the IV is
* necessary since this is an internal, high performance,
* and close to the indices operation.
*/
@SuppressWarnings("rawtypes")
final IV[] tmp = toIV(initialFrontier);
// set the frontier.
gasState.setFrontier(gasContext, tmp);
}
// Run the analytic.
final IGASStats stats = (IGASStats) gasContext.call();
if (targetVertices != null
&& gasProgram instanceof IPredecessor) {
/*
* Remove vertices from the visited set that are not on a
* path leading to at least one of the specified target
* vertices.
*
* FIXME Why can't we pass in the Value (with a defined IV)
* and not the IV? This should work. Passing in the IV is
* against the grain of the API and the generalized
* abstraction as Values. Of course, having the IV is
* necessary since this is an internal, high performance,
* and close to the indices operation.
*/
@SuppressWarnings("rawtypes")
final IV[] tmp = toIV(targetVertices);
@SuppressWarnings("unchecked")
final IPredecessor<VS, ES, ST> t = (IPredecessor<VS, ES, ST>) gasProgram;
t.prunePaths(gasContext, tmp);
}
if (log.isInfoEnabled()) {
final StringBuilder sb = new StringBuilder();
sb.append("GAS");
sb.append(": analytic=" + gasProgram.getClass().getSimpleName());
sb.append(", nthreads=" + nthreads);
sb.append(", scheduler=" + ((GASState<VS, ES, ST>)gasState).getScheduler().getClass().getSimpleName());
sb.append(", gasEngine=" + gasEngine.getClass().getSimpleName());
sb.append(", stats=" + stats);
log.info(sb.toString());
}
/*
* Bind output variables (if any).
*/
final IBindingSet[] out = gasState
.reduce(new BindingSetReducer<VS, ES, ST>(outVars,
store, gasProgram, gasContext));
return new ChunkedArrayIterator<IBindingSet>(out);
} finally {
if (gasEngine != null) {
gasEngine.shutdownNow();
gasEngine = null;
}
}
}
/**
* Convert a {@link Value}[] of {@link BigdataValue} instances into an
* {@link IV}[].
*/
private static IV[] toIV(final Value[] values) {
@SuppressWarnings("rawtypes")
final IV[] tmp = new IV[values.length];
// Setup the initial frontier.
int i = 0;
for (Value v : values) {
tmp[i++] = ((BigdataValue) v).getIV();
}
return tmp;
}
/**
* Class used to report {@link IBindingSet}s to the {@link GASService}.
* {@link IGASProgram}s can customize the way in which they interpret
* the declared variables by subclassing this class.
*
* @param <VS>
* @param <ES>
* @param <ST>
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
* Thompson</a>
*
* TODO This should use the TLBFactory when we change to use
* stream solutions out of the SERVICE (#507), but the TLB class
* is not necessary until the reduce itself runs in concurrent
* threads (it is single threaded right now based on the backing
* CHM iterator).
*/
public static class BindingSetReducer<VS, ES, ST> implements
IReducer<VS, ES, ST, IBindingSet[]> {
/**
* The declared output variables (the ones that the caller wants to
* extract). Any position that will not be extracted is a
* <code>null</code>.
*/
private final IVariable<?>[] outVars;
/**
* The KB instance.
*/
private final AbstractTripleStore store;
private final LexiconRelation lex;
/**
* The object used to create the variable bindings.
*/
private final ValueFactory vf;
/**
* The list of objects used to extract the variable bindings.
*/
private final List<IBinder<VS, ES, ST>> binderList;
/**
* The collected solutions.
*/
private final List<IBindingSet> tmp = new LinkedList<IBindingSet>();
/**
*
* @param outVars
* The declared output variables (the ones that the
* caller wants to extract). Any position that will not
* be extracted is a <code>null</code>.
*/
public BindingSetReducer(//
final IVariable<?>[] outVars,
final AbstractTripleStore store,
final IGASProgram<VS, ES, ST> gasProgram,
final IGASContext<VS, ES, ST> ctx) {
this.outVars = outVars;
this.store = store;
this.lex = store.getLexiconRelation();
this.vf = store.getValueFactory();
this.binderList = gasProgram.getBinderList();
}
@Override
public void visit(final IGASState<VS, ES, ST> state, final Value u) {
final List<IBindingSet> bSets = new LinkedList<IBindingSet>();
bSets.add(new ListBindingSet());
for (IBinder<VS, ES, ST> b : binderList) {
// The variable for this binder.
final IVariable<?> var = outVars[b.getIndex()];
if(var == null)
continue;
final Iterator<IBindingSet> it = bSets.iterator();
final List<IBindingSet> bSets2 = new LinkedList<IBindingSet>();
while (it.hasNext()) {
final IBindingSet parent = it.next();
if (log.isTraceEnabled())
log.trace("parent: " + parent);
final List<Value> vals =
b.bind(vf, state, u, outVars, parent);
if (vals.size() == 0) {
// do nothing, leave the parent in the bSets
} else if (vals.size() == 1) {
/*
* Bind the single value, leave the parent in the
* bSets.
*/
final Value val = vals.get(0);
bind(var, val, parent);
if (log.isTraceEnabled())
log.trace("parent (after bind): " + parent);
} else {
/*
* Remove the parent from the bSets, for each new
* value, clone the parent, bind the value, and add
* the new solution to the bSets
*/
for (Value val : vals) {
final IBindingSet child = parent.clone();
bind(var, val, child);
if (log.isTraceEnabled())
log.trace("child: " + child);
bSets2.add(child);
}
it.remove();
}
}
bSets.addAll(bSets2);
}
// Add to the set of generated solutions.
tmp.addAll(bSets);
}
@SuppressWarnings({ "unchecked", "rawtypes" })
protected void bind(final IVariable<?> var, final Value val, final IBindingSet bs) {
if (val == null)
return;
if (val instanceof IV) {
// The value is already an IV.
bs.set(var, new Constant((IV) val));
} else {
/*
* The Value is a BigdataValueImpl (if the bind() method
* used the supplied ValueFactory). We need to convert
* it to an IV and this code ASSUMES that we can do this
* using an inline IV with the as configured KB. (This
* will work for anything numeric, but not for strings.)
*/
final IV<BigdataValueImpl, ?> iv = lex
.getLexiconConfiguration().createInlineIV(val);
if (iv != null) {
iv.setValue((BigdataValueImpl) val);
bs.set(var, new Constant(iv));
} else if (val instanceof BigdataValue) {
bs.set(var, new Constant(DummyConstantNode.toDummyIV((BigdataValue) val)));
} else {
throw new RuntimeException("FIXME");
}
}
}
@Override
public IBindingSet[] get() {
return tmp.toArray(new IBindingSet[tmp.size()]);
}
}
/**
* Factory for the {@link IGASEngine}.
*/
private IGASEngine newGasEngine(final IIndexManager indexManager,
final int nthreads) {
return new BigdataGASEngine(indexManager, nthreads);
}
/**
* Return an instance of the {@link IGASProgram} to be evaluated.
*/
private IGASProgram<VS, ES, ST> newGASProgram(
final Class<IGASProgram<VS, ES, ST>> cls) {
if (cls == null)
throw new IllegalArgumentException();
try {
final Constructor<IGASProgram<VS, ES, ST>> ctor = cls
.getConstructor(new Class[] {});
final IGASProgram<VS, ES, ST> gasProgram = ctor
.newInstance(new Object[] {});
return gasProgram;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
* Return the object used to access the as-configured graph.
*/
private IGraphAccessor newGraphAccessor(final AbstractTripleStore kb) {
/*
* Use a read-only view (sampling depends on access to the BTree rather
* than the ReadCommittedIndex).
*/
final BigdataGraphAccessor graphAccessor = new BigdataGraphAccessor(
kb.getIndexManager(), kb.getNamespace(), kb
.getIndexManager().getLastCommitTime());
return graphAccessor;
}
}
}