package com.bigdata.relation.rule.eval.pipeline;
import java.util.Iterator;
import java.util.UUID;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
import com.bigdata.bfs.BigdataFileSystem;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IVariable;
import com.bigdata.btree.BTree;
import com.bigdata.btree.IIndex;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.counters.CounterSet;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.IIndexStore;
import com.bigdata.journal.IResourceLockService;
import com.bigdata.journal.TemporaryStore;
import com.bigdata.relation.accesspath.IAsynchronousIterator;
import com.bigdata.relation.locator.IResourceLocator;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.eval.IJoinNexusFactory;
import com.bigdata.resources.IndexManager;
import com.bigdata.resources.StoreManager.ManagedJournal;
import com.bigdata.service.AbstractDistributedFederation;
import com.bigdata.service.AbstractScaleOutFederation;
import com.bigdata.service.DataService;
import com.bigdata.service.DataServiceCallable;
import com.bigdata.service.IBigdataFederation;
import com.bigdata.service.Session;
import com.bigdata.service.proxy.ClientAsynchronousIterator;
import com.bigdata.sparse.SparseRowStore;
import com.bigdata.striterator.IKeyOrder;
/**
* A factory for {@link DistributedJoinTask}s. The factory either creates a new
* {@link DistributedJoinTask} or returns the pre-existing
* {@link DistributedJoinTask} for the given {@link JoinMasterTask} instance (as
* identified by its {@link UUID}), <i>orderIndex</i>, and <i>partitionId</i>.
* When the desired join task already exists, the factory invokes
* {@link DistributedJoinTask#addSource(IAsynchronousIterator)}, passing the
* {@link #sourceItrProxy} as an additional source for that join task.
* <p>
* The use of a factory pattern allows us to concentrate all
* {@link DistributedJoinTask}s which target the same tail predicate and index
* partition for the same rule execution instance onto the same
* {@link DistributedJoinTask}. The concentrator effect achieved by the factory
* only matters when the fan-out is GT ONE (1).
*
* @todo The factory semantics requires something like a "session" concept on
* the {@link DataService}. However, it could also be realized by a
* canonicalizing mapping of {masterProxy, orderIndex, partitionId} onto
* an object that is placed within a weak value cache.
*
* @todo Whenever a {@link DistributedJoinTask} is interrupted or errors it must
* make sure that the entry is removed from the session (it could also
* interrupt/cancel the remaining {@link DistributedJoinTask}s for the
* same {masterInstance}, but we are already doing that in a different
* way.)
*
* @todo We need to specify the failover behavior when running query or mutation
* rules. The simplest answer is that the query or closure operation fails
* and can be retried.
* <P>
* When retried a different data service instance could take over for the
* failed instance. This presumes some concept of "affinity" for a data
* service instance when locating a join task. If there are replicated
* instances of a data service, then affinity would be the tendency to
* choose the same instance for all join tasks with the same master,
* orderIndex, and partitionId. That might be more efficient since it
* allows aggregation of binding sets that require the same access path
* read. However, it might be more efficient to distribute the reads
* across the failover instances - it really depends on the workload.
* <p>
* Ideally, a data service failure would be handled by restarting only
* those parts of the operation that had failed. This presumes some
* notion of idempotence for the operation. For at least the RDF
* database, this may be achievable. Failure during query leading to
* resend of some binding set chunks to a new join task could result in
* overgeneration of results, but those results would all be duplicates.
* If that is acceptable, then this approach could be considered "safe".
* Failure during mutation (aka closure) is even easier for RDF as
* redundant writes on an index still lead to the same fixed point.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*/
public class JoinTaskFactoryTask extends DataServiceCallable<Future> {
/**
*
*/
private static final long serialVersionUID = -2637166803787195001L;
protected static final transient Logger log = Logger.getLogger(JoinTaskFactoryTask.class);
/**
* @deprecated This is only used by a logging statement.
*/
final String scaleOutIndexName;
final IRule rule;
final IJoinNexusFactory joinNexusFactory;
final int[] order;
final int orderIndex;
final int partitionId;
final UUID masterUUID;
final IJoinMaster masterProxy;
final IAsynchronousIterator<IBindingSet[]> sourceItrProxy;
final IKeyOrder[] keyOrders;
/**
* A list of variables required for each tail, by tailIndex. Used to filter
* downstream variable binding sets.
*/
final IVariable[][] requiredVars;
// /**
// * Set by the {@link DataService} which recognized that this class
// * implements the {@link IDataServiceCallable}.
// */
// private transient DataService dataService;
//
// public void setDataService(DataService dataService) {
//
// this.dataService = dataService;
//
// }
/**
* Set by {@link #call()} to the federation instance available on the
* {@link DataService}.
*/
private transient AbstractScaleOutFederation fed;
public String toString() {
return getClass().getSimpleName() + "{ orderIndex=" + orderIndex
+ ", partitionId=" + partitionId + "}";
}
/**
*
* @param scaleOutIndexName
* @param rule
* @param joinNexusFactory
* @param order
* @param orderIndex
* @param partitionId
* @param masterProxy
* @param masterUUID
* (Avoids RMI to obtain this later).
* @param sourceItrProxy
* @param nextScaleOutIndexName
*/
public JoinTaskFactoryTask(final String scaleOutIndexName,
final IRule rule, final IJoinNexusFactory joinNexusFactory,
final int[] order, final int orderIndex, final int partitionId,
final IJoinMaster masterProxy,
final UUID masterUUID,
final IAsynchronousIterator<IBindingSet[]> sourceItrProxy,
final IKeyOrder[] keyOrders,
final IVariable[][] requiredVars) {
if (scaleOutIndexName == null)
throw new IllegalArgumentException();
if (rule == null)
throw new IllegalArgumentException();
final int tailCount = rule.getTailCount();
if (joinNexusFactory == null)
throw new IllegalArgumentException();
if (order == null)
throw new IllegalArgumentException();
if (order.length != tailCount)
throw new IllegalArgumentException();
if (orderIndex < 0 || orderIndex >= tailCount)
throw new IllegalArgumentException();
if (partitionId < 0)
throw new IllegalArgumentException();
if (masterProxy == null)
throw new IllegalArgumentException();
if (masterUUID == null)
throw new IllegalArgumentException();
if (sourceItrProxy == null)
throw new IllegalArgumentException();
if (keyOrders == null || keyOrders.length != order.length)
throw new IllegalArgumentException();
if (requiredVars == null)
throw new IllegalArgumentException();
this.scaleOutIndexName = scaleOutIndexName;
this.rule = rule;
this.joinNexusFactory = joinNexusFactory;
this.order = order;
this.orderIndex = orderIndex;
this.partitionId = partitionId;
this.masterProxy = masterProxy;
this.masterUUID = masterUUID;
this.sourceItrProxy = sourceItrProxy;
this.keyOrders = keyOrders;
this.requiredVars = requiredVars;
}
/**
* Either starts a new {@link DistributedJoinTask} and returns its
* {@link Future} or returns the {@link Future} of an existing
* {@link DistributedJoinTask} for the same
* {@link DistributedJoinMasterTask} instance, <i>orderIndex</i>, and
* <i>partitionId</i>.
*
* @return (A proxy for) the {@link Future} of the
* {@link DistributedJoinTask}.
*/
public Future call() throws Exception {
// if (dataService == null)
// throw new IllegalStateException();
this.fed = (AbstractScaleOutFederation) getFederation();
/*
* Start the iterator using our local thread pool in order to avoid
* having it start() with a new Thread().
*
* Note: This MUST be done before we create the join task or the
* iterator will create its own Thread.
*/
if (sourceItrProxy instanceof ClientAsynchronousIterator) {
((ClientAsynchronousIterator) sourceItrProxy).start(fed
.getExecutorService());
}
final String namespace = getJoinTaskNamespace(masterUUID, orderIndex,
partitionId);
final Future<Void> joinTaskFuture;
final Session session = getDataService().getSession();
/*
* @todo this serializes all requests for a new join task on this data
* service. However, we only need to serialize requests for the same
* [uuid, orderIndex, partitionId]. A NamedLock on [namespace] would do
* exactly that.
*
* Note: The DistributedJoinTask will remove itself from the session
* when it completes (regardless of success or failure). It does not
* obtain a lock on the session but instead relies on addSource(itr) to
* reject new sources until it can be removed from the session.
*/
synchronized (session) {
// lookup task for that key in the session.
DistributedJoinTask joinTask = (DistributedJoinTask) session
.get(namespace);
if (joinTask != null) {
if (joinTask.addSource(sourceItrProxy)) {
// use the existing join task.
joinTaskFuture = joinTask.futureProxy;
} else {
/*
* Create a new join task (the old one has decided that it
* will not accept any new sources).
*/
// new task.
joinTask = newJoinTask();
// put into the session.
session.put(namespace, joinTask);
// submit task and note its future.
joinTaskFuture = submit(joinTask);
}
} else {
/*
* There is no join task in the session so we create one now.
*/
// new task.
joinTask = newJoinTask();
// put into the session.
session.put(namespace, joinTask);
// submit task and note its future.
joinTaskFuture = submit(joinTask);
}
}
return joinTaskFuture;
}
protected DistributedJoinTask newJoinTask() {
final DistributedJoinTask task;
{
/*
* Note: This wrapper class passes getIndex(name,timestamp) to the
* IndexManager for the DataService, which is the class that knows
* how to assemble the index partition view.
*/
final IIndexManager indexManager = new DelegateIndexManager(
getDataService());
task = new DistributedJoinTask(/*scaleOutIndexName,*/ rule,
joinNexusFactory.newInstance(indexManager), order,
orderIndex, partitionId, fed, masterProxy, masterUUID,
sourceItrProxy, keyOrders, getDataService(), requiredVars);
}
return task;
}
protected Future<Void> submit(final DistributedJoinTask task) {
if (log.isDebugEnabled())
log.debug("Submitting new JoinTask: orderIndex=" + orderIndex
+ ", partitionId=" + partitionId + ", indexName="
+ scaleOutIndexName);
Future<Void> joinTaskFuture = getFederation()
.getExecutorService().submit(task);
if (fed.isDistributed()) {
// create a proxy for the future.
joinTaskFuture = ((AbstractDistributedFederation) fed)
.getProxy(joinTaskFuture);
}
task.futureProxy = joinTaskFuture;
return joinTaskFuture;
}
/**
*
* @param masterUUID
* The master UUID should be cached locally by the JoinTask so
* that invoking this method does not require RMI.
* @param orderIndex
* @param partitionId
* @return
*/
static public String getJoinTaskNamespace(final UUID masterUUID,
final int orderIndex, final int partitionId) {
return masterUUID + "/" + orderIndex + "/" + partitionId;
}
/**
* The index view that we need for the {@link DistributedJoinTask} is on the
* {@link IndexManager} class, not the live {@link ManagedJournal}. Looking
* on the live journal we will only see the mutable {@link BTree} and not
* the entire index partition view. However, {@link IndexManager} does not
* implement {@link IIndexManager} or even {@link IIndexStore}. Therefore
* this class was introduced. It passes most of the methods on to the
* {@link IBigdataFederation} but {@link #getIndex(String, long)} is
* delegated to {@link IndexManager#getIndex(String, long)} which is the
* method that knows how to create the index partition view.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*
* @todo While this class solves our problem I do not know whether or not
* this class should this class have more visibility? The downside is
* that it is a bit incoherent how it passes along one method to the
* {@link IndexManager}, most methods to the
* {@link IBigdataFederation} and disallows {@link #dropIndex(String)}
* and {@link #registerIndex(IndexMetadata)} in an attempt to stay out
* of trouble. That may be enough reason to keep it private.
*/
static class DelegateIndexManager implements IIndexManager {
private final DataService dataService;
public DelegateIndexManager(final DataService dataService) {
if (dataService == null)
throw new IllegalArgumentException();
this.dataService = dataService;
}
/**
* Delegates to the {@link IndexManager}.
*/
@Override
public IIndex getIndex(final String name, final long timestamp) {
return dataService.getResourceManager().getIndex(name, timestamp);
}
/**
* Not allowed.
*/
@Override
public void dropIndex(final String name) {
throw new UnsupportedOperationException();
}
/**
* Not allowed.
*/
@Override
public void registerIndex(IndexMetadata indexMetadata) {
throw new UnsupportedOperationException();
}
@Override
public void destroy() {
throw new UnsupportedOperationException();
}
@Override
public ExecutorService getExecutorService() {
return dataService.getFederation().getExecutorService();
}
@Override
public BigdataFileSystem getGlobalFileSystem() {
return dataService.getFederation().getGlobalFileSystem();
}
@Override
public SparseRowStore getGlobalRowStore() {
return dataService.getFederation().getGlobalRowStore();
}
@Override
public SparseRowStore getGlobalRowStore(final long timestamp) {
return dataService.getFederation().getGlobalRowStore(timestamp);
}
@Override
public long getLastCommitTime() {
return dataService.getFederation().getLastCommitTime();
}
@Override
public IResourceLocator getResourceLocator() {
return dataService.getFederation().getResourceLocator();
}
@Override
public IResourceLockService getResourceLockService() {
return dataService.getFederation().getResourceLockService();
}
@Override
public TemporaryStore getTempStore() {
return dataService.getFederation().getTempStore();
}
@Override
public ScheduledFuture<?> addScheduledTask(Runnable task,
long initialDelay, long delay, TimeUnit unit) {
return dataService.getFederation().addScheduledTask(task,
initialDelay, delay, unit);
}
@Override
public boolean getCollectPlatformStatistics() {
return dataService.getFederation().getCollectPlatformStatistics();
}
@Override
public boolean getCollectQueueStatistics() {
return dataService.getFederation().getCollectQueueStatistics();
}
@Override
public int getHttpdPort() {
return dataService.getFederation().getHttpdPort();
}
@Override
public CounterSet getCounters() {
return dataService.getFederation().getCounters();
}
/**
* {@inheritDoc}
*
* TODO Implement. Probably delegate to the local DS n2a index so this
* does a DS local n2a scan.
*/
@Override
public Iterator<String> indexNameScan(String prefix, long timestamp) {
throw new UnsupportedOperationException();
}
/**
* The data service does support group commit.
*/
@Override
public boolean isGroupCommit() {
return true;
}
}
}