/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Mar 14, 2007
*/
package com.bigdata.service;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.log4j.Logger;
import com.bigdata.Banner;
import com.bigdata.bop.engine.IQueryPeer;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.bop.fed.FederatedQueryEngine;
import com.bigdata.btree.IIndex;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.ResultSet;
import com.bigdata.btree.proc.IIndexProcedure;
import com.bigdata.counters.CounterSet;
import com.bigdata.counters.Instrument;
import com.bigdata.io.ByteBufferInputStream;
import com.bigdata.journal.AbstractLocalTransactionManager;
import com.bigdata.journal.AbstractTask;
import com.bigdata.journal.ConcurrencyManager;
import com.bigdata.journal.DropIndexTask;
import com.bigdata.journal.IConcurrencyManager;
import com.bigdata.journal.IDistributedTransactionService;
import com.bigdata.journal.ILocalTransactionManager;
import com.bigdata.journal.IResourceManager;
import com.bigdata.journal.ITransactionService;
import com.bigdata.journal.ITx;
import com.bigdata.journal.IndexProcedureTask;
import com.bigdata.journal.JournalTransactionService.SinglePhaseCommit;
import com.bigdata.journal.Name2Addr;
import com.bigdata.journal.RegisterIndexTask;
import com.bigdata.journal.RunState;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.journal.Tx;
import com.bigdata.journal.WriteExecutorService;
import com.bigdata.mdi.IResourceMetadata;
import com.bigdata.rawstore.IBlock;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.resources.ResourceManager;
import com.bigdata.resources.StoreManager;
import com.bigdata.resources.StoreManager.ManagedJournal;
import cutthecrap.utils.striterators.IFilter;
/**
* An implementation of a network-capable {@link IDataService}. The service is
* started using the {@link DataServer} class. Operations are submitted using an
* {@link IConcurrencyManager#submit(AbstractTask)} and will run with the
* appropriate concurrency controls as imposed by that method.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
*
* @see DataServer, which is used to start this service.
*
* @todo Startup should be broken into two aspects: local startup and service
* connect and disconnect events. For example, on the tx service connect
* event the store manager should notify the tx service of the last
* commit time on the live journal. On disconnect, the data service needs
* to go offline. The metadata service is required only for overflow
* processing, but if it remains down then we will eventually need to
* bring the data service offline when the buffered writes would cause the
* live journal to no longer be fully buffered as the overflow processing
* time will be increased if we need to read through to the disk during
* overflow.
*
* @todo Write benchmark test to measure interhost transfer rates. Should be
* 100Mbits/sec (~12M/sec) on a 100BaseT switched network. With full
* duplex in the network and the protocol, that rate should be
* bidirectional. Can that rate be sustained with a fully connected
* bi-directional transfer?
*
* FIXME Probably ALL of the methods {@link IDataService} should be subsumed
* under {@link #submit(Callable)} or
* {@link #submit(long, String, IIndexProcedure)} so they do not block on the
* {@link DataService} and thereby absorb a thread.
*
* @todo Review JERI options to support secure RMI protocols. For example, using
* SSL or an SSH tunnel. For most purposes I expect bigdata to operate on
* a private network, but replication across gateways is also a common use
* case. Do we have to handle it specially?
*/
abstract public class DataService extends AbstractService
implements IDataService, IServiceShutdown, ISession //IWritePipeline
{
protected static final Logger log = Logger.getLogger(DataService.class);
/**
 * Options understood by the {@link DataService}.
 * <p>
 * This interface declares no options of its own. It only aggregates the
 * option interfaces named in its <code>extends</code> clause so that they can
 * all be reached through a single type.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public static interface Options extends com.bigdata.journal.Options,
com.bigdata.journal.ConcurrencyManager.Options,
com.bigdata.resources.ResourceManager.Options,
com.bigdata.counters.AbstractStatisticsCollector.Options,
com.bigdata.service.IBigdataClient.Options
// @todo local tx manager options?
{
}
/**
 * Plain mutable tallies for the block read API.
 *
 * @todo improve reporting here and for block write as well (goes through
 *       unisolated tasks at the present).
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
protected static class ReadBlockCounters {

    /** #of block read requests. */
    long readBlockCount;

    /** NOTE(review): presumably the #of block read requests that failed - confirm. */
    long readBlockErrorCount;

    /** NOTE(review): presumably the #of bytes returned by block reads - confirm. */
    long readBlockBytes;

    /**
     * Time in nanoseconds (the "Blocks Read Per Second" counter converts this
     * value from {@link TimeUnit#NANOSECONDS}).
     */
    long readBlockNanos;

    public ReadBlockCounters() {
        // NOP - every tally starts at zero.
    }

}
/**
* Counters for the block read API.
*/
final private ReadBlockCounters readBlockApiCounters = new ReadBlockCounters();
/**
* Object manages the resources hosted by this {@link DataService}.
*/
private ResourceManager resourceManager;
/**
* Object provides concurrency control for the named resources (indices).
*/
private ConcurrencyManager concurrencyManager;
/**
* Object supports local transactions and does handshaking with the
* {@link DistributedTransactionService}.
*/
private DataServiceTransactionManager localTransactionManager;
/**
* Object used to support distributed query.
*/
private final AtomicReference<FederatedQueryEngine> queryEngine = new AtomicReference<FederatedQueryEngine>();
/**
 * Returns the object used to manage the local resources.
 *
 * @return The {@link ResourceManager}; <code>null</code> until
 *         {@link #start()} has assigned it.
 */
public ResourceManager getResourceManager() {
    return this.resourceManager;
}
/**
 * Returns the object used to support distributed query against an
 * {@link IBigdataFederation}.
 *
 * @return The current query engine; <code>null</code> until the asynchronous
 *         hook submitted by {@link #start()} has published it.
 */
public IQueryPeer getQueryEngine() {
    final FederatedQueryEngine current = queryEngine.get();
    return current;
}
/**
 * Returns the object used to control access to the local resources.
 *
 * @return The {@link ConcurrencyManager}; <code>null</code> until
 *         {@link #start()} has assigned it.
 */
public ConcurrencyManager getConcurrencyManager() {
    return this.concurrencyManager;
}
/**
 * Returns the object used to coordinate transactions executing against
 * local resources.
 *
 * @return The local transaction manager; <code>null</code> until
 *         {@link #start()} has assigned it.
 */
public ILocalTransactionManager getLocalTransactionManager() {
    return this.localTransactionManager;
}
/**
 * Factory for the {@link IResourceManager} used by this service. The returned
 * object is a {@link ResourceManager} whose federation, data service and
 * data service {@link UUID} accessors all delegate to this
 * {@link DataService}.
 *
 * @param properties
 *            Properties to configure that object.
 *
 * @return The {@link IResourceManager}.
 */
protected IResourceManager newResourceManager(final Properties properties) {

    // The enclosing service, captured for use by the delegating overrides.
    final DataService self = this;

    return new ResourceManager(properties) {

        @Override
        public DataService getDataService() {
            return self;
        }

        @Override
        public UUID getDataServiceUUID() {
            return self.getServiceUUID();
        }

        @Override
        public IBigdataFederation<?> getFederation() {
            return self.getFederation();
        }

        // @todo A getDataServiceUUIDs() override must report the entire
        // service failover chain. The disabled stub simply returned
        // new UUID[] { getDataServiceUUID() }.

    };

}
/**
* A clone of properties specified to the ctor.
*/
private final Properties properties;
/**
 * Returns a new, empty {@link Properties} object whose <em>defaults</em> are
 * the properties cloned by the constructor. Lookups fall through to the
 * service configuration, while writes on the returned object do not modify
 * the configuration held by the service.
 *
 * @return An object wrapping the properties specified to the ctor.
 */
public Properties getProperties() {
    final Properties wrapper = new Properties(properties);
    return wrapper;
}
/**
* The dynamic property set associated with the service instance.
*/
private final Session session = new Session();
/**
 * Returns the {@link Session} instance created together with this service.
 */
@Override
public Session getSession() {
    return this.session;
}
/**
* Core constructor - you MUST {@link #start()} the {@link DataService}
* before it can be used.
*
* @param properties
* The configuration properties.
*
* @see Options
*
* @see #start()
*/
protected DataService(final Properties properties) {
// show the copyright banner during statup.
Banner.banner();
this.properties = (Properties) properties.clone();
}
/**
 * Reports whether the service is open.
 * <p>
 * Note: "open" is judged by the {@link ConcurrencyManager#isOpen()} but the
 * {@link DataService} is not usable until {@link StoreManager#isStarting()}
 * returns <code>false</code> (there is asynchronous processing involved in
 * reading the existing store files or creating the first store file and you
 * can not use the {@link DataService} until that processing has been
 * completed). The {@link ConcurrencyManager} will block for a while waiting
 * for the {@link StoreManager} startup to complete and will reject tasks if
 * startup processing does not complete within a timeout.
 *
 * @return <code>true</code> iff a {@link ConcurrencyManager} has been
 *         assigned and it reports that it is open.
 */
public boolean isOpen() {

    // read the field once so the null test and the call see the same object.
    final ConcurrencyManager cm = this.concurrencyManager;

    if (cm == null) {
        // never started.
        return false;
    }

    return cm.isOpen();

}
/**
 * Concrete implementation manages the local state of transactions executing
 * on a {@link DataService}.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public class DataServiceTransactionManager extends
        AbstractLocalTransactionManager {

    /**
     * Declared <code>public</code> here so that
     * {@link DataService#singlePhaseCommit(long)} can invoke it. The call is
     * simply forwarded to the super class.
     */
    @Override
    public void deactivateTx(final Tx localState) {
        super.deactivateTx(localState);
    }

    /**
     * Resolves the {@link ITransactionService} through the federation of the
     * enclosing {@link DataService}.
     */
    @Override
    public ITransactionService getTransactionService() {
        final DataService outer = DataService.this;
        return outer.getFederation().getTransactionService();
    }

}
/**
 * Starts the {@link DataService}: creates the resource manager, the local
 * transaction manager and the concurrency manager, then schedules a task on
 * the federation's executor service which publishes the query engine once
 * the resource manager is running.
 *
 * @return This {@link DataService}.
 *
 * @throws IllegalStateException
 *             if the service is already open.
 *
 * @todo it would be nice if {@link #start()} could restart after
 *       {@link #shutdown()} but that is hardly necessary.
 */
@Override
synchronized public DataService start() {

    if (isOpen()) {
        throw new IllegalStateException();
    }

    resourceManager = (ResourceManager) newResourceManager(properties);

    localTransactionManager = new DataServiceTransactionManager();

    concurrencyManager = new ConcurrencyManager(properties,
            localTransactionManager, resourceManager);

    if (resourceManager != null) {
        // Startup the resource manager.
        resourceManager.setConcurrencyManager(concurrencyManager);
    }

    /*
     * Hook sets up the queryEngine reference once the data service is
     * running.
     */
    final Runnable queryEngineSetup = new Runnable() {
        @Override
        public void run() {
            // blocks until the resource manager is running.
            DataService.this.getResourceManager().awaitRunning();
            final FederatedQueryEngine engine = new FederatedQueryEngine(
                    DataService.this);
            engine.init();
            DataService.this.queryEngine.set(engine);
            if (log.isInfoEnabled()) {
                log.info("Setup query engine.");
            }
        }
    };

    getFederation().getExecutorService().execute(queryEngineSetup);

    return this;

}
/**
* Delegate handles custom counters for the {@link ResourceManager}, local
* {@link AbstractTransactionService} and the {@link ConcurrencyManager}, dynamic
* re-attachment of counters, etc. This delegate must be set on the
* {@link AbstractClient} for those additional features to work.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
*/
static public class DataServiceFederationDelegate extends
DefaultServiceFederationDelegate<DataService> {
private final DataService dataService;
/**
 * @param service
 *            The {@link DataService} whose counters and readiness are
 *            reported by this delegate.
 */
public DataServiceFederationDelegate(final DataService service) {
    super(service);
    dataService = service;
}
// /**
// * Dynamically detach and attach the counters for the named indices
// * underneath of the {@link IndexManager}.
// * <p>
// * Note: This method limits the frequency of update to no more than once
// * every 5 seconds.
// * <p>
// * Note: {@link OverflowManager#overflow()} is responsible for
// * reattaching the counters for the live {@link ManagedJournal} during
// * synchronous overflow.
// */
// @Override
// synchronized public void reattachDynamicCounters() {
//
// final long now = System.currentTimeMillis();
//
// final long elapsed = now - lastReattachMillis;
//
// if (dataService.isOpen() && dataService.resourceManager.isRunning()
// && elapsed > 5000/* ms */) {
//
// // inherit base class behavior
// super.reattachDynamicCounters();
//
// // The service's counter set hierarchy.
// final CounterSet serviceRoot = dataService.getFederation()
// .getServiceCounterSet();
//
//// // The lock manager
//// {
////
//// // the lock manager is a direct child of this node.
//// final CounterSet tmp = (CounterSet) serviceRoot
//// .makePath(IDataServiceCounters.concurrencyManager
//// + ICounterSet.pathSeparator
//// + IConcurrencyManagerCounters.writeService);
////
//// synchronized (tmp) {
////
//// /*
//// * Note: We detach and then attach since that wipes out
//// * any counter set nodes for queues which no longer
//// * exist. Otherwise they will build up forever.
//// */
////
//// // detach the old counters.
//// tmp.detach(IConcurrencyManagerCounters.LockManager);
////
//// // attach the the new counters.
//// ((CounterSet) tmp
//// .makePath(IConcurrencyManagerCounters.LockManager))
//// .attach(dataService.concurrencyManager
//// .getWriteService().getLockManager()
//// .getCounters());
////
//// }
////
//// }
//
// // The live indices.
// {
//
// /*
// * The counters for the index manager within the service's
// * counter hierarchy.
// *
// * Note: The indices are a direct child of this node.
// */
// final CounterSet tmp = (CounterSet) serviceRoot
// .getPath(IDataServiceCounters.resourceManager
// + ICounterSet.pathSeparator
// + IResourceManagerCounters.IndexManager);
//
// synchronized (tmp) {
//
// /*
// * Note: We detach and then attach since that wipes out
// * any counter set nodes for index partitions which no
// * longer exist. Otherwise they will build up forever.
// */
// final boolean exists = tmp
// .getPath(IIndexManagerCounters.Indices) != null;
//
// // detach the index partition counters.
// tmp.detach(IIndexManagerCounters.Indices);
//
// // attach the current index partition counters.
// ((CounterSet) tmp
// .makePath(IIndexManagerCounters.Indices))
// .attach(dataService.resourceManager
// .getIndexCounters());
//
// if (log.isInfoEnabled())
// log
// .info("Attached index partition counters: preexisting="
// + exists
// + ", path="
// + tmp.getPath());
//
// }
//
// }
//
// lastReattachMillis = now;
//
// }
//
// }
// private long lastReattachMillis = 0L;
/**
 * The service is ready once its resource manager is both open and running.
 * If the resource manager is not open then the data service is shut down
 * immediately as a side effect.
 *
 * @return <code>true</code> iff the resource manager is open and running.
 */
@Override
public boolean isServiceReady() {

    if (dataService.resourceManager.isOpen()) {

        if (dataService.resourceManager.isRunning()) {
            return true;
        }

        log.warn("Resource manager is not running yet.");

        return false;

    }

    /*
     * This will happen if the store manager is unable to discover the
     * timestamp service. It will halt its startup process and report that
     * it is closed. At that point the data service can not start and will
     * shutdown.
     */
    if (log.isInfoEnabled()) {
        log.info("Store manager not open - will shutdown.");
    }

    // shutdown the data service.
    dataService.shutdownNow();

    // the service was not started.
    return false;

}
/**
 * Extended to setup {@link DataService} specific counters and to write the
 * client URL onto a file in the service's data directory.
 */
@Override
public void didStart() {

    super.didStart();

    setupCounters();

    final File urlFile = dataService.getHTTPDURLFile();

    logHttpdURL(urlFile);

}
/**
 * Sets up {@link DataService} specific counters.
 * <p>
 * The counters of the resource manager, the concurrency manager and the
 * local transaction manager are attached beneath the namespaces declared by
 * {@link IDataServiceCounters}. The query engine counters are attached only
 * if the query engine reference has already been set when this method runs.
 * Finally, the "Block API" counters are registered.
 * <p>
 * If the service is no longer open then a warning is logged and nothing is
 * attached.
 *
 * @throws IllegalStateException
 *             if the service {@link UUID} is not available yet.
 *
 * @see IDataServiceCounters
 */
protected void setupCounters() {

    if (getServiceUUID() == null) {
        throw new IllegalStateException(
                "The ServiceUUID is not available yet");
    }

    if (!dataService.isOpen()) {
        /*
         * The service has already been closed.
         */
        log.warn("Service is not open.");
        return;
    }

    /*
     * Service specific counters.
     */
    final CounterSet serviceRoot = dataService.getFederation()
            .getServiceCounterSet();

    serviceRoot.makePath(IDataServiceCounters.resourceManager).attach(
            dataService.resourceManager.getCounters());

    serviceRoot.makePath(IDataServiceCounters.concurrencyManager).attach(
            dataService.concurrencyManager.getCounters());

    serviceRoot.makePath(IDataServiceCounters.transactionManager).attach(
            dataService.localTransactionManager.getCounters());

    {
        // may still be null since it is set asynchronously.
        final QueryEngine queryEngine = dataService.queryEngine.get();
        if (queryEngine != null) {
            serviceRoot.makePath(IDataServiceCounters.queryEngine).attach(
                    queryEngine.getCounters());
        }
    }

    // block API.
    {
        final CounterSet tmp = serviceRoot.makePath("Block API");

        tmp.addCounter("Blocks Read", new Instrument<Long>() {
            @Override
            public void sample() {
                setValue(dataService.readBlockApiCounters.readBlockCount);
            }
        });

        tmp.addCounter("Blocks Read Per Second", new Instrument<Double>() {
            @Override
            public void sample() {
                // @todo encapsulate this logic.
                final long nanos = dataService.readBlockApiCounters.readBlockNanos;
                final long count = dataService.readBlockApiCounters.readBlockCount;
                /*
                 * Compute the rate in floating point. Truncating the elapsed
                 * time to whole seconds and dividing long by long (the
                 * former behavior) reported zero for anything under one
                 * second and lost the fractional part otherwise.
                 */
                final double secs = nanos
                        / (double) TimeUnit.SECONDS.toNanos(1L);
                final double v = (nanos == 0L) ? 0d : count / secs;
                setValue(v);
            }
        });
    }

}
}
/**
 * Polite shutdown does not accept new requests and will shutdown once the
 * existing requests have been processed.
 * <p>
 * This is a NOP if the service is not open. Otherwise the query engine (if
 * it was set), the concurrency manager, the local transaction manager and
 * the resource manager are shut down in that order, followed by the super
 * class. Note: the references to those objects are not cleared.
 */
@Override
synchronized public void shutdown() {

    if (!isOpen()) {
        return;
    }

    final QueryEngine engine = this.queryEngine.get();

    if (engine != null) {
        engine.shutdown();
    }

    if (concurrencyManager != null) {
        concurrencyManager.shutdown();
    }

    if (localTransactionManager != null) {
        localTransactionManager.shutdown();
    }

    if (resourceManager != null) {
        resourceManager.shutdown();
    }

    super.shutdown();

}
/**
 * Shutdown attempts to abort in-progress requests and shutdown as soon as
 * possible.
 * <p>
 * This is a NOP if the service is not open. Otherwise the query engine (if
 * it was set), the concurrency manager, the local transaction manager and
 * the resource manager are shut down in that order, followed by the super
 * class. Note: the references to those objects are not cleared.
 */
@Override
synchronized public void shutdownNow() {

    if (!isOpen()) {
        return;
    }

    final QueryEngine engine = this.queryEngine.get();

    if (engine != null) {
        engine.shutdownNow();
    }

    if (concurrencyManager != null) {
        concurrencyManager.shutdownNow();
    }

    if (localTransactionManager != null) {
        localTransactionManager.shutdownNow();
    }

    if (resourceManager != null) {
        resourceManager.shutdownNow();
    }

    super.shutdownNow();

}
/**
 * Destroys the service: invokes the super class, deletes the resources
 * managed by the {@link ResourceManager} and finally removes the file on
 * which the URL of the embedded httpd service was written (if it exists).
 * <p>
 * Failure to remove that file is reported as a warning rather than being
 * silently ignored. It does not cause this method to fail.
 */
@Override
synchronized public void destroy() {

    super.destroy();

    resourceManager.deleteResources();

    final File file = getHTTPDURLFile();

    // File#delete() reports failure through its return value only.
    if (file.exists() && !file.delete()) {
        log.warn("Could not delete file: " + file);
    }

}
/**
 * The file on which the URL of the embedded httpd service is written. The
 * file is named <code>httpd.url</code> and is located in the data directory
 * reported by the {@link ResourceManager}.
 */
protected File getHTTPDURLFile() {
    final ResourceManager rm = getResourceManager();
    return new File(rm.getDataDir(), "httpd.url");
}
/**
 * Interface defines and documents the counters and counter namespaces
 * reported by the {@link DataService} and the various services which it
 * uses. Each constant is the name of the path beneath the service's counter
 * set to which the corresponding component's counters are attached.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public static interface IDataServiceCounters extends
        ConcurrencyManager.IConcurrencyManagerCounters,
        // ...TransactionManager.XXXCounters,
        ResourceManager.IResourceManagerCounters
{

    /**
     * The namespace for the counters pertaining to the {@link ResourceManager}.
     */
    String resourceManager = "Resource Manager";

    /**
     * The namespace for the counters pertaining to the {@link ConcurrencyManager}.
     */
    String concurrencyManager = "Concurrency Manager";

    /**
     * The namespace for the counters pertaining to the
     * {@link ILocalTransactionManager}.
     */
    String transactionManager = "Transaction Manager";

    /**
     * The namespace for the counters pertaining to the {@link QueryEngine}.
     */
    String queryEngine = "Query Engine";

}
/*
* ITxCommitProtocol.
*/
/**
 * Forwards the new release time to the {@link ResourceManager}. The logging
 * context is set up for the duration of the call.
 *
 * @param releaseTime
 *            The release time, passed through unchanged.
 */
@Override
public void setReleaseTime(final long releaseTime) {

    setupLoggingContext();

    try {

        final ResourceManager rm = getResourceManager();

        rm.setReleaseTime(releaseTime);

    } finally {

        clearLoggingContext();

    }

}
/**
 * Commits a read-write transaction whose writes are all local to this
 * {@link DataService}.
 * <p>
 * A transaction with an empty write set is committed immediately and
 * <code>0L</code> is returned. Otherwise a {@link SinglePhaseCommit} task is
 * submitted to the {@link ConcurrencyManager} and this method blocks until
 * that task is done. On success the local transaction state is marked
 * {@link RunState#Committed} and deactivated. On any failure it is marked
 * {@link RunState#Aborted}, deactivated, and the cause is rethrown wrapped in
 * a {@link RuntimeException}. If the cause is an
 * {@link InterruptedException} then the interrupt status of the calling
 * thread is restored before the exception is thrown.
 * <p>
 * Note: This is basically identical to the standalone journal case.
 *
 * @param tx
 *            The transaction identifier.
 *
 * @return The commit time set on the task when it was committed as part of a
 *         group commit -or- <code>0L</code> if the write set was empty.
 *
 * @throws IllegalArgumentException
 *             if <i>tx</i> is a read-only transaction.
 * @throws IllegalStateException
 *             if <i>tx</i> is not an active transaction on this service.
 * @throws RuntimeException
 *             wrapping any failure of the commit task.
 *
 * @see com.bigdata.journal.JournalTransactionService#commitImpl(long)
 */
@Override
public long singlePhaseCommit(final long tx) throws ExecutionException,
        InterruptedException, IOException {

    setupLoggingContext();

    try {

        if (TimestampUtility.isReadOnly(tx)) {
            /*
             * A read-only transaction.
             *
             * Note: We do not maintain state on the client for read-only
             * transactions. The state for a read-only transaction is
             * captured by its transaction identifier and by state on the
             * transaction service, which maintains a read lock.
             *
             * Note: Thrown exception since this method will not be invoked
             * by the txService for a read-only tx.
             */
            throw new IllegalArgumentException();
        }

        final Tx localState = (Tx) getLocalTransactionManager().getTx(tx);

        if (localState == null) {
            /*
             * This is not an active transaction.
             */
            throw new IllegalStateException();
        }

        /*
         * Note: This code is shared (copy-by-value) by the
         * JournalTransactionService commitImpl(...)
         */
        final ManagedJournal journal = getResourceManager().getLiveJournal();

        /*
         * A transaction with an empty write set can commit immediately since
         * validation and commit are basically NOPs (this is the same as the
         * read-only case.)
         *
         * Note: We lock out other operations on this tx so that this
         * decision will be atomic.
         */
        localState.lock.lock();
        try {
            if (localState.isEmptyWriteSet()) {
                // Sort of a NOP commit.
                finishLocalTx(journal, localState, RunState.Committed);
                return 0L;
            }
        } finally {
            localState.lock.unlock();
        }

        final IConcurrencyManager concurrencyManager = /* journal. */getConcurrencyManager();

        final AbstractTask<Void> task = new SinglePhaseCommit(
                concurrencyManager, journal.getLocalTransactionManager(),
                localState);

        try {

            /*
             * FIXME This is not working yet. If we submit directly to the
             * concurrency manager, then there is a ClassCastException on the
             * DirtyListener. If we submit directly to the WriteService then
             * the task does not hold its locks. None of these options work.
             * The write service really needs a refactor (to be state based
             * rather like the new lock service) before I finish the
             * distributed commit protocol.
             */
            // submit and wait for the result.
            concurrencyManager.submit(task).get();
            // .getWriteService().submit(task).get();
            // .getWriteService().getLockManager().submit(task.getResource(), task).get();

            /*
             * FIXME The state changes for the local tx should be atomic
             * across this operation. In order to do that we have to make
             * those changes inside of SinglePhaseTask while it is holding the
             * lock, but after it has committed. Perhaps the best way to do
             * this is with a pre- and post- call() API since we can not hold
             * the lock across the task otherwise (it will deadlock).
             */
            localState.lock.lock();
            try {
                finishLocalTx(journal, localState, RunState.Committed);
            } finally {
                localState.lock.unlock();
            }

        } catch (Throwable t) {

            // log.error(t.getMessage(), t);

            localState.lock.lock();
            try {
                finishLocalTx(journal, localState, RunState.Aborted);
            } finally {
                localState.lock.unlock();
            }

            if (t instanceof InterruptedException) {
                /*
                 * The interrupt is about to be hidden inside of a
                 * RuntimeException, so restore the interrupt status for the
                 * benefit of the caller.
                 */
                Thread.currentThread().interrupt();
            }

            throw new RuntimeException(t);

        }

        /*
         * Note: This is returning the commitTime set on the task when it was
         * committed as part of a group commit.
         */
        return task.getCommitTime();

    } finally {

        clearLoggingContext();

    }

}

/**
 * Sets the run state of the local transaction and then deactivates it on the
 * local transaction manager of the given journal.
 * <p>
 * Note: The caller MUST hold <code>localState.lock</code>.
 *
 * @param journal
 *            The live journal.
 * @param localState
 *            The local state of the transaction.
 * @param runState
 *            The new run state for the transaction.
 */
private static void finishLocalTx(final ManagedJournal journal,
        final Tx localState, final RunState runState) {

    localState.setRunState(runState);

    ((DataServiceTransactionManager) journal.getLocalTransactionManager())
            .deactivateTx(localState);

}
/**
 * Runs a {@link DistributedCommitTask} for a read-write transaction on the
 * {@link ConcurrencyManager} and blocks until that task is done.
 *
 * @param tx
 *            The transaction identifier.
 * @param revisionTime
 *            The revision time, passed through to the task.
 *
 * @throws IllegalArgumentException
 *             if <i>tx</i> is a read-only transaction.
 * @throws IllegalStateException
 *             if <i>tx</i> is not an active transaction on this service.
 */
@Override
public void prepare(final long tx, final long revisionTime)
        throws ExecutionException, InterruptedException, IOException {

    setupLoggingContext();

    try {

        /*
         * Note: We do not maintain state on the client for read-only
         * transactions. The state for a read-only transaction is captured by
         * its transaction identifier and by state on the transaction
         * service, which maintains a read lock.
         *
         * Note: Thrown exception since this method will not be invoked by
         * the txService for a read-only tx.
         */
        if (TimestampUtility.isReadOnly(tx)) {
            throw new IllegalArgumentException();
        }

        final Tx state = (Tx) getLocalTransactionManager().getTx(tx);

        if (state == null) {
            // This is not an active transaction.
            throw new IllegalStateException();
        }

        final DistributedCommitTask commitTask = new DistributedCommitTask(
                concurrencyManager, resourceManager, getServiceUUID(),
                state, revisionTime);

        // Submit the task and await its future.
        concurrencyManager.submit(commitTask).get();

    } finally {

        clearLoggingContext();

    }

}
/**
 * Task handling the distributed commit protocol for the {@link IDataService}.
 * <p>
 * Note: This class is unfinished. {@link #lockJournal()},
 * {@link #unlockJournal()} and {@link #commit(long)} all throw
 * {@link UnsupportedOperationException}, so {@link #doTask()} can not
 * currently run to completion.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
private static class DistributedCommitTask extends AbstractTask<Void> {

    // ctor arg.
    private final ResourceManager resourceManager;
    private final UUID dataServiceUUID;
    private final Tx state;
    private final long revisionTime;

    // derived.
    private final long tx;

    /**
     * @param concurrencyManager
     *            Passed to the {@link AbstractTask} constructor.
     * @param resourceManager
     *            Used to reach the live journal.
     * @param dataServiceUUID
     *            Identifies this data service to the transaction service.
     * @param localState
     *            The local state of the transaction. Its dirty resources
     *            are the resources declared by this task.
     * @param revisionTime
     *            The revision time handed to {@link Tx#prepare(long)}.
     *
     * @throws IllegalArgumentException
     *             if <i>resourceManager</i> or <i>localState</i> is
     *             <code>null</code>, if <i>revisionTime</i> is zero, or if
     *             <i>revisionTime</i> is not greater than the start time
     *             of the transaction.
     */
    public DistributedCommitTask(
            final ConcurrencyManager concurrencyManager,//
            final ResourceManager resourceManager,//
            final UUID dataServiceUUID,//
            final Tx localState,//
            final long revisionTime//
    ) {

        super(concurrencyManager, ITx.UNISOLATED,
                dirtyResourceOf(localState));

        if (resourceManager == null)
            throw new IllegalArgumentException();

        if (revisionTime == 0L)
            throw new IllegalArgumentException();

        if (revisionTime <= localState.getStartTimestamp())
            throw new IllegalArgumentException();

        this.resourceManager = resourceManager;

        this.dataServiceUUID = dataServiceUUID;

        this.state = localState;

        this.revisionTime = revisionTime;

        this.tx = localState.getStartTimestamp();

    }

    /**
     * Returns the dirty resources of the transaction. The
     * <code>null</code> check lives here because the value is needed by
     * the <code>super(...)</code> call, which runs before any statement
     * in the constructor body (a check in the body would be unreachable
     * for a <code>null</code> argument).
     *
     * @throws IllegalArgumentException
     *             if <i>localState</i> is <code>null</code>.
     */
    private static String[] dirtyResourceOf(final Tx localState) {

        if (localState == null)
            throw new IllegalArgumentException();

        return localState.getDirtyResource();

    }

    /**
     * Prepares the transaction, reports that to the transaction service
     * (which supplies the commit time), and then, while holding the
     * journal lock, commits using that commit time and waits for the
     * transaction service to report the outcome of the distributed
     * transaction. The journal is rolled back unless the transaction
     * service reports success.
     *
     * FIXME Finish, write tests and debug.
     */
    @Override
    protected Void doTask() throws Exception {

        final IDistributedTransactionService txService = (IDistributedTransactionService) resourceManager
                .getLiveJournal().getLocalTransactionManager()
                .getTransactionService();

        prepare();

        final long commitTime = txService.prepared(tx, dataServiceUUID);

        // obtain the exclusive write lock on journal.
        lockJournal();
        try {

            // Commit using the specified commit time.
            commit(commitTime);

            boolean success = false;
            try {

                /*
                 * Wait until the entire distributed transaction is
                 * committed.
                 */
                success = txService.committed(tx, dataServiceUUID);

            } finally {

                if (!success) {

                    // Rollback the journal.
                    rollback();

                }

            }

        } finally {

            // release the exclusive write lock on journal.
            unlockJournal();

        }

        return null;

    }

    /**
     * Prepare the transaction (validate and merge down onto the unisolated
     * indices and then checkpoints those indices).
     * <p>
     * Note: This presumes that we are already holding exclusive write
     * locks on the named indices such that the pre-conditions for
     * validation and its post-conditions can not change until we either
     * commit or discard the transaction.
     * <p>
     * Note: The indices need to be isolated as by {@link AbstractTask} or
     * they will be enrolled onto {@link Name2Addr}'s commitList when they
     * become dirty and then checkpointed and included with the NEXT
     * commit.
     * <p>
     * For this reason, the {@link DistributedCommitTask} is an UNISOLATED
     * task so that we can reuse the existing mechanisms as much as
     * possible.
     *
     * FIXME This will work if we can grab the write service lock from
     * within the task (which will mean changing that code to allow the
     * lock with the caller only still running or simply waiting until we
     * are signaled by the txService that all participants are either go
     * (continue execution and will commit at the next group commit, but
     * then we need a protocol to impose the correct commit time, e.g., by
     * passing it on the task and ensuring that there is no other tx ready
     * in the commit group) or abort (just throw an exception).
     */
    protected void prepare() {

        state.prepare(revisionTime);

    }

    /**
     * Obtain the exclusive lock on the write service. This will prevent
     * any other tasks using the concurrency API from writing on the
     * journal.
     *
     * @throws UnsupportedOperationException
     *             always (not implemented).
     */
    protected void lockJournal() {

        throw new UnsupportedOperationException();

    }

    /**
     * Release the lock obtained by {@link #lockJournal()}.
     *
     * @throws UnsupportedOperationException
     *             always (not implemented).
     */
    protected void unlockJournal() {

        throw new UnsupportedOperationException();

    }

    /**
     * Commit the transaction using the specified <i>commitTime</i>.
     * <p>
     * Note: There are no persistent side-effects unless this method
     * returns successfully.
     *
     * @param commitTime
     *            The commit time that must be used.
     *
     * @throws UnsupportedOperationException
     *             always (the commit below is disabled until the todo has
     *             been addressed).
     */
    protected void commit(final long commitTime) {

        /*
         * @todo enroll the named indices onto Name2Addr's commitList (this
         * basically requires breaking the isolation imposed by the
         * AbstractTask).
         */

        if (true)
            throw new UnsupportedOperationException();

        final ManagedJournal journal = resourceManager.getLiveJournal();

        // atomic commit.
        journal.commitNow(commitTime);

    }

    /**
     * Discard the last commit, restoring the journal to the previous
     * commit point.
     */
    protected void rollback() {

        final ManagedJournal journal = resourceManager.getLiveJournal();

        journal.rollback();

    }

}
/**
 * Marks the local state of the transaction as {@link RunState#Aborted} while
 * holding the transaction's lock.
 * <p>
 * NOTE(review): unlike {@link #singlePhaseCommit(long)} this does not
 * deactivate the transaction on the local transaction manager - confirm that
 * this is intended.
 *
 * @param tx
 *            The transaction identifier.
 *
 * @throws IllegalArgumentException
 *             if there is no local state for <i>tx</i>.
 */
@Override
public void abort(final long tx) throws IOException {

    setupLoggingContext();

    try {

        final Tx localState = (Tx) getLocalTransactionManager().getTx(tx);

        if (localState == null) {
            throw new IllegalArgumentException();
        }

        localState.lock.lock();

        try {

            localState.setRunState(RunState.Aborted);

        } finally {

            localState.lock.unlock();

        }

    } finally {

        clearLoggingContext();

    }

}
/*
* IDataService.
*/
/**
 * Forms the name of the index corresponding to a partition of a named
 * scale-out index as <i>name</i>#<i>partitionId</i>. When the
 * <i>partitionId</i> is <code>-1</code> the index is not partitioned and the
 * <i>name</i> is returned unchanged.
 * <p>
 * An advantage of this naming scheme is that index partitions are just named
 * indices and all of the mechanisms for operating on named indices and for
 * concurrency control for named indices apply automatically. Among other
 * things, this means that different tasks can write concurrently on
 * different partitions of the same named index on a given
 * {@link DataService}.
 *
 * @param name
 *            The name of the scale-out index.
 * @param partitionId
 *            The partition identifier -or- <code>-1</code> if the index is
 *            not partitioned.
 *
 * @return The name of the index partition.
 *
 * @throws IllegalArgumentException
 *             if <i>name</i> is <code>null</code>.
 */
public static final String getIndexPartitionName(final String name,
        final int partitionId) {

    if (name == null) {
        throw new IllegalArgumentException();
    }

    final boolean partitioned = partitionId != -1;

    return partitioned ? name + "#" + partitionId : name;

}
/**
 * Returns either {@link IDataService} or {@link IMetadataService} as
 * appropriate.
 *
 * @return {@link IMetadataService} if this instance implements that
 *         interface and {@link IDataService} otherwise.
 */
@Override
public Class getServiceIface() {

    return (this instanceof IMetadataService) ? IMetadataService.class
            : IDataService.class;

}
/**
 * Registers a named index by running a {@link RegisterIndexTask} on the
 * {@link ConcurrencyManager} and blocking until that task is done.
 *
 * @param name
 *            The name of the index.
 * @param metadata
 *            The metadata describing the index.
 *
 * @throws IllegalArgumentException
 *             if <i>metadata</i> is <code>null</code>.
 */
@Override
public void registerIndex(final String name, final IndexMetadata metadata)
        throws IOException, InterruptedException, ExecutionException {

    setupLoggingContext();

    try {

        if (metadata == null) {
            throw new IllegalArgumentException();
        }

        final AbstractTask<UUID> registerTask = new RegisterIndexTask(
                concurrencyManager, name, metadata);

        final Future<?> future = concurrencyManager.submit(registerTask);

        // wait for the task; its result is not used.
        future.get();

    } finally {

        clearLoggingContext();

    }

}
/**
 * Drops a named index by running a {@link DropIndexTask} on the
 * {@link ConcurrencyManager} and blocking until that task is done.
 *
 * @param name
 *            The name of the index.
 */
@Override
public void dropIndex(final String name) throws IOException,
        InterruptedException, ExecutionException {

    setupLoggingContext();

    try {

        final AbstractTask<Boolean> dropTask = new DropIndexTask(
                concurrencyManager, name);

        final Future<?> future = concurrencyManager.submit(dropTask);

        // wait for the task; its result is not used.
        future.get();

    } finally {

        clearLoggingContext();

    }

}
/**
 * Returns the {@link IndexMetadata} of a named index by running a
 * {@link GetIndexMetadataTask} on the {@link ConcurrencyManager} and
 * blocking until that task is done.
 *
 * @param name
 *            The name of the index.
 * @param timestamp
 *            The timestamp of the view. {@link ITx#UNISOLATED} is replaced
 *            by {@link ITx#READ_COMMITTED}.
 *
 * @return The metadata for that index.
 */
@Override
public IndexMetadata getIndexMetadata(final String name, final long timestamp)
        throws IOException, InterruptedException, ExecutionException {

    setupLoggingContext();

    try {

        // Choose READ_COMMITTED iff UNISOLATED was requested.
        final long startTime;
        if (timestamp == ITx.UNISOLATED) {
            startTime = ITx.READ_COMMITTED;
        } else {
            startTime = timestamp;
        }

        final AbstractTask<IndexMetadata> metadataTask = new GetIndexMetadataTask(
                concurrencyManager, startTime, name);

        final Future<IndexMetadata> future = concurrencyManager
                .submit(metadataTask);

        return future.get();

    } finally {

        clearLoggingContext();

    }

}
/**
 * Retrieves the {@link IndexMetadata} for the named index as of the
 * specified timestamp.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public static class GetIndexMetadataTask extends AbstractTask<IndexMetadata> {

    /**
     * @param concurrencyManager
     *            Passed to the {@link AbstractTask} constructor.
     * @param startTime
     *            The timestamp of the view.
     * @param name
     *            The name of the index (the only resource of the task).
     */
    public GetIndexMetadataTask(final ConcurrencyManager concurrencyManager,
            final long startTime, final String name) {

        super(concurrencyManager, startTime, name);

    }

    /**
     * Resolves the only resource declared by this task and returns the
     * metadata of that index.
     */
    @Override
    protected IndexMetadata doTask() throws Exception {

        final String indexName = getOnlyResource();

        return getIndex(indexName).getIndexMetadata();

    }

}
/**
 * Wraps the procedure in an {@link IndexProcedureTask}, submits it to the
 * concurrency manager and returns the {@link Future} of that task without
 * waiting for it.
 * <p>
 * Note: This chooses {@link ITx#READ_COMMITTED} if {@link ITx#UNISOLATED}
 * was requested and the {@link IIndexProcedure} is a read-only operation.
 * This provides better concurrency on the {@link DataService} by moving
 * read-only operations off of the {@link WriteExecutorService}.
 * <p>
 * Note: When the {@link DataService} is accessed via RMI the {@link Future}
 * MUST be a proxy. This gets handled by the concrete server implementation.
 *
 * @param tx
 *            The timestamp or transaction identifier of the request.
 * @param name
 *            The name of the index (required).
 * @param proc
 *            The procedure to be executed (required).
 *
 * @return The {@link Future} for the submitted task.
 *
 * @throws IllegalArgumentException
 *             if <i>name</i> or <i>proc</i> is <code>null</code>.
 */
@Override
public <T> Future<T> submit(final long tx, final String name,
        final IIndexProcedure<T> proc) {
    setupLoggingContext();
    try {
        if (name == null || proc == null) {
            throw new IllegalArgumentException();
        }
        // READ_COMMITTED is used iff the procedure is read-only and
        // UNISOLATED was requested.
        final boolean useReadCommitted = tx == ITx.UNISOLATED
                && proc.isReadOnly();
        final long timestamp = useReadCommitted ? ITx.READ_COMMITTED : tx;
        // Wrap the caller's procedure as a task.
        final AbstractTask<T> task = new IndexProcedureTask<T>(
                concurrencyManager, timestamp, name, proc);
        // Inject the federation and/or this data service when the task
        // declares that it wants them.
        if (task instanceof IFederationCallable) {
            final IFederationCallable federationAware = (IFederationCallable) task;
            federationAware.setFederation(getFederation());
        }
        if (task instanceof IDataServiceCallable) {
            final IDataServiceCallable dataServiceAware = (IDataServiceCallable) task;
            dataServiceAware.setDataService(this);
        }
        // Submit the task; the caller awaits completion via the Future.
        return concurrencyManager.submit(task);
    } finally {
        clearLoggingContext();
    }
}
/**
 * Submits the task to the {@link java.util.concurrent.ExecutorService} of
 * the {@link DataService}'s federation object and returns its
 * {@link Future}. This is used for tasks which are not associated with a
 * timestamp and hence not linked to any specific view of the named
 * indices.
 * <p>
 * Note: When the {@link DataService} is accessed via RMI the {@link Future}
 * MUST be a proxy. This gets handled by the concrete server implementation.
 *
 * @param task
 *            The task (required).
 *
 * @return The {@link Future} for the submitted task.
 *
 * @throws IllegalArgumentException
 *             if <i>task</i> is <code>null</code>.
 *
 * @see AbstractDistributedFederation#getProxy(Future)
 *
 * @todo we should probably put the federation object in a sandbox in order
 *       to prevent various operations by tasks running in the
 *       {@link DataService} using the {@link IDataServiceCallable}
 *       interface to gain access to the {@link DataService}'s federation.
 *       for example, if they use {@link AbstractFederation#shutdownNow()}
 *       then the {@link DataService} itself would be shutdown.
 */
@Override
public Future<? extends Object> submit(final Callable<? extends Object> task) {
    setupLoggingContext();
    try {
        if (task == null) {
            throw new IllegalArgumentException();
        }
        // Inject the federation and/or this data service when the task
        // declares that it wants them.
        if (task instanceof IFederationCallable) {
            final IFederationCallable federationAware = (IFederationCallable) task;
            federationAware.setFederation(getFederation());
        }
        if (task instanceof IDataServiceCallable) {
            final IDataServiceCallable dataServiceAware = (IDataServiceCallable) task;
            dataServiceAware.setDataService(this);
        }
        // Hand the task to the federation's executor and return its Future.
        return getFederation().getExecutorService().submit(task);
    } finally {
        clearLoggingContext();
    }
}
// /**
// * Encapsulate the {@link Future} within a proxy that may be marshalled by
// * RMI and sent to a remote client. The client will interact with the
// * unmarshalled {@link Future}, which in turn will use RMI to control the
// * original {@link Future} within the {@link DataService}.
// * <p>
// * The default implementation simply returns the <i>future</i> and MUST be
// * overridden when remote clients will use RMI to execute methods on the
// * {@link DataService}.
// *
// * @param future
// * The future.
// *
// * @return The encapsulated future.
// */
// protected Future wrapFuture(Future future) {
//
// return future;
//
// }
/**
 * Runs a {@link RangeIteratorTask} for the given key range on the named
 * index and blocks until the {@link ResultSet} is available.
 *
 * @param tx
 *            The timestamp or transaction identifier of the request.
 * @param name
 *            The name of the index (required).
 * @param fromKey
 *            Passed through to the task as the start of the key range.
 * @param toKey
 *            Passed through to the task as the end of the key range.
 * @param capacity
 *            Passed through to the task as the result set capacity.
 * @param flags
 *            The {@link IRangeQuery} flags.
 * @param filter
 *            An optional filter (may be <code>null</code>).
 *
 * @return The result set produced by the task.
 *
 * @throws IllegalArgumentException
 *             if <i>name</i> is <code>null</code>.
 */
@Override
public ResultSet rangeIterator(final long tx, final String name,
        final byte[] fromKey, final byte[] toKey, final int capacity,
        final int flags, final IFilter filter)
        throws InterruptedException, ExecutionException {
    setupLoggingContext();
    try {
        if (name == null) {
            throw new IllegalArgumentException();
        }
        /*
         * Decide whether the iterator is read-only for the time that it
         * executes on the data service. The CURSOR flag is deliberately
         * ignored here since modifications during iterator execution on
         * the data service can only be introduced via a filter or the
         * REMOVEALL flag. The caller will be using a chunked iterator.
         * Therefore if they choose to delete tuples while visiting the
         * elements in the ResultSet then the deletes will be issued as
         * separate requests.
         */
        final boolean declaredReadOnly = (flags & IRangeQuery.READONLY) != 0;
        final boolean removeAll = (flags & IRangeQuery.REMOVEALL) != 0;
        final boolean readOnly = declaredReadOnly
                || (filter == null && !removeAll);
        /*
         * If the iterator is read-only then READ_COMMITTED has the same
         * semantics as UNISOLATED and provides better concurrency since it
         * reduces contention for the writeService.
         */
        final long timestamp = (tx == ITx.UNISOLATED && readOnly)
                ? ITx.READ_COMMITTED
                : tx;
        final RangeIteratorTask iteratorTask = new RangeIteratorTask(
                concurrencyManager, timestamp, name, fromKey, toKey,
                capacity, flags, filter);
        // Submit the task and wait for it to complete.
        return concurrencyManager.submit(iteratorTask).get();
    } finally {
        clearLoggingContext();
    }
}
/**
 * Returns an {@link IBlock} for the record at the given address in the
 * store identified by the resource's UUID. The byte count of the record is
 * resolved eagerly, but the record itself is only read from the store when
 * {@link IBlock#inputStream()} is invoked on the returned object.
 *
 * @param resource
 *            The description of the resource (required).
 * @param addr
 *            The address of the record in that resource (must be non-zero).
 *
 * @return The block.
 *
 * @throws IllegalArgumentException
 *             if <i>resource</i> is <code>null</code> or <i>addr</i> is
 *             <code>0L</code>.
 * @throws IllegalStateException
 *             if the resource manager does not return a store for the
 *             resource.
 *
 * @todo this operation should be able to abort an
 *       {@link IBlock#inputStream() read} that takes too long or if there
 *       is a need to delete the resource.
 *
 * @todo this should be run on the read service.
 *
 * @todo coordinate close out of stores.
 *
 * @todo efficient (stream-based) read from the journal (IBlockStore API).
 *       This is a fully buffered read and will cause heap churn.
 */
@Override
public IBlock readBlock(final IResourceMetadata resource, final long addr) {
    if (resource == null)
        throw new IllegalArgumentException();
    if (addr == 0L)
        throw new IllegalArgumentException();
    setupLoggingContext();
    final long begin = System.nanoTime();
    try {
        final IRawStore store = resourceManager.openStore(resource.getUUID());
        if (store == null) {
            log.warn("Resource not available: " + resource);
            readBlockApiCounters.readBlockErrorCount++;
            throw new IllegalStateException("Resource not available");
        }
        final int byteCount = store.getByteCount(addr);
        return new IBlock() {
            @Override
            public long getAddress() {
                return addr;
            }
            // @todo reuse buffers
            @Override
            public InputStream inputStream() {
                // this is when it actually reads the data.
                final ByteBuffer buf = store.read(addr);
                // #of bytes buffered.
                readBlockApiCounters.readBlockBytes += byteCount;
                // caller will read from this object.
                return new ByteBufferInputStream(buf);
            }
            @Override
            public int length() {
                return byteCount;
            }
        };
    } finally {
        // Every request is counted, including those which failed.
        readBlockApiCounters.readBlockCount++;
        /*
         * Accumulate the elapsed time rather than overwriting it so that
         * this counter is a running total like the request, error and byte
         * counters. Note that the elapsed time covers opening the store
         * and resolving the byte count, not the deferred read performed by
         * inputStream().
         */
        readBlockApiCounters.readBlockNanos += System.nanoTime() - begin;
        clearLoggingContext();
    }
}
/**
 * Task for running a rangeIterator operation. The task visits a key range
 * of a single named index and materializes the visited tuples into a
 * {@link ResultSet}.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
static protected class RangeIteratorTask extends AbstractTask<ResultSet> {
    private final byte[] fromKey;
    private final byte[] toKey;
    private final int capacity;
    private final int flags;
    private final IFilter filter;

    /**
     * @param concurrencyManager
     *            The concurrency manager handed to the super class.
     * @param startTime
     *            The timestamp handed to the super class.
     * @param name
     *            The name of the index (the only resource of the task).
     * @param fromKey
     *            Passed to the index as the start of the key range.
     * @param toKey
     *            Passed to the index as the end of the key range.
     * @param capacity
     *            The capacity handed to the {@link ResultSet}; it also
     *            bounds the limit given to the iterator.
     * @param flags
     *            The {@link IRangeQuery} flags.
     * @param filter
     *            An optional filter (may be <code>null</code>).
     */
    public RangeIteratorTask(final ConcurrencyManager concurrencyManager,
            final long startTime, final String name, final byte[] fromKey,
            final byte[] toKey, final int capacity, final int flags,
            final IFilter filter) {
        super(concurrencyManager, startTime, name);
        this.fromKey = fromKey;
        this.toKey = toKey;
        this.capacity = capacity;
        this.flags = flags;
        this.filter = filter; // MAY be null.
    }

    /**
     * Visits the key range on the index and populates a {@link ResultSet}
     * from the visited tuples.
     */
    @Override
    public ResultSet doTask() throws Exception {
        final IIndex ndx = getIndex(getOnlyResource());
        /*
         * Figure out the upper bound on the #of tuples that could be
         * materialized.
         *
         * Note: the range count is kept as a [long] and the smaller of the
         * range count and the capacity is taken BEFORE narrowing to an
         * [int]. A plain (int) cast of the range count would wrap for
         * counts above Integer.MAX_VALUE and could yield a negative limit.
         * Since the capacity is an [int], the minimum cannot exceed
         * Integer.MAX_VALUE.
         */
        final long rangeCount = ndx.rangeCount(fromKey, toKey);
        final int limit = (int) Math.min(rangeCount, (long) capacity);
        /*
         * Iterator that will visit the key range.
         *
         * Note: We always visit the keys regardless of whether we pass them
         * on to the caller. This is necessary in order for us to set the
         * [lastKey] field on the result set and that is necessary to
         * support continuation queries.
         */
        final ITupleIterator<?> itr = ndx.rangeIterator(fromKey, toKey, limit,
                flags | IRangeQuery.KEYS, filter);
        /*
         * Populate the result set from the iterator.
         */
        return new ResultSet(ndx, capacity, flags, itr);
    }
}
/*
* Overflow processing API
*/
/**
 * Requests overflow processing by running a {@link ForceOverflowTask}.
 *
 * @param immediate
 *            When <code>true</code> the request is run as an
 *            {@link ITx#UNISOLATED} task through the concurrency manager
 *            and this method blocks until that task is done. When
 *            <code>false</code> the flag is set directly in the caller's
 *            thread.
 * @param compactingMerge
 *            Passed through to the {@link ForceOverflowTask}.
 *
 * @throws UnsupportedOperationException
 *             if the resource manager is not a {@link ResourceManager}.
 */
@Override
public void forceOverflow(final boolean immediate,
        final boolean compactingMerge) throws IOException,
        InterruptedException, ExecutionException {
    setupLoggingContext();
    try {
        if (!(resourceManager instanceof ResourceManager)) {
            throw new UnsupportedOperationException();
        }
        final Callable<Void> setFlagsTask = new ForceOverflowTask(
                compactingMerge);
        log.warn("Will force overflow: immediate=" + immediate
                + ", compactingMerge=" + compactingMerge);
        if (!immediate) {
            /*
             * Provoke overflow with the next group commit. All this does is
             * set the flag that will cause overflow to occur with the next
             * group commit. Since the task does not run on the write
             * service it will return immediately.
             */
            try {
                setFlagsTask.call();
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
            return;
        }
        /*
         * Run the task on the write service. The task writes a small record
         * on the journal in order to make sure that it is dirty and then
         * sets the flag to force overflow with the next commit. Since the
         * task runs on the write service and since the journal is dirty, a
         * group commit will occur and synchronous overflow processing will
         * occur before this method returns.
         *
         * Note: the resource itself is arbitrary - there is no index by
         * that name.
         */
        final AbstractTask<Void> writeServiceTask = new AbstractTask<Void>(
                getConcurrencyManager(), ITx.UNISOLATED,
                new String[] { "__forceOverflow" }) {
            @Override
            protected Void doTask() throws Exception {
                // write a one byte record on the journal.
                getJournal().write(ByteBuffer.wrap(new byte[] { 1 }));
                // run task that will set the overflow flag.
                return setFlagsTask.call();
            }
        };
        getConcurrencyManager().submit(writeServiceTask).get();
    } finally {
        clearLoggingContext();
    }
}
/**
 * Delegates to {@link #getResourceManager()} and returns whatever its
 * <code>purgeOldResources</code> method reports.
 *
 * @param timeout
 *            Passed through to the resource manager.
 * @param truncateJournal
 *            Passed through to the resource manager.
 */
@Override
public boolean purgeOldResources(final long timeout,
        final boolean truncateJournal) throws InterruptedException {
    // All of the work is done by the resource manager.
    final boolean result = getResourceManager().purgeOldResources(timeout,
            truncateJournal);
    return result;
}
/**
 * Task sets the flag that will cause overflow processing to be triggered on
 * the next group commit. The flags are only touched when the
 * {@link ResourceManager} reports that overflow is currently allowed.
 *
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
private class ForceOverflowTask implements Callable<Void> {

    /** When <code>true</code> the compacting merge flag is set as well. */
    private final boolean compactingMerge;

    public ForceOverflowTask(final boolean compactingMerge) {
        this.compactingMerge = compactingMerge;
    }

    /**
     * Sets the overflow flag(s) on the {@link ResourceManager} of the
     * enclosing {@link DataService}.
     *
     * @return Always <code>null</code>.
     */
    @Override
    public Void call() throws Exception {
        final ResourceManager rmgr = (ResourceManager) DataService.this.resourceManager;
        if (!rmgr.isOverflowAllowed()) {
            // Overflow is not allowed right now, so nothing is requested.
            return null;
        }
        if (compactingMerge) {
            rmgr.compactingMerge.set(true);
        }
        // trigger overflow on the next group commit.
        rmgr.forceOverflow.set(true);
        return null;
    }
}
/**
 * Returns the asynchronous overflow count reported by the resource
 * manager.
 *
 * @throws UnsupportedOperationException
 *             if the resource manager is not a {@link ResourceManager}.
 */
@Override
public long getAsynchronousOverflowCounter() throws IOException {
    setupLoggingContext();
    try {
        if (resourceManager instanceof ResourceManager) {
            return resourceManager.getAsynchronousOverflowCount();
        }
        throw new UnsupportedOperationException();
    } finally {
        clearLoggingContext();
    }
}
/**
 * Reports whether overflow processing is in progress, which is taken to be
 * the case when overflow is enabled on the resource manager but not
 * currently allowed.
 *
 * @return <code>true</code> iff overflow is enabled and not allowed.
 *
 * @throws UnsupportedOperationException
 *             if the resource manager is not a {@link ResourceManager}.
 */
@Override
public boolean isOverflowActive() throws IOException {
    setupLoggingContext();
    try {
        if (!(resourceManager instanceof ResourceManager)) {
            throw new UnsupportedOperationException();
        }
        if (!resourceManager.isOverflowEnabled()) {
            // Overflow processing is disabled, so it cannot be running.
            return false;
        }
        /*
         * overflow processing is enabled but not allowed, which means that
         * overflow processing is occurring right now.
         */
        return !resourceManager.isOverflowAllowed();
    } finally {
        clearLoggingContext();
    }
}
}