/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Mar 14, 2007
 */

package com.bigdata.service;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.log4j.Logger;

import com.bigdata.Banner;
import com.bigdata.bop.engine.IQueryPeer;
import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.bop.fed.FederatedQueryEngine;
import com.bigdata.btree.IIndex;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.ResultSet;
import com.bigdata.btree.proc.IIndexProcedure;
import com.bigdata.counters.CounterSet;
import com.bigdata.counters.Instrument;
import com.bigdata.io.ByteBufferInputStream;
import com.bigdata.journal.AbstractLocalTransactionManager;
import com.bigdata.journal.AbstractTask;
import com.bigdata.journal.ConcurrencyManager;
import com.bigdata.journal.DropIndexTask;
import com.bigdata.journal.IConcurrencyManager;
import com.bigdata.journal.IDistributedTransactionService;
import com.bigdata.journal.ILocalTransactionManager;
import com.bigdata.journal.IResourceManager;
import com.bigdata.journal.ITransactionService;
import com.bigdata.journal.ITx;
import com.bigdata.journal.IndexProcedureTask;
import com.bigdata.journal.JournalTransactionService.SinglePhaseCommit;
import com.bigdata.journal.Name2Addr;
import com.bigdata.journal.RegisterIndexTask;
import com.bigdata.journal.RunState;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.journal.Tx;
import com.bigdata.journal.WriteExecutorService;
import com.bigdata.mdi.IResourceMetadata;
import com.bigdata.rawstore.IBlock;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.resources.ResourceManager;
import com.bigdata.resources.StoreManager;
import com.bigdata.resources.StoreManager.ManagedJournal;

import cutthecrap.utils.striterators.IFilter;

/**
 * An implementation of a network-capable {@link IDataService}. The service is
 * started using the {@link DataServer} class. Operations are submitted using
 * {@link IConcurrencyManager#submit(AbstractTask)} and will run with the
 * appropriate concurrency controls as imposed by that method.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * 
 * @see DataServer, which is used to start this service.
 * 
 * @todo Startup should be broken into two aspects: local startup and service
 *       connect and disconnect events.
 *       For example, on connect to the tx service the store manager should
 *       notify the tx service of the last commit time on the live journal. On
 *       disconnect, the data service needs to go offline. The metadata service
 *       is required only for overflow processing, but if it remains down then
 *       we will eventually need to bring the data service offline once the
 *       buffered writes would cause the live journal to no longer be fully
 *       buffered, since overflow processing time will increase if we need to
 *       read through to the disk during overflow.
 * 
 * @todo Write a benchmark test to measure interhost transfer rates. Should be
 *       100Mbits/sec (~12M/sec) on a 100BaseT switched network. With full
 *       duplex in the network and the protocol, that rate should be
 *       bidirectional. Can that rate be sustained with a fully connected
 *       bi-directional transfer?
 * 
 * FIXME Probably ALL of the methods of {@link IDataService} should be subsumed
 * under {@link #submit(Callable)} or
 * {@link #submit(long, String, IIndexProcedure)} so they do not block on the
 * {@link DataService} and thereby absorb a thread.
 * 
 * @todo Review JERI options to support secure RMI protocols. For example,
 *       using SSL or an SSH tunnel. For most purposes I expect bigdata to
 *       operate on a private network, but replication across gateways is also
 *       a common use case. Do we have to handle it specially?
 */
abstract public class DataService extends AbstractService implements
        IDataService, IServiceShutdown, ISession //IWritePipeline
{

    protected static final Logger log = Logger.getLogger(DataService.class);

    /**
     * Options understood by the {@link DataService}.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    public static interface Options extends com.bigdata.journal.Options,
            com.bigdata.journal.ConcurrencyManager.Options,
            com.bigdata.resources.ResourceManager.Options,
            com.bigdata.counters.AbstractStatisticsCollector.Options,
            com.bigdata.service.IBigdataClient.Options
            // @todo local tx manager options?
    {

    }

    /**
     * @todo improve reporting here and for block write as well (goes through
     *       unisolated tasks at present).
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    protected static class ReadBlockCounters {

        /** #of block read requests. */
        long readBlockCount, readBlockErrorCount, readBlockBytes, readBlockNanos;

        public ReadBlockCounters() {
        }

    }

    /**
     * Counters for the block read API.
     */
    final private ReadBlockCounters readBlockApiCounters = new ReadBlockCounters();

    /**
     * Object manages the resources hosted by this {@link DataService}.
     */
    private ResourceManager resourceManager;

    /**
     * Object provides concurrency control for the named resources (indices).
     */
    private ConcurrencyManager concurrencyManager;

    /**
     * Object supports local transactions and does handshaking with the
     * {@link DistributedTransactionService}.
     */
    private DataServiceTransactionManager localTransactionManager;

    /**
     * Object used to support distributed query.
     */
    private final AtomicReference<FederatedQueryEngine> queryEngine = new AtomicReference<FederatedQueryEngine>();

    /**
     * The object used to manage the local resources.
     */
    public ResourceManager getResourceManager() {

        return resourceManager;

    }

    /**
     * The object used to support distributed query against an
     * {@link IBigdataFederation}.
     */
    public IQueryPeer getQueryEngine() {

        return queryEngine.get();

    }

    /**
     * The object used to control access to the local resources.
     */
    public ConcurrencyManager getConcurrencyManager() {

        return concurrencyManager;

    }

    /**
     * The object used to coordinate transactions executing against local
     * resources.
     */
    public ILocalTransactionManager getLocalTransactionManager() {

        return localTransactionManager;

    }

    /**
     * Returns the {@link IResourceManager}.
     * 
     * @param properties
     *            Properties to configure that object.
     * 
     * @return The {@link IResourceManager}.
     */
    protected IResourceManager newResourceManager(final Properties properties) {

        return new ResourceManager(properties) {

            @Override
            public IBigdataFederation<?> getFederation() {

                return DataService.this.getFederation();

            }

            @Override
            public DataService getDataService() {

                return DataService.this;

            }

            @Override
            public UUID getDataServiceUUID() {

                return DataService.this.getServiceUUID();

            }

//            /**
//             * @todo this must report the entire service failover chain.
//             */
//            public UUID[] getDataServiceUUIDs() {
//
//                return new UUID[] {
//
//                        getDataServiceUUID()
//
//                };
//
//            }

        };

    }

    /**
     * A clone of the properties specified to the ctor.
     */
    private final Properties properties;

    /**
     * An object wrapping the properties specified to the ctor.
     */
    public Properties getProperties() {

        return new Properties(properties);

    }

    /**
     * The dynamic property set associated with the service instance.
     */
    private final Session session = new Session();

    @Override
    public Session getSession() {

        return session;

    }

    /**
     * Core constructor - you MUST {@link #start()} the {@link DataService}
     * before it can be used.
     * 
     * @param properties
     *            The configuration properties.
     * 
     * @see Options
     * @see #start()
     */
    protected DataService(final Properties properties) {

        // show the copyright banner during startup.
        Banner.banner();

        this.properties = (Properties) properties.clone();

    }

    /**
     * Note: "open" is judged by {@link ConcurrencyManager#isOpen()}, but the
     * {@link DataService} is not usable until {@link StoreManager#isStarting()}
     * returns <code>false</code> (there is asynchronous processing involved in
     * reading the existing store files or creating the first store file, and
     * you cannot use the {@link DataService} until that processing has been
     * completed). The {@link ConcurrencyManager} will block for a while
     * waiting for the {@link StoreManager} startup to complete and will reject
     * tasks if startup processing does not complete within a timeout.
     */
    public boolean isOpen() {

        final ConcurrencyManager tmp = this.concurrencyManager;

        return tmp != null && tmp.isOpen();

    }

    /**
     * Concrete implementation manages the local state of transactions
     * executing on a {@link DataService}.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    public class DataServiceTransactionManager extends
            AbstractLocalTransactionManager {

        @Override
        public ITransactionService getTransactionService() {

            return DataService.this.getFederation().getTransactionService();

        }

        /**
         * Exposed to {@link DataService#singlePhaseCommit(long)}.
         */
        @Override
        public void deactivateTx(final Tx localState) {

            super.deactivateTx(localState);

        }

    }
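    /*
     * A minimal lifecycle sketch (hypothetical client code: "MyDataService"
     * is an assumed concrete subclass and "props" its configuration). It
     * illustrates the note on isOpen(): the service object exists once
     * start() returns, but it is not usable until the asynchronous
     * StoreManager startup has completed.
     *
     *   final DataService service = new MyDataService(props).start();
     *
     *   // block until the store manager has (re-)opened its store files.
     *   service.getResourceManager().awaitRunning();
     *
     *   assert service.isOpen(); // now safe to submit tasks.
     *
     *   service.shutdown(); // polite shutdown when done.
     */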
    /**
     * Starts the {@link DataService}.
     * 
     * @todo it would be nice if {@link #start()} could restart after
     *       {@link #shutdown()}, but that is hardly necessary.
     */
    @Override
    synchronized public DataService start() {

        if (isOpen()) {

            throw new IllegalStateException();

        }

        resourceManager = (ResourceManager) newResourceManager(properties);

        localTransactionManager = new DataServiceTransactionManager();

        concurrencyManager = new ConcurrencyManager(properties,
                localTransactionManager, resourceManager);

        if (resourceManager instanceof ResourceManager) {

            /*
             * Startup the resource manager.
             */
            ((ResourceManager) resourceManager)
                    .setConcurrencyManager(concurrencyManager);

        }

        /*
         * Hook sets up the queryEngine reference once the data service is
         * running.
         */
        getFederation().getExecutorService().execute(new Runnable() {
            public void run() {

                final DataService dataService = DataService.this;

                // wait for the store manager to finish its startup.
                dataService.getResourceManager().awaitRunning();

                final FederatedQueryEngine queryEngine = new FederatedQueryEngine(
                        dataService);

                queryEngine.init();

                dataService.queryEngine.set(queryEngine);

                if (log.isInfoEnabled())
                    log.info("Setup query engine.");

            }
        });

        return this;

    }
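    /*
     * Note: the FederatedQueryEngine reference above is published
     * asynchronously once the store manager is running, so getQueryEngine()
     * may return null for a short window after start() returns. A caller
     * needing distributed query could poll for it (a sketch; the 10ms
     * interval is an assumption):
     *
     *   IQueryPeer peer;
     *   while ((peer = dataService.getQueryEngine()) == null) {
     *       Thread.sleep(10); // wait for the startup hook to publish it.
     *   }
     */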
    /**
     * Delegate handles custom counters for the {@link ResourceManager}, the
     * local {@link AbstractTransactionService} and the
     * {@link ConcurrencyManager}, dynamic re-attachment of counters, etc. This
     * delegate must be set on the {@link AbstractClient} for those additional
     * features to work.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    static public class DataServiceFederationDelegate extends
            DefaultServiceFederationDelegate<DataService> {

        private final DataService dataService;

        public DataServiceFederationDelegate(final DataService service) {

            super(service);

            this.dataService = service;

        }

//        /**
//         * Dynamically detach and attach the counters for the named indices
//         * underneath of the {@link IndexManager}.
//         * <p>
//         * Note: This method limits the frequency of update to no more than
//         * once every 5 seconds.
//         * <p>
//         * Note: {@link OverflowManager#overflow()} is responsible for
//         * reattaching the counters for the live {@link ManagedJournal} during
//         * synchronous overflow.
//         */
//        @Override
//        synchronized public void reattachDynamicCounters() {
//
//            final long now = System.currentTimeMillis();
//
//            final long elapsed = now - lastReattachMillis;
//
//            if (dataService.isOpen() && dataService.resourceManager.isRunning()
//                    && elapsed > 5000/* ms */) {
//
//                // inherit base class behavior
//                super.reattachDynamicCounters();
//
//                // The service's counter set hierarchy.
//                final CounterSet serviceRoot = dataService.getFederation()
//                        .getServiceCounterSet();
//
////                // The lock manager
////                {
////
////                    // the lock manager is a direct child of this node.
////                    final CounterSet tmp = (CounterSet) serviceRoot
////                            .makePath(IDataServiceCounters.concurrencyManager
////                                    + ICounterSet.pathSeparator
////                                    + IConcurrencyManagerCounters.writeService);
////
////                    synchronized (tmp) {
////
////                        /*
////                         * Note: We detach and then attach since that wipes
////                         * out any counter set nodes for queues which no
////                         * longer exist. Otherwise they will build up forever.
////                         */
////
////                        // detach the old counters.
////                        tmp.detach(IConcurrencyManagerCounters.LockManager);
////
////                        // attach the new counters.
////                        ((CounterSet) tmp
////                                .makePath(IConcurrencyManagerCounters.LockManager))
////                                .attach(dataService.concurrencyManager
////                                        .getWriteService().getLockManager()
////                                        .getCounters());
////
////                    }
////
////                }
//
//                // The live indices.
//                {
//
//                    /*
//                     * The counters for the index manager within the service's
//                     * counter hierarchy.
//                     *
//                     * Note: The indices are a direct child of this node.
//                     */
//                    final CounterSet tmp = (CounterSet) serviceRoot
//                            .getPath(IDataServiceCounters.resourceManager
//                                    + ICounterSet.pathSeparator
//                                    + IResourceManagerCounters.IndexManager);
//
//                    synchronized (tmp) {
//
//                        /*
//                         * Note: We detach and then attach since that wipes
//                         * out any counter set nodes for index partitions
//                         * which no longer exist. Otherwise they will build up
//                         * forever.
//                         */
//                        final boolean exists = tmp
//                                .getPath(IIndexManagerCounters.Indices) != null;
//
//                        // detach the index partition counters.
//                        tmp.detach(IIndexManagerCounters.Indices);
//
//                        // attach the current index partition counters.
//                        ((CounterSet) tmp
//                                .makePath(IIndexManagerCounters.Indices))
//                                .attach(dataService.resourceManager
//                                        .getIndexCounters());
//
//                        if (log.isInfoEnabled())
//                            log.info("Attached index partition counters: preexisting="
//                                    + exists + ", path=" + tmp.getPath());
//
//                    }
//
//                }
//
//                lastReattachMillis = now;
//
//            }
//
//        }
//        private long lastReattachMillis = 0L;

        @Override
        public boolean isServiceReady() {

            if (!dataService.resourceManager.isOpen()) {

                /*
                 * This will happen if the store manager is unable to discover
                 * the timestamp service. It will halt its startup process and
                 * report that it is closed. At that point the data service
                 * cannot start and will shut down.
                 */

                if (log.isInfoEnabled())
                    log.info("Store manager not open - will shutdown.");

                // shutdown the data service.
                dataService.shutdownNow();

                // collection was not started.
                return false;

            }

            if (!dataService.resourceManager.isRunning()) {

                log.warn("Resource manager is not running yet.");

                return false;

            }

            return true;

        }

        /**
         * Extended to setup {@link DataService} specific counters and to
         * write the client URL onto a file in the service's data directory.
         */
        @Override
        public void didStart() {

            super.didStart();

            setupCounters();

            logHttpdURL(dataService.getHTTPDURLFile());

        }

        /**
         * Sets up {@link DataService} specific counters.
         * 
         * @see IDataServiceCounters
         */
        protected void setupCounters() {

            if (getServiceUUID() == null) {

                throw new IllegalStateException(
                        "The ServiceUUID is not available yet");

            }

            if (!dataService.isOpen()) {

                // The service has already been closed.
                log.warn("Service is not open.");

                return;

            }

            /*
             * Service specific counters.
             */

            final CounterSet serviceRoot = dataService.getFederation()
                    .getServiceCounterSet();

            serviceRoot.makePath(IDataServiceCounters.resourceManager).attach(
                    dataService.resourceManager.getCounters());

            serviceRoot.makePath(IDataServiceCounters.concurrencyManager)
                    .attach(dataService.concurrencyManager.getCounters());

            serviceRoot.makePath(IDataServiceCounters.transactionManager)
                    .attach(dataService.localTransactionManager.getCounters());

            {
                final QueryEngine queryEngine = dataService.queryEngine.get();

                if (queryEngine != null) {

                    serviceRoot.makePath(IDataServiceCounters.queryEngine)
                            .attach(queryEngine.getCounters());

                }
            }

            // block API.
            {

                final CounterSet tmp = serviceRoot.makePath("Block API");

                tmp.addCounter("Blocks Read", new Instrument<Long>() {
                    @Override
                    public void sample() {
                        setValue(dataService.readBlockApiCounters.readBlockCount);
                    }
                });

                tmp.addCounter("Blocks Read Per Second",
                        new Instrument<Double>() {
                            @Override
                            public void sample() {

                                // @todo encapsulate this logic.
                                final long secs = TimeUnit.SECONDS.convert(
                                        dataService.readBlockApiCounters.readBlockNanos,
                                        TimeUnit.NANOSECONDS);

                                final double v;

                                if (secs == 0L)
                                    v = 0d;
                                else
                                    v = dataService.readBlockApiCounters.readBlockCount
                                            / secs;

                                setValue(v);

                            }
                        });

            }

        }

    }

    /**
     * Polite shutdown does not accept new requests and will shut down once
     * the existing requests have been processed.
     */
    @Override
    synchronized public void shutdown() {

        if (!isOpen())
            return;

        final QueryEngine queryEngine = this.queryEngine.get();

        if (queryEngine != null) {
            queryEngine.shutdown();
//            queryEngineManager = null;
        }

        if (concurrencyManager != null) {
            concurrencyManager.shutdown();
//            concurrencyManager = null;
        }

        if (localTransactionManager != null) {
            localTransactionManager.shutdown();
//            localTransactionManager = null;
        }

        if (resourceManager != null) {
            resourceManager.shutdown();
//            resourceManager = null;
        }

        super.shutdown();

    }

    /**
     * Shutdown attempts to abort in-progress requests and shut down as soon
     * as possible.
     */
    @Override
    synchronized public void shutdownNow() {

        if (!isOpen())
            return;

        final QueryEngine queryEngine = this.queryEngine.get();

        if (queryEngine != null) {
            queryEngine.shutdownNow();
//            queryEngineManager = null;
        }

        if (concurrencyManager != null) {
            concurrencyManager.shutdownNow();
//            concurrencyManager = null;
        }

        if (localTransactionManager != null) {
            localTransactionManager.shutdownNow();
//            localTransactionManager = null;
        }

        if (resourceManager != null) {
            resourceManager.shutdownNow();
//            resourceManager = null;
        }

        super.shutdownNow();

    }

    @Override
    synchronized public void destroy() {

        super.destroy();

        resourceManager.deleteResources();

        final File file = getHTTPDURLFile();

        if (file.exists()) {

            file.delete();

        }

//        super.destroy();

    }

    /**
     * The file on which the URL of the embedded httpd service is written.
     */
    protected File getHTTPDURLFile() {

        return new File(getResourceManager().getDataDir(), "httpd.url");

    }

    /**
     * Interface defines and documents the counters and counter namespaces
     * reported by the {@link DataService} and the various services which it
     * uses.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    public static interface IDataServiceCounters extends
            ConcurrencyManager.IConcurrencyManagerCounters,
            // ...TransactionManager.XXXCounters,
            ResourceManager.IResourceManagerCounters {

        /**
         * The namespace for the counters pertaining to the
         * {@link ConcurrencyManager}.
         */
        String concurrencyManager = "Concurrency Manager";

        /**
         * The namespace for the counters pertaining to the
         * {@link ILocalTransactionService}.
         */
        String transactionManager = "Transaction Manager";

        /**
         * The namespace for the counters pertaining to the
         * {@link ResourceManager}.
         */
        String resourceManager = "Resource Manager";

        /**
         * The namespace for the counters pertaining to the
         * {@link QueryEngine}.
         */
        String queryEngine = "Query Engine";

    }

    /*
     * ITxCommitProtocol.
     */

    @Override
    public void setReleaseTime(final long releaseTime) {

        setupLoggingContext();

        try {

            getResourceManager().setReleaseTime(releaseTime);

        } finally {

            clearLoggingContext();

        }

    }

    /**
     * Note: This is basically identical to the standalone journal case.
     * 
     * @see JournalTransactionService#commitImpl(long)
     */
    @Override
    public long singlePhaseCommit(final long tx) throws ExecutionException,
            InterruptedException, IOException {

        setupLoggingContext();

        try {

            if (TimestampUtility.isReadOnly(tx)) {

                /*
                 * A read-only transaction.
                 * 
                 * Note: We do not maintain state on the client for read-only
                 * transactions.
                 * The state for a read-only transaction is captured by its
                 * transaction identifier and by state on the transaction
                 * service, which maintains a read lock.
                 * 
                 * Note: An exception is thrown since this method will not be
                 * invoked by the txService for a read-only tx.
                 */

                throw new IllegalArgumentException();

            }

            final Tx localState = (Tx) getLocalTransactionManager().getTx(tx);

            if (localState == null) {

                // This is not an active transaction.
                throw new IllegalStateException();

            }

            /*
             * Note: This code is shared (copy-by-value) by the
             * JournalTransactionService commitImpl(...).
             */
            final ManagedJournal journal = getResourceManager()
                    .getLiveJournal();

            {

                /*
                 * A transaction with an empty write set can commit
                 * immediately since validation and commit are basically NOPs
                 * (this is the same as the read-only case).
                 * 
                 * Note: We lock out other operations on this tx so that this
                 * decision will be atomic.
                 */

                localState.lock.lock();

                try {

                    if (localState.isEmptyWriteSet()) {

                        // Sort of a NOP commit.
                        localState.setRunState(RunState.Committed);

                        ((DataServiceTransactionManager) journal
                                .getLocalTransactionManager())
                                .deactivateTx(localState);

//                        state.setRunState(RunState.Committed);

                        return 0L;

                    }

                } finally {

                    localState.lock.unlock();

                }

            }

            final IConcurrencyManager concurrencyManager = /*journal.*/getConcurrencyManager();

            final AbstractTask<Void> task = new SinglePhaseCommit(
                    concurrencyManager, journal.getLocalTransactionManager(),
                    localState);

            try {

                /*
                 * FIXME This is not working yet. If we submit directly to the
                 * concurrency manager, then there is a ClassCastException on
                 * the DirtyListener. If we submit directly to the
                 * WriteService then the task does not hold its locks. None of
                 * these options work. The write service really needs a
                 * refactor (to be state based, rather like the new lock
                 * service) before I finish the distributed commit protocol.
                 */

                // submit and wait for the result.
                concurrencyManager.submit(task).get();
//                .getWriteService().submit(task).get();
//                .getWriteService().getLockManager().submit(task.getResource(), task).get();

                /*
                 * FIXME The state changes for the local tx should be atomic
                 * across this operation. In order to do that we have to make
                 * those changes inside of SinglePhaseTask while it is holding
                 * the lock, but after it has committed. Perhaps the best way
                 * to do this is with a pre- and post- call() API since we can
                 * not hold the lock across the task otherwise (it will
                 * deadlock).
                 */

                localState.lock.lock();

                try {

                    localState.setRunState(RunState.Committed);

                    ((DataServiceTransactionManager) journal
                            .getLocalTransactionManager())
                            .deactivateTx(localState);

//                    state.setRunState(RunState.Committed);

                } finally {

                    localState.lock.unlock();

                }

            } catch (Throwable t) {

//                log.error(t.getMessage(), t);

                localState.lock.lock();

                try {

                    localState.setRunState(RunState.Aborted);

                    ((DataServiceTransactionManager) journal
                            .getLocalTransactionManager())
                            .deactivateTx(localState);

//                    state.setRunState(RunState.Aborted);

                    throw new RuntimeException(t);

                } finally {

                    localState.lock.unlock();

                }

            }

            /*
             * Note: This is returning the commitTime set on the task when it
             * was committed as part of a group commit.
             */

//            log.warn("\n" + state + "\n" + localState);

            return task.getCommitTime();

        } finally {

            clearLoggingContext();

        }

    }

    @Override
    public void prepare(final long tx, final long revisionTime)
            throws ExecutionException, InterruptedException, IOException {

        setupLoggingContext();

        try {

            if (TimestampUtility.isReadOnly(tx)) {

                /*
                 * A read-only transaction.
                 * 
                 * Note: We do not maintain state on the client for read-only
                 * transactions. The state for a read-only transaction is
                 * captured by its transaction identifier and by state on the
                 * transaction service, which maintains a read lock.
                 * 
                 * Note: An exception is thrown since this method will not be
                 * invoked by the txService for a read-only tx.
                 */

                throw new IllegalArgumentException();

            }

            final Tx state = (Tx) getLocalTransactionManager().getTx(tx);

            if (state == null) {

                // This is not an active transaction.
                throw new IllegalStateException();

            }

            // Submit the task and await its future.
            concurrencyManager.submit(
                    new DistributedCommitTask(concurrencyManager,
                            resourceManager, getServiceUUID(), state,
                            revisionTime)).get();

            // Done.

        } finally {

            clearLoggingContext();

        }

    }

    /**
     * Task handling the distributed commit protocol for the
     * {@link IDataService}.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    private static class DistributedCommitTask extends AbstractTask<Void> {

        // ctor args.
        private final ResourceManager resourceManager;
        private final UUID dataServiceUUID;
        private final Tx state;
        private final long revisionTime;

        // derived.
        private final long tx;

        /**
         * @param concurrencyManager
         * @param resourceManager
         * @param dataServiceUUID
         * @param localState
         * @param revisionTime
         */
        public DistributedCommitTask(//
                final ConcurrencyManager concurrencyManager,//
                final ResourceManager resourceManager,//
                final UUID dataServiceUUID,//
                final Tx localState,//
                final long revisionTime//
        ) {

            super(concurrencyManager, ITx.UNISOLATED, localState
                    .getDirtyResource());

            if (resourceManager == null)
                throw new IllegalArgumentException();

            if (localState == null)
                throw new IllegalArgumentException();

            if (revisionTime == 0L)
                throw new IllegalArgumentException();

            if (revisionTime <= localState.getStartTimestamp())
                throw new IllegalArgumentException();

            this.resourceManager = resourceManager;

            this.dataServiceUUID = dataServiceUUID;

            this.state = localState;

            this.revisionTime = revisionTime;

            this.tx = localState.getStartTimestamp();

        }

        /**
         * FIXME Finish, write tests and debug.
         */
        @Override
        protected Void doTask() throws Exception {

            final IDistributedTransactionService txService = (IDistributedTransactionService) resourceManager
                    .getLiveJournal().getLocalTransactionManager()
                    .getTransactionService();

            prepare();

            final long commitTime = txService.prepared(tx, dataServiceUUID);

            // obtain the exclusive write lock on the journal.
            lockJournal();

            try {

                // Commit using the specified commit time.
                commit(commitTime);

                boolean success = false;

                try {

                    /*
                     * Wait until the entire distributed transaction is
                     * committed.
                     */
                    success = txService.committed(tx, dataServiceUUID);

                } finally {

                    if (!success) {

                        // Rollback the journal.
                        rollback();

                    }

                }

            } finally {

                // release the exclusive write lock on the journal.
                unlockJournal();

            }

            return null;

        }
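        /*
         * For reference, the participant-side order of operations implemented
         * by doTask() above (all names are members of this class; "uuid"
         * abbreviates dataServiceUUID):
         *
         *   prepare();                             // validate + checkpoint
         *   commitTime = txService.prepared(tx, uuid); // barrier: all prepared
         *   lockJournal();                         // exclusive write lock
         *   try {
         *       commit(commitTime);                // local atomic commit
         *       if (!txService.committed(tx, uuid)) // barrier: global outcome
         *           rollback();                    // undo the local commit
         *   } finally {
         *       unlockJournal();
         *   }
         */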
        /**
         * Prepare the transaction (validate and merge down onto the
         * unisolated indices and then checkpoint those indices).
         * <p>
         * Note: This presumes that we are already holding exclusive write
         * locks on the named indices such that the pre-conditions for
         * validation and its post-conditions can not change until we either
         * commit or discard the transaction.
         * <p>
         * Note: The indices need to be isolated as by {@link AbstractTask} or
         * they will be enrolled onto {@link Name2Addr}'s commitList when they
         * become dirty and then checkpointed and included with the NEXT
         * commit.
         * <p>
         * For this reason, the {@link DistributedCommitTask} is an UNISOLATED
         * task so that we can reuse the existing mechanisms as much as
         * possible.
         * 
         * FIXME This will work if we can grab the write service lock from
         * within the task (which will mean changing that code to allow the
         * lock with the caller only still running), or simply wait until we
         * are signaled by the txService that all participants are either go
         * (continue execution and commit at the next group commit, but then
         * we need a protocol to impose the correct commit time, e.g., by
         * passing it on the task and ensuring that there is no other tx ready
         * in the commit group) or abort (just throw an exception).
         */
        protected void prepare() {

            state.prepare(revisionTime);

        }

        /**
         * Obtain the exclusive lock on the write service. This will prevent
         * any other tasks using the concurrency API from writing on the
         * journal.
         */
        protected void lockJournal() {

            throw new UnsupportedOperationException();

        }

        protected void unlockJournal() {

            throw new UnsupportedOperationException();

        }

        /**
         * Commit the transaction using the specified <i>commitTime</i>.
         * <p>
         * Note: There are no persistent side-effects unless this method
         * returns successfully.
         * 
         * @param commitTime
         *            The commit time that must be used.
         */
        protected void commit(final long commitTime) {

            /*
             * @todo enroll the named indices onto Name2Addr's commitList
             * (this basically requires breaking the isolation imposed by the
             * AbstractTask).
             */
            if (true)
                throw new UnsupportedOperationException();

            final ManagedJournal journal = resourceManager.getLiveJournal();

            // atomic commit.
            journal.commitNow(commitTime);

        }

        /**
         * Discard the last commit, restoring the journal to the previous
         * commit point.
         */
        protected void rollback() {

            final ManagedJournal journal = resourceManager.getLiveJournal();

            journal.rollback();

        }

    }

    @Override
    public void abort(final long tx) throws IOException {

        setupLoggingContext();

        try {

            final Tx localState = (Tx) getLocalTransactionManager().getTx(tx);

            if (localState == null)
                throw new IllegalArgumentException();

            localState.lock.lock();

            try {

                localState.setRunState(RunState.Aborted);

            } finally {

                localState.lock.unlock();

            }

        } finally {

            clearLoggingContext();

        }

    }

    /*
     * IDataService.
     */

    /**
     * Forms the name of the index corresponding to a partition of a named
     * scale-out index as <i>name</i>#<i>partitionId</i>.
     * <p>
     * An advantage of this naming scheme is that index partitions are just
     * named indices, so all of the mechanisms for operating on named indices
     * and for concurrency control of named indices apply automatically. Among
     * other things, this means that different tasks can write concurrently on
     * different partitions of the same named index on a given
     * {@link DataService}.
     * 
     * @return The name of the index partition.
     */
    public static final String getIndexPartitionName(final String name,
            final int partitionId) {

        if (name == null) {

            throw new IllegalArgumentException();

        }

        if (partitionId == -1) {

            // Not a partitioned index.
            return name;

        }

        return name + "#" + partitionId;

    }
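    /*
     * For example (values are illustrative only):
     *
     *   getIndexPartitionName("spo", 12)  // returns "spo#12"
     *   getIndexPartitionName("spo", -1)  // returns "spo" (not partitioned)
     */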
    /**
     * Returns either {@link IDataService} or {@link IMetadataService} as
     * appropriate.
     */
    @Override
    public Class getServiceIface() {

        final Class serviceIface;

        if (DataService.this instanceof IMetadataService) {

            serviceIface = IMetadataService.class;

        } else {

            serviceIface = IDataService.class;

        }

        return serviceIface;

    }

    @Override
    public void registerIndex(final String name, final IndexMetadata metadata)
            throws IOException, InterruptedException, ExecutionException {

        setupLoggingContext();

        try {

            if (metadata == null)
                throw new IllegalArgumentException();

            final AbstractTask<UUID> task = new RegisterIndexTask(
                    concurrencyManager, name, metadata);

            concurrencyManager.submit(task).get();

        } finally {

            clearLoggingContext();

        }

    }

    @Override
    public void dropIndex(final String name) throws IOException,
            InterruptedException, ExecutionException {

        setupLoggingContext();

        try {

            final AbstractTask<Boolean> task = new DropIndexTask(
                    concurrencyManager, name);

            concurrencyManager.submit(task).get();

        } finally {

            clearLoggingContext();

        }

    }

    @Override
    public IndexMetadata getIndexMetadata(final String name,
            final long timestamp) throws IOException, InterruptedException,
            ExecutionException {

        setupLoggingContext();

        try {

            // Choose READ_COMMITTED iff UNISOLATED was requested.
            final long startTime = (timestamp == ITx.UNISOLATED ? ITx.READ_COMMITTED
                    : timestamp);

            final AbstractTask<IndexMetadata> task = new GetIndexMetadataTask(
                    concurrencyManager, startTime, name);

            return (IndexMetadata) concurrencyManager.submit(task).get();

        } finally {

            clearLoggingContext();

        }

    }

    /**
     * Retrieves the {@link IndexMetadata} for the named index as of the
     * specified timestamp.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    public static class GetIndexMetadataTask extends
            AbstractTask<IndexMetadata> {

        public GetIndexMetadataTask(final ConcurrencyManager concurrencyManager,
                final long startTime, final String name) {

            super(concurrencyManager, startTime, name);

        }

        @Override
        protected IndexMetadata doTask() throws Exception {

            return getIndex(getOnlyResource()).getIndexMetadata();

        }

    }

    /**
     * Note: This chooses {@link ITx#READ_COMMITTED} if the index has
     * {@link ITx#UNISOLATED} isolation and the {@link IIndexProcedure} is a
     * read-only operation. This provides better concurrency on the
     * {@link DataService} by moving read-only operations off of the
     * {@link WriteExecutorService}.
     * <p>
     * Note: When the {@link DataService} is accessed via RMI the
     * {@link Future} MUST be a proxy. This gets handled by the concrete
     * server implementation.
     */
    @Override
    public <T> Future<T> submit(final long tx, final String name,
            final IIndexProcedure<T> proc) {

        setupLoggingContext();

        try {

            if (name == null)
                throw new IllegalArgumentException();

            if (proc == null)
                throw new IllegalArgumentException();

            // Choose READ_COMMITTED iff proc is read-only and UNISOLATED was
            // requested.
            final long timestamp = (tx == ITx.UNISOLATED && proc.isReadOnly() ? ITx.READ_COMMITTED
                    : tx);

            // wrap the caller's task.
            final AbstractTask<T> task = new IndexProcedureTask<T>(
                    concurrencyManager, timestamp, name, proc);

            if (task instanceof IFederationCallable) {

                ((IFederationCallable) task).setFederation(getFederation());

            }

            if (task instanceof IDataServiceCallable) {

                ((IDataServiceCallable) task).setDataService(this);

            }

            // submit the procedure and await its completion.
            return concurrencyManager.submit(task);

        } finally {

            clearLoggingContext();

        }

    }
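    /*
     * A usage sketch (hypothetical caller; "dataService", "myIndex" and the
     * concrete procedure are assumptions - any IIndexProcedure works):
     *
     *   final IIndexProcedure<Long> proc = ...; // some read-only procedure.
     *   final Future<Long> f = dataService.submit(ITx.UNISOLATED, "myIndex", proc);
     *   final Long result = f.get(); // read-only procs run as READ_COMMITTED.
     */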
    /**
     * Note: When the {@link DataService} is accessed via RMI the
     * {@link Future} MUST be a proxy. This gets handled by the concrete
     * server implementation.
     * 
     * @see AbstractDistributedFederation#getProxy(Future)
     * 
     * @todo we should probably put the federation object in a sandbox in
     *       order to prevent tasks running on the {@link DataService} from
     *       using the {@link IDataServiceCallable} interface to gain access
     *       to the {@link DataService}'s federation. For example, if they use
     *       {@link AbstractFederation#shutdownNow()} then the
     *       {@link DataService} itself would be shut down.
     */
    @Override
    public Future<? extends Object> submit(final Callable<? extends Object> task) {

        setupLoggingContext();

        try {

            if (task == null)
                throw new IllegalArgumentException();

            /*
             * Submit to the ExecutorService for the DataService's federation
             * object. This is used for tasks which are not associated with a
             * timestamp and hence not linked to any specific view of the
             * named indices.
             */

            if (task instanceof IFederationCallable) {

                ((IFederationCallable) task).setFederation(getFederation());

            }

            if (task instanceof IDataServiceCallable) {

                ((IDataServiceCallable) task).setDataService(this);

            }

            // submit the task and return its Future.
            return getFederation().getExecutorService().submit(task);

        } finally {

            clearLoggingContext();

        }

    }

//    /**
//     * Encapsulate the {@link Future} within a proxy that may be marshalled
//     * by RMI and sent to a remote client. The client will interact with the
//     * unmarshalled {@link Future}, which in turn will use RMI to control the
//     * original {@link Future} within the {@link DataService}.
//     * <p>
//     * The default implementation simply returns the <i>future</i> and MUST
//     * be overridden when remote clients will use RMI to execute methods on
//     * the {@link DataService}.
//     * 
//     * @param future
//     *            The future.
//     * 
//     * @return The encapsulated future.
//     */
//    protected Future wrapFuture(Future future) {
//
//        return future;
//
//    }

    @Override
    public ResultSet rangeIterator(long tx, String name, byte[] fromKey,
            byte[] toKey, int capacity, int flags, IFilter filter)
            throws InterruptedException, ExecutionException {

        setupLoggingContext();

        try {

            if (name == null)
                throw new IllegalArgumentException();

            /*
             * Figure out if the iterator is read-only for the time that it
             * executes on the data service. For this case, we ignore the
             * CURSOR flag since modifications during iterator execution on
             * the data service can only be introduced via a filter or the
             * REMOVEALL flag. The caller will be using a chunked iterator.
             * Therefore, if they choose to delete tuples while visiting the
             * elements in the ResultSet then the deletes will be issued as
             * separate requests.
             */
            final boolean readOnly = ((flags & IRangeQuery.READONLY) != 0)
                    || (filter == null && //
//                            ((flags & IRangeQuery.CURSOR) == 0) && //
                            ((flags & IRangeQuery.REMOVEALL) == 0)//
                    );

            long timestamp = tx;

            if (timestamp == ITx.UNISOLATED && readOnly) {

                /*
                 * If the iterator is readOnly then READ_COMMITTED has the
                 * same semantics as UNISOLATED and provides better
                 * concurrency since it reduces contention for the
                 * writeService.
                 */

                timestamp = ITx.READ_COMMITTED;

            }

//            final long startTime = (tx == ITx.UNISOLATED
//                    && ((flags & IRangeQuery.REMOVEALL) == 0) ? ITx.READ_COMMITTED
//                    : tx);

            final RangeIteratorTask task = new RangeIteratorTask(
                    concurrencyManager, timestamp, name, fromKey, toKey,
                    capacity, flags, filter);

            // submit the task and wait for it to complete.
            return concurrencyManager.submit(task).get();

        } finally {

            clearLoggingContext();

        }

    }
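    /*
     * A usage sketch (hypothetical caller; "myIndex" and the capacity of
     * 1000 are assumptions; null from/to keys mean an unbounded key range).
     * With no filter and REMOVEALL not among the flags, the logic above will
     * run the iterator as READ_COMMITTED even though UNISOLATED was
     * requested:
     *
     *   final ResultSet rs = dataService.rangeIterator(ITx.UNISOLATED,
     *           "myIndex", null, null, 1000, IRangeQuery.KEYS, null);
     */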
    /**
     * @todo this operation should be able to abort an
     *       {@link IBlock#inputStream() read} that takes too long or if there
     *       is a need to delete the resource.
     * 
     * @todo this should be run on the read service.
     * 
     * @todo coordinate close out of stores.
     * 
     * @todo efficient (stream-based) read from the journal (IBlockStore API).
     *       This is a fully buffered read and will cause heap churn.
     */
    @Override
    public IBlock readBlock(final IResourceMetadata resource, final long addr) {

        if (resource == null)
            throw new IllegalArgumentException();

        if (addr == 0L)
            throw new IllegalArgumentException();

        setupLoggingContext();

        final long begin = System.nanoTime();

        try {

            final IRawStore store = resourceManager.openStore(resource
                    .getUUID());

            if (store == null) {

                log.warn("Resource not available: " + resource);

                readBlockApiCounters.readBlockErrorCount++;

                throw new IllegalStateException("Resource not available");

            }

            final int byteCount = store.getByteCount(addr);

            return new IBlock() {

                @Override
                public long getAddress() {

                    return addr;

                }

                // @todo reuse buffers.
                @Override
                public InputStream inputStream() {

                    // this is when it actually reads the data.
                    final ByteBuffer buf = store.read(addr);

                    // #of bytes buffered.
                    readBlockApiCounters.readBlockBytes += byteCount;

                    // caller will read from this object.
                    return new ByteBufferInputStream(buf);

                }

                @Override
                public int length() {

                    return byteCount;

                }

            };

        } finally {

            readBlockApiCounters.readBlockCount++;

            readBlockApiCounters.readBlockNanos = System.nanoTime() - begin;

            clearLoggingContext();

        }

    }
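    /*
     * A usage sketch (hypothetical caller; "resource" would come from an
     * index partition's IResourceMetadata and "addr" from an application
     * record). Note that the data are not read until inputStream() is
     * invoked:
     *
     *   final IBlock block = dataService.readBlock(resource, addr);
     *   final InputStream in = block.inputStream(); // reads the data here.
     *   final byte[] data = new byte[block.length()];
     *   new java.io.DataInputStream(in).readFully(data);
     */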
    /**
     * Task for running a rangeIterator operation.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    static protected class RangeIteratorTask extends AbstractTask<ResultSet> {

        private final byte[] fromKey;
        private final byte[] toKey;
        private final int capacity;
        private final int flags;
        private final IFilter filter;

        public RangeIteratorTask(final ConcurrencyManager concurrencyManager,
                final long startTime, final String name, final byte[] fromKey,
                final byte[] toKey, final int capacity, final int flags,
                final IFilter filter) {

            super(concurrencyManager, startTime, name);

            this.fromKey = fromKey;
            this.toKey = toKey;
            this.capacity = capacity;
            this.flags = flags;
            this.filter = filter; // MAY be null.

        }

        @Override
        public ResultSet doTask() throws Exception {

            final IIndex ndx = getIndex(getOnlyResource());

            /*
             * Figure out the upper bound on the #of tuples that could be
             * materialized.
             * 
             * Note: the upper bound on the #of key-value pairs in the range
             * is truncated to an [int].
             */
            final int rangeCount = (int) ndx.rangeCount(fromKey, toKey);

            final int limit = (rangeCount > capacity ? capacity : rangeCount);

            /*
             * Iterator that will visit the key range.
             * 
             * Note: We always visit the keys regardless of whether we pass
             * them on to the caller. This is necessary in order for us to set
             * the [lastKey] field on the result set, and that is necessary to
             * support continuation queries.
             */
            final ITupleIterator<?> itr = ndx.rangeIterator(fromKey, toKey,
                    limit, flags | IRangeQuery.KEYS, filter);

            // Populate the result set from the iterator.
            return new ResultSet(ndx, capacity, flags, itr);

        }

    }

    /*
     * Overflow processing API.
     */

    @Override
    public void forceOverflow(final boolean immediate,
            final boolean compactingMerge) throws IOException,
            InterruptedException, ExecutionException {

        setupLoggingContext();

        try {

            if (!(resourceManager instanceof ResourceManager)) {

                throw new UnsupportedOperationException();

            }

            final Callable<Void> task = new ForceOverflowTask(compactingMerge);

            log.warn("Will force overflow: immediate=" + immediate
                    + ", compactingMerge=" + compactingMerge);

            if (immediate) {

                /*
                 * Run the task on the write service. The task writes a small
                 * record on the journal in order to make sure that it is
                 * dirty and then sets the flag to force overflow with the
                 * next commit. Since the task runs on the write service and
                 * since the journal is dirty, a group commit will occur and
                 * synchronous overflow processing will occur before this
                 * method returns.
                 * 
                 * Note: the resource itself is arbitrary - there is no index
                 * by that name.
                 */

                getConcurrencyManager().submit(
                        new AbstractTask<Void>(getConcurrencyManager(),
                                ITx.UNISOLATED,
                                new String[] { "__forceOverflow" }) {

                            @Override
                            protected Void doTask() throws Exception {

                                // write a one byte record on the journal.
                                getJournal().write(
                                        ByteBuffer.wrap(new byte[] { 1 }));

                                // run task that will set the overflow flag.
                                return task.call();

                            }

                        }).get();

            } else {

                /*
                 * Provoke overflow with the next group commit. All this does
                 * is set the flag that will cause overflow to occur with the
                 * next group commit. Since the task does not run on the write
                 * service it will return immediately.
                 */

                try {

                    task.call();

                } catch (Exception e) {

                    throw new RuntimeException(e);

                }

            }

        } finally {

            clearLoggingContext();

        }

    }

    @Override
    public boolean purgeOldResources(final long timeout,
            final boolean truncateJournal) throws InterruptedException {

        // delegate all the work.
        return getResourceManager().purgeOldResources(timeout,
                truncateJournal);

    }

    /**
     * Task sets the flag that will cause overflow processing to be triggered
     * on the next group commit.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
     */
    private class ForceOverflowTask implements Callable<Void> {

        private final boolean compactingMerge;

        public ForceOverflowTask(final boolean compactingMerge) {

            this.compactingMerge = compactingMerge;

        }

        @Override
        public Void call() throws Exception {

//            final WriteExecutorService writeService = concurrencyManager
//                    .getWriteService();

            final ResourceManager resourceManager = (ResourceManager) DataService.this.resourceManager;

            if (resourceManager.isOverflowAllowed()) {

                if (compactingMerge) {

                    resourceManager.compactingMerge.set(true);

                }

                // trigger overflow on the next group commit.
//                writeService.forceOverflow.set(true);
                resourceManager.forceOverflow.set(true);

            }

            return null;

        }

    }

    @Override
    public long getAsynchronousOverflowCounter() throws IOException {

        setupLoggingContext();

        try {

            if (!(resourceManager instanceof ResourceManager)) {

                throw new UnsupportedOperationException();

            }

            return resourceManager.getAsynchronousOverflowCount();

        } finally {

            clearLoggingContext();

        }

    }

    @Override
    public boolean isOverflowActive() throws IOException {

        setupLoggingContext();

        try {

            if (!(resourceManager instanceof ResourceManager)) {

                throw new UnsupportedOperationException();

            }

            /*
             * Overflow processing is enabled but not allowed, which means
             * that overflow processing is occurring right now.
             */
            return resourceManager.isOverflowEnabled()
                    && !resourceManager.isOverflowAllowed();

        } finally {

            clearLoggingContext();

        }

    }

}