/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Jun 13, 2010
 */

package com.bigdata.ha;

import java.io.IOException;
import java.net.InetSocketAddress;
import java.rmi.Remote;
import java.util.concurrent.Future;

import com.bigdata.ha.msg.HAWriteMessage;
import com.bigdata.ha.msg.IHALogRequest;
import com.bigdata.ha.msg.IHALogRootBlocksRequest;
import com.bigdata.ha.msg.IHALogRootBlocksResponse;
import com.bigdata.ha.msg.IHARebuildRequest;
import com.bigdata.ha.msg.IHASendState;
import com.bigdata.ha.msg.IHASendStoreResponse;
import com.bigdata.ha.msg.IHASyncRequest;
import com.bigdata.ha.msg.IHAWriteMessage;
import com.bigdata.ha.msg.IHAWriteSetStateRequest;
import com.bigdata.ha.msg.IHAWriteSetStateResponse;
import com.bigdata.ha.pipeline.HAReceiveService;
import com.bigdata.ha.pipeline.HASendService;
import com.bigdata.io.writecache.WriteCache;
import com.bigdata.journal.WriteExecutorService;
import com.bigdata.service.proxy.ThickFuture;

/**
 * A {@link Remote} interface supporting the write replication pipeline. The
 * quorum leader accepts writes from the application layer. The writes are
 * formatted onto low-level cache blocks. Those cache blocks are replicated
 * from the quorum leader to the quorum followers along the pipeline.
 * 
 * @see QuorumPipeline
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 */
public interface HAPipelineGlue extends Remote {

    /**
     * Return the address at which this service will listen for write pipeline
     * messages sent from the upstream service.
     * 
     * @todo This should be handled as a smart proxy so this method does not
     *       actually perform RMI.
     */
    InetSocketAddress getWritePipelineAddr() throws IOException;

    /**
     * Instruct the service to move to the end of the write pipeline. The
     * leader MUST be the first service in the write pipeline since it is the
     * service to which the application directs all write requests. This
     * message is used when a leader will be elected and it needs to force the
     * services ahead of it in the write pipeline to leave and re-add
     * themselves so that the leader becomes the first service in the write
     * pipeline.
     * 
     * @todo It is possible for the leader to issue these messages in a
     *       sequence designed to arrange the write pipeline such that it has
     *       a good network topology. The leader must first analyze the
     *       network topology for the services in the pipeline, decide on a
     *       good order, and then issue the requests to the services in that
     *       order. Each service will move to the end of the pipeline in turn.
     *       Once all services have been moved to the end of the pipeline, the
     *       pipeline will reflect the desired total order. However, note that
     *       transient network partitioning, zookeeper session timeouts, etc.
     *       can all cause this order to be perturbed.
     * 
     * @todo A pipeline leave currently causes a service leave. Reconsider
     *       this because it would mean that reorganizing the pipeline would
     *       actually cause service leaves, which would temporarily break the
     *       quorum and certainly disrupt the leader election. If we can do a
     *       pipeline leave without a service leave, if services which are
     *       joined always rejoin the write pipeline when they observe a
     *       pipeline leave, and if we can force a service leave (by
     *       instructing the service to bounce the zookeeper connection) if
     *       the service fails to rejoin the pipeline, then it would be easier
     *       to reorganize the pipeline. [This would still make it possible
     *       for services to add themselves to the pipeline without being
     *       joined with the quorum, but the quorum watcher would now
     *       automatically add the service to the quorum before a join and add
     *       it back if it is removed from the pipeline while it was joined
     *       _if_ the quorum is not yet met (once the quorum is met we cannot
     *       rejoin the pipeline if writes have propagated along the pipeline,
     *       and the pipeline is not clean at each commit - only when we have
     *       an exclusive lock on the {@link WriteExecutorService}).]
     * 
     * @todo The leader could also invoke this at any commit point where a
     *       service joins the quorum since we need to obtain an exclusive
     *       lock on the {@link WriteExecutorService} at that point (just as
     *       we do when we perform a synchronous overflow).
     *       <p>
     *       Periodic re-optimizations should take into account that services
     *       which are bouncing should stay at the end of the pipeline even
     *       though they have higher network costs, because losing the service
     *       at the end of the pipeline causes minimal retransmission.
     */
    Future<Void> moveToEndOfPipeline() throws IOException;
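
    // Illustrative sketch (not part of this interface): a quorum leader might
    // impose a desired pipeline order by asking each service, in that order,
    // to move to the end of the pipeline. The "desiredOrder" collection is a
    // hypothetical name used only for this sketch; exception handling for
    // Future.get() is elided.
    //
    //     for (HAPipelineGlue service : desiredOrder) {
    //         // Wait for each service to re-enter at the end of the pipeline
    //         // before instructing the next one.
    //         service.moveToEndOfPipeline().get();
    //     }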

    /**
     * Reset the pipeline (blocking). This message is used to handle an error
     * in pipeline replication. If replication fails, the socket connections
     * both upstream and downstream of the point of failure can be left in an
     * indeterminate state with partially buffered data. In order to bring the
     * pipeline back into a known state (without forcing a quorum break) we
     * message each service in the pipeline to reset its
     * {@link HAReceiveService} (including the inner {@link HASendService}).
     * The next message and payload relayed from the leader will cause new
     * socket connections to be established.
     * 
     * @param req
     *            The request.
     * 
     * @return The {@link Future} for the operation on the remote service.
     * 
     * @throws IOException
     */
    Future<IHAPipelineResetResponse> resetPipeline(IHAPipelineResetRequest req)
            throws IOException;
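
    // Illustrative sketch (not part of this interface): after a replication
    // error, the leader might message every service in the pipeline to tear
    // down its partially buffered socket state before retrying. The
    // "pipelineServices" collection and the construction of "resetRequest"
    // are assumptions for this sketch; exception handling is elided.
    //
    //     for (HAPipelineGlue service : pipelineServices) {
    //         // Blocking: wait for the service to reset its HAReceiveService.
    //         service.resetPipeline(resetRequest).get();
    //     }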

    /**
     * Accept metadata describing an NIO buffer transfer along the write
     * pipeline. This method is never invoked on the master. It is only
     * invoked on the failover nodes, including the last node in the failover
     * chain.
     * 
     * @param req
     *            A request for an HALog (optional). This is only non-null
     *            when historical {@link WriteCache} blocks are being replayed
     *            down the write pipeline in order to synchronize a service.
     * @param snd
     *            Metadata about the state of the sender and potentially the
     *            routing of the payload along the write replication pipeline.
     * @param msg
     *            The metadata.
     * 
     * @return The {@link Future} which will become available once the buffer
     *         transfer is complete.
     */
    Future<Void> receiveAndReplicate(IHASyncRequest req, IHASendState snd,
            IHAWriteMessage msg) throws IOException;

    /**
     * Request metadata about the current write set from the quorum leader.
     * 
     * @param req
     *            The request.
     * 
     * @return The response.
     */
    IHAWriteSetStateResponse getHAWriteSetState(IHAWriteSetStateRequest req)
            throws IOException;

    /**
     * Request the root blocks for the HA Log for the specified commit point.
     * 
     * @param msg
     *            The request (specifies the desired HA Log by the commit
     *            counter of the closing root block).
     * 
     * @return The requested root blocks.
     */
    IHALogRootBlocksResponse getHALogRootBlocksForWriteSet(
            IHALogRootBlocksRequest msg) throws IOException;

    /**
     * The recipient will send the {@link WriteCache} blocks for the specified
     * write set on the write pipeline. These {@link WriteCache} blocks will
     * be visible to ALL services in the write pipeline. It is important that
     * all services <em>ignore</em> {@link WriteCache} blocks that are not in
     * sequence for the current write set unless they have explicitly
     * requested an HALog using this method.
     * <p>
     * Note: The {@link WriteCache} blocks are sent on the write pipeline.
     * Therefore, a service MUST NOT request the write set from a service that
     * is downstream from it in the write pipeline. It is always safe to
     * request the data from the leader.
     * <p>
     * Note: The {@link HAWriteMessage} includes a quorum token. When
     * historical {@link WriteCache} blocks are being replicated in response
     * to this method, they will be replicated using the quorum token that was
     * in effect for that write set. Implementations MUST NOT perform asserts
     * on the quorum token until after they have determined that the message
     * is for the <em>current</em> write set.
     * <p>
     * Note: DO NOT use a {@link ThickFuture} for the returned {@link Future}.
     * That will defeat the ability of the requester to cancel the
     * {@link Future}.
     * 
     * @param msg
     *            A request for an HALog.
     * 
     * @return A {@link Future} that may be used to cancel the remote process
     *         sending the data through the write pipeline.
     */
    Future<Void> sendHALogForWriteSet(IHALogRequest msg) throws IOException;

    /**
     * Send the raw blocks for the requested backing store across the write
     * pipeline.
     * <p>
     * Note: This method supports disaster recovery of a service from a met
     * quorum. This procedure can only be used when a met quorum exists.
     * <p>
     * Note: DO NOT use a {@link ThickFuture} for the returned {@link Future}.
     * That will defeat the ability of the requester to cancel the
     * {@link Future}.
     * 
     * @param msg
     *            A request to replicate a backing store.
     * 
     * @return A {@link Future} that may be used to cancel the remote process
     *         sending the data through the write pipeline. The {@link Future}
     *         will report the current root block in effect as of the moment
     *         when the <code>sendHAStore</code> operation was completed.
     */
    Future<IHASendStoreResponse> sendHAStore(IHARebuildRequest msg)
            throws IOException;
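
    // Illustrative sketch (not part of this interface): a service that has
    // fallen behind might resynchronize by asking the leader to replay the
    // HALog for a missing write set, or, if its local store is unusable,
    // request the raw backing store instead. The "leader", "logRequest" and
    // "rebuildRequest" names are hypothetical; the replicated blocks arrive
    // via receiveAndReplicate() on the write pipeline, and either Future may
    // be cancelled to stop the transfer. Exception handling is elided.
    //
    //     // Resynchronize: replay the WriteCache blocks for one write set.
    //     final Future<Void> ftLog = leader.sendHALogForWriteSet(logRequest);
    //     ftLog.get();
    //
    //     // Disaster recovery: replicate the entire backing store.
    //     final Future<IHASendStoreResponse> ftStore = leader
    //             .sendHAStore(rebuildRequest);
    //     final IHASendStoreResponse resp = ftStore.get(); // root block info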

//    /**
//     * There is something for this on HAGlue right now.
//     * 
//     * TODO Method to compute a digest for the committed allocations on a
//     * backing store as of the commit point on which the specified transaction
//     * is reading. This may be used to verify that the backing stores are
//     * logically consistent even when they may have some discarded writes that
//     * are not present on all stores (from aborted write sets).
//     * <p>
//     * The caller can get the root blocks for the commit counter associated
//     * with the txId (if we can get the readsOnCommitTime).
//     * <p>
//     * The RWStore needs to snapshot the allocators while holding the
//     * allocation lock and that snapshot MUST be for the same commit point.
//     * Therefore, this operation needs to be submitted by the leader in code
//     * that can guarantee that the leader does not go through a commit point.
//     * The snapshots should be stored at the nodes, not shipped to the
//     * leader. The leader therefore needs to know when each snapshot is ready
//     * so it can exit the critical code and permit additional writes
//     * (releasing the allocLock or the semaphore on the journal).
//     * 
//     * <pre>
//     * IHAStoreChecksumRequest {storeUUID, txid (of readLock)}
//     * 
//     * IHAStoreCheckSumResponse {MD5Digest}
//     * </pre>
//     */
//    Future<IHASnapshotDigestResponse> computeSnapshotDigest(IHASnapshotDigestRequest req) throws IOException;

}