package lsr.paxos.core;
import static lsr.common.ProcessDescriptor.processDescriptor;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import lsr.common.CrashModel;
import lsr.paxos.ActiveRetransmitter;
import lsr.paxos.EpochPrepareRetransmitter;
import lsr.paxos.PrepareRetransmitter;
import lsr.paxos.PrepareRetransmitterImpl;
import lsr.paxos.RetransmittedMessage;
import lsr.paxos.messages.Message;
import lsr.paxos.messages.Prepare;
import lsr.paxos.messages.PrepareOK;
import lsr.paxos.messages.Propose;
import lsr.paxos.network.Network;
import lsr.paxos.replica.ClientBatchID;
import lsr.paxos.replica.ClientBatchManager;
import lsr.paxos.replica.ClientBatchManager.FwdBatchRetransmitter;
import lsr.paxos.replica.ClientRequestManager;
import lsr.paxos.storage.ClientBatchStore;
import lsr.paxos.storage.ConsensusInstance;
import lsr.paxos.storage.ConsensusInstance.LogEntryState;
import lsr.paxos.storage.Log;
import lsr.paxos.storage.Storage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Represents the part of Paxos responsible for proposing new consensus
 * values. Provides the procedure that starts proposing by sending the
 * {@link Propose} messages, and allows proposing new values. The number of
 * concurrently running proposals is bounded by the window size kept in storage.
 */
public class ProposerImpl implements Proposer {

    /** Retransmitted prepare request for the view change currently in progress */
    private PrepareRetransmitter prepareRetransmitter;

    /** Retransmitted propose messages, keyed by consensus instance id */
    private final Map<Integer, RetransmittedMessage> proposeRetransmitters =
        new HashMap<Integer, RetransmittedMessage>();

    /** Shared retransmitter backing both the prepare and the propose messages */
    private final ActiveRetransmitter retransmitter;

    private final Paxos paxos;
    private final Storage storage;

    // INACTIVE / PREPARING / PREPARED; read and written on the Paxos dispatcher
    // thread (see the amIInDispatcher() assertions below).
    private ProposerState state;

    // Only set when indirect consensus is used; fetches client batches whose
    // contents are referenced by instances but not yet present locally.
    private ClientBatchManager cliBatchManager;

    /**
     * Count of outstanding batch-fetch hooks that must complete before the
     * view can be declared prepared. Locked on the array, modifies the int
     * inside.
     */
    private final int[] waitingHooks = new int[] {0};

    // Fetch tasks registered while preparing; cancelled and cleared if
    // fetching times out and the view change is restarted.
    private final ArrayList<ClientBatchManager.FwdBatchRetransmitter> waitingFBRs = new ArrayList<ClientBatchManager.FwdBatchRetransmitter>();

    /** Tasks to be executed once the proposer prepares */
    final HashSet<Task> tasksOnPrepared = new HashSet<Task>();

    /**
     * Initializes a new instance of <code>Proposer</code>. The proposer starts
     * in the <code>INACTIVE</code> state.
     *
     * @param paxos - the paxos protocol instance this proposer belongs to
     * @param network - network used by the retransmitter to send messages
     * @param storage - replica state (view, log, first uncommitted instance)
     * @param crashModel - with {@link CrashModel#EpochSS} an epoch-aware
     *            prepare retransmitter is used, otherwise the plain one
     */
    public ProposerImpl(Paxos paxos,
                        Network network,
                        Storage storage,
                        CrashModel crashModel)
    {
        this.paxos = paxos;
        this.storage = storage;
        retransmitter = new ActiveRetransmitter(network, "ProposerRetransmitter");
        this.state = ProposerState.INACTIVE;
        if (crashModel == CrashModel.EpochSS) {
            prepareRetransmitter = new EpochPrepareRetransmitter(retransmitter, storage);
        } else {
            prepareRetransmitter = new PrepareRetransmitterImpl(retransmitter);
        }
    }

    /** Wires in the batch manager; required before start() under indirect consensus. */
    public void setClientRequestManager(ClientRequestManager requestManager) {
        cliBatchManager = requestManager.getClientBatchManager();
    }

    public void start() {
        // Indirect consensus cannot work without a batch manager to fetch values.
        assert !processDescriptor.indirectConsensus || cliBatchManager != null;
        retransmitter.init();
    }

    /**
     * Gets the current state of the proposer.
     *
     * @return <code>INACTIVE</code>,
     *         <code>PREPARING</code> or <code>PREPARED</code>
     */
    public ProposerState getState() {
        return state;
    }

    /**
     * If previous leader is suspected this procedure is executed. We're
     * changing the view (variable indicating order of the leaders in time)
     * accordingly, and we're sending the prepare message.
     *
     */
    public void prepareNextView() {
        assert paxos.getDispatcher().amIInDispatcher();

        state = ProposerState.PREPARING;
        setNextViewNumber();

        logger.info(processDescriptor.logMark_Benchmark, "Preparing view: {}", storage.getView());

        // Ask all other replicas to promise this view from the first
        // instance we have not yet learned a decision for.
        Prepare prepare = new Prepare(storage.getView(), storage.getFirstUncommitted());
        prepareRetransmitter.startTransmitting(prepare, Network.OTHERS);

        if (processDescriptor.indirectConsensus)
            fetchLocalMissingBatches();

        // tell that local process is already prepared
        prepareRetransmitter.update(null, processDescriptor.localId);

        // unlikely, unless N==1
        if (prepareRetransmitter.isMajority()) {
            onMajorityOfPrepareOK();
        }
    }

    /**
     * Registers a fetch task (and bumps waitingHooks) for every locally KNOWN
     * instance whose client batch contents are not all present in the local
     * batch store. The hooks run on the batch-manager side and release
     * onMajorityOfPrepareOK(), which blocks on waitingHooks.
     */
    private void fetchLocalMissingBatches() {

        for (ConsensusInstance instane : storage.getLog().getInstanceMap().tailMap(
                storage.getFirstUncommitted()).values()) {
            if (instane.getState() == LogEntryState.KNOWN &&
                !ClientBatchStore.instance.hasAllBatches(instane.getClientBatchIds())) {
                waitingHooks[0]++;
                FwdBatchRetransmitter fbr = cliBatchManager.fetchMissingBatches(
                        instane.getClientBatchIds(),
                        new ClientBatchManager.Hook() {

                            public void hook() {
                                synchronized (waitingHooks) {
                                    // NOTE(review): Thread.interrupted() also
                                    // CLEARS the interrupt flag; presumably the
                                    // interrupt is used to cancel obsolete
                                    // hooks — confirm against ClientBatchManager.
                                    if (Thread.interrupted())
                                        return;
                                    waitingHooks[0]--;
                                    waitingHooks.notifyAll();
                                }
                            }
                        }, true);
                waitingFBRs.add(fbr);
            }
        }
    }

    /** Advances the view to the next view numbered such that this process is its leader. */
    private void setNextViewNumber() {
        int view = storage.getView();
        do {
            view++;
        } while (!processDescriptor.isLocalProcessLeader(view));
        storage.setView(view);
    }

    /**
     * Handles a PrepareOK answer from another replica; once a majority has
     * answered for the current view, finishes the view change.
     *
     * @param message - the PrepareOK, carrying the sender's accepted instances
     * @param sender - id of the replica that sent the message
     */
    public void onPrepareOK(PrepareOK message, int sender) {
        assert paxos.getDispatcher().amIInDispatcher();
        assert paxos.isLeader();
        assert state != ProposerState.INACTIVE : "Proposer is not active.";
        // asserting the same again. Who knows what happens in between?
        assert message.getView() == storage.getView() : "Received a PrepareOK for a higher or lower view. " +
                                                        "Msg.view: " + message.getView() +
                                                        ", view: " + storage.getView();

        logger.info(processDescriptor.logMark_Benchmark, "Received {}: {}", sender, message);

        // Ignore prepareOK messages if we have finished preparing
        if (state == ProposerState.PREPARED) {
            logger.debug("View {} already prepared. Ignoring message.", storage.getView());
            return;
        }

        updateLogFromPrepareOk(message);

        prepareRetransmitter.update(message, sender);

        if (prepareRetransmitter.isMajority()) {
            onMajorityOfPrepareOK();
        }
    }

    /**
     * Completes the view change: waits (bounded) for all missing batch values,
     * then re-proposes undecided instances, fills gaps with no-ops, runs the
     * queued on-prepared tasks and starts proposing new values.
     */
    private void onMajorityOfPrepareOK() {
        prepareRetransmitter.stop();

        logger.debug("Majority of PrepareOK gathered. Waiting for {} missing batch values",
                waitingHooks[0]);

        long timeout = System.currentTimeMillis() + processDescriptor.maxBatchFetchingTimeoutMs;

        // wait for all batch values to arrive
        synchronized (waitingHooks) {
            while (waitingHooks[0] > 0)
                try {
                    long timeLeft = timeout - System.currentTimeMillis();
                    if (timeLeft <= 0) {
                        // Fetching timed out: cancel all pending fetch tasks
                        // and restart the whole view change from scratch.
                        logger.warn("Could not fetch batch values - restarting view change");
                        for (FwdBatchRetransmitter fbr : waitingFBRs)
                            cliBatchManager.removeTask(fbr);
                        waitingFBRs.clear();
                        waitingHooks[0] = 0;
                        prepareNextView();
                        return;
                    }
                    waitingHooks.wait(timeLeft);
                } catch (InterruptedException e) {
                    // NOTE(review): wraps without restoring the interrupt flag
                    // (Thread.currentThread().interrupt()) — acceptable only if
                    // the dispatcher thread is never interrupted deliberately.
                    throw new RuntimeException(e);
                }
        }
        waitingFBRs.clear();

        state = ProposerState.PREPARED;

        logger.info(processDescriptor.logMark_Benchmark, "View prepared {}", storage.getView());

        // Send a proposal for all instances that were not decided.
        Log log = storage.getLog();
        for (int i = storage.getFirstUncommitted(); i < log.getNextId(); i++) {
            ConsensusInstance instance = log.getInstance(i);
            assert instance != null;
            switch (instance.getState()) {
                case DECIDED:
                    // If the decision was already taken by some process,
                    // there is no need to propose again, so skip this
                    // instance
                    break;

                case KNOWN:
                    // No decision, but some value is known
                    logger.info("Proposing value from previous view: {}", instance);
                    instance.setView(storage.getView());
                    continueProposal(instance);
                    break;

                case UNKNOWN:
                    assert instance.getValue() == null : "Unknow instance has value";
                    logger.warn("No value locked for instance {}: proposing no-op", i);
                    fillWithNoOperation(instance);
                    break;

                default:
                    assert false;
            }
        }

        paxos.onViewPrepared(log.getNextId());

        for (Task task : tasksOnPrepared) {
            task.onPrepared();
        }
        tasksOnPrepared.clear();

        if (processDescriptor.indirectConsensus)
            enqueueOrphanedBatches();

        proposeNext();
    }

    /**
     * Schedules a task to run once the view is prepared; runs (or fails) it
     * immediately if the proposer is already PREPARED (or INACTIVE).
     * Safe to call from any thread — the work runs on the dispatcher.
     */
    public void executeOnPrepared(final Task task) {
        assert state != ProposerState.INACTIVE;
        paxos.getDispatcher().execute(new Runnable() {
            public void run() {
                // State may have changed between the assert above and this
                // dispatcher-thread execution; re-check all three cases here.
                if (state == ProposerState.INACTIVE) {
                    task.onFailedToPrepare();
                    return;
                }

                if (state == ProposerState.PREPARED) {
                    task.onPrepared();
                    return;
                }

                tasksOnPrepared.add(task);
            }
        });
    }

    /** Re-queues client batches that are not bound to any consensus instance. */
    private void enqueueOrphanedBatches() {
        HashSet<ClientBatchID> instanceless = ClientBatchStore.instance.getInstancelessBatches();
        for (ClientBatchID cbid : instanceless)
            paxos.enqueueRequest(cbid);
    }

    /**
     * Gives the instance a no-op value (a batch containing only the NOP batch
     * id) and starts proposing it, so the log has no permanent gaps.
     */
    private void fillWithNoOperation(ConsensusInstance instance) {
        ByteBuffer bb = ByteBuffer.allocate(4 + ClientBatchID.NOP.byteSize());
        bb.putInt(1); // Size of batch
        ClientBatchID.NOP.writeTo(bb); // request
        instance.updateStateFromKnown(storage.getView(), bb.array());
        continueProposal(instance);
    }

    /**
     * Merges the instances reported in a PrepareOK into the local log,
     * fetching missing batch values asynchronously when indirect consensus
     * is enabled (each fetch bumps waitingHooks, released by its hook).
     */
    private void updateLogFromPrepareOk(PrepareOK message) {
        if (message.getPrepared() == null) {
            return;
        }

        // Update the local log with the data sent by this process
        for (final ConsensusInstance ci : message.getPrepared()) {

            // Algorithm: The received instance can be either
            // Decided - Set the local log entry to decided.
            // Accepted - If the local log entry is decided, ignore.
            // Otherwise, find the accept message for this consensus
            // instance with the highest timestamp and propose it.
            final ConsensusInstance localLog = storage.getLog().getInstance(ci.getId());
            // Happens if previous PrepareOK caused a snapshot execution
            if (localLog == null) {
                continue;
            }
            if (localLog.getState() == LogEntryState.DECIDED) {
                // We already know the decision, so ignore it.
                continue;
            }
            switch (ci.getState()) {
                case DECIDED:
                    if (!processDescriptor.indirectConsensus
                        || ClientBatchStore.instance.hasAllBatches(ci.getClientBatchIds())) {
                        localLog.updateStateFromDecision(ci.getView(), ci.getValue());
                        paxos.decide(ci.getId());
                    } else {
                        waitingHooks[0]++;
                        ci.setDecidable(true);
                        FwdBatchRetransmitter fbr = cliBatchManager.fetchMissingBatches(
                                ci.getClientBatchIds(), new ClientBatchManager.Hook() {

                                    public void hook() {
                                        paxos.getDispatcher().executeAndWait(new Runnable() {

                                            public void run() {
                                                localLog.updateStateFromDecision(ci.getView(),
                                                        ci.getValue());
                                                // NOTE(review): `ci.getState()` is always
                                                // DECIDED in this branch, so this condition
                                                // is always false and paxos.decide() is
                                                // never called here. Suspected bug — was
                                                // `localLog.getState()` intended? Confirm.
                                                if (!LogEntryState.DECIDED.equals(ci.getState()))
                                                    paxos.decide(ci.getId());
                                            }
                                        });
                                        synchronized (waitingHooks) {
                                            if (Thread.interrupted())
                                                return;
                                            waitingHooks[0]--;
                                            waitingHooks.notifyAll();
                                        }
                                    }
                                }, true);
                        waitingFBRs.add(fbr);
                    }
                    break;

                case KNOWN:
                    assert ci.getValue() != null : "Instance state KNOWN but value is null";
                    if (!processDescriptor.indirectConsensus ||
                        ClientBatchStore.instance.hasAllBatches(ci.getClientBatchIds()))
                        localLog.updateStateFromKnown(ci.getView(), ci.getValue());
                    else {
                        waitingHooks[0]++;
                        FwdBatchRetransmitter fbr = cliBatchManager.fetchMissingBatches(
                                ci.getClientBatchIds(),
                                new ClientBatchManager.Hook() {

                                    public void hook() {
                                        paxos.getDispatcher().executeAndWait(new Runnable() {

                                            public void run() {
                                                localLog.updateStateFromKnown(ci.getView(),
                                                        ci.getValue());
                                            }
                                        });
                                        synchronized (waitingHooks) {
                                            if (Thread.interrupted())
                                                return;
                                            waitingHooks[0]--;
                                            waitingHooks.notifyAll();
                                        }
                                    }
                                }, true);
                        waitingFBRs.add(fbr);
                    }
                    break;

                case UNKNOWN:
                    assert ci.getValue() == null : "Unknow instance has value";
                    logger.debug("Ignoring: {}", ci);
                    break;

                default:
                    assert false : "Invalid state: " + ci.getState();
                    break;
            }
        }
    }

    /** Wakes the proposer so it can pick up a freshly assembled batch. */
    public void notifyAboutNewBatch()
    {
        // Called from batcher thread
        paxos.getDispatcher().submit(new Runnable() {
            public void run() {
                logger.debug("Propose task running");
                proposeNext();
            }
        });
    }

    /** Proposes batches until the window fills up or no batch is available. */
    public void proposeNext() {
        logger.debug("Proposing.");
        while (!storage.isWindowFull()) {
            byte[] proposal = paxos.requestBatch();
            if (proposal == null)
                return;
            propose(proposal);
        }
    }

    /**
     * Asks the proposer to propose the given value. If there are currently too
     * many active propositions, this proposal will be enqueued until there are
     * available slots. If the proposer is <code>INACTIVE</code>, then message
     * is discarded. Otherwise value is added to list of active proposals.
     *
     * @param value - the value to propose
     * @throws InterruptedException
     */
    public void propose(byte[] value) {
        assert paxos.getDispatcher().amIInDispatcher();

        if (state != ProposerState.PREPARED) {
            /*
             * This can happen if there is a Propose event queued on the
             * Dispatcher when the view changes.
             */
            logger.warn("Cannot propose in INACTIVE or PREPARING state. Discarding batch");
            return;
        }

        logger.info(processDescriptor.logMark_OldBenchmark, "Proposing: {}",
                storage.getLog().getNextId());

        ConsensusInstance instance = storage.getLog().append(storage.getView(), value);

        assert !processDescriptor.indirectConsensus ||
               ClientBatchStore.instance.hasAllBatches(instance.getClientBatchIds());

        // Mark the instance as accepted locally
        instance.getAccepts().set(processDescriptor.localId);
        if (instance.isMajority()) {
            // A single local accept forming a majority means N==1 (or a bug).
            logger.warn("Either you use one replica only (what for?) or something is very wrong.");
            paxos.decide(instance.getId());
        }

        RetransmittedMessage msg = retransmitter.startTransmitting(new Propose(instance));
        proposeRetransmitters.put(instance.getId(), msg);
    }

    /**
     * Called to inform the proposer that a decision was taken. Allows the
     * proposer to make a new proposal.
     */
    public void ballotFinished() {
        assert paxos.getDispatcher().amIInDispatcher();

        // Needed - decide (triggering this method) is i.a. called by PrepareOK
        if (state == ProposerState.PREPARED) {
            proposeNext();
        }
    }

    /**
     * After becoming the leader we need to take control over the consensus for
     * orphaned instances. This method activates retransmission of propose
     * messages for instances, which we already have in our logs (
     * {@link sendNextProposal} and {@link Propose} create a new instance)
     *
     * @param instance instance we want to revoke
     */
    private void continueProposal(ConsensusInstance instance) {
        assert state == ProposerState.PREPARED;
        assert proposeRetransmitters.containsKey(instance.getId()) == false : "Different proposal for the same instance";

        // TODO: current implementation causes temporary window size violation.
        Message m = new Propose(instance);

        // Mark the instance as accepted locally
        instance.getAccepts().set(processDescriptor.localId);

        RetransmittedMessage msg = retransmitter.startTransmitting(m);
        proposeRetransmitters.put(instance.getId(), msg);
    }

    /**
     * As the process looses leadership, it must stop all message retransmission
     * - that is either prepare or propose messages.
     */
    public void stopProposer() {
        assert paxos.getDispatcher().amIInDispatcher();

        state = ProposerState.INACTIVE;
        // TODO: STOP ACCEPTING
        prepareRetransmitter.stop();
        retransmitter.stopAll();
        proposeRetransmitters.clear();

        // Queued on-prepared tasks can never run now; fail them explicitly.
        for (Task task : tasksOnPrepared) {
            task.onFailedToPrepare();
        }
        tasksOnPrepared.clear();
    }

    /**
     * After reception of majority accepts, we suppress propose messages.
     *
     * @param instanceId no. of instance, for which we want to stop
     *            retransmission
     */
    public void stopPropose(int instanceId) {
        assert paxos.getDispatcher().amIInDispatcher();

        RetransmittedMessage r = proposeRetransmitters.remove(instanceId);
        if (r != null) {
            r.stop();
        }
    }

    /**
     * If retransmission to some process for certain instance is no longer
     * needed, we should stop it
     *
     * @param instanceId no. of instance, for which we want to stop
     *            retransmission
     * @param destination number of the process in processes PID list
     */
    public void stopPropose(int instanceId, int destination) {
        assert proposeRetransmitters.containsKey(instanceId);
        assert paxos.getDispatcher().amIInDispatcher();

        proposeRetransmitters.get(instanceId).stop(destination);
    }

    public ClientBatchManager getClientBatchManager() {
        return cliBatchManager;
    }

    private final static Logger logger = LoggerFactory.getLogger(ProposerImpl.class);
}