/**
* Fortika - Robust Group Communication
* Copyright (C) 2002-2006 Sergio Mena de la Cruz (EPFL) (sergio.mena@epfl.ch)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* Author: Olivier Rütti
*/
package groupcomm.common.abcast;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.logging.Level;
import java.util.logging.Logger;
import uka.transport.Transportable;
import framework.CompressedSet;
import framework.Constants;
import framework.GroupCommEventArgs;
import framework.GroupCommException;
import framework.GroupCommMessage;
import framework.PID;
import framework.libraries.DefaultSerialization;
import framework.libraries.FlowControl;
import framework.libraries.Trigger;
import framework.libraries.serialization.TArrayList;
import framework.libraries.serialization.TBoolean;
import framework.libraries.serialization.THashMap;
import framework.libraries.serialization.THashSet;
import framework.libraries.serialization.TInteger;
import framework.libraries.serialization.TLinkedHashMap;
import framework.libraries.serialization.TLinkedList;
import framework.libraries.serialization.TList;
import framework.libraries.serialization.TLong;
import framework.libraries.serialization.TMap;
import framework.libraries.serialization.TSet;
import framework.libraries.serialization.TSortedMap;
import framework.libraries.serialization.TTreeMap;
/**
* <b> This class implements the common code for algorithm abcast. </b>
* <hr>
* <b> Events: </b>
* <dl>
* <dt> <i>Init</i> </dt>
* <dd> Initializes the abcast layer </dd>
* <dt> <i>Abcast</i> </dt>
* <dd> Sends a broadcast message, using the abcast algorithm </dd>
* <dt> <i>Pt2PtDeliver</i> </dt>
* <dd> Happens when a message is received by the lower layers </dd>
* <dt> <i>Decide</i> </dt>
* <dd> Happens when consensus has decided </dd>
* </dl>
*/
public class FastAbcastImpl {
// REMOVE: leftover field; it is not referenced anywhere in the part of the
// class reviewed here.
long cStart = 0;
/**
* Identifiers of the message types exchanged by this layer. The type is the
* second header field (after the consensus number k) of every message
* dispatched in handlePt2PtDeliver.
*/
public static final int ABCAST_ESTIMATE = 1;
public static final int ABCAST_PROPOSE = 2;
public static final int ABCAST_ACK = 3;
public static final int ABCAST_RBCAST = 4;
public static final int ABCAST_ABORT = 5;
public static final int ABCAST_COORDNEEDSESTIMATE = 6;
public static final int ABCAST_UPDATEK = 7;
// Identifier of the local process (given to the constructor)
private PID myself;
// Set to true by handleInit; a second Init event is rejected
private boolean initialized = false;
// Trigger object used to route events to the other layers
private Trigger trigger;
// Variables for flow control: the flow-control object, the key obtained
// from it in handleInit, and the number of messages abcast locally
private FlowControl flow_control;
private int fc_key;
private int nbMsgsSent;
// Known processes, to which broadcast messages are sent: contains PIDs
private TArrayList known;
// Other processes, to which estimates and proposals are sent: same as
// 'known' but does NOT contain myself
private TArrayList others;
// A-delivered messages : Set (AbcastMessageID)
private TSet aDelivered;
// A-undelivered messages : insertion-ordered Map
// (AbcastMessageID -> GroupCommMessage m)
private TLinkedHashMap aUndelivered;
// Bound for old A-delivered messages : Table (PID -> TLong), every entry is
// initialized to -1 in handleInit
private TMap maxIdProProc;
// Decisions received from another process and kept in case they have to be
// re-sent (see processSuspicion) : Table (TLong k -> GroupCommMessage)
private TMap decisionToBroadcast;
// Highest consensus number seen so far in a message from each process :
// Table (PID -> TLong), see updateOldDecisions
private TMap processesCurrentK;
// Messages sent to other processes : Table (PID -> CompressedSet)
private TMap messagesSendToProc;
// Number of the current internal consensus (incremented by incK)
private long k = -1;
// Coordinator of the current round
private PID coordinator;
// Current round of the current consensus (reset to -1 by incK)
private int round = -1;
// Current phase within the round; the values 2, 3, 4 and 5 are used in
// this file
private int phase = -1;
// Current estimate : Map (AbcastMessageID -> GroupCommMessage)
private TLinkedHashMap estimate;
// Abcast message current id
private AbcastMessageID abcastId;
// Has the coordinator requested an estimate that could not be provided yet?
private boolean coordNeedsEstimate = false;
// Is the first round optimized?
private boolean optimizeFirstRound = false;
// Timestamp: round in which 'estimate' was last updated
private int timeStamp;
// Number of estimates received in the current round
private int numEstimate;
// Number of positive acknowledgements received in the current round
private int nbAck;
// Number of negative acknowledgements received in the current round
private int nbNack;
// Majority threshold, computed in handleInit as known.size() / 2
// (i.e. not counting myself)
private int majority;
/**
* Set of suspected processes. It starts out empty and is replaced by the
* set carried by every <i>suspect</i> event triggered by the failure
* detector (see handleSuspect).
*/
private TSet suspected = new THashSet();
// This map contains all messages that came too early and thus can't be
// treated yet. Messages are ordered by consensus number, then by round
// number (see pushback).
private TSortedMap pushedBack = new TTreeMap();
// Lower bound for max_locally_abcast
public static final int MIN_LOCALLY_ABCAST = 1;
//public static final int MAX_UNDELIVERED = 8;
//public static final int MAX_PROPOSE = 4;
public static final int MSGS_PER_CONSENSUS = 4;
// Number of locally abcast messages at which the flow control is blocked
private int max_locally_abcast = MSGS_PER_CONSENSUS;
public static final int MAX_MESSAGES_PER_ACK = 2;
// incK calls sendCurrentK every UPDATEK_PERIOD consensus instances
public static final int UPDATEK_PERIOD = 100;
private static final Logger logger = Logger.getLogger(FastAbcastImpl.class
.getName());
/**
 * An event waiting to be triggered: the event type together with its
 * arguments. Handlers queue these items in a list that is flushed by
 * proceedWithTrigger.
 */
public static class TriggerItem {
    /** Arguments handed to the trigger together with the type. */
    public GroupCommEventArgs args;

    /** Type of the event to trigger. */
    public int type;

    /**
     * @param type
     *            type of the event to trigger
     * @param args
     *            arguments of the event
     */
    public TriggerItem(int type, GroupCommEventArgs args) {
        this.args = args;
        this.type = type;
    }
}
/**
 * Constructor. Stores the collaborators and creates the empty bookkeeping
 * structures; the group itself is only known once handleInit is called.
 *
 * @param abcast
 *            object of a framework protocol based class, which ensures event
 *            routing for this protocol.
 * @param fc
 *            flow-control object used by this layer
 * @param myself
 *            identifier of the local process
 */
public FastAbcastImpl(Trigger abcast, FlowControl fc, PID myself) {
    logger.entering("FastAbcastImpl", "<constr>");

    // Identity and collaborators
    this.myself = myself;
    this.trigger = abcast;
    this.flow_control = fc;

    // Message bookkeeping
    this.abcastId = new AbcastMessageID(myself, 0);
    this.aUndelivered = new TLinkedHashMap();
    this.aDelivered = new THashSet();
    this.maxIdProProc = new THashMap();

    // Per-process and per-decision tables
    this.messagesSendToProc = new THashMap();
    this.processesCurrentK = new THashMap();
    this.decisionToBroadcast = new TLinkedHashMap();

    logger.exiting("FastAbcastImpl", "<constr>");
}
/**
 * Handler for the <i>Init</i> event. <br/> It records the group, asks the
 * lower layers to monitor and to accept messages from all its members and,
 * if the group has more than one process, starts the first internal
 * consensus.
 * <p>
 * The group is validated before any state of this object is modified, so a
 * rejected Init event leaves the object untouched (and still
 * un-initialized).
 *
 * @param ev
 *            <dl>
 *            <dt> arg1 : TList[PID] </dt>
 *            <dd> List of processes for broadcasting </dd>
 *            </dl>
 *
 * @throws GroupCommException
 *             if the layer is already initialized, if the group is empty,
 *             or if a process appears more than once in the group
 * @throws IOException
 * @throws ClassNotFoundException
 */
public void handleInit(GroupCommEventArgs ev) throws GroupCommException,
        IOException, ClassNotFoundException {
    logger.entering("FastAbcastImpl", "handleInit");
    TList p = (TList) ev.removeFirst();
    if (initialized)
        throw new GroupCommException("FastAbcastImpl already initialized.");

    // Validate the group on a local copy first: nothing below this block
    // may run for a group that is going to be rejected.
    TArrayList group = new TArrayList(p);
    if (group.size() == 0)
        throw new GroupCommException(
                "FastAbcastImpl cannot be initialized with an empty group.");
    // Look for duplicate processes in the group
    for (int i = 0; i < group.size(); i++) {
        for (int j = i + 1; j < group.size(); j++) {
            if (group.get(i).equals(group.get(j)))
                throw new GroupCommException("Process" + group.get(i)
                        + " appears more than once in the group.");
        }
    }

    LinkedList toTrigger = new LinkedList();
    initialized = true;
    fc_key = flow_control.getFreshKey();
    optimizeFirstRound = false;
    coordNeedsEstimate = false;
    known = group;
    others = new TArrayList();
    for (int i = 0; i < known.size(); i++) {
        Object member = known.get(i);
        if (!myself.equals(member)) {
            others.add(member);
            messagesSendToProc.put(member, new CompressedSet());
        }
        // no decision number seen yet from this process
        processesCurrentK.put(member, new TLong(-1));
        // no message A-delivered yet from this process
        maxIdProProc.put(member, new TLong(-1));
    }
    // calculate majority excluding myself
    majority = this.known.size() / 2;
    // Init the coordinator
    coordinator = (PID) known.get(0);

    // start FD
    GroupCommEventArgs e1 = new GroupCommEventArgs();
    e1.addLast(new THashSet(p)); // Start
    e1.addLast(new THashSet()); // Stop
    toTrigger.addLast(new TriggerItem(Constants.STARTSTOPMONITOR, e1));
    // join-remove
    GroupCommEventArgs jrl = new GroupCommEventArgs();
    jrl.addLast(new THashSet(p)); // join
    jrl.addLast(new THashSet()); // remove
    toTrigger.addLast(new TriggerItem(Constants.JOINREMOVELIST, jrl));

    // start the internal consensus (pointless when we are alone)
    if (known.size() > 1)
        this.incK(toTrigger, new TLinkedHashMap());
    proceedWithTrigger(toTrigger);
    logger.exiting("FastAbcastImpl", "handleInit");
}
/**
 * The handler for the <i>Abcast</i> event. <br/> It gives the message a
 * fresh Abcast-Id and stores it among the undelivered messages. If the
 * group consists of the local process only, the message is A-delivered
 * immediately instead. <br/> If an estimate was requested by the
 * coordinator and could not be provided so far, the new message is used to
 * answer that request.
 *
 * @param ev
 *            <dl>
 *            <dt> arg1: GroupCommMessage </dt>
 *            <dd> The message </dd>
 *            </dl>
 * @throws GroupCommException
 * @throws IOException
 * @throws ClassNotFoundException
 */
public void handleAbcast(GroupCommEventArgs ev) throws GroupCommException,
        IOException, ClassNotFoundException {
    logger.entering("FastAbcastImpl", "handleAbcast");
    LinkedList toTrigger = new LinkedList();
    // msg
    GroupCommMessage msg = (GroupCommMessage) ev.removeFirst();
    AbcastMessageID id = abcastId.nextId();
    // if we are the only process deliver m immediately
    if (known.size() == 1) {
        GroupCommEventArgs adeliver = new GroupCommEventArgs();
        adeliver.addLast(msg);
        adeliver.addLast(id.proc);
        trigger.trigger(Constants.ADELIVER, adeliver);
        maxIdProProc.put(id.proc, new TLong(id.id));
        // keep the entering/exiting trace balanced on this path too
        logger.exiting("FastAbcastImpl", "handleAbcast");
        return;
    }
    aUndelivered.put(id, msg);
    // Flow control: block the application once it has abcast as many
    // messages as currently allowed
    //flow_control.alloc(fc_key, 1);
    nbMsgsSent++;
    if (nbMsgsSent >= max_locally_abcast)
        flow_control.block(fc_key);
    if (coordNeedsEstimate && (!myself.equals(coordinator))) {
        // The coordinator explicitly asked for an estimate and ours was
        // empty: put the new message in it and send it now
        coordNeedsEstimate = false;
        estimate.put(id, msg);
        sendEstimate(toTrigger);
    } else if (coordNeedsEstimate) {
        // I am the coordinator and I am waiting for additional estimates:
        // take this new message as the proposal.
        // NOTE(review): unlike the other places that call sendPropose
        // (nextRound, processEstimate), this branch sets neither timeStamp
        // nor phase before proposing - confirm this is intended.
        estimate.put(id, msg);
        sendPropose(toTrigger);
        coordNeedsEstimate = false;
        processAck(toTrigger, k, round, null);
    }
    proceedWithTrigger(toTrigger);
    logger.exiting("FastAbcastImpl", "handleAbcast");
}
/**
 * The handler for the <i>Pt2PtDeliver</i> event. <br/> Strips the header
 * (consensus number, message type and, for most types, round number) off
 * the received message and hands the rest to the matching process* method.
 * Events produced while processing are triggered at the end.
 *
 * @param ev
 *            <dl>
 *            <dt> arg1: GroupCommMessage (k::type::payload) </dt>
 *            <dd> The message, with its header </dd>
 *            <dt> arg2: PID </dt>
 *            <dd> Source PID </dd>
 *            </dl>
 * @throws GroupCommException
 *             if the message type is unknown
 * @throws IOException
 * @throws ClassNotFoundException
 */
public void handlePt2PtDeliver(GroupCommEventArgs ev)
        throws GroupCommException, IOException, ClassNotFoundException {
    logger.entering("FastAbcastImpl", "handlePt2PtDeliver");
    LinkedList pending = new LinkedList();
    GroupCommMessage msg = (GroupCommMessage) ev.get(0);
    // msg = <<k::type::payload>>
    long msgK = ((TLong) msg.tunpack()).longValue();
    // msg = <<type::payload>>
    PID sender = (PID) ev.get(1);
    // Remember how far the sender has got; this may allow us to forget
    // decisions kept for re-sending
    updateOldDecisions(msgK, sender);
    int msgType = ((TInteger) msg.tunpack()).intValue();
    // msg = <<payload>>
    switch (msgType) {
    case ABCAST_RBCAST:
        // decisions carry no round number
        processRBcast(pending, msgK, msg, sender);
        break;
    case ABCAST_UPDATEK:
        // only the header was of interest (handled by updateOldDecisions)
        break;
    case ABCAST_ESTIMATE:
    case ABCAST_PROPOSE:
    case ABCAST_ACK:
    case ABCAST_ABORT:
    case ABCAST_COORDNEEDSESTIMATE: {
        // all these payloads start with the round number
        int msgRound = ((TInteger) msg.tunpack()).intValue();
        if (msgType == ABCAST_ESTIMATE) {
            // msg = <<estimate::lastupdated>>
            processEstimate(pending, msgK, msgRound, msg);
        } else if (msgType == ABCAST_PROPOSE) {
            // msg = <<propose>>
            processPropose(pending, msgK, msgRound, msg);
        } else if (msgType == ABCAST_ACK) {
            // msg = <<ack>>
            processAck(pending, msgK, msgRound, msg);
        } else if (msgType == ABCAST_ABORT) {
            processAbort(pending, msgK, msgRound);
        } else {
            processCoordNeedsEstimate(pending, msgK, msgRound);
        }
        break;
    }
    default:
        throw new GroupCommException("FastAbcastIMpl : handlePt2Ptdeliver : "
                + "Unknown message type: " + msgType);
    }
    proceedWithTrigger(pending);
    logger.exiting("FastAbcastImpl", "handlePt2PtDeliver");
}
/**
 * The handler for the <i>Suspect</i> event. <br/> Replaces the set of
 * suspected processes by the one carried by the event and lets
 * processSuspicion react to it (send a nack to the coordinator if it is now
 * suspected and no ack was sent yet, then go to the next round).
 *
 * @param ev
 *            <dl>
 *            <dt> arg1: TSet of suspected processes </dt>
 *            </dl>
 * @throws GroupCommException
 * @throws IOException
 * @throws ClassNotFoundException
 */
public void handleSuspect(GroupCommEventArgs ev) throws GroupCommException,
        IOException, ClassNotFoundException {
    logger.entering("FastAbcastImpl", "handleSuspect");
    suspected = (TSet) ev.get(0);
    LinkedList pending = new LinkedList();
    processSuspicion(pending, suspected);
    proceedWithTrigger(pending);
    logger.exiting("FastAbcastImpl", "handleSuspect");
}
/**
* Makes a copy in memory of the parameter by marshalling it with
* DefaultSerialization and unmarshalling the result. If the parameter has
* references to other objects, they are also cloned. This is necessary to
* avoid side-effects at higher-level protocols.
*
* @param o
* The object to deep-clone
* @return the object obtained by unmarshalling the marshalled form of
* <i>o</i>
* @throws IOException
* @throws ClassNotFoundException
*/
private Transportable deepClone(Transportable o) throws IOException,
ClassNotFoundException {
return DefaultSerialization
.unmarshall(DefaultSerialization.marshall(o));
}
/**
 * This method inserts a message whose processing has to be delayed into map
 * <i>pushedBack</i>. The map is ordered by increasing consensus number and,
 * for each consensus, by increasing round number. Decisions are stored
 * under round -1; once a decision is stored for a consensus, it is the only
 * message kept for that consensus.
 * <p>
 * The message type is packed on top of <i>m</i> before it is stored.
 *
 * @param k
 *            The message's consensus number (hides the field of the same
 *            name)
 * @param r
 *            The message's round number, or -1 for a decision
 * @param type
 *            The message's type
 * @param m
 *            The message payload
 */
private void pushback(long k, int r, int type, GroupCommMessage m) {
    logger.entering("FastAbcastImpl", "pushback");
    TLong kObj = new TLong(k);
    TInteger rObj = new TInteger(r);
    TTreeMap mapK = (TTreeMap) pushedBack.get(kObj);
    if (mapK == null) {
        mapK = new TTreeMap();
        pushedBack.put(kObj, mapK);
    }
    // We already pushed back a decision for this
    // internal consensus => no more messages different
    // from a decision need to be pushed back!
    if ((mapK.get(new TInteger(-1)) != null) && (r > -1)) {
        // keep the entering/exiting trace balanced on this path too
        logger.exiting("FastAbcastImpl", "pushback");
        return;
    }
    TLinkedList listR = (TLinkedList) mapK.get(rObj);
    if (listR == null) {
        listR = new TLinkedList();
        mapK.put(rObj, listR);
    }
    m.tpack(new TInteger(type));
    listR.addLast(m);
    // If we add a decision, delete all pushed back
    // messages for the same k
    if ((mapK.keySet().size() > 1) && (r == -1)) {
        mapK = new TTreeMap();
        mapK.put(rObj, listR);
        pushedBack.put(kObj, mapK);
    }
    logger.exiting("FastAbcastImpl", "pushback");
}
// Triggers, in order, every TriggerItem queued in toTrigger; the list is
// empty afterwards
private void proceedWithTrigger(LinkedList toTrigger) {
    while (toTrigger.size() > 0) {
        TriggerItem next = (TriggerItem) toTrigger.removeFirst();
        trigger.trigger(next.type, next.args);
    }
}
// Records that 'source' sent a message for consensus 'kmess'. If this is
// news, the decisions kept for re-sending that are older than the smallest
// consensus number recorded for any process are dropped.
private void updateOldDecisions(long kmess, PID source) {
    long previousK = ((TLong) processesCurrentK.get(source)).longValue();
    if (kmess <= previousK) {
        // nothing new about this process
        return;
    }
    processesCurrentK.put(source, new TLong(kmess));

    // Smallest k for which a decision still has to be kept
    long lowestK = Long.MAX_VALUE;
    for (Iterator pids = processesCurrentK.keySet().iterator(); pids
            .hasNext();) {
        PID pid = (PID) pids.next();
        long pidK = ((TLong) processesCurrentK.get(pid)).longValue();
        lowestK = Math.min(lowestK, pidK);
    }

    // Discard all decisions that come earlier
    long kDecision = previousK;
    while (kDecision < lowestK) {
        decisionToBroadcast.remove(new TLong(kDecision));
        kDecision++;
    }
}
/**
 * Increases k, i.e. starts a new internal consensus: the round and the
 * timestamp are reset, <i>newEstimate</i> becomes the current estimate and
 * the first round is started. Every UPDATEK_PERIOD consensus instances,
 * sendCurrentK is called first.
 *
 * @param toTrigger
 *            list collecting the events to trigger
 * @param newEstimate
 *            initial estimate of the new consensus
 * @throws GroupCommException
 * @throws IOException
 * @throws ClassNotFoundException
 */
private void incK(LinkedList toTrigger, TLinkedHashMap newEstimate)
        throws GroupCommException, IOException, ClassNotFoundException {
    logger.entering("FastAbcastImpl", "incK");
    // Fresh state for the new consensus instance
    k = k + 1;
    estimate = newEstimate;
    timeStamp = -1;
    round = -1;
    boolean periodReached = (k % UPDATEK_PERIOD) == 0;
    if (periodReached) {
        sendCurrentK(toTrigger);
    }
    nextRound(toTrigger);
    logger.exiting("FastAbcastImpl", "incK");
}
/**
 * Starts the next round of the current consensus, then processes the
 * pushed-back messages that have become current.
 * <p>
 * The coordinator of round r is <code>known.get(r % known.size())</code>.
 * Phases set here: the coordinator is in phase 2 while it waits for
 * estimates and in phase 4 once it has proposed; the other processes are in
 * phase 3 while they wait for a proposal and in phase 5 once they have sent
 * a positive ack. A non-coordinator that suspects the new coordinator sends
 * a nack and moves on to the following round at once.
 * <p>
 * Pushed-back messages of older consensus instances and of older rounds are
 * dropped (decisions, stored under round -1, are never dropped); messages
 * of future rounds or instances stay in <i>pushedBack</i>.
 *
 * @param toTrigger
 *            list collecting the events to trigger
 * @throws GroupCommException
 *             if a pushed-back message has an unknown type
 * @throws IOException
 * @throws ClassNotFoundException
 */
private void nextRound(LinkedList toTrigger) throws GroupCommException,
        IOException, ClassNotFoundException {
    logger.entering("FastAbcastImpl", "nextRound");
    round++;
    if (round > 1000)
        System.err
                .println("WARNING: Consensus is taking too many rounds!!!");
    coordinator = (PID) known.get(round % known.size());
    coordNeedsEstimate = false;
    logger.log(Level.FINE,
            "New round: {0}. New coordinator: {1} for {2}th Consensus",
            new Object[] { new Integer(round), coordinator, new Long(k) });
    nbAck = 0;
    nbNack = 0;
    numEstimate = 0;
    if (round == 0) {
        if (optimizeFirstRound) {
            // This is the first round and it is optimized!!
            if (!myself.equals(coordinator)) {
                timeStamp = 0;
                sendAck(toTrigger, true);
                this.phase = 5;
            } else { // I am the coordinator
                this.phase = 4;
                processAck(toTrigger, k, round, null);
            }
        } else {
            // This is the first round and not optimized.
            if (myself.equals(coordinator)) {
                if (estimate.size() == 0) {
                    // If coordinator has empty estimate, it asks for estimate
                    logger.fine("No message in estimate, send coordNeedsEstimate");
                    coordNeedsEstimate = true;
                    // behave as if a majority of (empty) estimates had
                    // already been received
                    numEstimate = majority;
                    this.phase = 2;
                    sendCoordNeedsEstimate(toTrigger);
                } else {
                    // Coordinator has messages to propose, it takes them as proposal
                    timeStamp = round;
                    phase = 4;
                    sendPropose(toTrigger);
                    coordNeedsEstimate = false;
                    processAck(toTrigger, k, round, null);
                }
            } else {
                // Other processes are in phase 3 as if they have sent an estimate
                this.phase = 3;
                if (suspected.contains(coordinator)) {
                    sendAck(toTrigger, false);
                    nextRound(toTrigger);
                }
            }
        }
    } else {
        if (!myself.equals(coordinator)) {
            sendEstimate(toTrigger);
            this.phase = 3;
            if (suspected.contains(coordinator)) {
                sendAck(toTrigger, false);
                nextRound(toTrigger);
            }
        } else { // I am the coordinator
            this.phase = 2;
        }
    }
    // Treat pushedback messages
    while (!pushedBack.isEmpty()) {
        // Take messages of the smallest pushed-back consensus number
        long kpushed = ((TLong) pushedBack.firstKey()).longValue();
        if (kpushed > k) {
            // Messages for future internal consensus, we keep them and quit
            // the loop
            logger.exiting("FastAbcastImpl", "nextRound");
            return;
        }
        // Remove all possible messages from consensus kpushed (messages
        // of a past consensus are simply dropped)
        TTreeMap mapK = (TTreeMap) pushedBack.remove(new TLong(kpushed));
        if (kpushed == k) {
            while (!mapK.isEmpty()) {
                int r = ((TInteger) mapK.firstKey()).intValue();
                if (r > round) {
                    // Messages for future round, we keep them (thus, we have
                    // to put the map again in pushedBack) and quit the loop
                    pushedBack.put(new TLong(k), mapK);
                    logger.exiting("FastAbcastImpl", "nextRound");
                    return;
                }
                // We remove all messages of round r from the pushed back
                // queue
                TLinkedList l = (TLinkedList) mapK.remove(new TInteger(r));
                if ((r < round) && (r != -1)) { // Do not discard decision
                                                // messages
                    logger.log(
                            Level.FINE,
                            "Discarding old messages {0} in pushedBack for round {1} and consensus {2}",
                            new Object[] { l, new Integer(r), new Long(k) });
                } else { // r == round or r == -1
                    logger.log(
                            Level.FINE,
                            "Processing messages {0} in pushedBack for round {1} and consensus {2}",
                            new Object[] { l, new Integer(r), new Long(k) });
                    Iterator it = l.iterator();
                    while (it.hasNext()) {
                        GroupCommMessage m = (GroupCommMessage) it.next();
                        // the type was packed on top by pushback
                        int type = ((TInteger) m.tunpack()).intValue();
                        switch (type) {
                        case ABCAST_ESTIMATE:
                            processEstimate(toTrigger, kpushed, r, m);
                            break;
                        case ABCAST_PROPOSE:
                            processPropose(toTrigger, kpushed, r, m);
                            break;
                        case ABCAST_ACK:
                            processAck(toTrigger, kpushed, r, m);
                            break;
                        case ABCAST_ABORT:
                            processAbort(toTrigger, kpushed, r);
                            break;
                        case ABCAST_COORDNEEDSESTIMATE:
                            processCoordNeedsEstimate(toTrigger, kpushed, r);
                            break;
                        case ABCAST_RBCAST:
                            // processRBcast stored the source in front of
                            // the decision before pushing it back
                            PID source = (PID) m.removeFirst();
                            processRBcast(toTrigger, kpushed, m, source);
                            break;
                        default:
                            throw new GroupCommException(
                                    "Weird message type " + type
                                            + " in pushed back set!");
                        }
                    }
                }
            }
        }
    }
    logger.exiting("FastAbcastImpl", "nextRound");
}
/**
 * A new estimate has just arrived. This method processes it.
 * <p>
 * Whatever its round, the messages it contains that are neither delivered
 * nor already known are added to <i>aUndelivered</i>. An estimate for a
 * future round or consensus is additionally pushed back (as a copy); an
 * estimate that is not for the current round of the current consensus is
 * otherwise ignored. A current estimate is counted and, once a majority is
 * reached, the coordinator either proposes, asks for non-empty estimates,
 * or aborts the round if a nack was received.
 *
 * @param toTrigger
 *            list collecting the events to trigger
 * @param kmess
 *            The consensus of the estimate
 * @param rmess
 *            The round of the estimate
 * @param m
 *            The message containing the estimate (estimate::lastUpdated).
 * @throws GroupCommException
 *             if an estimate for the current round is received by a
 *             process that is not the coordinator
 * @throws IOException
 * @throws ClassNotFoundException
 */
private void processEstimate(LinkedList toTrigger, long kmess, int rmess,
        GroupCommMessage m) throws GroupCommException, IOException,
        ClassNotFoundException {
    logger.entering("FastAbcastImpl", "processEstimate");
    // Push back message for future (a clone, since m is consumed below)
    if (((rmess > round) && (kmess == k)) || (kmess > k))
        pushback(kmess, rmess, ABCAST_ESTIMATE, m.cloneGroupCommMessage());
    // m = <<estimate::lastUpdated>>
    TLinkedHashMap estim = (TLinkedHashMap) m.tunpack();
    int lastUpd = ((TInteger) m.tunpack()).intValue();
    // Add the unknown message to unordered
    // ... even if it is a past or future message
    Iterator i = estim.keySet().iterator();
    while (i.hasNext()) {
        AbcastMessageID t = (AbcastMessageID) i.next();
        long maxId = ((TLong) maxIdProProc.get(t.proc)).longValue();
        if ((!aUndelivered.containsKey(t)) && (!aDelivered.contains(t))
                && (maxId < t.id))
            aUndelivered.put(t, estim.get(t));
    }
    // Flow control
    if (nbMsgsSent >= max_locally_abcast)
        flow_control.block(fc_key);
    if ((rmess != round) || (kmess != k)) {
        // discard message for past and future
        logger.exiting("FastAbcastImpl", "processEstimate");
        return;
    }
    // I am not the coordinator => BAD!
    if (!myself.equals(coordinator))
        throw new GroupCommException(
                "Unexpected message received in round " + this.round
                        + "by non-coordinator: " + m);
    if (phase != 2) {
        // ignore
        logger.log(
                Level.FINE,
                "Discarding late ESTIMATE (phase != 2): {0}. Current round {1}, consensus {2}",
                new Object[] { m, new Integer(this.round),
                        new Long(this.k) });
        logger.exiting("FastAbcastImpl", "processEstimate");
        return;
    }
    // update the estimate if necessary:
    // either the timestamp is bigger than the last one,
    // or the timestamp is equal but the new estimate is not empty!!
    if ((lastUpd > timeStamp)
            || ((lastUpd == timeStamp) && (estimate.keySet().size() == 0) && (estim
                    .keySet().size() != 0))) {
        logger.log(Level.FINE,
                "Updating ESTIMATE with {0}. Timestamp: {1}", new Object[] {
                        estim, new Integer(lastUpd) });
        timeStamp = lastUpd;
        estimate = estim;
    }
    numEstimate++;
    if (numEstimate >= majority) {
        // If estimate is empty, then take aUndelivered as an estimate
        if (estimate.size() == 0) {
            estimate = new TLinkedHashMap();
            Iterator it = aUndelivered.keySet().iterator();
            int sizeEstimate = Math.max(MSGS_PER_CONSENSUS / 2,
                    aUndelivered.size() / 2);
            while (it.hasNext() && (estimate.size() < sizeEstimate)) {
                AbcastMessageID id = (AbcastMessageID) it.next();
                estimate.put(id, aUndelivered.get(id));
            }
        }
        // If estimate contains messages, send a proposal else
        // send coordNeedsEstimate
        if (estimate.size() > 0) {
            // We received enough estimates: we can propose a value to all
            // other processes.
            // Proceed to phase 4
            logger.fine("Got majority of ESTIMATEs. Changing phase to 4. Sending proposal");
            timeStamp = round;// BUG FIXED BY RACHELLE FUZZATI
            phase = 4;
            sendPropose(toTrigger);
            coordNeedsEstimate = false;
            // just to check if a NACK has already arrived
            processAck(toTrigger, k, round, null);
        } else {
            if (nbNack == 0) {
                if (!coordNeedsEstimate) {
                    // ASSERT: aUndelivered is empty and no coordNeedsEstimate
                    // already sent
                    logger.fine("No message in estimate, send coordNeedsEstimate");
                    coordNeedsEstimate = true;
                    sendCoordNeedsEstimate(toTrigger);
                }
            } else {
                logger.fine("Got a NACK. Going to next round");
                sendAbort(toTrigger);
                nextRound(toTrigger);
            }
        }
    }
    logger.exiting("FastAbcastImpl", "processEstimate");
}
/**
 * A proposal has just arrived from the coordinator. A proposal for a future
 * round or consensus is pushed back, one for a past round or consensus is
 * discarded. A current proposal becomes the local estimate and is
 * acknowledged positively; the process then waits in phase 5.
 *
 * @param toTrigger
 *            list collecting the events to trigger
 * @param kmess
 *            The consensus of the proposal
 * @param rmess
 *            The round of the proposal
 * @param m
 *            The message containing the proposal.
 * @throws GroupCommException
 *             if a proposal for the current round is received by the
 *             coordinator itself
 */
private void processPropose(LinkedList toTrigger, long kmess, int rmess,
        GroupCommMessage m) throws GroupCommException {
    logger.entering("FastAbcastImpl", "processPropose");
    // Push back message for future
    if (((rmess > round) && (kmess == k)) || (kmess > k))
        pushback(kmess, rmess, ABCAST_PROPOSE, m);
    if ((rmess != round) || (kmess != k)) {
        // discard message for past and future; keep the entering/exiting
        // trace balanced on this path too
        logger.exiting("FastAbcastImpl", "processPropose");
        return;
    }
    // I am the coordinator => BAD!
    if (myself.equals(coordinator)) {
        throw new GroupCommException(
                "Unexpected message received in round " + round
                        + "by coordinator: " + m);
    }
    estimate = (TLinkedHashMap) m.tunpack();
    timeStamp = round;
    logger.log(
            Level.FINE,
            "Received a PROPOSE: {0} in round {1} in consensus {2}. Setting it as estimate.",
            new Object[] { estimate, new Integer(round), new Long(k) });
    coordNeedsEstimate = false;
    // Send ack to the coordinator
    sendAck(toTrigger, true);
    this.phase = 5;
    // Nothing to do this round, wait decision, abort or coordinator
    // suspicion
    logger.exiting("FastAbcastImpl", "processPropose");
}
/**
 * An acknowledgement message (Ack or Nack) has just arrived, or the
 * coordinator re-evaluates the acknowledgements received so far (in that
 * case <i>m</i> is null).
 * <p>
 * An acknowledgement for a future round or consensus is pushed back. The
 * messages piggybacked on any other acknowledgement are added to
 * <i>aUndelivered</i> if they are new, but the acknowledgement itself is
 * only counted if it is for the current round of the current consensus.
 * With a majority of acks and no nack the decision is broadcast; with at
 * least one nack the round is aborted.
 *
 * @param toTrigger
 *            list collecting the events to trigger
 * @param kmess
 *            The consensus of the acknowledgement
 * @param rmess
 *            The round of the acknowledgement
 * @param m
 *            The message containing the acknowledgement (ack::messages),
 *            or null when no new acknowledgement has to be counted.
 * @throws GroupCommException
 *             if an acknowledgement for the current round is received by a
 *             process that is not the coordinator
 * @throws IOException
 * @throws ClassNotFoundException
 */
private void processAck(LinkedList toTrigger, long kmess, int rmess,
        GroupCommMessage m) throws GroupCommException, IOException,
        ClassNotFoundException {
    logger.entering("FastAbcastImpl", "processAck");
    if (((rmess > round) && (kmess == k)) || (kmess > k)) {
        // Push back message for future
        pushback(kmess, rmess, ABCAST_ACK, m);
        // keep the entering/exiting trace balanced on this path too
        logger.exiting("FastAbcastImpl", "processAck");
        return;
    }
    logger.log(Level.FINE,
            "About to process an ACK: {0} in round {1} consensus {2}. Received acks/nacks {3}/{4}",
            new Object[] { m, new Integer(round), new Long(k),
                    new Integer(nbAck), new Integer(nbNack) });
    if (m != null) {
        boolean ack = ((TBoolean) m.tunpack()).booleanValue();
        if ((rmess == round) && (kmess == k)) {
            // Do not take into account ack from the past!
            // I am not the coordinator => BAD!
            if (!myself.equals(coordinator)) {
                throw new GroupCommException(
                        "Unexpected message received in round " + round
                                + "by non-coordinator: " + m);
            }
            // Increment number of {ack,nack} received.
            if (ack) {
                nbAck++;
            } else {
                nbNack++;
            }
        }
        // Add messages that are not already ordered in unordered
        TLinkedHashMap msgs = (TLinkedHashMap) m.tunpack();
        Iterator it = msgs.keySet().iterator();
        while (it.hasNext()) {
            AbcastMessageID msgID = (AbcastMessageID) it.next();
            long maxId = ((TLong) maxIdProProc.get(msgID.proc)).longValue();
            if ((!aUndelivered.containsKey(msgID))
                    && (!aDelivered.contains(msgID)) && (msgID.id > maxId)) {
                aUndelivered.put(msgID, msgs.get(msgID));
            }
        }
        // Flow control
        if (nbMsgsSent >= max_locally_abcast)
            flow_control.block(fc_key);
    }
    if ((rmess == round) && (kmess == k)) {
        // Again, do not take into account ack from the past.
        // We take into account messages that arrive in Phase 2 (NACKs).
        // However, the process should not pass on to the next round
        // until it completes Phase 2.
        if ((phase == 2) && (numEstimate < majority)) {
            logger.fine("Still in phase 2 and no proposal sent");
            logger.exiting("FastAbcastImpl", "processAck");
            return;
        }
        if (nbNack == 0) {
            if (nbAck == majority) {
                // We got a majority of positive Ack's. Therefore we can
                // decide
                logger.fine("Got a majority of positive Ack's. Broadcasting decision");
                broadcastDecision(toTrigger);
            }
        } else {
            // Somebody suspected us and sent us a negative acknowledgement.
            // We might not be able to decide in this round
            // We proceed to next round
            logger.fine("Got a NACK. Going to next round");
            sendAbort(toTrigger);
            nextRound(toTrigger);
        }
    }
    logger.exiting("FastAbcastImpl", "processAck");
}
/**
 * An abort message has just arrived. An abort for a future round or
 * consensus is pushed back, one for a past round or consensus is discarded.
 * An abort for the current round makes the process go to the next round.
 *
 * @param toTrigger
 *            list collecting the events to trigger
 * @param kmess
 *            The consensus of the abort
 * @param rmess
 *            The round of the abort
 * @throws GroupCommException
 *             if an abort for the current round is received by the
 *             coordinator itself
 * @throws IOException
 * @throws ClassNotFoundException
 */
private void processAbort(LinkedList toTrigger, long kmess, int rmess)
        throws GroupCommException, IOException, ClassNotFoundException {
    logger.entering("FastAbcastImpl", "processAbort");
    // Push back message for future (an abort has no payload)
    if (((rmess > round) && (kmess == k)) || (kmess > k))
        pushback(kmess, rmess, ABCAST_ABORT, new GroupCommMessage());
    if ((rmess != round) || (kmess != k)) {
        // discard message for past and future; keep the entering/exiting
        // trace balanced on this path too
        logger.exiting("FastAbcastImpl", "processAbort");
        return;
    }
    // I am the coordinator => BAD!
    if (myself.equals(coordinator)) {
        throw new GroupCommException(
                "Unexpected message received in round " + round
                        + " in consensus " + k + " by coordinator: "
                        + " Abort");
    }
    nextRound(toTrigger);
    logger.exiting("FastAbcastImpl", "processAbort");
}
/**
 * The list of suspected processes has just changed. To process the new
 * list, first update the suspected list with the one received. Then re-send
 * to the other processes every kept decision whose original sender is now
 * suspected, and forget those decisions. Finally, send a Nack to the
 * coordinator if it is now suspected and no ack was sent to it yet, and
 * proceed to the next round.
 *
 * @param toTrigger
 *            list collecting the events to trigger
 * @param suspected
 *            The updated suspect list
 * @throws GroupCommException
 * @throws IOException
 * @throws ClassNotFoundException
 */
private void processSuspicion(LinkedList toTrigger, TSet suspected)
        throws GroupCommException, IOException, ClassNotFoundException {
    logger.entering("FastAbcastImpl", "processSuspicion");
    this.suspected = suspected;
    // Re-Send Old Decision if the sender is suspected.
    // The entries to forget are only collected here and removed after the
    // loop: removing them from the map while its key set is being iterated
    // would invalidate the iterator.
    LinkedList resent = new LinkedList();
    Iterator it = decisionToBroadcast.keySet().iterator();
    while (it.hasNext()) {
        TLong kObj = (TLong) it.next();
        GroupCommMessage decisionK = (GroupCommMessage) decisionToBroadcast
                .get(kObj);
        if (suspected.contains(decisionK.getFirst())) {
            PID source = (PID) decisionK.tunpack();
            // decisionK = <<decision::newPropose::processCurrentK>>
            decisionK.tpack(new TInteger(ABCAST_RBCAST));
            // m = <<CONS_BROADCAST::decision::newPropose::processCurrentK>>
            decisionK.tpack(kObj);
            // m = <<k::CONS_BROADCAST::decision::newPropose::processCurrentK>>
            // Broadcast decision to others except source
            for (int i = 0; i < others.size(); i++) {
                if (!source.equals(others.get(i)))
                    triggerSend(toTrigger, decisionK
                            .cloneGroupCommMessage(), (PID) others.get(i));
            }
            resent.addLast(kObj);
        }
    }
    for (Iterator done = resent.iterator(); done.hasNext();) {
        decisionToBroadcast.remove(done.next());
    }
    if (!myself.equals(coordinator) && suspected.contains(coordinator)) {
        // Send a NACK only if no ack was already sent
        if (this.phase != 5)
            sendAck(toTrigger, false);
        // Proceed to next round
        nextRound(toTrigger);
    }
    logger.exiting("FastAbcastImpl", "processSuspicion");
}
/**
 * A message CoordNeedsEstimate has just arrived. If it is for a future
 * round or consensus it is pushed back. Otherwise a new estimate is built
 * from the oldest undelivered messages and sent; if there is no undelivered
 * message, the request is remembered (unless the process is in phase 5) so
 * that it can be answered later.
 *
 * @param toTrigger
 *            list collecting the events to trigger
 * @param kmess
 *            The consensus of the request
 * @param rmess
 *            The round of the request
 * @throws IOException
 * @throws ClassNotFoundException
 */
private void processCoordNeedsEstimate(LinkedList toTrigger, long kmess,
        int rmess) throws IOException, ClassNotFoundException {
    boolean forFuture = (kmess > k) || ((kmess == k) && (rmess > round));
    if (forFuture) {
        pushback(kmess, rmess, ABCAST_COORDNEEDSESTIMATE,
                new GroupCommMessage());
        return;
    }
    if (aUndelivered.isEmpty()) {
        // Nothing to offer right now: remember the request
        if (this.phase != 5) {
            coordNeedsEstimate = true;
        }
        return;
    }
    // Take the first undelivered messages as the new estimate
    TLinkedHashMap fresh = new TLinkedHashMap();
    int limit = Math.max(MSGS_PER_CONSENSUS / 2, aUndelivered.size() / 2);
    Iterator ids = aUndelivered.keySet().iterator();
    while ((fresh.size() < limit) && ids.hasNext()) {
        AbcastMessageID id = (AbcastMessageID) ids.next();
        fresh.put(id, aUndelivered.get(id));
    }
    estimate = fresh;
    sendEstimate(toTrigger);
}
/**
* A decision has just arrived.
* <p>
* Three cases are distinguished by comparing {@code kmess} with the current
* consensus number {@code k}:
* <ul>
* <li>{@code kmess > k}: the message is pushed back for later.</li>
* <li>{@code kmess < k}: the message is dropped, except for one special
* case concerning the previous consensus (see the comments in the body).</li>
* <li>{@code kmess == k}: the decision is relayed or stored for a later
* relay, its messages are adelivered, and the next consensus is started
* through {@code incK}.</li>
* </ul>
*
* @param toTrigger
* The list accumulating the events to be triggered
* @param kmess
* The consensus number of the decision
* @param m
* The message containing the decision, followed by the new
* estimate: {@code <<decision::newPropose>>}
* @param source
* The process the decision comes from
* @throws GroupCommException
* If the local process is the coordinator and receives a second
* decision carrying a non-empty new estimate
* @throws IOException
* presumably raised by {@code deepClone} -- TODO confirm
* @throws ClassNotFoundException
* presumably raised by {@code deepClone} -- TODO confirm
*/
private void processRBcast(LinkedList toTrigger, long kmess,
GroupCommMessage m, PID source) throws GroupCommException,
IOException, ClassNotFoundException {
logger.entering("FastAbcastImpl", "processRBcast");
if (kmess > k) {
// Decision of a future consensus: prepend the source and push the
// message back (with round -1)
m.addFirst(source);
pushback(kmess, -1, ABCAST_RBCAST, m);
return;
} else if (kmess < k) {
if ((kmess == k - 1) && (round == 0) && (this.phase != 5)
&& (!optimizeFirstRound)) {
// It is possible that we delivered a decision without a newEstimate
// while we should have had one. If this is the case and we are in
// the first round, we may block!
// Thus, send an ack to the coordinator.
// Skip the decision itself: these messages are already delivered
m.tunpack();
// The next element is the new estimate attached to the decision
TLinkedHashMap newEstimate = (TLinkedHashMap) m.tunpack();
if (newEstimate.keySet().size() != 0) {
// I am the coordinator => BAD!
if (myself.equals(coordinator)) {
throw new GroupCommException(
"Unexpected message received in round "
+ round
+ " in consensus "
+ k
+ " by coordinator: "
+ " Second Decision with newEtimate != null");
}
// Adopt the new estimate as if it had come with the first decision
optimizeFirstRound = true;
estimate = newEstimate;
timeStamp = 0;
// Send an ack to the coordinator
sendAck(toTrigger, true);
this.phase = 5;
}
}
// Any other decision of a past consensus is ignored
return;
}
// From here on kmess == k.
// If the decision comes from another, non-suspected process, keep a copy
// (tagged with its source) in memory in case it needs to be resent.
// Otherwise (the decision is mine, or its source is suspected), send the
// decision to everybody right away.
if ((!source.equals(myself)) && (!suspected.contains(source))) {
GroupCommMessage decisionMessage = (GroupCommMessage) deepClone(m);
decisionMessage.tpack(source);
decisionToBroadcast.put(new TLong(k), decisionMessage);
} else {
// Send the message to the others
m.tpack(new TInteger(ABCAST_RBCAST));
// m = <<ABCAST_RBCAST::decision::newPropose>>
m.tpack(new TLong(k));
// m = <<k::ABCAST_RBCAST::decision::newPropose>>
// Broadcast the decision to the others, except to its source
for (int i = 0; i < others.size(); i++) {
if (!source.equals(others.get(i)))
triggerSend(toTrigger, m.cloneGroupCommMessage(),
(PID) others.get(i));
}
// Remove k and the message type again:
// m = <<decision::newPropose>>
m.tunpack();
m.tunpack();
}
// Feedback for flow control: a decision smaller than MSGS_PER_CONSENSUS
// raises max_locally_abcast by one (capped at 2 * MSGS_PER_CONSENSUS), a
// larger one lowers it by one (but not below MIN_LOCALLY_ABCAST)
TLinkedHashMap toBeDelivered = (TLinkedHashMap) deepClone(m.tunpack());
if(toBeDelivered.size() < MSGS_PER_CONSENSUS) max_locally_abcast = Math.min(MSGS_PER_CONSENSUS * 2, max_locally_abcast + 1);
if(toBeDelivered.size() > MSGS_PER_CONSENSUS) max_locally_abcast = Math.max(MIN_LOCALLY_ABCAST, max_locally_abcast - 1);
// Adeliver the messages contained in the decision
logger.log(
Level.FINE,
"Consensus {0}:\n\t size of decision {1}; aUndelivered size = {2}; nbMsgsSent = {3}",
new Object[] { new Long(k),
new Integer(toBeDelivered.size()),
new Integer(aUndelivered.size()),
new Integer(nbMsgsSent)});
while (!toBeDelivered.isEmpty()) {
// Take the first key returned by the iterator and remove its entry
AbcastMessageID id = (AbcastMessageID) toBeDelivered.keySet().iterator().next();
GroupCommMessage msg = (GroupCommMessage) toBeDelivered.remove(id);
// maxId: every message of id.proc with an id <= maxId is already delivered
long maxId = ((TLong) maxIdProProc.get(id.proc)).longValue();
if (!aDelivered.contains(id) && id.id > maxId) {
// Remove the id from aUndelivered
aUndelivered.remove(id);
// Add it to aDelivered
aDelivered.add(id);
// Adeliver the message
GroupCommEventArgs adeliver = new GroupCommEventArgs();
adeliver.addLast(msg);
adeliver.addLast(id.proc);
toTrigger.addLast(new TriggerItem(Constants.ADELIVER, adeliver));
// Flow control: one of my own messages has been delivered
if (id.proc.equals(myself)) {
nbMsgsSent--;
// Record the message as known by every other process
int sizeOthers = others.size();
for (int i = 0; i<sizeOthers; i++)
((CompressedSet) messagesSendToProc.get(others.get(i))).add(id);
}
// Update the highest-delivered table: fold the ids that directly
// follow maxId out of aDelivered and into the per-process counter
AbcastMessageID newID = new AbcastMessageID(id.proc, maxId + 1);
while (aDelivered.contains(newID)) {
aDelivered.remove(newID);
maxId++;
newID.id++;
}
maxIdProProc.put(id.proc, new TLong(maxId));
}
}
// Flow control: release if fewer than max_locally_abcast of my messages
// are still outstanding
if (nbMsgsSent < max_locally_abcast)
flow_control.release(fc_key);
// Start the next internal consensus
TLinkedHashMap newEstimate = (TLinkedHashMap) m.tunpack();
logger.log(
Level.FINE,
"New proposal length = {0} \t aUndel = {1} \t nbMsgsSent = {2}",
new Object[] { new Integer(newEstimate.size()),
new Integer(aUndelivered.size()),
new Integer(nbMsgsSent)});
if (newEstimate.keySet().size() != 0) {
// The decision came with a proposal for the next consensus
optimizeFirstRound = true;
} else {
// No proposal attached: build the estimate from the first undelivered
// messages, bounded by the larger of MSGS_PER_CONSENSUS/2 and half of
// aUndelivered
optimizeFirstRound = false;
newEstimate = new TLinkedHashMap();
Iterator it = aUndelivered.keySet().iterator();
int sizeEstimate = Math.max(MSGS_PER_CONSENSUS/2, aUndelivered.size()/2);
while (it.hasNext() && (newEstimate.size() < sizeEstimate)) {
AbcastMessageID id = (AbcastMessageID) it.next();
newEstimate.put(id, aUndelivered.get(id));
}
}
// Record the consensus I have just decided, then move on to the next one
processesCurrentK.put(myself, new TLong(this.k));
incK(toTrigger, newEstimate);
logger.exiting("FastAbcastImpl", "processRBcast");
}
/**
 * Sends the current estimate, together with its timestamp, to the current
 * coordinator (the local process mustn't be the coordinator).
 * <p>
 * Every id in the estimate is first recorded in the coordinator's entry of
 * {@code messagesSendToProc}, to avoid sending the same message twice.
 *
 * @param toTrigger
 *            The list accumulating the events to be triggered
 * @throws IOException
 * @throws ClassNotFoundException
 */
private void sendEstimate(LinkedList toTrigger) throws IOException,
        ClassNotFoundException {
    // Remember which messages the coordinator is about to receive
    for (Iterator ids = estimate.keySet().iterator(); ids.hasNext();) {
        AbcastMessageID sentId = (AbcastMessageID) ids.next();
        CompressedSet knownByCoord =
                (CompressedSet) messagesSendToProc.get(coordinator);
        knownByCoord.add(sentId);
    }

    // Fields are packed innermost first; the resulting layout is
    // <<k::ABCAST_ESTIMATE::round::estimate::timeStamp>>
    GroupCommMessage msg = new GroupCommMessage();
    msg.tpack(new TInteger(timeStamp));
    msg.tpack(deepClone(estimate));
    msg.tpack(new TInteger(round));
    msg.tpack(new TInteger(ABCAST_ESTIMATE));
    msg.tpack(new TLong(k));

    triggerSend(toTrigger, msg, coordinator);
}
/**
 * Sends a copy of the current estimate to all other processes as the
 * coordinator's proposal (the local process must be the coordinator).
 *
 * @param toTrigger
 *            The list accumulating the events to be triggered
 * @throws IOException
 * @throws ClassNotFoundException
 */
private void sendPropose(LinkedList toTrigger) throws IOException,
        ClassNotFoundException {
    // Fields are packed innermost first; the resulting layout is
    // <<k::ABCAST_PROPOSE::round::estimate>>
    final GroupCommMessage proposal = new GroupCommMessage();
    proposal.tpack(deepClone(estimate));
    proposal.tpack(new TInteger(round));
    proposal.tpack(new TInteger(ABCAST_PROPOSE));
    proposal.tpack(new TLong(k));

    triggerSend(toTrigger, proposal, others);
}
/**
 * Sends a message to all other processes to make them abort the current
 * round.
 *
 * @param toTrigger
 *            The list accumulating the events to be triggered
 */
private void sendAbort(LinkedList toTrigger) {
    // Fields are packed innermost first; the resulting layout is
    // <<k::ABCAST_ABORT::round>>
    final GroupCommMessage abort = new GroupCommMessage();
    abort.tpack(new TInteger(round));
    abort.tpack(new TInteger(ABCAST_ABORT));
    abort.tpack(new TLong(k));

    triggerSend(toTrigger, abort, others);
}
/**
 * Sends a message announcing the current consensus number {@code k} to all
 * other processes.
 *
 * @param toTrigger
 *            The list accumulating the events to be triggered
 */
private void sendCurrentK(LinkedList toTrigger) {
    // Fields are packed innermost first; the resulting layout is
    // <<k::ABCAST_UPDATEK>>
    final GroupCommMessage currentK = new GroupCommMessage();
    currentK.tpack(new TInteger(ABCAST_UPDATEK));
    currentK.tpack(new TLong(k));

    triggerSend(toTrigger, currentK, others);
}
/**
 * Sends a CoordNeedsEstimate message, carrying the current consensus and
 * round numbers, to all other processes.
 *
 * @param toTrigger
 *            The list accumulating the events to be triggered
 */
private void sendCoordNeedsEstimate(LinkedList toTrigger) {
    // Fields are packed innermost first; the resulting layout is
    // <<k::ABCAST_COORDNEEDSESTIMATE::round>>
    final GroupCommMessage request = new GroupCommMessage();
    request.tpack(new TInteger(round));
    request.tpack(new TInteger(ABCAST_COORDNEEDSESTIMATE));
    request.tpack(new TLong(k));

    triggerSend(toTrigger, request, others);
}
/**
 * Sends an acknowledgement message to the current coordinator. It will be an
 * Ack or a Nack depending on the parameter.
 * <p>
 * An Ack additionally piggybacks the undelivered messages of the local
 * process that are neither in the current estimate, nor delivered, nor
 * already recorded as sent to the coordinator. A Nack carries an empty map.
 *
 * @param toTrigger
 *            The list accumulating the events to be triggered
 * @param ack
 *            If it is true, an Ack is sent. Otherwise, a Nack is sent.
 */
private void sendAck(LinkedList toTrigger, boolean ack) {
    TLinkedHashMap piggybacked = new TLinkedHashMap();
    int sizeAck = Math.max(MAX_MESSAGES_PER_ACK, (aUndelivered.size()) / 2);
    // NOTE(review): this counter is never incremented (the increment is
    // disabled), so sizeAck does not actually cap the number of
    // piggybacked messages -- confirm whether that is intended.
    int nbMess = 0;

    Iterator pending = aUndelivered.keySet().iterator();
    while (pending.hasNext() && (nbMess < sizeAck)) {
        AbcastMessageID key = (AbcastMessageID) pending.next();
        // Only an Ack piggybacks messages, and only my own ones
        if (!ack || !key.proc.equals(myself)) {
            continue;
        }
        // Skip what is already in the estimate or already delivered
        if (estimate.containsKey(key) || aDelivered.contains(key)) {
            continue;
        }
        // Skip what the coordinator is already known to have
        CompressedSet knownByCoord =
                (CompressedSet) messagesSendToProc.get(coordinator);
        if (knownByCoord.contains(key)) {
            continue;
        }
        piggybacked.put(key, aUndelivered.get(key));
        knownByCoord.add(key);
    }

    // Fields are packed innermost first; the resulting layout is
    // <<k::ABCAST_ACK::round::ack::piggybacked>>
    GroupCommMessage reply = new GroupCommMessage();
    reply.tpack(piggybacked);
    reply.tpack(new TBoolean(ack));
    reply.tpack(new TInteger(round));
    reply.tpack(new TInteger(ABCAST_ACK));
    reply.tpack(new TLong(k));

    triggerSend(toTrigger, reply, coordinator);
}
/**
 * Builds the decision message from the current estimate and hands it to
 * {@code processRBcast} with the local process as source, which delivers
 * it locally and sends it to the others.
 * <p>
 * When the coordinator is the first process of {@code known}, a proposal
 * for the next consensus is attached: undelivered messages that are not
 * part of the current estimate and not yet delivered, bounded by the larger
 * of {@code MSGS_PER_CONSENSUS/2} and half of {@code aUndelivered}.
 * Otherwise the attached proposal is empty.
 *
 * @param toTrigger
 *            The list accumulating the events to be triggered
 * @throws GroupCommException
 * @throws IOException
 * @throws ClassNotFoundException
 */
private void broadcastDecision(LinkedList toTrigger)
        throws GroupCommException, IOException, ClassNotFoundException {
    TLinkedHashMap nextProposal = new TLinkedHashMap();
    if (coordinator.equals(known.get(0))) {
        Iterator pending = aUndelivered.keySet().iterator();
        int limit = Math.max(MSGS_PER_CONSENSUS / 2, aUndelivered.size() / 2);
        while (pending.hasNext() && (nextProposal.size() < limit)) {
            AbcastMessageID candidate = (AbcastMessageID) pending.next();
            long highestDelivered =
                    ((TLong) maxIdProProc.get(candidate.proc)).longValue();
            boolean alreadyCovered = estimate.containsKey(candidate)
                    || aDelivered.contains(candidate)
                    || (candidate.id <= highestDelivered);
            if (!alreadyCovered) {
                nextProposal.put(candidate, aUndelivered.get(candidate));
            }
        }
    }

    // Resulting layout: <<decision::newPropose>>
    GroupCommMessage decision = new GroupCommMessage();
    decision.tpack(nextProposal);
    decision.tpack(estimate);

    // Deliver the decision to myself and send it to the others
    processRBcast(toTrigger, k, decision, myself);
}
/**
 * Triggers one <i>PointToPointSend</i> event per process of the given list,
 * each carrying its own clone of the message.
 *
 * @param toTrigger
 *            The list accumulating the events to be triggered
 * @param m
 *            The message to be sent.
 * @param g
 *            The processes that the message is to be sent to.
 */
private void triggerSend(LinkedList toTrigger, GroupCommMessage m, TList g) {
    int idx = 0;
    while (idx < g.size()) {
        GroupCommMessage copy = m.cloneGroupCommMessage();
        PID destination = (PID) g.get(idx);
        triggerSend(toTrigger, copy, destination);
        idx++;
    }
}
/**
 * Triggers a single <i>PointToPointSend</i> event: the event is logged and
 * appended to the list of events to be triggered.
 *
 * @param toTrigger
 *            The list accumulating the events to be triggered
 * @param m
 *            The message to be sent.
 * @param p
 *            The destination process.
 */
private void triggerSend(LinkedList toTrigger, GroupCommMessage m, PID p) {
    GroupCommEventArgs sendArgs = new GroupCommEventArgs();
    sendArgs.addLast(m);
    sendArgs.addLast(p);
    // Third argument: the send is not promiscuous
    sendArgs.addLast(new TBoolean(false));

    Object[] logParams = new Object[] { m, p };
    logger.log(Level.FINE, "Sending Pt2Pt message {0} to {1}", logParams);

    TriggerItem sendEvent = new TriggerItem(Constants.PT2PTSEND, sendArgs);
    toTrigger.addLast(sendEvent);
}
/**
 * Used for debug: prints the state of this protocol instance (current
 * consensus, estimate, known processes, undelivered and delivered messages,
 * pending decisions and pushed-back messages).
 * <p>
 * The stream is flushed before returning, so that the dump is visible even
 * when {@code out} is buffered. The stream is not closed.
 *
 * @param out The output stream used for showing infos
 */
public void dump(OutputStream out) {
    PrintStream err = new PrintStream(out);
    err.println("======== FastAbcastImpl: dump =======");
    err.println(" Initialized: " + String.valueOf(initialized));
    err.println(" Current internal consensus: " + k);
    err.println(" Current estimate: " + estimate);
    err.println(" Last AbcastMessage id used:\n\t" + abcastId);
    err.println(" Number of known undelivered messages: "
            + aUndelivered.size());

    err.println(" Known processes: size: " + known.size());
    for (Iterator it = known.iterator(); it.hasNext();) {
        PID pid = (PID) it.next();
        // String concatenation is null-safe, unlike an explicit toString()
        err.println("\t" + pid);
    }

    err.println(" A-Undelivered messages:");
    err.println(" " + aUndelivered.toString());

    err.println(" A-Delivered messages IDs:");
    for (Iterator it = aDelivered.iterator(); it.hasNext();) {
        AbcastMessageID id = (AbcastMessageID) it.next();
        err.println("\t" + id);
    }
    err.println(" and all message with id <= ");
    err.println("\t" + maxIdProProc.toString());

    err.println(" Decision that are not surely delivered by everyone: ");
    err.println(" " + decisionToBroadcast);
    err.println(" Last decision taken by processes: ");
    err.println(" " + processesCurrentK);
    err.println(" Messages Pushback: ");
    err.println(" " + pushedBack);
    err.println("==================================");

    // The PrintStream is created without auto-flush: flush explicitly so
    // nothing stays behind in a buffered underlying stream.
    err.flush();
}
}