/**
* Fortika - Robust Group Communication
* Copyright (C) 2002-2006 Sergio Mena de la Cruz (EPFL) (sergio.mena@epfl.ch)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package groupcomm.common.consensus;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;
import uka.transport.Transportable;
import framework.Compressable;
import framework.CompressedSet;
import framework.Constants;
import framework.GroupCommEventArgs;
import framework.GroupCommException;
import framework.GroupCommMessage;
import framework.PID;
import framework.libraries.DefaultSerialization;
import framework.libraries.FlowControl;
import framework.libraries.Trigger;
import framework.libraries.serialization.TBoolean;
import framework.libraries.serialization.TCollection;
import framework.libraries.serialization.THashMap;
import framework.libraries.serialization.THashSet;
import framework.libraries.serialization.TInteger;
import framework.libraries.serialization.TList;
import framework.libraries.serialization.TMap;
import framework.libraries.serialization.TSet;
/**
* <b> Class implementing the common code for Consensus building block. </b>
* <br>
* This implementation is able to start several consensus instances at the same
* time.
* <hr>
* <b> Handled events: </b>
* <dl>
* <dt> <i>Propose</i> </dt>
* <dd> Start a new consensus execution. </dd>
* <dt> <i>Pt2PtDeliver</i> </dt>
* <dd> Receive a new point-to-point message from the underlying layer. </dd>
* <dt> <i>Suspect</i> </dt>
* <dd> Reports about currently suspected processes. It should be triggered
* every time the list of suspected changes. </dd>
* </dl>
*/
public class Consensus {
/**
* PID of the local process
*/
private PID myself;
/**
* Interface to trigger events to the outside. It has to be implemented by
* the wrapping framework component.
*/
private Trigger trigger;
/**
* Interface to the flow control library. The common code assumes it to be
* built in the framework.
*/
private FlowControl flow_control;
/**
* Unique key to interact with the flow control library without interfereing
* with other layers.
*/
private int fc_key = -1;
/**
* Maximum number of consensus in parallel.
*/
public static final int MAX_CONSENSUS = 100;
/**
* Number of Consensus currently running
*/
private int nbConsensus = 0;
/**
* Set of suspected processes. It is handed over to the newly created
* consensus instances. It is updated every time a <i>suspect</i> event is
* handled.
*/
private TSet suspected = new THashSet();
/**
* In this map, the keys are the processes that take part in at least one of
* all ongoing consensus (so, they are being monitored by the Failure
* Detector). Each of the processes is mapped to a list of all consensus
* instance numbers in which it participates.
*/
private TMap monitored = new THashMap();
/**
* It contains the instances of all ongoing consensus
*/
private TMap executions = new THashMap();
/**
* It contains all decisions that arrived too early (decisions whose
* consensus hasn't yet started in the local process.
*/
private TMap decided = new THashMap();
/**
* Introduced for optimization. Now the process stops monitoring processes of the
* last finished consensus but 2.
*/
private Transportable remove_k;
/**
* Added By ORUTTI. Is used if we receive a decision before the proposal
*/
private TSet waitPropose = new THashSet();
/**
* It contains the instance number of all finished consensus.
*/
//private TSet finished = new THashSet();
private CompressedSet finished = new CompressedSet();
// K proposed by the user
// private HashMap userK = new HashMap();
/*
* A given instance of consensus can be in these states: 1) Not started:
* executions ?= null; dec == null; finished == null; 2) Started: executions !=
* null; dec == null; finished == null; 3) Decided: executions == null; dec !=
* null; finished ==null; 4) Finished: executions == null; dec == null;
* finished != null;
*/
/**
* It logs debugging messages.
*/
private static final Logger logger = Logger.getLogger(Consensus.class
.getName());
/**
* Constructor. <br>
* It inisialises flow control, and copies serveral parameters.
*/
public Consensus(Trigger trigger, FlowControl fc, PID myself) {
logger.entering("ConsensusHandlers", "<constr>");
this.flow_control = fc;
// flow_control.setThreshold(fc_key, MAX_CONSENSUS);
this.trigger = trigger;
this.myself = myself;
this.nbConsensus = 0;
logger.exiting("ConsensusHandlers", "<constr>");
}
/**
* Handler for event <i>Propose</i>. It spawns a new consensus instance.
*
* @param e
* <dl>
* <dt> arg1 : Set[PID] </dt>
* <dd> The group of processes that participate in the new
* instance of consensus. </dd>
* <dt> arg2 : Object </dt>
* <dd> The value proposed by the local process for the new
* instance. </dd>
* <dt> arg3 : Object </dt>
* <dd> The instance number of the new instance. </dd>
* </dl>
*/
public void handlePropose(GroupCommEventArgs e) throws GroupCommException,
IOException, ClassNotFoundException {
logger.entering("ConsensusHandlers", "handlePropose");
//this sould be included in handleInit, but there's none here
if(fc_key == -1) fc_key = flow_control.getFreshKey();
TList group = (TList) e.get(0);
Transportable o = (Transportable) e.get(1);
Transportable k_parObj = (Transportable) e.get(2);
// Long k_parObj = ((Long) e.get(2));
// long k_par = k_parObj.longValue();
logger
.log(
Level.FINE,
"Running consensus#{2}\n\tProcessGroup: {0}\tProposed Value: {1}",
new Object[] { group, o, k_parObj });
// Look for duplicate processes in the group
// At the same time, check if the localhost is in the group
boolean found = false;
for (int i = 0; i < group.size(); i++) {
found = found || myself.equals(group.get(i));
for (int j = i + 1; j < group.size(); j++)
if (group.get(i).equals(group.get(j)))
throw new GroupCommException("Process" + group.get(i)
+ " appears more than once in the group.");
}
if (!found) {
throw new GroupCommException("Consensus: The localhost " + myself
+ " is not in the group passed as parameter: " + group);
}
// Flow control
nbConsensus++;
if (nbConsensus >= MAX_CONSENSUS)
flow_control.block(fc_key);
// flow_control.alloc(fc_key, 1);
// Set instance number
// if (k_par < 0)
// k_par = k + 1;
// if (k_par <= k)
// throw new GroupCommException(
// "Bad instance number (should be greather than)" + k);
// for (long tune = k + 1; tune < k_par; tune++)
// finished.add(tune);
// this.k = k_par;
if (group.size() == 1) {
// Consensus with one only process
// We decide the value proposed
triggerDecision(deepClone(o), k_parObj);
logger.exiting("ConsensusHandlers", "handlePropose");
return;
}
Transportable decision = (Transportable) decided.get(k_parObj);
// Has its decision already arrived??
if (decision != null) {
decided.remove(k_parObj);
Transportable clone = (Transportable) deepClone(decision);
// reSendDecision(decision, k_parObj, group, null); DONE EARLIER
executions.remove(k_parObj);
triggerDecision(clone, k_parObj);
removeProcesses(k_parObj);
logger.exiting("ConsensusHandlers", "handlePropose");
return;
}
if (finished.contains((Compressable) k_parObj))
throw new GroupCommException(
"Impossible to finish before starting consensus!");
addProcesses(k_parObj, group);
getExecution(k_parObj).processStart(o, group);
// START: ADDED BY ORUTTI
if (waitPropose.contains(k_parObj)){
decision = getExecution(k_parObj).firstEstimate();
if (decision != null) {
waitPropose.remove(k_parObj);
decided.remove(k_parObj);
Transportable clone = (Transportable) deepClone(decision);
// reSendDecision(decision, k_parObj, group, null); DONE EARLIER
executions.remove(k_parObj);
triggerDecision(clone, k_parObj);
removeProcesses(k_parObj);
logger.exiting("ConsensusHandlers", "handlePropose");
return;
}
}
// END: ADDED BY ORUTTI
logger.exiting("ConsensusHandlers", "handlePropose");
}
/**
* Handler for event <i>Pt2PtDeliver</i>. The are four types of messages:
* ESTIMATE, PROPOSE, ACK, RBCAST. This handler only call the corresponding
* method in the ConsensusExecution object associated with the messages
* execution.
*
* @param e
* <dl>
* <dt> arg1 : GroupCommMessage </dt>
* <dd> The message. </dd>
* <dt> arg2 : PID </dt>
* <dd> The sending process. </dd>
* </dl>
*/
public void handlePt2PtDeliver(GroupCommEventArgs e)
throws GroupCommException, IOException, ClassNotFoundException {
logger.entering("ConsensusHandlers", "handlePt2PtDeliver");
GroupCommMessage m = (GroupCommMessage) e.get(0);
// m = <<k::type::payload>>
Transportable kmessObj = m.tunpack();
// m = <<type::payload>>
if (finished.contains((Compressable) kmessObj) || decided.containsKey(kmessObj)) {
logger
.log(
Level.FINE,
"Late message to Consensus instance {0}. Discarding it: {1}",
new Object[] { kmessObj, m });
logger.exiting("ConsensusHandlers", "handlePt2PtDeliver");
return;
}
PID source = (PID) e.get(1);
int type = ((TInteger) m.tunpack()).intValue();
// m = <<payload>>
switch (type) {
case ConsensusExecution.CONS_ESTIMATE:
// m = <<r::estimate::lastupdated>>
int r = ((TInteger) m.tunpack()).intValue();
getExecution(kmessObj).processEstimate(r, m);
break;
case ConsensusExecution.CONS_PROPOSE:
// m = <<r::propose>>
r = ((TInteger) m.tunpack()).intValue();
getExecution(kmessObj).processPropose(r, m);
if (waitPropose.contains(kmessObj)){
Transportable decision = getExecution(kmessObj).firstEstimate();
if (decision != null){
if (getExecution(kmessObj).hasStarted()){
waitPropose.remove(kmessObj);
executions.remove(kmessObj);
removeProcesses(kmessObj);
triggerDecision(decision, kmessObj);
} else {
decided.put(kmessObj, decision);
}
}
}
break;
case ConsensusExecution.CONS_ACK:
// m = <<r::ack>>
r = ((TInteger) m.tunpack()).intValue();
getExecution(kmessObj).processAck(r, m);
break;
case ConsensusExecution.CONS_ABORT:
// m = <<r>>
r = ((TInteger) m.tunpack()).intValue();
getExecution(kmessObj).processAbort(r);
break;
case ConsensusExecution.CONS_DECIDETAG:
// START: ADDED BY ORUTTI
if (getExecution(kmessObj).hasStarted()) {
Transportable decision = getExecution(kmessObj).firstEstimate();
if (!myself.equals(source)) {
TList group = (TList) m.tunpack();
// m = <<>>
reSendDecisionTag(kmessObj, group, source);
}
// THE DECISION TAG IS ALREADY ARRIVED BUT NOT THE PROPOSITION
if (decision == null){
waitPropose.add(kmessObj);
} else {
executions.remove(kmessObj);
removeProcesses(kmessObj);
triggerDecision(decision, kmessObj);
}
} else {
waitPropose.add(kmessObj);
TList group = (TList) m.tunpack();
// m = <<>>
reSendDecisionTag(kmessObj, group, source);
}
break;
// END: ADDED BY ORUTTI
case ConsensusExecution.CONS_RBCAST:
// m = <<decision::group>>
Transportable decision = m.tunpack();
// m = <<group>>
if (getExecution(kmessObj).hasStarted()) {
Transportable clone = deepClone(decision);
if (!myself.equals(source)) {
TList group = (TList) m.tunpack();
// m = <<>>
reSendDecision(decision, kmessObj, group, source);
}
executions.remove(kmessObj);
removeProcesses(kmessObj);
triggerDecision(clone, kmessObj);
} else {
decided.put(kmessObj, decision);
// if (!myself.equals(source)) {Impossible!!
TList group = (TList) m.tunpack();
// m = <<>>
reSendDecision(decision, kmessObj, group, source);
//}
}
break;
default:
throw new GroupCommException("Consensus : handlePt2Ptdeliver : "
+ "Unknown message type: " + type);
}
logger.exiting("ConsensusHandlers", "handlePt2PtDeliver");
}
/**
* Handler for event <i>Suspect</i>. Every time the Failure Detector
* changes its suspect list, it triggers an event that this handler is bound
* to.
*
* @param e
* <dl>
* <dt> arg1 : Set[PID] </dt>
* <dd> The updated suspect list. </dd>
* </dl>
*/
public void handleSuspect(GroupCommEventArgs e) throws GroupCommException, IOException, ClassNotFoundException {
logger.entering("ConsensusHandlers", "handleSuspect");
suspected = (TSet) e.get(0);
Iterator it = executions.values().iterator();
// "it" contains an UNORDERED sequence of executions
while (it.hasNext()) {
ConsensusExecution exe = (ConsensusExecution) it.next();
exe.processSuspicion(suspected);
}
logger.exiting("ConsensusHandlers", "handleSuspect");
}
/**
* Callback function. Added by ORUTTI
*/
public void decisionTaken(Transportable decision, Transportable kObj){
executions.remove(kObj);
removeProcesses(kObj);
triggerDecision(decision, kObj);
}
/**
* Trigger a <b>decision</b> event with the instance number that has just
* decided and the decided value.
*
* @param o
* The value decided
* @param k
* The instance number
*/
private void triggerDecision(Transportable o, Transportable k) {
logger.entering("ConsensusHandlers", "triggerDecision");
nbConsensus--;
if (nbConsensus < MAX_CONSENSUS)
flow_control.release(fc_key);
finished.add((Compressable) k);
GroupCommEventArgs e = new GroupCommEventArgs();
e.addLast(o);
e.addLast(k);
logger.log(Level.FINE, "Triggering decision#{1}: {0}", new Object[] {
o, k });
trigger.trigger(Constants.DECIDE, e);
logger.exiting("ConsensusHandlers", "triggerDecision");
}
/**
* Makes a copy in memory of the parameter. If the parameter has references
* to other objects, they are also cloned. This is necessary to avoid
* side-effects at higher-lever protocols.
*
* @param o
* The object to deep-clone
* @return
* @throws IOException
* @throws ClassNotFoundException
*/
private Transportable deepClone(Transportable o) throws IOException,
ClassNotFoundException {
// TODO: There have to be better ways to do deep-clone!!!
return DefaultSerialization
.unmarshall(DefaultSerialization.marshall(o));
}
/**
* Sends the decision again. This is done to simulate the bahaviour of
* Reliable Broadcast in static environments.
*
* @param decision
* The value decided
* @param kObj
* The instance number
* @param group
* The group of processes to which the message has to be sent
* @param dontsend
* A PID to which it is not necessary to send the message
* (optimization)
*/
private void reSendDecision(Transportable decision, Transportable kObj,
TList group, PID dontsend) {
GroupCommMessage decisionMessage = new GroupCommMessage();
// m = <<>>
decisionMessage.tpack(group);
// m = <<group>>
decisionMessage.tpack(decision);
// m = <<decision::group>>
decisionMessage.tpack(new TInteger(ConsensusExecution.CONS_RBCAST));
// m = <<CONS_BROADCAST::decision::group>>
decisionMessage.tpack(kObj);
// m = <<k::CONS_BROADCAST::decision::group>>
// Broadcast to the f following processes
int index = group.indexOf(myself);
int f = (group.size() / 2); // Since group does not contain the initial
// sender of decision
for (int i = 1; i <= f; i++) {
PID pi = (PID) group.get((index + i) % group.size());
// if (!pi.equals(myself)
// && (dontsend == null || !pi.equals(dontsend))) {
// SHOULD BE IMPOSSIBLE
if (/*!pi.equals(myself) &&*/ !pi.equals(dontsend)) {
GroupCommEventArgs pt2ptSend = new GroupCommEventArgs();
pt2ptSend.addLast(decisionMessage.cloneGroupCommMessage());
pt2ptSend.addLast(pi);
pt2ptSend.addLast(new TBoolean(false));
// not promisc
logger.log(Level.FINE, "Sending Broadcast message {0} to {1}",
new Object[] { decisionMessage, pi });
trigger.trigger(Constants.PT2PTSEND, pt2ptSend);
}
}
}
/**
* Sends the decision tag again. This is done to simulate the bahaviour of
* Reliable Broadcast in static environments.
*
* @param kObj
* The instance number
* @param group
* The group of processes to which the message has to be sent
* @param dontsend
* A PID to which it is not necessary to send the message
* (optimization)
*/
//ADDED BY ORUTTI
private void reSendDecisionTag(Transportable kObj,
TList group, PID dontsend) {
GroupCommMessage decisionMessage = new GroupCommMessage();
// m = <<>>
decisionMessage.tpack(group);
// m = <<group>>
decisionMessage.tpack(new TInteger(ConsensusExecution.CONS_DECIDETAG));
// m = <<CONS_DECIDETAG::group>>
decisionMessage.tpack(kObj);
// m = <<k::CONS_DECIDETAG::group>>
// Broadcast to the f following processes
int index = group.indexOf(myself);
int f = (group.size() / 2); // Since group does not contain the initial
// sender of decision
for (int i = 1; i <= f; i++) {
PID pi = (PID) group.get((index + i) % group.size());
// if (!pi.equals(myself)
// && (dontsend == null || !pi.equals(dontsend))) {
// SHOULD BE IMPOSSIBLE
if (/*!pi.equals(myself) &&*/ !pi.equals(dontsend)) {
GroupCommEventArgs pt2ptSend = new GroupCommEventArgs();
pt2ptSend.addLast(decisionMessage.cloneGroupCommMessage());
pt2ptSend.addLast(pi);
pt2ptSend.addLast(new TBoolean(false));
// not promisc
logger.log(Level.FINE, "Sending Broadcast message {0} to {1}",
new Object[] { decisionMessage, pi });
trigger.trigger(Constants.PT2PTSEND, pt2ptSend);
}
}
}
/**
* Updates the monitored map with the process group of a new instance. It
* also starts monitoring completely new processes.
*
* @param group
* The group of process that will take part in current consensus.
*/
private void addProcesses(Transportable kObj, TList group) {
logger.entering("ConsensusHandlers", "addProcesses");
THashSet start = new THashSet();
// For each p in group
Iterator i = group.iterator();
while (i.hasNext()) {
PID p = (PID) i.next();
TSet ins = (TSet) monitored.get(p);
// ins is the set of instances (long) that p takes part of
if (ins == null) {
start.add(p);
ins = new THashSet();
monitored.put(p, ins);
}
ins.add(kObj);
}
// Start = new processes
if (!start.isEmpty()) {
GroupCommEventArgs e1 = new GroupCommEventArgs();
e1.addLast(start); // Start //TODO: clone start? it's used below as
// well...
e1.addLast(new THashSet()); // Stop
trigger.trigger(Constants.STARTSTOPMONITOR, e1);
GroupCommEventArgs e2 = new GroupCommEventArgs();
e2.addLast(start); // Join
e2.addLast(new THashSet()); // Remove
trigger.trigger(Constants.JOINREMOVELIST, e2);
}
logger.exiting("ConsensusHandlers", "addProcesses");
}
/**
* Removes consensus instance number k2 from all processes that appear in
* map <i>monitored</i> map. If some updated process has no more instances
* in its mapped list it is removed from the map (since it doesn't take part
* in any remaining consensus)
*
* @param k2
* The consensus instance that has just finished
*/
private void removeProcesses(Transportable kObj) {
logger.entering("ConsensusHandlers", "removeProcesses");
// Sergio - 9 mar 2006 - added for optimization
Transportable tmp = remove_k;
remove_k = kObj;
if(tmp == null) return;
THashSet stop = new THashSet();
// For each p in group
TCollection keys = monitored.keySet();
Iterator i = keys.iterator();
while (i.hasNext()) {
PID p = (PID) i.next();
TSet ins = (TSet) monitored.get(p);
// ins is the set of instances (long) that p takes part of
ins.remove(tmp);
if (ins.isEmpty()) {
stop.add(p);
}
}
// Stop = processes that don't take part in any of
// the remaining consensus
i = stop.iterator();
while (i.hasNext()) {
PID p = (PID) i.next();
monitored.remove(p);
// Side effect: we're modifying all remaining instances, but
// p doesn't take part in any of them
suspected.remove(p);
}
if (!stop.isEmpty()) {
GroupCommEventArgs e1 = new GroupCommEventArgs();
e1.addLast(new THashSet()); // Start
e1.addLast((THashSet) stop.clone()); // Stop
trigger.trigger(Constants.STARTSTOPMONITOR, e1);
GroupCommEventArgs e2 = new GroupCommEventArgs();
e2.addLast(new THashSet()); // Join
e2.addLast(stop); // Remove
trigger.trigger(Constants.JOINREMOVELIST, e2);
}
logger.exiting("ConsensusHandlers", "removeProcesses");
}
/**
* Return the execution object mapped to a given instance number. If there
* is still no execution mapped to the instance number it maps a fresh one.
*
* @param k
* The instance number whose execution will be returned.
* @return The execution mapped to that instance number.
*/
private ConsensusExecution getExecution(Transportable o) {
logger.entering("ConsensusHandlers", "getExecution");
ConsensusExecution exec = (ConsensusExecution) executions.get(o);
if (exec == null) {
// need to clone suspected??
exec = new ConsensusExecution(myself, o, suspected, trigger);
executions.put(o, exec);
}
logger.exiting("ConsensusHandlers", "getExecution");
return exec;
}
/**
* Used for debugging. </br> Undocumented.
*
* @param out
* An output stream
*/
public void dump(OutputStream out) {
PrintStream err = new PrintStream(out);
err.println("===== Consensus: dump =====");
err.println("All executions: " + executions);
err.println("Decisions arrived: " + decided);
err.println("Finished executions: " + finished);
err.println("Processes suspected: " + suspected);
err.println("Processes monitored: " + monitored);
err.println("===================================");
}
}