/**
* Fortika - Robust Group Communication
* Copyright (C) 2002-2006 Sergio Mena de la Cruz (EPFL) (sergio.mena@epfl.ch)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package groupcomm.common.consensus;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import uka.transport.DeepClone;
import uka.transport.MarshalStream;
import uka.transport.Transportable;
import uka.transport.UnmarshalStream;
import framework.Constants;
import framework.GroupCommEventArgs;
import framework.GroupCommException;
import framework.GroupCommMessage;
import framework.PID;
import framework.libraries.Trigger;
import framework.libraries.serialization.TArrayList;
import framework.libraries.serialization.TBoolean;
import framework.libraries.serialization.TInteger;
import framework.libraries.serialization.TList;
/**
* Cette classe g�re une ex�cution de ConsensusMR. <br>
*/
public class ConsensusPaxosExecution implements Transportable{
//TODO: remove "implements Transportable"
/**
* (Exclusive) variants/optimizations.
*/
private int optVariant;
/**
* Variant flag: basic paxos.
*/
protected static final int P0 = 0;
/**
* Variant flag: skip 1st read.
*/
protected static final int P1 = 1;
private static final Logger logger =
Logger.getLogger(ConsensusPaxosExecution.class.getName());
private Trigger trigger;
/**
* Identifiers of ConsensusPAXOS messages
*/
public static final int CONS_READ = 15965;
public static final int CONS_ACKREAD = 15966;
public static final int CONS_NACKREAD = 15967;
public static final int CONS_ACKWRITE = 15968;
public static final int CONS_NACKWRITE = 15969;
public static final int CONS_WRITE = 15970;
public static final int CONS_DECISION = 15971;
/* VARIABLES GLOBALES DE CONSENSUS */
private PID myself;
/** le num�ro du consensus.
*/
private Transportable k;
/** The serial number of the current round.
* -1 means that the algorithm has not started yet.
* +infinity means that the algorithm has finished
* (with a decision).
*/
private int round = -1;
/**le leader courant
*/
private PID leader;
/**l'estimation courante du processus
*/
private Transportable estimate;
/**
* Number of processes that execute this consensus algorithm.
*/
private int n;
/**
* Latest read and write phase.
*/
private int write = -1;
private int read = -1;
/**
* Number of [Read|Write]Ack] received so far, in the current round.
*/
private int nbAckRead;
private int nbAckWrite;
private int nbNack;
private int highestWrite;
/**
* The round at which the process initiated a propose.
*/
private int proposedRound = -1;
// Le nombre de messages � recevoir lors de l'attente d'une majorit� de messages.
private int limit;
protected int nackLimit;
// Un tableau qui contient tous les processus du groupe sauf soi-m�me.
private TList others;
// Un tableau qui contient tous les processus du groupe.
private TList group;
private boolean ignoreSuspicions = false;
public ConsensusPaxosExecution(PID myself,
Transportable k,
Trigger trigger,
int optVariant) {
this.myself = myself;
this.k = k;
this.leader = null;
this.trigger = trigger;
this.optVariant = optVariant;
}
/**
* Lance l'�x�cution de consensus.
*/
public void processStart(Transportable proposal, TList group)
throws GroupCommException {
estimate = proposal;
// Set the number of processes
n = group.size();
// List of all processes in the group
//this.group = new PID[n];
this.group = new TArrayList();
// List of all processes in the group but myself
this.others = new TArrayList();
for (int i = 0; i < n; i++) { //TODO: Optimize!
PID p = (PID) group.get(i);
this.group.add(p);
if (!p.equals(myself)) {
this.others.add(p);
}
}
// Limit for ack and estimate messages
limit = n / 2;
nackLimit = 1;
// If no leader is choosed, take the first member of the list
if (this.leader == null)
this.leader = (PID) this.group.get(0);
if (round != -1)
throw new GroupCommException("ConsensusPaxosExecution: Calling start while round != -1!!");
round = group.indexOf(myself);
processPropose();
ignoreSuspicions = false;
}
/**
* Commence la proc�dure de proposition. Si l'optimisation est choisie
* on envois directement un CONS_WRITE avce notre estimate au lieu du
* CONS_READ pr�liminaire.
*/
public void processPropose()
throws GroupCommException {
if ((!hasDecided()) && (isLeader()) && (round!=proposedRound)){
if (optVariant == P0 || (optVariant == P1 && round != 0)) {
sendRead();
read = round;
} else if (optVariant == P1 && round == 0) {
// regular Paxos without READ on round 1.
sendWrite();
read = round;
write = round;
// FIXME is that correct ?
} else {
throw new GroupCommException("ConsensusPaxosExecution: Invalid option!!!");
}
// reset internal counters
nbAckRead = 0;
nbAckWrite = 0;
nbNack = 0;
highestWrite = write;
proposedRound = round;
}
}
/**
* Re�oit un read. Le consid�re seulement s'il est d'un round non
* connu (-> sup�rieur au round connu actuellement
*/
public void processRead(int r, PID source) {
if ((read > r) || (write > r))
sendNackRead(source, r);
else {
read = r;
sendAckRead(source, r);
}
}
/**
* Re�oit un write. Le consid�re seulement s'il est d'un round non
* connu (-> sup�rieur au round connu actuellement
*/
public void processWrite(int r, PID source, Transportable estimateFromW) {
if ((read > r) || (write > r))
sendNackWrite(source, r);
else {
write = r;
estimate = estimateFromW;
sendAckWrite(source, r);
}
}
/**
* Re�oit un ackRead. Envoi un write si on a obtenu le nombre limite d'ack
*/
public void processAckRead(int r, int lastWrite, Transportable estimateFromAck) {
if ((r == round) && (nbNack == 0)) {
nbAckRead++;
if (lastWrite > highestWrite) {
estimate = estimateFromAck;
highestWrite = lastWrite;
}
if (nbAckRead == limit){
if (write < round)
write = round;
highestWrite = Integer.MAX_VALUE;
sendWrite();
}
}
}
/**
* Re�oit un nack.
*/
public void processNack(int r)
throws GroupCommException {
if (hasDecided())
return;
if (r == round) {
nbNack++;
if (((nbAckWrite + nbNack) == limit) && (nbNack>nackLimit))
processNackOnLimit();
}
}
/**
* Re�oit un ackWrite. Decide si le nombre d'ack+nack>limit et que l'on
* a pas re�u un nombre de nack>=nackLimit
*/
public void processAckWrite(int r)
throws GroupCommException {
// check if the ack comes from our current round
if (r == round) {
nbAckWrite++;
if ((nbAckWrite + nbNack) == limit) {
if (nbNack < nackLimit)
// decide
broadcastDecision();
else
processNackOnLimit();
}
}/* else {
throw new GroupCommException("ConsensusPaxosExecution: Invalid Ack Write!!!");
}*/
}
/**
* Envoit d'un NACK au coordinateur si et seulement si celui-ci est suspect�.
*/
public void processNewLeader(PID newLeader)
throws GroupCommException {
this.leader = newLeader;
if (!ignoreSuspicions)
processAbort();
}
private void processAbort()
throws GroupCommException {
//only a leader can abort
if (!isLeader() || hasDecided())
return;
processPropose();
}
/**
* Indique si ce processus est leader. <br>
*
* @return Ce processsus est leader.
*/
private boolean isLeader(){
return (leader.equals(myself));
}
/**
* Indique si le consensus a d�j� d�cid� pour ce processus. <br>
*
* @return Ce consensus a d�j� d�cid�.
*/
private boolean hasDecided() {
return (round >= Integer.MAX_VALUE);
}
/**
* Indique si le consensus a d�j� commencer pour ce processus. <br>
*
* @return Ce consensus a d�j� commencer.
*/
public boolean hasStarted() {
return round > -1;
}
/**
* Appel�e si le nombre Ack+Nack>limit et Nack > nackLimit
*/
private void processNackOnLimit()
throws GroupCommException {
// P0,P1 : increment round and restart loop
round = round + n;
processPropose();
}
private void sendRead() {
GroupCommMessage proposeMessage = new GroupCommMessage();
//m = <<>>
proposeMessage.tpack(new TInteger(round));
//m = <<round>>
proposeMessage.tpack(new TInteger(CONS_READ));
//m = <<CONS_READ::round>>
proposeMessage.tpack(k);
//m = <<k::CONS_READ::round>>
triggerSend(proposeMessage, others);
}
private void sendWrite() {
GroupCommMessage proposeMessage = new GroupCommMessage();
//m = <<>>
proposeMessage.tpack(estimate);
//m = <<estimate>>
proposeMessage.tpack(new TInteger(round));
//m = <<round::estimate>>
proposeMessage.tpack(new TInteger(CONS_WRITE));
//m = <<CONS_WRITE::round::estimate>>
proposeMessage.tpack(k);
//m = <<k::CONS_WRITE::round::estimate>>
triggerSend(proposeMessage, others);
}
private void sendAckRead(PID receiver, int r){
GroupCommMessage proposeMessage = new GroupCommMessage();
//m = <<>>
proposeMessage.tpack(estimate);
//m = <<estimate>>
proposeMessage.tpack(new TInteger(write));
//m = <<write::estimate>>
proposeMessage.tpack(new TInteger(r));
//m = <<round::write::estimate>>
proposeMessage.tpack(new TInteger(CONS_ACKREAD));
//m = <<CONS_ACKREAD::round::write::estimate>>
proposeMessage.tpack(k);
//m = <<k::CONS_ACKREAD::round::write::estimate>>
triggerSend(proposeMessage, receiver);
}
private void sendNackRead(PID receiver, int r){
GroupCommMessage proposeMessage = new GroupCommMessage();
//m = <<>>
proposeMessage.tpack(new TInteger(r));
//m = <<round>>
proposeMessage.tpack(new TInteger(CONS_NACKREAD));
//m = <<CONS_NACKREAD::round>>
proposeMessage.tpack(k);
//m = <<k::CONS_NACKREAD::round>>
triggerSend(proposeMessage, receiver);
}
private void sendAckWrite(PID receiver, int r){
GroupCommMessage proposeMessage = new GroupCommMessage();
//m = <<>>
proposeMessage.tpack(new TInteger(r));
//m = <<round>>
proposeMessage.tpack(new TInteger(CONS_ACKWRITE));
//m = <<CONS_ACKWRITE::round>>
proposeMessage.tpack(k);
//m = <<k::CONS_ACKWRITE::round>>
triggerSend(proposeMessage, receiver);
}
private void sendNackWrite(PID receiver, int r){
GroupCommMessage proposeMessage = new GroupCommMessage();
//m = <<>>
proposeMessage.tpack(new TInteger(r));
//m = <<round>>
proposeMessage.tpack(new TInteger(CONS_NACKWRITE));
//m = <<CONS_NACKWRITE::round>>
proposeMessage.tpack(k);
//m = <<k::CONS_NACKWRITE::round>>
triggerSend(proposeMessage, receiver);
}
private void broadcastDecision() {
GroupCommMessage decisionMessage = new GroupCommMessage();
//m = <<>>
decisionMessage.tpack(group);
//m = <<group>>
//decisionMessage.pack(hardClone(estimate));//Because I'm sending to myself
decisionMessage.tpack(estimate);
//m = <<decision::group>>
decisionMessage.tpack(new TInteger(CONS_DECISION));
//m = <<CONS_DECSISON::decision::group>>
decisionMessage.tpack(k);
//m = <<k::CONS_DECISION::decision::group>>
triggerSend(decisionMessage, group);
}
private void triggerSend(GroupCommMessage m, TList g) {
for (int i = 0; i < g.size(); i++) {
triggerSend(m.cloneGroupCommMessage(), (PID) g.get(i));
}
}
private void triggerSend(GroupCommMessage m, PID p) {
GroupCommEventArgs pt2ptSend = new GroupCommEventArgs();
pt2ptSend.addLast(m);
pt2ptSend.addLast(p);
pt2ptSend.addLast(new TBoolean(false)); // not promisc
logger.log(
Level.FINE,
"Sending Pt2Pt message {0} to {1}",
new Object[] { m, p });
trigger.trigger(Constants.PT2PTSEND, pt2ptSend);
}
public String toString() {
return new String(
"(** k: "
+ k
+ " r: "
+ round
+ " leader: "
+ leader
+ " write: "
+ write
+ " read: "
+ read
+ " nbAckRead: "
+ nbAckRead
+ " nbAckWrite: "
+ nbAckWrite
+ " nbNack: "
+ nbNack
+ " highestWrite: "
+ highestWrite
+ " estimate: "
+ estimate
+ "**)");
}
//TODO: remove this
public void marshal(MarshalStream arg0) throws IOException {
throw new IOException("not implemented");
}
public void unmarshalReferences(UnmarshalStream arg0) throws IOException, ClassNotFoundException {
throw new IOException("not implemented");
}
public Object deepClone(DeepClone arg0) throws CloneNotSupportedException {
throw new CloneNotSupportedException("not implemented");
}
}