/** * Fortika - Robust Group Communication * Copyright (C) 2002-2006 Sergio Mena de la Cruz (EPFL) (sergio.mena@epfl.ch) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ package groupcomm.common.consensus; import java.io.IOException; import java.util.Iterator; import java.util.logging.Level; import java.util.logging.Logger; import uka.transport.DeepClone; import uka.transport.MarshalStream; import uka.transport.Transportable; import uka.transport.UnmarshalStream; import framework.Constants; import framework.GroupCommEventArgs; import framework.GroupCommException; import framework.GroupCommMessage; import framework.PID; import framework.libraries.DefaultSerialization; import framework.libraries.Trigger; import framework.libraries.serialization.TArrayList; import framework.libraries.serialization.TBoolean; import framework.libraries.serialization.THashSet; import framework.libraries.serialization.TInteger; import framework.libraries.serialization.TLinkedList; import framework.libraries.serialization.TList; import framework.libraries.serialization.TSet; import framework.libraries.serialization.TSortedMap; import framework.libraries.serialization.TTreeMap; /** * This class implements a single instance of consensus<br> * It implements Chandra-Toueg algorithm with one execption: The decision is * RBcast by the enclosing Consensus class. * */ public class ConsensusExecution implements Transportable { /** * Identifiers of Consensus message types */ public static final int CONS_ESTIMATE = 1; public static final int CONS_PROPOSE = 2; public static final int CONS_ACK = 3; public static final int CONS_RBCAST = 4; public static final int CONS_DECIDETAG = 5; public static final int CONS_ABORT = 6; /** * PID of the local process */ private PID myself; /** * Number of this consensus instance */ private Transportable k; /** * The serial number of the current round.<br> * -1 means that the algorithm has not started yet.<br> * +infinity means that the algorithm has finished (with a decision). */ private int round = -1; /** * Number of current phase. Can be 1, 2, 3 and 4. */ private int phase = -1; /** * The current estimation. */ private Transportable estimate; /** * The first Estimate that we received * NB: Optimization to avoid resending the complete decision * Added by ORUTTI */ private Transportable firstEstimate = null; /** * The round number in which the estimate was last updated. */ private int lastUpdated = -1; /** * Number of estimates received in this round (only valid if the process is * the coordinator in the current round and is executing Phase 2). */ private int numEstimate = 0; /** * Number of ACKs received in the current round (only valid if the process * is the coordinator in the current round and is executing Phase 4) */ private int nbAck = 0; /** * Number of NACKs received in the current round (only valid if the process * is the coordinator in the current round and is executing Phase 4) */ private int nbNack = 0; /** * True if we have already send a acknoledgement to the coordinator! (avoid * that both an ack and a nack are send to the coordinator */ private boolean alreadySendAck = false; /** * Array containing the group processes that take part into this consensus * instance. */ private TList group; /** * Array containing all processes in the group but <i>myself</i>. It is * used to send <b>propose</b> and <b>decision</b> messages in rounds * where the local process is the coordinator. */ private TList others; /** * The coordinatorof the current round. */ private PID coordinator; /** * The minimum number of processes needed to have a majority in the group. */ private int majority; /** * Set of suspected processes. Its initial value is given in the * constructor. It is updated every time a <i>suspect</i> event is * triggered by the failure detector. */ private TSet suspected = new THashSet(); /** * Interface to trigger events to the outside. It has to be implemented by * the wrapping framework component. */ private Trigger trigger; /** * This set contains all messages that came too early and thus they can't be * treeated yet. Messages are ordered by round number, then by phase number. */ private TSortedMap pushedBack = new TTreeMap(); /** * It logs debugging messages. */ private static final Logger logger = Logger .getLogger(ConsensusExecution.class.getName()); /** * Constructor. <br> * It initalises all data, and copies references to libraries used. * * @param myself * The local process. * @param k * The instance object * @param suspected * The initial set of suspected processes. * @param trigger * The interface to trigger events. */ public ConsensusExecution(PID myself, Transportable k, TSet suspected, Trigger trigger) { logger.entering("ConsensusExecution", "<constr>"); this.myself = myself; this.k = k; this.suspected = suspected; this.trigger = trigger; logger.exiting("ConsensusExecution", "<constr>"); } /** * Starts consenssus execution. * * @param proposal * The value proposed * @param group * The group of processes taking part of this consensus * @throws GroupCommException */ public void processStart(Transportable proposal, TList group) throws GroupCommException, IOException, ClassNotFoundException { logger.entering("ConsensusExecution", "processStart"); estimate = proposal; // List of all processes in the group // this.group = new PID[group.size()]; this.group = new TArrayList(); // List of all processes in the group but myself this.others = new TArrayList(); for (int i = 0; i < group.size(); i++) { // TODO: Optimize! PID p = (PID) group.get(i); this.group.add(p); if (!p.equals(myself)) { this.others.add(p); } } // Limit for ack and estimate messages (excluding myself) majority = this.group.size() / 2; if (round != -1) throw new GroupCommException( "ConsensusExecution: Calling propose on consensus" + k + "while round != -1!!"); logger .log( Level.FINE, "Starting ConsensusExecution. k = {0}. Group = {1}. Majority = {2}", new Object[] { k, group, new Integer(majority) }); nextRound(); logger.exiting("ConsensusExecution", "processStart"); } /** * A new estimation has just arrived. This method processes it. * * @param r * The round number. * @param m * The message containing the estimation. * @throws GroupCommException */ public void processEstimate(int r, GroupCommMessage m) throws GroupCommException, ClassNotFoundException, IOException { logger.entering("ConsensusExecution", "processEstimate"); if (r > round) { // The message is for a future round // We keep it for later handling pushback(r, CONS_ESTIMATE, m); logger .log( Level.FINE, "An ESTIMATE was pushed back. Current pushed-back messages: {0}", pushedBack); } if (r != round) { // The message is not for this round logger.exiting("ConsensusExecution", "processEstimate"); return; } // I am not the coordinator => BAD! if (!myself.equals(coordinator)) { throw new GroupCommException( "Unexpected message received in round " + r + "by non-coordinator: " + m); } // We are not in phase 2, this estimate is late if (phase != 2) { // ignore logger .log( Level.FINE, "Discarding late ESTIMATE (phase != 2): {0}. Current round {1}", new Object[] { m, new Integer(round) }); logger.exiting("ConsensusExecution", "processEstimate"); return; } // m = <<estimate::lastUpdated>> Transportable estim = m.tunpack(); int lastUpd = ((TInteger) m.tunpack()).intValue(); // update the estimate if necessary if (lastUpd > lastUpdated) { logger.log(Level.FINE, "Updating ESTIMATE with {0}. Timestamp: {1}", new Object[] { estim, new Integer(lastUpd) }); lastUpdated = lastUpd; estimate = estim; } numEstimate++; if (numEstimate == majority) { // We received enough estimations: we can propose a value to all // other processes. // Proceed to phase 4 logger .fine("Got majority of ESTIMATEs. Changing phase to 4. Sending proposal"); lastUpdated = round;// BUG FIXED BY RACHELLE FUZZATI phase = 4; sendPropose(); processAck(round, null); // just to check if a NACK has already arrived } logger.exiting("ConsensusExecution", "processEstimate"); } /** * A proposal has just arrived from the coordinator. * * @param r * The round number. * @param m * The message containing the proposal. * @throws GroupCommException */ public void processPropose(int r, GroupCommMessage m) throws GroupCommException, ClassNotFoundException, IOException { logger.entering("ConsensusExecution", "processPropose"); if (r > round) { // The message is for a future round // We keep it for later handling pushback(r, CONS_PROPOSE, m); logger .log( Level.FINE, "A PROPOSE was pushed back. Currrent pushed-back messages: {0}", pushedBack); } if (r != round) { // START: ADDED BY ORUTTI if (r == 0){ if(m.size() == 1){// Sergio: Included due to a bug in static-appia-abcast, when the coordinator is catching up firstEstimate = deepClone(m.tpeek(0)); } else { firstEstimate = deepClone(m.tpeek(1)); // 1 instead 0 because CONS_PROPOSE was pushed // in method pushback } } // END: ADDED BY ORUTTI // The message is not for this round logger.exiting("ConsensusExecution", "processPropose"); return; } // I am the coordinator => BAD! if (myself.equals(coordinator)) { throw new GroupCommException( "Unexpected message received in round " + r + "by coordinator: " + m); } estimate = m.tunpack(); if (round == 0){ firstEstimate = deepClone(estimate); } lastUpdated = round; logger .log( Level.FINE, "Received a PROPOSE: {0} in round {1}. Setting it as estimate.", new Object[] { estimate, new Integer(round) }); // Send ack to the coordinator sendAck(true); alreadySendAck = true; // Nothing to do this round, wait decision, abort or coordinator // suspicion logger.exiting("ConsensusExecution", "processPropose"); } /** * An acknowledgement message (Ack or Nack) has just arrived. * * @param r * The round number. * @param m * The message containing the acknowledgement. * @throws GroupCommException */ public void processAck(int r, GroupCommMessage m) throws GroupCommException, IOException, ClassNotFoundException { logger.entering("ConsensusExecution", "processAck"); if (r > round) { // The message is for a future round // We keep it for later handling pushback(r, CONS_ACK, m); logger .log( Level.FINE, "An ACK was pushed back. Currrent pushed-back messages: {0}", pushedBack); } if (r != round) { // The message is not for this round logger.exiting("ConsensusExecution", "processAck"); return; } // I am not the coordinator => BAD! if (!myself.equals(coordinator)) { throw new GroupCommException( "Unexpected message received in round " + r + "by non-coordinator: " + m); } // Increment number of {ack,nack} received. logger .log( Level.FINE, "About to process an ACK: {0} in round {1}. Received acks/nacks {2}/{3}", new Object[] { m, new Integer(round), new Integer(nbAck), new Integer(nbNack) }); if (m != null) { boolean ack = ((TBoolean) m.tunpack()).booleanValue(); if (ack) { nbAck++; } else { nbNack++; } } // We take into account messages that arrive in Phase 2 (NACKs). // However, the process should not pass on to the next round // until it completes Phase 2. if (phase == 2) { logger.fine("Still in phase 2"); logger.exiting("ConsensusExecution", "processAck"); return; } if (nbNack == 0) { if (nbAck == majority) { // We got a majority of positive Ack's. Thefore we can decide logger .fine("Got a majority of positive Ack's. Broadcasting decision"); broadcastDecision(); round = Integer.MAX_VALUE; } } else { // Somebody suspected us and sent us a negative acknowledgement. // We might not be able to decide in this round // We proceed to next round logger.fine("Got a NACK. Going to next round"); nbNack = 0; nbAck = 0; sendAbort(); nextRound(); } logger.exiting("ConsensusExecution", "processAck"); } /** * An abort message has just arrived. * * @param r * The round number. * @throws GroupCommException */ public void processAbort(int r) throws GroupCommException, IOException, ClassNotFoundException { logger.entering("ConsensusExecution", "processAbort"); if (r > round) { // The message is for a future round // We keep it for later handling pushback(r, CONS_ABORT, new GroupCommMessage()); logger .log( Level.FINE, "An Abort Message was pushed back. Currrent pushed-back messages: {0}", pushedBack); } if (r != round) { // The message is not for this round logger.exiting("ConsensusExecution", "processAbort"); return; } // I am the coordinator => BAD! if (myself.equals(coordinator)) { throw new GroupCommException( "Unexpected message received in round " + r + "by coordinator: " + " Abort"); } nextRound(); logger.exiting("ConsensusExecution", "processAbort"); } /** * The list of suspected processes has just changed. To process the new * list, fisrt update the suspected list with the one received. Then, send a * Nack to the coordinator if it is now suspected (and proceed to the next * round). * * @param suspected * The updated suspect list * @throws GroupCommException */ public void processSuspicion(TSet suspected) throws GroupCommException, IOException, ClassNotFoundException { logger.entering("ConsensusExecution", "processSuspicion"); this.suspected = suspected; // Consensus is not running (either not started yet or already finished) if (round == Integer.MAX_VALUE || round == -1) { logger.exiting("ConsensusExecution", "processSuspicion"); return; } if (!myself.equals(coordinator) && suspected.contains(coordinator)) { // Send a NACK only if no ack were already send if (!alreadySendAck) sendAck(false); // Proceed to next round nextRound(); } logger.exiting("ConsensusExecution", "processSuspicion"); } /** * Returns true iff this instance has already started (i.e., a value was * proposed by the local host). * * @return */ public boolean hasStarted() { return round > -1; } /** * Returns the estimate of the process first round */ public Transportable firstEstimate() { return firstEstimate; } /** * Makes a copy in memory of the parameter. If the parameter has references * to other objects, they are also cloned. This is necessary to avoid * side-effects at higher-lever protocols. * * @param o * The object to deep-clone * @return * @throws IOException * @throws ClassNotFoundException */ private Transportable deepClone(Transportable o) throws IOException, ClassNotFoundException { // TODO: There have to be better ways to do deep-clone!!! return DefaultSerialization .unmarshall(DefaultSerialization.marshall(o)); } /** * This method manages the transition to the next round. It updates several * attributes and executes a different code dependng on whether the local * process will be the coordinator or not in the new round. * * @throws GroupCommException */ private void nextRound() throws GroupCommException, IOException, ClassNotFoundException { logger.entering("ConsensusExecution", "nextRound"); // Increments the round round++; alreadySendAck = false; if (round > 1000) System.err .println("WARNING: Consensus is taking too many rounds!!!"); // New coordinator coordinator = (PID) group.get(round % group.size()); logger.log(Level.FINE, "New round: {0}. New coordinator: {1}", new Object[] { new Integer(round), coordinator }); if (!myself.equals(coordinator)) { // I am not the coordinator if (round > 0) { // Optimization for round #1 // PHASE 1 // If this is not round #1, send estimate to the coordinator sendEstimate(); } if (suspected.contains(coordinator)) { // PHASE 3 // The new coordinator is suspected // Send it a Nack sendAck(false); // Nothing left to do in this round // Proceed to next round nextRound(); } } else { // I am the coordinator if (round > 0) { // Optimization for round #1 // If this is not round #1, wait for estimates numEstimate = 0; phase = 2; } else { // If this is round #1, directly send proposal to the others firstEstimate = deepClone(estimate); sendPropose(); // Proceed to phase 4 phase = 4; } } // In any case, // Pushed back messages must be treated if current round is theirs while (!pushedBack.isEmpty()) { TInteger rObj = (TInteger) pushedBack.firstKey(); int r = rObj.intValue(); if (r > round) { // Messages for future round, we keep it and quit the loop logger.exiting("ConsensusExecution", "nextRound"); return; } // We remove all messages of round r from the pushed back queue TLinkedList l = (TLinkedList) pushedBack.remove(rObj); if (r < round) { logger .log( Level.FINE, "Discarding old messages {0} in pushedBack for round {1}", new Object[] { l, rObj }); } else { // r == round logger.log(Level.FINE, "Processing messages {0} in pushedBack for round {1}", new Object[] { l, rObj }); Iterator it = l.iterator(); while (it.hasNext()) { GroupCommMessage m = (GroupCommMessage) it.next(); int type = ((TInteger) m.tunpack()).intValue(); switch (type) { case CONS_ESTIMATE: processEstimate(r, m); break; case CONS_PROPOSE: processPropose(r, m); break; case CONS_ACK: processAck(r, m); break; case CONS_ABORT: processAbort(r); break; default: throw new GroupCommException("Weird message type " + type + " in pushed back set!"); } } } } logger.exiting("ConsensusExecution", "nextRound"); } /** * This method inserts a message whose processing has to be delayed into map * <i>pushedBack</i>. The map is ordered by increasing round numbers. * * @param r * The message's round number * @param type * The message's type * @param m * The message payload */ private void pushback(int r, int type, GroupCommMessage m) { logger.entering("ConsensusExecution", "pushback"); TInteger rObj = new TInteger(r); TLinkedList l = (TLinkedList) pushedBack.get(rObj); if (l == null) { l = new TLinkedList(); pushedBack.put(rObj, l); } m.tpack(new TInteger(type)); l.addLast(m); logger.exiting("ConsensusExecution", "pushback"); } /** * Send a message with the current estimate to the current coordinator (the * local process mustn't be the coordinator) * */ private void sendEstimate() { GroupCommMessage estimateMessage = new GroupCommMessage(); // m = <<>> estimateMessage.tpack(new TInteger(lastUpdated)); // m = <<lastUpdated>> estimateMessage.tpack(estimate); // m = <<estimate::lastUpdated>> estimateMessage.tpack(new TInteger(round)); // m = <<round::estimate::lastUpdated>> estimateMessage.tpack(new TInteger(CONS_ESTIMATE)); // m = <<CONS_ESTIMATE::round::estimate::lastUpdated>> estimateMessage.tpack(k); // m = <<k::CONS_ESTIMATE::round::estimate::lastUpdated>> triggerSend(estimateMessage, coordinator); } /** * Send a message with the current estimate as the coordinator's proposal * (the local process must be the coordinator) * */ private void sendPropose() { GroupCommMessage proposeMessage = new GroupCommMessage(); // m = <<>> proposeMessage.tpack(estimate); // m = <<estimate>> proposeMessage.tpack(new TInteger(round)); // m = <<round::estimate>> proposeMessage.tpack(new TInteger(CONS_PROPOSE)); // m = <<CONS_PROPOSE::round::estimate>> proposeMessage.tpack(k); // m = <<k::CONS_PROPOSE::round::estimate>> triggerSend(proposeMessage, others); } /** * Send a message with the current estimate as the coordinator's proposal * (the local process must be the coordinator) * */ private void sendAbort() { GroupCommMessage abortMessage = new GroupCommMessage(); // m = <<>> abortMessage.tpack(new TInteger(round)); // m = <<round>> abortMessage.tpack(new TInteger(CONS_ABORT)); // m = <<CONS_ABORT::round>> abortMessage.tpack(k); // m = <<k::CONS_ABORT::round>> triggerSend(abortMessage, others); } /** * Send an acknowledgement message to the current coordinator. It will be an * Ack or a Nack depending on the parameter * * @param ack * If it is true, an Ack is sent. Otherwise, a Nack is sent. */ private void sendAck(boolean ack) { GroupCommMessage ackMessage = new GroupCommMessage(); // m = <<>> ackMessage.tpack(new TBoolean(ack)); // m = <<NACK>> ackMessage.tpack(new TInteger(round)); // m = <<round::NACK>> ackMessage.tpack(new TInteger(CONS_ACK)); // m = <<CONS_ACK::round::NACK>> ackMessage.tpack(k); // m = <<k::CONS_ACK::round::NACK>> triggerSend(ackMessage, coordinator); } /** * Sends a decision message to all processes * */ private void broadcastDecision() { GroupCommMessage decisionMessage = new GroupCommMessage(); // m = <<>> decisionMessage.tpack(others); // m = <<group>> group does not have to contain myself // Because decision do not have to be resend to the process that decide if (round == 0){ decisionMessage.tpack(new TInteger(CONS_DECIDETAG)); } else { decisionMessage.tpack(estimate); // m = <<decision::group>> decisionMessage.tpack(new TInteger(CONS_RBCAST)); } // m = <<(CONS_BROADCAST::decision || CONS_DECIDETAG)::group>> decisionMessage.tpack(k); // m = <<k::(CONS_BROADCAST::decision || CONS_DECIDETAG)::group>> // Broadcast decision to others triggerSend(decisionMessage, group); } /** * Triggers a <i>PointToPointSend</i> event for each process in the second * parameter. * * @param m * The message to be sent. * @param g * The processes that the message is to be sent to. */ private void triggerSend(GroupCommMessage m, TList g) { for (int i = 0; i < g.size(); i++) { triggerSend(m.cloneGroupCommMessage(), (PID) g.get(i)); } } /** * Triggers a single <i>PointToPointSend</i> event. * * @param m * The message to be sent. * @param p * The destination process. */ private void triggerSend(GroupCommMessage m, PID p) { GroupCommEventArgs pt2ptSend = new GroupCommEventArgs(); pt2ptSend.addLast(m); pt2ptSend.addLast(p); pt2ptSend.addLast(new TBoolean(false)); // not promisc logger.log(Level.FINE, "Sending Pt2Pt message {0} to {1}", new Object[] { m, p }); trigger.trigger(Constants.PT2PTSEND, pt2ptSend); } /** * Used for debugging. Undocumented. */ public String toString() { return new String("(** k: " + k + " r: " + round + " phase: " + phase + " coord: " + coordinator + " nbAck: " + nbAck + " nbNack: " + nbNack + " nbEstimate: " + numEstimate + " lastUpd: " + lastUpdated + " estimate: " + estimate + " suspected: " + suspected + " pushedBack: " + pushedBack + "**)"); } // TODO: Remove public void marshal(MarshalStream arg0) throws IOException { throw new IOException("Unimplemented marshall"); } public void unmarshalReferences(UnmarshalStream arg0) throws IOException, ClassNotFoundException { throw new IOException("Unimplemented unmarshallReferences"); } public Object deepClone(DeepClone arg0) throws CloneNotSupportedException { throw new CloneNotSupportedException("Unimplemented deepClone"); } }