package ch.usi.da.paxos.ring;
/* 
 * Copyright (c) 2013 Università della Svizzera italiana (USI)
 * 
 * This file is part of URingPaxos.
 *
 * URingPaxos is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * URingPaxos is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with URingPaxos.  If not, see <http://www.gnu.org/licenses/>.
 */

import java.nio.ByteBuffer;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedTransferQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TransferQueue;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.log4j.Logger;

import ch.usi.da.paxos.Util;
import ch.usi.da.paxos.api.ConfigKey;
import ch.usi.da.paxos.api.PaxosRole;
import ch.usi.da.paxos.message.Message;
import ch.usi.da.paxos.message.MessageType;
import ch.usi.da.paxos.message.Value;
import ch.usi.da.paxos.storage.Promise;

/**
 * Name: CoordinatorRole<br>
 * Description: Reserves instances ahead of time by sending Phase1 or
 * Phase1Range messages, keeps the resulting promises in a queue and uses
 * one promise per incoming Value message to emit a Phase2 message. It also
 * triggers and tracks acceptor log trimming (Safe / Trim messages).<br>
 * 
 * The reservation loop in {@link #run()} and the message handling in
 * {@link #deliver(RingManager, Message)} execute on different threads;
 * shared state is kept in concurrent collections / atomics, and
 * {@code deliver} is synchronized.<br>
 * 
 * Creation date: Aug 12, 2012<br>
 * $Id$
 * 
 * @author Samuel Benz benz@geoid.ch
 */
public class CoordinatorRole extends Role {

	private final static Logger logger = Logger.getLogger(CoordinatorRole.class);

	private final RingManager ring;

	/** Highest instance number handed out by the reservation loop so far. */
	private final AtomicLong instance = new AtomicLong();

	/** Reserved (instance, ballot) pairs that are ready to be used for a Phase2 message. */
	private final TransferQueue<Promise> promises = new LinkedTransferQueue<Promise>();

	/** Phase1 requests that were sent but not yet answered, keyed by instance. */
	private final Map<Long,Promise> phase1_in_transit = new ConcurrentHashMap<Long,Promise>();

	/** Phase1Range requests that were sent but not yet answered, keyed by first instance of the range. */
	private final Map<Long,Promise> phase1range_in_transit = new ConcurrentHashMap<Long,Promise>();

	/** Number of instances reserved per reservation round (config: p1_preexecution_number). */
	private int reserved = 10000;

	/** Threshold compared against (now - Promise.getDate()); presumably milliseconds (config: p1_resend_time). */
	private int resend_time = 2000;

	private volatile boolean fastmode = true; // True == Phase1Range

	private int successful_promise_count = 0; // Used to switch fastmode

	private final int enable_fastmode_threshold = 100;

	private int trim_modulo = 0; // (0: disable)

	private int trim_quorum = 2;

	private long last_trimmed_instance = 0;

	// The three public fields below are not read inside this class except for
	// value_count in the trim check; presumably they are consumed by InstanceSkipper.
	public int multi_ring_lambda = 9000;

	public int multi_ring_delta_t = 100;

	public AtomicLong value_count = new AtomicLong(0);

	/**
	 * Creates the coordinator and overrides the built-in defaults with any
	 * values present in the ring configuration.
	 * 
	 * @param ring the ring this coordinator belongs to
	 * @throws NumberFormatException if a configured value is not a valid integer
	 */
	public CoordinatorRole(RingManager ring) {
		this.ring = ring;
		if(ring.getConfiguration().containsKey(ConfigKey.p1_preexecution_number)){
			reserved = Integer.parseInt(ring.getConfiguration().get(ConfigKey.p1_preexecution_number));
			logger.info("Coordinator p1_preexecution_number: " + reserved);
		}
		if(ring.getConfiguration().containsKey(ConfigKey.p1_resend_time)){
			resend_time = Integer.parseInt(ring.getConfiguration().get(ConfigKey.p1_resend_time));
			logger.info("Coordinator p1_resend_time: " + resend_time);
		}
		if(ring.getConfiguration().containsKey(ConfigKey.trim_modulo)){
			trim_modulo = Integer.parseInt(ring.getConfiguration().get(ConfigKey.trim_modulo));
			logger.info("Coordinator trim_modulo: " + trim_modulo);
		}
		if(ring.getConfiguration().containsKey(ConfigKey.trim_quorum)){
			trim_quorum = Integer.parseInt(ring.getConfiguration().get(ConfigKey.trim_quorum));
			logger.info("Coordinator trim_quorum: " + trim_quorum);
		}
		if(ring.getConfiguration().containsKey(ConfigKey.multi_ring_delta_t)){
			multi_ring_delta_t = Integer.parseInt(ring.getConfiguration().get(ConfigKey.multi_ring_delta_t));
			logger.info("Coordinator multi_ring_delta_t: " + multi_ring_delta_t);
		}
		if(ring.getConfiguration().containsKey(ConfigKey.multi_ring_lambda)){
			multi_ring_lambda = Integer.parseInt(ring.getConfiguration().get(ConfigKey.multi_ring_lambda));
			logger.info("Coordinator multi_ring_lambda: " + multi_ring_lambda);
		}
	}

	/**
	 * Registers this role with the network, waits until the ring has enough
	 * acceptors for a quorum, starts the InstanceSkipper thread, asks the
	 * learners for their safe instance and then keeps reserving instances
	 * until this node is no longer coordinator or the thread is interrupted.
	 */
	@Override
	public void run() {
		ring.getNetwork().registerCallback(this);
		try {
			// wait until quorum is possible (ring is big enough)
			while(ring.getAcceptors().size() < ring.getQuorum()){
				Thread.sleep(1000);
			}
			Thread.sleep(3000);
		} catch (InterruptedException e) {
			// keep the flag set; the sleep in the reservation loop below will then end the loop
			Thread.currentThread().interrupt();
		}
		Thread t = new Thread(new InstanceSkipper(ring,this));
		t.setName("InstanceSkipper");
		t.start();

		// send safe message to learner to recover last_trim_instance
		Message recover = new Message(0,ring.getNodeID(),PaxosRole.Learner,MessageType.Safe,0,0,new Value("SAFE!",new byte[0]));
		sendToLearner(recover);

		// phase 1 reserver loop
		while(ring.isNodeCoordinator()){
			try {
				if(fastmode){
					reservePhase1Range();
				}else{
					reservePhase1();
				}
				Thread.sleep(20);
			} catch (InterruptedException e) {
				Thread.currentThread().interrupt();
				break;				
			}
		}
		ring.getNetwork().setLeader(null);
		logger.debug("Coordinator stopped!");
	}

	/**
	 * Fast reservation: requests a whole range of {@code reserved} instances
	 * with a single Phase1Range message and falls back to per-instance
	 * Phase1 when such a request is not answered within {@code resend_time}.
	 */
	private void reservePhase1Range() {
		while(promises.size() < (reserved/2) && phase1range_in_transit.isEmpty()){
			final int ballot = 10+ring.getNodeID();
			Value v = new Value("",Util.intToByte(reserved));
			Message m = new Message(instance.incrementAndGet(),ring.getNodeID(),PaxosRole.Acceptor,MessageType.Phase1Range,ballot,0,v);
			instance.addAndGet(reserved-1); // the range covers [m.getInstance(), m.getInstance()+reserved-1]
			phase1range_in_transit.put(m.getInstance(),new Promise(m.getInstance(),m.getBallot()));
			sendToAcceptor(m);
		}
		long time = System.currentTimeMillis();
		for(Entry<Long, Promise> e : phase1range_in_transit.entrySet()){
			if(time-e.getValue().getDate()>resend_time){
				// Drop the timed-out request. Leaving it in the map would block every
				// future range request (the map would never be empty again) and would
				// roll the instance counter back once more each time fastmode is
				// re-enabled. ConcurrentHashMap tolerates removal while iterating.
				// NOTE(review): a late answer to this request is still accepted by
				// deliver(); confirm whether that is intended.
				phase1range_in_transit.remove(e.getKey());
				instance.addAndGet(-reserved); // give the unreserved range back
				fastmode = false;
				logger.error("Coordinator timeout in phase1range reservation for instance: " + e.getKey());
				logger.debug("Coordinator switch to standard reservation.");
			}
		}
	}

	/**
	 * Standard reservation: sends one Phase1 message per instance and
	 * re-sends timed-out requests with a ballot increased by 10.
	 */
	private void reservePhase1() {
		while(promises.size() < (reserved/2) && phase1_in_transit.size() < reserved){
			final int ballot = 10+ring.getNodeID();
			for(int i=0;i<reserved;i++){
				Message m = new Message(instance.incrementAndGet(),ring.getNodeID(),PaxosRole.Acceptor,MessageType.Phase1,ballot,0,null);
				phase1_in_transit.put(m.getInstance(),new Promise(m.getInstance(),m.getBallot()));
				sendToAcceptor(m);
			}
		}
		long time = System.currentTimeMillis();
		for(Entry<Long, Promise> e : phase1_in_transit.entrySet()){
			if(time-e.getValue().getDate()>resend_time){
				Message m = new Message(e.getKey(),ring.getNodeID(),PaxosRole.Acceptor,MessageType.Phase1,e.getValue().getBallot()+10,0,null);
				// replacing the entry also restarts its timeout with the new ballot
				phase1_in_transit.put(m.getInstance(),new Promise(m.getInstance(),m.getBallot()));
				sendToAcceptor(m);
				logger.error("Coordinator timeout in phase1 reservation for instance: " + e.getKey());
			}
		}
	}

	/**
	 * Hands a message to the local acceptor if there is one, otherwise puts
	 * it on the network.
	 */
	private void sendToAcceptor(Message m) {
		if(ring.getNetwork().getAcceptor() != null){
			ring.getNetwork().getAcceptor().deliver(ring,m);
		}else{ // else should never happen, since there is no coordinator without acceptor!
			ring.getNetwork().send(m);
		}
	}

	/**
	 * Hands a message to the local learner if there is one, otherwise puts
	 * it on the network.
	 */
	private void sendToLearner(Message m) {
		if(ring.getNetwork().getLearner() != null){
			ring.getNetwork().getLearner().deliver(ring,m);
		}else{
			ring.getNetwork().send(m);
		}
	}

	/**
	 * Handles a message addressed to the coordinator. Phase1 and Phase1Range
	 * messages are only processed when this node is their sender; message
	 * types not listed below are ignored.
	 * 
	 * @param fromRing the ring the message arrived on (not used here)
	 * @param m the received message
	 */
	public synchronized void deliver(RingManager fromRing,Message m){
		if(m.getType() == MessageType.Relearn){
			handleRelearn(m);
		}else if(m.getType() == MessageType.Value){
			handleValue(m);
		}else if(m.getType() == MessageType.Safe){
			handleSafe(m);
		}else if(m.getType() == MessageType.Trim){
			handleTrim(m);
		}else if(m.getType() == MessageType.Phase1 && m.getSender() == ring.getNodeID()){
			handlePhase1(m);
		}else if(m.getType() == MessageType.Phase1Range && m.getSender() == ring.getNodeID()){
			handlePhase1Range(m);
		}else if(m.getType() == MessageType.Decision){
			if(m.getInstance()>instance.get()){
				instance.set(m.getInstance());
			}
		}
	}

	/** Answers a Relearn request with a Phase2 message (ballot 9999) carrying a skip value of 1. */
	private void handleRelearn(Message m) {
		Message n = new Message(m.getInstance(),m.getSender(),PaxosRole.Acceptor,MessageType.Phase2,9999,0,new Value(Value.getSkipID(),Long.toString(1).getBytes()));
		sendToAcceptor(n);
	}

	/**
	 * Counts the received value(s), binds the value ID to the next available
	 * promise by sending a Phase2 message and, every {@code trim_modulo}
	 * values, asks the learners for their safe instance.
	 */
	private void handleValue(Message m) {
		countValues(m);
		Promise p = null;
		try {
			p = promises.poll(1,TimeUnit.SECONDS); // wait for a promise
		} catch (InterruptedException e) {
			// do not lose the interrupt; p stays null and the value is not proposed
			Thread.currentThread().interrupt();
		}
		if(p != null){
			// only the value ID travels with Phase2; the payload is left empty
			Message n = new Message(p.getInstance(),m.getSender(),PaxosRole.Acceptor,MessageType.Phase2,p.getBallot(),0,new Value(m.getValue().getID(),new byte[0]));
			sendToAcceptor(n);
		}else{
			logger.warn("Coordinator got no promise within 1s; value " + m.getValue().getID() + " was not proposed!");
		}
		// send safe message to trim acceptor log after n instances
		if(trim_modulo > 0 && value_count.get() % trim_modulo == 0){
			Message n = new Message(0,m.getSender(),PaxosRole.Learner,MessageType.Safe,0,0,new Value("SAFE!",new byte[0]));
			sendToLearner(n);
		}
	}

	/**
	 * Adds the number of values contained in the message to
	 * {@code value_count}: one for a plain value, one per embedded message
	 * for a batch.
	 */
	private void countValues(Message m) {
		if(!m.getValue().isBatch()){
			value_count.incrementAndGet();
			return;
		}
		// not so nice; but the instance skipper needs the exact amount of values
		ByteBuffer buffer = ByteBuffer.wrap(m.getValue().getValue());
		while(buffer.remaining() > 0){
			try {
				Message.fromBuffer(buffer);
				value_count.incrementAndGet();
			} catch (Exception e) {
				logger.error("Coordinator could not de-serialize batch message!",e);
				// The rest of the buffer cannot be trusted, and a failure that does not
				// consume any bytes would otherwise spin here forever while holding
				// the monitor of deliver().
				break;
			}
		}
	}

	/** Turns the learners' safe response into a Trim request for the acceptors. */
	private void handleSafe(Message m) {
		String s = new String(m.getValue().getValue());
		logger.debug("Coordinator received safe response from learners: " + s);
		Message n = new Message(getTrimInstance(s),m.getSender(),PaxosRole.Acceptor,MessageType.Trim,0,0,null);
		sendToAcceptor(n);
	}

	/** Records a Trim that reached a quorum and moves the instance counter forward if needed. */
	private void handleTrim(Message m) {
		if(m.getVoteCount() >= ring.getQuorum()){
			logger.info("Coordinator succesfully trimmed acceptor log to instance " + m.getInstance());
			last_trimmed_instance = m.getInstance();
			if(m.getInstance()>instance.get()){ // speed up ballot reservation (like NACK)
				instance.set(m.getInstance());
			}
		}else{
			logger.error("Coordinator acceptor log trimming to instance " + m.getInstance() + " failed!");
		}
	}

	/**
	 * Handles the answer to one of our own Phase1 requests: re-proposes an
	 * already existing value, or stores the promise when a quorum voted.
	 */
	private void handlePhase1(Message m) {
		if(m.getValue() != null){ // instance already decided -> resend 2b
			phase1_in_transit.remove(m.getInstance());
			Message n = new Message(m.getInstance(),m.getSender(),PaxosRole.Acceptor,MessageType.Phase2,m.getBallot(),m.getValueBallot(),m.getValue());
			sendToAcceptor(n);
		}else if(m.getVoteCount() >= ring.getQuorum()){
			promises.add(new Promise(m.getInstance(),m.getBallot()));
			if(logger.isDebugEnabled()){
				logger.debug("Coordinator reserved instance " + m.getInstance() + " (Phase1)");
			}
			phase1_in_transit.remove(m.getInstance());
			successful_promise_count++;
			// after enough successful single reservations, try the range reservation again
			if(successful_promise_count>=enable_fastmode_threshold){
				successful_promise_count = 0;
				fastmode = true;
				logger.debug("Coordinator switch to fastmode reservation.");
			}
		}else{
			// entry stays in transit; the reservation loop re-sends it after resend_time
			logger.error(m +" at ring end without quorum! (" + m.getVoteCount() + ")");
		}
	}

	/**
	 * Handles the answer to one of our own Phase1Range requests: on quorum,
	 * one promise per instance of the range is queued.
	 */
	private void handlePhase1Range(Message m) {
		if(m.getVoteCount() >= ring.getQuorum()){
			int n = Util.byteToInt(m.getValue().getValue()); // range length carried in the message
			for(long i=m.getInstance();i<n+m.getInstance();i++){
				promises.add(new Promise(i,m.getBallot()));
			}
			if(logger.isDebugEnabled()){
				logger.debug("Coordinator reserved instance " + m.getInstance() + "-" + (m.getInstance()+n-1) + " (Phase1Range)");
			}
			phase1range_in_transit.remove(m.getInstance());
		}else{
			// entry stays in transit; the reservation loop handles the timeout
			logger.error(m +" at ring end without quorum! (" + m.getVoteCount() + ")");
		}
	}

	/**
	 * Computes the instance up to which the acceptor log may be trimmed.
	 * 
	 * @param s ';'-separated list of instance numbers reported by the learners
	 * @return the smallest reported instance if at least {@code trim_quorum}
	 *         entries were reported; otherwise (too few entries, an empty or
	 *         zero entry, or an unparsable entry) the last trimmed instance
	 */
	private long getTrimInstance(String s) {
		long min = Long.MAX_VALUE;
		int q = 0;
		for(String is : s.split(";")){
			try {
				long i = 0;
				if(!is.isEmpty()){
					i = Long.parseLong(is);
				}
				q++;
				if(i == 0) { return last_trimmed_instance; } // notify recovering learner what is online
				if(i<min){
					min = i;
				}
			} catch (NumberFormatException e) {
				logger.error("Error in getTrimInstance()!",e);
				return last_trimmed_instance;
			}
		}
		// q > 0: never hand out the Long.MAX_VALUE sentinel when nothing was reported
		return (q > 0 && q >= trim_quorum) ? min : last_trimmed_instance;
	}

	/**
	 * @return the live queue of reserved promises (not a copy)
	 */
	public TransferQueue<Promise> getPromiseQueue(){
		return promises;
	}

}