package org.jgroups.protocols;

import org.jgroups.*;
import org.jgroups.annotations.*;
import org.jgroups.stack.Protocol;
import org.jgroups.util.TimeScheduler;
import org.jgroups.util.Util;

import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

/**
 * Protocol to discover subgroups; e.g., existing due to a network partition (that healed). Example: group
 * {p,q,r,s,t,u,v,w} is split into 3 subgroups {p,q}, {r,s,t,u} and {v,w}. This protocol will eventually send
 * a MERGE event up the stack so that the subgroups can be merged. Note that - depending on the time
 * of subgroup discovery - there could also be 2 MERGE events, which first join 2 of the subgroups, and then the
 * resulting group to the last subgroup. The real work of merging the subgroups into one larger group is done
 * somewhere above this protocol (typically in the GMS protocol).<p>
 * This protocol works as follows:
 * <ul>
 * <li>If coordinator: periodically (at a random interval between {@code min_interval} and {@code max_interval})
 *     send a FIND_ALL_VIEWS event down the stack and collect the view reported by each responder.
 * <li>If the responses contain more than one distinct view:
 *     <ol>
 *     <li>Build a map of responder address to reported view
 *     <li>Create a MERGE event with that map as argument
 *     <li>Send the event up the stack
 *     </ol>
 * <li>If {@code merge_fast} is enabled: a multicast message from a sender which is not in the current view
 *     triggers one discovery run after {@code merge_fast_delay} ms (on any member, not only the coordinator).
 * </ul>
 *
 * <p>
 *
 * Requires: FIND_INITIAL_MBRS and FIND_ALL_VIEWS events from below<br>
 * Provides: sends MERGE event with the discovered views up the stack<br>
 * @author Bela Ban, Oct 16 2001
 */
@MBean(description="Protocol to discover subgroups existing due to a network partition")
public class MERGE2 extends Protocol {

    /* -----------------------------------------    Properties     -------------------------------------------------- */

    @Property(description="Minimum time in ms between runs to discover other clusters")
    protected long min_interval=5000;

    @Property(description="Maximum time in ms between runs to discover other clusters")
    protected long max_interval=20000;

    @Property(description="Number of inconsistent views with only 1 coord after a MERGE event is sent up")
    protected int inconsistent_view_threshold=1;

    @Property(description="When receiving a multicast message, checks if the sender is member of the cluster. " +
      "If not, initiates a merge. Generates a lot of traffic for large clusters when there is a lot of merging")
    protected boolean merge_fast=true;

    @Property(description="The delay (in milliseconds) after which a merge fast execution is started")
    protected long merge_fast_delay=1000;


    /* ---------------------------------------------- JMX -------------------------------------------------------- */

    /** Returns true if the periodic discovery task is currently scheduled and not yet done. */
    @ManagedAttribute(writable=false, description="whether or not a merge task is currently running " +
      "(should be the case in a coordinator")
    public boolean isMergeTaskRunning() {
        return task.isRunning();
    }


    /* --------------------------------------------- Fields ------------------------------------------------------ */

    protected Address local_addr=null;

    /** Last view seen in a VIEW_CHANGE event. Volatile because it is written in down() and read by the timer. */
    protected volatile View view;

    /**
     * Members of the current view. This set is updated in {@link #down(Event)} and read in {@link #up(Event)}
     * and from tasks running on the timer, so it must be safe for concurrent access; a plain HashSet is not.
     * Note that the concurrent set does not accept null elements.
     */
    protected final Set<Address> members=Collections.newSetFromMap(new ConcurrentHashMap<Address,Boolean>());

    /**
     * Senders of multicast messages which were not in {@link #members} when their message was received and for
     * which a fast merge check has been scheduled. Entries are removed once the sender is part of a view, or
     * when the protocol is stopped.
     */
    protected final Set<Address> merge_candidates=new CopyOnWriteArraySet<Address>();

    protected final FindSubgroupsTask task=new FindSubgroupsTask();

    protected volatile boolean is_coord=false;

    protected TimeScheduler timer;

    @ManagedAttribute(description="Number of inconsistent 1-coord views until a MERGE event is sent up the stack")
    protected int num_inconsistent_views=0;

    @ManagedAttribute(description="Number of times a MERGE event was sent up the stack")
    protected int num_merge_events=0;


    /**
     * Fetches the timer from the transport and validates the interval properties.
     * @throws Exception if no timer is available, if one of the intervals is not positive, or if
     *                   max_interval is not greater than min_interval
     */
    public void init() throws Exception {
        timer=getTransport().getTimer();
        if(timer == null)
            throw new Exception("timer cannot be retrieved");
        if(min_interval <= 0 || max_interval <= 0)
            throw new Exception("min_interval and max_interval have to be > 0");
        if(max_interval <= min_interval)
            throw new Exception ("max_interval has to be greater than min_interval");
    }

    public long getMinInterval() {
        return min_interval;
    }

    public void setMinInterval(long i) {
        min_interval=i;
    }

    public long getMaxInterval() {
        return max_interval;
    }

    public void setMaxInterval(long l) {
        max_interval=l;
    }

    /**
     * Asks the protocols above (via an IS_MERGE_IN_PROGRESS event) whether a merge is currently running.
     * @return true only if the event returned {@link Boolean#TRUE}; any other return value counts as false
     */
    protected boolean isMergeRunning() {
        Object retval=up_prot.up(new Event(Event.IS_MERGE_IN_PROGRESS));
        return retval instanceof Boolean && ((Boolean)retval).booleanValue();
    }

    public List<Integer> requiredDownServices() {
        return Arrays.asList(Event.FIND_INITIAL_MBRS, Event.FIND_ALL_VIEWS);
    }

    /** Discovers members and detects whether we have multiple coordinator. If so, kicks off a merge */
    @ManagedOperation
    public void sendMergeSolicitation() {
        task.findAndNotify();
    }

    @ManagedOperation
    public void startMergeTask() {
        task.start();
    }

    @ManagedOperation
    public void stopMergeTask() {
        task.stop();
    }

    /** Resets the coordinator flag, forgets all merge candidates and cancels the periodic discovery task. */
    public void stop() {
        is_coord=false;
        merge_candidates.clear();
        task.stop();
    }


    /**
     * Handles VIEW_CHANGE (tracks membership and starts or stops the discovery task depending on whether the
     * local member is first in the view) and SET_LOCAL_ADDRESS. All events are passed down the stack.
     * @param evt the event travelling down the stack
     * @return whatever the protocol below returned for the event
     */
    public Object down(Event evt) {
        switch(evt.getType()) {

            case Event.VIEW_CHANGE:
                // the view is passed down first, the local state is updated afterwards
                Object ret=down_prot.down(evt);
                View new_view=(View)evt.getArg();
                view=new_view;
                List<Address> mbrs=new_view.getMembers();
                if(mbrs == null || mbrs.isEmpty() || local_addr == null) {
                    task.stop();
                    return ret;
                }

                // Update the membership without ever emptying the set: members which are in both the old and
                // the new view stay visible to concurrent readers in up(), so they are not mistaken for
                // non-members (which would schedule needless discovery runs)
                Set<Address> new_mbrs=new HashSet<Address>(mbrs);
                new_mbrs.remove(null); // the concurrent members set rejects null elements
                members.retainAll(new_mbrs);
                members.addAll(new_mbrs);
                merge_candidates.removeAll(new_mbrs);

                Address coord=mbrs.get(0); // mbrs is known to be non-empty here
                if(coord != null && coord.equals(local_addr)) {
                    is_coord=true;
                    task.start(); // start task if we became coordinator (doesn't start if already running)
                }
                else {
                    // if we were coordinator, but are no longer, stop task. this happens e.g. when we merge and someone
                    // else becomes the new coordinator of the merged group
                    is_coord=false;
                    task.stop();
                }
                return ret;

            case Event.SET_LOCAL_ADDRESS:
                local_addr=(Address)evt.getArg();
                return down_prot.down(evt);

            default:
                return down_prot.down(evt); // Pass on to the layer below us
        }
    }


    /**
     * Passes every event up the stack. If merge_fast is enabled and the event is a message without destination
     * whose sender is not a member of the current view, a single discovery run is scheduled after
     * merge_fast_delay ms. A sender is only scheduled once while it remains in {@link #merge_candidates}.
     * @param evt the event travelling up the stack
     * @return whatever the protocol above returned for the event
     */
    public Object up(Event evt) {
        switch(evt.getType()) {
            case Event.MSG:
                if(!merge_fast)
                    break;
                Message msg=(Message)evt.getArg();
                if(msg.getDest() != null) // only messages without a destination are considered
                    break;
                final Address sender=msg.getSrc();
                if(sender == null) // nothing to check without a sender (and the members set rejects null)
                    break;
                if(!members.contains(sender) && merge_candidates.add(sender)) {
                    timer.schedule(new Runnable() {
                        public void run() {
                            // re-check: the sender may have become a member while we were waiting
                            if(!members.contains(sender))
                                task.findAndNotify();
                        }
                    }, merge_fast_delay, TimeUnit.MILLISECONDS);
                }
                break;
        }
        return up_prot.up(evt);
    }


    /**
     * Task periodically executing (if role is coordinator). Fetches the views of the cluster members and
     * determines whether there are subgroups (different views for the same group). If yes, it sends a MERGE
     * event with the views up the stack
     */
    protected class FindSubgroupsTask {
        @GuardedBy("this")
        private Future<?> future;

        /** Ensures that only one discovery run is active at a time; concurrent callers skip their run */
        private final Lock lock=new ReentrantLock();

        /** Schedules the periodic discovery unless it is already scheduled and not yet done. */
        public synchronized void start() {
            if(future == null || future.isDone()) {
                future=timer.scheduleWithDynamicInterval(new TimeScheduler.Task() {
                    public long nextInterval() {
                        return computeInterval();
                    }

                    public void run() {
                        findAndNotify();
                    }
                });
            }
        }

        /** Cancels the periodic discovery if it is scheduled. */
        public synchronized void stop() {
            if(future != null) {
                future.cancel(true);
                future=null;
            }
        }

        public synchronized boolean isRunning() {
            return future != null && !future.isDone();
        }

        /**
         * Runs one discovery round. Does nothing if a merge is already in progress or if another thread is
         * currently running a discovery round. Failures are logged and not propagated.
         */
        public void findAndNotify() {
            if(isMergeRunning())
                return;

            if(!lock.tryLock())
                return;
            try {
                _findAndNotify();
            }
            catch(Throwable t) {
                log.error("FindSubgroupsTask failed", t);
            }
            finally {
                lock.unlock();
            }
        }

        /**
         * Collects the views, and sends a MERGE event up the stack if more than one different view was found.
         * If there is only a single merge participant, the event is dropped until this has happened
         * inconsistent_view_threshold times in a row. Only called with {@link #lock} held.
         */
        protected void _findAndNotify() {
            List<PingData> discovery_rsps=findAllViews();

            if(log.isTraceEnabled()) {
                StringBuilder sb=new StringBuilder();
                sb.append("Discovery results:\n");
                for(PingData data: discovery_rsps)
                    sb.append("[" + data.getAddress() + "]: " + data.printViewId()).append("\n");
                log.trace(sb);
            }

            // Create a map of senders and the views they sent
            Map<Address,View> views=getViews(discovery_rsps);

            // A list of different views
            List<View> different_views=Util.detectDifferentViews(views);
            if(different_views.size() <= 1) {
                num_inconsistent_views=0;
                return;
            }

            Collection<Address> merge_participants=Util.determineMergeParticipants(views);
            if(merge_participants.size() == 1 && num_inconsistent_views < inconsistent_view_threshold) {
                if(log.isDebugEnabled())
                    log.debug(local_addr + ": dropping MERGE for inconsistent views " + Util.printViews(different_views) +
                                " as inconsistent view threshold (" + inconsistent_view_threshold +
                                ") has not yet been reached (" + num_inconsistent_views + ")");
                num_inconsistent_views++;
                return;
            }
            // either several participants, or the threshold has been reached: start counting from scratch
            num_inconsistent_views=0;

            if(log.isDebugEnabled()) {
                StringBuilder sb=new StringBuilder();
                sb.append(local_addr + " found different views : " + Util.printViews(different_views) +
                            "; sending up MERGE event with merge participants " + merge_participants + ".\n");
                sb.append("Discovery results:\n");
                for(PingData data: discovery_rsps)
                    sb.append("[" + data.getAddress() + "]: coord=" + data.getCoordAddress()).append("\n");
                log.debug(sb.toString());
            }

            Event evt=new Event(Event.MERGE, views);
            try {
                up_prot.up(evt);
                num_merge_events++; // only counted if the event was passed up without an exception
            }
            catch(Throwable t) {
                log.error("failed sending up MERGE event", t);
            }
        }

        /**
         * Returns min_interval plus a random value computed by Util.random(max_interval - min_interval)
         */
        protected long computeInterval() {
            return min_interval + Util.random(max_interval - min_interval);
        }

        /**
         * Sends a FIND_ALL_VIEWS event down the stack and returns the responses. If we are coordinator, an
         * entry for the local member (with the current view) is added unless the responses already contain
         * an equal entry.
         * @return the responses, or an empty list if the event returned null
         */
        @SuppressWarnings("unchecked")
        protected List<PingData> findAllViews() {
            List<PingData> retval=(List<PingData>)down_prot.down(new Event(Event.FIND_ALL_VIEWS));
            if(retval == null)
                return Collections.emptyList();
            if(is_coord && local_addr != null) {
                // let's make sure that we add ourself as a coordinator
                PingData tmp=new PingData(local_addr, view, true);
                if(!retval.contains(tmp))
                    retval.add(tmp);
            }
            return retval;
        }

        /**
         * Builds a map of sender address to view from the given responses. Responses which are not from a
         * server, or which have no address or no view, are skipped.
         * @param initial_mbrs the discovery responses
         * @return a new map with one entry per usable response (a later response from the same address
         *         replaces an earlier one)
         */
        public Map<Address,View> getViews(List<PingData> initial_mbrs) {
            Map<Address,View> retval=new HashMap<Address,View>();
            for(PingData response: initial_mbrs) {
                if(!response.isServer())
                    continue;
                Address sender=response.getAddress();
                View rsp_view=response.getView();
                if(sender == null || rsp_view == null)
                    continue;
                retval.put(sender, rsp_view);
            }
            return retval;
        }
    }
}