package org.jgroups.protocols;

import org.jgroups.*;
import org.jgroups.annotations.MBean;
import org.jgroups.annotations.ManagedAttribute;
import org.jgroups.annotations.ManagedOperation;
import org.jgroups.annotations.Property;
import org.jgroups.stack.Protocol;
import org.jgroups.util.*;
import org.jgroups.util.UUID;

import java.io.DataInput;
import java.io.DataOutput;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.Future;

/**
 * Protocol to discover subgroups which exist due to a network partition that has since healed. Example: group
 * {p,q,r,s,t,u,v,w} is split into 3 subgroups {p,q}, {r,s,t,u} and {v,w}. This protocol will eventually send
 * a MERGE event up the stack with the views of the subgroups, led by coordinators {p,r,v}.
 * <p/>
 * Works as follows (https://issues.jboss.org/browse/JGRP-1387): every member periodically broadcasts its address (UUID),
 * logical name, physical address and ViewId. Other members collect this information and check whether the ViewIds
 * differ (an indication of different subpartitions). If they do, the member with the lowest address (first in the
 * sorted list of collected addresses) sends a MERGE event up the stack, which will be handled by GMS; all other
 * members do nothing.
 * <p/>
 * The advantage compared to {@link MERGE2} is that there are no merge collisions caused by multiple concurrent merges.
 * Also, the INFO traffic is spread out over max_interval, and every member sends its physical address with INFO, so
 * we don't need to fetch physical addresses first.
 * <p/>
 * An example configuration is sketched in the comment at the end of this file.
 *
 * @author Bela Ban, Nov 2011
 * @since 3.1
 */
@MBean(description="Protocol to discover subgroups existing due to a network partition")
public class MERGE3 extends Protocol {


    /* ----------------------------------------- Properties -------------------------------------------------- */
    protected long min_interval=1000;

    protected long max_interval=10000;

    @Property(description="The max number of merge participants to be involved in a merge. 0 sets this to unlimited.")
    protected int max_participants_in_merge=100;


    /* ---------------------------------------------- JMX -------------------------------------------------------- */
    @ManagedAttribute(description="Interval (in ms) after which we check for view inconsistencies",writable=true)
    protected long check_interval=0;

    @ManagedAttribute(description="Number of cached ViewIds")
    public int getViews() {return views.size();}


    /* --------------------------------------------- Fields ------------------------------------------------------ */
    protected Address local_addr=null;

    protected View view;

    protected TimeScheduler timer;

    protected Future<?> info_sender;

    protected Future<?> view_consistency_checker;

    // map to keep track of the view-ids sent in INFO messages and the members they were received from
    protected final ConcurrentMap<ViewId,SortedSet<Address>> views=new ConcurrentHashMap<ViewId,SortedSet<Address>>(view != null?
                                                                                                                      view.size() : 16);

    protected final ResponseCollector<View> view_rsps=new ResponseCollector<View>();

    protected boolean transport_supports_multicasting=true;

    protected String cluster_name;

    @ManagedAttribute(description="Whether or not the current member is the coordinator")
    protected volatile boolean is_coord=false;

    @ManagedAttribute(description="Number of times a MERGE event was sent up the stack")
    protected int num_merge_events=0;


    @ManagedAttribute(description="Is the view consistency checker task running")
    public synchronized boolean isViewConsistencyCheckerRunning() {
        return view_consistency_checker != null && !view_consistency_checker.isDone();
    }

    @ManagedAttribute(description="Is the merge task (view consistency checker) running")
    public boolean isMergeTaskRunning() {return isViewConsistencyCheckerRunning();}

    @ManagedAttribute(description="Is the info sender task running")
    public synchronized boolean isInfoSenderRunning() {
        return info_sender != null && !info_sender.isDone();
    }

    @ManagedOperation(description="Lists the contents of the cached views")
    public String dumpViews() {
        StringBuilder sb=new StringBuilder();
        for(Map.Entry<ViewId,SortedSet<Address>> entry: views.entrySet())
            sb.append(entry.getKey()).append(": ").append(entry.getValue()).append("\n");
        return sb.toString();
    }

    @ManagedOperation(description="Clears the views cache")
    public void clearViews() {views.clear();}


    public void init() throws Exception {
        timer=getTransport().getTimer();
        if(timer == null)
            throw new Exception("timer cannot be retrieved");
        if(max_interval <= 0)
            throw new Exception("max_interval must be > 0");
        if(min_interval >= max_interval)
            throw new IllegalArgumentException("min_interval (" + min_interval + ") has to be < max_interval (" + max_interval + ")");
        if(check_interval == 0)
            check_interval=computeCheckInterval();
        else if(check_interval <= max_interval) {
            log.warn("check_interval (" + check_interval + ") is <= max_interval (" + max_interval +
                       "); setting it to " + computeCheckInterval());
            check_interval=computeCheckInterval();
        }
        transport_supports_multicasting=getTransport().supportsMulticasting();
    }

    public void stop() {
        super.stop();
        is_coord=false;
        stopViewConsistencyChecker();
        stopInfoSender();
    }

    public long getMinInterval() {
        return min_interval;
    }

    @Property(description="Minimum time in ms before sending an info message")
    public void setMinInterval(long i) {
        if(i < 0 || i >= max_interval)
            throw new IllegalArgumentException("min_interval (" + i + ") has to be >= 0 and < max_interval (" + max_interval + ")");
        min_interval=i;
    }

    public long getMaxInterval() {
        return max_interval;
    }

    @Property(description="Interval (in milliseconds) when the next info " +
      "message will be sent. " +
      "A random value is picked from range [1..max_interval]")
    public void setMaxInterval(long val) {
        if(val <= 0)
            throw new IllegalArgumentException("max_interval must be > 0");
        max_interval=val;
        check_interval=computeCheckInterval();
    }

    protected long computeCheckInterval() {
        return (long)(max_interval * 1.6);
    }

    protected boolean isMergeRunning() {
        Object retval=up_prot.up(new Event(Event.IS_MERGE_IN_PROGRESS));
        return retval instanceof Boolean && ((Boolean)retval).booleanValue();
    }

    protected synchronized void startInfoSender() {
        if(info_sender == null || info_sender.isDone())
            info_sender=timer.scheduleWithDynamicInterval(new InfoSender());
    }

    protected synchronized void stopInfoSender() {
        if(info_sender != null) {
            info_sender.cancel(true);
            info_sender=null;
        }
    }

    protected synchronized void startViewConsistencyChecker() {
        if(view_consistency_checker == null || view_consistency_checker.isDone())
            view_consistency_checker=timer.scheduleWithDynamicInterval(new ViewConsistencyChecker());
    }

    protected synchronized void stopViewConsistencyChecker() {
        if(view_consistency_checker != null) {
            view_consistency_checker.cancel(true);
            view_consistency_checker=null;
        }
    }


    public Object down(Event evt) {
        switch(evt.getType()) {
            case Event.CONNECT:
            case Event.CONNECT_USE_FLUSH:
            case Event.CONNECT_WITH_STATE_TRANSFER:
            case Event.CONNECT_WITH_STATE_TRANSFER_USE_FLUSH:
                cluster_name=(String)evt.getArg();
                break;

            case Event.DISCONNECT:
                stopViewConsistencyChecker();
                stopInfoSender();
                break;

            case Event.VIEW_CHANGE:
                stopViewConsistencyChecker();
                stopInfoSender();
                Object ret=down_prot.down(evt);
                view=(View)evt.getArg();
                clearViews();

                if(ergonomics && max_participants_in_merge > 0)
                    max_participants_in_merge=Math.max(100, view.size() / 3);

                startInfoSender();

                List<Address> mbrs=view.getMembers();
                Address coord=mbrs.isEmpty()? null : mbrs.get(0);
                if(coord != null && coord.equals(local_addr)) {
                    is_coord=true;
                    startViewConsistencyChecker(); // start task if we became coordinator (doesn't start if already running)
                }
                else {
                    // if we were the coordinator, but are no longer, stop the task; this happens e.g. when we merge
                    // and someone else becomes the new coordinator of the merged group
                    is_coord=false;
                }
                return ret;

            case Event.SET_LOCAL_ADDRESS:
                local_addr=(Address)evt.getArg();
                break;
        }
        return down_prot.down(evt);
    }

    public Object up(Event evt) {
        switch(evt.getType()) {
            case Event.MSG:
                Message msg=(Message)evt.getArg();
                MergeHeader hdr=(MergeHeader)msg.getHeader(getId());
                if(hdr == null)
                    break;
                Address sender=msg.getSrc();
                switch(hdr.type) {
                    case INFO:
                        if(hdr.logical_name != null && sender instanceof UUID)
                            UUID.add(sender, hdr.logical_name);
                        if(hdr.physical_addrs != null) {
                            for(PhysicalAddress physical_addr: hdr.physical_addrs)
                                down(new Event(Event.SET_PHYSICAL_ADDRESS,
                                               new Tuple<Address,PhysicalAddress>(sender, physical_addr)));
                        }
                        SortedSet<Address> existing=views.get(hdr.view_id);
                        if(existing == null) {
                            existing=new ConcurrentSkipListSet<Address>();
                            SortedSet<Address> tmp=views.putIfAbsent(hdr.view_id, existing);
                            if(tmp != null)
                                existing=tmp;
                        }
                        existing.add(sender);
                        if(log.isTraceEnabled())
                            log.trace(local_addr + " <-- " + sender + ": " + hdr + ", cached views: " + views.size());
                        break;
                    case VIEW_REQ:
                        View tmp_view=view != null?
                          view.copy() : null;
                        Header tmphdr=MergeHeader.createViewResponse(tmp_view);
                        Message view_rsp=new Message(sender);
                        view_rsp.putHeader(getId(), tmphdr);
                        down_prot.down(new Event(Event.MSG, view_rsp));
                        break;
                    case VIEW_RSP:
                        if(hdr.view != null)
                            view_rsps.add(sender, hdr.view);
                        break;
                    default:
                        log.error("Type " + hdr.type + " not known");
                }
                return null;
        }
        return up_prot.up(evt);
    }

    public static List<View> detectDifferentViews(Map<Address,View> map) {
        final List<View> ret=new ArrayList<View>();
        for(View view: map.values()) {
            if(view == null)
                continue;
            ViewId vid=view.getVid();
            if(!Util.containsViewId(ret, vid))
                ret.add(view);
        }
        return ret;
    }


    /** Periodically sends an INFO message with our address, logical name, physical address(es) and current ViewId */
    protected class InfoSender implements TimeScheduler.Task {

        public void run() {
            if(view == null) {
                log.warn("view is null, cannot send INFO message");
                return;
            }
            PhysicalAddress physical_addr=local_addr != null?
              (PhysicalAddress)down_prot.down(new Event(Event.GET_PHYSICAL_ADDRESS, local_addr)) : null;
            String logical_name=UUID.get(local_addr);
            ViewId view_id=view.getViewId();
            MergeHeader hdr=MergeHeader.createInfo(view_id, logical_name,
                                                   physical_addr != null? Arrays.asList(physical_addr) : null);
            if(transport_supports_multicasting) {
                Message msg=new Message();
                msg.putHeader(getId(), hdr);
                down_prot.down(new Event(Event.MSG, msg));
                return;
            }

            // no multicast-capable transport: send the INFO message to each member's physical address individually
            Discovery discovery_protocol=(Discovery)stack.findProtocol(Discovery.class);
            if(discovery_protocol == null) {
                log.warn("no discovery protocol found, cannot ask for physical addresses to send INFO message");
                return;
            }
            Collection<PhysicalAddress> physical_addrs=discovery_protocol.fetchClusterMembers(cluster_name);
            if(physical_addrs == null)
                physical_addrs=(Collection<PhysicalAddress>)down_prot.down(new Event(Event.GET_PHYSICAL_ADDRESSES));
            if(physical_addrs == null || physical_addrs.isEmpty())
                return;
            if(log.isTraceEnabled())
                log.trace("discovery protocol " + discovery_protocol.getName() + " returned " + physical_addrs.size() +
                            " physical addresses: " + Util.printListWithDelimiter(physical_addrs, ", ", 10));
            for(Address addr: physical_addrs) {
                Message info=new Message(addr);
                info.putHeader(getId(), hdr);
                down_prot.down(new Event(Event.MSG, info));
            }
        }

        public long nextInterval() {
            return Math.max(min_interval, Util.random(max_interval) + max_interval/2);
        }
    }


    /** Checks whether the cached ViewIds are consistent; if not, the merge leader initiates a merge */
    protected class ViewConsistencyChecker implements TimeScheduler.Task {

        public void run() {
            try {
                if(views.size() <= 1) {
                    if(log.isTraceEnabled())
                        log.trace("found no inconsistent views: " + dumpViews());
                    return;
                }
                _run();
            }
            finally {
                clearViews();
            }
        }

        protected void _run() {
            SortedSet<Address> coords=new TreeSet<Address>();

            // Only add view creators which *are* actually in the set as well, e.g.
            // A|4: {A,B,C} and
            // B|4: {D} would only add A to the coords list; A is a real coordinator
            for(Map.Entry<ViewId,SortedSet<Address>> entry: views.entrySet()) {
                Address coord=entry.getKey().getCreator();
                SortedSet<Address> members=entry.getValue();
                if(members != null && members.contains(coord))
                    coords.add(coord);
            }

            Address merge_leader=coords.isEmpty()? null : coords.first();
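
            // Only the merge leader continues: the lowest address (first element of the sorted set) among the real
            // coordinators collected above. All other members return here, which is what prevents concurrent,
            // colliding merges (see the class javadoc).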
            if(merge_leader == null || local_addr == null || !merge_leader.equals(local_addr)) {
                if(log.isTraceEnabled())
                    log.trace("I (" + local_addr + ") won't be the merge leader");
                return;
            }

            if(log.isDebugEnabled())
                log.debug("I (" + local_addr + ") will be the merge leader");

            // add merge participants
            for(SortedSet<Address> set: views.values()) {
                if(!set.isEmpty())
                    coords.add(set.first());
            }

            if(coords.size() <= 1) {
                log.trace("cancelling merge as we only have 1 coordinator: " + coords);
                return;
            }
            if(log.isTraceEnabled())
                log.trace("merge participants are " + coords);

            if(max_participants_in_merge > 0 && coords.size() > max_participants_in_merge) {
                int old_size=coords.size();
                for(Iterator<Address> it=coords.iterator(); it.hasNext();) {
                    Address next=it.next();
                    if(next.equals(merge_leader))
                        continue;
                    if(coords.size() > max_participants_in_merge)
                        it.remove();
                }
                if(log.isTraceEnabled())
                    log.trace(local_addr + ": reduced " + old_size + " coords to " + max_participants_in_merge);
            }

            // grab views from all members in coords
            view_rsps.reset(coords);
            for(Address target: coords) {
                if(target.equals(local_addr)) {
                    if(view != null)
                        view_rsps.add(local_addr, view.copy());
                    continue;
                }
                Message view_req=new Message(target);
                Header hdr=MergeHeader.createViewRequest();
                view_req.putHeader(getId(), hdr);
                down_prot.down(new Event(Event.MSG, view_req));
            }
            view_rsps.waitForAllResponses(check_interval / 10);
            Map<Address,View> results=view_rsps.getResults();
            Map<Address,View> merge_views=new HashMap<Address,View>();

            for(Map.Entry<Address,View> entry: results.entrySet())
                if(entry.getValue() != null)
                    merge_views.put(entry.getKey(), entry.getValue());

            if(merge_views.size() >= 2) {
                up_prot.up(new Event(Event.MERGE, merge_views));
                num_merge_events++;
            }
        }

        public long nextInterval() {
            return check_interval;
        }
    }


    public static class MergeHeader extends Header {
        protected Type                        type=Type.INFO;
        protected ViewId                      view_id;
        protected View                        view;
        protected String                      logical_name;
        protected Collection<PhysicalAddress> physical_addrs;


        public MergeHeader() {
        }

        public static MergeHeader createInfo(ViewId view_id, String logical_name, Collection<PhysicalAddress> physical_addrs) {
            return new MergeHeader(Type.INFO, view_id, null, logical_name, physical_addrs);
        }

        public static MergeHeader createViewRequest() {
            return new MergeHeader(Type.VIEW_REQ, null, null, null, null);
        }

        public static MergeHeader createViewResponse(View view) {
            return new MergeHeader(Type.VIEW_RSP, null, view, null, null);
        }

        protected MergeHeader(Type type, ViewId view_id, View view, String logical_name, Collection<PhysicalAddress> physical_addrs) {
            this.type=type;
            this.view_id=view_id;
            this.view=view;
            this.logical_name=logical_name;
            this.physical_addrs=physical_addrs;
        }

        public int size() {
            int retval=Global.BYTE_SIZE; // for the type
            retval+=Util.size(view_id);
            retval+=Util.size(view);
            retval+=Global.BYTE_SIZE;    // presence byte for logical_name
            if(logical_name != null)
                retval+=logical_name.length() + 2;
            retval+=Util.size(physical_addrs);
            return retval;
        }

        public void writeTo(DataOutput outstream) throws Exception {
            outstream.writeByte(type.ordinal()); // a byte is ok as we only have 3 types anyway
            Util.writeViewId(view_id, outstream);
            Util.writeView(view, outstream);
            Util.writeString(logical_name, outstream);
            Util.writeAddresses(physical_addrs, outstream);
        }

        @SuppressWarnings("unchecked")
        public void readFrom(DataInput instream) throws Exception {
            type=Type.values()[instream.readByte()];
            view_id=Util.readViewId(instream);
            view=Util.readView(instream);
            logical_name=Util.readString(instream);
            physical_addrs=(Collection<PhysicalAddress>)Util.readAddresses(instream, ArrayList.class);
        }

        public String toString() {
            StringBuilder sb=new StringBuilder();
            sb.append(type + ": ");
            if(view_id != null)
                sb.append("view_id=" + view_id);
            else if(view != null)
                sb.append(" view=").append(view);
            sb.append(", logical_name=" + logical_name + ", physical_addrs=" + physical_addrs);
            return sb.toString();
        }

        protected static enum Type {INFO, VIEW_REQ, VIEW_RSP}
    }
}
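
/* Example configuration (an illustrative sketch, not part of the original class). MERGE3 is typically placed
   between the discovery protocol and failure detection in the stack XML; the element and attribute names below
   follow the usual JGroups convention of using the protocol's class name and its @Property names, and the values
   are arbitrary examples:

       <MERGE3 min_interval="10000"
               max_interval="30000"
               max_participants_in_merge="100"/>

   The same settings can be applied programmatically via the setters defined above, e.g.:

       MERGE3 merge3=new MERGE3();
       merge3.setMinInterval(10000);
       merge3.setMaxInterval(30000);  // also recomputes check_interval as max_interval * 1.6
*/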