package org.jgroups.protocols; import org.jgroups.*; import org.jgroups.annotations.MBean; import org.jgroups.annotations.ManagedAttribute; import org.jgroups.annotations.ManagedOperation; import org.jgroups.annotations.Property; import org.jgroups.protocols.pbcast.JoinRsp; import org.jgroups.stack.Protocol; import org.jgroups.util.*; import org.jgroups.util.UUID; import java.io.InterruptedIOException; import java.util.*; import java.util.concurrent.TimeUnit; /** * The Discovery protocol retrieves the initial membership (used by GMS and MERGE2) by sending discovery requests. * We do this in subclasses of Discovery, e.g. by mcasting a discovery request ({@link PING}) or, if gossiping is enabled, * by contacting the GossipRouter ({@link TCPGOSSIP}).<p/> * The responses should allow us to determine the coordinator which we have to contact, e.g. in case we want to join * the group, or to see if we have diverging views in case of MERGE2.<p/> * When we are a server (after having received the BECOME_SERVER event), we'll respond to discovery requests with * a discovery response. * * @author Bela Ban */ @MBean public abstract class Discovery extends Protocol { /* ----------------------------------------- Properties -------------------------------------------------- */ @Property(description="Timeout to wait for the initial members") protected long timeout=3000; @Property(description="Minimum number of initial members to get a response from") protected int num_initial_members=10; @Deprecated @Property(description="Minimum number of server responses (PingData.isServer()=true). If this value is " + "greater than 0, we'll ignore num_initial_members",deprecatedMessage="not used anymore") protected int num_initial_srv_members=0; @Property(description="Return from the discovery phase as soon as we have 1 coordinator response") protected boolean break_on_coord_rsp=true; @Property(description="Whether or not to return the entire logical-physical address cache mappings on a " + "discovery request, or not.") protected boolean return_entire_cache=false; @Property(description="If greater than 0, we'll wait a random number of milliseconds in range [0..stagger_timeout] " + "before sending a discovery response. This prevents traffic spikes in large clusters when everyone sends their " + "discovery response at the same time") protected long stagger_timeout=0; @Property(description="Always sends a discovery response, no matter what",writable=true) protected boolean force_sending_discovery_rsps=true; @Property(description="If a persistent disk cache (PDC) is present, combine the discovery results with the " + "contents of the disk cache before returning the results") protected boolean use_disk_cache=false; @ManagedOperation(description="Sets force_sending_discovery_rsps") public void setForceSendingDiscoveryRsps(boolean flag) { force_sending_discovery_rsps=flag; } /* --------------------------------------------- JMX ------------------------------------------------------ */ @ManagedAttribute(description="Total number of discovery requests sent ") protected int num_discovery_requests=0; /** The largest cluster size found so far (gets reset on stop()) */ @ManagedAttribute private int max_found_members=0; /* --------------------------------------------- Fields ------------------------------------------------------ */ protected volatile boolean is_server=false; protected volatile boolean is_leaving=false; protected TimeScheduler timer=null; protected View view; protected final List<Address> members=new ArrayList<Address>(11); @ManagedAttribute(description="Whether this member is the current coordinator") protected boolean is_coord; protected Address local_addr=null; protected Address current_coord; protected String group_addr=null; protected final Set<Responses> ping_responses=new HashSet<Responses>(); public void init() throws Exception { timer=getTransport().getTimer(); if(timer == null) throw new Exception("timer cannot be retrieved from protocol stack"); if(stagger_timeout < 0) throw new IllegalArgumentException("stagger_timeout cannot be negative"); if(stagger_timeout > timeout) { log.debug("stagger_timeout (" + stagger_timeout + ") was greater than timeout (" + timeout + "); setting it to " + timeout + " ms"); stagger_timeout=timeout; } } /** * Grab all current cluster members * @return A list of the cluster members (usually IpAddresses), or null if the transport is multicast-enabled. * Returns an empty list if no cluster members could be found. * @param cluster_name */ public abstract Collection<PhysicalAddress> fetchClusterMembers(String cluster_name); /** Whether or not to send each discovery request on a separate (timer) thread. If disabled, * a discovery request will be sent to all members fetched by {@link #fetchClusterMembers(String)} sequentially */ public abstract boolean sendDiscoveryRequestsInParallel(); public abstract boolean isDynamic(); public void handleDisconnect() { } public void handleConnect() { } public void discoveryRequestReceived(Address sender, String logical_name, Collection<PhysicalAddress> physical_addrs) { } public long getTimeout() { return timeout; } public void setTimeout(long timeout) { this.timeout=timeout; } public int getNumInitialMembers() { return num_initial_members; } public void setNumInitialMembers(int num_initial_members) { this.num_initial_members=num_initial_members; } public int getNumberOfDiscoveryRequestsSent() { return num_discovery_requests; } @ManagedAttribute public String getView() {return view != null? view.getViewId().toString() : "null";} public ViewId getViewId() { return view != null? view.getViewId() : null; } @ManagedAttribute(description="The address of the current coordinator") public String getCurrentCoord() {return current_coord != null? current_coord.toString() : "n/a";} protected boolean isMergeRunning() { Object retval=up_prot.up(new Event(Event.IS_MERGE_IN_PROGRESS)); return retval instanceof Boolean && (Boolean)retval; } public List<Integer> providedUpServices() { List<Integer> ret=new ArrayList<Integer>(3); ret.add(Event.FIND_INITIAL_MBRS); ret.add(Event.FIND_ALL_VIEWS); ret.add(Event.GET_PHYSICAL_ADDRESS); return ret; } public void resetStats() { super.resetStats(); num_discovery_requests=0; } public void start() throws Exception { super.start(); } public void stop() { is_server=false; max_found_members=0; } /** * Finds initial members * @param promise * @return */ public List<PingData> findInitialMembers(Promise<JoinRsp> promise) { return findMembers(promise, num_initial_members, break_on_coord_rsp, null); } public List<PingData> findAllViews(Promise<JoinRsp> promise) { int num_expected_mbrs=Math.max(max_found_members, Math.max(num_initial_members, view != null? view.size() : num_initial_members)); max_found_members=Math.max(max_found_members, num_expected_mbrs); return findMembers(promise, num_expected_mbrs, false, getViewId()); } protected List<PingData> findMembers(Promise<JoinRsp> promise, int num_expected_rsps, boolean break_on_coord, ViewId view_id) { num_discovery_requests++; final Responses rsps=new Responses(num_expected_rsps, break_on_coord, promise); synchronized(ping_responses) { ping_responses.add(rsps); } try { sendDiscoveryRequest(group_addr, promise, view_id); } catch(InterruptedIOException ie) { ; } catch(InterruptedException ex) { ; } catch(Throwable ex) { if(log.isErrorEnabled()) log.error("failed sending discovery request", ex); } try { return rsps.get(timeout); } catch(Exception e) { return new LinkedList<PingData>(); } finally { synchronized(ping_responses) { ping_responses.remove(rsps); } } } public void sendDiscoveryRequest(String cluster_name, Promise promise, ViewId view_id) throws Exception { PingData data=null; PhysicalAddress physical_addr=(PhysicalAddress)down(new Event(Event.GET_PHYSICAL_ADDRESS, local_addr)); if(view_id == null) data=new PingData(local_addr, null, false, UUID.get(local_addr), Arrays.asList(physical_addr)); PingHeader hdr=new PingHeader(PingHeader.GET_MBRS_REQ, data, cluster_name); hdr.view_id=view_id; Collection<PhysicalAddress> cluster_members=fetchClusterMembers(cluster_name); if(cluster_members == null) { Message msg=new Message(null); // multicast msg msg.setFlag(Message.OOB); msg.putHeader(getId(), hdr); sendMcastDiscoveryRequest(msg); } else { if(use_disk_cache) { // this only makes sense if we have PDC below us Collection<PhysicalAddress> list=(Collection<PhysicalAddress>)down_prot.down(new Event(Event.GET_PHYSICAL_ADDRESSES)); if(list != null) for(PhysicalAddress phys_addr: list) if(!cluster_members.contains(phys_addr)) cluster_members.add(phys_addr); } if(cluster_members.isEmpty()) { // if we don't find any members, return immediately if(promise != null) promise.setResult(null); } else { for(final Address addr: cluster_members) { if(addr.equals(physical_addr)) // no need to send the request to myself continue; final Message msg=new Message(addr, null, null); msg.setFlag(Message.OOB); msg.putHeader(this.id, hdr); if(log.isTraceEnabled()) log.trace(local_addr + ": sending discovery request to " + msg.getDest()); if(!sendDiscoveryRequestsInParallel()) { down_prot.down(new Event(Event.MSG, msg)); } else { timer.execute(new Runnable() { public void run() { try { down_prot.down(new Event(Event.MSG, msg)); } catch(Exception ex){ if(log.isErrorEnabled()) log.error(local_addr + ": failed sending discovery request to " + addr + ": " + ex); } } }); } } } } } protected void sendMcastDiscoveryRequest(Message discovery_request) { down_prot.down(new Event(Event.MSG, discovery_request)); } @ManagedOperation(description="Runs the discovery protocol to find initial members") public String findInitialMembersAsString() { List<PingData> results=findInitialMembers(null); if(results == null || results.isEmpty()) return "<empty>"; StringBuilder sb=new StringBuilder(); for(PingData rsp: results) { sb.append(rsp).append("\n"); } return sb.toString(); } @ManagedOperation(description="Runs the discovery protocol to find all views") public String findAllViewsAsString() { List<PingData> rsps=findAllViews(null); if(rsps == null || rsps.isEmpty()) return "<empty>"; StringBuilder sb=new StringBuilder(); for(PingData data: rsps) { View v=data.getView(); if(v != null) sb.append(v).append("\n"); } return sb.toString(); } /** * An event was received from the layer below. Usually the current layer will want to examine * the event type and - depending on its type - perform some computation * (e.g. removing headers from a MSG event type, or updating the internal membership list * when receiving a VIEW_CHANGE event). * Finally the event is either a) discarded, or b) an event is sent down * the stack using <code>PassDown</code> or c) the event (or another event) is sent up * the stack using <code>PassUp</code>. * <p/> * For the PING protocol, the Up operation does the following things. * 1. If the event is a Event.MSG then PING will inspect the message header. * If the header is null, PING simply passes up the event * If the header is PingHeader.GET_MBRS_REQ then the PING protocol * will PassDown a PingRequest message * If the header is PingHeader.GET_MBRS_RSP we will add the message to the initial members * vector and wake up any waiting threads. * 2. If the event is Event.SET_LOCAL_ADDR we will simple set the local address of this protocol * 3. For all other messages we simple pass it up to the protocol above * * @param evt - the event that has been sent from the layer below */ @SuppressWarnings("unchecked") public Object up(Event evt) { switch(evt.getType()) { case Event.MSG: Message msg=(Message)evt.getArg(); PingHeader hdr=(PingHeader)msg.getHeader(this.id); if(hdr == null) return up_prot.up(evt); PingData data=hdr.data; Address logical_addr=data != null? data.getAddress() : null; if(is_leaving) return null; // prevents merging back a leaving member (https://issues.jboss.org/browse/JGRP-1336) switch(hdr.type) { case PingHeader.GET_MBRS_REQ: // return Rsp(local_addr, coord) if(group_addr == null || hdr.cluster_name == null) { if(log.isWarnEnabled()) log.warn("group_addr (" + group_addr + ") or cluster_name of header (" + hdr.cluster_name + ") is null; passing up discovery request from " + msg.getSrc() + ", but this should not" + " be the case"); } else { if(!group_addr.equals(hdr.cluster_name)) { if(log.isWarnEnabled()) log.warn(local_addr + ": discarding discovery request for cluster '" + hdr.cluster_name + "' from " + msg.getSrc() + "; our cluster name is '" + group_addr + "'. " + "Please separate your clusters cleanly."); return null; } } // add physical address and logical name of the discovery sender (if available) to the cache if(data != null) { if(logical_addr == null) logical_addr=msg.getSrc(); Collection<PhysicalAddress> physical_addrs=data.getPhysicalAddrs(); PhysicalAddress physical_addr=physical_addrs != null && !physical_addrs.isEmpty()? physical_addrs.iterator().next() : null; if(logical_addr != null && data.getLogicalName() != null) UUID.add(logical_addr, data.getLogicalName()); if(logical_addr != null && physical_addr != null) down(new Event(Event.SET_PHYSICAL_ADDRESS, new Tuple<Address,PhysicalAddress>(logical_addr, physical_addr))); discoveryRequestReceived(msg.getSrc(), data.getLogicalName(), physical_addrs); synchronized(ping_responses) { for(Responses response: ping_responses) { response.addResponse(data, false); } } } if(hdr.view_id != null) { // If the discovery request is merge-triggered, and the ViewId shipped with it // is the same as ours, we don't respond (JGRP-1315). ViewId my_view_id=view != null? view.getViewId() : null; if(my_view_id != null && my_view_id.equals(hdr.view_id)) return null; boolean send_discovery_rsp=force_sending_discovery_rsps || is_coord || current_coord == null || current_coord.equals(msg.getSrc()); if(!send_discovery_rsp) { if(log.isTraceEnabled()) log.trace(local_addr + ": suppressing merge response as I'm not a coordinator and the " + "discovery request was not sent by a coordinator"); return null; } } if(isMergeRunning()) { if(log.isTraceEnabled()) log.trace(local_addr + ": suppressing merge response as a merge is already in progress"); return null; } if(return_entire_cache) { Map<Address,PhysicalAddress> cache=(Map<Address,PhysicalAddress>)down(new Event(Event.GET_LOGICAL_PHYSICAL_MAPPINGS)); if(cache != null) { for(Map.Entry<Address,PhysicalAddress> entry: cache.entrySet()) { Address addr=entry.getKey(); // JGRP-1492: only return our own address, and addresses in view. if (addr.equals(local_addr) || members.contains(addr)) { PhysicalAddress physical_addr=entry.getValue(); sendDiscoveryResponse(addr, Arrays.asList(physical_addr), is_server, hdr.view_id != null, UUID.get(addr), msg.getSrc()); } } } } else { List<PhysicalAddress> physical_addrs=hdr.view_id != null? null : Arrays.asList((PhysicalAddress)down(new Event(Event.GET_PHYSICAL_ADDRESS, local_addr))); sendDiscoveryResponse(local_addr, physical_addrs, is_server, hdr.view_id != null, UUID.get(local_addr), msg.getSrc()); } return null; case PingHeader.GET_MBRS_RSP: // add response to vector and notify waiting thread // add physical address (if available) to transport's cache if(data != null) { Address response_sender=msg.getSrc(); if(logical_addr == null) logical_addr=msg.getSrc(); Collection<PhysicalAddress> addrs=data.getPhysicalAddrs(); PhysicalAddress physical_addr=addrs != null && !addrs.isEmpty()? addrs.iterator().next() : null; if(logical_addr != null && data.getLogicalName() != null) UUID.add(logical_addr, data.getLogicalName()); if(logical_addr != null && physical_addr != null) down(new Event(Event.SET_PHYSICAL_ADDRESS, new Tuple<Address,PhysicalAddress>(logical_addr, physical_addr))); if(log.isTraceEnabled()) log.trace(local_addr + ": received GET_MBRS_RSP from " + response_sender + ": " + data); boolean overwrite=logical_addr != null && logical_addr.equals(response_sender); synchronized(ping_responses) { for(Responses response: ping_responses) { response.addResponse(data, overwrite); } } } return null; default: if(log.isWarnEnabled()) log.warn("got PING header with unknown type (" + hdr.type + ')'); return null; } case Event.GET_PHYSICAL_ADDRESS: try { sendDiscoveryRequest(group_addr, null, null); } catch(InterruptedIOException ie) { if(log.isWarnEnabled()){ log.warn("Discovery request for cluster " + group_addr + " interrupted"); } Thread.currentThread().interrupt(); } catch(Exception ex) { if(log.isErrorEnabled()) log.error("failed sending discovery request", ex); } return null; case Event.FIND_INITIAL_MBRS: // sent by transport return findInitialMembers(null); } return up_prot.up(evt); } /** * An event is to be sent down the stack. The layer may want to examine its type and perform * some action on it, depending on the event's type. If the event is a message MSG, then * the layer may need to add a header to it (or do nothing at all) before sending it down * the stack using <code>PassDown</code>. In case of a GET_ADDRESS event (which tries to * retrieve the stack's address from one of the bottom layers), the layer may need to send * a new response event back up the stack using <code>up_prot.up()</code>. * The PING protocol is interested in several different down events, * Event.FIND_INITIAL_MBRS - sent by the GMS layer and expecting a GET_MBRS_OK * Event.TMP_VIEW and Event.VIEW_CHANGE - a view change event * Event.BECOME_SERVER - called after client has joined and is fully working group member * Event.CONNECT, Event.DISCONNECT. */ @SuppressWarnings("unchecked") public Object down(Event evt) { switch(evt.getType()) { case Event.FIND_INITIAL_MBRS: // sent by GMS layer case Event.FIND_ALL_VIEWS: // sends the GET_MBRS_REQ to all members, waits 'timeout' ms or until 'num_initial_members' have been retrieved long start=System.currentTimeMillis(); boolean find_all_views=evt.getType() == Event.FIND_ALL_VIEWS; Promise<JoinRsp> promise=(Promise<JoinRsp>)evt.getArg(); List<PingData> rsps=find_all_views? findAllViews(promise) : findInitialMembers(promise); long diff=System.currentTimeMillis() - start; if(log.isTraceEnabled()) log.trace(local_addr + ": discovery took "+ diff + " ms: responses: " + Util.printPingData(rsps)); return rsps; case Event.TMP_VIEW: case Event.VIEW_CHANGE: List<Address> tmp; view=(View)evt.getArg(); if((tmp=view.getMembers()) != null) { synchronized(members) { members.clear(); members.addAll(tmp); } } current_coord=!members.isEmpty()? members.get(0) : null; is_coord=current_coord != null && local_addr != null && current_coord.equals(local_addr); return down_prot.down(evt); case Event.BECOME_SERVER: // called after client has joined and is fully working group member down_prot.down(evt); is_server=true; return null; case Event.SET_LOCAL_ADDRESS: local_addr=(Address)evt.getArg(); return down_prot.down(evt); case Event.CONNECT: case Event.CONNECT_WITH_STATE_TRANSFER: case Event.CONNECT_USE_FLUSH: case Event.CONNECT_WITH_STATE_TRANSFER_USE_FLUSH: is_leaving=false; group_addr=(String)evt.getArg(); Object ret=down_prot.down(evt); handleConnect(); return ret; case Event.DISCONNECT: is_leaving=true; handleDisconnect(); return down_prot.down(evt); default: return down_prot.down(evt); // Pass on to the layer below us } } /* -------------------------- Private methods ---------------------------- */ /** * Creates a byte[] representation of the PingData, but DISCARDING the view it contains. * @param data the PingData instance to serialize. * @return */ protected byte[] serializeWithoutView(PingData data) { final PingData clone = new PingData(data.getAddress(), null, data.isServer(), data.getLogicalName(), data.getPhysicalAddrs()); try { return Util.streamableToByteBuffer(clone); } catch(Exception e) { log.error("Error", e); return null; } } protected PingData deserialize(final byte[] data) { try { return (PingData)Util.streamableFromByteBuffer(PingData.class, data); } catch(Exception e) { log.error("Error", e); return null; } } protected void sendDiscoveryResponse(Address logical_addr, List<PhysicalAddress> physical_addrs, boolean is_server, boolean return_view_only, String logical_name, final Address sender) { PingData data; if(return_view_only) { data=new PingData(logical_addr, view, is_server, null, null); } else { ViewId view_id=view != null? view.getViewId() : null; data=new PingData(logical_addr, null, view_id, is_server, logical_name, physical_addrs); } final Message rsp_msg=new Message(sender, null, null); rsp_msg.setFlag(Message.OOB); final PingHeader rsp_hdr=new PingHeader(PingHeader.GET_MBRS_RSP, data); rsp_msg.putHeader(this.id, rsp_hdr); if(stagger_timeout > 0) { int view_size=view != null? view.size() : 10; int rank=Util.getRank(view, local_addr); // returns 0 if view or local_addr are null long sleep_time=rank == 0? Util.random(stagger_timeout) : stagger_timeout * rank / view_size - (stagger_timeout / view_size); timer.schedule(new Runnable() { public void run() { if(log.isTraceEnabled()) log.trace(local_addr + ": received GET_MBRS_REQ from " + sender + ", sending staggered response " + rsp_hdr); down_prot.down(new Event(Event.MSG, rsp_msg)); } }, sleep_time, TimeUnit.MILLISECONDS); return; } if(log.isTraceEnabled()) log.trace(local_addr + ": received GET_MBRS_REQ from " + sender + ", sending response " + rsp_hdr); down_prot.down(new Event(Event.MSG, rsp_msg)); } protected static class Responses { final Promise<JoinRsp> promise; final List<PingData> ping_rsps=new ArrayList<PingData>(); final int num_expected_rsps; final boolean break_on_coord_rsp; protected Responses(int num_expected_rsps, boolean break_on_coord_rsp, Promise<JoinRsp> promise) { this.num_expected_rsps=num_expected_rsps; this.break_on_coord_rsp=break_on_coord_rsp; this.promise=promise != null? promise : new Promise<JoinRsp>(); } public void addResponse(PingData rsp) { addResponse(rsp, false); } public void addResponse(PingData rsp, boolean overwrite) { if(rsp == null) return; promise.getLock().lock(); try { if(overwrite) ping_rsps.remove(rsp); // https://jira.jboss.org/jira/browse/JGRP-1179 int index=ping_rsps.indexOf(rsp); if(index == -1) { ping_rsps.add(rsp); promise.getCond().signalAll(); } else if(rsp.isCoord()) { PingData pr=ping_rsps.get(index); // Check if the already existing element is not server if(!pr.isCoord()) { ping_rsps.set(index, rsp); promise.getCond().signalAll(); } } } finally { promise.getLock().unlock(); } } public List<PingData> get(long timeout) throws InterruptedException{ long start_time=System.currentTimeMillis(), time_to_wait=timeout; promise.getLock().lock(); try { while(time_to_wait > 0 && !promise.hasResult()) { if(ping_rsps.size() >= num_expected_rsps && (break_on_coord_rsp && containsCoordinatorResponse(ping_rsps))) return new LinkedList<PingData>(ping_rsps); if(break_on_coord_rsp && containsCoordinatorResponse(ping_rsps)) return new LinkedList<PingData>(ping_rsps); promise.getCond().await(time_to_wait, TimeUnit.MILLISECONDS); time_to_wait=timeout - (System.currentTimeMillis() - start_time); } return new LinkedList<PingData>(ping_rsps); } finally { promise.getLock().unlock(); } } private static boolean containsCoordinatorResponse(Collection<PingData> rsps) { if(rsps == null || rsps.isEmpty()) return false; for(PingData rsp: rsps) { if(rsp.isCoord()) return true; } return false; } } }