//$Id$ package org.exist.cluster; import java.util.ArrayList; import java.util.HashMap; import java.util.Vector; import org.apache.log4j.Logger; import org.exist.cluster.cocoon.ConsoleInfo; import org.exist.cluster.journal.JournalIdGenerator; import org.exist.cluster.journal.JournalManager; import org.exist.util.Configuration; import org.jgroups.Address; import org.jgroups.JChannel; import org.jgroups.MembershipListener; import org.jgroups.SuspectedException; import org.jgroups.View; import org.jgroups.blocks.GroupRequest; import org.jgroups.blocks.RpcDispatcher; import org.jgroups.util.RspList; /** * Manage the Cluster communication via RPC JGroups * Created by Nicola Breda. * * @author Nicola Breda aka maiale * @author David Frontini aka spider * Date: 05-aug-2005 * Time: 18.09.08 * Revision $Revision$ */ public class ClusterComunication implements MembershipListener { public static final String CONFIGURATION_ELEMENT_NAME = "cluster"; public static final String CLUSTER_PROTOCOL_ATTRIBUTE = "protocol"; public static final String CLUSTER_USER_ATTRIBUTE = "dbaUser"; public static final String CLUSTER_PWD_ATTRIBUTE = "dbaPassword"; public static final String CLUSTER_EXCLUDED_COLLECTIONS_ATTRIBUTE = "exclude"; public static final String PROPERTY_CLUSTER_PROTOCOL = "cluster.protocol"; public static final String PROPERTY_CLUSTER_USER = "cluster.user"; public static final String PROPERTY_CLUSTER_PWD = "cluster.pwd"; public static final String PROPERTY_CLUSTER_EXCLUDED_COLLECTIONS = "cluster.exclude"; private static Logger log = Logger.getLogger(ClusterComunication.class); private static JChannel channel; private static RpcDispatcher disp; private static final String banner = " ##### # # # ##### ####### ####### ######\n" + "# # # # # # # # # # #\n" + "# # # # # # # # #\n" + "# # # # ##### # ##### ######\n" + "# # # # # # # # #\n" + "# # # # # # # # # # #\n" + " ##### ####### ##### ##### # ####### # #\n" + "\n" + "\n" + " ###### # # # #### #####\n" + " # # # # # #\n" + " ##### ## # #### #\n" + " # ## # # #\n" + " # # # # # # #\n" + " ###### # # # #### #"; public static final String DEFAULT_PROTOCOL_STACK = "UDP(mcast_addr=228.1.2.3;mcast_port=45566;ip_ttl=32;loopback=true):" + "PING(timeout=3000;num_initial_members=6):" + "FD(timeout=3000):" + "VERIFY_SUSPECT(timeout=1500):" + "pbcast.NAKACK(gc_lag=10;retransmit_timeout=600,1200,2400,4800):" + "UNICAST(timeout=600,1200,2400,4800):" + "pbcast.STABLE(desired_avg_gossip=10000):" + "FRAG:" + "pbcast.GMS(join_timeout=5000;join_retry_timeout=2000;" + "shun=true;print_local_addr=true)"; private static ClusterComunication instance; private Vector membersNoSender = new Vector(); private Address localAddress; private Address coordinatorAddress; private static String dbaUser; private static String dbaPwd; private static ArrayList excludedCollection; private JournalManager journalManager; private JournalIdGenerator journalIdGenerator; private boolean coordinator = false; private boolean isRealign = true; private ArrayList realignQueue = new ArrayList(); private boolean viewConfigured = false; private int shift; private Configuration configuration; public static String getDbaUser() { return dbaUser; } public static String getDbaPwd() { return dbaPwd; } private static void createInstance(Configuration conf) throws ClusterException { ClusterComunication c = new ClusterComunication(); System.out.println(banner); try { String protocol = (String) conf.getProperty(PROPERTY_CLUSTER_PROTOCOL); dbaUser = (String) conf.getProperty(PROPERTY_CLUSTER_USER); dbaPwd = (String) conf.getProperty(PROPERTY_CLUSTER_PWD); excludedCollection = (ArrayList) conf.getProperty(PROPERTY_CLUSTER_EXCLUDED_COLLECTIONS); if (protocol == null) protocol = DEFAULT_PROTOCOL_STACK; System.out.println("PROTOCOL \n" + protocol); channel = new JChannel(protocol); disp = new RpcDispatcher(channel, null, c, c); disp.setDeadlockDetection(true); c.configuration = conf; c.journalManager = new JournalManager(conf); c.journalIdGenerator = new JournalIdGenerator(c.journalManager, ((Integer)conf.getProperty(JournalManager.PROPERTY_CLUSTER_JOURNAL_MAXSTORE)).intValue()); c.shift = ((Integer)conf.getProperty(JournalManager.PROPERTY_CLUSTER_JOURNAL_SHIFT)).intValue(); instance = c; channel.connect("eXist-cluster"); c.localAddress = channel.getLocalAddress(); while(!c.viewConfigured){ log.info("SLEEPING - WAITING TO CONFIGURE THE CLUSTER"); Thread.sleep(2000); } if(c.isRealign){ log.info("TRY TO REALIGNING " + Thread.currentThread().getName()); c.realign(); c.isRealign = false; } log.info("REALIGNED ... "+ Thread.currentThread().getName()); } catch (Exception e) { e.printStackTrace(); log.error("Error during cluster JGroups environment configuration " + e); throw new ClusterException("ERROR CREATING CLUSTER ...",e); } } private ClusterComunication() { } /** * ---------------- MEMBERSHIP LISTENER METHODS ------------------------------ **** */ public void viewAccepted(View view) { this.coordinatorAddress = view.getCreator(); // The master address of the cluster boolean coordinator = coordinatorAddress.equals(localAddress); log.info("COordinator : " + coordinator + " localAddress : " + localAddress); if(coordinator) log.info("***************** I'M MASTER!!!!!!!!!"); //Per evitare problematiche di sincronizzazione in caso di failure - il nuovo master sposta in avanti i suoi indici //in modo da compensare possibili disallineamenti. if (coordinator && !this.coordinator && journalIdGenerator!=null) { journalIdGenerator.shiftId(shift); } this.coordinator = coordinatorAddress.equals(localAddress); //check if this node is a master Vector members = (Vector) view.getMembers().clone(); members.removeElement(channel.getLocalAddress()); this.membersNoSender = members; //all members into the cluster viewConfigured = true; } public void suspect(Address address) { if(coordinatorAddress.equals(address)){ log.info("MASTER IS DEAD"); } } public void block() { } /** * ********** --------------------------------------------------------- ********** */ public static ClusterComunication getInstance() { return instance; } /** * ************** --------- CONSOLE METHODS ---------- ******************************* */ public boolean isCoordinator(){ return coordinator; } public Address getCoordinator(){ return coordinatorAddress; } public Address getAddress(){ return localAddress; } public Vector getMembersNoCoordinator(){ Vector members = (Vector) membersNoSender.clone(); members.remove(coordinatorAddress); return members; } public HashMap getConsoleInfos(Vector address){ HashMap response = new HashMap(); RspList list = disp.callRemoteMethods(address, "getConsoleProperties", new Object[]{}, new Class[]{}, GroupRequest.GET_ALL, 0); for(int i=0;i<address.size();i++){ Address addr = (Address) address.get(i); response.put(addr.toString(),list.get(addr)); } return response; } public int[][] getHeaders() throws ClusterException { int[][] data = new int[2][]; data[0] = new int[]{journalManager.getLastIdSaved(),journalManager.getMaxIdSaved(),journalManager.getCounter()}; try{ if(!coordinator) data[1] = (int[]) disp.callRemoteMethod(coordinatorAddress, "getRemoteHeader", new Object[]{}, new Class[]{}, GroupRequest.GET_FIRST, 0); }catch(Throwable e) { e.printStackTrace(); throw new ClusterException("Error retrieving ...",e ); } return data; } public Configuration getConfiguration() { return configuration; } /** * ************** ---------------------------------------- ******************************* */ /** * Configure the cluster communication * * @param c */ public static void configure(Configuration c) throws ClusterException { createInstance(c); } public void synch() throws ClusterException { journalManager.squeueEvent(); } public void removeDocument(String collection, String documentName) throws ClusterException { if (excludedCollection.contains(collection)) return; remoteInvocation(new RemoveClusterEvent(documentName, collection)); } public void storeDocument(String collection, String documentName, String content) throws ClusterException { if (excludedCollection.contains(collection)) return; remoteInvocation(new StoreClusterEvent(content, collection, documentName)); } public void addCollection(String parent, String collectionName) throws ClusterException { if (excludedCollection.contains(parent) || excludedCollection.contains(parent + "/" + collectionName)) return; remoteInvocation(new CreateCollectionClusterEvent(parent, collectionName)); } public void update(String resource, String name, String xupdate) throws ClusterException { if (excludedCollection.contains(resource)) return; //avoid to propagate the internal collection for example temp. remoteInvocation(new UpdateClusterEvent(resource, name, xupdate)); } public void removeCollection(String parent, String collection) throws ClusterException { if (excludedCollection.contains(collection)|| excludedCollection.contains(parent + "/" + collection)) return; //avoid to propagate the internal collection for example temp. remoteInvocation(new RemoveCollectionClusterEvent(parent, collection)); } private void remoteInvocation(ClusterEvent event) throws ClusterException { String code = "" + event.hashCode(); if (!ClusterChannel.hasToBePublished(code)) { ClusterChannel.removeEvent(code); return; } int[] data = getId(true); event.setId(data[0]); event.setCounter(data[1]); journalManager.enqueEvent(event); //add event to the journal queue disp.callRemoteMethods(membersNoSender, "invoke", new Object[]{event}, new Class[]{ClusterEvent.class}, GroupRequest.GET_NONE, 0); if (!coordinator) journalIdGenerator.increaseId(event.getId(), event.getCounter()); } /** * Retrieve the id for the journal * * @return the unique id * @throws ClusterException * @param firstRequest */ private int[] getId(boolean firstRequest) throws ClusterException { try { int[] id; if (coordinator) { //if I'am a master - create next id log.info("GENERATING LOCAL ID..."); id = journalIdGenerator.getNextData(localAddress.toString()); } else { // ask to the master the next id --> rpc to getNextDataRemote log.info("RETRIEVING ID FROM " + coordinatorAddress); Object idObj = disp.callRemoteMethod(coordinatorAddress, "getNextDataRemote", new Object[]{localAddress.toString()}, new Class[]{String.class}, GroupRequest.GET_FIRST, 0); id = ((int[]) idObj); } return id; }catch (SuspectedException se){ if(!firstRequest) throw new ClusterException("unable to retrieve the journal id... master down ... no more retry ", se); log.info("SUSPECTED MASTER SHUTDOWN .... RETRY..."); try { log.info("WAITING FOR NEW MASTER"); Thread.sleep(1000); } catch (InterruptedException e) {} return getId(false); }catch (Exception e) { throw new ClusterException("unable to retrieve the journal id ", e); } } private void realign() throws ClusterException { if(coordinator) return; //TODO: per ora assumiamo che il master (o chi diventa master) sia allineato. int last = ClusterEvent.NO_EVENT; try{ ArrayList events = null; int[] header = new int[]{journalManager.getLastIdSaved(),journalManager.getMaxIdSaved(),journalManager.getCounter()}; int[] remoteHeader = (int[]) disp.callRemoteMethod(coordinatorAddress, "getRemoteHeader", new Object[]{}, new Class[]{}, GroupRequest.GET_FIRST, 0); int counterDiff =Math.abs(header[2]-remoteHeader[2]); if(counterDiff>1) killNoRealign(); if(counterDiff==1 && remoteHeader[1]>header[1]) killNoRealign(); if(counterDiff==0 && header[1]>remoteHeader[1]) killClusterMasterDisaligned(); while(true) { log.info("Call remote method getNextEvents: " + Thread.currentThread().getName()); Object idObj = disp.callRemoteMethod(coordinatorAddress, "getNextEvents", new Object[] {header, remoteHeader, new Integer(last)}, new Class[] {int[].class, int[].class, Integer.class }, GroupRequest.GET_FIRST, 0); events = ((ArrayList) idObj); if( events==null || events.size() == 0 ) break; last = manageEvents(events); log.info("Last id managed : " + last); } synchronized(realignQueue){ while(realignQueue.size()>0) { //execute the queue .... ClusterEvent event = (ClusterEvent) realignQueue.remove(0); log.info("Execute the event " + event.getId() ); ClusterChannel.accountEvent(""+event.hashCode()); if(journalManager.isProcessed(event) ) { log.info("Event processed .........."); continue; } manageEvent(event); } } isRealign = false; }catch(Throwable e) { e.printStackTrace(); log.error("No align done successfully ..."); throw new ClusterException("No align done successfully ...",e ); } } private void killClusterMasterDisaligned() { log.fatal("MASTER DISALIGNED... CLUSTER DATA MAY BE CORRUPTED"); log.fatal("PLEASE STOP CLUSTER AND FIX COLLECTION AND JOURNAL DATA"); //TODO ... to be implemented... MUSTER DISALIGNED } private void killNoRealign() throws ClusterException { log.fatal("NODE DISALIGNED... no hot realignement available.... please fix node collection and journal data"); throw new ClusterException("NODE DISALIGNED"); } private int manageEvents(ArrayList events) throws ClusterException { for(int i = 0; i < events.size() ; i++) { ClusterEvent event = (ClusterEvent) events.get(i); log.info("Manage event id " + event.getId()); if(journalManager.isProcessed(event)) { log.info("event already processed ........."); continue; } ClusterChannel.accountEvent("" + event.hashCode()); manageEvent(event); } return ((ClusterEvent)events.get(events.size() - 1)).getId(); } private void manageEvent(ClusterEvent event) throws ClusterException { event.execute(); journalManager.enqueEvent(event); if (coordinator) journalIdGenerator.releaseId(event.getId()); else journalIdGenerator.increaseId(event.getId(), event.getCounter()); } /* -------------- REMOTE METHODS --------------------- */ public ArrayList getNextEvents(int[] header, int[] myHeader, Integer start){ return journalManager.getNextEvents(header,myHeader,start); } public int[] getNextDataRemote(String address) { return journalIdGenerator.getNextData(address); } public void invoke(ClusterEvent event) throws ClusterException { String code = "" + event.hashCode(); ClusterChannel.accountEvent(code); //reentrant fix synchronized(realignQueue){ if(isRealign){ realignQueue.add(event); return; } } manageEvent(event); } public int[] getRemoteHeader() throws ClusterException { return new int[]{journalManager.getLastIdSaved(),journalManager.getMaxIdSaved(),journalManager.getCounter()}; } public ConsoleInfo getConsoleProperties() throws ClusterException{ String port = System.getProperty("jetty.port"); if(port==null) port = "8080"; //TODO ... verify how to retrieve default port ConsoleInfo info = new ConsoleInfo(); info.setProperty("port",port); return info; } public void stop() { disp.stop(); channel.disconnect(); instance = null; } }