ReplicationServerDomain.java example

Explorer
gluu-opendj-master
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at
 * trunk/opends/resource/legal-notices/OpenDS.LICENSE
 * or https://OpenDS.dev.java.net/OpenDS.LICENSE.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at
 * trunk/opends/resource/legal-notices/OpenDS.LICENSE.  If applicable,
 * add the following below this CDDL HEADER, with the fields enclosed
 * by brackets "[]" replaced with your own identifying information:
 *      Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 *
 *      Copyright 2006-2010 Sun Microsystems, Inc.
 *      Portions copyright 2011-2013 ForgeRock AS
 */
package org.opends.server.replication.server;

import static org.opends.messages.ReplicationMessages.*;
import static org.opends.server.loggers.ErrorLogger.logError;
import static org.opends.server.loggers.debug.DebugLogger.debugEnabled;
import static org.opends.server.loggers.debug.DebugLogger.getTracer;
import static org.opends.server.util.StaticUtils.stackTraceToSingleLineString;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;

import org.opends.messages.Category;
import org.opends.messages.Message;
import org.opends.messages.MessageBuilder;
import org.opends.messages.Severity;
import org.opends.server.admin.std.server.MonitorProviderCfg;
import org.opends.server.api.MonitorProvider;
import org.opends.server.core.DirectoryServer;
import org.opends.server.loggers.debug.DebugTracer;
import org.opends.server.replication.common.*;
import org.opends.server.replication.protocol.*;
import org.opends.server.types.*;
import org.opends.server.util.TimeThread;

import com.sleepycat.je.DatabaseException;

/**
 * This class define an in-memory cache that will be used to store
 * the messages that have been received from an LDAP server or
 * from another replication server and that should be forwarded to
 * other servers.
 *
 * The size of the cache is set by configuration.
 * If the cache becomes bigger than the configured size, the older messages
 * are removed and should they be needed again must be read from the backing
 * file
 *
 *
 * it runs a thread that is responsible for saving the messages
 * received to the disk and for trimming them
 * Decision to trim can be based on disk space or age of the message
 */
public class ReplicationServerDomain extends MonitorProvider<MonitorProviderCfg>
{
  private final String baseDn;
  // The Status analyzer that periodically verifies if the connected DSs are
  // late or not
  private StatusAnalyzer statusAnalyzer = null;

  // The monitoring publisher that periodically sends monitoring messages to the
  // topology
  private MonitoringPublisher monitoringPublisher = null;

  /*
   * The following map contains one balanced tree for each replica ID
   * to which we are currently publishing
   * the first update in the balanced tree is the next change that we
   * must push to this particular server
   *
   * We add new TreeSet in the HashMap when a new server register
   * to this replication server.
   *
   */
  private final Map<Integer, DataServerHandler> directoryServers =
    new ConcurrentHashMap<Integer, DataServerHandler>();

  /*
   * This map contains one ServerHandler for each replication servers
   * with which we are connected (so normally all the replication servers)
   * the first update in the balanced tree is the next change that we
   * must push to this particular server
   *
   * We add new TreeSet in the HashMap when a new replication server register
   * to this replication server.
   */
  private final Map<Integer, ReplicationServerHandler> replicationServers =
    new ConcurrentHashMap<Integer, ReplicationServerHandler>();

  private final ConcurrentLinkedQueue<MessageHandler> otherHandlers =
    new ConcurrentLinkedQueue<MessageHandler>();

  /*
   * This map contains the List of updates received from each
   * LDAP server
   */
  private final Map<Integer, DbHandler> sourceDbHandlers =
    new ConcurrentHashMap<Integer, DbHandler>();
  private ReplicationServer replicationServer;

  // GenerationId management
  private volatile long generationId = -1;
  private boolean generationIdSavedStatus = false;

  // The tracer object for the debug logger.
  private static final DebugTracer TRACER = getTracer();

  // Monitor data management
  /**
   * The monitor data consolidated over the topology.
   */
  private volatile MonitorData monitorData = new MonitorData();

  // This lock guards against multiple concurrent monitor data recalculation.
  private final Object pendingMonitorLock = new Object();

  // Guarded by pendingMonitorLock.
  private long monitorDataLastBuildDate = 0;

  // The set of replication servers which are already known to be slow to send
  // monitor data.
  //
  // Guarded by pendingMonitorLock.
  private final Set<Integer> monitorDataLateServers = new HashSet<Integer>();

  // This lock serializes updates to the pending monitor data.
  private final Object pendingMonitorDataLock = new Object();

  // Monitor data which is currently being calculated.
  //
  // Guarded by pendingMonitorDataLock.
  private MonitorData pendingMonitorData;

  // A set containing the IDs of servers from which we are currently expecting
  // monitor responses. When a response is received from a server we remove the
  // ID from this table, and count down the latch if the ID was in the table.
  //
  // Guarded by pendingMonitorDataLock.
  private final Set<Integer> pendingMonitorDataServerIDs =
    new HashSet<Integer>();

  // This latch is non-null and is used in order to count incoming responses as
  // they arrive. Since incoming response may arrive at any time, even when
  // there is no pending monitor request, access to the latch must be guarded.
  //
  // Guarded by pendingMonitorDataLock.
  private CountDownLatch pendingMonitorDataLatch = null;

  // TODO: Remote monitor data cache lifetime is 500ms/should be configurable
  private final long monitorDataLifeTime = 500;



  /**
   * The needed info for each received assured update message we are waiting
   * acks for.
   * Key: a change number matching a received update message which requested
   * assured mode usage (either safe read or safe data mode)
   * Value: The object holding every info needed about the already received acks
   * as well as the acks to be received.
   * For more details, see ExpectedAcksInfo and its sub classes javadoc.
   */
  private final ConcurrentHashMap<ChangeNumber, ExpectedAcksInfo> waitingAcks =
    new ConcurrentHashMap<ChangeNumber, ExpectedAcksInfo>();

  // The timer used to run the timeout code (timer tasks) for the assured update
  // messages we are waiting acks for.
  private Timer assuredTimeoutTimer = null;
  // Counter used to purge the timer tasks references in assuredTimeoutTimer,
  // every n number of treated assured messages
  private int assuredTimeoutTimerPurgeCounter = 0;

  private ServerState ctHeartbeatState = null;

  /**
   * Creates a new ReplicationServerDomain associated to the DN baseDn.
   *
   * @param baseDn The baseDn associated to the ReplicationServerDomain.
   * @param replicationServer the ReplicationServer that created this
   *                          replicationServer cache.
   */
  public ReplicationServerDomain(
      String baseDn, ReplicationServer replicationServer)
  {
    this.baseDn = baseDn;
    this.replicationServer = replicationServer;
    this.assuredTimeoutTimer = new Timer("Replication server RS("
        + replicationServer.getServerId()
        + ") assured timer for domain \"" + baseDn + "\"", true);

    DirectoryServer.registerMonitorProvider(this);
  }

  /**
   * Add an update that has been received to the list of
   * updates that must be forwarded to all other servers.
   *
   * @param update  The update that has been received.
   * @param sourceHandler The ServerHandler for the server from which the
   *        update was received
   * @throws IOException When an IO exception happens during the update
   *         processing.
   */
  public void put(UpdateMsg update, ServerHandler sourceHandler)
    throws IOException
  {

    ChangeNumber cn = update.getChangeNumber();
    int id = cn.getServerId();
    sourceHandler.updateServerState(update);
    sourceHandler.incrementInCount();

    if (generationId < 0)
    {
      generationId = sourceHandler.getGenerationId();
    }

    /**
     * If this is an assured message (a message requesting ack), we must
     * construct the ExpectedAcksInfo object with the right number of expected
     * acks before posting message to the writers. Otherwise some writers may
     * have time to post, receive the ack and increment received ack counter
     * (kept in ExpectedAcksInfo object) and we could think the acknowledgment
     * is fully processed although it may be not (some other acks from other
     * servers are not yet arrived). So for that purpose we do a pre-loop
     * to determine to who we will post an assured message.
     * Whether the assured mode is safe read or safe data, we anyway do not
     * support the assured replication feature across topologies with different
     * group ids. The assured feature insures assured replication based on the
     * same locality (group id). For instance in double data center deployment
     * (2 group id usage) with assured replication enabled, an assured message
     * sent from data center 1 (group id = 1) will be sent to servers of both
     * data centers, but one will request and wait acks only from servers of the
     * data center 1.
     */
    boolean assuredMessage = update.isAssured();
    PreparedAssuredInfo preparedAssuredInfo = null;
    if (assuredMessage)
    {
      // Assured feature is supported starting from replication protocol V2
      if (sourceHandler.getProtocolVersion() >=
        ProtocolVersion.REPLICATION_PROTOCOL_V2)
      {
        // According to assured sub-mode, prepare structures to keep track of
        // the acks we are interested in.
        AssuredMode assuredMode = update.getAssuredMode();
        if (assuredMode == AssuredMode.SAFE_DATA_MODE)
        {
          sourceHandler.incrementAssuredSdReceivedUpdates();
          preparedAssuredInfo = processSafeDataUpdateMsg(update, sourceHandler);
        } else if (assuredMode == AssuredMode.SAFE_READ_MODE)
        {
          sourceHandler.incrementAssuredSrReceivedUpdates();
          preparedAssuredInfo = processSafeReadUpdateMsg(update, sourceHandler);
        } else
        {
          // Unknown assured mode: should never happen
          Message errorMsg = ERR_RS_UNKNOWN_ASSURED_MODE.get(
            Integer.toString(replicationServer.getServerId()),
            assuredMode.toString(), baseDn, update.toString());
          logError(errorMsg);
          assuredMessage = false;
        }
      } else
      {
        assuredMessage = false;
      }
    }

    // look for the dbHandler that is responsible for the LDAP server which
    // generated the change.
    DbHandler dbHandler;
    synchronized (sourceDbHandlers)
    {
      dbHandler = sourceDbHandlers.get(id);
      if (dbHandler == null)
      {
        try
        {
          dbHandler = replicationServer.newDbHandler(id, baseDn);
          generationIdSavedStatus = true;
        } catch (DatabaseException e)
        {
          /*
           * Because of database problem we can't save any more changes
           * from at least one LDAP server.
           * This replicationServer therefore can't do it's job properly anymore
           * and needs to close all its connections and shutdown itself.
           */
          MessageBuilder mb = new MessageBuilder();
          mb.append(ERR_CHANGELOG_SHUTDOWN_DATABASE_ERROR.get());
          mb.append(stackTraceToSingleLineString(e));
          logError(mb.toMessage());
          replicationServer.shutdown();
          return;
        }
        sourceDbHandlers.put(id, dbHandler);
      }
    }

    // Publish the messages to the source handler
    dbHandler.add(update);

    List<Integer> expectedServers = null;
    if (assuredMessage)
    {
      expectedServers = preparedAssuredInfo.expectedServers;
      if (expectedServers != null)
      {
        // Store the expected acks info into the global map.
        // The code for processing reception of acks for this update will update
        // info kept in this object and if enough acks received, it will send
        // back the final ack to the requester and remove the object from this
        // map
        // OR
        // The following timer will time out and send an timeout ack to the
        // requester if the acks are not received in time. The timer will also
        // remove the object from this map.
        waitingAcks.put(cn, preparedAssuredInfo.expectedAcksInfo);

        // Arm timer for this assured update message (wait for acks until it
        // times out)
        AssuredTimeoutTask assuredTimeoutTask = new AssuredTimeoutTask(cn);
        assuredTimeoutTimer.schedule(assuredTimeoutTask,
          replicationServer.getAssuredTimeout());
        // Purge timer every 100 treated messages
        assuredTimeoutTimerPurgeCounter++;
        if ((assuredTimeoutTimerPurgeCounter % 100) == 0)
          assuredTimeoutTimer.purge();
      }
    }

    /**
     * The update message equivalent to the originally received update message,
     * but with assured flag disabled. This message is the one that should be
     * sent to non eligible servers for assured mode.
     * We need a clone like of the original message with assured flag off, to be
     * posted to servers we don't want to wait the ack from (not normal status
     * servers or servers with different group id). This must be done because
     * the posted message is a reference so each writer queue gets the same
     * reference, thus, changing the assured flag of an object is done for every
     * references posted on every writer queues. That is why we need a message
     * version with assured flag on and another one with assured flag off.
     */
    NotAssuredUpdateMsg notAssuredUpdate = null;

    /*
     * Push the message to the replication servers
     */
    if (sourceHandler.isDataServer())
    {
      for (ReplicationServerHandler handler : replicationServers.values())
      {
        /**
         * Ignore updates to RS with bad gen id
         * (no system managed status for a RS)
         */
        if ( (generationId>0) && (generationId != handler.getGenerationId()) )
        {
          if (debugEnabled())
            TRACER.debugInfo("In " + "Replication Server " +
              replicationServer.getReplicationPort() + " " +
              baseDn + " " + replicationServer.getServerId() +
              " for dn " + baseDn + ", update " +
              update.getChangeNumber().toString() +
              " will not be sent to replication server " +
              Integer.toString(handler.getServerId()) + " with generation id " +
              Long.toString(handler.getGenerationId()) +
              " different from local " +
              "generation id " + Long.toString(generationId));

          continue;
        }

        if (assuredMessage)
        {
          // Assured mode: post an assured or not assured matching update
          // message according to what has been computed for the destination
          // server
          if ((expectedServers != null) && expectedServers.contains(handler.
            getServerId()))
          {
            handler.add(update, sourceHandler);
          } else
          {
            if (notAssuredUpdate == null)
            {
              notAssuredUpdate = new NotAssuredUpdateMsg(update);
            }
            handler.add(notAssuredUpdate, sourceHandler);
          }
        } else
        {
          handler.add(update, sourceHandler);
        }
      }
    }

    /*
     * Push the message to the LDAP servers
     */
    for (DataServerHandler handler : directoryServers.values())
    {
      // Don't forward the change to the server that just sent it
      if (handler == sourceHandler)
      {
        continue;
      }

      /**
       * Ignore updates to DS in bad BAD_GENID_STATUS or FULL_UPDATE_STATUS
       *
       * The RSD lock should not be taken here as it is acceptable to have a
       * delay between the time the server has a wrong status and the fact we
       * detect it: the updates that succeed to pass during this time will have
       * no impact on remote server. But it is interesting to not saturate
       * uselessly the network if the updates are not necessary so this check to
       * stop sending updates is interesting anyway. Not taking the RSD lock
       * allows to have better performances in normal mode (most of the time).
       */
      ServerStatus dsStatus = handler.getStatus();
      if ( (dsStatus == ServerStatus.BAD_GEN_ID_STATUS) ||
        (dsStatus == ServerStatus.FULL_UPDATE_STATUS) )
      {
        if (debugEnabled())
        {
          if (dsStatus == ServerStatus.BAD_GEN_ID_STATUS)
            TRACER.debugInfo("In " + this +
              " for dn " + baseDn + ", update " +
              update.getChangeNumber().toString() +
              " will not be sent to directory server " +
              Integer.toString(handler.getServerId()) + " with generation id " +
              Long.toString(handler.getGenerationId()) +
              " different from local " +
              "generation id " + Long.toString(generationId));
          if (dsStatus == ServerStatus.FULL_UPDATE_STATUS)
            TRACER.debugInfo("In RS " +
              replicationServer.getServerId() +
              " for dn " + baseDn + ", update " +
              update.getChangeNumber().toString() +
              " will not be sent to directory server " +
              Integer.toString(handler.getServerId()) +
              " as it is in full update");
        }

        continue;
      }

      if (assuredMessage)
      {
        // Assured mode: post an assured or not assured matching update
        // message according to what has been computed for the destination
        // server
        if ((expectedServers != null) && expectedServers.contains(handler.
          getServerId()))
        {
          handler.add(update, sourceHandler);
        } else
        {
          if (notAssuredUpdate == null)
          {
            notAssuredUpdate = new NotAssuredUpdateMsg(update);
          }
          handler.add(notAssuredUpdate, sourceHandler);
        }
      } else
      {
        handler.add(update, sourceHandler);
      }
    }

    // Push the message to the other subscribing handlers
    for (MessageHandler handler : otherHandlers) {
      handler.add(update, sourceHandler);
    }
  }

  /**
   * Helper class to be the return type of a method that processes a just
   * received assured update message:
   * - processSafeReadUpdateMsg
   * - processSafeDataUpdateMsg
   * This is a facility to pack many interesting returned object.
   */
  private class PreparedAssuredInfo
  {
      /**
       * The list of servers identified as servers we are interested in
       * receiving acks from. If this list is not null, then expectedAcksInfo
       * should be not null.
       * Servers that are not in this list are servers not eligible for an ack
       * request.
       *
       */
      public List<Integer> expectedServers = null;

      /**
       * The constructed ExpectedAcksInfo object to be used when acks will be
       * received. Null if expectedServers is null.
       */
      public ExpectedAcksInfo expectedAcksInfo = null;
  }

  /**
   * Process a just received assured update message in Safe Read mode. If the
   * ack can be sent immediately, it is done here. This will also determine to
   * which suitable servers an ack should be requested from, and which ones are
   * not eligible for an ack request.
   * This method is an helper method for the put method. Have a look at the put
   * method for a better understanding.
   * @param update The just received assured update to process.
   * @param sourceHandler The ServerHandler for the server from which the
   *        update was received
   * @return A suitable PreparedAssuredInfo object that contains every needed
   * info to proceed with post to server writers.
   * @throws IOException When an IO exception happens during the update
   *         processing.
   */
  private PreparedAssuredInfo processSafeReadUpdateMsg(
    UpdateMsg update, ServerHandler sourceHandler) throws IOException
  {
    ChangeNumber cn = update.getChangeNumber();
    byte groupId = replicationServer.getGroupId();
    byte sourceGroupId = sourceHandler.getGroupId();
    List<Integer> expectedServers = new ArrayList<Integer>();
    List<Integer> wrongStatusServers = new ArrayList<Integer>();

    if (sourceGroupId == groupId)
      // Assured feature does not cross different group ids
    {
      if (sourceHandler.isDataServer())
      {
        // Look for RS eligible for assured
        for (ReplicationServerHandler handler : replicationServers.values())
        {
          if (handler.getGroupId() == groupId)
            // No ack expected from a RS with different group id
          {
            if ((generationId > 0) &&
              (generationId == handler.getGenerationId()))
              // No ack expected from a RS with bad gen id
            {
              expectedServers.add(handler.getServerId());
            }
          }
        }
      }

      // Look for DS eligible for assured
      for (DataServerHandler handler : directoryServers.values())
      {
        // Don't forward the change to the server that just sent it
        if (handler == sourceHandler)
        {
          continue;
        }
        if (handler.getGroupId() == groupId)
          // No ack expected from a DS with different group id
        {
          ServerStatus serverStatus = handler.getStatus();
          if (serverStatus == ServerStatus.NORMAL_STATUS)
          {
            expectedServers.add(handler.getServerId());
          } else
            // No ack expected from a DS with wrong status
          {
            if (serverStatus == ServerStatus.DEGRADED_STATUS)
            {
              wrongStatusServers.add(handler.getServerId());
            }
            /**
             * else
             * BAD_GEN_ID_STATUS or FULL_UPDATE_STATUS:
             * We do not want this to be reported as an error to the update
             * maker -> no pollution or potential misunderstanding when
             * reading logs or monitoring and it was just administration (for
             * instance new server is being configured in topo: it goes in bad
             * gen then then full full update).
             */
          }
        }
      }
    }

    // Return computed structures
    PreparedAssuredInfo preparedAssuredInfo = new PreparedAssuredInfo();
    if (expectedServers.size() > 0)
    {
      // Some other acks to wait for
      preparedAssuredInfo.expectedAcksInfo = new SafeReadExpectedAcksInfo(cn,
        sourceHandler, expectedServers, wrongStatusServers);
      preparedAssuredInfo.expectedServers = expectedServers;
    }

    if (preparedAssuredInfo.expectedServers == null)
    {
      // No eligible servers found, send the ack immediately
      sourceHandler.send(new AckMsg(cn));
    }

    return preparedAssuredInfo;
  }

  /**
   * Process a just received assured update message in Safe Data mode. If the
   * ack can be sent immediately, it is done here. This will also determine to
   * which suitable servers an ack should be requested from, and which ones are
   * not eligible for an ack request.
   * This method is an helper method for the put method. Have a look at the put
   * method for a better understanding.
   * @param update The just received assured update to process.
   * @param sourceHandler The ServerHandler for the server from which the
   *        update was received
   * @return A suitable PreparedAssuredInfo object that contains every needed
   * info to proceed with post to server writers.
   * @throws IOException When an IO exception happens during the update
   *         processing.
   */
  private PreparedAssuredInfo processSafeDataUpdateMsg(
    UpdateMsg update, ServerHandler sourceHandler) throws IOException
  {
    ChangeNumber cn = update.getChangeNumber();
    boolean interestedInAcks = false;
    byte safeDataLevel = update.getSafeDataLevel();
    byte groupId = replicationServer.getGroupId();
    byte sourceGroupId = sourceHandler.getGroupId();
    if (safeDataLevel < (byte) 1)
    {
      // Should never happen
      Message errorMsg = ERR_UNKNOWN_ASSURED_SAFE_DATA_LEVEL.get(
        Integer.toString(replicationServer.getServerId()),
        Byte.toString(safeDataLevel), baseDn, update.toString());
      logError(errorMsg);
    } else if (sourceGroupId != groupId)
    {
      // Assured feature does not cross different group IDS
    } else
    {
      if ((generationId > 0) &&
        (generationId == sourceHandler.getGenerationId()))
        // Ignore assured updates from wrong generationId servers
      {
        if (sourceHandler.isDataServer())
        {
          if (safeDataLevel == (byte) 1)
          {
            /**
             * Immediately return the ack for an assured message in safe data
             * mode with safe data level 1, coming from a DS. No need to wait
             * for more acks
             */
            sourceHandler.send(new AckMsg(cn));
          } else
          {
            /**
             * level > 1 : We need further acks
             * The message will be posted in assured mode to eligible
             * servers. The embedded safe data level is not changed, and his
             * value will be used by a remote RS to determine if he must send
             * an ack (level > 1) or not (level = 1)
             */
            interestedInAcks = true;
          }
        } else
        { // A RS sent us the safe data message, for sure no further ack to wait
          /**
           * Level 1 has already been reached so no further acks to wait.
           * Just deal with level > 1
           */
          if (safeDataLevel > (byte) 1)
          {
            sourceHandler.send(new AckMsg(cn));
          }
        }
      }
    }

    List<Integer> expectedServers = new ArrayList<Integer>();
    if (interestedInAcks)
    {
      if (sourceHandler.isDataServer())
      {
        // Look for RS eligible for assured
        for (ReplicationServerHandler handler : replicationServers.values())
        {
          if (handler.getGroupId() == groupId)
            // No ack expected from a RS with different group id
          {
            if ((generationId > 0) &&
              (generationId == handler.getGenerationId()))
              // No ack expected from a RS with bad gen id
            {
              expectedServers.add(handler.getServerId());
            }
          }
        }
      }
    }

    // Return computed structures
    PreparedAssuredInfo preparedAssuredInfo = new PreparedAssuredInfo();
    int nExpectedServers = expectedServers.size();
    if (interestedInAcks) // interestedInAcks so level > 1
    {
      if (nExpectedServers > 0)
      {
        // Some other acks to wait for
        int sdl = update.getSafeDataLevel();
        int neededAdditionalServers = sdl - 1;
        // Change the number of expected acks if not enough available eligible
        // servers: the level is a best effort thing, we do not want to timeout
        // at every assured SD update for instance if a RS has had his gen id
        // reseted
        byte finalSdl = ((nExpectedServers >= neededAdditionalServers) ?
          (byte)sdl : // Keep level as it was
          (byte)(nExpectedServers+1)); // Change level to match what's available
        preparedAssuredInfo.expectedAcksInfo = new SafeDataExpectedAcksInfo(cn,
          sourceHandler, finalSdl, expectedServers);
        preparedAssuredInfo.expectedServers = expectedServers;
      } else
      {
        // level > 1 and source is a DS but no eligible servers found, send the
        // ack immediately
        sourceHandler.send(new AckMsg(cn));
      }
    }

    return preparedAssuredInfo;
  }

  /**
   * Process an ack received from a given server.
   *
   * @param ack The ack message received.
   * @param ackingServer The server handler of the server that sent the ack.
   */
  public void processAck(AckMsg ack, ServerHandler ackingServer)
  {
    // Retrieve the expected acks info for the update matching the original
    // sent update.
    ChangeNumber cn = ack.getChangeNumber();
    ExpectedAcksInfo expectedAcksInfo = waitingAcks.get(cn);

    if (expectedAcksInfo != null)
    {
      // Prevent concurrent access from processAck() or AssuredTimeoutTask.run()
      synchronized (expectedAcksInfo)
      {
        if (expectedAcksInfo.isCompleted())
        {
          // Timeout code is sending a timeout ack, do nothing and let him
          // remove object from the map
          return;
        }
        /**
         *
         * If this is the last ack we were waiting from, immediately create and
         * send the final ack to the original server
         */
        if (expectedAcksInfo.processReceivedAck(ackingServer, ack))
        {
          // Remove the object from the map as no more needed
          waitingAcks.remove(cn);
          AckMsg finalAck = expectedAcksInfo.createAck(false);
          ServerHandler origServer = expectedAcksInfo.getRequesterServer();
          try
          {
            origServer.send(finalAck);
          } catch (IOException e)
          {
            /**
             * An error happened trying the send back an ack to the server.
             * Log an error and close the connection to this server.
             */
            MessageBuilder mb = new MessageBuilder();
            mb.append(ERR_RS_ERROR_SENDING_ACK.get(
              Integer.toString(replicationServer.getServerId()),
              Integer.toString(origServer.getServerId()),
              cn.toString(), baseDn));
            mb.append(stackTraceToSingleLineString(e));
            logError(mb.toMessage());
            stopServer(origServer, false);
          }
          // Mark the ack info object as completed to prevent potential timeout
          // code parallel run
          expectedAcksInfo.completed();
        }
      }
    }
    /* Else the timeout occurred for the update matching this change number
     * and the ack with timeout error has probably already been sent.
     */
  }

  /**
   * The code run when the timeout occurs while waiting for acks of the
   * eligible servers. This basically sends a timeout ack (with any additional
   * error info) to the original server that sent an assured update message.
   */
  private class AssuredTimeoutTask extends TimerTask
  {
    private ChangeNumber cn = null;

    /**
     * Constructor for the timer task.
     * @param cn The changenumber of the assured update we are waiting acks for
     */
    public AssuredTimeoutTask(ChangeNumber cn)
    {
      this.cn = cn;
    }

    /**
     * Run when the assured timeout for an assured update message we are waiting
     * acks for occurs.
     */
    public void run()
    {
      ExpectedAcksInfo expectedAcksInfo = waitingAcks.get(cn);

      if (expectedAcksInfo != null)
      {
        synchronized (expectedAcksInfo)
        {
          if (expectedAcksInfo.isCompleted())
          {
            // processAck() code is sending the ack, do nothing and let him
            // remove object from the map
            return;
          }
          // Remove the object from the map as no more needed
          waitingAcks.remove(cn);
          // Create the timeout ack and send him to the server the assured
          // update message came from
          AckMsg finalAck = expectedAcksInfo.createAck(true);
          ServerHandler origServer = expectedAcksInfo.getRequesterServer();
          if (debugEnabled())
            TRACER.debugInfo(
              "In RS " + Integer.toString(replicationServer.getServerId()) +
              " for " + baseDn +
              ", sending timeout for assured update with change " + " number " +
              cn.toString() + " to server id " +
              Integer.toString(origServer.getServerId()));
          try
          {
            origServer.send(finalAck);
          } catch (IOException e)
          {
            /**
             * An error happened trying the send back an ack to the server.
             * Log an error and close the connection to this server.
             */
            MessageBuilder mb = new MessageBuilder();
            mb.append(ERR_RS_ERROR_SENDING_ACK.get(
                Integer.toString(replicationServer.getServerId()),
                Integer.toString(origServer.getServerId()),
                cn.toString(), baseDn));
            mb.append(stackTraceToSingleLineString(e));
            logError(mb.toMessage());
            stopServer(origServer, false);
          }
          // Increment assured counters
          boolean safeRead =
              (expectedAcksInfo instanceof SafeReadExpectedAcksInfo);
          if (safeRead)
          {
            origServer.incrementAssuredSrReceivedUpdatesTimeout();
          } else
          {
            if (origServer.isDataServer())
            {
              origServer.incrementAssuredSdReceivedUpdatesTimeout();
            }
          }
          //   retrieve expected servers in timeout to increment their counter
          List<Integer> serversInTimeout = expectedAcksInfo.getTimeoutServers();
          for (Integer serverId : serversInTimeout)
          {
            ServerHandler expectedServerInTimeout =
              directoryServers.get(serverId);
            if (expectedServerInTimeout != null)
            {
              // Was a DS
              if (safeRead)
              {
                expectedServerInTimeout.incrementAssuredSrSentUpdatesTimeout();
              } else
              {
                // No SD update sent to a DS (meaningless)
              }
            } else
            {
              expectedServerInTimeout =
                replicationServers.get(serverId);
              if (expectedServerInTimeout != null)
              {
                // Was a RS
                if (safeRead)
                {
                  expectedServerInTimeout.
                    incrementAssuredSrSentUpdatesTimeout();
                } else
                {
                  expectedServerInTimeout.
                    incrementAssuredSdSentUpdatesTimeout();
                }
              }
              /* else server disappeared ? Let's forget about it. */
            }
          }
          // Mark the ack info object as completed to prevent potential
          // processAck() code parallel run
          expectedAcksInfo.completed();
        }
      }
    }
  }


  /**
   * Stop operations with a list of replication servers.
   *
   * @param replServers the replication servers for which
   * we want to stop operations
   */
  public void stopReplicationServers(Collection<String> replServers)
  {
    for (ReplicationServerHandler handler : replicationServers.values())
    {
      if (replServers.contains(handler.getServerAddressURL()))
        stopServer(handler, false);
    }
  }

  /**
   * Stop operations with all servers this domain is connected with (RS and DS).
   *
   * @param shutdown A boolean indicating if the stop is due to a
   *                 shutdown condition.
   */
  public void stopAllServers(boolean shutdown)
  {
    // Close session with other replication servers
    for (ReplicationServerHandler serverHandler : replicationServers.values())
    {
      stopServer(serverHandler, shutdown);
    }

    // Close session with other LDAP servers
    for (DataServerHandler serverHandler : directoryServers.values())
    {
      stopServer(serverHandler, shutdown);
    }
  }

  /**
   * Checks that a DS is not connected with same id.
   *
   * @param handler the DS we want to check
   * @return true if this is not a duplicate server
   */
  public boolean checkForDuplicateDS(DataServerHandler handler)
  {
    if (directoryServers.containsKey(handler.getServerId()))
    {
      // looks like two connected LDAP servers have the same serverId
      Message message = ERR_DUPLICATE_SERVER_ID.get(
          replicationServer.getMonitorInstanceName(),
          directoryServers.get(handler.getServerId()).toString(),
          handler.toString(), handler.getServerId());
      logError(message);
      return false;
    }
    return true;
  }

  /**
   * Stop operations with a given server.
   *
   * @param handler the server for which we want to stop operations.
   * @param shutdown A boolean indicating if the stop is due to a
   *                 shutdown condition.
   */
  public void stopServer(ServerHandler handler, boolean shutdown)
  {
      if (debugEnabled())
        TRACER.debugInfo(
            "In " + this.replicationServer.getMonitorInstanceName() +
            " domain=" + this + " stopServer() on the server handler " +
            handler.getMonitorInstanceName());
    /*
     * We must prevent deadlock on replication server domain lock, when for
     * instance this code is called from dying ServerReader but also dying
     * ServerWriter at the same time, or from a thread that wants to shut down
     * the handler. So use a thread safe flag to know if the job must be done
     * or not (is already being processed or not).
     */
    if (!handler.engageShutdown())
      // Only do this once (prevent other thread to enter here again)
    {
      if (!shutdown)
      {
        try
        {
          // Acquire lock on domain (see more details in comment of start()
          // method of ServerHandler)
          lock();
        }
        catch (InterruptedException ex)
        {
          // We can't deal with this here, so re-interrupt thread so that it is
          // caught during subsequent IO.
          Thread.currentThread().interrupt();
          return;
        }
      }

      try
      {
        // Stop useless monitoring publisher if no more RS or DS in domain
        if ( (directoryServers.size() + replicationServers.size() )== 1)
        {
          if (debugEnabled())
            TRACER.debugInfo("In " +
              replicationServer.getMonitorInstanceName() +
              " remote server " + handler.getMonitorInstanceName() + " is " +
              "the last RS/DS to be stopped: stopping monitoring publisher");
          stopMonitoringPublisher();
        }

        if (handler.isReplicationServer())
        {
          if (replicationServers.containsKey(handler.getServerId()))
          {
            unregisterServerHandler(handler);
            handler.shutdown();

            // Check if generation id has to be reset
            mayResetGenerationId();
            if (!shutdown)
            {
              // Warn our DSs that a RS or DS has quit (does not use this
              // handler as already removed from list)
              buildAndSendTopoInfoToDSs(null);
            }
          }
        } else if (directoryServers.containsKey(handler.getServerId()))
        {
          // If this is the last DS for the domain,
          // shutdown the status analyzer
          if (directoryServers.size() == 1)
          {
            if (debugEnabled())
              TRACER.debugInfo("In " +
                replicationServer.getMonitorInstanceName() +
                " remote server " + handler.getMonitorInstanceName() +
                " is the last DS to be stopped: stopping status analyzer");
            stopStatusAnalyzer();
          }

          unregisterServerHandler(handler);
          handler.shutdown();

          // Check if generation id has to be reset
          mayResetGenerationId();
          if (!shutdown)
          {
            // Update the remote replication servers with our list
            // of connected LDAP servers
            buildAndSendTopoInfoToRSs();
            // Warn our DSs that a RS or DS has quit (does not use this
            // handler as already removed from list)
            buildAndSendTopoInfoToDSs(null);
          }
        } else if (otherHandlers.contains(handler))
        {
          unRegisterHandler(handler);
          handler.shutdown();
        }
      }
      catch(Exception e)
      {
        logError(Message.raw(Category.SYNC, Severity.NOTICE,
            stackTraceToSingleLineString(e)));
      }
      finally
      {
        if (!shutdown)
        {
          release();
        }
      }
    }
  }

  /**
   * Stop the handler.
   * @param handler The handler to stop.
   */
  public void stopServer(MessageHandler handler)
  {
    if (debugEnabled())
      TRACER.debugInfo(
          "In " + this.replicationServer.getMonitorInstanceName()
          + " domain=" + this + " stopServer() on the message handler "
          + handler.getMonitorInstanceName());
    /*
     * We must prevent deadlock on replication server domain lock, when for
     * instance this code is called from dying ServerReader but also dying
     * ServerWriter at the same time, or from a thread that wants to shut down
     * the handler. So use a thread safe flag to know if the job must be done
     * or not (is already being processed or not).
     */
    if (!handler.engageShutdown())
      // Only do this once (prevent other thread to enter here again)
    {
      try
      {
        // Acquire lock on domain (see more details in comment of start() method
        // of ServerHandler)
        lock();
      }
      catch (InterruptedException ex)
      {
        // We can't deal with this here, so re-interrupt thread so that it is
        // caught during subsequent IO.
        Thread.currentThread().interrupt();
        return;
      }

      try
      {
        if (otherHandlers.contains(handler))
        {
          unRegisterHandler(handler);
          handler.shutdown();
        }
      }
      catch(Exception e)
      {
        logError(Message.raw(Category.SYNC, Severity.NOTICE,
            stackTraceToSingleLineString(e)));
      }
      finally
      {
        release();
      }
    }
  }

  /**
   * Unregister this handler from the list of handlers registered to this
   * domain.
   * @param handler the provided handler to unregister.
   */
  private void unregisterServerHandler(ServerHandler handler)
  {
    if (handler.isReplicationServer())
    {
      replicationServers.remove(handler.getServerId());
    }
    else
    {
      directoryServers.remove(handler.getServerId());
    }
  }

  /**
   * This method resets the generationId for this domain if there is no LDAP
   * server currently connected in the whole topology on this domain and
   * if the generationId has never been saved.
   *
   * - test emtpyness of directoryServers list
   * - traverse replicationServers list and test for each if DS are connected
   * So it strongly relies on the directoryServers list
   */
  private void mayResetGenerationId()
  {
    if (debugEnabled())
      TRACER.debugInfo(
        "In RS " + this.replicationServer.getMonitorInstanceName() +
        " for " + baseDn + " " +
        " mayResetGenerationId generationIdSavedStatus=" +
        generationIdSavedStatus);

    // If there is no more any LDAP server connected to this domain in the
    // topology and the generationId has never been saved, then we can reset
    // it and the next LDAP server to connect will become the new reference.
    boolean lDAPServersConnectedInTheTopology = false;
    if (directoryServers.isEmpty())
    {
      for (ReplicationServerHandler rsh : replicationServers.values())
      {
        if (generationId != rsh.getGenerationId())
        {
          if (debugEnabled())
            TRACER.debugInfo(
              "In RS " + this.replicationServer.getMonitorInstanceName() +
              " for " + baseDn + " " +
              " mayResetGenerationId skip RS" + rsh.getMonitorInstanceName() +
              " that has different genId");
        } else
        {
          if (rsh.hasRemoteLDAPServers())
          {
            lDAPServersConnectedInTheTopology = true;

            if (debugEnabled())
              TRACER.debugInfo(
                "In RS " + this.replicationServer.getMonitorInstanceName() +
                " for " + baseDn + " " +
                " mayResetGenerationId RS" + rsh.getMonitorInstanceName() +
                " has servers connected to it - will not reset generationId");
            break;
          }
        }
      }
    } else
    {
      lDAPServersConnectedInTheTopology = true;
      if (debugEnabled())
        TRACER.debugInfo(
          "In RS " + this.replicationServer.getMonitorInstanceName() +
          " for " + baseDn + " " +
          " has servers connected to it - will not reset generationId");
    }

    if ((!lDAPServersConnectedInTheTopology) &&
      (!this.generationIdSavedStatus) &&
      (generationId != -1))
    {
      changeGenerationId(-1, false);
    }
  }

  /**
   * Checks that a remote RS is not already connected to this hosting RS.
   * @param handler The handler for the remote RS.
   * @return flag specifying whether the remote RS is already connected.
   * @throws DirectoryException when a problem occurs.
   */
  public boolean checkForDuplicateRS(ReplicationServerHandler handler)
  throws DirectoryException
  {
    ReplicationServerHandler oldHandler =
      replicationServers.get(handler.getServerId());
    if ((oldHandler != null))
    {
      if (oldHandler.getServerAddressURL().equals(
        handler.getServerAddressURL()))
      {
        // this is the same server, this means that our ServerStart messages
        // have been sent at about the same time and 2 connections
        // have been established.
        // Silently drop this connection.
        return false;
      }
      else
      {
        // looks like two replication servers have the same serverId
        // log an error message and drop this connection.
        Message message = ERR_DUPLICATE_REPLICATION_SERVER_ID.get(
          replicationServer.getMonitorInstanceName(), oldHandler.
          getServerAddressURL(), handler.getServerAddressURL(),
          handler.getServerId());
        throw new DirectoryException(ResultCode.OTHER, message);
      }
    }
    return true;
  }

  /**
   * Get the next update that need to be sent to a given LDAP server.
   * This call is blocking when no update is available or when dependencies
   * do not allow to send the next available change
   *
   * @param  handler  The server handler for the target directory server.
   *
   * @return the update that must be forwarded
   */
  public UpdateMsg take(ServerHandler handler)
  {
    UpdateMsg msg;
    /*
     * Get the balanced tree that we use to sort the changes to be
     * sent to the replica from the cookie
     *
     * The next change to send is always the first one in the tree
     * So this methods simply need to check that dependencies are OK
     * and update this replicaId RUV
     *
     */
    msg = handler.take();

    return msg;
  }

  /**
   * Return a Set of String containing the lists of Replication servers
   * connected to this server.
   * @return the set of connected servers
   */
  public Set<String> getChangelogs()
  {
    LinkedHashSet<String> mySet = new LinkedHashSet<String>();

    for (ReplicationServerHandler handler : replicationServers.values())
    {
      mySet.add(handler.getServerAddressURL());
    }

    return mySet;
  }

  /**
   * Return a set containing the server that produced update and known by
   * this replicationServer from all over the topology,
   * whatever directly connected of connected to another RS.
   * @return a set containing the servers known by this replicationServer.
   */
  public Set<Integer> getServers()
  {
    return sourceDbHandlers.keySet();
  }

  /**
   * Returns as a set of String the list of LDAP servers connected to us.
   * Each string is the serverID of a connected LDAP server.
   *
   * @return The set of connected LDAP servers
   */
  public List<String> getConnectedLDAPservers()
  {
    List<String> mySet = new ArrayList<String>(0);

    for (DataServerHandler handler : directoryServers.values())
    {
      mySet.add(String.valueOf(handler.getServerId()));
    }
    return mySet;
  }

  /**
   * Creates and returns an iterator.
   * When the iterator is not used anymore, the caller MUST call the
   * ReplicationIterator.releaseCursor() method to free the resources
   * and locks used by the ReplicationIterator.
   *
   * @param serverId Identifier of the server for which the iterator is created.
   * @param changeNumber Starting point for the iterator.
   * @return the created ReplicationIterator. Null when no DB is available
   * for the provided server Id.
   */
  public ReplicationIterator getChangelogIterator(int serverId,
      ChangeNumber changeNumber)
  {
    DbHandler handler = sourceDbHandlers.get(serverId);
    if (handler == null)
      return null;

    ReplicationIterator it;
    try
    {
      it = handler.generateIterator(changeNumber);
    }
    catch (Exception e)
    {
      return null;
    }

    if (!it.next())
    {
      it.releaseCursor();
      return null;
    }

    return it;
  }

 /**
  * Count the number of changes in the replication changelog for the provided
  * serverID, between 2 provided changenumbers.
  * @param serverId Identifier of the server for which the iterator is created.
  * @param from lower limit changenumber.
  * @param to   upper limit changenumber.
  * @return the number of changes.
  *
  */
  public int getCount(int serverId,
      ChangeNumber from, ChangeNumber to)
  {
    DbHandler handler = sourceDbHandlers.get(serverId);
    if (handler == null)
      return 0;

    return handler.getCount(from, to);
  }

  /**
   * Returns the change count for that ReplicationServerDomain.
   *
   * @return the change count.
   */
  public long getChangesCount()
  {
    long entryCount = 0;
    for (DbHandler dbHandler : sourceDbHandlers.values())
    {
      entryCount += dbHandler.getChangesCount();
    }
    return entryCount;
  }

  /**
   * Get the baseDn.
   * @return Returns the baseDn.
   */
  public String getBaseDn()
  {
    return baseDn;
  }

  /**
   * Sets the provided DbHandler associated to the provided serverId.
   *
   * @param serverId  the serverId for the server to which is
   *                  associated the DbHandler.
   * @param dbHandler the dbHandler associated to the serverId.
   *
   * @throws DatabaseException If a database error happened.
   */
  public void setDbHandler(int serverId, DbHandler dbHandler)
    throws DatabaseException
  {
    synchronized (sourceDbHandlers)
    {
      sourceDbHandlers.put(serverId, dbHandler);
    }
  }

  /**
   * Retrieves the destination handlers for a routable message.
   *
   * @param msg The message to route.
   * @param senderHandler The handler of the server that published this message.
   * @return The list of destination handlers.
   */
  private List<ServerHandler> getDestinationServers(RoutableMsg msg,
    ServerHandler senderHandler)
  {
    List<ServerHandler> servers =
      new ArrayList<ServerHandler>();

    if (msg.getDestination() == RoutableMsg.THE_CLOSEST_SERVER)
    {
      // TODO Import from the "closest server" to be implemented
    } else if (msg.getDestination() == RoutableMsg.ALL_SERVERS)
    {
      if (!senderHandler.isReplicationServer())
      {
        // Send to all replication servers with a least one remote
        // server connected
        for (ReplicationServerHandler rsh : replicationServers.values())
        {
          if (rsh.hasRemoteLDAPServers())
          {
            servers.add(rsh);
          }
        }
      }

      // Sends to all connected LDAP servers
      for (DataServerHandler destinationHandler : directoryServers.values())
      {
        // Don't loop on the sender
        if (destinationHandler == senderHandler)
          continue;
        servers.add(destinationHandler);
      }
    } else
    {
      // Destination is one server
      DataServerHandler destinationHandler =
        directoryServers.get(msg.getDestination());
      if (destinationHandler != null)
      {
        servers.add(destinationHandler);
      } else
      {
        // the targeted server is NOT connected
        // Let's search for the replication server that MAY
        // have the targeted server connected.
        if (senderHandler.isDataServer())
        {
          for (ReplicationServerHandler h : replicationServers.values())
          {
            // Send to all replication servers with a least one remote
            // server connected
            if (h.isRemoteLDAPServer(msg.getDestination()))
            {
              servers.add(h);
            }
          }
        }
      }
    }
    return servers;
  }

  /**
   * Processes a message coming from one server in the topology
   * and potentially forwards it to one or all other servers.
   *
   * @param msg The message received and to be processed.
   * @param senderHandler The server handler of the server that emitted
   * the message.
   */
  public void process(RoutableMsg msg, ServerHandler senderHandler)
  {
    // Test the message for which a ReplicationServer is expected
    // to be the destination
    if (!(msg instanceof InitializeRequestMsg) &&
        !(msg instanceof InitializeTargetMsg) &&
        !(msg instanceof InitializeRcvAckMsg) &&
        !(msg instanceof EntryMsg) &&
        !(msg instanceof DoneMsg) &&
        (msg.getDestination() == this.replicationServer.getServerId()))
    {
      if (msg instanceof ErrorMsg)
      {
        ErrorMsg errorMsg = (ErrorMsg) msg;
        logError(ERR_ERROR_MSG_RECEIVED.get(
          errorMsg.getDetails()));
      } else if (msg instanceof MonitorRequestMsg)
      {
        // If the request comes from a Directory Server we need to
        // build the full list of all servers in the topology
        // and send back a MonitorMsg with the full list of all the servers
        // in the topology.
        if (senderHandler.isDataServer())
        {
          // Monitoring information requested by a DS
          MonitorMsg monitorMsg = createGlobalTopologyMonitorMsg(
              msg.getDestination(), msg.getSenderID(), monitorData);

          if (monitorMsg != null)
          {
            try
            {
              senderHandler.send(monitorMsg);
            }
            catch (IOException e)
            {
              // the connection was closed.
            }
          }
          return;
        } else
        {
          // Monitoring information requested by a RS
          MonitorMsg monitorMsg =
            createLocalTopologyMonitorMsg(msg.getDestination(),
            msg.getSenderID());

          if (monitorMsg != null)
          {
            try
            {
              senderHandler.send(monitorMsg);
            } catch (Exception e)
            {
              // We log the error. The requestor will detect a timeout or
              // any other failure on the connection.
              logError(ERR_CHANGELOG_ERROR_SENDING_MSG.get(
                  Integer.toString((msg.getDestination()))));
            }
          }
        }
      } else if (msg instanceof MonitorMsg)
      {
        MonitorMsg monitorMsg = (MonitorMsg) msg;
        receivesMonitorDataResponse(monitorMsg, senderHandler.getServerId());
      } else
      {
        logError(NOTE_ERR_ROUTING_TO_SERVER.get(
          msg.getClass().getCanonicalName()));

        MessageBuilder mb1 = new MessageBuilder();
        mb1.append(
            NOTE_ERR_ROUTING_TO_SERVER.get(msg.getClass().getCanonicalName()));
        mb1.append("serverID:").append(msg.getDestination());
        ErrorMsg errMsg = new ErrorMsg(msg.getSenderID(), mb1.toMessage());
        try
        {
          senderHandler.send(errMsg);
        } catch (IOException ioe1)
        {
          // an error happened on the sender session trying to recover
          // from an error on the receiver session.
          // Not much more we can do at this point.
        }
      }
      return;
    }

    List<ServerHandler> servers = getDestinationServers(msg, senderHandler);

    if (servers.isEmpty())
    {
      MessageBuilder mb = new MessageBuilder();
      mb.append(ERR_NO_REACHABLE_PEER_IN_THE_DOMAIN.get(
          this.baseDn, Integer.toString(msg.getDestination())));
      mb.append(" In Replication Server=").append(
        this.replicationServer.getMonitorInstanceName());
      mb.append(" unroutable message =").append(msg.getClass().getSimpleName());
      mb.append(" Details:routing table is empty");
      ErrorMsg errMsg = new ErrorMsg(
        this.replicationServer.getServerId(),
        msg.getSenderID(),
        mb.toMessage());
      logError(mb.toMessage());
      try
      {
        senderHandler.send(errMsg);
      } catch (IOException ioe)
      {
        // TODO Handle error properly (sender timeout in addition)
        /*
         * An error happened trying to send an error msg to this server.
         * Log an error and close the connection to this server.
         */
        MessageBuilder mb2 = new MessageBuilder();
        mb2.append(ERR_CHANGELOG_ERROR_SENDING_ERROR.get(this.toString()));
        mb2.append(stackTraceToSingleLineString(ioe));
        logError(mb2.toMessage());
        stopServer(senderHandler, false);
      }
    } else
    {
      for (ServerHandler targetHandler : servers)
      {
        try
        {
          targetHandler.send(msg);
        } catch (IOException ioe)
        {
          /*
           * An error happened trying the send a routable message
           * to its destination server.
           * Send back an error to the originator of the message.
           */
          MessageBuilder mb1 = new MessageBuilder();
          mb1.append(ERR_NO_REACHABLE_PEER_IN_THE_DOMAIN.get(
              this.baseDn, Integer.toString(msg.getDestination())));
          mb1.append(" unroutable message =" + msg.getClass().getSimpleName());
          mb1.append(" Details: " + ioe.getLocalizedMessage());
          ErrorMsg errMsg = new ErrorMsg(
            msg.getSenderID(), mb1.toMessage());
          logError(mb1.toMessage());
          try
          {
            senderHandler.send(errMsg);
          } catch (IOException ioe1)
          {
            // an error happened on the sender session trying to recover
            // from an error on the receiver session.
            // We don't have much solution left beside closing the sessions.
            stopServer(senderHandler, false);
            stopServer(targetHandler, false);
          }
        // TODO Handle error properly (sender timeout in addition)
        }
      }
    }

  }



  /**
   * Creates a new monitor message including monitoring information for the
   * whole topology.
   *
   * @param sender
   *          The sender of this message.
   * @param destination
   *          The destination of this message.
   * @param monitorData
   *          The domain monitor data which should be used for the message.
   * @return The newly created and filled MonitorMsg. Null if a problem occurred
   *         during message creation.
   */
  public MonitorMsg createGlobalTopologyMonitorMsg(
      int sender, int destination, MonitorData monitorData)
  {
    MonitorMsg returnMsg =
      new MonitorMsg(sender, destination);

    returnMsg.setReplServerDbState(getDbServerState());

    // Add the informations about the Replicas currently in
    // the topology.
    Iterator<Integer> it = monitorData.ldapIterator();
    while (it.hasNext())
    {
      int replicaId = it.next();
      returnMsg.setServerState(replicaId,
          monitorData.getLDAPServerState(replicaId),
          monitorData.getApproxFirstMissingDate(replicaId), true);
    }

    // Add the information about the Replication Servers
    // currently in the topology.
    it = monitorData.rsIterator();
    while (it.hasNext())
    {
      int replicaId = it.next();
      returnMsg.setServerState(replicaId,
          monitorData.getRSStates(replicaId),
          monitorData.getRSApproxFirstMissingDate(replicaId), false);
    }

    return returnMsg;
  }



  /**
   * Creates a new monitor message including monitoring information for the
   * topology directly connected to this RS. This includes information for: -
   * local RS - all direct DSs - all direct RSs
   *
   * @param sender
   *          The sender of this message.
   * @param destination
   *          The destination of this message.
   * @return The newly created and filled MonitorMsg. Null if the current thread
   *         was interrupted while attempting to get the domain lock.
   */
  public MonitorMsg createLocalTopologyMonitorMsg(int sender, int destination)
  {
    try
    {
      // Lock domain as we need to go through connected servers list
      lock();
    }
    catch (InterruptedException e)
    {
      return null;
    }

    try
    {
      MonitorMsg monitorMsg = new MonitorMsg(sender, destination);

      // Populate for each connected LDAP Server
      // from the states stored in the serverHandler.
      // - the server state
      // - the older missing change
      for (DataServerHandler lsh : this.directoryServers.values())
      {
        monitorMsg.setServerState(lsh.getServerId(),
            lsh.getServerState(), lsh.getApproxFirstMissingDate(),
            true);
      }

      // Same for the connected RS
      for (ReplicationServerHandler rsh : this.replicationServers.values())
      {
        monitorMsg.setServerState(rsh.getServerId(),
            rsh.getServerState(), rsh.getApproxFirstMissingDate(),
            false);
      }

      // Populate the RS state in the msg from the DbState
      monitorMsg.setReplServerDbState(this.getDbServerState());
      return monitorMsg;
    }
    finally
    {
      release();
    }
  }

  /**
   * Shutdown this ReplicationServerDomain.
   */
  public void shutdown()
  {
    DirectoryServer.deregisterMonitorProvider(this);

    // Terminate the assured timer
    assuredTimeoutTimer.cancel();

    stopAllServers(true);

    stopDbHandlers();
  }

  /**
   * Stop the dbHandlers .
   */
  private void stopDbHandlers()
  {
    // Shutdown the dbHandlers
    synchronized (sourceDbHandlers)
    {
      for (DbHandler dbHandler : sourceDbHandlers.values())
      {
        dbHandler.shutdown();
      }
      sourceDbHandlers.clear();
    }
  }

  /**
   * Returns the ServerState describing the last change from this replica.
   *
   * @return The ServerState describing the last change from this replica.
   */
  public ServerState getDbServerState()
  {
    ServerState serverState = new ServerState();
    for (DbHandler db : sourceDbHandlers.values())
    {
      serverState.update(db.getLastChange());
    }
    return serverState;
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public String toString()
  {
    return "ReplicationServerDomain " + baseDn;
  }

  /**
   * Send a TopologyMsg to all the connected directory servers in order to
   * let them know the topology (every known DSs and RSs).
   * @param notThisOne If not null, the topology message will not be sent to
   * this passed server.
   */
  public void buildAndSendTopoInfoToDSs(ServerHandler notThisOne)
  {
    for (DataServerHandler handler : directoryServers.values())
    {
      if ((notThisOne == null) || ((handler != notThisOne)))
        // All except passed one
      {
        for (int i=1; i<=2; i++)
        {
          if (!handler.shuttingDown())
          {
            if (handler.getStatus() != ServerStatus.NOT_CONNECTED_STATUS)
            {
              TopologyMsg topoMsg=createTopologyMsgForDS(handler.getServerId());
              try
              {
                handler.sendTopoInfo(topoMsg);
                break;
              }
              catch (IOException e)
              {
                if (i==2)
                {
                  Message message = ERR_EXCEPTION_SENDING_TOPO_INFO.get(
                      baseDn,
                      "directory",
                      Integer.toString(handler.getServerId()),
                      e.getMessage());
                  logError(message);
                }
              }
            }
          }
          try { Thread.sleep(100); } catch(Exception e) {}
        }
      }
    }
  }

  /**
   * Send a TopologyMsg to all the connected replication servers
   * in order to let them know our connected LDAP servers.
   */
  public void buildAndSendTopoInfoToRSs()
  {
    TopologyMsg topoMsg = createTopologyMsgForRS();
    for (ReplicationServerHandler handler : replicationServers.values())
    {
      for (int i=1; i<=2; i++)
      {
        if (!handler.shuttingDown())
        {
          if (handler.getStatus() != ServerStatus.NOT_CONNECTED_STATUS)
          {
            try
            {
              handler.sendTopoInfo(topoMsg);
              break;
            }
            catch (IOException e)
            {
              if (i==2)
              {
                Message message = ERR_EXCEPTION_SENDING_TOPO_INFO.get(
                    baseDn,
                    "replication",
                    Integer.toString(handler.getServerId()),
                    e.getMessage());
                logError(message);
              }
            }
          }
        }
        try { Thread.sleep(100); } catch(Exception e) {}
      }
    }
  }

  /**
   * Creates a TopologyMsg filled with information to be sent to a remote RS.
   * We send remote RS the info of every DS that are directly connected to us
   * plus our own info as RS.
   * @return A suitable TopologyMsg PDU to be sent to a peer RS
   */
  public TopologyMsg createTopologyMsgForRS()
  {
    List<DSInfo> dsInfos = new ArrayList<DSInfo>();

    // Go through every DSs
    for (DataServerHandler serverHandler : directoryServers.values())
    {
      dsInfos.add(serverHandler.toDSInfo());
    }

    // Create info for the local RS
    List<RSInfo> rsInfos = new ArrayList<RSInfo>();
    RSInfo localRSInfo = new RSInfo(replicationServer.getServerId(),
      replicationServer.getServerURL(), generationId,
      replicationServer.getGroupId(), replicationServer.getWeight());
    rsInfos.add(localRSInfo);

    return new TopologyMsg(dsInfos, rsInfos);
  }

  /**
   * Creates a TopologyMsg filled with information to be sent to a DS.
   * We send remote DS the info of every known DS and RS in the topology (our
   * directly connected DSs plus the DSs connected to other RSs) except himself.
   * Also put info related to local RS.
   *
   * @param destDsId The id of the DS the TopologyMsg PDU is to be sent to and
   * that we must not include in the DS list.
   * @return A suitable TopologyMsg PDU to be sent to a peer DS
   */
  public TopologyMsg createTopologyMsgForDS(int destDsId)
  {
    List<DSInfo> dsInfos = new ArrayList<DSInfo>();
    List<RSInfo> rsInfos = new ArrayList<RSInfo>();

    // Go through every DSs (except recipient of msg)
    for (DataServerHandler serverHandler : directoryServers.values())
    {
      if (serverHandler.getServerId() == destDsId)
        continue;
      dsInfos.add(serverHandler.toDSInfo());
    }

    // Add our own info (local RS)
    RSInfo localRSInfo = new RSInfo(replicationServer.getServerId(),
      replicationServer.getServerURL(), generationId,
      replicationServer.getGroupId(), replicationServer.getWeight());
    rsInfos.add(localRSInfo);

    // Go through every peer RSs (and get their connected DSs), also add info
    // for RSs
    for (ReplicationServerHandler serverHandler : replicationServers.values())
    {
      // Put RS info
      rsInfos.add(serverHandler.toRSInfo());

      serverHandler.addDSInfos(dsInfos);
    }

    return new TopologyMsg(dsInfos, rsInfos);
  }

  /**
   * Get the generationId associated to this domain.
   *
   * @return The generationId
   */
  public long getGenerationId()
  {
    return generationId;
  }

  /**
   * Get the generationId saved status.
   *
   * @return The generationId saved status.
   */
  public boolean getGenerationIdSavedStatus()
  {
    return generationIdSavedStatus;
  }


  /**
   * Initialize the value of the generationID for this ReplicationServerDomain.
   * This method is intended to be used for initialization at startup and
   * simply stores the new value without any additional processing.
   * For example it does not clear the change-log DBs
   *
   * @param generationId The new value of generationId.
   */
  public void initGenerationID(long generationId)
  {
    synchronized (generationIDLock)
    {
      this.generationId = generationId;
      this.generationIdSavedStatus = true;
    }
  }

  /**
   * Sets the provided value as the new in memory generationId.
   * Also clear the changelog databases.
   *
   * @param generationId The new value of generationId.
   * @param savedStatus  The saved status of the generationId.
   * @return The old generation id
   */
  public long changeGenerationId(long generationId, boolean savedStatus)
  {
    synchronized (generationIDLock)
    {
      long oldGenerationId = this.generationId;

      if (this.generationId != generationId)
      {
        // we are changing of genId
        clearDbs();

        this.generationId = generationId;
        this.generationIdSavedStatus = savedStatus;
      }
      return oldGenerationId;
    }
  }

  /**
   * Resets the generationID.
   *
   * @param senderHandler The handler associated to the server
   *        that requested to reset the generationId.
   * @param genIdMsg The reset generation ID msg received.
   */
  public void resetGenerationId(ServerHandler senderHandler,
    ResetGenerationIdMsg genIdMsg)
  {
    if (debugEnabled())
      TRACER.debugInfo(
          "In " + this +
          " Receiving ResetGenerationIdMsg from " + senderHandler.getServerId()+
          " for baseDn " + baseDn + ":\n" + genIdMsg);

    try
    {
      // Acquire lock on domain (see more details in comment of start() method
      // of ServerHandler)
      lock();
    }
    catch (InterruptedException ex)
    {
      // We can't deal with this here, so re-interrupt thread so that it is
      // caught during subsequent IO.
      Thread.currentThread().interrupt();
      return;
    }

    try
    {
      long newGenId = genIdMsg.getGenerationId();

      if (newGenId != this.generationId)
      {
        changeGenerationId(newGenId, false);
      }
      else
      {
        // Order to take a gen id we already have, just ignore
        if (debugEnabled())
          TRACER.debugInfo(
              "In " + this
              + " Reset generation id requested for baseDn " + baseDn
              + " but generation id was already " + this.generationId
              + ":\n" + genIdMsg);
      }

      // If we are the first replication server warned,
      // then forwards the reset message to the remote replication servers
      for (ServerHandler rsHandler : replicationServers.values())
      {
        try
        {
          // After we'll have sent the message , the remote RS will adopt
          // the new genId
          rsHandler.setGenerationId(newGenId);
          if (senderHandler.isDataServer())
          {
            rsHandler.send(genIdMsg);
          }
        } catch (IOException e)
        {
          logError(ERR_EXCEPTION_FORWARDING_RESET_GEN_ID.get(baseDn,
              e.getMessage()));
        }
      }

      // Change status of the connected DSs according to the requested new
      // reference generation id
      for (DataServerHandler dsHandler : directoryServers.values())
      {
        try
        {
          dsHandler.changeStatusForResetGenId(newGenId);
        } catch (IOException e)
        {
          logError(ERR_EXCEPTION_CHANGING_STATUS_AFTER_RESET_GEN_ID.get(baseDn,
              Integer.toString(dsHandler.getServerId()),
              e.getMessage()));
        }
      }

      // Update every peers (RS/DS) with potential topology changes (status
      // change). Rather than doing that each time a DS has a status change
      // (consecutive to reset gen id message), we prefer advertising once for
      // all after changes (less packet sent), here at the end of the reset msg
      // treatment.
      buildAndSendTopoInfoToDSs(null);
      buildAndSendTopoInfoToRSs();

      Message message = NOTE_RESET_GENERATION_ID.get(baseDn, newGenId);
      logError(message);

    }
    catch(Exception e)
    {
      logError(Message.raw(Category.SYNC, Severity.NOTICE,
          stackTraceToSingleLineString(e)));
    }
    finally
    {
      release();
    }
  }

  /**
   * Process message of a remote server changing his status.
   * @param senderHandler The handler associated to the server
   *        that changed his status.
   * @param csMsg The message containing the new status
   */
  public void processNewStatus(DataServerHandler senderHandler,
    ChangeStatusMsg csMsg)
  {
    if (debugEnabled())
    {
      TRACER.debugInfo(
          "In RS " + getReplicationServer().getServerId() +
          " Receiving ChangeStatusMsg from " + senderHandler.getServerId() +
          " for baseDn " + baseDn + ":\n" + csMsg);
    }

    try
    {
      // Acquire lock on domain (see more details in comment of start() method
      // of ServerHandler)
      lock();
    }
    catch (InterruptedException ex)
    {
      // We can't deal with this here, so re-interrupt thread so that it is
      // caught during subsequent IO.
      Thread.currentThread().interrupt();
      return;
    }

    try
    {
      ServerStatus newStatus = senderHandler.processNewStatus(csMsg);
      if (newStatus == ServerStatus.INVALID_STATUS)
      {
        // Already logged an error in processNewStatus()
        // just return not to forward a bad status to topology
        return;
      }

      // Update every peers (RS/DS) with topology changes
      buildAndSendTopoInfoToDSs(senderHandler);
      buildAndSendTopoInfoToRSs();

      Message message = NOTE_DIRECTORY_SERVER_CHANGED_STATUS.get(
          senderHandler.getServerId(), baseDn, newStatus.toString());
      logError(message);
    }
    catch(Exception e)
    {
      logError(Message.raw(Category.SYNC, Severity.NOTICE,
          stackTraceToSingleLineString(e)));
    }
    finally
    {
      release();
    }
  }

  /**
   * Change the status of a directory server according to the event generated
   * from the status analyzer.
   * @param serverHandler The handler of the directory server to update
   * @param event The event to be used for new status computation
   * @return True if we have been interrupted (must stop), false otherwise
   */
  public boolean changeStatusFromStatusAnalyzer(
      DataServerHandler serverHandler, StatusMachineEvent event)
  {
    try
    {
      // Acquire lock on domain (see more details in comment of start() method
      // of ServerHandler)
      lock();
    }
    catch (InterruptedException ex)
    {
      // We have been interrupted for dying, from stopStatusAnalyzer
      // to prevent deadlock in this situation:
      // RS is being shutdown, and stopServer will call stopStatusAnalyzer.
      // Domain lock is taken by shutdown thread while status analyzer thread
      // is willing to change the status of a server at the same time so is
      // waiting for the domain lock at the same time. As shutdown thread is
      // waiting for analyzer thread death, a deadlock occurs. So we force
      // interruption of the status analyzer thread death after 2 seconds if
      // it has not finished (see StatusAnalyzer.waitForShutdown). This allows
      // to have the analyzer thread taking the domain lock only when the
      // status of a DS has to be changed. See more comments in run method of
      // StatusAnalyzer.
      if (debugEnabled())
        TRACER
            .debugInfo("Status analyzer for domain "
                + baseDn
                + " has been interrupted when"
                + " trying to acquire domain lock for changing the status"
                + " of DS "
                + serverHandler.getServerId());
      return true;
    }

    try
    {
      ServerStatus newStatus = ServerStatus.INVALID_STATUS;
      ServerStatus oldStatus = serverHandler.getStatus();
      try
      {
        newStatus = serverHandler
            .changeStatusFromStatusAnalyzer(event);
      }
      catch (IOException e)
      {
        logError(ERR_EXCEPTION_CHANGING_STATUS_FROM_STATUS_ANALYZER
            .get(baseDn,
                Integer.toString(serverHandler.getServerId()),
                e.getMessage()));
      }

      if ((newStatus == ServerStatus.INVALID_STATUS)
          || (newStatus == oldStatus))
      {
        // Change was impossible or already occurred (see StatusAnalyzer
        // comments)
        return false;
      }

      // Update every peers (RS/DS) with topology changes
      buildAndSendTopoInfoToDSs(serverHandler);
      buildAndSendTopoInfoToRSs();
    }
    catch (Exception e)
    {
      logError(Message.raw(Category.SYNC, Severity.NOTICE,
          stackTraceToSingleLineString(e)));
    }
    finally
    {
      release();
    }

    return false;
  }

  /**
   * Clears the Db associated with that domain.
   */
  public void clearDbs()
  {
    // Reset the localchange and state db for the current domain
    synchronized (sourceDbHandlers)
    {
      for (DbHandler dbHandler : sourceDbHandlers.values())
      {
        try
        {
          dbHandler.clear();
        } catch (Exception e)
        {
          // TODO: i18n
          MessageBuilder mb = new MessageBuilder();
          mb.append(ERR_ERROR_CLEARING_DB.get(dbHandler.toString(),
            e.getMessage() + " " +
            stackTraceToSingleLineString(e)));
          logError(mb.toMessage());
        }
      }
      stopDbHandlers();
    }
    try
    {
      replicationServer.clearGenerationId(baseDn);
    } catch (Exception e)
    {
      // TODO: i18n
      logError(Message.raw(
        "Exception caught while clearing generationId:" +
        e.getLocalizedMessage()));
    }
  }

  /**
   * Returns whether the provided server is in degraded
   * state due to the fact that the peer server has an invalid
   * generationId for this domain.
   *
   * @param serverId The serverId for which we want to know the
   *                 the state.
   * @return Whether it is degraded or not.
   */
  public boolean isDegradedDueToGenerationId(int serverId)
  {
    if (debugEnabled())
      TRACER.debugInfo(
        "In " + this.replicationServer.getMonitorInstanceName() +
        " baseDN=" + baseDn +
        " isDegraded serverId=" + serverId +
        " given local generation Id=" + this.generationId);

    ServerHandler handler = replicationServers.get(serverId);
    if (handler == null)
    {
      handler = directoryServers.get(serverId);
      if (handler == null)
      {
        return false;
      }
    }

    if (debugEnabled())
      TRACER.debugInfo(
        "In " + this.replicationServer.getMonitorInstanceName() +
        " baseDN=" + baseDn +
        " Compute degradation of serverId=" + serverId +
        " LS server generation Id=" + handler.getGenerationId());
    return (handler.getGenerationId() != this.generationId);
  }

  /**
   * Return the associated replication server.
   * @return The replication server.
   */
  public ReplicationServer getReplicationServer()
  {
    return replicationServer;
  }

  /**
   * Process topology information received from a peer RS.
   * @param topoMsg The just received topo message from remote RS
   * @param handler The handler that received the message.
   * @param allowResetGenId True for allowing to reset the generation id (
   * when called after initial handshake)
   * @throws IOException If an error occurred.
   * @throws DirectoryException If an error occurred.
   */
  public void receiveTopoInfoFromRS(TopologyMsg topoMsg,
      ReplicationServerHandler handler,
    boolean allowResetGenId)
    throws IOException, DirectoryException
  {
    if (debugEnabled())
    {
      TRACER.debugInfo(
        "In RS " + getReplicationServer().getServerId() +
        " Receiving TopologyMsg from " + handler.getServerId() +
        " for baseDn " + baseDn + ":\n" + topoMsg);
    }

    try
    {
      // Acquire lock on domain (see more details in comment of start() method
      // of ServerHandler)
      lock();
    }
    catch (InterruptedException ex)
    {
      // We can't deal with this here, so re-interrupt thread so that it is
      // caught during subsequent IO.
      Thread.currentThread().interrupt();
      return;
    }

    try
    {
      /*
       * Store DS connected to remote RS & update information about the peer RS
       */
      handler.processTopoInfoFromRS(topoMsg);

      /*
       * Handle generation id
       */
      if (allowResetGenId)
      {
        // Check if generation id has to be reseted
        mayResetGenerationId();
        if (generationId < 0)
          generationId = handler.getGenerationId();
      }

      if (generationId > 0 && (generationId != handler.getGenerationId()))
      {
        Message message = WARN_BAD_GENERATION_ID_FROM_RS.get(handler
            .getServerId(), handler.session
            .getReadableRemoteAddress(), handler.getGenerationId(),
            baseDn, getReplicationServer().getServerId(),
            generationId);
        logError(message);

        ErrorMsg errorMsg = new ErrorMsg(
            getReplicationServer().getServerId(),
            handler.getServerId(),
            message);
        handler.send(errorMsg);
      }

      /*
       * Sends the currently known topology information to every connected
       * DS we have.
       */
      buildAndSendTopoInfoToDSs(null);
    }
    catch(Exception e)
    {
      logError(Message.raw(Category.SYNC, Severity.NOTICE,
          stackTraceToSingleLineString(e)));
    }
    finally
    {
      release();
    }
  }

  /* =======================
   * Monitor Data generation
   * =======================
   */

  /**
   * Returns the latest monitor data available for this replication server
   * domain.
   *
   * @return The latest monitor data available for this replication server
   *         domain, which is never {@code null}.
   */
  MonitorData getDomainMonitorData()
  {
    return monitorData;
  }



  /**
   * Recomputes the monitor data for this replication server domain.
   *
   * @return The recomputed monitor data for this replication server domain.
   * @throws InterruptedException
   *           If this thread is interrupted while waiting for a response.
   */
  MonitorData computeDomainMonitorData() throws InterruptedException
  {
    // Only allow monitor recalculation at a time.
    synchronized (pendingMonitorLock)
    {
      if ((monitorDataLastBuildDate + monitorDataLifeTime) < TimeThread
          .getTime())
      {
        try
        {
          // Prevent out of band monitor responses from updating our pending
          // table until we are ready.
          synchronized (pendingMonitorDataLock)
          {
            // Clear the pending monitor data.
            pendingMonitorDataServerIDs.clear();
            pendingMonitorData = new MonitorData();

            // Initialize the monitor data.
            initializePendingMonitorData();

            // Send the monitor requests to the connected replication servers.
            for (ReplicationServerHandler rs : replicationServers.values())
            {
              // Add server ID to pending table.
              int serverId = rs.getServerId();

              MonitorRequestMsg msg = new MonitorRequestMsg(
                  this.replicationServer.getServerId(), serverId);
              try
              {
                rs.send(msg);

                // Only register this server ID if we were able to send the
                // message.
                pendingMonitorDataServerIDs.add(serverId);
              }
              catch (IOException e)
              {
                // Log a message and do a best effort from here.
                Message message = ERR_SENDING_REMOTE_MONITOR_DATA_REQUEST
                    .get(baseDn, serverId, e.getMessage());
                logError(message);
              }
            }

            // Create the pending response latch based on the number of expected
            // monitor responses.
            pendingMonitorDataLatch = new CountDownLatch(
                pendingMonitorDataServerIDs.size());
          }

          // Wait for the responses to come back.
          pendingMonitorDataLatch.await(5, TimeUnit.SECONDS);

          // Log messages for replication servers that have gone or come back.
          synchronized (pendingMonitorDataLock)
          {
            // Log servers that have come back.
            for (int serverId : monitorDataLateServers)
            {
              // Ensure that we only log once per server: don't fill the
              // error log with repeated messages.
              if (!pendingMonitorDataServerIDs.contains(serverId))
              {
                logError(NOTE_MONITOR_DATA_RECEIVED.get(baseDn,
                    serverId));
              }
            }

            // Log servers that have gone away.
            for (int serverId : pendingMonitorDataServerIDs)
            {
              // Ensure that we only log once per server: don't fill the
              // error log with repeated messages.
              if (!monitorDataLateServers.contains(serverId))
              {
                logError(WARN_MISSING_REMOTE_MONITOR_DATA.get(baseDn,
                    serverId));
              }
            }

            // Remember which servers were late this time.
            monitorDataLateServers.clear();
            monitorDataLateServers.addAll(pendingMonitorDataServerIDs);
          }

          // Store the new computed data as the reference
          synchronized (pendingMonitorDataLock)
          {
            // Now we have the expected answers or an error occurred
            pendingMonitorData.completeComputing();
            monitorData = pendingMonitorData;
            monitorDataLastBuildDate = TimeThread.getTime();
          }
        }
        finally
        {
          synchronized (pendingMonitorDataLock)
          {
            // Clear pending state.
            pendingMonitorData = null;
            pendingMonitorDataLatch = null;
            pendingMonitorDataServerIDs.clear();
          }
        }
      }
    }

    return monitorData;
  }



  /**
   * Start collecting global monitoring information for this
   * ReplicationServerDomain.
   */

  private void initializePendingMonitorData()
  {
    // Let's process our directly connected DS
    // - in the ServerHandler for a given DS1, the stored state contains :
    // - the max CN produced by DS1
    // - the last CN consumed by DS1 from DS2..n
    // - in the RSdomain/dbHandler, the built-in state contains :
    // - the max CN produced by each server
    // So for a given DS connected we can take the state and the max from
    // the DS/state.

    for (ServerHandler ds : directoryServers.values())
    {
      int serverID = ds.getServerId();

      // the state comes from the state stored in the SH
      ServerState dsState = ds.getServerState()
          .duplicate();

      // the max CN sent by that LS also comes from the SH
      ChangeNumber maxcn = dsState.getMaxChangeNumber(serverID);
      if (maxcn == null)
      {
        // This directly connected LS has never produced any change
        maxcn = new ChangeNumber(0, 0, serverID);
      }
      pendingMonitorData.setMaxCN(serverID, maxcn);
      pendingMonitorData.setLDAPServerState(serverID, dsState);
      pendingMonitorData.setFirstMissingDate(serverID,
          ds.getApproxFirstMissingDate());
    }

    // Then initialize the max CN for the LS that produced something
    // - from our own local db state
    // - whatever they are directly or indirectly connected
    ServerState dbServerState = getDbServerState();
    pendingMonitorData.setRSState(replicationServer.getServerId(),
        dbServerState);
    for (int sid : dbServerState) {
      ChangeNumber storedCN = dbServerState.getMaxChangeNumber(sid);
      pendingMonitorData.setMaxCN(sid, storedCN);
    }
  }



  /**
   * Processes a Monitor message receives from a remote Replication Server and
   * stores the data received.
   *
   * @param msg
   *          The message to be processed.
   * @param serverId
   *          server handler that is receiving the message.
   */
  private void receivesMonitorDataResponse(MonitorMsg msg,
      int serverId)
  {
    synchronized (pendingMonitorDataLock)
    {
      if (pendingMonitorData == null)
      {
        // This is a response for an earlier request whose computing is
        // already complete.
        logError(INFO_IGNORING_REMOTE_MONITOR_DATA.get(baseDn,
            msg.getSenderID()));
        return;
      }

      try
      {
        // Here is the RS state : list <serverID, lastChangeNumber>
        // For each LDAP Server, we keep the max CN across the RSes
        ServerState replServerState = msg.getReplServerDbState();
        pendingMonitorData.setMaxCNs(replServerState);

        // store the remote RS states.
        pendingMonitorData.setRSState(msg.getSenderID(),
            replServerState);

        // Store the remote LDAP servers states
        Iterator<Integer> lsidIterator = msg.ldapIterator();
        while (lsidIterator.hasNext())
        {
          int sid = lsidIterator.next();
          ServerState dsServerState = msg.getLDAPServerState(sid);
          pendingMonitorData.setMaxCNs(dsServerState);
          pendingMonitorData.setLDAPServerState(sid, dsServerState);
          pendingMonitorData.setFirstMissingDate(sid,
              msg.getLDAPApproxFirstMissingDate(sid));
        }

        // Process the latency reported by the remote RSi on its connections
        // to the other RSes
        Iterator<Integer> rsidIterator = msg.rsIterator();
        while (rsidIterator.hasNext())
        {
          int rsid = rsidIterator.next();
          if (rsid == replicationServer.getServerId())
          {
            // this is the latency of the remote RSi regarding the current RS
            // let's update the fmd of my connected LS
            for (ServerHandler connectedlsh : directoryServers
                .values())
            {
              int connectedlsid = connectedlsh.getServerId();
              Long newfmd = msg.getRSApproxFirstMissingDate(rsid);
              pendingMonitorData.setFirstMissingDate(connectedlsid,
                  newfmd);
            }
          }
          else
          {
            // this is the latency of the remote RSi regarding another RSj
            // let's update the latency of the LSes connected to RSj
            ReplicationServerHandler rsjHdr = replicationServers
                .get(rsid);
            if (rsjHdr != null)
            {
              for (int remotelsid : rsjHdr
                  .getConnectedDirectoryServerIds())
              {
                Long newfmd = msg.getRSApproxFirstMissingDate(rsid);
                pendingMonitorData.setFirstMissingDate(remotelsid,
                    newfmd);
              }
            }
          }
        }
      }
      catch (RuntimeException e)
      {
        // FIXME: do we really expect these???
        logError(ERR_PROCESSING_REMOTE_MONITOR_DATA.get(e
            .getMessage() + stackTraceToSingleLineString(e)));
      }
      finally
      {
        // Decreases the number of expected responses and potentially
        // wakes up the waiting requester thread.
        if (pendingMonitorDataServerIDs.remove(serverId))
        {
          pendingMonitorDataLatch.countDown();
        }
      }
    }
  }

  /**
   * Set the purge delay on all the db Handlers for this Domain
   * of Replication.
   *
   * @param delay The new purge delay to use.
   */
  public void setPurgeDelay(long delay)
  {
    for (DbHandler handler : sourceDbHandlers.values())
    {
      handler.setPurgeDelay(delay);
    }
  }

  /**
   * Get the map of connected DSs.
   * @return The map of connected DSs
   */
  public Map<Integer, DataServerHandler> getConnectedDSs()
  {
    return directoryServers;
  }

  /**
   * Get the map of connected RSs.
   * @return The map of connected RSs
   */
  public Map<Integer, ReplicationServerHandler> getConnectedRSs()
  {
    return replicationServers;
  }


  /**
   * A synchronization mechanism is created to insure exclusive access to the
   * domain. The goal is to have a consistent view of the topology by locking
   * the structures holding the topology view of the domain: directoryServers
   * and replicationServers. When a connection is established with a peer DS or
   * RS, the lock should be taken before updating these structures, then
   * released. The same mechanism should be used when updating any data related
   * to the view of the topology: for instance if the status of a DS is changed,
   * the lock should be taken before updating the matching server handler and
   * sending the topology messages to peers and released after.... This allows
   * every member of the topology to have a consistent view of the topology and
   * to be sure it will not miss some information.
   * So the locking system must be called (not exhaustive list):
   * - when connection established with a DS or RS
   * - when connection ended with a DS or RS
   * - when receiving a TopologyMsg and updating structures
   * - when creating and sending a TopologyMsg
   * - when a DS status is changing (ChangeStatusMsg received or sent)...
   */
  private final ReentrantLock lock = new ReentrantLock();

  /**
   * This lock is used to protect the generationid variable.
   */
  private final Object generationIDLock = new Object();

  /**
   * Tests if the current thread has the lock on this domain.
   * @return True if the current thread has the lock.
   */
  public boolean hasLock()
  {
    return (lock.getHoldCount() > 0);
  }

  /**
   * Takes the lock on this domain (blocking until lock can be acquired) or
   * calling thread is interrupted.
   * @throws java.lang.InterruptedException If interrupted.
   */
  public void lock() throws InterruptedException
  {
    lock.lockInterruptibly();
  }

  /**
   * Releases the lock on this domain.
   */
  public void release()
  {
    lock.unlock();
  }

  /**
   * Tries to acquire the lock on the domain within a given amount of time.
   * @param timeout The amount of milliseconds to wait for acquiring the lock.
   * @return True if the lock was acquired, false if timeout occurred.
   * @throws java.lang.InterruptedException When call was interrupted.
   */
  public boolean tryLock(long timeout) throws InterruptedException
  {
    return lock.tryLock(timeout, TimeUnit.MILLISECONDS);
  }

  /**
   * Starts the status analyzer for the domain.
   */
  public void startStatusAnalyzer()
  {
    if (statusAnalyzer == null)
    {
      int degradedStatusThreshold =
        replicationServer.getDegradedStatusThreshold();
      if (degradedStatusThreshold > 0) // 0 means no status analyzer
      {
        statusAnalyzer = new StatusAnalyzer(this, degradedStatusThreshold);
        statusAnalyzer.start();
      }
    }
  }

  /**
   * Stops the status analyzer for the domain.
   */
  public void stopStatusAnalyzer()
  {
    if (statusAnalyzer != null)
    {
      statusAnalyzer.shutdown();
      statusAnalyzer.waitForShutdown();
      statusAnalyzer = null;
    }
  }

  /**
   * Tests if the status analyzer for this domain is running.
   * @return True if the status analyzer is running, false otherwise.
   */
  public boolean isRunningStatusAnalyzer()
  {
    return (statusAnalyzer != null);
  }

  /**
   * Update the status analyzer with the new threshold value.
   * @param degradedStatusThreshold The new threshold value.
   */
  public void updateStatusAnalyzer(int degradedStatusThreshold)
  {
    if (statusAnalyzer != null)
    {
      statusAnalyzer.setDegradedStatusThreshold(degradedStatusThreshold);
    }
  }

  /**
   * Starts the monitoring publisher for the domain.
   */
  public void startMonitoringPublisher()
  {
    if (monitoringPublisher == null)
    {
      long period =
        replicationServer.getMonitoringPublisherPeriod();
      if (period > 0) // 0 means no monitoring publisher
      {
        monitoringPublisher = new MonitoringPublisher(this, period);
        monitoringPublisher.start();
      }
    }
  }

  /**
   * Stops the monitoring publisher for the domain.
   */
  public void stopMonitoringPublisher()
  {
    if (monitoringPublisher != null)
    {
      monitoringPublisher.shutdown();
      monitoringPublisher.waitForShutdown();
      monitoringPublisher = null;
    }
  }

  /**
   * Tests if the monitoring publisher for this domain is running.
   * @return True if the monitoring publisher is running, false otherwise.
   */
  public boolean isRunningMonitoringPublisher()
  {
    return (monitoringPublisher != null);
  }

  /**
   * Update the monitoring publisher with the new period value.
   * @param period The new period value.
   */
  public void updateMonitoringPublisher(long period)
  {
    if (monitoringPublisher != null)
    {
      monitoringPublisher.setPeriod(period);
    }
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public void initializeMonitorProvider(MonitorProviderCfg configuraiton)
  {
    // Nothing to do for now
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public String getMonitorInstanceName()
  {
    return "Replication server RS(" + replicationServer.getServerId() + ") "
        + replicationServer.getServerURL() + ",cn="
        + baseDn.replace(',', '_').replace('=', '_') + ",cn=Replication";
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public ArrayList<Attribute> getMonitorData()
  {
    /*
     * publish the server id and the port number.
     */
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    attributes.add(Attributes.create("replication-server-id",
        String.valueOf(replicationServer.getServerId())));
    attributes.add(Attributes.create("replication-server-port",
        String.valueOf(replicationServer.getReplicationPort())));

    /*
     * Add all the base DNs that are known by this replication server.
     */
    AttributeBuilder builder = new AttributeBuilder("domain-name");
    builder.add(baseDn);
    attributes.add(builder.toAttribute());

    // Publish to monitor the generation ID by replicationServerDomain
    builder = new AttributeBuilder("generation-id");
    builder.add(baseDn + " " + generationId);
    attributes.add(builder.toAttribute());

    MonitorData md = getDomainMonitorData();

    // Missing changes
    long missingChanges = md.getMissingChangesRS(replicationServer
        .getServerId());
    attributes.add(Attributes.create("missing-changes",
        String.valueOf(missingChanges)));

    return attributes;
  }

  /**
   * Register in the domain an handler that subscribes to changes.
   * @param handler the provided subscribing handler.
   */
  public void registerHandler(MessageHandler handler)
  {
    this.otherHandlers.add(handler);
  }

  /**
   * Unregister from the domain an handler.
   * @param handler the provided unsubscribing handler.
   * @return Whether this handler has been unregistered with success.
   */
  public boolean unRegisterHandler(MessageHandler handler)
  {
    return this.otherHandlers.remove(handler);
  }

  /**
   * Return the state that contain for each server the time of eligibility.
   * @return the state.
   */
  public ServerState getChangeTimeHeartbeatState()
  {
    if (ctHeartbeatState == null)
    {
      ctHeartbeatState = this.getDbServerState().duplicate();
    }
    return ctHeartbeatState;
  }

  /**
   * Computes the eligible server state for the domain.
   *
   *     s1               s2          s3
   *     --               --          --
   *                                 cn31
   *     cn15
   *
   *  ----------------------------------------- eligibleCN
   *     cn14
   *                     cn26
   *     cn13
   *
   * The eligibleState is : s1;cn14 / s2;cn26 / s3;cn31
   *
   * @param eligibleCN              The provided eligibleCN.
   * @return The computed eligible server state.
   */
  public ServerState getEligibleState(ChangeNumber eligibleCN)
  {
    ServerState dbState = this.getDbServerState();

    // The result is initialized from the dbState.
    // From it, we don't want to keep the changes newer than eligibleCN.
    ServerState result = dbState.duplicate();

    if (eligibleCN != null)
    {
      for (int sid : dbState) {
        DbHandler h = sourceDbHandlers.get(sid);
        ChangeNumber mostRecentDbCN = dbState.getMaxChangeNumber(sid);
        try {
          // Is the most recent change in the Db newer than eligible CN ?
          // if yes (like cn15 in the example above, then we have to go back
          // to the Db and look for the change older than  eligible CN (cn14)
          if (eligibleCN.olderOrEqual(mostRecentDbCN)) {
            // let's try to seek the first change <= eligibleCN
            ReplicationIterator ri = null;
            try {
              ri = h.generateIterator(eligibleCN);
              if ((ri != null) && (ri.getChange() != null)) {
                ChangeNumber newCN = ri.getChange().getChangeNumber();
                result.update(newCN);
              }
            } catch (Exception e) {
              // there's no change older than eligibleCN (case of s3/cn31)
              result.update(new ChangeNumber(0, 0, sid));
            } finally {
              if (ri != null) {
                ri.releaseCursor();
              }
            }
          } else {
            // for this serverId, all changes in the ChangelogDb are holder
            // than eligibleCN , the most recent in the db is our guy.
            result.update(mostRecentDbCN);
          }
        } catch (Exception e) {
          Message errMessage = ERR_WRITER_UNEXPECTED_EXCEPTION.get(
              " " + stackTraceToSingleLineString(e));
          logError(errMessage);
          TRACER.debugCaught(DebugLogLevel.ERROR, e);
        }
      }
    }

    if (debugEnabled())
      TRACER.debugInfo("In " + this
        + " getEligibleState() result is " + result);
    return result;
  }

  /**
   * Returns the start state of the domain, made of the first (oldest)
   * change stored for each serverId.
   * Note: Because the replication changelogdb trimming always keep one change
   * whatever its date, the change contained in the returned state can be very
   * old.
   * @return the start state of the domain.
   */
  public ServerState getStartState()
  {
    ServerState domainStartState = new ServerState();
    for (DbHandler dbHandler : sourceDbHandlers.values())
    {
      domainStartState.update(dbHandler.getFirstChange());
    }
    return domainStartState;
  }

  /**
   * Returns the eligibleCN for that domain - relies on the ChangeTimeHeartbeat
   * state.
   * For each DS, take the oldest CN from the changetime heartbeat state
   * and from the changelog db last CN. Can be null.
   * @return the eligible CN.
   */
  public ChangeNumber getEligibleCN()
  {
    ChangeNumber eligibleCN = null;

    for (DbHandler db : sourceDbHandlers.values())
    {
      // Consider this producer (DS/db).
      int sid = db.getServerId();

      // Should it be considered for eligibility ?
      ChangeNumber heartbeatLastDN =
        getChangeTimeHeartbeatState().getMaxChangeNumber(sid);

      // If the most recent UpdateMsg or CLHeartbeatMsg received is very old
      // then the domain is considered down and not considered for eligibility
      /*
      if ((heartbeatLastDN != null) &&
          (TimeThread.getTime()- heartbeatLastDN.getTime() > 5000))
      {
        if (debugEnabled())
          TRACER.debugInfo("In " + this.getName() +
            " Server " + sid
            + " is not considered for eligibility ... potentially down");
        continue;
      }
      */

      boolean sidConnected = false;
      if (directoryServers.containsKey(sid))
      {
        sidConnected = true;
      }
      else
      {
        // not directly connected
        for (ReplicationServerHandler rsh : replicationServers.values())
        {
          if (rsh.isRemoteLDAPServer(sid))
          {
            sidConnected = true;
            break;
          }
        }
      }
      if (!sidConnected)
      {
        if (debugEnabled())
          TRACER.debugInfo("In " + "Replication Server " +
            replicationServer.getReplicationPort() + " " +
            baseDn + " " + replicationServer.getServerId() +
            " Server " + sid
            + " is not considered for eligibility ... potentially down");
        continue;
      }

      ChangeNumber changelogLastCN = db.getLastChange();
      if (changelogLastCN != null)
      {
        if ((eligibleCN == null) || (changelogLastCN.newer(eligibleCN)))
        {
          eligibleCN = changelogLastCN;
        }
      }

      if ((heartbeatLastDN != null) &&
       ((eligibleCN == null) || (heartbeatLastDN.newer(eligibleCN))))
      {
        eligibleCN = heartbeatLastDN;
      }
    }

    if (debugEnabled())
      TRACER.debugInfo(
        "In " + "Replication Server " + replicationServer.getReplicationPort() +
        " " + baseDn + " " + replicationServer.getServerId() +
        " getEligibleCN() returns result =" + eligibleCN);
    return eligibleCN;
  }


  /**
   * Processes a ChangeTimeHeartbeatMsg received, by storing the CN (timestamp)
   * value received, and forwarding the message to the other RSes.
   * @param senderHandler The handler for the server that sent the heartbeat.
   * @param msg The message to process.
   */
  public void processChangeTimeHeartbeatMsg(ServerHandler senderHandler,
      ChangeTimeHeartbeatMsg msg )
  {
    try
    {
      // Acquire lock on domain (see more details in comment of start() method
      // of ServerHandler)
      lock();
    }
    catch (InterruptedException ex)
    {
      // We can't deal with this here, so re-interrupt thread so that it is
      // caught during subsequent IO.
      Thread.currentThread().interrupt();
      return;
    }

    try
    {
      storeReceivedCTHeartbeat(msg.getChangeNumber());
      if (senderHandler.isDataServer())
      {
        // If we are the first replication server warned,
        // then forwards the message to the remote replication servers
        for (ReplicationServerHandler rsHandler : replicationServers
            .values())
        {
          try
          {
            if (rsHandler.getProtocolVersion() >=
              ProtocolVersion.REPLICATION_PROTOCOL_V3)
            {
              rsHandler.send(msg);
            }
          }
          catch (IOException e)
          {
            TRACER.debugCaught(DebugLogLevel.ERROR, e);
            logError(ERR_CHANGELOG_ERROR_SENDING_MSG
                .get("Replication Server "
                    + replicationServer.getReplicationPort() + " "
                    + baseDn + " " + replicationServer.getServerId()));
            stopServer(rsHandler, false);
          }
        }
      }
    }
    finally
    {
      release();
    }
  }

  /**
   * Store a change time value received from a data server.
   * @param cn The provided change time.
   */
  public void storeReceivedCTHeartbeat(ChangeNumber cn)
  {
    // TODO:May be we can spare processing by only storing CN (timestamp)
    // instead of a server state.
    getChangeTimeHeartbeatState().update(cn);

    /*
    if (debugEnabled())
    {
      Set<String> ss = ctHeartbeatState.toStringSet();
      String dss = "";
      for (String s : ss)
      {
        dss = dss + " \\ " + s;
      }
      TRACER.debugInfo("In " + this.getName() + " " + dss);
    }
    */
  }

  /**
   * This methods count the changes, server by server :
   * - from a serverState start point
   * - to (inclusive) an end point (the provided endCN).
   * @param startState The provided start server state.
   * @param endCN The provided end change number.
   * @return The number of changes between startState and endCN.
   */
  public long getEligibleCount(ServerState startState, ChangeNumber endCN)
  {
    long res = 0;

    // Parses the dbState of the domain , server by server
    ServerState dbState = this.getDbServerState();
    for (int sid : dbState) {
      // process one sid
      ChangeNumber startCN = null;
      if (startState.getMaxChangeNumber(sid) != null)
        startCN = startState.getMaxChangeNumber(sid);
      long sidRes = getCount(sid, startCN, endCN);

      // The startPoint is excluded when counting the ECL eligible changes
      if ((startCN != null) && (sidRes > 0))
        sidRes--;

      res += sidRes;
    }
    return res;
  }

  /**
   * This methods count the changes, server by server :
   * - from a start CN
   * - to (inclusive) an end point (the provided endCN).
   * @param startCN The provided start changeNumber.
   * @param endCN The provided end change number.
   * @return The number of changes between startTime and endCN.
   */
  public long getEligibleCount(ChangeNumber startCN, ChangeNumber endCN)
  {
    long res = 0;

    // Parses the dbState of the domain , server by server
    ServerState dbState = this.getDbServerState();
    for (int sid : dbState) {
      // process one sid
      ChangeNumber lStartCN =
          new ChangeNumber(startCN.getTime(), startCN.getSeqnum(), sid);
      res += getCount(sid, lStartCN, endCN);
    }
    return res;
  }

  /**
   * Get the latest (more recent) trim date of the changelog dbs associated
   * to this domain.
   * @return The latest trim date.
   */
  public long getLatestDomainTrimDate()
  {
    long latest = 0;
    for (DbHandler db : sourceDbHandlers.values())
    {
      if ((latest==0) || (latest<db.getLatestTrimDate()))
      {
        latest = db.getLatestTrimDate();
      }
    }
    return latest;
  }
}