/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at legal-notices/CDDLv1_0.txt
* or http://forgerock.org/license/CDDLv1.0.html.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at legal-notices/CDDLv1_0.txt.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information:
* Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
*
* Copyright 2006-2010 Sun Microsystems, Inc.
* Portions Copyright 2011-2015 ForgeRock AS
*/
package org.opends.server.replication.server;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.forgerock.i18n.slf4j.LocalizedLogger;
import org.opends.server.replication.common.CSN;
import org.opends.server.replication.common.ServerState;
import org.opends.server.replication.protocol.MonitorMsg;
import org.opends.server.replication.protocol.MonitorRequestMsg;
import org.opends.server.types.DN;
import org.opends.server.util.TimeThread;
import static org.opends.messages.ReplicationMessages.*;
import static org.opends.server.util.StaticUtils.*;
/**
 * This class maintains monitor data for a replication domain.
 * <p>
 * Two locks are used: {@code pendingMonitorLock} serializes whole
 * recalculations (see {@link #recomputeMonitorData()}), while
 * {@code pendingMonitorDataLock} protects the state of the recalculation in
 * progress, which is also updated by
 * {@link #receiveMonitorDataResponse(MonitorMsg, int)}.
 */
class ReplicationDomainMonitor
{
  private static final LocalizedLogger logger = LocalizedLogger.getLoggerForThisClass();

  /**
   * Lifetime, in milliseconds, of the consolidated monitor data before a new
   * recalculation is allowed.
   * <p>
   * TODO: Remote monitor data cache lifetime is 500ms/should be configurable.
   */
  private static final long MONITOR_DATA_LIFETIME_MS = 500;

  /**
   * Maximum time, in seconds, that a recalculation waits for the monitor
   * responses of the connected replication servers.
   */
  private static final long MONITOR_RESPONSE_TIMEOUT_SECS = 5;

  /**
   * The monitor data consolidated over the topology.
   */
  private volatile ReplicationDomainMonitorData monitorData =
      new ReplicationDomainMonitorData();

  /**
   * This lock guards against multiple concurrent monitor data recalculation.
   */
  private final Object pendingMonitorLock = new Object();

  /** Guarded by pendingMonitorLock. */
  private long monitorDataLastBuildDate;

  /**
   * The set of replication servers which are already known to be slow to send
   * monitor data.
   * <p>
   * Guarded by pendingMonitorLock.
   */
  private final Set<Integer> monitorDataLateServers = new HashSet<>();

  /** This lock serializes updates to the pending monitor data. */
  private final Object pendingMonitorDataLock = new Object();

  /**
   * Monitor data which is currently being calculated, or {@code null} when no
   * recalculation is in progress.
   * <p>
   * Guarded by pendingMonitorDataLock.
   */
  private ReplicationDomainMonitorData pendingMonitorData;

  /**
   * A set containing the IDs of servers from which we are currently expecting
   * monitor responses. When a response is received from a server we remove the
   * ID from this table, and count down the latch if the ID was in the table.
   * <p>
   * Guarded by pendingMonitorDataLock.
   */
  private final Set<Integer> pendingMonitorDataServerIDs = new HashSet<>();

  /**
   * This latch is non-null only while a recalculation is waiting for
   * responses, and is used in order to count incoming responses as they
   * arrive. Since incoming response may arrive at any time, even when there is
   * no pending monitor request, access to the latch must be guarded.
   * <p>
   * Guarded by pendingMonitorDataLock.
   */
  private CountDownLatch pendingMonitorDataLatch;

  /** The replication domain monitored by this class. */
  private final ReplicationServerDomain domain;

  /**
   * Builds an object of this class.
   *
   * @param replicationDomain
   *          The replication domain that will be monitored by this class
   */
  public ReplicationDomainMonitor(ReplicationServerDomain replicationDomain)
  {
    this.domain = replicationDomain;
  }

  /**
   * Returns the latest monitor data available for this replication server
   * domain.
   *
   * @return The latest monitor data available for this replication server
   *         domain, which is never {@code null}.
   */
  public ReplicationDomainMonitorData getMonitorData()
  {
    return monitorData;
  }

  /**
   * Recomputes the monitor data for this replication server domain.
   * <p>
   * The data is only recomputed when the previously built data is older than
   * {@value #MONITOR_DATA_LIFETIME_MS} ms; otherwise the current data is
   * returned as is. A recomputation sends a monitor request to each connected
   * replication server and waits at most
   * {@value #MONITOR_RESPONSE_TIMEOUT_SECS} seconds for the responses.
   *
   * @return The recomputed monitor data for this replication server domain.
   * @throws InterruptedException
   *           If this thread is interrupted while waiting for a response.
   */
  public ReplicationDomainMonitorData recomputeMonitorData()
      throws InterruptedException
  {
    // Only allow one monitor recalculation at a time.
    synchronized (pendingMonitorLock)
    {
      if (monitorDataLastBuildDate + MONITOR_DATA_LIFETIME_MS < TimeThread.getTime())
      {
        try
        {
          final DN baseDN = domain.getBaseDN();
          final CountDownLatch responseLatch;

          // Prevent out of band monitor responses from updating our pending
          // table until we are ready.
          synchronized (pendingMonitorDataLock)
          {
            // Clear the pending monitor data.
            pendingMonitorDataServerIDs.clear();
            pendingMonitorData = new ReplicationDomainMonitorData();

            initializePendingMonitorData();
            sendMonitorRequests(baseDN);

            // Create the pending response latch based on the number of expected
            // monitor responses. Keep a local reference so that the guarded
            // field is never read outside of its lock.
            responseLatch = new CountDownLatch(pendingMonitorDataServerIDs.size());
            pendingMonitorDataLatch = responseLatch;
          }

          // Wait for the responses to come back. The pending data lock is not
          // held here so that responses can be recorded while we wait.
          responseLatch.await(MONITOR_RESPONSE_TIMEOUT_SECS, TimeUnit.SECONDS);

          // Late server bookkeeping and publication of the new data are done
          // under a single lock acquisition so that both observe the same
          // set of missing responses.
          synchronized (pendingMonitorDataLock)
          {
            // Log messages for replication servers that have gone or come back.
            logLateServerChanges(baseDN);

            // Remember which servers were late this time.
            monitorDataLateServers.clear();
            monitorDataLateServers.addAll(pendingMonitorDataServerIDs);

            // Store the new computed data as the reference:
            // now we have the expected answers or an error occurred
            pendingMonitorData.completeComputing();
            monitorData = pendingMonitorData;
            monitorDataLastBuildDate = TimeThread.getTime();
          }
        }
        finally
        {
          synchronized (pendingMonitorDataLock)
          {
            // Clear pending state.
            pendingMonitorData = null;
            pendingMonitorDataLatch = null;
            pendingMonitorDataServerIDs.clear();
          }
        }
      }
    }
    return monitorData;
  }

  /**
   * Sends a monitor request to each connected replication server and
   * registers the ID of every server the request could be sent to in the
   * pending table.
   * <p>
   * Must be called with pendingMonitorDataLock held.
   *
   * @param baseDN
   *          The base DN of the domain, used for logging only.
   */
  private void sendMonitorRequests(DN baseDN)
  {
    for (ServerHandler rs : domain.getConnectedRSs().values())
    {
      final int serverId = rs.getServerId();
      final MonitorRequestMsg msg =
          new MonitorRequestMsg(domain.getLocalRSServerId(), serverId);
      try
      {
        rs.send(msg);

        // Only register this server ID to pending table if we were able
        // to send the message.
        pendingMonitorDataServerIDs.add(serverId);
      }
      catch (IOException e)
      {
        // Log a message and do a best effort from here.
        logger.error(ERR_SENDING_REMOTE_MONITOR_DATA_REQUEST, baseDN, serverId, e.getMessage());
      }
    }
  }

  /**
   * Logs the replication servers whose lateness changed since the previous
   * recalculation: servers which were late last time and have now answered,
   * and servers which have not answered this time but were not late before.
   * <p>
   * Must be called with both pendingMonitorLock and pendingMonitorDataLock
   * held.
   *
   * @param baseDN
   *          The base DN of the domain, used for logging only.
   */
  private void logLateServerChanges(DN baseDN)
  {
    // Log servers that have come back.
    for (int serverId : monitorDataLateServers)
    {
      // Ensure that we only log once per server: don't fill the
      // error log with repeated messages.
      if (!pendingMonitorDataServerIDs.contains(serverId))
      {
        logger.info(NOTE_MONITOR_DATA_RECEIVED, baseDN, serverId);
      }
    }

    // Log servers that have gone away.
    for (int serverId : pendingMonitorDataServerIDs)
    {
      // Ensure that we only log once per server: don't fill the
      // error log with repeated messages.
      if (!monitorDataLateServers.contains(serverId))
      {
        logger.warn(WARN_MISSING_REMOTE_MONITOR_DATA, baseDN, serverId);
      }
    }
  }

  /**
   * Start collecting global monitoring information for the replication domain.
   * <p>
   * Must be called with pendingMonitorDataLock held and with
   * {@code pendingMonitorData} set to a fresh instance.
   */
  private void initializePendingMonitorData()
  {
    // Let's process our directly connected DS
    // - in the ServerHandler for a given DS1, the stored state contains :
    // -- the max CSN produced by DS1
    // -- the last CSN consumed by DS1 from DS2..n
    // - in the ReplicationDomainDB/ReplicaDB, the built-in state contains:
    // -- the max CSN produced by each server
    // So for a given DS connected we can take the state and the max from
    // the DS/state.
    for (ServerHandler ds : domain.getConnectedDSs().values())
    {
      final int serverId = ds.getServerId();
      final ServerState dsState = ds.getServerState().duplicate();

      CSN maxCSN = dsState.getCSN(serverId);
      if (maxCSN == null)
      {
        // This directly connected LS has never produced any change
        maxCSN = new CSN(0, 0, serverId);
      }
      pendingMonitorData.setMaxCSN(maxCSN);
      pendingMonitorData.setLDAPServerState(serverId, dsState);
      pendingMonitorData.setFirstMissingDate(serverId,
          ds.getApproxFirstMissingDate());
    }

    // Then initialize the max CSN for the LS that produced something
    // - from our own local db state
    // - whatever they are directly or indirectly connected
    final ServerState dbServerState = domain.getLatestServerState();
    pendingMonitorData.setRSState(domain.getLocalRSServerId(), dbServerState);
    for (CSN storedCSN : dbServerState)
    {
      pendingMonitorData.setMaxCSN(storedCSN);
    }
  }

  /**
   * Processes a Monitor message received from a remote Replication Server and
   * stores the data received.
   * <p>
   * The message is ignored when no recalculation is in progress. Otherwise,
   * whether or not the message could be processed, the given server ID is
   * removed from the set of expected responses and, if it was expected, the
   * thread waiting in {@link #recomputeMonitorData()} is notified.
   *
   * @param msg
   *          The message to be processed.
   * @param serverId
   *          The ID of the server whose response is being accounted for.
   */
  public void receiveMonitorDataResponse(MonitorMsg msg, int serverId)
  {
    synchronized (pendingMonitorDataLock)
    {
      if (pendingMonitorData == null)
      {
        // This is a response for an earlier request whose computing is
        // already complete.
        logger.debug(INFO_IGNORING_REMOTE_MONITOR_DATA, domain.getBaseDN(), msg.getSenderID());
        return;
      }

      try
      {
        // Here is the RS state : list <serverID, lastCSN>
        // For each LDAP Server, we keep the max CSN across the RSes
        ServerState replServerState = msg.getReplServerDbState();
        pendingMonitorData.setMaxCSNs(replServerState);

        // store the remote RS states.
        pendingMonitorData.setRSState(msg.getSenderID(), replServerState);

        // Store the remote LDAP servers states
        for (int dsServerId : toIterable(msg.ldapIterator()))
        {
          ServerState dsServerState = msg.getLDAPServerState(dsServerId);
          pendingMonitorData.setMaxCSNs(dsServerState);
          pendingMonitorData.setLDAPServerState(dsServerId, dsServerState);
          pendingMonitorData.setFirstMissingDate(dsServerId,
              msg.getLDAPApproxFirstMissingDate(dsServerId));
        }

        // Process the latency reported by the remote RSi on its connections
        // to the other RSes
        for (int rsServerId : toIterable(msg.rsIterator()))
        {
          long newFmd = msg.getRSApproxFirstMissingDate(rsServerId);
          if (rsServerId == domain.getLocalRSServerId())
          {
            // this is the latency of the remote RSi regarding the current RS
            // let's update the first missing date of my connected LS
            for (DataServerHandler ds : domain.getConnectedDSs().values())
            {
              int connectedServerId = ds.getServerId();
              pendingMonitorData.setFirstMissingDate(connectedServerId, newFmd);
            }
          }
          else
          {
            // this is the latency of the remote RSi regarding another RSj
            // let's update the latency of the LSes connected to RSj
            ReplicationServerHandler rsjHdr =
                domain.getConnectedRSs().get(rsServerId);
            if (rsjHdr != null)
            {
              for (int remoteServerId : rsjHdr.getConnectedDirectoryServerIds())
              {
                pendingMonitorData.setFirstMissingDate(remoteServerId, newFmd);
              }
            }
          }
        }
      }
      catch (RuntimeException e)
      {
        // FIXME: do we really expect these???
        logger.error(ERR_PROCESSING_REMOTE_MONITOR_DATA, e.getMessage() + " " + stackTraceToSingleLineString(e));
      }
      finally
      {
        // Decreases the number of expected responses and potentially
        // wakes up the waiting requester thread. The latch is created under
        // this same lock right after the pending table is filled, so it is
        // available whenever the table contains an ID.
        if (pendingMonitorDataServerIDs.remove(serverId))
        {
          pendingMonitorDataLatch.countDown();
        }
      }
    }
  }
}