/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.cluster.coordination.heartbeat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.function.Function;
import java.util.stream.Collectors;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import org.apache.nifi.cluster.coordination.ClusterCoordinator;
import org.apache.nifi.cluster.coordination.node.NodeConnectionState;
import org.apache.nifi.cluster.coordination.node.NodeConnectionStatus;
import org.apache.nifi.cluster.coordination.node.NodeWorkload;
import org.apache.nifi.cluster.protocol.Heartbeat;
import org.apache.nifi.cluster.protocol.HeartbeatPayload;
import org.apache.nifi.cluster.protocol.NodeIdentifier;
import org.apache.nifi.cluster.protocol.ProtocolException;
import org.apache.nifi.cluster.protocol.ProtocolHandler;
import org.apache.nifi.cluster.protocol.ProtocolListener;
import org.apache.nifi.cluster.protocol.message.HeartbeatMessage;
import org.apache.nifi.cluster.protocol.message.HeartbeatResponseMessage;
import org.apache.nifi.cluster.protocol.message.ClusterWorkloadRequestMessage;
import org.apache.nifi.cluster.protocol.message.ClusterWorkloadResponseMessage;
import org.apache.nifi.cluster.protocol.message.ProtocolMessage;
import org.apache.nifi.cluster.protocol.message.ProtocolMessage.MessageType;
import org.apache.nifi.util.NiFiProperties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Uses Apache ZooKeeper to advertise the address to send heartbeats to, and
 * then relies on the NiFi Cluster Protocol to receive heartbeat messages from
 * nodes in the cluster.
 *
 * <p>The most recent heartbeat received from each node is kept in an in-memory
 * concurrent map. That map is also the data source used to answer cluster
 * workload requests.</p>
 */
public class ClusterProtocolHeartbeatMonitor extends AbstractHeartbeatMonitor implements HeartbeatMonitor, ProtocolHandler {

    protected static final Logger logger = LoggerFactory.getLogger(ClusterProtocolHeartbeatMonitor.class);

    /** The "hostname:port" string computed once in the constructor. */
    private final String heartbeatAddress;

    /** Most recent heartbeat for each node, keyed by the node's identifier. */
    private final ConcurrentMap<NodeIdentifier, NodeHeartbeat> heartbeatMessages = new ConcurrentHashMap<>();

    /** Unmarshaller for {@link NodeIdentifier}; created once when the class is loaded. */
    protected static final Unmarshaller nodeIdentifierUnmarshaller;

    static {
        try {
            final JAXBContext jaxbContext = JAXBContext.newInstance(NodeIdentifier.class);
            nodeIdentifierUnmarshaller = jaxbContext.createUnmarshaller();
        } catch (final Exception e) {
            throw new RuntimeException("Failed to create an Unmarshaller for unmarshalling Node Identifier", e);
        }
    }

    /**
     * Creates the monitor, derives the heartbeat address from the given properties and registers
     * this instance as a handler on the given protocol listener.
     *
     * @param clusterCoordinator the coordinator handed to the superclass
     * @param protocolListener the listener on which this instance registers itself as a handler
     * @param nifiProperties the properties from which the node address and protocol port are read
     * @throws RuntimeException if the protocol port property is missing, blank, not an integer,
     *             or outside the range 0-65535
     */
    public ClusterProtocolHeartbeatMonitor(final ClusterCoordinator clusterCoordinator, final ProtocolListener protocolListener, final NiFiProperties nifiProperties) {
        super(clusterCoordinator, nifiProperties);

        String hostname = nifiProperties.getProperty(NiFiProperties.CLUSTER_NODE_ADDRESS);
        if (hostname == null || hostname.trim().isEmpty()) {
            hostname = "localhost";
        }

        final String port = nifiProperties.getProperty(NiFiProperties.CLUSTER_NODE_PROTOCOL_PORT);
        if (port == null || port.trim().isEmpty()) {
            throw new RuntimeException("Unable to determine which port Cluster Coordinator Protocol is listening on because the '"
                + NiFiProperties.CLUSTER_NODE_PROTOCOL_PORT + "' property is not set");
        }

        final String invalidPortMessage = "Unable to determine which port Cluster Coordinator Protocol is listening on because the '"
            + NiFiProperties.CLUSTER_NODE_PROTOCOL_PORT + "' property is set to '" + port + "', which is not a valid port number.";

        final int portNumber;
        try {
            portNumber = Integer.parseInt(port);
        } catch (final NumberFormatException nfe) {
            // Keep the parse failure as the cause so the stack trace shows why the value was rejected.
            throw new RuntimeException(invalidPortMessage, nfe);
        }

        // A value that parses as an integer is still not a port unless it fits in 16 unsigned bits.
        if (portNumber < 0 || portNumber > 65535) {
            throw new RuntimeException(invalidPortMessage);
        }

        heartbeatAddress = hostname + ":" + port;

        // Register only after every validation has passed, so that a failed construction
        // never leaves a partially initialized handler attached to the listener.
        protocolListener.addHandler(this);
    }

    /**
     * @return the "hostname:port" address computed at construction time
     */
    @Override
    public String getHeartbeatAddress() {
        return heartbeatAddress;
    }

    /**
     * Discards all stored heartbeats and seeds the map with one placeholder heartbeat per node
     * currently known to the cluster coordinator.
     */
    @Override
    public void onStart() {
        // We don't know what the heartbeats look like for the nodes, since we were just elected to monitoring
        // them. However, the map may be filled with old heartbeats. So we clear the heartbeats and populate the
        // map with new heartbeats set to the current time and using the currently known status. We do this so
        // that if we go the required amount of time without receiving a heartbeat, we do know to mark the node
        // as disconnected.
        heartbeatMessages.clear();

        // One timestamp is used for both the heartbeat time and the system start time of every placeholder.
        final long now = System.currentTimeMillis();
        for (final NodeIdentifier nodeId : clusterCoordinator.getNodeIdentifiers()) {
            final NodeHeartbeat heartbeat = new StandardNodeHeartbeat(nodeId, now,
                clusterCoordinator.getConnectionStatus(nodeId), 0, 0L, 0, now);
            heartbeatMessages.put(nodeId, heartbeat);
        }
    }

    /**
     * No-op; nothing is released when the monitor stops.
     */
    @Override
    public void onStop() {
    }

    /**
     * @return a read-only view of the live heartbeat map; later updates to the underlying map
     *         are visible through the returned view
     */
    @Override
    protected Map<NodeIdentifier, NodeHeartbeat> getLatestHeartbeats() {
        return Collections.unmodifiableMap(heartbeatMessages);
    }

    /**
     * Removes the stored heartbeat, if any, for the given node.
     *
     * @param nodeId the identifier of the node whose heartbeat should be dropped
     */
    @Override
    public synchronized void removeHeartbeat(final NodeIdentifier nodeId) {
        logger.debug("Deleting heartbeat for node {}", nodeId);
        heartbeatMessages.remove(nodeId);
    }

    /**
     * Removes every stored heartbeat.
     */
    @Override
    public synchronized void purgeHeartbeats() {
        logger.debug("Purging old heartbeats");
        heartbeatMessages.clear();
    }

    /**
     * Dispatches a protocol message to the matching handler.
     *
     * @param msg the message to handle; must be a heartbeat or a cluster workload request
     * @return the response produced for the message
     * @throws ProtocolException if the message is of any other type
     */
    @Override
    public ProtocolMessage handle(final ProtocolMessage msg) throws ProtocolException {
        switch (msg.getType()) {
            case HEARTBEAT:
                return handleHeartbeat((HeartbeatMessage) msg);
            case CLUSTER_WORKLOAD_REQUEST:
                return handleClusterWorkload((ClusterWorkloadRequestMessage) msg);
            default:
                throw new ProtocolException("Cannot handle message of type " + msg.getType());
        }
    }

    /**
     * Records the heartbeat carried by the message and builds a response listing every node
     * status on which this instance and the sending node disagree.
     *
     * @param msg the heartbeat message received from a node
     * @return a {@link HeartbeatResponseMessage} holding the status differences and, while the
     *         flow election is not complete, the flow election status
     */
    private ProtocolMessage handleHeartbeat(final HeartbeatMessage msg) {
        final Heartbeat heartbeat = msg.getHeartbeat();
        final NodeIdentifier nodeId = heartbeat.getNodeIdentifier();
        final NodeConnectionStatus connectionStatus = heartbeat.getConnectionStatus();

        final HeartbeatPayload payload = HeartbeatPayload.unmarshal(heartbeat.getPayload());
        final int activeThreadCount = payload.getActiveThreadCount();
        // A plain narrowing cast would wrap around (and may turn negative) for very large counts;
        // saturate at the int bounds instead.
        final int flowFileCount = toSaturatedInt(payload.getTotalFlowFileCount());
        final long flowFileBytes = payload.getTotalFlowFileBytes();
        final long systemStartTime = payload.getSystemStartTime();

        final NodeHeartbeat nodeHeartbeat = new StandardNodeHeartbeat(nodeId, System.currentTimeMillis(),
            connectionStatus, flowFileCount, flowFileBytes, activeThreadCount, systemStartTime);
        heartbeatMessages.put(nodeId, nodeHeartbeat);
        logger.debug("Received new heartbeat from {}", nodeId);

        // Formulate a List of differences between our view of the cluster topology and the node's view
        // and send that back to the node so that it is in-sync with us
        List<NodeConnectionStatus> nodeStatusList = payload.getClusterStatus();
        if (nodeStatusList == null) {
            nodeStatusList = Collections.emptyList();
        }
        final List<NodeConnectionStatus> updatedStatuses = getUpdatedStatuses(nodeStatusList);

        final HeartbeatResponseMessage responseMessage = new HeartbeatResponseMessage();
        responseMessage.setUpdatedNodeStatuses(updatedStatuses);

        if (!getClusterCoordinator().isFlowElectionComplete()) {
            responseMessage.setFlowElectionMessage(getClusterCoordinator().getFlowElectionStatus());
        }

        return responseMessage;
    }

    /**
     * Converts a {@code long} to an {@code int}, clamping to {@link Integer#MIN_VALUE} and
     * {@link Integer#MAX_VALUE} instead of silently discarding the high-order bits.
     *
     * @param value the value to convert
     * @return {@code value} when it fits in an {@code int}, otherwise the nearest {@code int} bound
     */
    private static int toSaturatedInt(final long value) {
        if (value > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        if (value < Integer.MIN_VALUE) {
            return Integer.MIN_VALUE;
        }
        return (int) value;
    }

    /**
     * Builds a workload report from the latest heartbeat of every node whose connection state
     * is {@link NodeConnectionState#CONNECTED}.
     *
     * @param msg the workload request; its content is not inspected
     * @return a {@link ClusterWorkloadResponseMessage} mapping each connected node to its workload
     */
    private ProtocolMessage handleClusterWorkload(final ClusterWorkloadRequestMessage msg) {
        final Map<NodeIdentifier, NodeWorkload> workloads = new HashMap<>();

        for (final NodeHeartbeat hb : getLatestHeartbeats().values()) {
            // A heartbeat may have been stored without a connection status; such a node cannot be
            // considered connected, so skip it rather than fail the whole request.
            final NodeConnectionStatus status = hb.getConnectionStatus();
            if (status == null || !NodeConnectionState.CONNECTED.equals(status.getState())) {
                continue;
            }

            final NodeWorkload wl = new NodeWorkload();
            wl.setReportedTimestamp(hb.getTimestamp());
            wl.setSystemStartTime(hb.getSystemStartTime());
            wl.setActiveThreadCount(hb.getActiveThreadCount());
            wl.setFlowFileCount(hb.getFlowFileCount());
            wl.setFlowFileBytes(hb.getFlowFileBytes());
            workloads.put(hb.getNodeIdentifier(), wl);
        }

        final ClusterWorkloadResponseMessage response = new ClusterWorkloadResponseMessage();
        response.setNodeWorkloads(workloads);
        return response;
    }

    /**
     * Computes the statuses that must be sent back to a node so that its view of the cluster
     * matches the view held by the cluster coordinator.
     *
     * @param nodeStatusList the statuses as reported by the node; must not be {@code null}
     * @return the coordinator's status for every node on which the two views differ, followed by
     *         a {@link NodeConnectionState#REMOVED} status for every node that the reporting node
     *         lists but the coordinator does not
     */
    private List<NodeConnectionStatus> getUpdatedStatuses(final List<NodeConnectionStatus> nodeStatusList) {
        // Map node's statuses by NodeIdentifier for quick & easy lookup. The merge function keeps the
        // last entry when the node reports the same identifier more than once; without it a duplicate
        // would abort the heartbeat response with an IllegalStateException.
        final Map<NodeIdentifier, NodeConnectionStatus> nodeStatusMap = nodeStatusList.stream()
            .collect(Collectors.toMap(NodeConnectionStatus::getNodeIdentifier, Function.identity(), (earlier, later) -> later));

        // Check if our connection status is the same for each Node Identifier and if not, add our version of the status
        // to a List of updated statuses.
        final List<NodeConnectionStatus> currentStatuses = clusterCoordinator.getConnectionStatuses();
        final List<NodeConnectionStatus> updatedStatuses = new ArrayList<>();
        for (final NodeConnectionStatus currentStatus : currentStatuses) {
            final NodeConnectionStatus nodeStatus = nodeStatusMap.get(currentStatus.getNodeIdentifier());
            if (!currentStatus.equals(nodeStatus)) {
                updatedStatuses.add(currentStatus);
            }
        }

        // If the node has any statuses that we do not have, add a REMOVED status to the update list
        final Set<NodeIdentifier> nodeIds = currentStatuses.stream()
            .map(NodeConnectionStatus::getNodeIdentifier)
            .collect(Collectors.toSet());
        for (final NodeConnectionStatus nodeStatus : nodeStatusList) {
            if (!nodeIds.contains(nodeStatus.getNodeIdentifier())) {
                updatedStatuses.add(new NodeConnectionStatus(nodeStatus.getNodeIdentifier(), NodeConnectionState.REMOVED, null));
            }
        }

        logger.debug("\n\nCalculated diff between current cluster status and node cluster status as follows:\nNode: {}\nSelf: {}\nDifference: {}\n\n",
            nodeStatusList, currentStatuses, updatedStatuses);

        return updatedStatuses;
    }

    /**
     * @param msg the message to inspect
     * @return {@code true} if the message is a heartbeat or a cluster workload request
     */
    @Override
    public boolean canHandle(ProtocolMessage msg) {
        return msg.getType() == MessageType.HEARTBEAT || msg.getType() == MessageType.CLUSTER_WORKLOAD_REQUEST;
    }
}