/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ambari.server.controller.metrics;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.ambari.server.Role;
import org.apache.ambari.server.controller.AmbariManagementController;
import org.apache.ambari.server.controller.AmbariServer;
import org.apache.ambari.server.controller.internal.HostStatusHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.inject.Inject;
/**
 * Holds the status of the metrics collector hosts known for a single cluster.
 *
 * <p>Known collector hosts are kept in two sets, "live" and "dead", and one host
 * is tracked as the current collector host. Liveness is always determined by
 * delegating to {@link HostStatusHelper}.
 *
 * <p>The host sets are {@link CopyOnWriteArraySet}s, the down-event counter is an
 * {@link AtomicInteger} and the current host is a {@code volatile} field, so
 * individual reads and writes are safe across threads. Compound operations
 * (for example a refresh) are not performed under a lock.
 */
public class MetricsCollectorHAClusterState {

  protected final static Logger LOG =
    LoggerFactory.getLogger(MetricsCollectorHAClusterState.class);

  /** Service name passed to {@link HostStatusHelper} for component liveness checks. */
  private static final String METRICS_SERVICE_NAME = "AMBARI_METRICS";

  /** Number of "current collector is down" events that must arrive before a refresh is triggered. */
  private static final int COLLECTOR_DOWN_REFRESH_COUNTER_LIMIT = 5;

  /** Shared random source used to pick a collector host. */
  private static final Random RANDOM = new Random();

  private final String clusterName;
  private final Set<String> liveCollectorHosts;
  private final Set<String> deadCollectorHosts;
  private final AtomicInteger collectorDownRefreshCounter;

  // volatile: written during a refresh and read via getCurrentCollectorHost().
  private volatile String currentCollectorHost = null;

  @Inject
  AmbariManagementController managementController;

  /**
   * Creates an empty state (no live hosts, no dead hosts, no current host).
   *
   * @param clusterName name of the cluster whose collectors are tracked
   */
  public MetricsCollectorHAClusterState(String clusterName) {
    // The field has no initializer, so it is still null at this point and the
    // controller is obtained from AmbariServer.
    if (managementController == null) {
      managementController = AmbariServer.getController();
    }
    this.clusterName = clusterName;
    this.liveCollectorHosts = new CopyOnWriteArraySet<>();
    this.deadCollectorHosts = new CopyOnWriteArraySet<>();
    this.collectorDownRefreshCounter = new AtomicInteger(0);
  }

  /**
   * Registers a collector host, filing it under the live or dead set depending on
   * whether its METRICS_COLLECTOR component is currently reported live.
   *
   * <p>If there is no current collector host, or the current one is not reported
   * live, a refresh is performed right away (a proactive switch).
   *
   * @param collectorHost host name of the collector to register
   */
  public void addMetricsCollectorHost(String collectorHost) {
    if (isCollectorComponentLive(collectorHost)) {
      liveCollectorHosts.add(collectorHost);
      deadCollectorHosts.remove(collectorHost);
    } else {
      deadCollectorHosts.add(collectorHost);
      liveCollectorHosts.remove(collectorHost);
    }

    // Read the volatile field once so the check and the refresh see the same host.
    String current = currentCollectorHost;
    if (current == null || !isCollectorComponentLive(current)) {
      refreshCollectorHost(current);
    }
  }

  /**
   * Chooses a new current collector host.
   *
   * <p>Dead hosts that have become alive are first promoted to the live set. The
   * given host (if not null) is then moved to the dead set. A random live host is
   * selected; if there is still no current host at all, a random dead host is
   * used as a fallback. When no live host exists and a current host is already
   * set, the current host is left unchanged.
   *
   * @param currentHost the host considered down, or {@code null} if there is none
   */
  private void refreshCollectorHost(String currentHost) {
    LOG.info("Refreshing collector host, current collector host : {}", currentHost);

    // A good time to check if there are some dead collectors that have now become alive.
    testAndAddDeadCollectorsToLiveList();

    if (currentHost != null) {
      // Set semantics make explicit contains() checks unnecessary.
      liveCollectorHosts.remove(currentHost);
      deadCollectorHosts.add(currentHost);
    }

    String liveCandidate = getRandom(liveCollectorHosts);
    if (liveCandidate != null) {
      currentCollectorHost = liveCandidate;
    }

    if (currentCollectorHost == null) {
      String deadCandidate = getRandom(deadCollectorHosts);
      if (deadCandidate != null) {
        currentCollectorHost = deadCandidate;
      }
    }

    LOG.info("After refresh, new collector host : {}", currentCollectorHost);
  }

  /**
   * @return the currently selected collector host, or {@code null} if none has been selected
   */
  public String getCurrentCollectorHost() {
    return currentCollectorHost;
  }

  /**
   * Handles an event reporting that a collector host is down.
   *
   * @param deadCollectorHost the host reported as down; may be {@code null}
   */
  public void onCollectorHostDown(String deadCollectorHost) {
    if (deadCollectorHost == null) {
      // Case 1: Collector is null. Ideally this can never happen.
      refreshCollectorHost(null);
    } else if (deadCollectorHost.equals(currentCollectorHost) && numCollectors() > 1) {
      // Case 2: Event informing us that the current collector is dead and we have
      // not switched yet. Only refresh once enough such events have been received.
      if (testRefreshCounter()) {
        refreshCollectorHost(deadCollectorHost);
      }
    }
    // Case 3: Got a dead collector event for a host we already switched away from
    // (or there is only one collector). No-op.
  }

  /**
   * Moves every host in the dead set that now passes
   * {@link #isValidAliveCollectorHost(String, String)} into the live set.
   */
  private void testAndAddDeadCollectorsToLiveList() {
    // Collect first, then move, so the dead set is not modified while it is scanned.
    Set<String> revivedHosts = new HashSet<>();
    for (String deadHost : deadCollectorHosts) {
      if (isValidAliveCollectorHost(clusterName, deadHost)) {
        revivedHosts.add(deadHost);
      }
    }

    for (String revivedHost : revivedHosts) {
      LOG.info("Removing collector {} from dead list to live list", revivedHost);
      deadCollectorHosts.remove(revivedHost);
      liveCollectorHosts.add(revivedHost);
    }
  }

  /**
   * @param clusterName   cluster the host belongs to
   * @param collectorHost host to test; {@code null} is never valid
   * @return {@code true} if the host is non-null and both the host and its
   *         METRICS_COLLECTOR component are reported live
   */
  private boolean isValidAliveCollectorHost(String clusterName, String collectorHost) {
    return collectorHost != null
      && HostStatusHelper.isHostLive(managementController, clusterName, collectorHost)
      && HostStatusHelper.isHostComponentLive(managementController, clusterName, collectorHost,
           METRICS_SERVICE_NAME, Role.METRICS_COLLECTOR.name());
  }

  /**
   * Counts a "collector down" event.
   *
   * <p>The increment and the reset are done as one atomic compare-and-set step, so
   * concurrent events can neither skip over the limit nor lose increments.
   *
   * @return {@code true} when this event makes the count reach the limit (the
   *         counter is then reset to 0), {@code false} otherwise
   */
  private boolean testRefreshCounter() {
    while (true) {
      int observed = collectorDownRefreshCounter.get();
      int incremented = observed + 1;
      boolean limitReached = incremented >= COLLECTOR_DOWN_REFRESH_COUNTER_LIMIT;
      int updated = limitReached ? 0 : incremented;
      if (collectorDownRefreshCounter.compareAndSet(observed, updated)) {
        return limitReached;
      }
      // Another thread changed the counter in between; retry with the fresh value.
    }
  }

  /**
   * Checks whether at least one host in the live set is reported live. If none
   * is, dead hosts that became alive are promoted and the check is repeated once.
   *
   * @return {@code true} if a host in the live set is reported live
   */
  public boolean isCollectorHostLive() {
    if (isAnyLiveListedHostAlive()) {
      return true;
    }

    // If no host is alive, check if some dead collectors have become live and try once more.
    testAndAddDeadCollectorsToLiveList();
    return isAnyLiveListedHostAlive();
  }

  /**
   * Checks whether the METRICS_COLLECTOR component is reported live on any known
   * host, looking at the live set first and then the dead set. The sets are not
   * updated here; that happens on a refresh.
   *
   * @return {@code true} if the collector component is reported live on any known host
   */
  public boolean isCollectorComponentAlive() {
    for (String host : liveCollectorHosts) {
      if (isCollectorComponentLive(host)) {
        return true;
      }
    }

    for (String host : deadCollectorHosts) {
      if (isCollectorComponentLive(host)) {
        return true;
      }
    }

    return false;
  }

  /**
   * @return {@code true} if any host currently in the live set is reported live
   */
  private boolean isAnyLiveListedHostAlive() {
    for (String host : liveCollectorHosts) {
      if (HostStatusHelper.isHostLive(managementController, clusterName, host)) {
        return true;
      }
    }
    return false;
  }

  /**
   * @param host host to test
   * @return {@code true} if the METRICS_COLLECTOR component on the host is reported live
   */
  private boolean isCollectorComponentLive(String host) {
    return HostStatusHelper.isHostComponentLive(managementController, clusterName, host,
      METRICS_SERVICE_NAME, Role.METRICS_COLLECTOR.name());
  }

  /**
   * @return total number of known collector hosts (live plus dead)
   */
  private int numCollectors() {
    return liveCollectorHosts.size() + deadCollectorHosts.size();
  }

  /**
   * Picks a random element of the given set.
   *
   * <p>The selection is made from an array snapshot, so a concurrent change of the
   * set between sizing and picking cannot affect the result.
   *
   * @param collectorSet set to pick from
   * @return a randomly chosen element, or {@code null} if the set is empty
   */
  private String getRandom(Set<String> collectorSet) {
    String[] snapshot = collectorSet.toArray(new String[0]);
    if (snapshot.length == 0) {
      return null;
    }
    return snapshot[RANDOM.nextInt(snapshot.length)];
  }
}