package org.ovirt.engine.core.bll.gluster;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import javax.inject.Inject;
import javax.inject.Singleton;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.ovirt.engine.core.bll.interfaces.BackendInternal;
import org.ovirt.engine.core.bll.job.ExecutionHandler;
import org.ovirt.engine.core.common.AuditLogType;
import org.ovirt.engine.core.common.action.SetNonOperationalVdsParameters;
import org.ovirt.engine.core.common.action.VdcActionType;
import org.ovirt.engine.core.common.action.VdcReturnValueBase;
import org.ovirt.engine.core.common.action.gluster.GlusterVolumeActionParameters;
import org.ovirt.engine.core.common.businessentities.Cluster;
import org.ovirt.engine.core.common.businessentities.ExternalStatus;
import org.ovirt.engine.core.common.businessentities.NonOperationalReason;
import org.ovirt.engine.core.common.businessentities.VDS;
import org.ovirt.engine.core.common.businessentities.VDSStatus;
import org.ovirt.engine.core.common.businessentities.gluster.BrickDetails;
import org.ovirt.engine.core.common.businessentities.gluster.BrickProperties;
import org.ovirt.engine.core.common.businessentities.gluster.GlusterBrickEntity;
import org.ovirt.engine.core.common.businessentities.gluster.GlusterServer;
import org.ovirt.engine.core.common.businessentities.gluster.GlusterServerInfo;
import org.ovirt.engine.core.common.businessentities.gluster.GlusterStatus;
import org.ovirt.engine.core.common.businessentities.gluster.GlusterVolumeAdvancedDetails;
import org.ovirt.engine.core.common.businessentities.gluster.GlusterVolumeEntity;
import org.ovirt.engine.core.common.businessentities.gluster.GlusterVolumeOptionEntity;
import org.ovirt.engine.core.common.businessentities.gluster.PeerStatus;
import org.ovirt.engine.core.common.businessentities.gluster.TransportType;
import org.ovirt.engine.core.common.businessentities.network.Network;
import org.ovirt.engine.core.common.businessentities.network.VdsNetworkInterface;
import org.ovirt.engine.core.common.config.Config;
import org.ovirt.engine.core.common.config.ConfigValues;
import org.ovirt.engine.core.common.constants.gluster.GlusterConstants;
import org.ovirt.engine.core.common.gluster.GlusterFeatureSupported;
import org.ovirt.engine.core.common.utils.gluster.GlusterCoreUtil;
import org.ovirt.engine.core.common.vdscommands.RemoveVdsVDSCommandParameters;
import org.ovirt.engine.core.common.vdscommands.VDSCommandType;
import org.ovirt.engine.core.common.vdscommands.VDSReturnValue;
import org.ovirt.engine.core.common.vdscommands.VdsIdVDSCommandParametersBase;
import org.ovirt.engine.core.common.vdscommands.gluster.AddGlusterServerVDSParameters;
import org.ovirt.engine.core.common.vdscommands.gluster.GlusterVolumeAdvancedDetailsVDSParameters;
import org.ovirt.engine.core.common.vdscommands.gluster.GlusterVolumeVDSParameters;
import org.ovirt.engine.core.common.vdscommands.gluster.GlusterVolumesListVDSParameters;
import org.ovirt.engine.core.compat.Guid;
import org.ovirt.engine.core.compat.TransactionScopeOption;
import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AlertDirector;
import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogDirector;
import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogableBase;
import org.ovirt.engine.core.dao.gluster.GlusterDBUtils;
import org.ovirt.engine.core.di.Injector;
import org.ovirt.engine.core.utils.lock.EngineLock;
import org.ovirt.engine.core.utils.timer.OnTimerMethodAnnotation;
import org.ovirt.engine.core.utils.transaction.TransactionSupport;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class is responsible for keeping the Gluster related data of the engine in sync with the actual data retrieved
* from GlusterFS. This helps to make sure that any changes done on the Gluster servers using the Gluster CLI are
* propagated to the engine as well.
*/
@Singleton
public class GlusterSyncJob extends GlusterJob {
private static final Logger log = LoggerFactory.getLogger(GlusterSyncJob.class);
@Inject
private AuditLogDirector auditLogDirector;
@Inject
private BackendInternal backend;
@Inject
private GlusterDBUtils glusterDBUtils;
@Inject
private AlertDirector alertDirector;
@Override
public Collection<GlusterJobSchedulingDetails> getSchedulingDetails() {
return Arrays.asList(
new GlusterJobSchedulingDetails(
"refreshLightWeightData", getRefreshRate(ConfigValues.GlusterRefreshRateLight)),
new GlusterJobSchedulingDetails(
"refreshHeavyWeightData", getRefreshRate(ConfigValues.GlusterRefreshRateHeavy)),
new GlusterJobSchedulingDetails(
"refreshSelfHealInfo", getRefreshRate(ConfigValues.GlusterRefreshRateHealInfo))
);
}
/**
* Refreshes details of all volumes across all clusters being managed in the engine. It can end up doing the
* following in the engine DB to make sure that the volume details in the engine DB are in sync with GlusterFS:<br>
* <ul>
* <li>Insert volumes</li>
* <li>Delete volumes</li>
* <li>Update properties of a volume e.g. status, volume type</li>
* <li>Add / remove bricks to / from volumes</li>
* <li>Set / Unset volume options</li>
* </ul>
* These are all fetched using the 'volume info' command on the gluster CLI, which is relatively lightweight, and
* hence this method is scheduled more frequently as compared to the other method <code>refreshHeavyWeightData</code>,
* which uses 'volume status' to fetch and update the status of volume bricks.
*/
@OnTimerMethodAnnotation("refreshLightWeightData")
public void refreshLightWeightData() {
log.debug("Refreshing Gluster Data [lightweight]");
List<Cluster> clusters = clusterDao.getAll();
for (Cluster cluster : clusters) {
if (cluster.supportsGlusterService()) {
try {
refreshClusterData(cluster);
} catch (Exception e) {
log.error("Error while refreshing Gluster lightweight data of cluster '{}': {}",
cluster.getName(),
e.getMessage());
log.debug("Exception", e);
}
}
}
}
private void refreshClusterData(Cluster cluster) {
log.debug("Refreshing Gluster lightweight Data for cluster '{}'", cluster.getName());
List<VDS> existingServers = vdsDao.getAllForCluster(cluster.getId());
VDS upServer = glusterUtil.getUpServer(cluster.getId());
if (upServer == null) {
log.debug("No server UP in cluster '{}'. Can't refresh it's data at this point.", cluster.getName());
return;
}
refreshServerData(cluster, upServer, existingServers);
refreshVolumeData(cluster, upServer, existingServers);
}
/**
* If any servers have been added to the Gluster cluster directly from the Gluster CLI, we still don't add them
* automatically to the engine DB, as addition of servers requires user approval from the GUI. If the cluster is a
* gluster-only cluster, and one or more servers have been removed directly from the Gluster CLI, we remove them
* from the engine DB, and also invoke the corresponding VDS command.
*/
private void refreshServerData(Cluster cluster, VDS upServer, List<VDS> existingServers) {
acquireLock(cluster.getId());
log.debug("Refreshing Gluster Server data for cluster '{}' using server '{}'",
cluster.getName(),
upServer.getName());
try {
List<GlusterServerInfo> fetchedServers = fetchServers(cluster, upServer, existingServers);
if (fetchedServers != null) {
syncServers(cluster, existingServers, fetchedServers);
}
} catch(Exception e) {
log.error("Error while refreshing server data for cluster '{}' from database: {}",
cluster.getName(),
e.getMessage());
log.debug("Exception", e);
} finally {
releaseLock(cluster.getId());
}
}
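/**
* Compares the servers known to the engine with the servers fetched from the gluster CLI and acts on the
* differences: servers removed from the gluster cluster are either removed from the engine (gluster-only
* cluster) or marked as disconnected (cluster also supports virt service), and the peer status and additional
* gluster network interfaces of the remaining servers are synced.
*/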
private void syncServers(Cluster cluster, List<VDS> existingServers, List<GlusterServerInfo> fetchedServers) {
log.debug("Existing servers list returned '{}' comparing with fetched servers '{}'", existingServers, fetchedServers);
boolean serverRemoved = false;
Network glusterNetwork = findGlusterNetwork(cluster.getId());
for (VDS server : existingServers) {
GlusterServerInfo glusterServer = findGlusterServer(server, fetchedServers);
if (isSyncableStatus(server.getStatus())) {
if (glusterServer == null) {
if (cluster.supportsVirtService()) {
// If the cluster supports virt service as well, we should not remove any servers from it, even
// if they have been removed from the Gluster cluster using the Gluster CLI, as they could
// potentially be used for running VMs. The server's peer status will be marked as DISCONNECTED instead.
log.debug("As cluster '{}' supports virt service as well, server '{}' detected as removed from glusterfs will not be removed from engine",
cluster.getName(),
server.getHostName());
setNonOperational(server);
continue;
}
log.info(
"Server '{}' has been removed directly using the gluster CLI. Removing it from engine as well.",
server.getName());
logUtil.logServerMessage(server, AuditLogType.GLUSTER_SERVER_REMOVED_FROM_CLI);
try (EngineLock lock = glusterUtil.acquireGlusterLockWait(server.getId())) {
removeServerFromDb(server);
// if this is the last-but-one server, reset the alternate probed address of the remaining server
checkAndResetKnownAddress(existingServers, server);
// remove the server from resource manager
runVdsCommand(VDSCommandType.RemoveVds, new RemoveVdsVDSCommandParameters(server.getId()));
serverRemoved = true;
} catch (Exception e) {
log.error("Error while removing server '{}' from database: {}",
server.getName(),
e.getMessage());
log.debug("Exception", e);
}
} else if (server.getStatus() == VDSStatus.Up && glusterServer.getStatus() == PeerStatus.DISCONNECTED) {
// check whether gluster is running; if it is down, move the host to Non-Operational
VDSReturnValue returnValue =
runVdsCommand(VDSCommandType.GlusterServersList,
new VdsIdVDSCommandParametersBase(server.getId()));
if (!returnValue.getSucceeded()) {
setNonOperational(server);
}
} else {
// update the peer status and check whether all interfaces on the gluster network have been peer probed.
updateStatusAndPeerProbeOtherIface(glusterNetwork, server, glusterServer);
}
}
}
if (serverRemoved) {
log.info("Servers detached using gluster CLI is removed from engine after inspecting the Gluster servers"
+ " list returned '{}' - comparing with db servers '{}'",
fetchedServers, existingServers);
}
}
// If only one host remains in the cluster, reset its known address so that a new host will
// be peer probed with this alternate address
private void checkAndResetKnownAddress(List<VDS> servers, VDS removedServer) {
if (servers.size() == 2) {
for (VDS server : servers) {
// set the known address on the remaining server.
if (!Objects.equals(server.getId(), removedServer.getId())) {
serverDao.updateKnownAddresses(server.getId(), null);
}
}
}
}
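/**
* Updates the peer status of the host in the engine DB if it has re-connected to the gluster cluster, and peer
* probes any IP address of the host's gluster network interfaces that is not yet known to gluster.
*/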
private void updateStatusAndPeerProbeOtherIface(Network glusterNetwork, VDS host, GlusterServerInfo fetchedServerInfo) {
GlusterServer glusterServer = serverDao.get(host.getId());
if (glusterServer == null) {
return;
}
if (glusterServer.getPeerStatus() == PeerStatus.DISCONNECTED
&& fetchedServerInfo.getStatus() == PeerStatus.CONNECTED) {
//change the status to indicate that host is now part of cluster
serverDao.updatePeerStatus(host.getId(), PeerStatus.CONNECTED);
}
if (glusterNetwork == null || host.getStatus() != VDSStatus.Up) {
return;
}
List<VdsNetworkInterface> interfaces = interfaceDao.getAllInterfacesForVds(host.getId());
for (VdsNetworkInterface iface : interfaces) {
if (glusterNetwork.getName().equals(iface.getNetworkName()) &&
StringUtils.isNotBlank(iface.getIpv4Address())
&& !glusterServer.getKnownAddresses().contains(iface.getIpv4Address())) {
// get another server in the cluster
VDS upServer = getAlternateUpServerInCluster(host.getClusterId(), host.getId());
if (upServer != null) {
boolean peerProbed = glusterPeerProbeAdditionalInterface(upServer.getId(), iface.getIpv4Address());
if (peerProbed) {
serverDao.addKnownAddress(host.getId(), iface.getIpv4Address());
}
} else {
log.warn("probe could not be done for server '{}' as no alternate UP server found", host.getHostName());
}
}
}
}
private Network findGlusterNetwork(Guid clusterId) {
List<Network> allNetworksInCluster = networkDao.getAllForCluster(clusterId);
for (Network network : allNetworksInCluster) {
if (network.getCluster().isGluster()) {
return network;
}
}
return null;
}
private VDS getAlternateUpServerInCluster(Guid clusterId, Guid vdsId) {
List<VDS> vdsList =
vdsDao.getAllForClusterWithStatusAndPeerStatus(clusterId, VDSStatus.Up, PeerStatus.CONNECTED);
// If the cluster already has Gluster servers, get an UP server
if (vdsList.isEmpty()) {
return null;
}
for (VDS vds : vdsList) {
if (!vdsId.equals(vds.getId()) && vds.getExternalStatus() == ExternalStatus.Ok) {
return vds;
}
}
return null;
}
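/**
* Peer probes the given additional address of a host from the given UP server. Returns true if the probe
* succeeded, false otherwise.
*/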
private boolean glusterPeerProbeAdditionalInterface(Guid upServerId, String newServerName) {
try {
VDSReturnValue returnValue =
runVdsCommand(VDSCommandType.AddGlusterServer,
new AddGlusterServerVDSParameters(upServerId, newServerName));
if (!returnValue.getSucceeded()) {
AuditLogableBase logable = Injector.injectMembers(new AuditLogableBase(upServerId));
logable.updateCallStackFromThrowable(returnValue.getExceptionObject());
auditLogDirector.log(logable, AuditLogType.GLUSTER_SERVER_ADD_FAILED);
}
return returnValue.getSucceeded();
} catch (Exception e) {
log.info("Exception in peer probing alernate name '{}' on host with id '{}'", newServerName, upServerId);
log.debug("Exception", e);
return false;
}
}
private void removeServerFromDb(final VDS server) {
TransactionSupport.executeInNewTransaction(() -> {
removeVdsStatisticsFromDb(server);
removeVdsDynamicFromDb(server);
removeVdsStaticFromDb(server);
return null;
});
}
/**
* We need to be particularly careful about what servers we remove from the DB. A newly added (bootstrapped) server
* gets peer probed after its first reboot, and we don't want to accidentally remove such legitimate servers just
* before they are probed.
*/
private boolean isSyncableStatus(VDSStatus status) {
return status == VDSStatus.Up || status == VDSStatus.Down;
}
/**
* Returns the equivalent GlusterServer from the list of fetched servers.
*/
private GlusterServerInfo findGlusterServer(VDS server, List<GlusterServerInfo> fetchedServers) {
// compare gluster host uuid stored in server with the ones fetched from list
GlusterServer glusterServer = serverDao.getByServerId(server.getId());
for (GlusterServerInfo fetchedServer : fetchedServers) {
if (fetchedServer.getUuid().equals(glusterServer.getGlusterServerUuid())) {
return fetchedServer;
}
}
return null;
}
private List<GlusterServerInfo> fetchServers(Cluster cluster, VDS upServer, List<VDS> existingServers) {
// Create a copy of the existing servers as the fetchServers method can potentially remove elements from it
List<VDS> tempServers = new ArrayList<>(existingServers);
List<GlusterServerInfo> fetchedServers = fetchServers(upServer, tempServers);
if (fetchedServers == null) {
log.error("gluster peer status command failed on all servers of the cluster '{}'."
+ "Can't refresh it's data at this point.", cluster.getName());
return null;
}
if (fetchedServers.size() == 1 && existingServers.size() > 2) {
log.info("Gluster servers list fetched from server '{}' has only one server", upServer.getName());
// It's possible that the server we are using to get list of servers itself has been removed from the
// cluster, and hence is returning a single server (itself)
GlusterServerInfo server = fetchedServers.iterator().next();
if (isSameServer(upServer, server)) {
// Find a different UP server, and get servers list from it
tempServers.remove(upServer);
upServer = getNewUpServer(tempServers, upServer);
if (upServer == null) {
log.warn("The only UP server in cluster '{}' seems to have been removed from it using gluster CLI. "
+ "Can't refresh it's data at this point.",
cluster.getName());
return null;
}
fetchedServers = fetchServers(upServer, tempServers);
if (fetchedServers == null) {
log.warn("The only UP server in cluster '{}' (or the only one on which gluster peer status "
+ "command is working) seems to have been removed from it using gluster CLI. "
+ "Can't refresh it's data at this point.", cluster.getName());
return null;
}
}
}
return fetchedServers;
}
private boolean isSameServer(VDS upServer, GlusterServerInfo server) {
GlusterServer glusterUpServer = serverDao.getByServerId(upServer.getId());
return glusterUpServer.getGlusterServerUuid().equals(server.getUuid());
}
/**
* Fetches list of gluster servers by executing the gluster peer status command on the given UP server. If the
* gluster command fails, tries on other UP servers from the list of existing Servers recursively. Returns null if
* the command fails on all the servers.
*/
private List<GlusterServerInfo> fetchServers(VDS upServer, List<VDS> existingServers) {
List<GlusterServerInfo> fetchedServers = null;
while (fetchedServers == null && !existingServers.isEmpty()) {
log.debug("Fetching gluster servers list from server '{}'", upServer.getName());
fetchedServers = fetchServers(upServer);
if (fetchedServers == null) {
log.info("Gluster servers list failed in server '{}' moving it to NonOperational",
upServer.getName());
logUtil.logServerMessage(upServer, AuditLogType.GLUSTER_SERVERS_LIST_FAILED);
// Couldn't fetch servers from the up server. Mark it as non-operational
setNonOperational(upServer);
existingServers.remove(upServer);
upServer = getNewUpServer(existingServers, upServer);
}
}
return fetchedServers;
}
private void setNonOperational(VDS server) {
Cluster cluster = clusterDao.get(server.getClusterId());
if (!cluster.supportsVirtService()) {
SetNonOperationalVdsParameters nonOpParams =
new SetNonOperationalVdsParameters(server.getId(),
NonOperationalReason.GLUSTER_COMMAND_FAILED,
Collections.singletonMap(GlusterConstants.COMMAND, "gluster peer status"));
backend.runInternalAction(VdcActionType.SetNonOperationalVds,
nonOpParams,
ExecutionHandler.createInternalJobContext());
}
serverDao.updatePeerStatus(server.getId(), PeerStatus.DISCONNECTED);
logUtil.logServerMessage(server, AuditLogType.GLUSTER_SERVER_STATUS_DISCONNECTED);
}
/**
* Returns an UP server from the given list of servers, provided it is not the same as the given server.
*/
private VDS getNewUpServer(List<VDS> servers, VDS exceptServer) {
for (VDS server : servers) {
if (server.getStatus() == VDSStatus.Up && !server.getId().equals(exceptServer.getId())) {
return server;
}
}
return null;
}
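/**
* Fetches all volumes of the cluster from the given UP server and syncs them to the engine DB: volumes deleted
* directly from the gluster CLI are removed first, and the remaining ones are added or updated.
*/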
private void refreshVolumeData(Cluster cluster, VDS upServer, List<VDS> existingServers) {
acquireLock(cluster.getId());
try {
// Pass a copy of the existing servers as the fetchVolumes method can potentially remove elements from it
Map<Guid, GlusterVolumeEntity> volumesMap = fetchVolumes(upServer, new ArrayList<>(existingServers));
if (volumesMap == null) {
log.error("gluster volume info command failed on all servers of the cluster '{}'."
+ "Can't refresh it's data at this point.", cluster.getName());
return;
}
// remove deleted volumes must happen before adding new ones,
// to handle cases where user deleted a volume and created a
// new one with same name in a very short time
removeDeletedVolumes(cluster.getId(), volumesMap);
updateExistingAndNewVolumes(cluster.getId(), volumesMap);
} finally {
releaseLock(cluster.getId());
}
}
/**
* Fetches list of gluster volumes by executing the gluster volume info command on the given UP server. If the
* gluster command fails, tries on other UP servers from the list of existing Servers recursively. Returns null if
* the command fails on all the servers.
*/
private Map<Guid, GlusterVolumeEntity> fetchVolumes(VDS upServer, List<VDS> existingServers) {
Map<Guid, GlusterVolumeEntity> fetchedVolumes = null;
while (fetchedVolumes == null && !existingServers.isEmpty()) {
fetchedVolumes = fetchVolumes(upServer);
if (fetchedVolumes == null) {
// Couldn't fetch volumes from the up server. Mark it as non-operational
logUtil.logServerMessage(upServer, AuditLogType.GLUSTER_VOLUME_INFO_FAILED);
setNonOperational(upServer);
existingServers.remove(upServer);
upServer = getNewUpServer(existingServers, upServer);
}
}
return fetchedVolumes;
}
@SuppressWarnings("unchecked")
protected Map<Guid, GlusterVolumeEntity> fetchVolumes(VDS upServer) {
VDSReturnValue result =
runVdsCommand(VDSCommandType.GlusterVolumesList, new GlusterVolumesListVDSParameters(upServer.getId(),
upServer.getClusterId()));
return result.getSucceeded() ? (Map<Guid, GlusterVolumeEntity>) result.getReturnValue() : null;
}
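/**
* Removes from the engine DB the volumes of the cluster that are not present in the map of fetched volumes,
* i.e. volumes that were deleted directly using the gluster CLI. If the deleted volume is the gluster meta
* volume, CLI based snapshot scheduling is re-enabled for the cluster.
*/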
private void removeDeletedVolumes(Guid clusterId, Map<Guid, GlusterVolumeEntity> volumesMap) {
List<Guid> idsToRemove = new ArrayList<>();
for (GlusterVolumeEntity volume : volumeDao.getByClusterId(clusterId)) {
if (!volumesMap.containsKey(volume.getId())) {
idsToRemove.add(volume.getId());
log.debug("Volume '{}' has been removed directly using the gluster CLI. Removing it from engine as well.",
volume.getName());
logUtil.logVolumeMessage(volume, AuditLogType.GLUSTER_VOLUME_DELETED_FROM_CLI);
// Set the gluster cli schedule enabled flag back to true
if (Config.<String> getValue(ConfigValues.GlusterMetaVolumeName).equalsIgnoreCase(volume.getName())) {
Cluster cluster = clusterDao.get(clusterId);
cluster.setGlusterCliBasedSchedulingOn(true);
clusterDao.update(cluster);
}
}
}
if (!idsToRemove.isEmpty()) {
try {
volumeDao.removeAll(idsToRemove);
} catch (Exception e) {
log.error("Error while removing volumes from database!", e);
}
}
}
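/**
* For each fetched volume, creates it in the engine DB if it doesn't exist there yet, or updates its
* properties, bricks, options and transport types if it does.
*/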
private void updateExistingAndNewVolumes(Guid clusterId, Map<Guid, GlusterVolumeEntity> volumesMap) {
Cluster cluster = clusterDao.get(clusterId);
for (Entry<Guid, GlusterVolumeEntity> entry : volumesMap.entrySet()) {
GlusterVolumeEntity volume = entry.getValue();
log.debug("Analyzing volume '{}'", volume.getName());
GlusterVolumeEntity existingVolume = volumeDao.getById(entry.getKey());
if (existingVolume == null) {
try {
createVolume(volume);
} catch (Exception e) {
log.error("Could not save volume {} in database: {}", volume.getName(), e.getMessage());
log.debug("Exception", e);
}
// Set the CLI based snapshot scheduling flag accordingly
disableCliSnapshotSchedulingFlag(cluster, volume);
} else {
try {
log.debug("Volume '{}' exists in engine. Checking if it needs to be updated.",
existingVolume.getName());
updateVolume(existingVolume, volume);
} catch (Exception e) {
log.error("Error while updating volume '{}': {}", volume.getName(), e.getMessage());
log.debug("Exception", e);
}
}
}
}
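/**
* If gluster CLI based snapshot scheduling is on for the cluster and the newly added volume is the gluster
* meta volume, disables the CLI based snapshot scheduling for the cluster.
*/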
private void disableCliSnapshotSchedulingFlag(Cluster cluster, GlusterVolumeEntity volume) {
if (cluster.isGlusterCliBasedSchedulingOn()
&& Config.<String> getValue(ConfigValues.GlusterMetaVolumeName).equalsIgnoreCase(volume.getName())) {
VdcReturnValueBase returnValue =
backend.runInternalAction(VdcActionType.DisableGlusterCliSnapshotScheduleInternal,
new GlusterVolumeActionParameters(volume.getId(), false),
ExecutionHandler.createInternalJobContext());
if (!returnValue.getSucceeded()) {
log.warn("Unbale to set volume snapshot scheduling flag to gluster CLI scheduler on cluster {}",
cluster.getName());
} else {
logUtil.logVolumeMessage(volume, AuditLogType.GLUSTER_CLI_SNAPSHOT_SCHEDULE_DISABLED);
}
}
}
/**
* Creates a new volume in engine
*/
private void createVolume(final GlusterVolumeEntity volume) {
if (volume.getBricks() == null) {
log.warn("Bricks of volume '{}' were not fetched. " +
"Hence will not add it to engine at this point.", volume.getName());
return;
}
for (GlusterBrickEntity brick : volume.getBricks()) {
if (brick == null) {
log.warn("Volume '{}' contains a apparently corrupt brick(s). " +
"Hence will not add it to engine at this point.",
volume.getName());
return;
} else if (brick.getServerId() == null) {
log.warn("Volume '{}' contains brick(s) from unknown hosts. " +
"Hence will not add it to engine at this point.",
volume.getName());
return;
}
// Set the initial brick status to be the same as the volume status.
// The actual brick status is updated by another sync job;
// until that happens, this gives a better UI experience
brick.setStatus(volume.getStatus());
}
logUtil.logVolumeMessage(volume, AuditLogType.GLUSTER_VOLUME_CREATED_FROM_CLI);
if (!volume.getVolumeType().isSupported()) {
logUtil.logAuditMessage(volume.getClusterId(),
volume,
null,
AuditLogType.GLUSTER_VOLUME_TYPE_UNSUPPORTED,
Collections.singletonMap(GlusterConstants.VOLUME_TYPE, volume.getVolumeType().toString()));
}
log.debug("Volume '{}' has been created directly using the gluster CLI. Creating it in engine as well.",
volume.getName());
volumeDao.save(volume);
}
private void updateVolume(GlusterVolumeEntity existingVolume, GlusterVolumeEntity fetchedVolume) {
updateVolumeProperties(existingVolume, fetchedVolume);
updateBricks(existingVolume, fetchedVolume);
updateOptions(existingVolume, fetchedVolume);
updateTransportTypes(existingVolume, fetchedVolume);
}
private void updateTransportTypes(GlusterVolumeEntity existingVolume, GlusterVolumeEntity fetchedVolume) {
Set<TransportType> existingTransportTypes = existingVolume.getTransportTypes();
Set<TransportType> fetchedTransportTypes = fetchedVolume.getTransportTypes();
Collection<TransportType> addedTransportTypes =
CollectionUtils.subtract(fetchedTransportTypes, existingTransportTypes);
if (!addedTransportTypes.isEmpty()) {
log.info("Adding transport type(s) '{}' to volume '{}'",
addedTransportTypes,
existingVolume.getName());
volumeDao.addTransportTypes(existingVolume.getId(), addedTransportTypes);
}
Collection<TransportType> removedTransportTypes =
CollectionUtils.subtract(existingTransportTypes, fetchedTransportTypes);
if (!removedTransportTypes.isEmpty()) {
log.info("Removing transport type(s) '{}' from volume '{}'",
removedTransportTypes,
existingVolume.getName());
volumeDao.removeTransportTypes(existingVolume.getId(), removedTransportTypes);
}
}
private void updateBricks(GlusterVolumeEntity existingVolume, GlusterVolumeEntity fetchedVolume) {
List<GlusterBrickEntity> fetchedBricks = fetchedVolume.getBricks();
if (fetchedBricks == null) {
log.warn("Bricks of volume '{}' were not fetched. " +
"Hence will not try to update them in engine at this point.",
fetchedVolume.getName());
return;
}
removeDeletedBricks(existingVolume, fetchedBricks);
updateExistingAndNewBricks(existingVolume, fetchedBricks);
}
@SuppressWarnings("serial")
private void removeDeletedBricks(GlusterVolumeEntity existingVolume, List<GlusterBrickEntity> fetchedBricks) {
List<Guid> idsToRemove = new ArrayList<>();
for (final GlusterBrickEntity existingBrick : existingVolume.getBricks()) {
if (!GlusterCoreUtil.containsBrick(fetchedBricks, existingBrick)) {
idsToRemove.add(existingBrick.getId());
log.info("Detected brick '{}' removed from volume '{}'. Removing it from engine DB as well.",
existingBrick.getQualifiedName(),
existingVolume.getName());
logUtil.logAuditMessage(existingVolume.getClusterId(), existingVolume, null,
AuditLogType.GLUSTER_VOLUME_BRICK_REMOVED_FROM_CLI,
Collections.singletonMap(GlusterConstants.BRICK, existingBrick.getQualifiedName()));
}
}
if (!idsToRemove.isEmpty()) {
try {
brickDao.removeAll(idsToRemove);
} catch (Exception e) {
log.error("Error while removing bricks from database: {}", e.getMessage());
log.debug("Exception", e);
}
}
}
@SuppressWarnings("serial")
private void updateExistingAndNewBricks(GlusterVolumeEntity existingVolume, List<GlusterBrickEntity> fetchedBricks) {
for (final GlusterBrickEntity fetchedBrick : fetchedBricks) {
GlusterBrickEntity existingBrick = GlusterCoreUtil.findBrick(existingVolume.getBricks(), fetchedBrick);
if (existingBrick == null) {
// server id could be null if the new brick resides on a server that is not yet added in the engine.
// Adding such servers to the engine requires manual approval by the user, and hence can't be automated.
if (fetchedBrick.getServerId() != null) {
log.info("New brick '{}' added to volume '{}' from gluster CLI. Updating engine DB accordingly.",
fetchedBrick.getQualifiedName(),
existingVolume.getName());
fetchedBrick.setStatus(existingVolume.isOnline() ? GlusterStatus.UP : GlusterStatus.DOWN);
brickDao.save(fetchedBrick);
logUtil.logAuditMessage(existingVolume.getClusterId(), existingVolume, null,
AuditLogType.GLUSTER_VOLUME_BRICK_ADDED_FROM_CLI,
Collections.singletonMap(GlusterConstants.BRICK, fetchedBrick.getQualifiedName()));
}
} else {
// brick found. Update the brick order and network id if they have changed.
if (!Objects.equals(existingBrick.getBrickOrder(), fetchedBrick.getBrickOrder())) {
log.info("Brick order for brick '{}' changed from '{}' to '{}' because of direct CLI operations. Updating engine DB accordingly.",
existingBrick.getQualifiedName(),
existingBrick.getBrickOrder(),
fetchedBrick.getBrickOrder());
brickDao.updateBrickOrder(existingBrick.getId(), fetchedBrick.getBrickOrder());
}
// update network id, if different
if (!Objects.equals(existingBrick.getNetworkId(), fetchedBrick.getNetworkId())) {
log.info("Network address for brick '{}' detected as '{}'. Updating engine DB accordingly.",
existingBrick.getQualifiedName(),
fetchedBrick.getNetworkAddress());
brickDao.updateBrickNetworkId(existingBrick.getId(), fetchedBrick.getNetworkId());
}
}
}
}
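/**
* Syncs the volume options with the fetched ones: new and changed options are saved / updated, and options
* that were reset directly from the gluster CLI are removed from the engine DB.
*/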
private void updateOptions(GlusterVolumeEntity existingVolume, GlusterVolumeEntity fetchedVolume) {
Collection<GlusterVolumeOptionEntity> existingOptions = existingVolume.getOptions();
Collection<GlusterVolumeOptionEntity> fetchedOptions = fetchedVolume.getOptions();
updateExistingAndNewOptions(existingVolume, fetchedOptions);
removeDeletedOptions(fetchedVolume, existingOptions);
}
@SuppressWarnings("serial")
private void removeDeletedOptions(GlusterVolumeEntity fetchedVolume,
Collection<GlusterVolumeOptionEntity> existingOptions) {
List<Guid> idsToRemove = new ArrayList<>();
for (final GlusterVolumeOptionEntity existingOption : existingOptions) {
if (fetchedVolume.getOption(existingOption.getKey()) == null) {
idsToRemove.add(existingOption.getId());
log.info("Detected option '{}' reset on volume '{}'. Removing it from engine DB as well.",
existingOption.getKey(),
fetchedVolume.getName());
// The option "group" gets implicitly replaced with a set of options defined in the group file
// Hence it is not required to log it as a removed option, as that would be misleading.
if (!GlusterConstants.OPTION_GROUP.equals(existingOption.getKey())) {
Map<String, String> customValues = new HashMap<>();
customValues.put(GlusterConstants.OPTION_KEY, existingOption.getKey());
customValues.put(GlusterConstants.OPTION_VALUE, existingOption.getValue());
logUtil.logAuditMessage(fetchedVolume.getClusterId(), fetchedVolume, null,
AuditLogType.GLUSTER_VOLUME_OPTION_RESET_FROM_CLI, customValues);
}
}
}
if (!idsToRemove.isEmpty()) {
try {
optionDao.removeAll(idsToRemove);
} catch (Exception e) {
log.error("Error while removing options of volume '{}' from database: {}",
fetchedVolume.getName(),
e.getMessage());
log.debug("Exception", e);
}
}
}
private void updateExistingAndNewOptions(final GlusterVolumeEntity existingVolume,
Collection<GlusterVolumeOptionEntity> fetchedOptions) {
Map<String, GlusterVolumeOptionEntity> existingOptions = new HashMap<>();
Map<String, GlusterVolumeOptionEntity> newOptions = new HashMap<>();
for (final GlusterVolumeOptionEntity fetchedOption : fetchedOptions) {
final GlusterVolumeOptionEntity existingOption = existingVolume.getOption(fetchedOption.getKey());
if (existingOption == null) {
newOptions.put(fetchedOption.getKey(), fetchedOption);
} else if (!existingOption.getValue().equals(fetchedOption.getValue())) {
fetchedOption.setId(existingOption.getId());
existingOptions.put(fetchedOption.getKey(), fetchedOption);
}
}
final List<GlusterVolumeOptionEntity> newOptionsSortedList = new ArrayList<>(newOptions.values());
final List<GlusterVolumeOptionEntity> existingOptionsSortedList = new ArrayList<>(existingOptions.values());
Collections.sort(newOptionsSortedList);
Collections.sort(existingOptionsSortedList);
// Insert the new options in a single transaction
if (!newOptionsSortedList.isEmpty()) {
TransactionSupport.executeInScope(TransactionScopeOption.Required,
() -> {
saveNewOptions(existingVolume, newOptionsSortedList);
return null;
});
}
// Update the existing options in a single transaction
if (!existingOptionsSortedList.isEmpty()) {
TransactionSupport.executeInScope(TransactionScopeOption.Required,
() -> {
updateExistingOptions(existingVolume, existingOptionsSortedList);
return null;
});
}
}
private void saveNewOptions(GlusterVolumeEntity volume, Collection<GlusterVolumeOptionEntity> entities) {
optionDao.saveAll(entities);
for (final GlusterVolumeOptionEntity entity : entities) {
Map<String, String> customValues = new HashMap<>();
customValues.put(GlusterConstants.OPTION_KEY, entity.getKey());
customValues.put(GlusterConstants.OPTION_VALUE, entity.getValue());
logUtil.logAuditMessage(volume.getClusterId(), volume, null,
AuditLogType.GLUSTER_VOLUME_OPTION_SET_FROM_CLI, customValues);
log.info("New option '{}'='{}' set on volume '{}' from gluster CLI. Updating engine DB accordingly.",
entity.getKey(),
entity.getValue(),
volume.getName());
}
}
private void updateExistingOptions(final GlusterVolumeEntity volume, Collection<GlusterVolumeOptionEntity> entities) {
optionDao.updateAll("UpdateGlusterVolumeOption", entities);
for (final GlusterVolumeOptionEntity entity : entities) {
Map<String, String> customValues = new HashMap<>();
customValues.put(GlusterConstants.OPTION_KEY, entity.getKey());
customValues.put(GlusterConstants.OPTION_OLD_VALUE, volume.getOption(entity.getKey()).getValue());
customValues.put(GlusterConstants.OPTION_NEW_VALUE, entity.getValue());
logUtil.logAuditMessage(volume.getClusterId(), volume, null,
AuditLogType.GLUSTER_VOLUME_OPTION_CHANGED_FROM_CLI, customValues);
log.info("Detected change in value of option '{}' of volume '{}' from '{}' to '{}'. Updating engine DB accordingly.",
entity.getKey(),
volume.getName(),
volume.getOption(entity.getKey()).getValue(),
entity.getValue());
}
}
/**
* Updates basic properties of the volume. Does not update bricks, options, or transport types
*
* @param existingVolume
* Volume that is to be updated
* @param fetchedVolume
* Volume fetched from GlusterFS, containing latest properties
*/
@SuppressWarnings("incomplete-switch")
public void updateVolumeProperties(GlusterVolumeEntity existingVolume, final GlusterVolumeEntity fetchedVolume) {
boolean changed = false;
boolean volumeTypeUnSupported = false;
if (existingVolume.getVolumeType() != fetchedVolume.getVolumeType()) {
if (existingVolume.getVolumeType().isSupported() && !fetchedVolume.getVolumeType().isSupported()) {
volumeTypeUnSupported = true;
}
existingVolume.setVolumeType(fetchedVolume.getVolumeType());
changed = true;
}
if (existingVolume.getVolumeType().isReplicatedType() &&
!Objects.equals(existingVolume.getReplicaCount(), fetchedVolume.getReplicaCount())) {
existingVolume.setReplicaCount(fetchedVolume.getReplicaCount());
changed = true;
}
if (existingVolume.getVolumeType().isStripedType() &&
!Objects.equals(existingVolume.getStripeCount(), fetchedVolume.getStripeCount())) {
existingVolume.setStripeCount(fetchedVolume.getStripeCount());
changed = true;
}
if (changed) {
log.info("Updating volume '{}' with fetched properties.", existingVolume.getName());
volumeDao.updateGlusterVolume(existingVolume);
logUtil.logVolumeMessage(existingVolume, AuditLogType.GLUSTER_VOLUME_PROPERTIES_CHANGED_FROM_CLI);
if (volumeTypeUnSupported) {
logUtil.logAuditMessage(fetchedVolume.getClusterId(),
fetchedVolume,
null,
AuditLogType.GLUSTER_VOLUME_TYPE_UNSUPPORTED,
Collections.singletonMap(GlusterConstants.VOLUME_TYPE, fetchedVolume.getVolumeType().toString()));
}
}
if (existingVolume.getStatus() != fetchedVolume.getStatus()) {
existingVolume.setStatus(fetchedVolume.getStatus());
glusterDBUtils.updateVolumeStatus(existingVolume.getId(), fetchedVolume.getStatus());
logUtil.logVolumeMessage(existingVolume,
fetchedVolume.getStatus() == GlusterStatus.UP ? AuditLogType.GLUSTER_VOLUME_STARTED_FROM_CLI
: AuditLogType.GLUSTER_VOLUME_STOPPED_FROM_CLI);
}
}
/**
* Refreshes the brick statuses from GlusterFS. This method is scheduled less frequently as it uses the 'volume
* status' command, which adds significant overhead on Gluster processes, and hence should not be invoked too
* frequently.
*/
@OnTimerMethodAnnotation("refreshHeavyWeightData")
public void refreshHeavyWeightData() {
log.debug("Refreshing Gluster Data [heavyweight]");
for (Cluster cluster : clusterDao.getAll()) {
if (cluster.supportsGlusterService()) {
try {
refreshClusterHeavyWeightData(cluster);
} catch (Exception e) {
log.error("Error while refreshing Gluster heavyweight data of cluster '{}': {}",
cluster.getName(),
e.getMessage());
log.debug("Exception", e);
}
}
}
}
private void refreshClusterHeavyWeightData(Cluster cluster) {
VDS upServer = glusterUtil.getRandomUpServer(cluster.getId());
if (upServer == null) {
log.debug("No server UP in cluster '{}'. Can't refresh it's data at this point.", cluster.getName());
return;
}
for (GlusterVolumeEntity volume : volumeDao.getByClusterId(cluster.getId())) {
log.debug("Refreshing brick statuses for volume '{}' of cluster '{}'",
volume.getName(),
cluster.getName());
// brick statuses can be fetched only for started volumes
if (volume.isOnline()) {
acquireLock(cluster.getId());
try {
refreshVolumeDetails(upServer, volume);
} catch (Exception e) {
log.error("Error while refreshing brick statuses for volume '{}' of cluster '{}': {}",
volume.getName(),
cluster.getName(),
e.getMessage());
log.debug("Exception", e);
} finally {
releaseLock(cluster.getId());
}
}
}
}
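/**
* Refreshes the advanced details (capacity info, brick statuses and brick properties) of the given volume in
* the engine DB, using the volume advanced details fetched from the given UP server.
*/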
public void refreshVolumeDetails(VDS upServer, GlusterVolumeEntity volume) {
List<GlusterBrickEntity> bricksToUpdate = new ArrayList<>();
List<GlusterBrickEntity> brickPropertiesToUpdate = new ArrayList<>();
List<GlusterBrickEntity> brickPropertiesToAdd = new ArrayList<>();
GlusterVolumeAdvancedDetails volumeAdvancedDetails = getVolumeAdvancedDetails(upServer, volume.getClusterId(), volume.getName());
if (volumeAdvancedDetails == null) {
log.error("Error while refreshing brick statuses for volume '{}'. Failed to get volume advanced details ",
volume.getName());
return;
}
if (volumeAdvancedDetails.getCapacityInfo() != null) {
if (volume.getAdvancedDetails().getCapacityInfo() == null) {
volumeDao.addVolumeCapacityInfo(volumeAdvancedDetails.getCapacityInfo());
} else {
volumeDao.updateVolumeCapacityInfo(volumeAdvancedDetails.getCapacityInfo());
}
}
Map<Guid, BrickProperties> brickPropertiesMap =
getBrickPropertiesMap(volumeAdvancedDetails);
for (GlusterBrickEntity brick : volume.getBricks()) {
BrickProperties brickProperties = brickPropertiesMap.get(brick.getId());
if (brickProperties != null) {
if (brickProperties.getStatus() != brick.getStatus()) {
logBrickStatusChange(volume, brick, brickProperties.getStatus());
brick.setStatus(brickProperties.getStatus());
bricksToUpdate.add(brick);
}
if (brick.getBrickProperties() == null) {
BrickDetails brickDetails = new BrickDetails();
brickDetails.setBrickProperties(brickProperties);
brick.setBrickDetails(brickDetails);
brickPropertiesToAdd.add(brick);
} else if (brickProperties.getTotalSize() != brick.getBrickProperties().getTotalSize()
|| brickProperties.getFreeSize() != brick.getBrickProperties().getFreeSize()) {
brick.getBrickDetails().setBrickProperties(brickProperties);
brickPropertiesToUpdate.add(brick);
}
}
}
if (!brickPropertiesToAdd.isEmpty()) {
brickDao.addBrickProperties(brickPropertiesToAdd);
}
if (!brickPropertiesToUpdate.isEmpty()) {
brickDao.updateBrickProperties(brickPropertiesToUpdate);
}
if (!bricksToUpdate.isEmpty()) {
brickDao.updateBrickStatuses(bricksToUpdate);
}
}
private void logBrickStatusChange(GlusterVolumeEntity volume, final GlusterBrickEntity brick, final GlusterStatus fetchedStatus) {
log.debug("Detected that status of brick '{}' in volume '{}' changed from '{}' to '{}'",
brick.getQualifiedName(), volume.getName(), brick.getStatus(), fetchedStatus);
Map<String, String> customValues = new HashMap<>();
customValues.put(GlusterConstants.BRICK_PATH, brick.getQualifiedName());
customValues.put(GlusterConstants.OPTION_OLD_VALUE, brick.getStatus().toString());
customValues.put(GlusterConstants.OPTION_NEW_VALUE, fetchedStatus.toString());
logUtil.logAuditMessage(volume.getClusterId(), volume, null,
AuditLogType.GLUSTER_BRICK_STATUS_CHANGED, customValues);
if (fetchedStatus == GlusterStatus.DOWN) {
logUtil.logAuditMessage(volume.getClusterId(), volume, null,
AuditLogType.GLUSTER_BRICK_STATUS_DOWN, brick.getId(), brick.getQualifiedName());
} else if (fetchedStatus == GlusterStatus.UP) {
alertDirector.removeAlertsByBrickIdLogType(brick.getId(), AuditLogType.GLUSTER_BRICK_STATUS_DOWN);
}
}
private Map<Guid, BrickProperties> getBrickPropertiesMap(GlusterVolumeAdvancedDetails volumeDetails) {
Map<Guid, BrickProperties> brickStatusMap = new HashMap<>();
for (BrickDetails brickDetails : volumeDetails.getBrickDetails()) {
if (brickDetails.getBrickProperties().getBrickId() != null) {
brickStatusMap.put(brickDetails.getBrickProperties().getBrickId(), brickDetails.getBrickProperties());
}
}
return brickStatusMap;
}
protected GlusterVolumeAdvancedDetails getVolumeAdvancedDetails(VDS upServer, Guid clusterId, String volumeName) {
VDSReturnValue result = runVdsCommand(VDSCommandType.GetGlusterVolumeAdvancedDetails,
new GlusterVolumeAdvancedDetailsVDSParameters(upServer.getId(),
clusterId,
volumeName,
null,
false, true));
// Purposely returning the return value as-is without checking result.getSucceeded(), because
// GetGlusterVolumeAdvancedDetails runs multiple commands internally, and getSucceeded() will be false if only
// the last one fails. The brick status details are still available, so the brick status can be updated without any issue.
return (GlusterVolumeAdvancedDetails) result.getReturnValue();
}
private void removeVdsStatisticsFromDb(VDS server) {
vdsStatisticsDao.remove(server.getId());
}
private void removeVdsStaticFromDb(VDS server) {
vdsStaticDao.remove(server.getId());
}
private void removeVdsDynamicFromDb(VDS server) {
vdsDynamicDao.remove(server.getId());
}
/**
* Refreshes self heal info from GlusterFS. This method is scheduled less frequently as it uses the 'volume
* heal info' command, which adds significant overhead on Gluster processes, and hence should not be invoked too
* frequently.
*/
@OnTimerMethodAnnotation("refreshSelfHealInfo")
public void refreshSelfHealInfo() {
log.debug("Refreshing Gluster Self Heal Data");
for (Cluster cluster : clusterDao.getAll()) {
if (supportsGlusterSelfHealMonitoring(cluster)) {
try {
refreshSelfHealData(cluster);
} catch (Exception e) {
log.error("Error while refreshing Gluster self heal data of cluster '{}': {}",
cluster.getName(),
e.getMessage());
log.debug("Exception", e);
}
}
}
log.debug("Refreshing Gluster Self Heal data is completed");
}
/**
* Refresh self heal information for the given cluster. It is made public so that it can be called from BLL Command
* directly.
*/
public void refreshSelfHealData(Cluster cluster) {
VDS upServer = glusterUtil.getRandomUpServer(cluster.getId());
if (upServer == null) {
log.debug("No server UP in cluster '{}'. Can't refresh self heal data at this point.", cluster.getName());
return;
}
for (GlusterVolumeEntity volume : volumeDao.getByClusterId(cluster.getId())) {
log.debug("Refreshing self heal status for volume '{}' of cluster '{}'",
volume.getName(),
cluster.getName());
// self heal info can be fetched only for started volumes
if (volume.isOnline()) {
try {
refreshSelfHealData(upServer, volume);
} catch (Exception e) {
log.error("Error while refreshing brick statuses for volume '{}' of cluster '{}': {}",
volume.getName(),
cluster.getName(),
e.getMessage());
log.debug("Exception", e);
}
}
}
}
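/**
* Fetches the self heal info ('volume heal info') of the given volume from the given UP server, and updates
* the un-synced entries count and its trend history for each brick of the volume in the engine DB.
*/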
private void refreshSelfHealData(VDS upServer, GlusterVolumeEntity volume) {
Integer usageHistoryLimit = Config.getValue(ConfigValues.GlusterUnSyncedEntriesHistoryLimit);
Map<Guid, Integer> healInfo = getGlusterVolumeHealInfo(upServer, volume.getName());
for (GlusterBrickEntity brick : volume.getBricks()) {
brick.setUnSyncedEntries(healInfo.get(brick.getId()));
brick.setUnSyncedEntriesTrend(
addToHistory(brick.getUnSyncedEntriesTrend(), healInfo.get(brick.getId()), usageHistoryLimit));
}
brickDao.updateUnSyncedEntries(volume.getBricks());
}
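/**
* Appends the new value to the given trend history, keeping at most 'limit' of the most recent entries.
* A null value is stored as -1 so that the fixed time interval between entries is maintained.
*/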
private List<Integer> addToHistory(List<Integer> current, Integer newValue, int limit) {
if (newValue == null) {
// Store -1 instead of null so that the fixed time interval between entries is maintained.
newValue = -1;
}
if (current == null || current.isEmpty()) {
return Collections.singletonList(newValue);
}
if (limit == 0) {
return Collections.emptyList();
}
List<Integer> res = new ArrayList<>(current);
res.add(newValue);
if (limit >= res.size()) {
return res;
}
return res.subList(res.size() - limit, res.size());
}
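/**
* Fetches the self heal info of the given volume from the given UP server, returning a map of brick id to the
* number of un-synced entries on that brick. Returns an empty map if the VDS command fails.
*/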
@SuppressWarnings("unchecked")
private Map<Guid, Integer> getGlusterVolumeHealInfo(VDS upServer, String volumeName) {
VDSReturnValue result = runVdsCommand(VDSCommandType.GetGlusterVolumeHealInfo,
new GlusterVolumeVDSParameters(upServer.getId(), volumeName));
if (result.getSucceeded()) {
return (Map<Guid, Integer>) result.getReturnValue();
} else {
return Collections.emptyMap();
}
}
private boolean supportsGlusterSelfHealMonitoring(Cluster cluster) {
return cluster.supportsGlusterService()
&& GlusterFeatureSupported.glusterSelfHealMonitoring(cluster.getCompatibilityVersion());
}
}