/* * Copyright 2017 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ package com.github.ambry.clustermap; import com.github.ambry.config.ClusterMapConfig; import com.github.ambry.config.VerifiableProperties; import com.github.ambry.utils.Utils; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.ListIterator; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.TreeSet; import org.apache.helix.HelixAdmin; import org.apache.helix.model.IdealState; import org.apache.helix.model.InstanceConfig; import org.apache.helix.model.LeaderStandbySMD; import org.apache.helix.model.builder.AutoModeISBuilder; import org.json.JSONException; import org.json.JSONObject; /** * A class to bootstrap static cluster map information into Helix. * * For each node that is added to Helix, its {@link InstanceConfig} will contain the node level information, which is * of the following format currently: * *InstanceConfig: { * "id" : "localhost_17088", # id is the instanceName [host_port] * "mapFields" : { * "/tmp/c/0" : { # disk is identified by the [mountpath]. DiskInfo conists of: * "capacityInBytes" : "912680550400", # [capacity] * "diskState" : "AVAILABLE", # [state] * "Replicas" : "10:107374182400," # comma-separated list of partition ids whose replicas are * }, # hosted on this disk in [replica:replicaCapacity] format. * "/tmp/c/1" : { * "capacityInBytes" : "912680550400", * "diskState" : "AVAILABLE", * "Replicas" : "40:107374182400,20:107374182400," * }, * "/tmp/c/2" : { * "capacityInBytes" : "912680550400", * "diskState" : "AVAILABLE", * "Replicas" : "30:107374182400," * } * }, * "listFields" : { * "SEALED" : [ "20" ] # comma-separated list of sealed replicas on this node. * }, * "simpleFields" : { * "HELIX_HOST" : "localhost", # hostname (Helix field) * "HELIX_PORT" : "17088", # port (Helix field) * "datacenter" : "dc1", # [datacenterName] * "rackId" : "1611", # [rackId] * "sslPort": "27088" # [sslPort] * # @todo: version. * } *} */ class HelixBootstrapUpgradeUtil { private final StaticClusterManager staticClusterMap; private final Map<String, HelixAdmin> adminForDc = new HashMap<>(); // The set of partitions already present in Helix when this tool is run. private final TreeSet<Long> existingPartitions = new TreeSet<>(); // The set of resources already present in Helix when this tool is run. private final TreeSet<Long> existingResources = new TreeSet<>(); private final String localDc; private final String clusterName; private final int maxPartitionsInOneResource; private Map<String, String> dataCenterToZkAddress; /** * Takes in the path to the files that make up the static cluster map and adds or updates the cluster map information * in Helix to make the two consistent. * @param hardwareLayoutPath the path to the hardware layout file. * @param partitionLayoutPath the path to the partition layout file. * @param zkLayoutPath the path to the zookeeper layout file. * @param clusterNamePrefix the prefix that when combined with the cluster name in the static cluster map files * will give the cluster name in Helix to bootstrap or upgrade. * @param localDc the name of the local datacenter. This can be null. * @param maxPartitionsInOneResource the maximum number of Ambry partitions to group under a single Helix resource. * @param helixAdminFactory the {@link HelixAdminFactory} to use to instantiate {@link HelixAdmin} * @throws IOException if there is an error reading a file. * @throws JSONException if there is an error parsing the JSON content in any of the files. */ static void bootstrapOrUpgrade(String hardwareLayoutPath, String partitionLayoutPath, String zkLayoutPath, String clusterNamePrefix, String localDc, int maxPartitionsInOneResource, HelixAdminFactory helixAdminFactory) throws Exception { HelixBootstrapUpgradeUtil clusterMapToHelixMapper = new HelixBootstrapUpgradeUtil(hardwareLayoutPath, partitionLayoutPath, zkLayoutPath, clusterNamePrefix, localDc, maxPartitionsInOneResource); clusterMapToHelixMapper.updateClusterMapInHelix(helixAdminFactory); clusterMapToHelixMapper.validateAndClose(); } /** * Instantiates this class with the given information. * @param hardwareLayoutPath the path to the hardware layout file. * @param partitionLayoutPath the path to the partition layout file. * @param zkLayoutPath the path to the zookeeper layout file. * @param clusterNamePrefix the prefix that when combined with the cluster name in the static cluster map files * will give the cluster name in Helix to bootstrap or upgrade. * @param localDc the name of the local datacenter. This can be null. * @throws IOException if there is an error reading a file. * @throws JSONException if there is an error parsing the JSON content in any of the files. */ private HelixBootstrapUpgradeUtil(String hardwareLayoutPath, String partitionLayoutPath, String zkLayoutPath, String clusterNamePrefix, String localDc, int maxPartitionsInOneResource) throws Exception { this.localDc = localDc; this.maxPartitionsInOneResource = maxPartitionsInOneResource; this.dataCenterToZkAddress = ClusterMapUtils.parseZkJsonAndPopulateZkInfo(Utils.readStringFromFile(zkLayoutPath)); Properties props = new Properties(); // The following properties are immaterial for the tool, but the ClusterMapConfig mandates their presence. props.setProperty("clustermap.host.name", "localhost"); props.setProperty("clustermap.cluster.name", ""); props.setProperty("clustermap.datacenter.name", localDc == null ? "none" : localDc); ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props)); if (new File(partitionLayoutPath).exists()) { staticClusterMap = (new StaticClusterAgentsFactory(clusterMapConfig, hardwareLayoutPath, partitionLayoutPath)).getClusterMap(); } else { staticClusterMap = (new StaticClusterAgentsFactory(clusterMapConfig, new PartitionLayout( new HardwareLayout(new JSONObject(Utils.readStringFromFile(hardwareLayoutPath)), clusterMapConfig)))).getClusterMap(); } String clusterNameInStaticClusterMap = staticClusterMap.partitionLayout.getClusterName(); this.clusterName = clusterNamePrefix + clusterNameInStaticClusterMap; System.out.println( "Associating static Ambry cluster \"" + clusterNameInStaticClusterMap + "\" with cluster\"" + clusterName + "\" in Helix"); for (Datacenter datacenter : staticClusterMap.hardwareLayout.getDatacenters()) { if (!dataCenterToZkAddress.keySet().contains(datacenter.getName())) { throw new IllegalArgumentException( "There is no ZK host for datacenter " + datacenter.getName() + " in the static clustermap"); } } } /** * Map the information in the layout files to Helix. Calling this method multiple times has no effect if the * information in the static files do not change. This tool is therefore safe to use for upgrades. * * Instead of defining the entire cluster under a single resource, or defining a resource for every partition, the * tool groups together partitions under resources, with a limit to the number of partitions that will be grouped * under a single resource. * * @param helixAdminFactory the {@link HelixAdminFactory} to use to instantiate {@link HelixAdmin} */ private void updateClusterMapInHelix(HelixAdminFactory helixAdminFactory) { initializeAdminsAndAddCluster(helixAdminFactory); HelixAdmin refAdmin = localDc != null ? adminForDc.get(localDc) : adminForDc.values().iterator().next(); populateResourcesAndPartitionsSet(refAdmin); addNewDataNodes(); long nextResource = existingResources.isEmpty() ? 1 : existingResources.last() + 1; List<Partition> partitionsUnderNextResource = new ArrayList<>(); for (PartitionId partitionId : staticClusterMap.partitionLayout.getPartitions()) { Partition partition = (Partition) partitionId; if (existingPartitions.contains(partition.getId())) { updatePartitionInfoIfChanged(partition); } else { partitionsUnderNextResource.add(partition); if (partitionsUnderNextResource.size() == maxPartitionsInOneResource) { addNewAmbryPartitions(partitionsUnderNextResource, Long.toString(nextResource)); partitionsUnderNextResource.clear(); nextResource++; } } } if (!partitionsUnderNextResource.isEmpty()) { addNewAmbryPartitions(partitionsUnderNextResource, Long.toString(nextResource)); } } /** * Initialize a map of dataCenter to HelixAdmin based on the given zk Connect Strings. * @param helixAdminFactory the {@link HelixAdminFactory} to use to instantiate {@link HelixAdmin} */ private void initializeAdminsAndAddCluster(HelixAdminFactory helixAdminFactory) { for (Map.Entry<String, String> entry : dataCenterToZkAddress.entrySet()) { HelixAdmin admin = helixAdminFactory.getHelixAdmin(entry.getValue()); adminForDc.put(entry.getKey(), admin); // Add a cluster entry in every DC if (!admin.getClusters().contains(clusterName)) { admin.addCluster(clusterName); admin.addStateModelDef(clusterName, LeaderStandbySMD.name, LeaderStandbySMD.build()); } } } /** * Populate the set of existing resources and existing partitions in the cluster. This assumes that all partitions * and resources exist in all datacenters (This assumption helps simplify the logic. This can be gotten rid of in * the future if need be). * @param dcAdmin the reference admin (preferably the admin to the zookeeper server in the local datacenter). */ private void populateResourcesAndPartitionsSet(HelixAdmin dcAdmin) { for (String resource : dcAdmin.getResourcesInCluster(clusterName)) { existingResources.add(Long.valueOf(resource)); for (String partition : dcAdmin.getResourceIdealState(clusterName, resource).getPartitionSet()) { existingPartitions.add(Long.valueOf(partition)); } } } /** * Add nodes in the static cluster map that is not already present in Helix. * Ignores those that are already present. This is to make upgrades smooth. * * Replica/Partition information is not updated by this method. That is updated when * replicas and partitions are added. * * At this time, node removals are not dealt with. */ private void addNewDataNodes() { for (Datacenter dc : staticClusterMap.hardwareLayout.getDatacenters()) { HelixAdmin dcAdmin = adminForDc.get(dc.getName()); for (DataNode node : dc.getDataNodes()) { String instanceName = getInstanceName(node); if (!dcAdmin.getInstancesInCluster(clusterName).contains(instanceName)) { InstanceConfig instanceConfig = new InstanceConfig(instanceName); instanceConfig.setHostName(node.getHostname()); instanceConfig.setPort(Integer.toString(node.getPort())); // populate mountPath -> Disk information. Map<String, Map<String, String>> diskInfos = new HashMap<>(); for (Disk disk : node.getDisks()) { Map<String, String> diskInfo = new HashMap<>(); diskInfo.put(ClusterMapUtils.DISK_CAPACITY_STR, Long.toString(disk.getRawCapacityInBytes())); diskInfo.put(ClusterMapUtils.DISK_STATE, ClusterMapUtils.AVAILABLE_STR); // Note: An instance config has to contain the information for each disk about the replicas it hosts. // This information will be initialized to the empty string - but will be updated whenever the partition // is added to the cluster. diskInfo.put(ClusterMapUtils.REPLICAS_STR, ""); diskInfos.put(disk.getMountPath(), diskInfo); } // Add all instance configuration. instanceConfig.getRecord().setMapFields(diskInfos); if (node.hasSSLPort()) { instanceConfig.getRecord().setSimpleField(ClusterMapUtils.SSLPORT_STR, Integer.toString(node.getSSLPort())); } instanceConfig.getRecord().setSimpleField(ClusterMapUtils.DATACENTER_STR, node.getDatacenterName()); instanceConfig.getRecord().setSimpleField(ClusterMapUtils.RACKID_STR, Long.toString(node.getRackId())); instanceConfig.getRecord().setListField(ClusterMapUtils.SEALED_STR, new ArrayList<String>()); // Finally, add this node to the DC. dcAdmin.addInstance(clusterName, instanceConfig); } } System.out.println("Added all new nodes in datacenter " + dc.getName()); } } /** * Goes through each existing partition and updates the {@link PartitionState} and capacity information for the * replicas in each of the instances that hosts a replica for this partition, if it has changed and is different from * the current information in the static cluster map. * @param partition the partition whose {@link PartitionState} and/or capacity may have to be updated. */ private void updatePartitionInfoIfChanged(Partition partition) { for (Map.Entry<String, HelixAdmin> entry : adminForDc.entrySet()) { String dcName = entry.getKey(); HelixAdmin dcAdmin = entry.getValue(); String partitionName = Long.toString(partition.getId()); long replicaCapacityInStatic = partition.getReplicaCapacityInBytes(); boolean isSealed = partition.getPartitionState().equals(PartitionState.READ_ONLY); List<ReplicaId> replicaList = getReplicasInDc(partition, dcName); for (ReplicaId replicaId : replicaList) { DataNodeId node = replicaId.getDataNodeId(); String instanceName = getInstanceName(node); InstanceConfig instanceConfig = dcAdmin.getInstanceConfig(clusterName, instanceName); boolean shouldSetInstanceConfig = false; if (updateSealedStateIfRequired(partitionName, instanceConfig, isSealed)) { System.out.println( "Sealed state change of partition " + partitionName + " will be updated for instance " + instanceName); shouldSetInstanceConfig = true; } if (updateReplicaCapacityIfRequired(partitionName, instanceConfig, replicaId.getMountPath(), replicaCapacityInStatic)) { System.out.println( "Replica capacity change of partition " + partitionName + " will be updated for instance" + instanceName); shouldSetInstanceConfig = true; } if (shouldSetInstanceConfig) { dcAdmin.setInstanceConfig(clusterName, instanceName, instanceConfig); System.out.println("Successfully updated InstanceConfig for instance " + instanceName); } } } } /** * Update the sealed state of the given partition on the node corresponding to the given {@link InstanceConfig}, if * there has been a change. * @param partitionName the partition * @param instanceConfig the {@link InstanceConfig} of the node. * @param isSealed whether the partition is in SEALED state in the static cluster map. * @return true, if the {@link InstanceConfig} was updated by this method; false otherwise. */ private boolean updateSealedStateIfRequired(String partitionName, InstanceConfig instanceConfig, boolean isSealed) { boolean instanceConfigUpdated = false; List<String> currentSealedPartitions = instanceConfig.getRecord().getListField(ClusterMapUtils.SEALED_STR); List<String> newSealedPartitionsList = new ArrayList<>(currentSealedPartitions); if (isSealed && !currentSealedPartitions.contains(partitionName)) { newSealedPartitionsList.add(partitionName); } else if (!isSealed && currentSealedPartitions.contains(partitionName)) { newSealedPartitionsList.remove(partitionName); } if (!currentSealedPartitions.equals(newSealedPartitionsList)) { instanceConfig.getRecord().setListField(ClusterMapUtils.SEALED_STR, newSealedPartitionsList); instanceConfigUpdated = true; } return instanceConfigUpdated; } /** * Update replica capacity for the given partition on the node corresponding to the given * {@link InstanceConfig}, if there has been a change. * @param partitionName the partition * @param instanceConfig the {@link InstanceConfig} of the node. * @param mountPath the mount path of the replica. * @return true, if the {@link InstanceConfig} was updated by this method; false otherwise. */ private boolean updateReplicaCapacityIfRequired(String partitionName, InstanceConfig instanceConfig, String mountPath, long actualReplicaCapacity) { boolean instanceConfigUpdated = false; Map<String, String> diskInfo = instanceConfig.getRecord().getMapField(mountPath); String currentReplicasStr = diskInfo.get(ClusterMapUtils.REPLICAS_STR); StringBuilder newReplicaStrBuilder = new StringBuilder(); List<String> replicaInfoList = Arrays.asList(currentReplicasStr.split(ClusterMapUtils.REPLICAS_DELIM_STR)); for (String replicaInfo : replicaInfoList) { String[] info = replicaInfo.split(ClusterMapUtils.REPLICAS_STR_SEPARATOR); if (info[0].equals(partitionName)) { long capacityInHelix = Long.valueOf(info[1]); if (capacityInHelix != actualReplicaCapacity) { info[1] = Long.toString(actualReplicaCapacity); } } newReplicaStrBuilder.append(info[0]) .append(ClusterMapUtils.REPLICAS_STR_SEPARATOR) .append(info[1]) .append(ClusterMapUtils.REPLICAS_DELIM_STR); } String newReplicaStr = newReplicaStrBuilder.toString(); if (!currentReplicasStr.equals(newReplicaStr)) { diskInfo.put(ClusterMapUtils.REPLICAS_STR, newReplicaStr); instanceConfig.getRecord().setMapField(mountPath, diskInfo); instanceConfigUpdated = true; } return instanceConfigUpdated; } /** * Adds all partitions to every datacenter with replicas in nodes as specified in the static clustermap (unless it * was already added). * * The assumption is that in the static layout, every partition is contained in every colo. We make this assumption * to ensure that partitions are grouped under the same resource in all colos (since the resource id is not * something that is present today in the static cluster map). This is not a strict requirement though, but helps * ease the logic. * * Note: 1. We ensure that the partition names are unique in the Ambry cluster even across resources. * 2. New Ambry partitions will not be added to Helix resources that are already present before the call to this * method. */ private void addNewAmbryPartitions(List<Partition> partitions, String resourceName) { // In the future, a resource may be used to group together partitions of a container. For now, multiple // resources are created and partitions are grouped under these resources upto a maximum threshold. if (partitions.isEmpty()) { throw new IllegalArgumentException("Cannot add resource with zero partitions"); } for (Map.Entry<String, HelixAdmin> entry : adminForDc.entrySet()) { String dcName = entry.getKey(); HelixAdmin dcAdmin = entry.getValue(); AutoModeISBuilder resourceISBuilder = new AutoModeISBuilder(resourceName); int numReplicas = 0; resourceISBuilder.setStateModel(LeaderStandbySMD.name); for (Partition partition : partitions) { String partitionName = Long.toString(partition.getId()); boolean sealed = partition.getPartitionState().equals(PartitionState.READ_ONLY); List<ReplicaId> replicaList = getReplicasInDc(partition, dcName); numReplicas = replicaList.size(); String[] instances = updateInstancesAndGetInstanceNames(dcAdmin, partitionName, replicaList, sealed); Collections.shuffle(Arrays.asList(instances)); resourceISBuilder.assignPreferenceList(partitionName, instances); } resourceISBuilder.setNumPartitions(partitions.size()); resourceISBuilder.setNumReplica(numReplicas); IdealState idealState = resourceISBuilder.build(); dcAdmin.addResource(clusterName, resourceName, idealState); System.out.println( "Added " + partitions.size() + " new partitions under resource " + resourceName + " in datacenter " + dcName); } } /** * Updates instances that hosts replicas of this partition with the replica information (including the mount points * on which these replicas should reside, which will be purely an instance level information). * @param dcAdmin the admin to the Zk server on which this operation is to be done. * @param partitionName the partition name. * @param replicaList the list of replicas of this partition. * @param sealed whether the given partition state is sealed. * @return an array of Strings containing the names of the instances on which the replicas of this partition reside. */ private String[] updateInstancesAndGetInstanceNames(HelixAdmin dcAdmin, String partitionName, List<ReplicaId> replicaList, boolean sealed) { String[] instances = new String[replicaList.size()]; for (int i = 0; i < replicaList.size(); i++) { Replica replica = (Replica) replicaList.get(i); DataNodeId node = replica.getDataNodeId(); String instanceName = getInstanceName(node); instances[i] = instanceName; InstanceConfig instanceConfig = dcAdmin.getInstanceConfig(clusterName, instanceName); Map<String, String> diskInfo = instanceConfig.getRecord().getMapField(replica.getMountPath()); String replicasStr = diskInfo.get(ClusterMapUtils.REPLICAS_STR); replicasStr += replica.getPartition().getId() + ClusterMapUtils.REPLICAS_STR_SEPARATOR + replica.getCapacityInBytes() + ClusterMapUtils.REPLICAS_DELIM_STR; diskInfo.put(ClusterMapUtils.REPLICAS_STR, replicasStr); instanceConfig.getRecord().setMapField(replica.getMountPath(), diskInfo); if (sealed) { List<String> currentSealedPartitions = instanceConfig.getRecord().getListField(ClusterMapUtils.SEALED_STR); List<String> newSealedPartitionsList = new ArrayList<>(currentSealedPartitions); newSealedPartitionsList.add(partitionName); instanceConfig.getRecord().setListField(ClusterMapUtils.SEALED_STR, newSealedPartitionsList); } dcAdmin.setInstanceConfig(clusterName, instanceName, instanceConfig); } return instances; } /** * Helper method to get the list of {@link ReplicaId} of all replicas of a partition in the given datacenter. * @param partition the partition of interest. * @param dcName the datacenter to which the returned replicas should belong. * @return a list of {@link ReplicaId} of all replicas of the given partition in the given datacenter. */ private List<ReplicaId> getReplicasInDc(Partition partition, String dcName) { // returns a copy unlike getReplicas() List<ReplicaId> replicaList = partition.getReplicaIds(); ListIterator<ReplicaId> iter = replicaList.listIterator(); while (iter.hasNext()) { if (!iter.next().getDataNodeId().getDatacenterName().equals(dcName)) { iter.remove(); } } return replicaList; } /** * Get the instance name string associated with this data node in Helix. * @param dataNode the {@link DataNodeId} of the data node. * @return the instance name string. */ private static String getInstanceName(DataNodeId dataNode) { return ClusterMapUtils.getInstanceName(dataNode.getHostname(), dataNode.getPort()); } /** * Validate that the information in Helix is consistent with the information in the static clustermap; and close * all the admin connections to ZK hosts. */ private void validateAndClose() throws Exception { try { verifyEquivalencyWithStaticClusterMap(staticClusterMap.hardwareLayout, staticClusterMap.partitionLayout); } finally { for (HelixAdmin admin : adminForDc.values()) { admin.close(); } } } /** * Verify that the information in Helix and the information in the static clustermap are equivalent. * @param hardwareLayout the {@link HardwareLayout} of the static clustermap. * @param partitionLayout the {@link PartitionLayout} of the static clustermap. */ private void verifyEquivalencyWithStaticClusterMap(HardwareLayout hardwareLayout, PartitionLayout partitionLayout) throws Exception { String clusterNameInStaticClusterMap = hardwareLayout.getClusterName(); System.out.println("Verifying equivalency of static cluster: " + clusterNameInStaticClusterMap + " with the " + "corresponding cluster in Helix: " + clusterName); for (Datacenter dc : hardwareLayout.getDatacenters()) { HelixAdmin admin = adminForDc.get(dc.getName()); ensureOrThrow(admin != null, "No ZkInfo for datacenter " + dc.getName()); ensureOrThrow(admin.getClusters().contains(clusterName), "Cluster not found in ZK " + dataCenterToZkAddress.get(dc.getName())); verifyResourcesAndPartitionEquivalencyInDc(dc, clusterName, partitionLayout); verifyDataNodeAndDiskEquivalencyInDc(dc, clusterName, partitionLayout); } System.out.println("Successfully verified equivalency of static cluster: " + clusterNameInStaticClusterMap + " with the corresponding cluster in Helix: " + clusterName); } /** * Verify that the hardware layout information is in sync - which includes the node and disk information. Also verify * that the replicas belonging to disks are in sync between the static cluster map and Helix. * @param dc the datacenter whose information is to be verified. * @param clusterName the cluster to be verified. * @param partitionLayout the {@link PartitionLayout} of the static clustermap. */ private void verifyDataNodeAndDiskEquivalencyInDc(Datacenter dc, String clusterName, PartitionLayout partitionLayout) throws Exception { StaticClusterManager staticClusterMap = (new StaticClusterAgentsFactory(null, partitionLayout)).getClusterMap(); HelixAdmin admin = adminForDc.get(dc.getName()); List<String> allInstancesInHelix = admin.getInstancesInCluster(clusterName); for (DataNodeId dataNodeId : dc.getDataNodes()) { Map<String, Map<String, String>> mountPathToReplicas = getMountPathToReplicas(staticClusterMap, dataNodeId); DataNode dataNode = (DataNode) dataNodeId; String instanceName = getInstanceName(dataNode); ensureOrThrow(allInstancesInHelix.remove(instanceName), "Instance not present in Helix " + instanceName); InstanceConfig instanceConfig = admin.getInstanceConfig(clusterName, instanceName); Map<String, Map<String, String>> diskInfos = new HashMap<>(instanceConfig.getRecord().getMapFields()); for (Disk disk : dataNode.getDisks()) { Map<String, String> diskInfoInHelix = diskInfos.remove(disk.getMountPath()); ensureOrThrow(diskInfoInHelix != null, "Disk not present for instance " + instanceName + " disk " + disk.getMountPath()); ensureOrThrow( disk.getRawCapacityInBytes() == Long.valueOf(diskInfoInHelix.get(ClusterMapUtils.DISK_CAPACITY_STR)), "Capacity mismatch for instance " + instanceName + " disk " + disk.getMountPath()); Set<String> replicasInClusterMap; Map<String, String> replicaList = mountPathToReplicas.get(disk.getMountPath()); replicasInClusterMap = new HashSet<>(); if (replicaList != null) { replicasInClusterMap.addAll(replicaList.keySet()); } Set<String> replicasInHelix; String replicasStr = diskInfoInHelix.get(ClusterMapUtils.REPLICAS_STR); if (replicasStr.isEmpty()) { replicasInHelix = new HashSet<>(); } else { replicasInHelix = new HashSet<>(); List<String> replicaInfoList = Arrays.asList(replicasStr.split(ClusterMapUtils.REPLICAS_DELIM_STR)); for (String replicaInfo : replicaInfoList) { String[] info = replicaInfo.split(ClusterMapUtils.REPLICAS_STR_SEPARATOR); replicasInHelix.add(info[0]); ensureOrThrow(info[1].equals(replicaList.get(info[0])), "Replica capacity should be the same."); } } ensureOrThrow(replicasInClusterMap.equals(replicasInHelix), "Replica information not consistent for instance " + instanceName + " disk " + disk.getMountPath() + "\n in Helix: " + replicaList + "\n in static clustermap: " + replicasInClusterMap); } ensureOrThrow(diskInfos.isEmpty(), "Instance " + instanceName + " has extra disks in Helix: " + diskInfos); ensureOrThrow(!dataNode.hasSSLPort() || (dataNode.getSSLPort() == Integer.valueOf( instanceConfig.getRecord().getSimpleField(ClusterMapUtils.SSLPORT_STR))), "SSL Port mismatch for instance " + instanceName); ensureOrThrow(dataNode.getDatacenterName() .equals(instanceConfig.getRecord().getSimpleField(ClusterMapUtils.DATACENTER_STR)), "Datacenter mismatch for instance " + instanceName); ensureOrThrow( dataNode.getRackId() == Long.valueOf(instanceConfig.getRecord().getSimpleField(ClusterMapUtils.RACKID_STR)), "Rack Id mismatch for instance " + instanceName); Set<String> sealedReplicasInHelix = new HashSet<>(instanceConfig.getRecord().getListField(ClusterMapUtils.SEALED_STR)); Set<String> sealedReplicasInClusterMap = new HashSet<>(); for (Replica replica : staticClusterMap.getReplicas(dataNodeId)) { if (replica.getPartition().partitionState.equals(PartitionState.READ_ONLY)) { sealedReplicasInClusterMap.add(Long.toString(replica.getPartition().getId())); } } ensureOrThrow(sealedReplicasInClusterMap.equals(sealedReplicasInHelix), "Sealed replicas info mismatch for " + "instance " + instanceName); } ensureOrThrow(allInstancesInHelix.isEmpty(), "Following instances in Helix not found in the clustermap " + allInstancesInHelix); } /** * Verify that the partition layout information is in sync. * @param dc the datacenter whose information is to be verified. * @param clusterName the cluster to be verified. * @param partitionLayout the {@link PartitionLayout} of the static clustermap. */ private void verifyResourcesAndPartitionEquivalencyInDc(Datacenter dc, String clusterName, PartitionLayout partitionLayout) { String dcName = dc.getName(); HelixAdmin admin = adminForDc.get(dc.getName()); Map<String, Set<String>> allPartitionsToInstancesInHelix = new HashMap<>(); for (String resourceName : admin.getResourcesInCluster(clusterName)) { IdealState resourceIS = admin.getResourceIdealState(clusterName, resourceName); ensureOrThrow(resourceIS.getStateModelDefRef().equals(LeaderStandbySMD.name), "StateModel name mismatch for resource " + resourceName); int numReplicasAtResourceLevel = Integer.valueOf(resourceIS.getReplicas()); Set<String> resourcePartitions = resourceIS.getPartitionSet(); for (String resourcePartition : resourcePartitions) { Set<String> partitionInstanceSet = resourceIS.getInstanceSet(resourcePartition); ensureOrThrow(numReplicasAtResourceLevel == partitionInstanceSet.size(), "NumReplicas at resource level " + numReplicasAtResourceLevel + " different from number of replicas for partition " + partitionInstanceSet); ensureOrThrow(allPartitionsToInstancesInHelix.put(resourcePartition, partitionInstanceSet) == null, "Partition " + resourcePartition + " already found under a different resource."); } } for (PartitionId partitionId : partitionLayout.getPartitions()) { Partition partition = (Partition) partitionId; String partitionName = Long.toString(partition.getId()); Set<String> replicaHostsInHelix = allPartitionsToInstancesInHelix.remove(partitionName); ensureOrThrow(replicaHostsInHelix != null, "No replicas found for partition " + partitionName + " in Helix"); for (Replica replica : partition.getReplicas()) { if (replica.getDataNodeId().getDatacenterName().equals(dcName)) { String instanceName = getInstanceName(replica.getDataNodeId()); ensureOrThrow(replicaHostsInHelix.remove(instanceName), "Instance " + instanceName + " for the given " + "replica in the clustermap not found in Helix"); } } ensureOrThrow(replicaHostsInHelix.isEmpty(), "More instances in Helix than in clustermap for partition: " + partitionName + " additional instances: " + replicaHostsInHelix); } ensureOrThrow(allPartitionsToInstancesInHelix.isEmpty(), "More partitions in Helix than in clustermap, additional partitions: " + allPartitionsToInstancesInHelix.keySet()); } /** * A helper method that returns a map of mountPaths to a map of replicas -> replicaCapacity for a given * {@link DataNodeId} * @param staticClusterMap the static {@link StaticClusterManager} * @param dataNodeId the {@link DataNodeId} of interest. * @return the constructed map. */ private static Map<String, Map<String, String>> getMountPathToReplicas(StaticClusterManager staticClusterMap, DataNodeId dataNodeId) { Map<String, Map<String, String>> mountPathToReplicas = new HashMap<>(); for (Replica replica : staticClusterMap.getReplicas(dataNodeId)) { Map<String, String> replicaStrs = mountPathToReplicas.get(replica.getMountPath()); if (replicaStrs != null) { replicaStrs.put(Long.toString(replica.getPartition().getId()), Long.toString(replica.getCapacityInBytes())); } else { replicaStrs = new HashMap<>(); replicaStrs.put(Long.toString(replica.getPartition().getId()), Long.toString(replica.getCapacityInBytes())); mountPathToReplicas.put(replica.getMountPath(), replicaStrs); } } return mountPathToReplicas; } /** * Throw {@link AssertionError} if the given condition is false. * @param condition the boolean condition to check. * @param errStr the error message to associate with the assertion error. */ private void ensureOrThrow(boolean condition, String errStr) { if (!condition) { throw new AssertionError(errStr); } } }