/***************************************************************************
 * Copyright (c) 2012-2015 VMware, Inc. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ***************************************************************************/
package com.vmware.bdd.plugin.ironfan.impl;

import java.net.URI;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;

import com.vmware.bdd.exception.ClusterConfigException;
import com.vmware.bdd.plugin.ironfan.utils.ChefServerUtils;
import com.vmware.bdd.software.mgmt.plugin.exception.SoftwareManagementPluginException;
import com.vmware.bdd.software.mgmt.plugin.exception.ValidationException;
import com.vmware.bdd.software.mgmt.plugin.model.ClusterBlueprint;
import com.vmware.bdd.software.mgmt.plugin.model.NodeGroupInfo;
import com.vmware.bdd.spectypes.HadoopRole;
import com.vmware.bdd.spectypes.NodeGroupRole;
import com.vmware.bdd.spectypes.ServiceType;
import com.vmware.bdd.utils.*;
import com.vmware.bdd.utils.AppConfigValidationUtils.ValidationType;

public class ClusterValidator {
   private static final Logger logger = Logger.getLogger(ClusterValidator.class);

   public boolean validateBlueprint(ClusterBlueprint blueprint,
         List<String> distroRoles) throws ValidationException {
      logger.info("Start to validate blueprint for cluster " + blueprint.getName());
      return validateDistros(blueprint, distroRoles);
   }

   private boolean validateDistros(ClusterBlueprint blueprint,
         List<String> distroRoles) throws ValidationException {
      validateClusterConfig(blueprint);
      return validateRoles(blueprint, distroRoles);
   }

   private void validateClusterConfig(ClusterBlueprint blueprint) {
      validateHadoopConfig(blueprint.getConfiguration(),
            blueprint.isNeedToValidateConfig());
      for (NodeGroupInfo group : blueprint.getNodeGroups()) {
         validateHadoopConfig(group.getConfiguration(),
               blueprint.isNeedToValidateConfig());
      }
   }

   /**
    * Validate that every role used in the blueprint exists.
    *
    * @param blueprint   the cluster blueprint to validate
    * @param distroRoles the roles supported by the selected distro
    * @return true if all role dependencies are satisfied
    * @throws ValidationException if any role is invalid or unsupported
    */
   private boolean validateRoles(ClusterBlueprint blueprint,
         List<String> distroRoles) throws ValidationException {
      assert (blueprint != null && distroRoles != null);
      List<String> failedMsgList = new ArrayList<String>();
      List<String> warningMsgList = new ArrayList<String>();
      // Check role validity only on the server side, not in the CLI or GUI,
      // because the role information exists only on the server side.
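      // Validation proceeds in three stages: role existence (Chef server and
      // distro manifest), role dependencies, then per-group configuration.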
      checkUnsupportedRoles(blueprint, distroRoles, failedMsgList);
      boolean result = validateRoleDependency(failedMsgList, blueprint);
      // Only validate group config for non-MapR distros.
      if (!Constants.MAPR_VENDOR.equalsIgnoreCase(blueprint.getHadoopStack().getVendor())) {
         validateGroupConfig(blueprint, failedMsgList, warningMsgList);
      }
      if (!failedMsgList.isEmpty() || !warningMsgList.isEmpty()) {
         throw ValidationException.VALIDATION_FAIL("Roles", failedMsgList, warningMsgList);
      }
      return result;
   }

   /**
    * Check whether the roles used in the cluster exist in the distro manifest
    * and on the Chef server.
    */
   private void checkUnsupportedRoles(ClusterBlueprint blueprint,
         List<String> distroRoles, List<String> failedMsgList) {
      List<NodeGroupInfo> nodeGroupInfos = blueprint.getNodeGroups();
      assert (nodeGroupInfos != null && !nodeGroupInfos.isEmpty());
      List<String> invalidRoleList = new ArrayList<String>();
      List<String> unsupportedRoleList = new ArrayList<String>();
      for (NodeGroupInfo nodeGroup : nodeGroupInfos) {
         List<String> roles = nodeGroup.getRoles();
         if (roles != null) {
            for (String role : roles) {
               if (!ChefServerUtils.isValidRole(role)) {
                  // The role is not registered on the Chef server at all.
                  invalidRoleList.add(role);
               } else if (!distroRoles.contains(role)
                     && !HadoopRole.isCustomizedRole(role)) {
                  // The role exists but is not supported by the selected distro.
                  unsupportedRoleList.add(role);
               }
            }
         }
      }
      if (!invalidRoleList.isEmpty()) {
         String msgFormat = invalidRoleList.size() > 1
               ? "roles: %1$s are invalid." : "role: %1$s is invalid.";
         failedMsgList.add(String.format(msgFormat,
               new ListToStringConverter(invalidRoleList, ',')));
      }
      if (!unsupportedRoleList.isEmpty()) {
         String msgFormat = unsupportedRoleList.size() > 1
               ? "roles: %1$s are not supported by %2$s."
               : "role: %1$s is not supported by %2$s.";
         failedMsgList.add(String.format(msgFormat,
               new ListToStringConverter(unsupportedRoleList, ','),
               blueprint.getHadoopStack().getDistro()));
      }
   }

   /*
    * Validate role dependencies:
    * Case 1: a compute node group with an external HDFS node group.
    * Case 2: the dependency check of the HDFS, MapReduce, HBase, Zookeeper,
    * Hadoop Client (Pig, Hive, Hadoop Client), and HBase Client combinations.
    * The rules are:
    * - HDFS includes the roles "hadoop_namenode" and "hadoop_datanode";
    * - MapReduce includes the roles "hadoop_jobtracker" and "hadoop_tasktracker";
    * - HBase includes the roles "hbase_master" and "hbase_regionserver";
    * - Zookeeper includes the single role "zookeeper";
    * - Hadoop Client includes the role "hadoop_client";
    * - HBase Client includes the role "hbase_client";
    * - Pig includes the role "pig";
    * - Hive includes the role "hive";
    * - Hive Server includes the role "hive_server";
    * - MapReduce depends on HDFS; HBase depends on HDFS and Zookeeper;
    * - Pig, Hive, and Hive Server depend on MapReduce; HBase Client depends on HBase;
    * - Hadoop Client depends on HDFS.
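    *
    * For example, a spec that declares "hbase_master" and "hbase_regionserver"
    * must also declare "hadoop_namenode", "hadoop_datanode", and "zookeeper",
    * because HBase depends on both HDFS and Zookeeper.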
    */
   public boolean validateRoleDependency(List<String> failedMsgList,
         ClusterBlueprint blueprint) {
      boolean valid = true;
      Set<String> roles = new HashSet<String>();
      List<NodeGroupInfo> nodeGroups = blueprint.getNodeGroups();
      if (nodeGroups == null) {
         return false;
      }
      for (NodeGroupInfo nodeGroupCreate : nodeGroups) {
         List<String> nodeGroupRoles = nodeGroupCreate.getRoles();
         if (nodeGroupRoles == null || nodeGroupRoles.isEmpty()) {
            valid = false;
            failedMsgList.add("Missing role attribute for node group "
                  + nodeGroupCreate.getName() + ".");
         } else {
            roles.addAll(nodeGroupCreate.getRoles());
         }
      }
      if (validateHDFSUrl(blueprint)) {
         // Case 1: an external HDFS is configured.
         if (!roles.contains("hbase_master")) {
            // TODO: consider that Ambari needs the hadoop_namenode role
            // specified even when an external HDFS is used.
            if (roles.contains("hadoop_namenode") || roles.contains("hadoop_datanode")) {
               valid = false;
               failedMsgList.add("Duplicate NameNode or DataNode role.");
            }
            if (!hasMapreduceConfigured(blueprint)) {
               if (!roles.contains("hadoop_jobtracker")
                     && !roles.contains("hadoop_resourcemanager")) {
                  valid = false;
                  failedMsgList.add("Missing JobTracker or ResourceManager role.");
               }
               if (!roles.contains("hadoop_tasktracker")
                     && !roles.contains("hadoop_nodemanager")) {
                  valid = false;
                  failedMsgList.add("Missing TaskTracker or NodeManager role.");
               }
            }
         }
      } else if (!hasMapreduceConfigured(blueprint)) {
         // Case 2: collect the service types involved in the spec file.
         EnumSet<ServiceType> serviceTypes = EnumSet.noneOf(ServiceType.class);
         for (ServiceType service : ServiceType.values()) {
            // Identify partial matches: services with only some roles declared.
            List<HadoopRole> missingRoles = new ArrayList<HadoopRole>();
            for (HadoopRole role : service.getRoles()) {
               if (!roles.contains(role.toString())) {
                  missingRoles.add(role);
               }
            }
            if (missingRoles.isEmpty()) {
               // No missing roles: this service is fully declared in the spec.
               serviceTypes.add(service);
            } else if (missingRoles.size() < service.getRoles().size()) {
               // Some, but not all, roles of this service are declared: fail.
               failedMsgList.add(String.format("Missing role(s): %1$s for service: %2$s.",
                     new ListToStringConverter(missingRoles, ','), service));
               valid = false;
            }
            // If all roles are missing, this service is not in the spec at all;
            // service dependencies are checked below.
         }
         boolean isYarn = serviceTypes.contains(ServiceType.YARN);
         if (isYarn && serviceTypes.contains(ServiceType.MAPRED)) {
            failedMsgList.add("You cannot set " + ServiceType.MAPRED + " "
                  + ServiceType.MAPRED.getRoles() + " and " + ServiceType.YARN + " "
                  + ServiceType.YARN.getRoles() + " \nat the same time.");
            valid = false;
         }
         // Validate the dependency relationships between the declared services.
         if (valid && !serviceTypes.isEmpty()) {
            for (ServiceType service : serviceTypes) {
               EnumSet<ServiceType> dependency = service.depend(isYarn);
               if (dependency != null && !serviceTypes.containsAll(dependency)) {
                  failedMsgList.add("Some dependent services " + dependency + " that "
                        + service + " relies on cannot be found in the spec file.");
                  valid = false;
               }
            }
         }
      }
      return valid;
   }

   public void validateGroupConfig(ClusterBlueprint blueprint,
         List<String> failedMsgList, List<String> warningMsgList) {
      List<NodeGroupInfo> nodeGroups = blueprint.getNodeGroups();
      // Whether hadoop2 NameNode HA is enabled.
      boolean namenodeHACheck = false;
      // Role counters.
      int masterCount = 0, jobtrackerCount = 0, resourcemanagerCount = 0,
            hbasemasterCount = 0, zookeeperCount = 0, workerCount = 0,
            numOfJournalNode = 0;
      for (NodeGroupInfo nodeGroup : nodeGroups) {
         // Get the node group roles.
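         // getNodeGroupRoles() maps the group's role strings onto the coarse
         // NodeGroupRole categories (MASTER, WORKER, CLIENT, ...); a group
         // with no recognized role maps to NodeGroupRole.NONE.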
         List<NodeGroupRole> groupRoles = getNodeGroupRoles(nodeGroup);
         if (groupRoles != null) {
            for (NodeGroupRole role : groupRoles) {
               switch (role) {
               case MASTER:
                  masterCount++;
                  int numOfInstance = nodeGroup.getInstanceNum();
                  if (numOfInstance >= 0 && numOfInstance != 1) {
                     if (numOfInstance != 2) {
                        // NameNode HA currently supports only 2 nodes.
                        collectInstanceNumInvalidateMsg(nodeGroup, failedMsgList);
                     } else {
                        namenodeHACheck = true;
                     }
                  }
                  break;
               case JOB_TRACKER:
                  jobtrackerCount++;
                  if (nodeGroup.getInstanceNum() >= 0 && nodeGroup.getInstanceNum() != 1) {
                     failedMsgList.add(Constants.WRONG_NUM_OF_JOBTRACKER);
                  }
                  break;
               case RESOURCEMANAGER:
                  resourcemanagerCount++;
                  if (nodeGroup.getInstanceNum() >= 0 && nodeGroup.getInstanceNum() != 1) {
                     failedMsgList.add(Constants.WRONG_NUM_OF_RESOURCEMANAGER);
                  }
                  break;
               case HBASE_MASTER:
                  hbasemasterCount++;
                  if (nodeGroup.getInstanceNum() == 0) {
                     collectInstanceNumInvalidateMsg(nodeGroup, failedMsgList);
                  }
                  break;
               case ZOOKEEPER:
                  zookeeperCount++;
                  if (nodeGroup.getInstanceNum() > 0 && nodeGroup.getInstanceNum() < 3) {
                     failedMsgList.add(Constants.WRONG_NUM_OF_ZOOKEEPER);
                  } else if (nodeGroup.getInstanceNum() > 0
                        && nodeGroup.getInstanceNum() % 2 == 0) {
                     warningMsgList.add(Constants.ODD_NUM_OF_ZOOKEEPER);
                  }
                  break;
               case JOURNAL_NODE:
                  numOfJournalNode += nodeGroup.getInstanceNum();
                  if (nodeGroup.getRoles().contains(HadoopRole.HADOOP_DATANODE.toString())
                        || nodeGroup.getRoles().contains(
                              HadoopRole.HADOOP_CLIENT_ROLE.toString())) {
                     failedMsgList.add(Constants.DATA_CLIENT_NODE_JOURNALNODE_COEXIST);
                  }
                  break;
               case WORKER:
                  workerCount++;
                  if (nodeGroup.getInstanceNum() == 0) {
                     collectInstanceNumInvalidateMsg(nodeGroup, failedMsgList);
                  } else if (nodeGroup.isHaEnabled()) {
                     warningMsgList.add(Constants.WORKER_CLIENT_HA_FLAG);
                  }
                  // Check whether the DataNode and RegionServer roles are separated.
                  List<String> roles = nodeGroup.getRoles();
                  // TODO: re-enable this check after the error message handling
                  // is refactored.
                  /*
                  if (roles.contains(HadoopRole.HBASE_REGIONSERVER_ROLE.toString())
                        && !roles.contains(HadoopRole.HADOOP_DATANODE.toString())) {
                     warningMsgList.add(Constants.REGISONSERVER_DATANODE_SEPERATION);
                  }
                  */
                  break;
               case CLIENT:
                  if (nodeGroup.isHaEnabled()) {
                     warningMsgList.add(Constants.WORKER_CLIENT_HA_FLAG);
                  }
                  break;
               case NONE:
                  // The server side validates whether the roles of this group exist.
                  break;
               default:
                  break;
               }
            }
         }
      }
      if (!supportedWithHdfs2(blueprint)) {
         if (namenodeHACheck || masterCount > 1) {
            failedMsgList.add(Constants.CURRENT_DISTRO_CAN_NOT_SUPPORT_HDFS2);
         }
      } else if (namenodeHACheck) {
         if (numOfJournalNode >= 0 && numOfJournalNode < 3) {
            failedMsgList.add(Constants.WRONG_NUM_OF_JOURNALNODE);
         } else if (numOfJournalNode > 0 && numOfJournalNode % 2 == 0) {
            warningMsgList.add(Constants.ODD_NUM_OF_JOURNALNODE);
         }
         // Zookeeper is required for automatic NameNode HA failover.
         if (zookeeperCount == 0) {
            failedMsgList.add(Constants.NAMENODE_AUTO_FAILOVER_ZOOKEEPER);
         }
      }
      if ((jobtrackerCount > 1) || (resourcemanagerCount > 1)
            || (zookeeperCount > 1) || (hbasemasterCount > 1)) {
         failedMsgList.add(Constants.WRONG_NUM_OF_NODEGROUPS);
      }
      if (numOfJournalNode > 0 && !namenodeHACheck) {
         failedMsgList.add(Constants.NO_NAMENODE_HA);
      }
      if (!warningMsgList.isEmpty() && !warningMsgList.get(0).startsWith("Warning: ")) {
         warningMsgList.set(0, "Warning: " + warningMsgList.get(0));
      }
   }

   private void validateHadoopConfig(Map<String, Object> appConfigs,
         boolean checkWhiteList) {
      if (appConfigs == null || appConfigs.isEmpty()) {
         return;
      }
      // Validate the hadoop configuration against the white list.
      if (checkWhiteList) {
         logger.debug("Validate hadoop configuration in white list.");
         ValidateResult valid = AppConfigValidationUtils.validateConfig(
               ValidationType.WHITE_LIST, appConfigs);
         switch (valid.getType()) {
         case WHITE_LIST_INVALID_VALUE:
            throw ClusterConfigException.INVALID_APP_CONFIG_VALUE(valid.getFailureValues());
         case WHITE_LIST_INVALID_NAME:
            logger.warn("Hadoop configurations " + valid.getNoExistFileNames() + " "
                  + valid.getFailureNames() + " not in white list.");
            break;
         default:
            logger.debug("Passed white list validation.");
            break;
         }
      }
      // Validate the hadoop configuration against the black list.
      logger.debug("Validate hadoop configuration in black list.");
      ValidateResult valid = AppConfigValidationUtils.validateConfig(
            ValidationType.BLACK_LIST, appConfigs);
      switch (valid.getType()) {
      case NAME_IN_BLACK_LIST:
         logger.warn("Hadoop configurations " + valid.getFailureNames()
               + " in black list. The configuration for these parameters does not take effect.");
         break;
      default:
         logger.debug("Passed black list validation.");
         break;
      }
   }

   public boolean validateHDFSUrl(ClusterBlueprint blueprint) {
      if (blueprint.getExternalHDFS() != null) {
         try {
            URI uri = new URI(blueprint.getExternalHDFS());
            // A valid external HDFS URI must use the hdfs scheme and name a host.
            return "hdfs".equalsIgnoreCase(uri.getScheme()) && uri.getHost() != null;
         } catch (Exception ex) {
            logger.warn("Invalid external HDFS URI: " + blueprint.getExternalHDFS(), ex);
            return false;
         }
      }
      return false;
   }

   private void collectInstanceNumInvalidateMsg(NodeGroupInfo nodeGroup,
         List<String> failedMsgList) {
      failedMsgList.add(nodeGroup.getName() + ".instanceNum="
            + nodeGroup.getInstanceNum() + ".");
   }

   private List<NodeGroupRole> getNodeGroupRoles(NodeGroupInfo nodeGroup) {
      List<NodeGroupRole> groupRoles = new ArrayList<NodeGroupRole>();
      // Find the roles list from the current NodeGroupInfo instance.
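      // nodeGroup.getRoles() may return null for a group with no roles,
      // hence the null check inside the loop below.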
      List<String> roles = nodeGroup.getRoles();
      for (NodeGroupRole role : NodeGroupRole.values()) {
         if (roles != null && matchRole(role, roles)) {
            groupRoles.add(role);
         }
      }
      if (groupRoles.isEmpty()) {
         groupRoles.add(NodeGroupRole.NONE);
      }
      return groupRoles;
   }

   /**
    * Check whether the given role strings match the specified system role.
    */
   private boolean matchRole(NodeGroupRole role, List<String> roles) {
      switch (role) {
      case MASTER:
         return roles.contains(HadoopRole.HADOOP_NAMENODE_ROLE.toString());
      case JOB_TRACKER:
         return roles.contains(HadoopRole.HADOOP_JOBTRACKER_ROLE.toString());
      case RESOURCEMANAGER:
         return roles.contains(HadoopRole.HADOOP_RESOURCEMANAGER_ROLE.toString());
      case HBASE_MASTER:
         return roles.contains(HadoopRole.HBASE_MASTER_ROLE.toString());
      case ZOOKEEPER:
         return roles.contains(HadoopRole.ZOOKEEPER_ROLE.toString());
      case JOURNAL_NODE:
         return roles.contains(HadoopRole.HADOOP_JOURNALNODE_ROLE.toString());
      case WORKER:
         return roles.contains(HadoopRole.HADOOP_DATANODE.toString())
               || roles.contains(HadoopRole.HADOOP_TASKTRACKER.toString())
               || roles.contains(HadoopRole.HBASE_REGIONSERVER_ROLE.toString())
               || roles.contains(HadoopRole.HADOOP_NODEMANAGER_ROLE.toString());
      case CLIENT:
         return roles.contains(HadoopRole.HADOOP_CLIENT_ROLE.toString())
               || roles.contains(HadoopRole.HIVE_ROLE.toString())
               || roles.contains(HadoopRole.HIVE_SERVER_ROLE.toString())
               || roles.contains(HadoopRole.PIG_ROLE.toString())
               || roles.contains(HadoopRole.HBASE_CLIENT_ROLE.toString());
      case NONE:
      default:
         return false;
      }
   }

   // The apache, mapr, and gphd distros do not support HDFS2 features
   // such as NameNode HA.
   private boolean supportedWithHdfs2(ClusterBlueprint blueprint) {
      String vendor = blueprint.getHadoopStack().getVendor();
      if (vendor != null
            && (vendor.equalsIgnoreCase(Constants.APACHE_VENDOR)
                  || vendor.equalsIgnoreCase(Constants.MAPR_VENDOR)
                  || vendor.equalsIgnoreCase(Constants.GPHD_VENDOR))) {
         return false;
      }
      return true;
   }

   public boolean hasMapreduceConfigured(ClusterBlueprint cluster) {
      return !CommonUtil.isBlank(cluster.getExternalMapReduce());
   }
}
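/*
 * Example usage (a minimal sketch; assumes the caller already holds a
 * populated ClusterBlueprint and the role list parsed from the distro
 * manifest; "blueprint" and "distroRoles" below are placeholder names
 * supplied by the caller):
 *
 *    ClusterValidator validator = new ClusterValidator();
 *    try {
 *       validator.validateBlueprint(blueprint, distroRoles);
 *    } catch (ValidationException e) {
 *       // The exception carries the failed and warning message lists
 *       // passed to ValidationException.VALIDATION_FAIL() above.
 *       System.err.println("Blueprint validation failed: " + e.getMessage());
 *    }
 */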